[Midnightbsd-cvs] src: sys/netinet: bring in freebsd changes.
laffer1 at midnightbsd.org
Fri Sep 12 20:54:26 EDT 2008
Log Message:
-----------
bring in freebsd changes.
Modified Files:
--------------
src/sys/netinet:
accf_data.c (r1.1.1.1 -> r1.2)
accf_http.c (r1.1.1.1 -> r1.2)
icmp6.h (r1.1.1.2 -> r1.2)
icmp_var.h (r1.1.1.1 -> r1.2)
if_atm.c (r1.1.1.1 -> r1.2)
if_ether.c (r1.7 -> r1.8)
igmp.c (r1.1.1.1 -> r1.2)
igmp.h (r1.1.1.1 -> r1.2)
igmp_var.h (r1.1.1.1 -> r1.2)
in.c (r1.1.1.2 -> r1.2)
in.h (r1.2 -> r1.3)
in_cksum.c (r1.1.1.1 -> r1.2)
in_gif.c (r1.1.1.2 -> r1.2)
in_pcb.c (r1.1.1.2 -> r1.2)
in_pcb.h (r1.1.1.1 -> r1.2)
in_proto.c (r1.1.1.2 -> r1.2)
in_rmx.c (r1.1.1.1 -> r1.2)
in_var.h (r1.1.1.1 -> r1.2)
ip.h (r1.1.1.1 -> r1.2)
ip6.h (r1.1.1.1 -> r1.2)
ip_carp.c (r1.1.1.2 -> r1.2)
ip_carp.h (r1.1.1.1 -> r1.2)
ip_divert.c (r1.1.1.1 -> r1.2)
ip_dummynet.c (r1.1.1.2 -> r1.2)
ip_dummynet.h (r1.1.1.2 -> r1.2)
ip_ecn.c (r1.1.1.1 -> r1.2)
ip_encap.c (r1.1.1.1 -> r1.2)
ip_fastfwd.c (r1.1.1.1 -> r1.2)
ip_fw.h (r1.1.1.2 -> r1.2)
ip_fw2.c (r1.2 -> r1.3)
ip_fw_pfil.c (r1.1.1.2 -> r1.2)
ip_gre.c (r1.1.1.2 -> r1.2)
ip_gre.h (r1.1.1.2 -> r1.2)
ip_icmp.c (r1.1.1.2 -> r1.2)
ip_id.c (r1.2 -> r1.3)
ip_input.c (r1.2 -> r1.3)
ip_mroute.c (r1.1.1.2 -> r1.2)
ip_mroute.h (r1.1.1.2 -> r1.2)
ip_output.c (r1.6 -> r1.7)
ip_var.h (r1.2 -> r1.3)
pim_var.h (r1.1.1.1 -> r1.2)
raw_ip.c (r1.1.1.1 -> r1.2)
tcp.h (r1.1.1.1 -> r1.2)
tcp_debug.c (r1.1.1.1 -> r1.2)
tcp_debug.h (r1.1.1.1 -> r1.2)
tcp_fsm.h (r1.1.1.1 -> r1.2)
tcp_hostcache.c (r1.1.1.1 -> r1.2)
tcp_input.c (r1.5 -> r1.6)
tcp_output.c (r1.3 -> r1.4)
tcp_sack.c (r1.1.1.1 -> r1.2)
tcp_seq.h (r1.1.1.1 -> r1.2)
tcp_subr.c (r1.2 -> r1.3)
tcp_syncache.c (r1.8 -> r1.9)
tcp_timer.c (r1.2 -> r1.3)
tcp_timer.h (r1.2 -> r1.3)
tcp_usrreq.c (r1.7 -> r1.8)
tcp_var.h (r1.3 -> r1.4)
udp.h (r1.1.1.1 -> r1.2)
udp_usrreq.c (r1.2 -> r1.3)
udp_var.h (r1.1.1.1 -> r1.2)
src/sys/netinet/libalias:
alias.c (r1.1.1.1 -> r1.2)
alias.h (r1.1.1.1 -> r1.2)
alias_cuseeme.c (r1.1.1.1 -> r1.2)
alias_db.c (r1.1.1.1 -> r1.2)
alias_ftp.c (r1.1.1.1 -> r1.2)
alias_irc.c (r1.1.1.1 -> r1.2)
alias_local.h (r1.1.1.1 -> r1.2)
alias_nbt.c (r1.1.1.1 -> r1.2)
alias_old.c (r1.1.1.1 -> r1.2)
alias_pptp.c (r1.1.1.1 -> r1.2)
alias_proxy.c (r1.2 -> r1.3)
alias_skinny.c (r1.1.1.1 -> r1.2)
alias_smedia.c (r1.1.1.1 -> r1.2)
alias_util.c (r1.1.1.1 -> r1.2)
libalias.3 (r1.1.1.1 -> r1.2)
src/sys/netinet6:
dest6.c (r1.1.1.1 -> r1.2)
frag6.c (r1.1.1.1 -> r1.2)
in6.c (r1.3 -> r1.4)
in6.h (r1.3 -> r1.4)
in6_cksum.c (r1.1.1.1 -> r1.2)
in6_gif.c (r1.1.1.2 -> r1.2)
in6_ifattach.c (r1.1.1.2 -> r1.2)
in6_ifattach.h (r1.1.1.2 -> r1.2)
in6_pcb.c (r1.1.1.2 -> r1.2)
in6_pcb.h (r1.1.1.1 -> r1.2)
in6_proto.c (r1.2 -> r1.3)
in6_rmx.c (r1.1.1.1 -> r1.2)
in6_src.c (r1.1.1.2 -> r1.2)
in6_var.h (r1.1.1.2 -> r1.2)
ip6_forward.c (r1.1.1.1 -> r1.2)
ip6_id.c (r1.1.1.1 -> r1.2)
ip6_input.c (r1.1.1.2 -> r1.2)
ip6_mroute.c (r1.2 -> r1.3)
ip6_mroute.h (r1.1.1.1 -> r1.2)
ip6_output.c (r1.1.1.2 -> r1.2)
ip6_var.h (r1.1.1.2 -> r1.2)
mld6.c (r1.2 -> r1.3)
mld6_var.h (r1.1.1.2 -> r1.2)
nd6.c (r1.3 -> r1.4)
nd6.h (r1.1.1.2 -> r1.2)
nd6_nbr.c (r1.1.1.2 -> r1.2)
nd6_rtr.c (r1.2 -> r1.3)
pim6_var.h (r1.1.1.1 -> r1.2)
raw_ip6.c (r1.1.1.2 -> r1.2)
route6.c (r1.2 -> r1.3)
scope6.c (r1.1.1.1 -> r1.2)
scope6_var.h (r1.1.1.1 -> r1.2)
udp6_usrreq.c (r1.1.1.2 -> r1.2)
udp6_var.h (r1.1.1.1 -> r1.2)
Added Files:
-----------
src/sys/netinet:
in_mcast.c (r1.1)
ip_ipsec.c (r1.1)
ip_ipsec.h (r1.1)
ip_options.c (r1.1)
ip_options.h (r1.1)
sctp.h (r1.1)
sctp_asconf.c (r1.1)
sctp_asconf.h (r1.1)
sctp_auth.c (r1.1)
sctp_auth.h (r1.1)
sctp_bsd_addr.c (r1.1)
sctp_bsd_addr.h (r1.1)
sctp_cc_functions.c (r1.1)
sctp_cc_functions.h (r1.1)
sctp_constants.h (r1.1)
sctp_crc32.c (r1.1)
sctp_crc32.h (r1.1)
sctp_header.h (r1.1)
sctp_indata.c (r1.1)
sctp_indata.h (r1.1)
sctp_input.c (r1.1)
sctp_input.h (r1.1)
sctp_lock_bsd.h (r1.1)
sctp_os.h (r1.1)
sctp_os_bsd.h (r1.1)
sctp_output.c (r1.1)
sctp_output.h (r1.1)
sctp_pcb.c (r1.1)
sctp_pcb.h (r1.1)
sctp_peeloff.c (r1.1)
sctp_peeloff.h (r1.1)
sctp_structs.h (r1.1)
sctp_sysctl.c (r1.1)
sctp_sysctl.h (r1.1)
sctp_timer.c (r1.1)
sctp_timer.h (r1.1)
sctp_uio.h (r1.1)
sctp_usrreq.c (r1.1)
sctp_var.h (r1.1)
sctputil.c (r1.1)
sctputil.h (r1.1)
tcp_reass.c (r1.1)
tcp_syncache.h (r1.1)
tcp_timewait.c (r1.1)
src/sys/netinet/libalias:
alias_dummy.c (r1.1)
alias_mod.c (r1.1)
alias_mod.h (r1.1)
-------------- next part --------------
Index: udp_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/udp_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/udp_var.h -L sys/netinet/udp_var.h -u -r1.1.1.1 -r1.2
--- sys/netinet/udp_var.h
+++ sys/netinet/udp_var.h
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 1982, 1986, 1989, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,18 +28,18 @@
* SUCH DAMAGE.
*
* @(#)udp_var.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/udp_var.h,v 1.29 2005/01/07 01:45:45 imp Exp $
+ * $FreeBSD: src/sys/netinet/udp_var.h,v 1.33 2007/07/10 09:30:46 rwatson Exp $
*/
#ifndef _NETINET_UDP_VAR_H_
-#define _NETINET_UDP_VAR_H_
+#define _NETINET_UDP_VAR_H_
/*
* UDP kernel structures and variables.
*/
-struct udpiphdr {
- struct ipovly ui_i; /* overlaid ip structure */
- struct udphdr ui_u; /* udp header */
+struct udpiphdr {
+ struct ipovly ui_i; /* overlaid ip structure */
+ struct udphdr ui_u; /* udp header */
};
#define ui_x1 ui_i.ih_x1
#define ui_pr ui_i.ih_pr
@@ -50,7 +51,7 @@
#define ui_ulen ui_u.uh_ulen
#define ui_sum ui_u.uh_sum
-struct udpstat {
+struct udpstat {
/* input statistics: */
u_long udps_ipackets; /* total input packets */
u_long udps_hdrops; /* packet shorter than header */
@@ -67,45 +68,45 @@
u_long udps_fastout; /* output packets on fast path */
/* of no socket on port, arrived as multicast */
u_long udps_noportmcast;
+ u_long udps_filtermcast; /* blocked by multicast filter */
};
/*
- * Names for UDP sysctl objects
+ * Names for UDP sysctl objects.
*/
#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */
-#define UDPCTL_STATS 2 /* statistics (read-only) */
+#define UDPCTL_STATS 2 /* statistics (read-only) */
#define UDPCTL_MAXDGRAM 3 /* max datagram size */
#define UDPCTL_RECVSPACE 4 /* default receive buffer space */
#define UDPCTL_PCBLIST 5 /* list of PCBs for UDP sockets */
-#define UDPCTL_MAXID 6
+#define UDPCTL_MAXID 6
-#define UDPCTL_NAMES { \
- { 0, 0 }, \
- { "checksum", CTLTYPE_INT }, \
- { "stats", CTLTYPE_STRUCT }, \
- { "maxdgram", CTLTYPE_INT }, \
- { "recvspace", CTLTYPE_INT }, \
- { "pcblist", CTLTYPE_STRUCT }, \
+#define UDPCTL_NAMES { \
+ { 0, 0 }, \
+ { "checksum", CTLTYPE_INT }, \
+ { "stats", CTLTYPE_STRUCT }, \
+ { "maxdgram", CTLTYPE_INT }, \
+ { "recvspace", CTLTYPE_INT }, \
+ { "pcblist", CTLTYPE_STRUCT }, \
}
#ifdef _KERNEL
SYSCTL_DECL(_net_inet_udp);
-extern struct pr_usrreqs udp_usrreqs;
-extern struct inpcbhead udb;
-extern struct inpcbinfo udbinfo;
-extern u_long udp_sendspace;
-extern u_long udp_recvspace;
-extern struct udpstat udpstat;
-extern int log_in_vain;
-
-void udp_ctlinput(int, struct sockaddr *, void *);
-void udp_init(void);
-void udp_input(struct mbuf *, int);
-
-struct inpcb *
- udp_notify(struct inpcb *inp, int errno);
-int udp_shutdown(struct socket *so);
+extern struct pr_usrreqs udp_usrreqs;
+extern struct inpcbhead udb;
+extern struct inpcbinfo udbinfo;
+extern u_long udp_sendspace;
+extern u_long udp_recvspace;
+extern struct udpstat udpstat;
+extern int udp_blackhole;
+extern int udp_log_in_vain;
+
+void udp_ctlinput(int, struct sockaddr *, void *);
+void udp_init(void);
+void udp_input(struct mbuf *, int);
+struct inpcb *udp_notify(struct inpcb *inp, int errno);
+int udp_shutdown(struct socket *so);
#endif
#endif
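The udp_var.h hunk above is mostly whitespace churn, but it also adds the udps_filtermcast counter and exports udp_blackhole and udp_log_in_vain in place of the old log_in_vain extern. As an aside, such knobs are normally driven through sysctl in FreeBSD-derived kernels; the sketch below is an illustration only (not part of the commit) and assumes the conventional net.inet.udp.blackhole name:

/* Illustration only -- reads and (with privilege) sets a UDP sysctl knob
 * from user space; assumes the conventional net.inet.udp.blackhole name. */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int main(void)
{
    int val = 0, newval = 1;
    size_t len = sizeof(val);

    if (sysctlbyname("net.inet.udp.blackhole", &val, &len, NULL, 0) == -1) {
        perror("sysctlbyname");
        return 1;
    }
    printf("net.inet.udp.blackhole = %d\n", val);

    /* Setting the knob needs root; failure is reported, not fatal. */
    if (sysctlbyname("net.inet.udp.blackhole", NULL, NULL,
        &newval, sizeof(newval)) == -1)
        perror("set net.inet.udp.blackhole");
    return 0;
}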
--- /dev/null
+++ sys/netinet/sctp_var.h
@@ -0,0 +1,336 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_var.h,v 1.24 2005/03/06 16:04:19 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_var.h,v 1.20.2.1 2007/10/25 12:27:06 rrs Exp $");
+
+#ifndef _NETINET_SCTP_VAR_H_
+#define _NETINET_SCTP_VAR_H_
+
+#include <netinet/sctp_uio.h>
+
+#if defined(_KERNEL)
+
+extern struct pr_usrreqs sctp_usrreqs;
+
+
+#define sctp_feature_on(inp, feature) (inp->sctp_features |= feature)
+#define sctp_feature_off(inp, feature) (inp->sctp_features &= ~feature)
+#define sctp_is_feature_on(inp, feature) (inp->sctp_features & feature)
+#define sctp_is_feature_off(inp, feature) ((inp->sctp_features & feature) == 0)
+
+/* managing mobility_feature in inpcb (by micchie) */
+#define sctp_mobility_feature_on(inp, feature) (inp->sctp_mobility_features |= feature)
+#define sctp_mobility_feature_off(inp, feature) (inp->sctp_mobility_features &= ~feature)
+#define sctp_is_mobility_feature_on(inp, feature) (inp->sctp_mobility_features & feature)
+#define sctp_is_mobility_feature_off(inp, feature) ((inp->sctp_mobility_features & feature) == 0)
+
+#define sctp_maxspace(sb) (max((sb)->sb_hiwat,SCTP_MINIMAL_RWND))
+
+#define sctp_sbspace(asoc, sb) ((long) ((sctp_maxspace(sb) > (asoc)->sb_cc) ? (sctp_maxspace(sb) - (asoc)->sb_cc) : 0))
+
+#define sctp_sbspace_failedmsgs(sb) ((long) ((sctp_maxspace(sb) > (sb)->sb_cc) ? (sctp_maxspace(sb) - (sb)->sb_cc) : 0))
+
+#define sctp_sbspace_sub(a,b) ((a > b) ? (a - b) : 0)
+
+/*
+ * I tried to cache the readq entries at one point. But the reality
+ * is that it did not add any performance since this meant we had to
+ * lock the STCB on read. And at that point once you have to do an
+ * extra lock, it really does not matter if the lock is in the ZONE
+ * stuff or in our code. Note that this same problem would occur with
+ * an mbuf cache as well so it is not really worth doing, at least
+ * right now :-D
+ */
+
+#define sctp_free_a_readq(_stcb, _readq) { \
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_readq, (_readq)); \
+ SCTP_DECR_READQ_COUNT(); \
+}
+
+#define sctp_alloc_a_readq(_stcb, _readq) { \
+ (_readq) = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_readq, struct sctp_queued_to_read); \
+ if ((_readq)) { \
+ SCTP_INCR_READQ_COUNT(); \
+ } \
+}
+
+#define sctp_free_a_strmoq(_stcb, _strmoq) { \
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_strmoq, (_strmoq)); \
+ SCTP_DECR_STRMOQ_COUNT(); \
+}
+
+#define sctp_alloc_a_strmoq(_stcb, _strmoq) { \
+ (_strmoq) = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_strmoq, struct sctp_stream_queue_pending); \
+ if ((_strmoq)) { \
+ SCTP_INCR_STRMOQ_COUNT(); \
+ } \
+}
+
+
+#define sctp_free_a_chunk(_stcb, _chk) { \
+ if(_stcb) { \
+ SCTP_TCB_LOCK_ASSERT((_stcb)); \
+ if ((_chk)->whoTo) { \
+ sctp_free_remote_addr((_chk)->whoTo); \
+ (_chk)->whoTo = NULL; \
+ } \
+ if (((_stcb)->asoc.free_chunk_cnt > sctp_asoc_free_resc_limit) || \
+ (sctppcbinfo.ipi_free_chunks > sctp_system_free_resc_limit)) { \
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_chunk, (_chk)); \
+ SCTP_DECR_CHK_COUNT(); \
+ } else { \
+ TAILQ_INSERT_TAIL(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \
+ (_stcb)->asoc.free_chunk_cnt++; \
+ atomic_add_int(&sctppcbinfo.ipi_free_chunks, 1); \
+ } \
+ } else { \
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_chunk, (_chk)); \
+ SCTP_DECR_CHK_COUNT(); \
+ } \
+}
+
+#define sctp_alloc_a_chunk(_stcb, _chk) { \
+ if (TAILQ_EMPTY(&(_stcb)->asoc.free_chunks)) { \
+ (_chk) = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_chunk, struct sctp_tmit_chunk); \
+ if ((_chk)) { \
+ SCTP_INCR_CHK_COUNT(); \
+ (_chk)->whoTo = NULL; \
+ } \
+ } else { \
+ (_chk) = TAILQ_FIRST(&(_stcb)->asoc.free_chunks); \
+ TAILQ_REMOVE(&(_stcb)->asoc.free_chunks, (_chk), sctp_next); \
+ atomic_subtract_int(&sctppcbinfo.ipi_free_chunks, 1); \
+ SCTP_STAT_INCR(sctps_cached_chk); \
+ (_stcb)->asoc.free_chunk_cnt--; \
+ } \
+}
+
+
+
+#define sctp_free_remote_addr(__net) { \
+ if ((__net)) { \
+ if (atomic_fetchadd_int(&(__net)->ref_count, -1) == 1) { \
+ (void)SCTP_OS_TIMER_STOP(&(__net)->rxt_timer.timer); \
+ (void)SCTP_OS_TIMER_STOP(&(__net)->pmtu_timer.timer); \
+ (void)SCTP_OS_TIMER_STOP(&(__net)->fr_timer.timer); \
+ if ((__net)->ro.ro_rt) { \
+ RTFREE((__net)->ro.ro_rt); \
+ (__net)->ro.ro_rt = NULL; \
+ } \
+ if ((__net)->src_addr_selected) { \
+ sctp_free_ifa((__net)->ro._s_addr); \
+ (__net)->ro._s_addr = NULL; \
+ } \
+ (__net)->src_addr_selected = 0; \
+ (__net)->dest_state = SCTP_ADDR_NOT_REACHABLE; \
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_net, (__net)); \
+ SCTP_DECR_RADDR_COUNT(); \
+ } \
+ } \
+}
+
+#define sctp_sbfree(ctl, stcb, sb, m) { \
+ uint32_t val; \
+ val = atomic_fetchadd_int(&(sb)->sb_cc,-(SCTP_BUF_LEN((m)))); \
+ if (val < SCTP_BUF_LEN((m))) { \
+ panic("sb_cc goes negative"); \
+ } \
+ val = atomic_fetchadd_int(&(sb)->sb_mbcnt,-(MSIZE)); \
+ if (val < MSIZE) { \
+ panic("sb_mbcnt goes negative"); \
+ } \
+ if (((ctl)->do_not_ref_stcb == 0) && stcb) {\
+ val = atomic_fetchadd_int(&(stcb)->asoc.sb_cc,-(SCTP_BUF_LEN((m)))); \
+ if (val < SCTP_BUF_LEN((m))) {\
+ panic("stcb->sb_cc goes negative"); \
+ } \
+ val = atomic_fetchadd_int(&(stcb)->asoc.my_rwnd_control_len,-(MSIZE)); \
+ if (val < MSIZE) { \
+ panic("asoc->mbcnt goes negative"); \
+ } \
+ } \
+ if (SCTP_BUF_TYPE(m) != MT_DATA && SCTP_BUF_TYPE(m) != MT_HEADER && \
+ SCTP_BUF_TYPE(m) != MT_OOBDATA) \
+ atomic_subtract_int(&(sb)->sb_ctl,SCTP_BUF_LEN((m))); \
+}
+
+
+#define sctp_sballoc(stcb, sb, m) { \
+ atomic_add_int(&(sb)->sb_cc,SCTP_BUF_LEN((m))); \
+ atomic_add_int(&(sb)->sb_mbcnt, MSIZE); \
+ if (stcb) { \
+ atomic_add_int(&(stcb)->asoc.sb_cc,SCTP_BUF_LEN((m))); \
+ atomic_add_int(&(stcb)->asoc.my_rwnd_control_len, MSIZE); \
+ } \
+ if (SCTP_BUF_TYPE(m) != MT_DATA && SCTP_BUF_TYPE(m) != MT_HEADER && \
+ SCTP_BUF_TYPE(m) != MT_OOBDATA) \
+ atomic_add_int(&(sb)->sb_ctl,SCTP_BUF_LEN((m))); \
+}
+
+
+#define sctp_ucount_incr(val) { \
+ val++; \
+}
+
+#define sctp_ucount_decr(val) { \
+ if (val > 0) { \
+ val--; \
+ } else { \
+ val = 0; \
+ } \
+}
+
+#define sctp_mbuf_crush(data) do { \
+ struct mbuf *_m; \
+ _m = (data); \
+ while(_m && (SCTP_BUF_LEN(_m) == 0)) { \
+ (data) = SCTP_BUF_NEXT(_m); \
+ SCTP_BUF_NEXT(_m) = NULL; \
+ sctp_m_free(_m); \
+ _m = (data); \
+ } \
+} while (0)
+
+#define sctp_flight_size_decrease(tp1) do { \
+ if (tp1->whoTo->flight_size >= tp1->book_size) \
+ tp1->whoTo->flight_size -= tp1->book_size; \
+ else \
+ tp1->whoTo->flight_size = 0; \
+} while (0)
+
+#define sctp_flight_size_increase(tp1) do { \
+ (tp1)->whoTo->flight_size += (tp1)->book_size; \
+} while (0)
+
+#ifdef SCTP_FS_SPEC_LOG
+#define sctp_total_flight_decrease(stcb, tp1) do { \
+ if(stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
+ stcb->asoc.fs_index = 0;\
+ stcb->asoc.fslog[stcb->asoc.fs_index].total_flight = stcb->asoc.total_flight; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].tsn = tp1->rec.data.TSN_seq; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].book = tp1->book_size; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].sent = tp1->sent; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].incr = 0; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].decr = 1; \
+ stcb->asoc.fs_index++; \
+ tp1->window_probe = 0; \
+ if (stcb->asoc.total_flight >= tp1->book_size) { \
+ stcb->asoc.total_flight -= tp1->book_size; \
+ if (stcb->asoc.total_flight_count > 0) \
+ stcb->asoc.total_flight_count--; \
+ } else { \
+ stcb->asoc.total_flight = 0; \
+ stcb->asoc.total_flight_count = 0; \
+ } \
+} while (0)
+
+#define sctp_total_flight_increase(stcb, tp1) do { \
+ if(stcb->asoc.fs_index > SCTP_FS_SPEC_LOG_SIZE) \
+ stcb->asoc.fs_index = 0;\
+ stcb->asoc.fslog[stcb->asoc.fs_index].total_flight = stcb->asoc.total_flight; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].tsn = tp1->rec.data.TSN_seq; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].book = tp1->book_size; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].sent = tp1->sent; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].incr = 1; \
+ stcb->asoc.fslog[stcb->asoc.fs_index].decr = 0; \
+ stcb->asoc.fs_index++; \
+ (stcb)->asoc.total_flight_count++; \
+ (stcb)->asoc.total_flight += (tp1)->book_size; \
+} while (0)
+
+#else
+
+#define sctp_total_flight_decrease(stcb, tp1) do { \
+ if (stcb->asoc.total_flight >= tp1->book_size) { \
+ stcb->asoc.total_flight -= tp1->book_size; \
+ if (stcb->asoc.total_flight_count > 0) \
+ stcb->asoc.total_flight_count--; \
+ } else { \
+ stcb->asoc.total_flight = 0; \
+ stcb->asoc.total_flight_count = 0; \
+ } \
+} while (0)
+
+#define sctp_total_flight_increase(stcb, tp1) do { \
+ (stcb)->asoc.total_flight_count++; \
+ (stcb)->asoc.total_flight += (tp1)->book_size; \
+} while (0)
+
+#endif
+
+
+struct sctp_nets;
+struct sctp_inpcb;
+struct sctp_tcb;
+struct sctphdr;
+
+
+void sctp_close(struct socket *so);
+int sctp_disconnect(struct socket *so);
+
+void sctp_ctlinput __P((int, struct sockaddr *, void *));
+int sctp_ctloutput __P((struct socket *, struct sockopt *));
+void sctp_input __P((struct mbuf *, int));
+void sctp_drain __P((void));
+void sctp_init __P((void));
+
+
+void sctp_pcbinfo_cleanup(void);
+
+int sctp_shutdown __P((struct socket *));
+void sctp_notify
+__P((struct sctp_inpcb *, struct ip *ip, struct sctphdr *,
+ struct sockaddr *, struct sctp_tcb *,
+ struct sctp_nets *));
+
+ int sctp_bindx(struct socket *, int, struct sockaddr_storage *,
+ int, int, struct proc *);
+
+/* can't use sctp_assoc_t here */
+ int sctp_peeloff(struct socket *, struct socket *, int, caddr_t, int *);
+
+ int sctp_ingetaddr(struct socket *,
+ struct sockaddr **
+);
+
+ int sctp_peeraddr(struct socket *,
+ struct sockaddr **
+);
+
+ int sctp_listen(struct socket *, int, struct thread *);
+
+ int sctp_accept(struct socket *, struct sockaddr **);
+
+#endif /* _KERNEL */
+
+#endif /* !_NETINET_SCTP_VAR_H_ */
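The sctp_feature_on/off/is_feature_on macros in the new sctp_var.h are plain bit-flag operations on inp->sctp_features. A compile-and-run sketch of that pattern with a stub structure follows; it is an illustration only, with a made-up flag value, and the real struct sctp_inpcb and flag definitions live in the SCTP headers added elsewhere in this commit:

/* Illustration only -- user-space mock of the sctp_features bit-flag macros. */
#include <stdint.h>
#include <stdio.h>

struct mock_inp { uint32_t sctp_features; };      /* stand-in for struct sctp_inpcb */

#define demo_feature_on(inp, feature)    ((inp)->sctp_features |= (feature))
#define demo_feature_off(inp, feature)   ((inp)->sctp_features &= ~(feature))
#define demo_is_feature_on(inp, feature) (((inp)->sctp_features & (feature)) != 0)

#define DEMO_FEATURE_AUTOCLOSE 0x0004             /* made-up flag value for the demo */

int main(void)
{
    struct mock_inp inp = { 0 };

    demo_feature_on(&inp, DEMO_FEATURE_AUTOCLOSE);
    printf("on:  %d\n", demo_is_feature_on(&inp, DEMO_FEATURE_AUTOCLOSE));
    demo_feature_off(&inp, DEMO_FEATURE_AUTOCLOSE);
    printf("off: %d\n", demo_is_feature_on(&inp, DEMO_FEATURE_AUTOCLOSE));
    return 0;
}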
Index: in_cksum.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/in_cksum.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/in_cksum.c -L sys/netinet/in_cksum.c -u -r1.1.1.1 -r1.2
--- sys/netinet/in_cksum.c
+++ sys/netinet/in_cksum.c
@@ -27,9 +27,11 @@
* SUCH DAMAGE.
*
* @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/in_cksum.c,v 1.8 2005/01/07 01:45:44 imp Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/in_cksum.c,v 1.10 2007/10/07 20:44:22 silby Exp $");
+
#include <sys/param.h>
#include <sys/mbuf.h>
@@ -44,9 +46,7 @@
#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);}
int
-in_cksum(m, len)
- register struct mbuf *m;
- register int len;
+in_cksum(struct mbuf *m, int len)
{
register u_short *w;
register int sum = 0;
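The in_cksum.c hunk above converts the old K&R definition to an ANSI prototype and moves the $FreeBSD$ tag into __FBSDID(). For reference, the routine computes the standard one's-complement Internet checksum (RFC 1071); the flat-buffer sketch below illustrates the algorithm only and skips the mbuf walking and the ADDCARRY/REDUCE word-union tricks the kernel uses for speed:

/* Illustration only -- RFC 1071 one's-complement checksum over a flat buffer. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

uint16_t simple_in_cksum(const void *buf, size_t len)
{
    const uint8_t *p = buf;
    uint32_t sum = 0;

    while (len > 1) {                      /* sum 16-bit words */
        sum += ((uint32_t)p[0] << 8) | p[1];
        p += 2;
        len -= 2;
    }
    if (len == 1)                          /* pad a trailing odd byte */
        sum += (uint32_t)p[0] << 8;
    while (sum >> 16)                      /* fold carries back into 16 bits */
        sum = (sum & 0xffff) + (sum >> 16);
    return (uint16_t)~sum;                 /* one's complement of the sum */
}

int main(void)
{
    const uint8_t pkt[] = { 0x45, 0x00, 0x00, 0x1c, 0x00, 0x00 };

    printf("cksum = 0x%04x\n", simple_in_cksum(pkt, sizeof(pkt)));
    return 0;
}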
--- /dev/null
+++ sys/netinet/sctputil.h
@@ -0,0 +1,390 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+/* $KAME: sctputil.h,v 1.15 2005/03/06 16:04:19 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctputil.h,v 1.29.2.1 2007/11/06 02:48:04 rrs Exp $");
+#ifndef __sctputil_h__
+#define __sctputil_h__
+
+
+#if defined(_KERNEL)
+
+
+#ifdef SCTP_ASOCLOG_OF_TSNS
+void sctp_print_out_track_log(struct sctp_tcb *stcb);
+
+#endif
+
+#ifdef SCTP_MBUF_LOGGING
+struct mbuf *sctp_m_free(struct mbuf *m);
+void sctp_m_freem(struct mbuf *m);
+
+#else
+#define sctp_m_free m_free
+#define sctp_m_freem m_freem
+#endif
+
+#if defined(SCTP_LOCAL_TRACE_BUF) || defined(__APPLE__)
+void
+ sctp_log_trace(uint32_t fr, const char *str SCTP_UNUSED, uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e, uint32_t f);
+
+#endif
+
+#define sctp_get_associd(stcb) ((sctp_assoc_t)stcb->asoc.assoc_id)
+
+
+/*
+ * Function prototypes
+ */
+uint32_t
+sctp_get_ifa_hash_val(struct sockaddr *addr);
+
+struct sctp_ifa *
+ sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr, int hold_lock);
+
+struct sctp_ifa *
+ sctp_find_ifa_by_addr(struct sockaddr *addr, uint32_t vrf_id, int holds_lock);
+
+uint32_t sctp_select_initial_TSN(struct sctp_pcb *);
+
+uint32_t sctp_select_a_tag(struct sctp_inpcb *, int);
+
+int sctp_init_asoc(struct sctp_inpcb *, struct sctp_tcb *, int, uint32_t, uint32_t);
+
+void sctp_fill_random_store(struct sctp_pcb *);
+
+void
+sctp_timer_start(int, struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+
+void
+sctp_timer_stop(int, struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *, uint32_t);
+
+int
+ sctp_dynamic_set_primary(struct sockaddr *sa, uint32_t vrf_id);
+
+uint32_t sctp_calculate_sum(struct mbuf *, int32_t *, uint32_t);
+
+void
+ sctp_mtu_size_reset(struct sctp_inpcb *, struct sctp_association *, uint32_t);
+
+void
+sctp_add_to_readq(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_queued_to_read *control,
+ struct sockbuf *sb,
+ int end,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+int
+sctp_append_to_readq(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_queued_to_read *control,
+ struct mbuf *m,
+ int end,
+ int new_cumack,
+ struct sockbuf *sb);
+
+
+void sctp_iterator_worker(void);
+
+int find_next_best_mtu(int);
+
+void
+ sctp_timeout_handler(void *);
+
+uint32_t
+sctp_calculate_rto(struct sctp_tcb *, struct sctp_association *,
+ struct sctp_nets *, struct timeval *, int);
+
+uint32_t sctp_calculate_len(struct mbuf *);
+
+caddr_t sctp_m_getptr(struct mbuf *, int, int, uint8_t *);
+
+struct sctp_paramhdr *
+sctp_get_next_param(struct mbuf *, int,
+ struct sctp_paramhdr *, int);
+
+int sctp_add_pad_tombuf(struct mbuf *, int);
+
+int sctp_pad_lastmbuf(struct mbuf *, int, struct mbuf *);
+
+void
+sctp_ulp_notify(uint32_t, struct sctp_tcb *, uint32_t, void *, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+void
+sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp,
+ struct sctp_inpcb *new_inp,
+ struct sctp_tcb *stcb, int waitflags);
+
+
+void sctp_stop_timers_for_shutdown(struct sctp_tcb *);
+
+void
+sctp_report_all_outbound(struct sctp_tcb *, int, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+int sctp_expand_mapping_array(struct sctp_association *, uint32_t);
+
+void
+sctp_abort_notification(struct sctp_tcb *, int, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+/* We abort responding to an IP packet for some reason */
+void
+sctp_abort_association(struct sctp_inpcb *, struct sctp_tcb *,
+ struct mbuf *, int, struct sctphdr *, struct mbuf *, uint32_t);
+
+
+/* We choose to abort via user input */
+void
+sctp_abort_an_association(struct sctp_inpcb *, struct sctp_tcb *, int,
+ struct mbuf *, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+void
+sctp_handle_ootb(struct mbuf *, int, int, struct sctphdr *,
+ struct sctp_inpcb *, struct mbuf *, uint32_t);
+
+int
+sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr,
+ int totaddr, int *error);
+
+struct sctp_tcb *
+sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
+ int *totaddr, int *num_v4, int *num_v6, int *error, int limit, int *bad_addr);
+
+int sctp_is_there_an_abort_here(struct mbuf *, int, uint32_t *);
+uint32_t sctp_is_same_scope(struct sockaddr_in6 *, struct sockaddr_in6 *);
+
+struct sockaddr_in6 *
+ sctp_recover_scope(struct sockaddr_in6 *, struct sockaddr_in6 *);
+
+#define sctp_recover_scope_mac(addr, store) do { \
+ if ((addr->sin6_family == AF_INET6) && \
+ (IN6_IS_SCOPE_LINKLOCAL(&addr->sin6_addr))) { \
+ *store = *addr; \
+ if (addr->sin6_scope_id == 0) { \
+ if (!sa6_recoverscope(store)) { \
+ addr = store; \
+ } \
+ } else { \
+ in6_clearscope(&addr->sin6_addr); \
+ addr = store; \
+ } \
+ } \
+} while (0)
+
+
+int sctp_cmpaddr(struct sockaddr *, struct sockaddr *);
+
+void sctp_print_address(struct sockaddr *);
+void sctp_print_address_pkt(struct ip *, struct sctphdr *);
+
+void
+sctp_notify_partial_delivery_indication(struct sctp_tcb *stcb,
+ uint32_t error, int no_lock, uint32_t strseq);
+
+int
+sctp_release_pr_sctp_chunk(struct sctp_tcb *, struct sctp_tmit_chunk *,
+ int, struct sctpchunk_listhead *, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+struct mbuf *sctp_generate_invmanparam(int);
+
+void
+sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp,
+ struct sockaddr *sa, sctp_assoc_t assoc_id,
+ uint32_t vrf_id, int *error, void *p);
+void
+sctp_bindx_delete_address(struct socket *so, struct sctp_inpcb *inp,
+ struct sockaddr *sa, sctp_assoc_t assoc_id,
+ uint32_t vrf_id, int *error);
+
+int sctp_local_addr_count(struct sctp_tcb *stcb);
+
+#ifdef SCTP_MBCNT_LOGGING
+void
+sctp_free_bufspace(struct sctp_tcb *, struct sctp_association *,
+ struct sctp_tmit_chunk *, int);
+
+#else
+#define sctp_free_bufspace(stcb, asoc, tp1, chk_cnt) \
+do { \
+ if (tp1->data != NULL) { \
+ atomic_subtract_int(&((asoc)->chunks_on_out_queue), chk_cnt); \
+ if ((asoc)->total_output_queue_size >= tp1->book_size) { \
+ atomic_subtract_int(&((asoc)->total_output_queue_size), tp1->book_size); \
+ } else { \
+ (asoc)->total_output_queue_size = 0; \
+ } \
+ if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
+ if (stcb->sctp_socket->so_snd.sb_cc >= tp1->book_size) { \
+ atomic_subtract_int(&((stcb)->sctp_socket->so_snd.sb_cc), tp1->book_size); \
+ } else { \
+ stcb->sctp_socket->so_snd.sb_cc = 0; \
+ } \
+ } \
+ } \
+} while (0)
+
+#endif
+
+#define sctp_free_spbufspace(stcb, asoc, sp) \
+do { \
+ if (sp->data != NULL) { \
+ atomic_subtract_int(&(asoc)->chunks_on_out_queue, 1); \
+ if ((asoc)->total_output_queue_size >= sp->length) { \
+ atomic_subtract_int(&(asoc)->total_output_queue_size, sp->length); \
+ } else { \
+ (asoc)->total_output_queue_size = 0; \
+ } \
+ if (stcb->sctp_socket && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
+ if (stcb->sctp_socket->so_snd.sb_cc >= sp->length) { \
+ atomic_subtract_int(&stcb->sctp_socket->so_snd.sb_cc,sp->length); \
+ } else { \
+ stcb->sctp_socket->so_snd.sb_cc = 0; \
+ } \
+ } \
+ } \
+} while (0)
+
+#define sctp_snd_sb_alloc(stcb, sz) \
+do { \
+ atomic_add_int(&stcb->asoc.total_output_queue_size,sz); \
+ if ((stcb->sctp_socket != NULL) && \
+ ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || \
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { \
+ atomic_add_int(&stcb->sctp_socket->so_snd.sb_cc,sz); \
+ } \
+} while (0)
+
+
+int
+sctp_soreceive(struct socket *so, struct sockaddr **psa,
+ struct uio *uio,
+ struct mbuf **mp0,
+ struct mbuf **controlp,
+ int *flagsp);
+
+
+/* For those not passing mbufs, this does the
+ * translations for you. Caller owns memory
+ * of size controllen returned in controlp.
+ */
+int
+sctp_l_soreceive(struct socket *so,
+ struct sockaddr **name,
+ struct uio *uio,
+ char **controlp,
+ int *controllen,
+ int *flag);
+
+
+void
+ sctp_misc_ints(uint8_t from, uint32_t a, uint32_t b, uint32_t c, uint32_t d);
+
+void
+sctp_wakeup_log(struct sctp_tcb *stcb,
+ uint32_t cumtsn,
+ uint32_t wake_cnt, int from);
+
+void sctp_log_strm_del_alt(struct sctp_tcb *stcb, uint32_t, uint16_t, uint16_t, int);
+
+void sctp_log_nagle_event(struct sctp_tcb *stcb, int action);
+
+
+void
+ sctp_log_mb(struct mbuf *m, int from);
+
+void
+sctp_sblog(struct sockbuf *sb,
+ struct sctp_tcb *stcb, int from, int incr);
+
+void
+sctp_log_strm_del(struct sctp_queued_to_read *control,
+ struct sctp_queued_to_read *poschk,
+ int from);
+void sctp_log_cwnd(struct sctp_tcb *stcb, struct sctp_nets *, int, uint8_t);
+void rto_logging(struct sctp_nets *net, int from);
+
+void sctp_log_closing(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int16_t loc);
+
+void sctp_log_lock(struct sctp_inpcb *inp, struct sctp_tcb *stcb, uint8_t from);
+void sctp_log_maxburst(struct sctp_tcb *stcb, struct sctp_nets *, int, int, uint8_t);
+void sctp_log_block(uint8_t, struct socket *, struct sctp_association *, int);
+void sctp_log_rwnd(uint8_t, uint32_t, uint32_t, uint32_t);
+void sctp_log_mbcnt(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t);
+void sctp_log_rwnd_set(uint8_t, uint32_t, uint32_t, uint32_t, uint32_t);
+int sctp_fill_stat_log(void *, size_t *);
+void sctp_log_fr(uint32_t, uint32_t, uint32_t, int);
+void sctp_log_sack(uint32_t, uint32_t, uint32_t, uint16_t, uint16_t, int);
+void sctp_log_map(uint32_t, uint32_t, uint32_t, int);
+
+void sctp_clr_stat_log(void);
+
+
+#ifdef SCTP_AUDITING_ENABLED
+void
+sctp_auditing(int, struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+void sctp_audit_log(uint8_t, uint8_t);
+
+#endif
+
+
+#endif /* _KERNEL */
+#endif
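Most of the prototypes in the new sctputil.h are self-describing; one worth a note is sctp_get_next_param(), which iterates the type-length-value parameters carried in SCTP chunks. The flat-buffer sketch below is a conceptual illustration of that TLV walk only (RFC 4960 parameters: 16-bit type, 16-bit length including the header, padded to 4 bytes); it is not the kernel routine, which works over an mbuf chain:

/* Illustration only -- conceptual TLV parameter walk over a flat buffer. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>                       /* ntohs() */

struct demo_paramhdr {                       /* local stand-in for struct sctp_paramhdr */
    uint16_t param_type;                     /* network byte order on the wire */
    uint16_t param_length;                   /* includes this 4-byte header */
};

static void walk_params(const uint8_t *buf, size_t buflen)
{
    size_t off = 0;

    while (off + sizeof(struct demo_paramhdr) <= buflen) {
        struct demo_paramhdr ph;

        memcpy(&ph, buf + off, sizeof(ph));
        uint16_t len = ntohs(ph.param_length);
        if (len < sizeof(ph) || off + len > buflen)
            break;                           /* malformed parameter, stop */
        printf("param type 0x%04x, length %u\n", ntohs(ph.param_type), len);
        off += (len + 3u) & ~3u;             /* next parameter, 4-byte aligned */
    }
}

int main(void)
{
    /* one 6-byte parameter padded to 8 bytes, then a 4-byte parameter */
    uint8_t buf[] = { 0x80, 0x08, 0x00, 0x06, 0xde, 0xad, 0x00, 0x00,
                      0xc0, 0x06, 0x00, 0x04 };

    walk_params(buf, sizeof(buf));
    return 0;
}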
Index: ip_carp.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_carp.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/ip_carp.h -L sys/netinet/ip_carp.h -u -r1.1.1.1 -r1.2
--- sys/netinet/ip_carp.h
+++ sys/netinet/ip_carp.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet/ip_carp.h,v 1.1 2005/02/22 13:04:03 glebius Exp $ */
+/* $FreeBSD: src/sys/netinet/ip_carp.h,v 1.3 2006/12/01 18:37:41 imp Exp $ */
/* $OpenBSD: ip_carp.h,v 1.8 2004/07/29 22:12:15 mcbride Exp $ */
/*
@@ -76,6 +76,10 @@
unsigned char carp_md[20]; /* SHA1 HMAC */
} __packed;
+#ifdef CTASSERT
+CTASSERT(sizeof(struct carp_header) == 36);
+#endif
+
#define CARP_DFLTTL 255
/* carp_version */
@@ -148,7 +152,6 @@
}
#ifdef _KERNEL
-void carp_ifdetach (struct ifnet *);
void carp_carpdev_state(void *);
void carp_input (struct mbuf *, int);
int carp6_input (struct mbuf **, int *, int);
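The ip_carp.h change above adds a CTASSERT pinning the on-wire size of struct carp_header to 36 bytes at compile time (and drops the carp_ifdetach() prototype). For readers unfamiliar with CTASSERT: it is a compile-time assertion, typically built on the negative-array-size trick. The stand-alone sketch below assumes that common construction; the kernel's actual CTASSERT definition lives in its own headers and may differ in detail:

/* Illustration only -- a user-space stand-in for a CTASSERT-style
 * compile-time assertion using the negative-array-size trick. */
#include <stdint.h>

#define DEMO_CTASSERT(x)      DEMO_CTASSERT1(x, __LINE__)
#define DEMO_CTASSERT1(x, y)  DEMO_CTASSERT2(x, y)
#define DEMO_CTASSERT2(x, y)  typedef char demo_assert_ ## y[(x) ? 1 : -1]

struct demo_header {                  /* stand-in for struct carp_header */
    uint8_t bytes[36];
};

DEMO_CTASSERT(sizeof(struct demo_header) == 36);     /* compiles */
/* DEMO_CTASSERT(sizeof(struct demo_header) == 40);     would fail to compile */

int main(void) { return 0; }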
--- /dev/null
+++ sys/netinet/sctp_timer.h
@@ -0,0 +1,104 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_timer.h,v 1.6 2005/03/06 16:04:18 itojun Exp $ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_timer.h,v 1.7 2007/09/13 10:36:42 rrs Exp $");
+
+#ifndef __sctp_timer_h__
+#define __sctp_timer_h__
+
+#if defined(_KERNEL)
+
+#define SCTP_RTT_SHIFT 3
+#define SCTP_RTT_VAR_SHIFT 2
+
+void
+sctp_early_fr_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+struct sctp_nets *
+sctp_find_alternate_net(struct sctp_tcb *,
+ struct sctp_nets *, int mode);
+
+int
+sctp_threshold_management(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *, uint16_t);
+
+int
+sctp_t3rxt_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+int
+sctp_t1init_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+int
+sctp_shutdown_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+int
+sctp_heartbeat_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *, int);
+
+int sctp_is_hb_timer_running(struct sctp_tcb *stcb);
+int sctp_is_sack_timer_running(struct sctp_tcb *stcb);
+
+int
+sctp_cookie_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+
+void
+sctp_pathmtu_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+
+int
+sctp_shutdownack_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+int
+sctp_strreset_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+int
+sctp_asconf_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+
+void
+sctp_delete_prim_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *);
+
+void
+sctp_autoclose_timer(struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *net);
+
+void sctp_audit_retranmission_queue(struct sctp_association *);
+
+void sctp_iterator_timer(struct sctp_iterator *it);
+
+
+#endif
+#endif
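The SCTP_RTT_SHIFT (3) and SCTP_RTT_VAR_SHIFT (2) constants defined in the new sctp_timer.h are the usual smoothed-RTT gains (1/8 for SRTT, 1/4 for RTTVAR, as in RFC 2988/6298-style estimators). The integer sketch below shows the arithmetic those shifts imply; it is an illustration only, not the kernel's sctp_calculate_rto(), which also scales, clamps and optionally logs its inputs:

/* Illustration only -- integer SRTT/RTTVAR smoothing with 1/8 and 1/4 gains. */
#include <stdio.h>
#include <stdlib.h>

#define DEMO_RTT_SHIFT      3       /* srtt gain   = 1/8 */
#define DEMO_RTT_VAR_SHIFT  2       /* rttvar gain = 1/4 */

struct rto_state { int srtt, rttvar, rto; };

static void rto_update(struct rto_state *s, int measured_rtt_ms)
{
    if (s->srtt == 0) {                            /* first measurement */
        s->srtt = measured_rtt_ms;
        s->rttvar = measured_rtt_ms / 2;
    } else {
        int delta = measured_rtt_ms - s->srtt;

        s->srtt += delta >> DEMO_RTT_SHIFT;
        s->rttvar += (abs(delta) - s->rttvar) >> DEMO_RTT_VAR_SHIFT;
    }
    s->rto = s->srtt + 4 * s->rttvar;              /* before min/max clamping */
}

int main(void)
{
    struct rto_state s = { 0, 0, 0 };
    int samples[] = { 100, 120, 90, 300 };

    for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
        rto_update(&s, samples[i]);
        printf("rtt=%3d -> srtt=%d rttvar=%d rto=%d\n",
            samples[i], s.srtt, s.rttvar, s.rto);
    }
    return 0;
}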
--- /dev/null
+++ sys/netinet/sctp_input.c
@@ -0,0 +1,5393 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_input.c,v 1.27 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_input.c,v 1.65.2.1.2.1 2008/02/02 12:44:13 rwatson Exp $");
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_input.h>
+#include <netinet/sctp_auth.h>
+#include <netinet/sctp_indata.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_bsd_addr.h>
+#include <netinet/sctp_timer.h>
+
+
+
+static void
+sctp_stop_all_cookie_timers(struct sctp_tcb *stcb)
+{
+ struct sctp_nets *net;
+
+ /*
+ * This now not only stops all cookie timers it also stops any INIT
+ * timers as well. This will make sure that the timers are stopped
+ * in all collision cases.
+ */
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (net->rxt_timer.type == SCTP_TIMER_TYPE_COOKIE) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_COOKIE,
+ stcb->sctp_ep,
+ stcb,
+ net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_1);
+ } else if (net->rxt_timer.type == SCTP_TIMER_TYPE_INIT) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT,
+ stcb->sctp_ep,
+ stcb,
+ net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_2);
+ }
+ }
+}
+
+/* INIT handler */
+static void
+sctp_handle_init(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
+ struct sctp_init_chunk *cp, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int *abort_no_unlock, uint32_t vrf_id)
+{
+ struct sctp_init *init;
+ struct mbuf *op_err;
+ uint32_t init_limit;
+
+ SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_init: handling INIT tcb:%p\n",
+ stcb);
+ if (stcb == NULL) {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ goto outnow;
+ }
+ }
+ op_err = NULL;
+ init = &cp->init;
+ /* First are we accepting? */
+ if ((inp->sctp_socket->so_qlimit == 0) && (stcb == NULL)) {
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_init: Abort, so_qlimit:%d\n",
+ inp->sctp_socket->so_qlimit);
+ /*
+ * FIX ME ?? What about TCP model and we have a
+ * match/restart case?
+ */
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_chunk)) {
+ /* Invalid length */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ /* validate parameters */
+ if (init->initiate_tag == 0) {
+ /* protocol error... send abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ if (ntohl(init->a_rwnd) < SCTP_MIN_RWND) {
+ /* invalid parameter... send abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ if (init->num_inbound_streams == 0) {
+ /* protocol error... send abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ if (init->num_outbound_streams == 0) {
+ /* protocol error... send abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err,
+ vrf_id);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ init_limit = offset + ntohs(cp->ch.chunk_length);
+ if (sctp_validate_init_auth_params(m, offset + sizeof(*cp),
+ init_limit)) {
+ /* auth parameter(s) error... send abort */
+ sctp_abort_association(inp, stcb, m, iphlen, sh, NULL, vrf_id);
+ if (stcb)
+ *abort_no_unlock = 1;
+ goto outnow;
+ }
+ /* send an INIT-ACK w/cookie */
+ SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending INIT-ACK\n");
+ sctp_send_initiate_ack(inp, stcb, m, iphlen, offset, sh, cp, vrf_id,
+ ((stcb == NULL) ? SCTP_HOLDS_LOCK : SCTP_NOT_LOCKED));
+outnow:
+ if (stcb == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ }
+}
+
+/*
+ * process peer "INIT/INIT-ACK" chunk returns value < 0 on error
+ */
+
+int
+sctp_is_there_unsent_data(struct sctp_tcb *stcb)
+{
+ int unsent_data = 0;
+ struct sctp_stream_queue_pending *sp;
+ struct sctp_stream_out *strq;
+ struct sctp_association *asoc;
+
+ /*
+ * This function returns the number of streams that have true unsent
+ * data on them. Note that as it looks through it will clean up any
+ * places that have old data that has been sent but left at top of
+ * stream queue.
+ */
+ asoc = &stcb->asoc;
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (!TAILQ_EMPTY(&asoc->out_wheel)) {
+ /* Check to see if some data queued */
+ TAILQ_FOREACH(strq, &asoc->out_wheel, next_spoke) {
+ is_there_another:
+ /* sa_ignore FREED_MEMORY */
+ sp = TAILQ_FIRST(&strq->outqueue);
+ if (sp == NULL) {
+ continue;
+ }
+ if ((sp->msg_is_complete) &&
+ (sp->length == 0) &&
+ (sp->sender_all_done)) {
+ /*
+ * We are doing differed cleanup. Last time
+ * through when we took all the data the
+ * sender_all_done was not set.
+ */
+ if (sp->put_last_out == 0) {
+ SCTP_PRINTF("Gak, put out entire msg with NO end!-1\n");
+ SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d\n",
+ sp->sender_all_done,
+ sp->length,
+ sp->msg_is_complete,
+ sp->put_last_out);
+ }
+ atomic_subtract_int(&stcb->asoc.stream_queue_cnt, 1);
+ TAILQ_REMOVE(&strq->outqueue, sp, next);
+ sctp_free_remote_addr(sp->net);
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ }
+ sctp_free_a_strmoq(stcb, sp);
+ goto is_there_another;
+ } else {
+ unsent_data++;
+ continue;
+ }
+ }
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ return (unsent_data);
+}
+
+static int
+sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_init *init;
+ struct sctp_association *asoc;
+ struct sctp_nets *lnet;
+ unsigned int i;
+
+ init = &cp->init;
+ asoc = &stcb->asoc;
+ /* save off parameters */
+ asoc->peer_vtag = ntohl(init->initiate_tag);
+ asoc->peers_rwnd = ntohl(init->a_rwnd);
+ if (TAILQ_FIRST(&asoc->nets)) {
+ /* update any ssthresh's that may have a default */
+ TAILQ_FOREACH(lnet, &asoc->nets, sctp_next) {
+ lnet->ssthresh = asoc->peers_rwnd;
+
+ if (sctp_logging_level & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
+ sctp_log_cwnd(stcb, lnet, 0, SCTP_CWND_INITIALIZATION);
+ }
+ }
+ }
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (asoc->pre_open_streams > ntohs(init->num_inbound_streams)) {
+ unsigned int newcnt;
+ struct sctp_stream_out *outs;
+ struct sctp_stream_queue_pending *sp;
+
+ /* cut back on number of streams */
+ newcnt = ntohs(init->num_inbound_streams);
+ /* This if is probably not needed but I am cautious */
+ if (asoc->strmout) {
+ /* First make sure no data chunks are trapped */
+ for (i = newcnt; i < asoc->pre_open_streams; i++) {
+ outs = &asoc->strmout[i];
+ sp = TAILQ_FIRST(&outs->outqueue);
+ while (sp) {
+ TAILQ_REMOVE(&outs->outqueue, sp,
+ next);
+ asoc->stream_queue_cnt--;
+ sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL,
+ stcb, SCTP_NOTIFY_DATAGRAM_UNSENT,
+ sp, SCTP_SO_NOT_LOCKED);
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ }
+ sctp_free_remote_addr(sp->net);
+ sp->net = NULL;
+ /* Free the chunk */
+ SCTP_PRINTF("sp:%p tcb:%p weird free case\n",
+ sp, stcb);
+
+ sctp_free_a_strmoq(stcb, sp);
+ /* sa_ignore FREED_MEMORY */
+ sp = TAILQ_FIRST(&outs->outqueue);
+ }
+ }
+ }
+ /* cut back the count and abandon the upper streams */
+ asoc->pre_open_streams = newcnt;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ asoc->streamoutcnt = asoc->pre_open_streams;
+ /* init tsn's */
+ asoc->highest_tsn_inside_map = asoc->asconf_seq_in = ntohl(init->initial_tsn) - 1;
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 5, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ /* This is the next one we expect */
+ asoc->str_reset_seq_in = asoc->asconf_seq_in + 1;
+
+ asoc->mapping_array_base_tsn = ntohl(init->initial_tsn);
+ asoc->cumulative_tsn = asoc->asconf_seq_in;
+ asoc->last_echo_tsn = asoc->asconf_seq_in;
+ asoc->advanced_peer_ack_point = asoc->last_acked_seq;
+ /* open the requested streams */
+
+ if (asoc->strmin != NULL) {
+ /* Free the old ones */
+ struct sctp_queued_to_read *ctl;
+
+ for (i = 0; i < asoc->streamincnt; i++) {
+ ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue);
+ while (ctl) {
+ TAILQ_REMOVE(&asoc->strmin[i].inqueue, ctl, next);
+ sctp_free_remote_addr(ctl->whoFrom);
+ ctl->whoFrom = NULL;
+ sctp_m_freem(ctl->data);
+ ctl->data = NULL;
+ sctp_free_a_readq(stcb, ctl);
+ ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue);
+ }
+ }
+ SCTP_FREE(asoc->strmin, SCTP_M_STRMI);
+ }
+ asoc->streamincnt = ntohs(init->num_outbound_streams);
+ if (asoc->streamincnt > MAX_SCTP_STREAMS) {
+ asoc->streamincnt = MAX_SCTP_STREAMS;
+ }
+ SCTP_MALLOC(asoc->strmin, struct sctp_stream_in *, asoc->streamincnt *
+ sizeof(struct sctp_stream_in), SCTP_M_STRMI);
+ if (asoc->strmin == NULL) {
+ /* we didn't get memory for the streams! */
+ SCTPDBG(SCTP_DEBUG_INPUT2, "process_init: couldn't get memory for the streams!\n");
+ return (-1);
+ }
+ for (i = 0; i < asoc->streamincnt; i++) {
+ asoc->strmin[i].stream_no = i;
+ asoc->strmin[i].last_sequence_delivered = 0xffff;
+ /*
+ * U-stream ranges will be set when the cookie is unpacked.
+ * Or for the INIT sender they are un set (if pr-sctp not
+ * supported) when the INIT-ACK arrives.
+ */
+ TAILQ_INIT(&asoc->strmin[i].inqueue);
+ asoc->strmin[i].delivery_started = 0;
+ }
+ /*
+ * load_address_from_init will put the addresses into the
+ * association when the COOKIE is processed or the INIT-ACK is
+ * processed. Both types of COOKIE's existing and new call this
+ * routine. It will remove addresses that are no longer in the
+ * association (for the restarting case where addresses are
+ * removed). Up front when the INIT arrives we will discard it if it
+ * is a restart and new addresses have been added.
+ */
+ /* sa_ignore MEMLEAK */
+ return (0);
+}
+
+/*
+ * INIT-ACK message processing/consumption returns value < 0 on error
+ */
+static int
+sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int *abort_no_unlock, uint32_t vrf_id)
+{
+ struct sctp_association *asoc;
+ struct mbuf *op_err;
+ int retval, abort_flag;
+ uint32_t initack_limit;
+
+ /* First verify that we have no illegal param's */
+ abort_flag = 0;
+ op_err = NULL;
+
+ op_err = sctp_arethere_unrecognized_parameters(m,
+ (offset + sizeof(struct sctp_init_chunk)),
+ &abort_flag, (struct sctp_chunkhdr *)cp);
+ if (abort_flag) {
+ /* Send an abort and notify peer */
+ sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_CAUSE_PROTOCOL_VIOLATION, op_err, SCTP_SO_NOT_LOCKED);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ asoc = &stcb->asoc;
+ /* process the peer's parameters in the INIT-ACK */
+ retval = sctp_process_init((struct sctp_init_chunk *)cp, stcb, net);
+ if (retval < 0) {
+ return (retval);
+ }
+ initack_limit = offset + ntohs(cp->ch.chunk_length);
+ /* load all addresses */
+ if ((retval = sctp_load_addresses_from_init(stcb, m, iphlen,
+ (offset + sizeof(struct sctp_init_chunk)), initack_limit, sh,
+ NULL))) {
+ /* Huh, we should abort */
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "Load addresses from INIT causes an abort %d\n",
+ retval);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ NULL, 0);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ /* if the peer doesn't support asconf, flush the asconf queue */
+ if (asoc->peer_supports_asconf == 0) {
+ struct sctp_asconf_addr *aparam;
+
+ while (!TAILQ_EMPTY(&asoc->asconf_queue)) {
+ /* sa_ignore FREED_MEMORY */
+ aparam = TAILQ_FIRST(&asoc->asconf_queue);
+ TAILQ_REMOVE(&asoc->asconf_queue, aparam, next);
+ SCTP_FREE(aparam, SCTP_M_ASC_ADDR);
+ }
+ }
+ stcb->asoc.peer_hmac_id = sctp_negotiate_hmacid(stcb->asoc.peer_hmacs,
+ stcb->asoc.local_hmacs);
+ if (op_err) {
+ sctp_queue_op_err(stcb, op_err);
+ /* queuing will steal away the mbuf chain to the out queue */
+ op_err = NULL;
+ }
+ /* extract the cookie and queue it to "echo" it back... */
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ net->error_count = 0;
+
+ /*
+ * Cancel the INIT timer, We do this first before queueing the
+ * cookie. We always cancel at the primary to assue that we are
+ * canceling the timer started by the INIT which always goes to the
+ * primary.
+ */
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep, stcb,
+ asoc->primary_destination, SCTP_FROM_SCTP_INPUT + SCTP_LOC_4);
+
+ /* calculate the RTO */
+ net->RTO = sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered, sctp_align_safe_nocopy);
+
+ retval = sctp_send_cookie_echo(m, offset, stcb, net);
+ if (retval < 0) {
+ /*
+ * No cookie, we probably should send a op error. But in any
+ * case if there is no cookie in the INIT-ACK, we can
+ * abandon the peer, its broke.
+ */
+ if (retval == -3) {
+ /* We abort with an error of missing mandatory param */
+ op_err =
+ sctp_generate_invmanparam(SCTP_CAUSE_MISSING_PARAM);
+ if (op_err) {
+ /*
+ * Expand beyond to include the mandatory
+ * param cookie
+ */
+ struct sctp_inv_mandatory_param *mp;
+
+ SCTP_BUF_LEN(op_err) =
+ sizeof(struct sctp_inv_mandatory_param);
+ mp = mtod(op_err,
+ struct sctp_inv_mandatory_param *);
+ /* Subtract the reserved param */
+ mp->length =
+ htons(sizeof(struct sctp_inv_mandatory_param) - 2);
+ mp->num_param = htonl(1);
+ mp->param = htons(SCTP_STATE_COOKIE);
+ mp->resv = 0;
+ }
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
+ sh, op_err, 0);
+ *abort_no_unlock = 1;
+ }
+ return (retval);
+ }
+ return (0);
+}
+
+static void
+sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ struct sockaddr_storage store;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+ struct sctp_nets *r_net;
+ struct timeval tv;
+ int req_prim = 0;
+
+ if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_heartbeat_chunk)) {
+ /* Invalid length */
+ return;
+ }
+ sin = (struct sockaddr_in *)&store;
+ sin6 = (struct sockaddr_in6 *)&store;
+
+ memset(&store, 0, sizeof(store));
+ if (cp->heartbeat.hb_info.addr_family == AF_INET &&
+ cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in)) {
+ sin->sin_family = cp->heartbeat.hb_info.addr_family;
+ sin->sin_len = cp->heartbeat.hb_info.addr_len;
+ sin->sin_port = stcb->rport;
+ memcpy(&sin->sin_addr, cp->heartbeat.hb_info.address,
+ sizeof(sin->sin_addr));
+ } else if (cp->heartbeat.hb_info.addr_family == AF_INET6 &&
+ cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in6)) {
+ sin6->sin6_family = cp->heartbeat.hb_info.addr_family;
+ sin6->sin6_len = cp->heartbeat.hb_info.addr_len;
+ sin6->sin6_port = stcb->rport;
+ memcpy(&sin6->sin6_addr, cp->heartbeat.hb_info.address,
+ sizeof(sin6->sin6_addr));
+ } else {
+ return;
+ }
+ r_net = sctp_findnet(stcb, (struct sockaddr *)sin);
+ if (r_net == NULL) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Huh? I can't find the address I sent it to, discard\n");
+ return;
+ }
+ if ((r_net && (r_net->dest_state & SCTP_ADDR_UNCONFIRMED)) &&
+ (r_net->heartbeat_random1 == cp->heartbeat.hb_info.random_value1) &&
+ (r_net->heartbeat_random2 == cp->heartbeat.hb_info.random_value2)) {
+ /*
+ * If the its a HB and it's random value is correct when can
+ * confirm the destination.
+ */
+ r_net->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
+ if (r_net->dest_state & SCTP_ADDR_REQ_PRIMARY) {
+ stcb->asoc.primary_destination = r_net;
+ r_net->dest_state &= ~SCTP_ADDR_WAS_PRIMARY;
+ r_net->dest_state &= ~SCTP_ADDR_REQ_PRIMARY;
+ r_net = TAILQ_FIRST(&stcb->asoc.nets);
+ if (r_net != stcb->asoc.primary_destination) {
+ /*
+ * first one on the list is NOT the primary
+ * sctp_cmpaddr() is much more efficent if
+ * the primary is the first on the list,
+ * make it so.
+ */
+ TAILQ_REMOVE(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next);
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next);
+ }
+ req_prim = 1;
+ }
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
+ stcb, 0, (void *)r_net, SCTP_SO_NOT_LOCKED);
+ }
+ r_net->error_count = 0;
+ r_net->hb_responded = 1;
+ tv.tv_sec = cp->heartbeat.hb_info.time_value_1;
+ tv.tv_usec = cp->heartbeat.hb_info.time_value_2;
+ if (r_net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
+ r_net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
+ r_net->dest_state |= SCTP_ADDR_REACHABLE;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
+ SCTP_HEARTBEAT_SUCCESS, (void *)r_net, SCTP_SO_NOT_LOCKED);
+ /* now was it the primary? if so restore */
+ if (r_net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
+ (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, r_net);
+ }
+ }
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the destination is in PF state,
+ * set the destination to active state and set the cwnd to one or
+ * two MTU's based on whether PF1 or PF2 is being used. If a T3
+ * timer is running, for the destination, stop the timer because a
+ * PF-heartbeat was received.
+ */
+ if (sctp_cmt_on_off && sctp_cmt_pf && (net->dest_state & SCTP_ADDR_PF) ==
+ SCTP_ADDR_PF) {
+ if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_5);
+ }
+ net->dest_state &= ~SCTP_ADDR_PF;
+ net->cwnd = net->mtu * sctp_cmt_pf;
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Destination %p moved from PF to reachable with cwnd %d.\n",
+ net, net->cwnd);
+ }
+ /* Now lets do a RTO with this */
+ r_net->RTO = sctp_calculate_rto(stcb, &stcb->asoc, r_net, &tv, sctp_align_safe_nocopy);
+ /* Mobility adaptation */
+ if (req_prim) {
+ if ((sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE) ||
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) &&
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_PRIM_DELETED)) {
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_TIMER + SCTP_LOC_7);
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ sctp_assoc_immediate_retrans(stcb,
+ stcb->asoc.primary_destination);
+ }
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE)) {
+ sctp_move_chunks_from_deleted_prim(stcb,
+ stcb->asoc.primary_destination);
+ }
+ sctp_delete_prim_timer(stcb->sctp_ep, stcb,
+ stcb->asoc.deleted_primary);
+ }
+ }
+}
+
+static void
+sctp_handle_abort(struct sctp_abort_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_abort: handling ABORT\n");
+ if (stcb == NULL)
+ return;
+
+ /* stop any receive timers */
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
+ /* notify user of the abort and clean up... */
+ sctp_abort_notification(stcb, 0, SCTP_SO_NOT_LOCKED);
+ /* free the tcb */
+#if defined(SCTP_PANIC_ON_ABORT)
+ printf("stcb:%p state:%d rport:%d net:%p\n",
+ stcb, stcb->asoc.state, stcb->rport, net);
+ if (!(stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
+ panic("Received an ABORT");
+ } else {
+ printf("No panic its in state %x closed\n", stcb->asoc.state);
+ }
+#endif
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ sctp_print_out_track_log(stcb);
+#endif
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
+ (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_abort: finished\n");
+}
+
+static void
+sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net, int *abort_flag)
+{
+ struct sctp_association *asoc;
+ int some_on_streamwheel;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_shutdown: handling SHUTDOWN\n");
+ if (stcb == NULL)
+ return;
+ asoc = &stcb->asoc;
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ return;
+ }
+ if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_shutdown_chunk)) {
+ /* Shutdown NOT the expected size */
+ return;
+ } else {
+ sctp_update_acked(stcb, cp, net, abort_flag);
+ }
+ if (asoc->control_pdapi) {
+ /*
+ * With a normal shutdown we assume the end of the last record.
+ */
+ SCTP_INP_READ_LOCK(stcb->sctp_ep);
+ asoc->control_pdapi->end_added = 1;
+ asoc->control_pdapi->pdapi_aborted = 1;
+ asoc->control_pdapi = NULL;
+ SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* assoc was freed while we were unlocked */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ /* goto SHUTDOWN_RECEIVED state to block new requests */
+ if (stcb->sctp_socket) {
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT)) {
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_RECEIVED);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ /*
+ * notify upper layer that peer has initiated a
+ * shutdown
+ */
+ sctp_ulp_notify(SCTP_NOTIFY_PEER_SHUTDOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+
+ /* reset time */
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
+ }
+ }
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
+ /*
+ * stop the shutdown timer, since we WILL move to
+ * SHUTDOWN-ACK-SENT.
+ */
+ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_8);
+ }
+ /* Now is there unsent data on a stream somewhere? */
+ some_on_streamwheel = sctp_is_there_unsent_data(stcb);
+
+ if (!TAILQ_EMPTY(&asoc->send_queue) ||
+ !TAILQ_EMPTY(&asoc->sent_queue) ||
+ some_on_streamwheel) {
+ /* By returning we will push more data out */
+ return;
+ } else {
+ /* no outstanding data to send, so move on... */
+ /* send SHUTDOWN-ACK */
+ sctp_send_shutdown_ack(stcb, stcb->asoc.primary_destination);
+ /* move to SHUTDOWN-ACK-SENT state */
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_7);
+ /* start SHUTDOWN timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep,
+ stcb, net);
+ }
+}
+
+static void
+sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ struct sctp_association *asoc;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(stcb->sctp_ep);
+#endif
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_shutdown_ack: handling SHUTDOWN ACK\n");
+ if (stcb == NULL)
+ return;
+
+ asoc = &stcb->asoc;
+ /* process according to association state */
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ /* unexpected SHUTDOWN-ACK... so ignore... */
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ if (asoc->control_pdapi) {
+ /*
+ * With a normal shutdown we assume the end of the last record.
+ */
+ SCTP_INP_READ_LOCK(stcb->sctp_ep);
+ asoc->control_pdapi->end_added = 1;
+ asoc->control_pdapi->pdapi_aborted = 1;
+ asoc->control_pdapi = NULL;
+ SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* assoc was freed while we were unlocked */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ /* are the queues empty? */
+ if (!TAILQ_EMPTY(&asoc->send_queue) ||
+ !TAILQ_EMPTY(&asoc->sent_queue) ||
+ !TAILQ_EMPTY(&asoc->out_wheel)) {
+ sctp_report_all_outbound(stcb, 0, SCTP_SO_NOT_LOCKED);
+ }
+ /* stop the timer */
+ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_9);
+ /* send SHUTDOWN-COMPLETE */
+ sctp_send_shutdown_complete(stcb, net);
+ /* notify upper layer protocol */
+ if (stcb->sctp_socket) {
+ sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /* Set the connected flag to disconnected */
+ stcb->sctp_ep->sctp_socket->so_snd.sb_cc = 0;
+ }
+ }
+ SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
+ /* free the TCB but first save off the ep */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_10);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+}
+
+/*
+ * Skip past the param header and then we will find the chunk that caused the
+ * problem. There are two possibilities, ASCONF or FWD-TSN; anything other
+ * than that means our peer must be broken.
+ */
+static void
+sctp_process_unrecog_chunk(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr,
+ struct sctp_nets *net)
+{
+ struct sctp_chunkhdr *chk;
+
+ chk = (struct sctp_chunkhdr *)((caddr_t)phdr + sizeof(*phdr));
+ switch (chk->chunk_type) {
+ case SCTP_ASCONF_ACK:
+ case SCTP_ASCONF:
+ sctp_asconf_cleanup(stcb, net);
+ break;
+ case SCTP_FORWARD_CUM_TSN:
+ stcb->asoc.peer_supports_prsctp = 0;
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "Peer does not support chunk type %d(%x)??\n",
+ chk->chunk_type, (uint32_t) chk->chunk_type);
+ break;
+ }
+}
+
+/*
+ * Skip past the param header and then we will find the param that caused the
+ * problem. There are a number of params in an ASCONF or the PR-SCTP param;
+ * these will turn off specific features.
+ */
+static void
+sctp_process_unrecog_param(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr)
+{
+ struct sctp_paramhdr *pbad;
+
+ pbad = phdr + 1;
+ switch (ntohs(pbad->param_type)) {
+ /* pr-sctp draft */
+ case SCTP_PRSCTP_SUPPORTED:
+ stcb->asoc.peer_supports_prsctp = 0;
+ break;
+ case SCTP_SUPPORTED_CHUNK_EXT:
+ break;
+ /* draft-ietf-tsvwg-addip-sctp */
+ case SCTP_ECN_NONCE_SUPPORTED:
+ stcb->asoc.peer_supports_ecn_nonce = 0;
+ stcb->asoc.ecn_nonce_allowed = 0;
+ stcb->asoc.ecn_allowed = 0;
+ break;
+ case SCTP_ADD_IP_ADDRESS:
+ case SCTP_DEL_IP_ADDRESS:
+ case SCTP_SET_PRIM_ADDR:
+ stcb->asoc.peer_supports_asconf = 0;
+ break;
+ case SCTP_SUCCESS_REPORT:
+ case SCTP_ERROR_CAUSE_IND:
+ SCTPDBG(SCTP_DEBUG_INPUT2, "Huh, the peer does not support success? or error cause?\n");
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "Turning off ASCONF to this strange peer\n");
+ stcb->asoc.peer_supports_asconf = 0;
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "Peer does not support param type %d(%x)??\n",
+ pbad->param_type, (uint32_t) pbad->param_type);
+ break;
+ }
+}
+
+static int
+sctp_handle_error(struct sctp_chunkhdr *ch,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int chklen;
+ struct sctp_paramhdr *phdr;
+ uint16_t error_type;
+ uint16_t error_len;
+ struct sctp_association *asoc;
+ int adjust;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ /* parse through all of the errors and process */
+ asoc = &stcb->asoc;
+ phdr = (struct sctp_paramhdr *)((caddr_t)ch +
+ sizeof(struct sctp_chunkhdr));
+ chklen = ntohs(ch->chunk_length) - sizeof(struct sctp_chunkhdr);
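+ /* chklen is the number of error-cause bytes remaining after the chunk header */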
+ while ((size_t)chklen >= sizeof(struct sctp_paramhdr)) {
+ /* Process an Error Cause */
+ error_type = ntohs(phdr->param_type);
+ error_len = ntohs(phdr->param_length);
+ if ((error_len > chklen) || (error_len == 0)) {
+ /* invalid param length for this param */
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Bogus length in error param- chunk left:%d errorlen:%d\n",
+ chklen, error_len);
+ return (0);
+ }
+ switch (error_type) {
+ case SCTP_CAUSE_INVALID_STREAM:
+ case SCTP_CAUSE_MISSING_PARAM:
+ case SCTP_CAUSE_INVALID_PARAM:
+ case SCTP_CAUSE_NO_USER_DATA:
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Software error we got a %d back? We have a bug :/ (or do they?)\n",
+ error_type);
+ break;
+ case SCTP_CAUSE_STALE_COOKIE:
+ /*
+ * We only act if we have echoed a cookie and are
+ * waiting.
+ */
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ int *p;
+
+ p = (int *)((caddr_t)phdr + sizeof(*phdr));
+ /* Save the time doubled */
+ asoc->cookie_preserve_req = ntohl(*p) << 1;
+ asoc->stale_cookie_count++;
+ if (asoc->stale_cookie_count >
+ asoc->max_init_times) {
+ sctp_abort_notification(stcb, 0, SCTP_SO_NOT_LOCKED);
+ /* now free the asoc */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_11);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ return (-1);
+ }
+ /* blast back to INIT state */
+ asoc->state &= ~SCTP_STATE_COOKIE_ECHOED;
+ asoc->state |= SCTP_STATE_COOKIE_WAIT;
+
+ sctp_stop_all_cookie_timers(stcb);
+ sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
+ }
+ break;
+ case SCTP_CAUSE_UNRESOLVABLE_ADDR:
+ /*
+ * Nothing we can do here, we don't do hostname
+ * addresses so if the peer does not like my IPv6
+ * (or IPv4 for that matter) it does not matter. If
+ * they don't support that type of address, they can
+ * NOT possibly get that packet type... i.e. with no
+ * IPv6 you can't receive an IPv6 packet, so we can
+ * safely ignore this one. If we ever added support
+ * for HOSTNAME addresses, then we would need to do
+ * something here.
+ */
+ break;
+ case SCTP_CAUSE_UNRECOG_CHUNK:
+ sctp_process_unrecog_chunk(stcb, phdr, net);
+ break;
+ case SCTP_CAUSE_UNRECOG_PARAM:
+ sctp_process_unrecog_param(stcb, phdr);
+ break;
+ case SCTP_CAUSE_COOKIE_IN_SHUTDOWN:
+ /*
+ * We ignore this since the timer will drive out a
+ * new cookie anyway and their timer will drive us
+ * to send a SHUTDOWN_COMPLETE. We can't send one
+ * here since we don't have their tag.
+ */
+ break;
+ case SCTP_CAUSE_DELETING_LAST_ADDR:
+ case SCTP_CAUSE_RESOURCE_SHORTAGE:
+ case SCTP_CAUSE_DELETING_SRC_ADDR:
+ /*
+ * We should NOT get these here, but in an
+ * ASCONF-ACK.
+ */
+ SCTPDBG(SCTP_DEBUG_INPUT2, "Peer sends ASCONF errors in a Operational Error?<%d>?\n",
+ error_type);
+ break;
+ case SCTP_CAUSE_OUT_OF_RESC:
+ /*
+ * And what, pray tell, do we do with the fact that
+ * the peer is out of resources? Not really sure we
+ * could do anything but abort. I suspect this
+ * should have come WITH an abort instead of in an
+ * OP-ERROR.
+ */
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_INPUT1, "sctp_handle_error: unknown error type = 0x%xh\n",
+ error_type);
+ break;
+ }
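+ /* advance to the next error cause, padded out to a 4-byte boundary */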
+ adjust = SCTP_SIZE32(error_len);
+ chklen -= adjust;
+ phdr = (struct sctp_paramhdr *)((caddr_t)phdr + adjust);
+ }
+ return (0);
+}
+
+static int
+sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int *abort_no_unlock, uint32_t vrf_id)
+{
+ struct sctp_init_ack *init_ack;
+ struct mbuf *op_err;
+
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_init_ack: handling INIT-ACK\n");
+
+ if (stcb == NULL) {
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_init_ack: TCB is null\n");
+ return (-1);
+ }
+ if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_ack_chunk)) {
+ /* Invalid length */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ op_err, 0);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ init_ack = &cp->init;
+ /* validate parameters */
+ if (init_ack->initiate_tag == 0) {
+ /* protocol error... send an abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ op_err, 0);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ if (ntohl(init_ack->a_rwnd) < SCTP_MIN_RWND) {
+ /* protocol error... send an abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ op_err, 0);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ if (init_ack->num_inbound_streams == 0) {
+ /* protocol error... send an abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ op_err, 0);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ if (init_ack->num_outbound_streams == 0) {
+ /* protocol error... send an abort */
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_INVALID_PARAM);
+ sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen, sh,
+ op_err, 0);
+ *abort_no_unlock = 1;
+ return (-1);
+ }
+ /* process according to association state... */
+ switch (stcb->asoc.state & SCTP_STATE_MASK) {
+ case SCTP_STATE_COOKIE_WAIT:
+ /* this is the expected state for this chunk */
+ /* process the INIT-ACK parameters */
+ if (stcb->asoc.primary_destination->dest_state &
+ SCTP_ADDR_UNCONFIRMED) {
+ /*
+ * The primary is where we sent the INIT, we can
+ * always consider it confirmed when the INIT-ACK is
+ * returned. Do this before we load addresses
+ * though.
+ */
+ stcb->asoc.primary_destination->dest_state &=
+ ~SCTP_ADDR_UNCONFIRMED;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
+ stcb, 0, (void *)stcb->asoc.primary_destination, SCTP_SO_NOT_LOCKED);
+ }
+ if (sctp_process_init_ack(m, iphlen, offset, sh, cp, stcb,
+ net, abort_no_unlock, vrf_id) < 0) {
+ /* error in parsing parameters */
+ return (-1);
+ }
+ /* update our state */
+ SCTPDBG(SCTP_DEBUG_INPUT2, "moving to COOKIE-ECHOED state\n");
+ SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_ECHOED);
+
+ /* reset the RTO calc */
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
+ /*
+ * collapse the init timer back in case of an exponential
+ * backoff
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, stcb->sctp_ep,
+ stcb, net);
+ /*
+ * the send at the end of the inbound data processing will
+ * cause the cookie to be sent
+ */
+ break;
+ case SCTP_STATE_SHUTDOWN_SENT:
+ /* incorrect state... discard */
+ break;
+ case SCTP_STATE_COOKIE_ECHOED:
+ /* incorrect state... discard */
+ break;
+ case SCTP_STATE_OPEN:
+ /* incorrect state... discard */
+ break;
+ case SCTP_STATE_EMPTY:
+ case SCTP_STATE_INUSE:
+ default:
+ /* incorrect state... discard */
+ return (-1);
+ break;
+ }
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Leaving handle-init-ack end\n");
+ return (0);
+}
+
+
+/*
+ * handle a state cookie for an existing association
+ * m: input packet mbuf chain -- assumes a pullup on IP/SCTP/COOKIE-ECHO chunk
+ *    note: this is a "split" mbuf and the cookie signature does not exist
+ * offset: offset into mbuf to the cookie-echo chunk
+ */
+static struct sctp_tcb *
+sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
+ struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net,
+ struct sockaddr *init_src, int *notification, sctp_assoc_t * sac_assoc_id,
+ uint32_t vrf_id)
+{
+ struct sctp_association *asoc;
+ struct sctp_init_chunk *init_cp, init_buf;
+ struct sctp_init_ack_chunk *initack_cp, initack_buf;
+ int chk_length;
+ int init_offset, initack_offset, i;
+ int retval;
+ int spec_flag = 0;
+ uint32_t how_indx;
+
+ /* I know that the TCB is non-NULL from the caller */
+ asoc = &stcb->asoc;
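+ /* record which collision case we take in the cookie_how trace (for debugging) */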
+ for (how_indx = 0; how_indx < sizeof(asoc->cookie_how); how_indx++) {
+ if (asoc->cookie_how[how_indx] == 0)
+ break;
+ }
+ if (how_indx < sizeof(asoc->cookie_how)) {
+ asoc->cookie_how[how_indx] = 1;
+ }
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ /* SHUTDOWN came in after sending INIT-ACK */
+ struct mbuf *op_err;
+ struct sctp_paramhdr *ph;
+
+ sctp_send_shutdown_ack(stcb, stcb->asoc.primary_destination);
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err == NULL) {
+ /* FOOBAR */
+ return (NULL);
+ }
+ /* pre-reserve some space */
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ /* Set the len */
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr);
+ ph = mtod(op_err, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_COOKIE_IN_SHUTDOWN);
+ ph->param_length = htons(sizeof(struct sctp_paramhdr));
+ sctp_send_operr_to(m, iphlen, op_err, cookie->peers_vtag,
+ vrf_id);
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 2;
+ return (NULL);
+ }
+ /*
+ * find and validate the INIT chunk in the cookie (peer's info);
+ * the INIT should start after the cookie-echo header struct (chunk
+ * header, state cookie header struct)
+ */
+ init_offset = offset += sizeof(struct sctp_cookie_echo_chunk);
+
+ init_cp = (struct sctp_init_chunk *)
+ sctp_m_getptr(m, init_offset, sizeof(struct sctp_init_chunk),
+ (uint8_t *) & init_buf);
+ if (init_cp == NULL) {
+ /* could not pull a INIT chunk in cookie */
+ return (NULL);
+ }
+ chk_length = ntohs(init_cp->ch.chunk_length);
+ if (init_cp->ch.chunk_type != SCTP_INITIATION) {
+ return (NULL);
+ }
+ /*
+ * find and validate the INIT-ACK chunk in the cookie (my info);
+ * the INIT-ACK follows the INIT chunk
+ */
+ initack_offset = init_offset + SCTP_SIZE32(chk_length);
+ initack_cp = (struct sctp_init_ack_chunk *)
+ sctp_m_getptr(m, initack_offset, sizeof(struct sctp_init_ack_chunk),
+ (uint8_t *) & initack_buf);
+ if (initack_cp == NULL) {
+ /* could not pull INIT-ACK chunk in cookie */
+ return (NULL);
+ }
+ chk_length = ntohs(initack_cp->ch.chunk_length);
+ if (initack_cp->ch.chunk_type != SCTP_INITIATION_ACK) {
+ return (NULL);
+ }
+ if ((ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag) &&
+ (ntohl(init_cp->init.initiate_tag) == asoc->peer_vtag)) {
+ /*
+ * case D in Section 5.2.4 Table 2: MMAA process accordingly
+ * to get into the OPEN state
+ */
+ if (ntohl(initack_cp->init.initial_tsn) != asoc->init_seq_number) {
+ /*-
+ * Oops, this means that we somehow generated two vtags
+ * the same. I.e. we did:
+ * Us Peer
+ * <---INIT(tag=a)------
+ * ----INIT-ACK(tag=t)-->
+ * ----INIT(tag=t)------> *1
+ * <---INIT-ACK(tag=a)---
+ * <----CE(tag=t)------------- *2
+ *
+ * At point *1 we should be generating a different
+ * tag t'. Which means we would throw away the CE and send
+ * ours instead. Basically this is case C (throw away side).
+ */
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 17;
+ return (NULL);
+
+ }
+ switch (SCTP_GET_STATE(asoc)) {
+ case SCTP_STATE_COOKIE_WAIT:
+ case SCTP_STATE_COOKIE_ECHOED:
+ /*
+ * INIT was sent but got a COOKIE_ECHO with the
+ * correct tags... just accept it...but we must
+ * process the init so that we can make sure we have
+ * the right seq no's.
+ */
+ /* First we must process the INIT !! */
+ retval = sctp_process_init(init_cp, stcb, net);
+ if (retval < 0) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 3;
+ return (NULL);
+ }
+ /* we have already processed the INIT so no problem */
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb,
+ net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_12);
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_13);
+ /* update current state */
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)
+ SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
+ else
+ SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
+
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ SCTP_STAT_INCR_GAUGE32(sctps_currestab);
+ sctp_stop_all_cookie_timers(stcb);
+ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
+ (inp->sctp_socket->so_qlimit == 0)
+ ) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ /*
+ * Here is where collision would go if we
+ * did a connect() and instead got an
+ * init/init-ack/cookie done before the
+ * init-ack came back..
+ */
+ stcb->sctp_ep->sctp_flags |=
+ SCTP_PCB_FLAGS_CONNECTED;
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (NULL);
+ }
+#endif
+ soisconnected(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ /* notify upper layer */
+ *notification = SCTP_NOTIFY_ASSOC_UP;
+ /*
+ * since we did not send a HB make sure we don't
+ * double things
+ */
+ net->hb_responded = 1;
+ net->RTO = sctp_calculate_rto(stcb, asoc, net,
+ &cookie->time_entered, sctp_align_unsafe_makecopy);
+
+ if (stcb->asoc.sctp_autoclose_ticks &&
+ (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE))) {
+ sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE,
+ inp, stcb, NULL);
+ }
+ break;
+ default:
+ /*
+ * we're in the OPEN state (or beyond), so peer must
+ * have simply lost the COOKIE-ACK
+ */
+ break;
+ } /* end switch */
+ sctp_stop_all_cookie_timers(stcb);
+ /*
+ * We ignore the return code here.. not sure if we should
+ * somehow abort.. but we do have an existing asoc. This
+ * really should not fail.
+ */
+ if (sctp_load_addresses_from_init(stcb, m, iphlen,
+ init_offset + sizeof(struct sctp_init_chunk),
+ initack_offset, sh, init_src)) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 4;
+ return (NULL);
+ }
+ /* respond with a COOKIE-ACK */
+ sctp_toss_old_cookies(stcb, asoc);
+ sctp_send_cookie_ack(stcb);
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 5;
+ return (stcb);
+ }
+ if (ntohl(initack_cp->init.initiate_tag) != asoc->my_vtag &&
+ ntohl(init_cp->init.initiate_tag) == asoc->peer_vtag &&
+ cookie->tie_tag_my_vtag == 0 &&
+ cookie->tie_tag_peer_vtag == 0) {
+ /*
+ * case C in Section 5.2.4 Table 2: XMOO silently discard
+ */
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 6;
+ return (NULL);
+ }
+ if (ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag &&
+ (ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag ||
+ init_cp->init.initiate_tag == 0)) {
+ /*
+ * case B in Section 5.2.4 Table 2: MXAA or MOAA my info
+ * should be ok, re-accept peer info
+ */
+ if (ntohl(initack_cp->init.initial_tsn) != asoc->init_seq_number) {
+ /*
+ * Extension of case C. If we hit this, then the
+ * random number generator returned the same vtag
+ * when we first sent our INIT-ACK and when we later
+ * sent our INIT. The side with the seq numbers that
+ * are different will be the one that normally
+ * would have hit case C. This in effect "extends"
+ * our vtags in this collision case to be 64 bits.
+ * The same collision could occur aka you get both
+ * vtag and seq number the same twice in a row.. but
+ * is much less likely. If it did happen then we
+ * would proceed through and bring up the assoc.. we
+ * may end up with the wrong stream setup however..
+ * which would be bad.. but there is no way to
+ * tell.. until we send on a stream that does not
+ * exist :-)
+ */
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 7;
+
+ return (NULL);
+ }
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 8;
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_14);
+ sctp_stop_all_cookie_timers(stcb);
+ /*
+ * since we did not send a HB make sure we don't double
+ * things
+ */
+ net->hb_responded = 1;
+ if (stcb->asoc.sctp_autoclose_ticks &&
+ sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb,
+ NULL);
+ }
+ asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd);
+ asoc->pre_open_streams = ntohs(initack_cp->init.num_outbound_streams);
+
+ /* Note last_cwr_tsn? where is this used? */
+ asoc->last_cwr_tsn = asoc->init_seq_number - 1;
+ if (ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) {
+ /*
+ * Ok the peer probably discarded our data (if we
+ * echoed a cookie+data). So anything on the
+ * sent_queue should be marked for retransmit, we
+ * may not get something to kick us so it COULD
+ * still take a timeout to move these.. but it can't
+ * hurt to mark them.
+ */
+ struct sctp_tmit_chunk *chk;
+
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_flight_size_decrease(chk);
+ sctp_total_flight_decrease(stcb, chk);
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ spec_flag++;
+ }
+ }
+
+ }
+ /* process the INIT info (peer's info) */
+ retval = sctp_process_init(init_cp, stcb, net);
+ if (retval < 0) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 9;
+ return (NULL);
+ }
+ if (sctp_load_addresses_from_init(stcb, m, iphlen,
+ init_offset + sizeof(struct sctp_init_chunk),
+ initack_offset, sh, init_src)) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 10;
+ return (NULL);
+ }
+ if ((asoc->state & SCTP_STATE_COOKIE_WAIT) ||
+ (asoc->state & SCTP_STATE_COOKIE_ECHOED)) {
+ *notification = SCTP_NOTIFY_ASSOC_UP;
+
+ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
+ (inp->sctp_socket->so_qlimit == 0)) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ stcb->sctp_ep->sctp_flags |=
+ SCTP_PCB_FLAGS_CONNECTED;
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (NULL);
+ }
+#endif
+ soisconnected(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)
+ SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
+ else
+ SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
+ SCTP_STAT_INCR_GAUGE32(sctps_currestab);
+ } else if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ SCTP_STAT_INCR_COUNTER32(sctps_restartestab);
+ } else {
+ SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ sctp_stop_all_cookie_timers(stcb);
+ sctp_toss_old_cookies(stcb, asoc);
+ sctp_send_cookie_ack(stcb);
+ if (spec_flag) {
+ /*
+ * only if we have retrans set do we do this. What
+ * this call does is get only the COOKIE-ACK out and
+ * then when we return the normal call to
+ * sctp_chunk_output will get the retrans out behind
+ * this.
+ */
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_COOKIE_ACK, SCTP_SO_NOT_LOCKED);
+ }
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 11;
+
+ return (stcb);
+ }
+ if ((ntohl(initack_cp->init.initiate_tag) != asoc->my_vtag &&
+ ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) &&
+ cookie->tie_tag_my_vtag == asoc->my_vtag_nonce &&
+ cookie->tie_tag_peer_vtag == asoc->peer_vtag_nonce &&
+ cookie->tie_tag_peer_vtag != 0) {
+ struct sctpasochead *head;
+
+ /*
+ * case A in Section 5.2.4 Table 2: XXMM (peer restarted)
+ */
+ /* temp code */
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 12;
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_15);
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+
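+ /* save the assoc id so the restart notification below can reference it */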
+ *sac_assoc_id = sctp_get_associd(stcb);
+ /* notify upper layer */
+ *notification = SCTP_NOTIFY_ASSOC_RESTART;
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_OPEN) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT)) {
+ SCTP_STAT_INCR_GAUGE32(sctps_currestab);
+ }
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ SCTP_STAT_INCR_GAUGE32(sctps_restartestab);
+ } else if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
+ SCTP_STAT_INCR_GAUGE32(sctps_collisionestab);
+ }
+ if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+
+ } else if (!(asoc->state & SCTP_STATE_SHUTDOWN_SENT)) {
+ /* move to OPEN state, if not in SHUTDOWN_SENT */
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ }
+ asoc->pre_open_streams =
+ ntohs(initack_cp->init.num_outbound_streams);
+ asoc->init_seq_number = ntohl(initack_cp->init.initial_tsn);
+ asoc->sending_seq = asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number;
+
+ asoc->last_cwr_tsn = asoc->init_seq_number - 1;
+ asoc->asconf_seq_in = asoc->last_acked_seq = asoc->init_seq_number - 1;
+
+ asoc->str_reset_seq_in = asoc->init_seq_number;
+
+ asoc->advanced_peer_ack_point = asoc->last_acked_seq;
+ if (asoc->mapping_array) {
+ memset(asoc->mapping_array, 0,
+ asoc->mapping_array_size);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_WLOCK(stcb->sctp_ep);
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ /* send up all the data */
+ SCTP_TCB_SEND_LOCK(stcb);
+
+ sctp_report_all_outbound(stcb, 1, SCTP_SO_NOT_LOCKED);
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ stcb->asoc.strmout[i].stream_no = i;
+ stcb->asoc.strmout[i].next_sequence_sent = 0;
+ stcb->asoc.strmout[i].last_msg_incomplete = 0;
+ }
+ /* process the INIT-ACK info (my info) */
+ asoc->my_vtag = ntohl(initack_cp->init.initiate_tag);
+ asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd);
+
+ /* pull from vtag hash */
+ LIST_REMOVE(stcb, sctp_asocs);
+ /* re-insert to new vtag position */
+ head = &sctppcbinfo.sctp_asochash[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag,
+ sctppcbinfo.hashasocmark)];
+ /*
+ * put it in the bucket in the vtag hash of assoc's for the
+ * system
+ */
+ LIST_INSERT_HEAD(head, stcb, sctp_asocs);
+
+ /* Is this the first restart? */
+ if (stcb->asoc.in_restart_hash == 0) {
+ /* Ok add it to assoc_id vtag hash */
+ head = &sctppcbinfo.sctp_restarthash[SCTP_PCBHASH_ASOC(stcb->asoc.assoc_id,
+ sctppcbinfo.hashrestartmark)];
+ LIST_INSERT_HEAD(head, stcb, sctp_tcbrestarhash);
+ stcb->asoc.in_restart_hash = 1;
+ }
+ /* process the INIT info (peer's info) */
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ SCTP_INP_WUNLOCK(stcb->sctp_ep);
+ SCTP_INP_INFO_WUNLOCK();
+
+ retval = sctp_process_init(init_cp, stcb, net);
+ if (retval < 0) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 13;
+
+ return (NULL);
+ }
+ /*
+ * since we did not send a HB make sure we don't double
+ * things
+ */
+ net->hb_responded = 1;
+
+ if (sctp_load_addresses_from_init(stcb, m, iphlen,
+ init_offset + sizeof(struct sctp_init_chunk),
+ initack_offset, sh, init_src)) {
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 14;
+
+ return (NULL);
+ }
+ /* respond with a COOKIE-ACK */
+ sctp_stop_all_cookie_timers(stcb);
+ sctp_toss_old_cookies(stcb, asoc);
+ sctp_send_cookie_ack(stcb);
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 15;
+
+ return (stcb);
+ }
+ if (how_indx < sizeof(asoc->cookie_how))
+ asoc->cookie_how[how_indx] = 16;
+ /* all other cases... */
+ return (NULL);
+}
+
+
+/*
+ * handle a state cookie for a new association
+ * m: input packet mbuf chain -- assumes a pullup on IP/SCTP/COOKIE-ECHO chunk
+ *    note: this is a "split" mbuf and the cookie signature does not exist
+ * offset: offset into mbuf to the cookie-echo chunk
+ * length: length of the cookie chunk
+ * to: where the init was from
+ * returns a new TCB
+ */
+static struct sctp_tcb *
+sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
+ struct sctp_inpcb *inp, struct sctp_nets **netp,
+ struct sockaddr *init_src, int *notification,
+ int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
+ uint32_t vrf_id)
+{
+ struct sctp_tcb *stcb;
+ struct sctp_init_chunk *init_cp, init_buf;
+ struct sctp_init_ack_chunk *initack_cp, initack_buf;
+ struct sockaddr_storage sa_store;
+ struct sockaddr *initack_src = (struct sockaddr *)&sa_store;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+ struct sctp_association *asoc;
+ int chk_length;
+ int init_offset, initack_offset, initack_limit;
+ int retval;
+ int error = 0;
+ uint32_t old_tag;
+ uint8_t auth_chunk_buf[SCTP_PARAM_BUFFER_SIZE];
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(inp);
+#endif
+
+ /*
+ * find and validate the INIT chunk in the cookie (peer's info);
+ * the INIT should start after the cookie-echo header struct (chunk
+ * header, state cookie header struct)
+ */
+ init_offset = offset + sizeof(struct sctp_cookie_echo_chunk);
+ init_cp = (struct sctp_init_chunk *)
+ sctp_m_getptr(m, init_offset, sizeof(struct sctp_init_chunk),
+ (uint8_t *) & init_buf);
+ if (init_cp == NULL) {
+ /* could not pull a INIT chunk in cookie */
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "process_cookie_new: could not pull INIT chunk hdr\n");
+ return (NULL);
+ }
+ chk_length = ntohs(init_cp->ch.chunk_length);
+ if (init_cp->ch.chunk_type != SCTP_INITIATION) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "HUH? process_cookie_new: could not find INIT chunk!\n");
+ return (NULL);
+ }
+ initack_offset = init_offset + SCTP_SIZE32(chk_length);
+ /*
+ * find and validate the INIT-ACK chunk in the cookie (my info);
+ * the INIT-ACK follows the INIT chunk
+ */
+ initack_cp = (struct sctp_init_ack_chunk *)
+ sctp_m_getptr(m, initack_offset, sizeof(struct sctp_init_ack_chunk),
+ (uint8_t *) & initack_buf);
+ if (initack_cp == NULL) {
+ /* could not pull INIT-ACK chunk in cookie */
+ SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: could not pull INIT-ACK chunk hdr\n");
+ return (NULL);
+ }
+ chk_length = ntohs(initack_cp->ch.chunk_length);
+ if (initack_cp->ch.chunk_type != SCTP_INITIATION_ACK) {
+ return (NULL);
+ }
+ /*
+ * NOTE: We can't use the INIT_ACK's chk_length to determine the
+ * "initack_limit" value. This is because the chk_length field
+ * includes the length of the cookie, but the cookie is omitted when
+ * the INIT and INIT_ACK are tacked onto the cookie...
+ */
+ initack_limit = offset + cookie_len;
+
+ /*
+ * now that we know the INIT/INIT-ACK are in place, create a new TCB
+ * and populate it
+ */
+
+ /*
+ * Here we do a trick: we pass NULL for the proc/thread argument.
+ * We do this since in effect we only use the p argument when the
+ * socket is unbound and we must do an implicit bind. Since we are
+ * getting a cookie, we cannot be unbound.
+ */
+ stcb = sctp_aloc_assoc(inp, init_src, 0, &error,
+ ntohl(initack_cp->init.initiate_tag), vrf_id,
+ (struct thread *)NULL
+ );
+ if (stcb == NULL) {
+ struct mbuf *op_err;
+
+ /* memory problem? */
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "process_cookie_new: no room for another TCB!\n");
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+
+ sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
+ sh, op_err, vrf_id);
+ return (NULL);
+ }
+ /* get the correct sctp_nets */
+ if (netp)
+ *netp = sctp_findnet(stcb, init_src);
+
+ asoc = &stcb->asoc;
+ /* get scope variables out of cookie */
+ asoc->ipv4_local_scope = cookie->ipv4_scope;
+ asoc->site_scope = cookie->site_scope;
+ asoc->local_scope = cookie->local_scope;
+ asoc->loopback_scope = cookie->loopback_scope;
+
+ if ((asoc->ipv4_addr_legal != cookie->ipv4_addr_legal) ||
+ (asoc->ipv6_addr_legal != cookie->ipv6_addr_legal)) {
+ struct mbuf *op_err;
+
+ /*
+ * Houston we have a problem. The EP changed while the
+ * cookie was in flight. Only recourse is to abort the
+ * association.
+ */
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+ sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
+ sh, op_err, vrf_id);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
+ SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+ }
+ /* process the INIT-ACK info (my info) */
+ old_tag = asoc->my_vtag;
+ asoc->assoc_id = asoc->my_vtag = ntohl(initack_cp->init.initiate_tag);
+ asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd);
+ asoc->pre_open_streams = ntohs(initack_cp->init.num_outbound_streams);
+ asoc->init_seq_number = ntohl(initack_cp->init.initial_tsn);
+ asoc->sending_seq = asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number;
+ asoc->last_cwr_tsn = asoc->init_seq_number - 1;
+ asoc->asconf_seq_in = asoc->last_acked_seq = asoc->init_seq_number - 1;
+ asoc->str_reset_seq_in = asoc->init_seq_number;
+
+ asoc->advanced_peer_ack_point = asoc->last_acked_seq;
+
+ /* process the INIT info (peer's info) */
+ if (netp)
+ retval = sctp_process_init(init_cp, stcb, *netp);
+ else
+ retval = 0;
+ if (retval < 0) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+ }
+ /* load all addresses */
+ if (sctp_load_addresses_from_init(stcb, m, iphlen,
+ init_offset + sizeof(struct sctp_init_chunk), initack_offset, sh,
+ init_src)) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_17);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+ }
+ /*
+ * verify any preceding AUTH chunk that was skipped
+ */
+ /* pull the local authentication parameters from the cookie/init-ack */
+ sctp_auth_get_cookie_params(stcb, m,
+ initack_offset + sizeof(struct sctp_init_ack_chunk),
+ initack_limit - (initack_offset + sizeof(struct sctp_init_ack_chunk)));
+ if (auth_skipped) {
+ struct sctp_auth_chunk *auth;
+
+ auth = (struct sctp_auth_chunk *)
+ sctp_m_getptr(m, auth_offset, auth_len, auth_chunk_buf);
+ if ((auth == NULL) || sctp_handle_auth(stcb, auth, m, auth_offset)) {
+ /* auth HMAC failed, dump the assoc and packet */
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "COOKIE-ECHO: AUTH failed\n");
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_18);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+ } else {
+ /* remaining chunks checked... good to go */
+ stcb->asoc.authenticated = 1;
+ }
+ }
+ /* update current state */
+ SCTPDBG(SCTP_DEBUG_INPUT2, "moving to OPEN state\n");
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ sctp_stop_all_cookie_timers(stcb);
+ SCTP_STAT_INCR_COUNTER32(sctps_passiveestab);
+ SCTP_STAT_INCR_GAUGE32(sctps_currestab);
+
+ /*
+ * if we're doing ASCONFs, check to see if we have any new local
+ * addresses that need to get added to the peer (e.g. addresses
+ * changed while the cookie echo was in flight). This needs to be
+ * done after we go to the OPEN state to do the correct asconf
+ * processing. Otherwise, make sure we have the correct addresses
+ * in our lists.
+ */
+
+ /* warning, we re-use sin, sin6, sa_store here! */
+ /* pull in local_address (our "from" address) */
+ if (cookie->laddr_type == SCTP_IPV4_ADDRESS) {
+ /* source addr is IPv4 */
+ sin = (struct sockaddr_in *)initack_src;
+ memset(sin, 0, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_addr.s_addr = cookie->laddress[0];
+ } else if (cookie->laddr_type == SCTP_IPV6_ADDRESS) {
+ /* source addr is IPv6 */
+ sin6 = (struct sockaddr_in6 *)initack_src;
+ memset(sin6, 0, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ sin6->sin6_scope_id = cookie->scope_id;
+ memcpy(&sin6->sin6_addr, cookie->laddress,
+ sizeof(sin6->sin6_addr));
+ } else {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_19);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+ }
+
+ /* set up to notify upper layer */
+ *notification = SCTP_NOTIFY_ASSOC_UP;
+ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
+ (inp->sctp_socket->so_qlimit == 0)) {
+ /*
+ * This is an endpoint that called connect(); how it got a
+ * cookie that is NEW is a bit of a mystery. It must be that
+ * the INIT was sent, but before it got there.. a complete
+ * INIT/INIT-ACK/COOKIE arrived. But of course then it
+ * should have gone to the other code.. not here.. oh well..
+ * a bit of protection is worth having..
+ */
+ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (NULL);
+ }
+#endif
+ soisconnected(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ } else if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (inp->sctp_socket->so_qlimit)) {
+ /*
+ * We don't want to do anything with this one, since it is
+ * the listening socket. The timer will get started for
+ * accepted connections in the caller.
+ */
+ ;
+ }
+ /* since we did not send a HB make sure we don't double things */
+ if ((netp) && (*netp))
+ (*netp)->hb_responded = 1;
+
+ if (stcb->asoc.sctp_autoclose_ticks &&
+ sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb, NULL);
+ }
+ /* calculate the RTT */
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
+ if ((netp) && (*netp)) {
+ (*netp)->RTO = sctp_calculate_rto(stcb, asoc, *netp,
+ &cookie->time_entered, sctp_align_unsafe_makecopy);
+ }
+ /* respond with a COOKIE-ACK */
+ sctp_send_cookie_ack(stcb);
+
+ /*
+ * check the address lists for any ASCONFs that need to be sent
+ * AFTER the cookie-ack is sent
+ */
+ sctp_check_address_list(stcb, m,
+ initack_offset + sizeof(struct sctp_init_ack_chunk),
+ initack_limit - (initack_offset + sizeof(struct sctp_init_ack_chunk)),
+ initack_src, cookie->local_scope, cookie->site_scope,
+ cookie->ipv4_scope, cookie->loopback_scope);
+
+
+ return (stcb);
+}
+
+
+/*
+ * handles a COOKIE-ECHO message
+ * stcb: modified to a new TCB, or left as the existing (non-NULL) TCB
+ */
+static struct mbuf *
+sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_cookie_echo_chunk *cp,
+ struct sctp_inpcb **inp_p, struct sctp_tcb **stcb, struct sctp_nets **netp,
+ int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
+ struct sctp_tcb **locked_tcb, uint32_t vrf_id)
+{
+ struct sctp_state_cookie *cookie;
+ struct sockaddr_in6 sin6;
+ struct sockaddr_in sin;
+ struct sctp_tcb *l_stcb = *stcb;
+ struct sctp_inpcb *l_inp;
+ struct sockaddr *to;
+ sctp_assoc_t sac_restart_id;
+ struct sctp_pcb *ep;
+ struct mbuf *m_sig;
+ uint8_t calc_sig[SCTP_SIGNATURE_SIZE], tmp_sig[SCTP_SIGNATURE_SIZE];
+ uint8_t *sig;
+ uint8_t cookie_ok = 0;
+ unsigned int size_of_pkt, sig_offset, cookie_offset;
+ unsigned int cookie_len;
+ struct timeval now;
+ struct timeval time_expires;
+ struct sockaddr_storage dest_store;
+ struct sockaddr *localep_sa = (struct sockaddr *)&dest_store;
+ struct ip *iph;
+ int notification = 0;
+ struct sctp_nets *netl;
+ int had_a_existing_tcb = 0;
+
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_cookie: handling COOKIE-ECHO\n");
+
+ if (inp_p == NULL) {
+ return (NULL);
+ }
+ /* First get the destination address setup too. */
+ iph = mtod(m, struct ip *);
+ if (iph->ip_v == IPVERSION) {
+ /* its IPv4 */
+ struct sockaddr_in *lsin;
+
+ lsin = (struct sockaddr_in *)(localep_sa);
+ memset(lsin, 0, sizeof(*lsin));
+ lsin->sin_family = AF_INET;
+ lsin->sin_len = sizeof(*lsin);
+ lsin->sin_port = sh->dest_port;
+ lsin->sin_addr.s_addr = iph->ip_dst.s_addr;
+ size_of_pkt = SCTP_GET_IPV4_LENGTH(iph);
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ /* its IPv6 */
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 *lsin6;
+
+ lsin6 = (struct sockaddr_in6 *)(localep_sa);
+ memset(lsin6, 0, sizeof(*lsin6));
+ lsin6->sin6_family = AF_INET6;
+ lsin6->sin6_len = sizeof(struct sockaddr_in6);
+ ip6 = mtod(m, struct ip6_hdr *);
+ lsin6->sin6_port = sh->dest_port;
+ lsin6->sin6_addr = ip6->ip6_dst;
+ size_of_pkt = SCTP_GET_IPV6_LENGTH(ip6) + iphlen;
+ } else {
+ return (NULL);
+ }
+
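+ /* the state cookie sits right after the chunk header in the COOKIE-ECHO */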
+ cookie = &cp->cookie;
+ cookie_offset = offset + sizeof(struct sctp_chunkhdr);
+ cookie_len = ntohs(cp->ch.chunk_length);
+
+ if ((cookie->peerport != sh->src_port) &&
+ (cookie->myport != sh->dest_port) &&
+ (cookie->my_vtag != sh->v_tag)) {
+ /*
+ * invalid ports or bad tag. Note that we always leave the
+ * v_tag in the header in network order and when we stored
+ * it in the my_vtag slot we also left it in network order.
+ * This maintains the match even though it may be in the
+ * opposite byte order of the machine :->
+ */
+ return (NULL);
+ }
+ if (cookie_len > size_of_pkt ||
+ cookie_len < sizeof(struct sctp_cookie_echo_chunk) +
+ sizeof(struct sctp_init_chunk) +
+ sizeof(struct sctp_init_ack_chunk) + SCTP_SIGNATURE_SIZE) {
+ /* cookie too long! or too small */
+ return (NULL);
+ }
+ /*
+ * split off the signature into its own mbuf (since it should not be
+ * calculated in the sctp_hmac_m() call).
+ */
+ sig_offset = offset + cookie_len - SCTP_SIGNATURE_SIZE;
+ if (sig_offset > size_of_pkt) {
+ /* packet not correct size! */
+ /* XXX this may already be accounted for earlier... */
+ return (NULL);
+ }
+ m_sig = m_split(m, sig_offset, M_DONTWAIT);
+ if (m_sig == NULL) {
+ /* out of memory or ?? */
+ return (NULL);
+ }
+ /*
+ * compute the signature/digest for the cookie
+ */
+ ep = &(*inp_p)->sctp_ep;
+ l_inp = *inp_p;
+ if (l_stcb) {
+ SCTP_TCB_UNLOCK(l_stcb);
+ }
+ SCTP_INP_RLOCK(l_inp);
+ if (l_stcb) {
+ SCTP_TCB_LOCK(l_stcb);
+ }
+ /* which cookie is it? */
+ if ((cookie->time_entered.tv_sec < (long)ep->time_of_secret_change) &&
+ (ep->current_secret_number != ep->last_secret_number)) {
+ /* it's the old cookie */
+ (void)sctp_hmac_m(SCTP_HMAC,
+ (uint8_t *) ep->secret_key[(int)ep->last_secret_number],
+ SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0);
+ } else {
+ /* it's the current cookie */
+ (void)sctp_hmac_m(SCTP_HMAC,
+ (uint8_t *) ep->secret_key[(int)ep->current_secret_number],
+ SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0);
+ }
+ /* get the signature */
+ SCTP_INP_RUNLOCK(l_inp);
+ sig = (uint8_t *) sctp_m_getptr(m_sig, 0, SCTP_SIGNATURE_SIZE, (uint8_t *) & tmp_sig);
+ if (sig == NULL) {
+ /* couldn't find signature */
+ sctp_m_freem(m_sig);
+ return (NULL);
+ }
+ /* compare the received digest with the computed digest */
+ if (memcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) != 0) {
+ /* try the old cookie? */
+ if ((cookie->time_entered.tv_sec == (long)ep->time_of_secret_change) &&
+ (ep->current_secret_number != ep->last_secret_number)) {
+ /* compute digest with old */
+ (void)sctp_hmac_m(SCTP_HMAC,
+ (uint8_t *) ep->secret_key[(int)ep->last_secret_number],
+ SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0);
+ /* compare */
+ if (memcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) == 0)
+ cookie_ok = 1;
+ }
+ } else {
+ cookie_ok = 1;
+ }
+
+ /*
+ * Now before we continue we must reconstruct our mbuf so that
+ * normal processing of any other chunks will work.
+ */
+ {
+ struct mbuf *m_at;
+
+ m_at = m;
+ while (SCTP_BUF_NEXT(m_at) != NULL) {
+ m_at = SCTP_BUF_NEXT(m_at);
+ }
+ SCTP_BUF_NEXT(m_at) = m_sig;
+ }
+
+ if (cookie_ok == 0) {
+ SCTPDBG(SCTP_DEBUG_INPUT2, "handle_cookie_echo: cookie signature validation failed!\n");
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "offset = %u, cookie_offset = %u, sig_offset = %u\n",
+ (uint32_t) offset, cookie_offset, sig_offset);
+ return (NULL);
+ }
+ /*
+ * check the cookie timestamps to be sure it's not stale
+ */
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ /* Expire time is in Ticks, so we convert to seconds */
+ time_expires.tv_sec = cookie->time_entered.tv_sec + TICKS_TO_SEC(cookie->cookie_life);
+ time_expires.tv_usec = cookie->time_entered.tv_usec;
+ if (timevalcmp(&now, &time_expires, >)) {
+ /* cookie is stale! */
+ struct mbuf *op_err;
+ struct sctp_stale_cookie_msg *scm;
+ uint32_t tim;
+
+ op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_stale_cookie_msg),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err == NULL) {
+ /* FOOBAR */
+ return (NULL);
+ }
+ /* pre-reserve some space */
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+
+ /* Set the len */
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_stale_cookie_msg);
+ scm = mtod(op_err, struct sctp_stale_cookie_msg *);
+ scm->ph.param_type = htons(SCTP_CAUSE_STALE_COOKIE);
+ scm->ph.param_length = htons((sizeof(struct sctp_paramhdr) +
+ (sizeof(uint32_t))));
+ /* seconds to usec */
+ tim = (now.tv_sec - time_expires.tv_sec) * 1000000;
+ /* add in usec */
+ if (tim == 0)
+ tim = now.tv_usec - cookie->time_entered.tv_usec;
+ scm->time_usec = htonl(tim);
+ sctp_send_operr_to(m, iphlen, op_err, cookie->peers_vtag,
+ vrf_id);
+ return (NULL);
+ }
+ /*
+ * Now we must see with the lookup address if we have an existing
+ * asoc. This will only happen if we were in the COOKIE-WAIT state
+ * and an INIT collided with us and somewhere the peer sent the
+ * cookie on another address besides the single address our assoc
+ * had for him. In this case we will have one of the tie-tags set at
+ * least AND the address field in the cookie can be used to look it
+ * up.
+ */
+ to = NULL;
+ if (cookie->addr_type == SCTP_IPV6_ADDRESS) {
+ memset(&sin6, 0, sizeof(sin6));
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(sin6);
+ sin6.sin6_port = sh->src_port;
+ sin6.sin6_scope_id = cookie->scope_id;
+ memcpy(&sin6.sin6_addr.s6_addr, cookie->address,
+ sizeof(sin6.sin6_addr.s6_addr));
+ to = (struct sockaddr *)&sin6;
+ } else if (cookie->addr_type == SCTP_IPV4_ADDRESS) {
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(sin);
+ sin.sin_port = sh->src_port;
+ sin.sin_addr.s_addr = cookie->address[0];
+ to = (struct sockaddr *)&sin;
+ } else {
+ /* This should not happen */
+ return (NULL);
+ }
+ if ((*stcb == NULL) && to) {
+ /* Yep, lets check */
+ *stcb = sctp_findassociation_ep_addr(inp_p, to, netp, localep_sa, NULL);
+ if (*stcb == NULL) {
+ /*
+ * We should have only got back the same inp. If we
+ * got back a different ep we have a problem. The
+ * original findep got back l_inp and now
+ */
+ if (l_inp != *inp_p) {
+ SCTP_PRINTF("Bad problem find_ep got a diff inp then special_locate?\n");
+ }
+ } else {
+ if (*locked_tcb == NULL) {
+ /*
+ * In this case we found the assoc only
+ * after we locked the create lock. This
+ * means we are in a colliding case and we
+ * must make sure that we unlock the tcb if
+ * it's one of the cases where we throw away
+ * the incoming packets.
+ */
+ *locked_tcb = *stcb;
+
+ /*
+ * We must also increment the inp ref count
+ * since the ref_count flag was set when we
+ * did not find the TCB; now we found it,
+ * which reduces the refcount.. we must
+ * raise it back out to balance it all :-)
+ */
+ SCTP_INP_INCR_REF((*stcb)->sctp_ep);
+ if ((*stcb)->sctp_ep != l_inp) {
+ SCTP_PRINTF("Huh? ep:%p diff then l_inp:%p?\n",
+ (*stcb)->sctp_ep, l_inp);
+ }
+ }
+ }
+ }
+ if (to == NULL)
+ return (NULL);
+
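+ /* the signature was split off above, so drop it from the cookie length we pass on */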
+ cookie_len -= SCTP_SIGNATURE_SIZE;
+ if (*stcb == NULL) {
+ /* this is the "normal" case... get a new TCB */
+ *stcb = sctp_process_cookie_new(m, iphlen, offset, sh, cookie,
+ cookie_len, *inp_p, netp, to, ¬ification,
+ auth_skipped, auth_offset, auth_len, vrf_id);
+ } else {
+ /* this is abnormal... cookie-echo on existing TCB */
+ had_a_existing_tcb = 1;
+ *stcb = sctp_process_cookie_existing(m, iphlen, offset, sh,
+ cookie, cookie_len, *inp_p, *stcb, *netp, to,
+ &notification, &sac_restart_id, vrf_id);
+ }
+
+ if (*stcb == NULL) {
+ /* still no TCB... must be bad cookie-echo */
+ return (NULL);
+ }
+ /*
+ * Ok, we built an association so confirm the address we sent the
+ * INIT-ACK to.
+ */
+ netl = sctp_findnet(*stcb, to);
+ /*
+ * This code should in theory NOT run, but handle it just in case.
+ */
+ if (netl == NULL) {
+ /* TSNH! Huh, why do I need to add this address here? */
+ int ret;
+
+ ret = sctp_add_remote_addr(*stcb, to, SCTP_DONOT_SETSCOPE,
+ SCTP_IN_COOKIE_PROC);
+ netl = sctp_findnet(*stcb, to);
+ }
+ if (netl) {
+ if (netl->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ netl->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
+ (void)sctp_set_primary_addr((*stcb), (struct sockaddr *)NULL,
+ netl);
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
+ (*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ if (*stcb) {
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, *inp_p,
+ *stcb, NULL);
+ }
+ if ((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
+ if (!had_a_existing_tcb ||
+ (((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0)) {
+ /*
+ * If we have a NEW cookie, or the connection never
+ * reached the connected state during a collision, we
+ * must do the TCP accept processing.
+ */
+ struct socket *so, *oso;
+ struct sctp_inpcb *inp;
+
+ if (notification == SCTP_NOTIFY_ASSOC_RESTART) {
+ /*
+ * For a restart we will keep the same
+ * socket, no need to do anything. I THINK!!
+ */
+ sctp_ulp_notify(notification, *stcb, 0, (void *)&sac_restart_id, SCTP_SO_NOT_LOCKED);
+ return (m);
+ }
+ oso = (*inp_p)->sctp_socket;
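+ /* Hold a reference on the assoc and drop the TCB lock while creating the new socket, then re-acquire the lock and release the reference. */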
+ atomic_add_int(&(*stcb)->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK((*stcb));
+ so = sonewconn(oso, 0);
+ SCTP_TCB_LOCK((*stcb));
+ atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
+
+ if (so == NULL) {
+ struct mbuf *op_err;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *pcb_so;
+
+#endif
+ /* Too many sockets */
+ SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: no room for another socket!\n");
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+ sctp_abort_association(*inp_p, NULL, m, iphlen,
+ sh, op_err, vrf_id);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ pcb_so = SCTP_INP_SO(*inp_p);
+ atomic_add_int(&(*stcb)->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK((*stcb));
+ SCTP_SOCKET_LOCK(pcb_so, 1);
+ SCTP_TCB_LOCK((*stcb));
+ atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(*inp_p, *stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_20);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(pcb_so, 1);
+#endif
+ return (NULL);
+ }
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ SCTP_INP_INCR_REF(inp);
+ /*
+ * We add the unbound flag here so that if we get an
+ * soabort() before the move_pcb is done, we will
+ * properly clean up.
+ */
+ inp->sctp_flags = (SCTP_PCB_FLAGS_TCPTYPE |
+ SCTP_PCB_FLAGS_CONNECTED |
+ SCTP_PCB_FLAGS_IN_TCPPOOL |
+ SCTP_PCB_FLAGS_UNBOUND |
+ (SCTP_PCB_COPY_FLAGS & (*inp_p)->sctp_flags) |
+ SCTP_PCB_FLAGS_DONT_WAKE);
+ inp->sctp_features = (*inp_p)->sctp_features;
+ inp->sctp_mobility_features = (*inp_p)->sctp_mobility_features;
+ inp->sctp_socket = so;
+ inp->sctp_frag_point = (*inp_p)->sctp_frag_point;
+ inp->partial_delivery_point = (*inp_p)->partial_delivery_point;
+ inp->sctp_context = (*inp_p)->sctp_context;
+ inp->inp_starting_point_for_iterator = NULL;
+ /*
+ * copy in the authentication parameters from the
+ * original endpoint
+ */
+ if (inp->sctp_ep.local_hmacs)
+ sctp_free_hmaclist(inp->sctp_ep.local_hmacs);
+ inp->sctp_ep.local_hmacs =
+ sctp_copy_hmaclist((*inp_p)->sctp_ep.local_hmacs);
+ if (inp->sctp_ep.local_auth_chunks)
+ sctp_free_chunklist(inp->sctp_ep.local_auth_chunks);
+ inp->sctp_ep.local_auth_chunks =
+ sctp_copy_chunklist((*inp_p)->sctp_ep.local_auth_chunks);
+ (void)sctp_copy_skeylist(&(*inp_p)->sctp_ep.shared_keys,
+ &inp->sctp_ep.shared_keys);
+
+ /*
+ * Now we must move it from one hash table to
+ * another and get the tcb in the right place.
+ */
+ sctp_move_pcb_and_assoc(*inp_p, inp, *stcb);
+
+ atomic_add_int(&(*stcb)->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK((*stcb));
+
+ sctp_pull_off_control_to_new_inp((*inp_p), inp, *stcb,
+ 0);
+ SCTP_TCB_LOCK((*stcb));
+ atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
+
+
+ /*
+ * now we must check to see if we were aborted while
+ * the move was going on and the lock/unlock
+ * happened.
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /*
+ * yep it was, we leave the assoc attached
+ * to the socket since the sctp_inpcb_free()
+ * call will send an abort for us.
+ */
+ SCTP_INP_DECR_REF(inp);
+ return (NULL);
+ }
+ SCTP_INP_DECR_REF(inp);
+ /* Switch over to the new guy */
+ *inp_p = inp;
+ sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+
+ /*
+ * Pull it from the incomplete queue and wake the
+ * guy
+ */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ atomic_add_int(&(*stcb)->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK((*stcb));
+ SCTP_SOCKET_LOCK(so, 1);
+#endif
+ soisconnected(so);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_TCB_LOCK((*stcb));
+ atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ return (m);
+ }
+ }
+ if ((notification) && ((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE)) {
+ sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+ }
+ return (m);
+}
+
+static void
+sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /* cp must not be used, others call this without a c-ack :-) */
+ struct sctp_association *asoc;
+
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_cookie_ack: handling COOKIE-ACK\n");
+ if (stcb == NULL)
+ return;
+
+ asoc = &stcb->asoc;
+
+ sctp_stop_all_cookie_timers(stcb);
+ /* process according to association state */
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ /* state change only needed when I am in right state */
+ SCTPDBG(SCTP_DEBUG_INPUT2, "moving to OPEN state\n");
+ SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
+ if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+
+ }
+ /* update RTO */
+ SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
+ SCTP_STAT_INCR_GAUGE32(sctps_currestab);
+ if (asoc->overall_error_count == 0) {
+ net->RTO = sctp_calculate_rto(stcb, asoc, net,
+ &asoc->time_entered, sctp_align_safe_nocopy);
+ }
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
+ sctp_ulp_notify(SCTP_NOTIFY_ASSOC_UP, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ soisconnected(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep,
+ stcb, net);
+ /*
+ * since we did not send a HB, make sure we don't
+ * double things up
+ */
+ net->hb_responded = 1;
+
+ if (stcb->asoc.sctp_autoclose_ticks &&
+ sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_AUTOCLOSE)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE,
+ stcb->sctp_ep, stcb, NULL);
+ }
+ /*
+ * send ASCONF if parameters are pending and ASCONFs are
+ * allowed (e.g. addresses changed while INIT/COOKIE-ECHO
+ * were in flight)
+ */
+ if ((sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_DO_ASCONF)) &&
+ (stcb->asoc.peer_supports_asconf) &&
+ (!TAILQ_EMPTY(&stcb->asoc.asconf_queue))) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
+ stcb->sctp_ep, stcb,
+ stcb->asoc.primary_destination);
+#else
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ SCTP_ADDR_NOT_LOCKED);
+#endif
+ }
+ }
+ /* Toss the cookie if I can */
+ sctp_toss_old_cookies(stcb, asoc);
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ /* Restart the timer if we have pending data */
+ struct sctp_tmit_chunk *chk;
+
+ chk = TAILQ_FIRST(&asoc->sent_queue);
+ if (chk) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, chk->whoTo);
+ }
+ }
+}
+
+static void
+sctp_handle_ecn_echo(struct sctp_ecne_chunk *cp,
+ struct sctp_tcb *stcb)
+{
+ struct sctp_nets *net;
+ struct sctp_tmit_chunk *lchk;
+ uint32_t tsn;
+
+ if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_ecne_chunk)) {
+ return;
+ }
+ SCTP_STAT_INCR(sctps_recvecne);
+ tsn = ntohl(cp->tsn);
+ /* ECN Nonce stuff: need a resync and disable the nonce sum check */
+ /* Also we make sure we disable the nonce_wait */
+ lchk = TAILQ_FIRST(&stcb->asoc.send_queue);
+ if (lchk == NULL) {
+ stcb->asoc.nonce_resync_tsn = stcb->asoc.sending_seq;
+ } else {
+ stcb->asoc.nonce_resync_tsn = lchk->rec.data.TSN_seq;
+ }
+ stcb->asoc.nonce_wait_for_ecne = 0;
+ stcb->asoc.nonce_sum_check = 0;
+
+ /* Find where it was sent, if possible */
+ net = NULL;
+ lchk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ while (lchk) {
+ if (lchk->rec.data.TSN_seq == tsn) {
+ net = lchk->whoTo;
+ break;
+ }
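+ /* the sent queue is kept in TSN order, so once we pass the TSN we can stop looking */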
+ if (compare_with_wrap(lchk->rec.data.TSN_seq, tsn, MAX_SEQ))
+ break;
+ lchk = TAILQ_NEXT(lchk, sctp_next);
+ }
+ if (net == NULL)
+ /* default is we use the primary */
+ net = stcb->asoc.primary_destination;
+
+ if (compare_with_wrap(tsn, stcb->asoc.last_cwr_tsn, MAX_TSN)) {
+ /*
+ * JRS - Use the congestion control given in the pluggable
+ * CC module
+ */
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo(stcb, net);
+ /*
+ * we reduce once every RTT. So we will only lower cwnd at
+ * the next sending seq i.e. the resync_tsn.
+ */
+ stcb->asoc.last_cwr_tsn = stcb->asoc.nonce_resync_tsn;
+ }
+ /*
+ * We always send a CWR this way: if our previous one was
+ * lost our peer will get an update, and if it is not yet
+ * time to reduce again the peer still gets the CWR.
+ */
+ sctp_send_cwr(stcb, net, tsn);
+}
+
+static void
+sctp_handle_ecn_cwr(struct sctp_cwr_chunk *cp, struct sctp_tcb *stcb)
+{
+ /*
+ * Here we get a CWR from the peer. We must look in the outqueue and
+ * make sure that we have a covered ECNE in the control chunk part.
+ * If so remove it.
+ */
+ struct sctp_tmit_chunk *chk;
+ struct sctp_ecne_chunk *ecne;
+
+ TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id != SCTP_ECN_ECHO) {
+ continue;
+ }
+ /*
+ * Look for and remove if it is the right TSN. Since there
+ * is only ONE ECNE on the control queue at any one time we
+ * don't need to worry about more than one!
+ */
+ ecne = mtod(chk->data, struct sctp_ecne_chunk *);
+ if (compare_with_wrap(ntohl(cp->tsn), ntohl(ecne->tsn),
+ MAX_TSN) || (cp->tsn == ecne->tsn)) {
+ /* this covers this ECNE, we can remove it */
+ stcb->asoc.ecn_echo_cnt_onq--;
+ TAILQ_REMOVE(&stcb->asoc.control_send_queue, chk,
+ sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ stcb->asoc.ctrl_queue_cnt--;
+ sctp_free_a_chunk(stcb, chk);
+ break;
+ }
+ }
+}
+
+static void
+sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ struct sctp_association *asoc;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_shutdown_complete: handling SHUTDOWN-COMPLETE\n");
+ if (stcb == NULL)
+ return;
+
+ asoc = &stcb->asoc;
+ /* process according to association state */
+ if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ /* unexpected SHUTDOWN-COMPLETE... so ignore... */
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_shutdown_complete: not in SCTP_STATE_SHUTDOWN_ACK_SENT --- ignore\n");
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ /* notify upper layer protocol */
+ if (stcb->sctp_socket) {
+ sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
+ /* are the queues empty? they should be */
+ if (!TAILQ_EMPTY(&asoc->send_queue) ||
+ !TAILQ_EMPTY(&asoc->sent_queue) ||
+ !TAILQ_EMPTY(&asoc->out_wheel)) {
+ sctp_report_all_outbound(stcb, 0, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ /* stop the timer */
+ sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_22);
+ SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
+ /* free the TCB */
+ SCTPDBG(SCTP_DEBUG_INPUT2,
+ "sctp_handle_shutdown_complete: calls free-asoc\n");
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_23);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ return;
+}
+
+static int
+process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc,
+ struct sctp_nets *net, uint8_t flg)
+{
+ switch (desc->chunk_type) {
+ case SCTP_DATA:
+ /* find the tsn to resend (possibly) */
+ {
+ uint32_t tsn;
+ struct sctp_tmit_chunk *tp1;
+
+ tsn = ntohl(desc->tsn_ifany);
+ tp1 = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ while (tp1) {
+ if (tp1->rec.data.TSN_seq == tsn) {
+ /* found it */
+ break;
+ }
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, tsn,
+ MAX_TSN)) {
+ /* not found */
+ tp1 = NULL;
+ break;
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ }
+ if (tp1 == NULL) {
+ /*
+ * Do it the other way, i.e. without paying
+ * attention to queue seq order.
+ */
+ SCTP_STAT_INCR(sctps_pdrpdnfnd);
+ tp1 = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ while (tp1) {
+ if (tp1->rec.data.TSN_seq == tsn) {
+ /* found it */
+ break;
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ }
+ }
+ if (tp1 == NULL) {
+ SCTP_STAT_INCR(sctps_pdrptsnnf);
+ }
+ if ((tp1) && (tp1->sent < SCTP_DATAGRAM_ACKED)) {
+ uint8_t *ddp;
+
+ if ((stcb->asoc.peers_rwnd == 0) &&
+ ((flg & SCTP_FROM_MIDDLE_BOX) == 0)) {
+ SCTP_STAT_INCR(sctps_pdrpdiwnp);
+ return (0);
+ }
+ if (stcb->asoc.peers_rwnd == 0 &&
+ (flg & SCTP_FROM_MIDDLE_BOX)) {
+ SCTP_STAT_INCR(sctps_pdrpdizrw);
+ return (0);
+ }
+ ddp = (uint8_t *) (mtod(tp1->data, caddr_t)+
+ sizeof(struct sctp_data_chunk));
+ {
+ unsigned int iii;
+
+ for (iii = 0; iii < sizeof(desc->data_bytes);
+ iii++) {
+ if (ddp[iii] != desc->data_bytes[iii]) {
+ SCTP_STAT_INCR(sctps_pdrpbadd);
+ return (-1);
+ }
+ }
+ }
+ /*
+ * We zero out the nonce so a resync is not
+ * needed
+ */
+ tp1->rec.data.ect_nonce = 0;
+
+ if (tp1->do_rtt) {
+ /*
+ * this guy had a RTO calculation
+ * pending on it, cancel it
+ */
+ tp1->do_rtt = 0;
+ }
+ SCTP_STAT_INCR(sctps_pdrpmark);
+ if (tp1->sent != SCTP_DATAGRAM_RESEND)
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ tp1->sent = SCTP_DATAGRAM_RESEND;
+ /*
+ * mark it as if we were doing a FR, since
+ * we will be getting gap ack reports behind
+ * the info from the router.
+ */
+ tp1->rec.data.doing_fast_retransmit = 1;
+ /*
+ * mark the tsn with what sequences can
+ * cause a new FR.
+ */
+ if (TAILQ_EMPTY(&stcb->asoc.send_queue)) {
+ tp1->rec.data.fast_retran_tsn = stcb->asoc.sending_seq;
+ } else {
+ tp1->rec.data.fast_retran_tsn = (TAILQ_FIRST(&stcb->asoc.send_queue))->rec.data.TSN_seq;
+ }
+
+ /* restart the timer */
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, tp1->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_24);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, tp1->whoTo);
+
+ /* fix counts and things */
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PDRP,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) stcb,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_decrease(tp1);
+ sctp_total_flight_decrease(stcb, tp1);
+ } {
+ /* audit code */
+ unsigned int audit;
+
+ audit = 0;
+ TAILQ_FOREACH(tp1, &stcb->asoc.sent_queue, sctp_next) {
+ if (tp1->sent == SCTP_DATAGRAM_RESEND)
+ audit++;
+ }
+ TAILQ_FOREACH(tp1, &stcb->asoc.control_send_queue,
+ sctp_next) {
+ if (tp1->sent == SCTP_DATAGRAM_RESEND)
+ audit++;
+ }
+ if (audit != stcb->asoc.sent_queue_retran_cnt) {
+ SCTP_PRINTF("**Local Audit finds cnt:%d asoc cnt:%d\n",
+ audit, stcb->asoc.sent_queue_retran_cnt);
+#ifndef SCTP_AUDITING_ENABLED
+ stcb->asoc.sent_queue_retran_cnt = audit;
+#endif
+ }
+ }
+ }
+ break;
+ case SCTP_ASCONF:
+ {
+ struct sctp_tmit_chunk *asconf;
+
+ TAILQ_FOREACH(asconf, &stcb->asoc.control_send_queue,
+ sctp_next) {
+ if (asconf->rec.chunk_id.id == SCTP_ASCONF) {
+ break;
+ }
+ }
+ if (asconf) {
+ if (asconf->sent != SCTP_DATAGRAM_RESEND)
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ asconf->sent = SCTP_DATAGRAM_RESEND;
+ asconf->snd_count--;
+ }
+ }
+ break;
+ case SCTP_INITIATION:
+ /* resend the INIT */
+ stcb->asoc.dropped_special_cnt++;
+ if (stcb->asoc.dropped_special_cnt < SCTP_RETRY_DROPPED_THRESH) {
+ /*
+ * If we can get it in within a few attempts we do
+ * this; otherwise we let the timer fire.
+ */
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep,
+ stcb, net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_25);
+ sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
+ }
+ break;
+ case SCTP_SELECTIVE_ACK:
+ /* resend the sack */
+ sctp_send_sack(stcb);
+ break;
+ case SCTP_HEARTBEAT_REQUEST:
+ /* resend a demand HB */
+ if ((stcb->asoc.overall_error_count + 3) < stcb->asoc.max_send_times) {
+ /*
+ * Only retransmit if we KNOW we won't destroy the
+ * tcb
+ */
+ (void)sctp_send_hb(stcb, 1, net);
+ }
+ break;
+ case SCTP_SHUTDOWN:
+ sctp_send_shutdown(stcb, net);
+ break;
+ case SCTP_SHUTDOWN_ACK:
+ sctp_send_shutdown_ack(stcb, net);
+ break;
+ case SCTP_COOKIE_ECHO:
+ {
+ struct sctp_tmit_chunk *cookie;
+
+ cookie = NULL;
+ TAILQ_FOREACH(cookie, &stcb->asoc.control_send_queue,
+ sctp_next) {
+ if (cookie->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
+ break;
+ }
+ }
+ if (cookie) {
+ if (cookie->sent != SCTP_DATAGRAM_RESEND)
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ cookie->sent = SCTP_DATAGRAM_RESEND;
+ sctp_stop_all_cookie_timers(stcb);
+ }
+ }
+ break;
+ case SCTP_COOKIE_ACK:
+ sctp_send_cookie_ack(stcb);
+ break;
+ case SCTP_ASCONF_ACK:
+ /* resend last asconf ack */
+ sctp_send_asconf_ack(stcb);
+ break;
+ case SCTP_FORWARD_CUM_TSN:
+ send_forward_tsn(stcb, &stcb->asoc);
+ break;
+ /* can't do anything with these */
+ case SCTP_PACKET_DROPPED:
+ case SCTP_INITIATION_ACK: /* this should not happen */
+ case SCTP_HEARTBEAT_ACK:
+ case SCTP_ABORT_ASSOCIATION:
+ case SCTP_OPERATION_ERROR:
+ case SCTP_SHUTDOWN_COMPLETE:
+ case SCTP_ECN_ECHO:
+ case SCTP_ECN_CWR:
+ default:
+ break;
+ }
+ return (0);
+}
+
+void
+sctp_reset_in_stream(struct sctp_tcb *stcb, int number_entries, uint16_t * list)
+{
+ int i;
+ uint16_t temp;
+
+ /*
+ * We set things to 0xffff since this is the last delivered sequence
+ * and we will be sending in 0 after the reset.
+ */
+
+ if (number_entries) {
+ for (i = 0; i < number_entries; i++) {
+ temp = ntohs(list[i]);
+ if (temp >= stcb->asoc.streamincnt) {
+ continue;
+ }
+ stcb->asoc.strmin[temp].last_sequence_delivered = 0xffff;
+ }
+ } else {
+ list = NULL;
+ for (i = 0; i < stcb->asoc.streamincnt; i++) {
+ stcb->asoc.strmin[i].last_sequence_delivered = 0xffff;
+ }
+ }
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_RECV, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED);
+}
+
+static void
+sctp_reset_out_streams(struct sctp_tcb *stcb, int number_entries, uint16_t * list)
+{
+ int i;
+
+ if (number_entries == 0) {
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ stcb->asoc.strmout[i].next_sequence_sent = 0;
+ }
+ } else if (number_entries) {
+ for (i = 0; i < number_entries; i++) {
+ uint16_t temp;
+
+ temp = ntohs(list[i]);
+ if (temp >= stcb->asoc.streamoutcnt) {
+ /* no such stream */
+ continue;
+ }
+ stcb->asoc.strmout[temp].next_sequence_sent = 0;
+ }
+ }
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_SEND, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED);
+}
+
+
+struct sctp_stream_reset_out_request *
+sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq, struct sctp_tmit_chunk **bchk)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_reset_out_req *req;
+ struct sctp_stream_reset_out_request *r;
+ struct sctp_tmit_chunk *chk;
+ int len, clen;
+
+ asoc = &stcb->asoc;
+ if (TAILQ_EMPTY(&stcb->asoc.control_send_queue)) {
+ asoc->stream_reset_outstanding = 0;
+ return (NULL);
+ }
+ if (stcb->asoc.str_reset == NULL) {
+ asoc->stream_reset_outstanding = 0;
+ return (NULL);
+ }
+ chk = stcb->asoc.str_reset;
+ if (chk->data == NULL) {
+ return (NULL);
+ }
+ if (bchk) {
+ /* he wants a copy of the chk pointer */
+ *bchk = chk;
+ }
+ clen = chk->send_size;
+ req = mtod(chk->data, struct sctp_stream_reset_out_req *);
+ r = &req->sr_req;
+ if (ntohl(r->request_seq) == seq) {
+ /* found it */
+ return (r);
+ }
+ len = SCTP_SIZE32(ntohs(r->ph.param_length));
+ if (clen > (len + (int)sizeof(struct sctp_chunkhdr))) {
+ /* move to the next one, there can only be a max of two */
+ r = (struct sctp_stream_reset_out_request *)((caddr_t)r + len);
+ if (ntohl(r->request_seq) == seq) {
+ return (r);
+ }
+ }
+ /* that seq is not here */
+ return (NULL);
+}
+
+static void
+sctp_clean_up_stream_reset(struct sctp_tcb *stcb)
+{
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk = stcb->asoc.str_reset;
+
+ if (stcb->asoc.str_reset == NULL) {
+ return;
+ }
+ asoc = &stcb->asoc;
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_26);
+ TAILQ_REMOVE(&asoc->control_send_queue,
+ chk,
+ sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ asoc->ctrl_queue_cnt--;
+ sctp_free_a_chunk(stcb, chk);
+ /* sa_ignore NO_NULL_CHK */
+ stcb->asoc.str_reset = NULL;
+}
+
+
+static int
+sctp_handle_stream_reset_response(struct sctp_tcb *stcb,
+ uint32_t seq, uint32_t action,
+ struct sctp_stream_reset_response *respin)
+{
+ uint16_t type;
+ int lparm_len;
+ struct sctp_association *asoc = &stcb->asoc;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_stream_reset_out_request *srparam;
+ int number_entries;
+
+ if (asoc->stream_reset_outstanding == 0) {
+ /* duplicate */
+ return (0);
+ }
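+ /* Only the response that matches our outstanding request sequence is acted on. */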
+ if (seq == stcb->asoc.str_reset_seq_out) {
+ srparam = sctp_find_stream_reset(stcb, seq, &chk);
+ if (srparam) {
+ stcb->asoc.str_reset_seq_out++;
+ type = ntohs(srparam->ph.param_type);
+ lparm_len = ntohs(srparam->ph.param_length);
+ if (type == SCTP_STR_RESET_OUT_REQUEST) {
+ number_entries = (lparm_len - sizeof(struct sctp_stream_reset_out_request)) / sizeof(uint16_t);
+ asoc->stream_reset_out_is_outstanding = 0;
+ if (asoc->stream_reset_outstanding)
+ asoc->stream_reset_outstanding--;
+ if (action == SCTP_STREAM_RESET_PERFORMED) {
+ /* do it */
+ sctp_reset_out_streams(stcb, number_entries, srparam->list_of_streams);
+ } else {
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_OUT, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+ }
+ } else if (type == SCTP_STR_RESET_IN_REQUEST) {
+ /* Answered my request */
+ number_entries = (lparm_len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t);
+ if (asoc->stream_reset_outstanding)
+ asoc->stream_reset_outstanding--;
+ if (action != SCTP_STREAM_RESET_PERFORMED) {
+ sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_IN, stcb, number_entries, srparam->list_of_streams, SCTP_SO_NOT_LOCKED);
+ }
+ } else if (type == SCTP_STR_RESET_TSN_REQUEST) {
+ /**
+ * a) Adopt the new incoming TSN.
+ * b) Reset the map.
+ * c) Adopt the new outgoing TSN.
+ */
+ struct sctp_stream_reset_response_tsn *resp;
+ struct sctp_forward_tsn_chunk fwdtsn;
+ int abort_flag = 0;
+
+ if (respin == NULL) {
+ /* huh ? */
+ return (0);
+ }
+ if (action == SCTP_STREAM_RESET_PERFORMED) {
+ resp = (struct sctp_stream_reset_response_tsn *)respin;
+ asoc->stream_reset_outstanding--;
+ fwdtsn.ch.chunk_length = htons(sizeof(struct sctp_forward_tsn_chunk));
+ fwdtsn.ch.chunk_type = SCTP_FORWARD_CUM_TSN;
+ fwdtsn.new_cumulative_tsn = htonl(ntohl(resp->senders_next_tsn) - 1);
+ sctp_handle_forward_tsn(stcb, &fwdtsn, &abort_flag, NULL, 0);
+ if (abort_flag) {
+ return (1);
+ }
+ stcb->asoc.highest_tsn_inside_map = (ntohl(resp->senders_next_tsn) - 1);
+ stcb->asoc.cumulative_tsn = stcb->asoc.highest_tsn_inside_map;
+ stcb->asoc.mapping_array_base_tsn = ntohl(resp->senders_next_tsn);
+ memset(stcb->asoc.mapping_array, 0, stcb->asoc.mapping_array_size);
+ stcb->asoc.sending_seq = ntohl(resp->receivers_next_tsn);
+ stcb->asoc.last_acked_seq = stcb->asoc.cumulative_tsn;
+
+ sctp_reset_out_streams(stcb, 0, (uint16_t *) NULL);
+ sctp_reset_in_stream(stcb, 0, (uint16_t *) NULL);
+
+ }
+ }
+ /* get rid of the request and get the request flags */
+ if (asoc->stream_reset_outstanding == 0) {
+ sctp_clean_up_stream_reset(stcb);
+ }
+ }
+ }
+ return (0);
+}
+
+static void
+sctp_handle_str_reset_request_in(struct sctp_tcb *stcb,
+ struct sctp_tmit_chunk *chk,
+ struct sctp_stream_reset_in_request *req, int trunc)
+{
+ uint32_t seq;
+ int len, i;
+ int number_entries;
+ uint16_t temp;
+
+ /*
+ * peer wants me to send a str-reset to him for my outgoing seq's if
+ * seq_in is right.
+ */
+ struct sctp_association *asoc = &stcb->asoc;
+
+ seq = ntohl(req->request_seq);
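+ /* A new request must carry the expected sequence number; older sequences are answered from the cached results further down. */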
+ if (asoc->str_reset_seq_in == seq) {
+ if (trunc) {
+ /* Can't do it, since they exceeded our buffer size */
+ asoc->last_reset_action[1] = asoc->last_reset_action[0];
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_DENIED;
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
+ } else if (stcb->asoc.stream_reset_out_is_outstanding == 0) {
+ len = ntohs(req->ph.param_length);
+ number_entries = ((len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t));
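+ /* convert the requested stream numbers to host byte order in place */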
+ for (i = 0; i < number_entries; i++) {
+ temp = ntohs(req->list_of_streams[i]);
+ req->list_of_streams[i] = temp;
+ }
+ /* move the reset action back one */
+ asoc->last_reset_action[1] = asoc->last_reset_action[0];
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+ sctp_add_stream_reset_out(chk, number_entries, req->list_of_streams,
+ asoc->str_reset_seq_out,
+ seq, (asoc->sending_seq - 1));
+ asoc->stream_reset_out_is_outstanding = 1;
+ asoc->str_reset = chk;
+ sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
+ stcb->asoc.stream_reset_outstanding++;
+ } else {
+ /* Can't do it, since we have sent one out */
+ asoc->last_reset_action[1] = asoc->last_reset_action[0];
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_TRY_LATER;
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
+ }
+ asoc->str_reset_seq_in++;
+ } else if (asoc->str_reset_seq_in - 1 == seq) {
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
+ } else if (asoc->str_reset_seq_in - 2 == seq) {
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]);
+ } else {
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO);
+ }
+}
+
+static int
+sctp_handle_str_reset_request_tsn(struct sctp_tcb *stcb,
+ struct sctp_tmit_chunk *chk,
+ struct sctp_stream_reset_tsn_request *req)
+{
+ /* reset all in and out and update the tsn */
+ /*
+ * A) reset my str-seq's on in and out. B) Select a receive next,
+ * set cum-ack to it, and also process this selected number as a
+ * fwd-tsn. C) set in the response my next sending seq.
+ */
+ struct sctp_forward_tsn_chunk fwdtsn;
+ struct sctp_association *asoc = &stcb->asoc;
+ int abort_flag = 0;
+ uint32_t seq;
+
+ seq = ntohl(req->request_seq);
+ if (asoc->str_reset_seq_in == seq) {
+ fwdtsn.ch.chunk_length = htons(sizeof(struct sctp_forward_tsn_chunk));
+ fwdtsn.ch.chunk_type = SCTP_FORWARD_CUM_TSN;
+ fwdtsn.ch.chunk_flags = 0;
+ fwdtsn.new_cumulative_tsn = htonl(stcb->asoc.highest_tsn_inside_map + 1);
+ sctp_handle_forward_tsn(stcb, &fwdtsn, &abort_flag, NULL, 0);
+ if (abort_flag) {
+ return (1);
+ }
+ stcb->asoc.highest_tsn_inside_map += SCTP_STREAM_RESET_TSN_DELTA;
+ stcb->asoc.cumulative_tsn = stcb->asoc.highest_tsn_inside_map;
+ stcb->asoc.mapping_array_base_tsn = stcb->asoc.highest_tsn_inside_map + 1;
+ memset(stcb->asoc.mapping_array, 0, stcb->asoc.mapping_array_size);
+ atomic_add_int(&stcb->asoc.sending_seq, 1);
+ /* save off historical data for retrans */
+ stcb->asoc.last_sending_seq[1] = stcb->asoc.last_sending_seq[0];
+ stcb->asoc.last_sending_seq[0] = stcb->asoc.sending_seq;
+ stcb->asoc.last_base_tsnsent[1] = stcb->asoc.last_base_tsnsent[0];
+ stcb->asoc.last_base_tsnsent[0] = stcb->asoc.mapping_array_base_tsn;
+
+ sctp_add_stream_reset_result_tsn(chk,
+ ntohl(req->request_seq),
+ SCTP_STREAM_RESET_PERFORMED,
+ stcb->asoc.sending_seq,
+ stcb->asoc.mapping_array_base_tsn);
+ sctp_reset_out_streams(stcb, 0, (uint16_t *) NULL);
+ sctp_reset_in_stream(stcb, 0, (uint16_t *) NULL);
+ stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0];
+ stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+
+ asoc->str_reset_seq_in++;
+ } else if (asoc->str_reset_seq_in - 1 == seq) {
+ sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[0],
+ stcb->asoc.last_sending_seq[0],
+ stcb->asoc.last_base_tsnsent[0]
+ );
+ } else if (asoc->str_reset_seq_in - 2 == seq) {
+ sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[1],
+ stcb->asoc.last_sending_seq[1],
+ stcb->asoc.last_base_tsnsent[1]
+ );
+ } else {
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO);
+ }
+ return (0);
+}
+
+static void
+sctp_handle_str_reset_request_out(struct sctp_tcb *stcb,
+ struct sctp_tmit_chunk *chk,
+ struct sctp_stream_reset_out_request *req, int trunc)
+{
+ uint32_t seq, tsn;
+ int number_entries, len;
+ struct sctp_association *asoc = &stcb->asoc;
+
+ seq = ntohl(req->request_seq);
+
+ /* now if its not a duplicate we process it */
+ if (asoc->str_reset_seq_in == seq) {
+ len = ntohs(req->ph.param_length);
+ number_entries = ((len - sizeof(struct sctp_stream_reset_out_request)) / sizeof(uint16_t));
+ /*
+ * the sender is resetting; handle the stream list: a) verify
+ * whether we can do the reset now, if so no problem; b) if we
+ * can't do it yet we must copy the request; c) queue it, set up
+ * the data-in processor to trigger it when needed, and dequeue
+ * all the queued data.
+ */
+ tsn = ntohl(req->send_reset_at_tsn);
+
+ /* move the reset action back one */
+ asoc->last_reset_action[1] = asoc->last_reset_action[0];
+ if (trunc) {
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_DENIED);
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_DENIED;
+ } else if ((tsn == asoc->cumulative_tsn) ||
+ (compare_with_wrap(asoc->cumulative_tsn, tsn, MAX_TSN))) {
+ /* we can do it now */
+ sctp_reset_in_stream(stcb, number_entries, req->list_of_streams);
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_PERFORMED);
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+ } else {
+ /*
+ * we must queue it up and thus wait for the TSN's
+ * to arrive that are at or before tsn
+ */
+ struct sctp_stream_reset_list *liste;
+ int siz;
+
+ siz = sizeof(struct sctp_stream_reset_list) + (number_entries * sizeof(uint16_t));
+ SCTP_MALLOC(liste, struct sctp_stream_reset_list *,
+ siz, SCTP_M_STRESET);
+ if (liste == NULL) {
+ /* gak out of memory */
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_DENIED);
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_DENIED;
+ return;
+ }
+ liste->tsn = tsn;
+ liste->number_entries = number_entries;
+ memcpy(&liste->req, req,
+ (sizeof(struct sctp_stream_reset_out_request) + (number_entries * sizeof(uint16_t))));
+ TAILQ_INSERT_TAIL(&asoc->resetHead, liste, next_resp);
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_PERFORMED);
+ asoc->last_reset_action[0] = SCTP_STREAM_RESET_PERFORMED;
+ }
+ asoc->str_reset_seq_in++;
+ } else if ((asoc->str_reset_seq_in - 1) == seq) {
+ /*
+ * one seq back, just echo back last action since my
+ * response was lost.
+ */
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
+ } else if ((asoc->str_reset_seq_in - 2) == seq) {
+ /*
+ * two seq back, just echo back last action since my
+ * response was lost.
+ */
+ sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]);
+ } else {
+ sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_BAD_SEQNO);
+ }
+}
+
+#ifdef __GNUC__
+__attribute__((noinline))
+#endif
+ static int
+ sctp_handle_stream_reset(struct sctp_tcb *stcb, struct mbuf *m, int offset,
+ struct sctp_stream_reset_out_req *sr_req)
+{
+ int chk_length, param_len, ptype;
+ struct sctp_paramhdr pstore;
+ uint8_t cstore[SCTP_CHUNK_BUFFER_SIZE];
+
+ uint32_t seq;
+ int num_req = 0;
+ int trunc = 0;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_chunkhdr *ch;
+ struct sctp_paramhdr *ph;
+ int ret_code = 0;
+ int num_param = 0;
+
+ /* now it may be a reset or a reset-response */
+ chk_length = ntohs(sr_req->ch.chunk_length);
+
+ /* setup for adding the response */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ return (ret_code);
+ }
+ chk->rec.chunk_id.id = SCTP_STREAM_RESET;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->asoc = &stcb->asoc;
+ chk->no_fr_allowed = 0;
+ chk->book_size = chk->send_size = sizeof(struct sctp_chunkhdr);
+ chk->book_size_scale = 0;
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (chk->data == NULL) {
+strres_nochunk:
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ return (ret_code);
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+
+ /* setup chunk parameters */
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = stcb->asoc.primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+ ch->chunk_type = SCTP_STREAM_RESET;
+ ch->chunk_flags = 0;
+ ch->chunk_length = htons(chk->send_size);
+ SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size);
+ offset += sizeof(struct sctp_chunkhdr);
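+ /* walk every reset parameter in the chunk, copying each into a local buffer before dispatching it */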
+ while ((size_t)chk_length >= sizeof(struct sctp_stream_reset_tsn_request)) {
+ ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, sizeof(pstore), (uint8_t *) & pstore);
+ if (ph == NULL)
+ break;
+ param_len = ntohs(ph->param_length);
+ if (param_len < (int)sizeof(struct sctp_stream_reset_tsn_request)) {
+ /* bad param */
+ break;
+ }
+ ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, min(param_len, (int)sizeof(cstore)),
+ (uint8_t *) & cstore);
+ ptype = ntohs(ph->param_type);
+ num_param++;
+ if (param_len > (int)sizeof(cstore)) {
+ trunc = 1;
+ } else {
+ trunc = 0;
+ }
+
+ if (num_param > SCTP_MAX_RESET_PARAMS) {
+ /* hit the max of parameters already sorry.. */
+ break;
+ }
+ if (ptype == SCTP_STR_RESET_OUT_REQUEST) {
+ struct sctp_stream_reset_out_request *req_out;
+
+ req_out = (struct sctp_stream_reset_out_request *)ph;
+ num_req++;
+ if (stcb->asoc.stream_reset_outstanding) {
+ seq = ntohl(req_out->response_seq);
+ if (seq == stcb->asoc.str_reset_seq_out) {
+ /* implicit ack */
+ (void)sctp_handle_stream_reset_response(stcb, seq, SCTP_STREAM_RESET_PERFORMED, NULL);
+ }
+ }
+ sctp_handle_str_reset_request_out(stcb, chk, req_out, trunc);
+ } else if (ptype == SCTP_STR_RESET_IN_REQUEST) {
+ struct sctp_stream_reset_in_request *req_in;
+
+ num_req++;
+
+ req_in = (struct sctp_stream_reset_in_request *)ph;
+
+ sctp_handle_str_reset_request_in(stcb, chk, req_in, trunc);
+ } else if (ptype == SCTP_STR_RESET_TSN_REQUEST) {
+ struct sctp_stream_reset_tsn_request *req_tsn;
+
+ num_req++;
+ req_tsn = (struct sctp_stream_reset_tsn_request *)ph;
+
+ if (sctp_handle_str_reset_request_tsn(stcb, chk, req_tsn)) {
+ ret_code = 1;
+ goto strres_nochunk;
+ }
+ /* no more */
+ break;
+ } else if (ptype == SCTP_STR_RESET_RESPONSE) {
+ struct sctp_stream_reset_response *resp;
+ uint32_t result;
+
+ resp = (struct sctp_stream_reset_response *)ph;
+ seq = ntohl(resp->response_seq);
+ result = ntohl(resp->result);
+ if (sctp_handle_stream_reset_response(stcb, seq, result, resp)) {
+ ret_code = 1;
+ goto strres_nochunk;
+ }
+ } else {
+ break;
+ }
+ offset += SCTP_SIZE32(param_len);
+ chk_length -= SCTP_SIZE32(param_len);
+ }
+ if (num_req == 0) {
+ /* we have no response, free the stuff */
+ goto strres_nochunk;
+ }
+ /* ok we have a chunk to link in */
+ TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue,
+ chk,
+ sctp_next);
+ stcb->asoc.ctrl_queue_cnt++;
+ return (ret_code);
+}
+
+/*
+ * Handle a router or endpoint's report of a packet loss. There are two
+ * ways to handle this: either we get the whole packet and must dissect it
+ * ourselves (possibly with truncation and/or corruption), or it is a
+ * summary from a middle box that did the dissecting for us.
+ */
+static void
+sctp_handle_packet_dropped(struct sctp_pktdrop_chunk *cp,
+ struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t limit)
+{
+ uint32_t bottle_bw, on_queue;
+ uint16_t trunc_len;
+ unsigned int chlen;
+ unsigned int at;
+ struct sctp_chunk_desc desc;
+ struct sctp_chunkhdr *ch;
+
+ chlen = ntohs(cp->ch.chunk_length);
+ chlen -= sizeof(struct sctp_pktdrop_chunk);
+ /* XXX possible chlen underflow */
+ if (chlen == 0) {
+ ch = NULL;
+ if (cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX)
+ SCTP_STAT_INCR(sctps_pdrpbwrpt);
+ } else {
+ ch = (struct sctp_chunkhdr *)(cp->data + sizeof(struct sctphdr));
+ chlen -= sizeof(struct sctphdr);
+ /* XXX possible chlen underflow */
+ memset(&desc, 0, sizeof(desc));
+ }
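+ /* clamp the reported truncated length to the amount of data we were actually given */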
+ trunc_len = (uint16_t) ntohs(cp->trunc_len);
+ if (trunc_len > limit) {
+ trunc_len = limit;
+ }
+ /* now the chunks themselves */
+ while ((ch != NULL) && (chlen >= sizeof(struct sctp_chunkhdr))) {
+ desc.chunk_type = ch->chunk_type;
+ /* get amount we need to move */
+ at = ntohs(ch->chunk_length);
+ if (at < sizeof(struct sctp_chunkhdr)) {
+ /* corrupt chunk, maybe at the end? */
+ SCTP_STAT_INCR(sctps_pdrpcrupt);
+ break;
+ }
+ if (trunc_len == 0) {
+ /* we are supposed to have all of it */
+ if (at > chlen) {
+ /* corrupt skip it */
+ SCTP_STAT_INCR(sctps_pdrpcrupt);
+ break;
+ }
+ } else {
+ /* is there enough of it left ? */
+ if (desc.chunk_type == SCTP_DATA) {
+ if (chlen < (sizeof(struct sctp_data_chunk) +
+ sizeof(desc.data_bytes))) {
+ break;
+ }
+ } else {
+ if (chlen < sizeof(struct sctp_chunkhdr)) {
+ break;
+ }
+ }
+ }
+ if (desc.chunk_type == SCTP_DATA) {
+ /* can we get out the tsn? */
+ if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX))
+ SCTP_STAT_INCR(sctps_pdrpmbda);
+
+ if (chlen >= (sizeof(struct sctp_data_chunk) + sizeof(uint32_t))) {
+ /* yep */
+ struct sctp_data_chunk *dcp;
+ uint8_t *ddp;
+ unsigned int iii;
+
+ dcp = (struct sctp_data_chunk *)ch;
+ ddp = (uint8_t *) (dcp + 1);
+ for (iii = 0; iii < sizeof(desc.data_bytes); iii++) {
+ desc.data_bytes[iii] = ddp[iii];
+ }
+ desc.tsn_ifany = dcp->dp.tsn;
+ } else {
+ /* nope we are done. */
+ SCTP_STAT_INCR(sctps_pdrpnedat);
+ break;
+ }
+ } else {
+ if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX))
+ SCTP_STAT_INCR(sctps_pdrpmbct);
+ }
+
+ if (process_chunk_drop(stcb, &desc, net, cp->ch.chunk_flags)) {
+ SCTP_STAT_INCR(sctps_pdrppdbrk);
+ break;
+ }
+ if (SCTP_SIZE32(at) > chlen) {
+ break;
+ }
+ chlen -= SCTP_SIZE32(at);
+ if (chlen < sizeof(struct sctp_chunkhdr)) {
+ /* done, none left */
+ break;
+ }
+ ch = (struct sctp_chunkhdr *)((caddr_t)ch + SCTP_SIZE32(at));
+ }
+ /* Now update any rwnd --- possibly */
+ if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX) == 0) {
+ /* From a peer, we get a rwnd report */
+ uint32_t a_rwnd;
+
+ SCTP_STAT_INCR(sctps_pdrpfehos);
+
+ bottle_bw = ntohl(cp->bottle_bw);
+ on_queue = ntohl(cp->current_onq);
+ if (bottle_bw && on_queue) {
+ /* a rwnd report is in here */
+ if (bottle_bw > on_queue)
+ a_rwnd = bottle_bw - on_queue;
+ else
+ a_rwnd = 0;
+
+ if (a_rwnd == 0)
+ stcb->asoc.peers_rwnd = 0;
+ else {
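+ /* usable peer rwnd is the advertised window minus what we already have in flight */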
+ if (a_rwnd > stcb->asoc.total_flight) {
+ stcb->asoc.peers_rwnd =
+ a_rwnd - stcb->asoc.total_flight;
+ } else {
+ stcb->asoc.peers_rwnd = 0;
+ }
+ if (stcb->asoc.peers_rwnd <
+ stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ stcb->asoc.peers_rwnd = 0;
+ }
+ }
+ }
+ } else {
+ SCTP_STAT_INCR(sctps_pdrpfmbox);
+ }
+
+ /* now middle boxes in sat networks get a cwnd bump */
+ if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX) &&
+ (stcb->asoc.sat_t3_loss_recovery == 0) &&
+ (stcb->asoc.sat_network)) {
+ /*
+ * This is debatable, but for sat networks it makes sense.
+ * Note if a T3 timer has gone off, we will prohibit any
+ * changes to cwnd until we exit the t3 loss recovery.
+ */
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped(stcb,
+ net, cp, &bottle_bw, &on_queue);
+ }
+}
+
+/*
+ * Handles all control chunks in a packet.
+ * inputs:
+ * - m: mbuf chain, assumed to still contain the IP/SCTP header
+ * - stcb: the tcb found for this packet
+ * - offset: offset into the mbuf chain to the first chunkhdr
+ * - length: length of the complete packet
+ * outputs:
+ * - length: modified to the remaining length after control processing
+ * - netp: modified to the new sctp_nets after cookie-echo processing
+ * - return NULL to discard the packet (i.e. no asoc, bad packet, ...),
+ *   otherwise return the tcb for this packet
+ */
+#ifdef __GNUC__
+__attribute__((noinline))
+#endif
+ static struct sctp_tcb *
+ sctp_process_control(struct mbuf *m, int iphlen, int *offset, int length,
+ struct sctphdr *sh, struct sctp_chunkhdr *ch, struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets **netp, int *fwd_tsn_seen,
+ uint32_t vrf_id)
+{
+ struct sctp_association *asoc;
+ uint32_t vtag_in;
+ int num_chunks = 0; /* number of control chunks processed */
+ uint32_t chk_length;
+ int ret;
+ int abort_no_unlock = 0;
+
+ /*
+ * How big should this be, and should it be alloc'd? Let's try the
+ * d-mtu-ceiling for now (2k) and that should hopefully work ...
+ * until we get into jumbo grams and such..
+ */
+ uint8_t chunk_buf[SCTP_CHUNK_BUFFER_SIZE];
+ struct sctp_tcb *locked_tcb = stcb;
+ int got_auth = 0;
+ uint32_t auth_offset = 0, auth_len = 0;
+ int auth_skipped = 0;
+ int asconf_cnt = 0;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ SCTPDBG(SCTP_DEBUG_INPUT1, "sctp_process_control: iphlen=%u, offset=%u, length=%u stcb:%p\n",
+ iphlen, *offset, length, stcb);
+
+ /* validate chunk header length... */
+ if (ntohs(ch->chunk_length) < sizeof(*ch)) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Invalid header length %d\n",
+ ntohs(ch->chunk_length));
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ /*
+ * validate the verification tag
+ */
+ vtag_in = ntohl(sh->v_tag);
+
+ if (locked_tcb) {
+ SCTP_TCB_LOCK_ASSERT(locked_tcb);
+ }
+ if (ch->chunk_type == SCTP_INITIATION) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Its an INIT of len:%d vtag:%x\n",
+ ntohs(ch->chunk_length), vtag_in);
+ if (vtag_in != 0) {
+ /* protocol error- silently discard... */
+ SCTP_STAT_INCR(sctps_badvtag);
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ } else if (ch->chunk_type != SCTP_COOKIE_ECHO) {
+ /*
+ * If there is no stcb, skip the AUTH chunk and process it
+ * later, after a stcb is found (to validate that the lookup
+ * was valid).
+ */
+ if ((ch->chunk_type == SCTP_AUTHENTICATION) &&
+ (stcb == NULL) && !sctp_auth_disable) {
+ /* save this chunk for later processing */
+ auth_skipped = 1;
+ auth_offset = *offset;
+ auth_len = ntohs(ch->chunk_length);
+
+ /* (temporarily) move past this chunk */
+ *offset += SCTP_SIZE32(auth_len);
+ if (*offset >= length) {
+ /* no more data left in the mbuf chain */
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_chunkhdr), chunk_buf);
+ }
+ if (ch == NULL) {
+ /* Help */
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if (ch->chunk_type == SCTP_COOKIE_ECHO) {
+ goto process_control_chunks;
+ }
+ /*
+ * first check if it's an ASCONF with an unknown src addr;
+ * we need to look inside to find the association
+ */
+ if (ch->chunk_type == SCTP_ASCONF && stcb == NULL) {
+ struct sctp_chunkhdr *asconf_ch = ch;
+ uint32_t asconf_offset = 0, asconf_len = 0;
+
+ /* inp's refcount may be reduced */
+ SCTP_INP_INCR_REF(inp);
+
+ asconf_offset = *offset;
+ do {
+ asconf_len = ntohs(asconf_ch->chunk_length);
+ if (asconf_len < sizeof(struct sctp_asconf_paramhdr))
+ break;
+ stcb = sctp_findassociation_ep_asconf(m, iphlen,
+ *offset, sh, &inp, netp);
+ if (stcb != NULL)
+ break;
+ asconf_offset += SCTP_SIZE32(asconf_len);
+ asconf_ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, asconf_offset,
+ sizeof(struct sctp_chunkhdr), chunk_buf);
+ } while (asconf_ch != NULL && asconf_ch->chunk_type == SCTP_ASCONF);
+ if (stcb == NULL) {
+ /*
+ * reduce inp's refcount if not reduced in
+ * sctp_findassociation_ep_asconf().
+ */
+ SCTP_INP_DECR_REF(inp);
+ } else {
+ locked_tcb = stcb;
+ }
+
+ /* now go back and verify any auth chunk to be sure */
+ if (auth_skipped && (stcb != NULL)) {
+ struct sctp_auth_chunk *auth;
+
+ auth = (struct sctp_auth_chunk *)
+ sctp_m_getptr(m, auth_offset,
+ auth_len, chunk_buf);
+ got_auth = 1;
+ auth_skipped = 0;
+ if ((auth == NULL) || sctp_handle_auth(stcb, auth, m,
+ auth_offset)) {
+ /* auth HMAC failed so dump it */
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ } else {
+ /* remaining chunks are HMAC checked */
+ stcb->asoc.authenticated = 1;
+ }
+ }
+ }
+ if (stcb == NULL) {
+ /* no association, so it's out of the blue... */
+ sctp_handle_ootb(m, iphlen, *offset, sh, inp, NULL,
+ vrf_id);
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ asoc = &stcb->asoc;
+ /* ABORT and SHUTDOWN can use either v_tag... */
+ if ((ch->chunk_type == SCTP_ABORT_ASSOCIATION) ||
+ (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) ||
+ (ch->chunk_type == SCTP_PACKET_DROPPED)) {
+ if ((vtag_in == asoc->my_vtag) ||
+ ((ch->chunk_flags & SCTP_HAD_NO_TCB) &&
+ (vtag_in == asoc->peer_vtag))) {
+ /* this is valid */
+ } else {
+ /* drop this packet... */
+ SCTP_STAT_INCR(sctps_badvtag);
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ } else if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
+ if (vtag_in != asoc->my_vtag) {
+ /*
+ * this could be a stale SHUTDOWN-ACK or the
+ * peer never got the SHUTDOWN-COMPLETE and
+ * is still hung; we have started a new asoc
+ * but it won't complete until the shutdown
+ * is completed
+ */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ sctp_handle_ootb(m, iphlen, *offset, sh, inp,
+ NULL, vrf_id);
+ return (NULL);
+ }
+ } else {
+ /* for all other chunks, vtag must match */
+ if (vtag_in != asoc->my_vtag) {
+ /* invalid vtag... */
+ SCTPDBG(SCTP_DEBUG_INPUT3,
+ "invalid vtag: %xh, expect %xh\n",
+ vtag_in, asoc->my_vtag);
+ SCTP_STAT_INCR(sctps_badvtag);
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ }
+ } /* end if !SCTP_COOKIE_ECHO */
+ /*
+ * process all control chunks...
+ */
+ if (((ch->chunk_type == SCTP_SELECTIVE_ACK) ||
+ (ch->chunk_type == SCTP_HEARTBEAT_REQUEST)) &&
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ /* implied cookie-ack.. we must have lost the ack */
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb,
+ *netp);
+ }
+process_control_chunks:
+ while (IS_SCTP_CONTROL(ch)) {
+ /* validate chunk length */
+ chk_length = ntohs(ch->chunk_length);
+ SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_process_control: processing a chunk type=%u, len=%u\n",
+ ch->chunk_type, chk_length);
+ SCTP_LTRACE_CHK(inp, stcb, ch->chunk_type, chk_length);
+ if (chk_length < sizeof(*ch) ||
+ (*offset + (int)chk_length) > length) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ SCTP_STAT_INCR_COUNTER64(sctps_incontrolchunks);
+ /*
+ * INIT-ACK only gets the init ack "header" portion, because
+ * we don't have to process the peer's COOKIE. All others get
+ * a complete chunk.
+ */
+ if ((ch->chunk_type == SCTP_INITIATION_ACK) ||
+ (ch->chunk_type == SCTP_INITIATION)) {
+ /* get an init-ack chunk */
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_init_ack_chunk), chunk_buf);
+ if (ch == NULL) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ } else {
+ /* For cookies and all other chunks. */
+ if (chk_length > sizeof(chunk_buf)) {
+ /*
+ * use just the size of the chunk buffer so
+ * the front part of our chunks fit in
+ * contiguous space up to the chunk buffer
+ * size (508 bytes). For chunks that need to
+ * get more than that they must use the
+ * sctp_m_getptr() function or other means
+ * (e.g. know how to parse mbuf chains).
+ * Cookies do this already.
+ */
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ (sizeof(chunk_buf) - 4),
+ chunk_buf);
+ if (ch == NULL) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ } else {
+ /* We can fit it all */
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ chk_length, chunk_buf);
+ if (ch == NULL) {
+ SCTP_PRINTF("sctp_process_control: Can't get the all data....\n");
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ }
+ }
+ num_chunks++;
+ /* Save off the last place we got a control from */
+ if (stcb != NULL) {
+ if (((netp != NULL) && (*netp != NULL)) || (ch->chunk_type == SCTP_ASCONF)) {
+ /*
+ * allow last_control to be NULL if
+ * ASCONF... ASCONF processing will find the
+ * right net later
+ */
+ if ((netp != NULL) && (*netp != NULL))
+ stcb->asoc.last_control_chunk_from = *netp;
+ }
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xB0, ch->chunk_type);
+#endif
+
+ /* check to see if this chunk required auth, but isn't */
+ if ((stcb != NULL) && !sctp_auth_disable &&
+ sctp_auth_is_required_chunk(ch->chunk_type,
+ stcb->asoc.local_auth_chunks) &&
+ !stcb->asoc.authenticated) {
+ /* "silently" ignore */
+ SCTP_STAT_INCR(sctps_recvauthmissing);
+ goto next_chunk;
+ }
+ switch (ch->chunk_type) {
+ case SCTP_INITIATION:
+ /* must be first and only chunk */
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_INIT\n");
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore? */
+ if ((stcb) && (stcb->asoc.total_output_queue_size)) {
+ /*
+ * collision case where we are
+ * sending to them too
+ */
+ ;
+ } else {
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ }
+ if ((chk_length > SCTP_LARGEST_INIT_ACCEPTED) ||
+ (num_chunks > 1) ||
+ (sctp_strict_init && (length - *offset > (int)SCTP_SIZE32(chk_length)))) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if ((stcb != NULL) &&
+ (SCTP_GET_STATE(&stcb->asoc) ==
+ SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ sctp_send_shutdown_ack(stcb,
+ stcb->asoc.primary_destination);
+ *offset = length;
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if (netp) {
+ sctp_handle_init(m, iphlen, *offset, sh,
+ (struct sctp_init_chunk *)ch, inp,
+ stcb, *netp, &abort_no_unlock, vrf_id);
+ }
+ if (abort_no_unlock)
+ return (NULL);
+
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ break;
+ case SCTP_PAD_CHUNK:
+ break;
+ case SCTP_INITIATION_ACK:
+ /* must be first and only chunk */
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_INIT-ACK\n");
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore */
+ if ((stcb) && (stcb->asoc.total_output_queue_size)) {
+ ;
+ } else {
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ if (stcb) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ return (NULL);
+ }
+ }
+ if ((num_chunks > 1) ||
+ (sctp_strict_init && (length - *offset > (int)SCTP_SIZE32(chk_length)))) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if ((netp) && (*netp)) {
+ ret = sctp_handle_init_ack(m, iphlen, *offset, sh,
+ (struct sctp_init_ack_chunk *)ch, stcb, *netp, &abort_no_unlock, vrf_id);
+ } else {
+ ret = -1;
+ }
+ /*
+ * Special case, I must call the output routine to
+ * get the cookie echoed
+ */
+ if (abort_no_unlock)
+ return (NULL);
+
+ if ((stcb) && ret == 0)
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ break;
+ case SCTP_SELECTIVE_ACK:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SACK\n");
+ SCTP_STAT_INCR(sctps_recvsacks);
+ {
+ struct sctp_sack_chunk *sack;
+ int abort_now = 0;
+ uint32_t a_rwnd, cum_ack;
+ uint16_t num_seg;
+ int nonce_sum_flag;
+
+ if ((stcb == NULL) || (chk_length < sizeof(struct sctp_sack_chunk))) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Bad size on sack chunk, too small\n");
+ ignore_sack:
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
+ /*-
+ * If we have sent a shutdown-ack, we will pay no
+ * attention to a sack sent in to us since
+ * we don't care anymore.
+ */
+ goto ignore_sack;
+ }
+ sack = (struct sctp_sack_chunk *)ch;
+ nonce_sum_flag = ch->chunk_flags & SCTP_SACK_NONCE_SUM;
+ cum_ack = ntohl(sack->sack.cum_tsn_ack);
+ num_seg = ntohs(sack->sack.num_gap_ack_blks);
+ a_rwnd = (uint32_t) ntohl(sack->sack.a_rwnd);
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SACK process cum_ack:%x num_seg:%d a_rwnd:%d\n",
+ cum_ack,
+ num_seg,
+ a_rwnd
+ );
+ stcb->asoc.seen_a_sack_this_pkt = 1;
+ if ((stcb->asoc.pr_sctp_cnt == 0) &&
+ (num_seg == 0) &&
+ ((compare_with_wrap(cum_ack, stcb->asoc.last_acked_seq, MAX_TSN)) ||
+ (cum_ack == stcb->asoc.last_acked_seq)) &&
+ (stcb->asoc.saw_sack_with_frags == 0) &&
+ (!TAILQ_EMPTY(&stcb->asoc.sent_queue))
+ ) {
+ /*
+ * We have a SIMPLE sack with no prior
+ * segments and data on the sent queue
+ * to be acked. Use the faster path sack
+ * processing. We also allow window
+ * update sacks with no missing segments
+ * to go this way too.
+ */
+ sctp_express_handle_sack(stcb, cum_ack, a_rwnd, nonce_sum_flag,
+ &abort_now);
+ } else {
+ if (netp && *netp)
+ sctp_handle_sack(m, *offset,
+ sack, stcb, *netp, &abort_now, chk_length, a_rwnd);
+ }
+ if (abort_now) {
+ /* ABORT signal from sack processing */
+ *offset = length;
+ return (NULL);
+ }
+ }
+ break;
+ case SCTP_HEARTBEAT_REQUEST:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_HEARTBEAT\n");
+ if ((stcb) && netp && *netp) {
+ SCTP_STAT_INCR(sctps_recvheartbeat);
+ sctp_send_heartbeat_ack(stcb, m, *offset,
+ chk_length, *netp);
+
+ /* He's alive so give him credit */
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ }
+ break;
+ case SCTP_HEARTBEAT_ACK:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_HEARTBEAT-ACK\n");
+ if ((stcb == NULL) || (chk_length != sizeof(struct sctp_heartbeat_chunk))) {
+ /* It's not ours */
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ /* He's alive so give him credit */
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ SCTP_STAT_INCR(sctps_recvheartbeatack);
+ if (netp && *netp)
+ sctp_handle_heartbeat_ack((struct sctp_heartbeat_chunk *)ch,
+ stcb, *netp);
+ break;
+ case SCTP_ABORT_ASSOCIATION:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ABORT, stcb %p\n",
+ stcb);
+ if ((stcb) && netp && *netp)
+ sctp_handle_abort((struct sctp_abort_chunk *)ch,
+ stcb, *netp);
+ *offset = length;
+ return (NULL);
+ break;
+ case SCTP_SHUTDOWN:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN, stcb %p\n",
+ stcb);
+ if ((stcb == NULL) || (chk_length != sizeof(struct sctp_shutdown_chunk))) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if (netp && *netp) {
+ int abort_flag = 0;
+
+ sctp_handle_shutdown((struct sctp_shutdown_chunk *)ch,
+ stcb, *netp, &abort_flag);
+ if (abort_flag) {
+ *offset = length;
+ return (NULL);
+ }
+ }
+ break;
+ case SCTP_SHUTDOWN_ACK:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-ACK, stcb %p\n", stcb);
+ if ((stcb) && (netp) && (*netp))
+ sctp_handle_shutdown_ack((struct sctp_shutdown_ack_chunk *)ch, stcb, *netp);
+ *offset = length;
+ return (NULL);
+ break;
+
+ case SCTP_OPERATION_ERROR:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_OP-ERR\n");
+ if ((stcb) && netp && *netp && sctp_handle_error(ch, stcb, *netp) < 0) {
+
+ *offset = length;
+ return (NULL);
+ }
+ break;
+ case SCTP_COOKIE_ECHO:
+ SCTPDBG(SCTP_DEBUG_INPUT3,
+ "SCTP_COOKIE-ECHO, stcb %p\n", stcb);
+ if ((stcb) && (stcb->asoc.total_output_queue_size)) {
+ ;
+ } else {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore */
+ *offset = length;
+ return (NULL);
+ }
+ }
+		/*
+		 * First, are we accepting? We do this again here
+		 * since it is possible that a previous endpoint
+		 * WAS listening, responded to an INIT-ACK and then
+		 * closed. We opened and bound... and are now no
+		 * longer listening.
+		 */
+
+ if ((stcb == NULL) && (inp->sctp_socket->so_qlen >= inp->sctp_socket->so_qlimit)) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (sctp_abort_if_one_2_one_hits_limit)) {
+ struct mbuf *oper;
+ struct sctp_paramhdr *phdr;
+
+ oper = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr);
+ phdr = mtod(oper,
+ struct sctp_paramhdr *);
+ phdr->param_type =
+ htons(SCTP_CAUSE_OUT_OF_RESC);
+ phdr->param_length =
+ htons(sizeof(struct sctp_paramhdr));
+ }
+ sctp_abort_association(inp, stcb, m,
+ iphlen, sh, oper, vrf_id);
+ }
+ *offset = length;
+ return (NULL);
+ } else {
+ struct mbuf *ret_buf;
+ struct sctp_inpcb *linp;
+
+ if (stcb) {
+ linp = NULL;
+ } else {
+ linp = inp;
+ }
+
+ if (linp) {
+ SCTP_ASOC_CREATE_LOCK(linp);
+ }
+ if (netp) {
+ ret_buf =
+ sctp_handle_cookie_echo(m, iphlen,
+ *offset, sh,
+ (struct sctp_cookie_echo_chunk *)ch,
+ &inp, &stcb, netp,
+ auth_skipped,
+ auth_offset,
+ auth_len,
+ &locked_tcb,
+ vrf_id);
+ } else {
+ ret_buf = NULL;
+ }
+ if (linp) {
+ SCTP_ASOC_CREATE_UNLOCK(linp);
+ }
+ if (ret_buf == NULL) {
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ SCTPDBG(SCTP_DEBUG_INPUT3,
+ "GAK, null buffer\n");
+ auth_skipped = 0;
+ *offset = length;
+ return (NULL);
+ }
+ /* if AUTH skipped, see if it verified... */
+ if (auth_skipped) {
+ got_auth = 1;
+ auth_skipped = 0;
+ }
+ if (!TAILQ_EMPTY(&stcb->asoc.sent_queue)) {
+ /*
+ * Restart the timer if we have
+ * pending data
+ */
+ struct sctp_tmit_chunk *chk;
+
+ chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ if (chk) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb,
+ chk->whoTo);
+ }
+ }
+ }
+ break;
+ case SCTP_COOKIE_ACK:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_COOKIE-ACK, stcb %p\n", stcb);
+ if ((stcb == NULL) || chk_length != sizeof(struct sctp_cookie_ack_chunk)) {
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore */
+ if ((stcb) && (stcb->asoc.total_output_queue_size)) {
+ ;
+ } else if (stcb) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ *offset = length;
+ return (NULL);
+ }
+ }
+ /* He's alive so give him credit */
+ if ((stcb) && netp && *netp) {
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb, *netp);
+ }
+ break;
+ case SCTP_ECN_ECHO:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ECN-ECHO\n");
+ /* He's alive so give him credit */
+ if ((stcb == NULL) || (chk_length != sizeof(struct sctp_ecne_chunk))) {
+			/* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if (stcb) {
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_ecn_echo((struct sctp_ecne_chunk *)ch,
+ stcb);
+ }
+ break;
+ case SCTP_ECN_CWR:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ECN-CWR\n");
+ /* He's alive so give him credit */
+ if ((stcb == NULL) || (chk_length != sizeof(struct sctp_cwr_chunk))) {
+			/* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if (stcb) {
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_ecn_cwr((struct sctp_cwr_chunk *)ch, stcb);
+ }
+ break;
+ case SCTP_SHUTDOWN_COMPLETE:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-COMPLETE, stcb %p\n", stcb);
+ /* must be first and only chunk */
+ if ((num_chunks > 1) ||
+ (length - *offset > (int)SCTP_SIZE32(chk_length))) {
+ *offset = length;
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ return (NULL);
+ }
+ if ((stcb) && netp && *netp) {
+ sctp_handle_shutdown_complete((struct sctp_shutdown_complete_chunk *)ch,
+ stcb, *netp);
+ }
+ *offset = length;
+ return (NULL);
+ break;
+ case SCTP_ASCONF:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ASCONF\n");
+ /* He's alive so give him credit */
+ if (stcb) {
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_asconf(m, *offset,
+ (struct sctp_asconf_chunk *)ch, stcb, asconf_cnt == 0);
+ asconf_cnt++;
+ }
+ break;
+ case SCTP_ASCONF_ACK:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ASCONF-ACK\n");
+ if (chk_length < sizeof(struct sctp_asconf_ack_chunk)) {
+			/* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if ((stcb) && netp && *netp) {
+ /* He's alive so give him credit */
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_asconf_ack(m, *offset,
+ (struct sctp_asconf_ack_chunk *)ch, stcb, *netp, &abort_no_unlock);
+ if (abort_no_unlock)
+ return (NULL);
+ }
+ break;
+ case SCTP_FORWARD_CUM_TSN:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_FWD-TSN\n");
+ if (chk_length < sizeof(struct sctp_forward_tsn_chunk)) {
+			/* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ /* He's alive so give him credit */
+ if (stcb) {
+ int abort_flag = 0;
+
+ stcb->asoc.overall_error_count = 0;
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ *fwd_tsn_seen = 1;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_29);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ *offset = length;
+ return (NULL);
+ }
+ sctp_handle_forward_tsn(stcb,
+ (struct sctp_forward_tsn_chunk *)ch, &abort_flag, m, *offset);
+ if (abort_flag) {
+ *offset = length;
+ return (NULL);
+ } else {
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ }
+
+ }
+ break;
+ case SCTP_STREAM_RESET:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_STREAM_RESET\n");
+ if (((stcb == NULL) || (ch == NULL) || (chk_length < sizeof(struct sctp_stream_reset_tsn_req)))) {
+			/* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* We are not interested anymore */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_INPUT + SCTP_LOC_30);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ *offset = length;
+ return (NULL);
+ }
+ if (stcb->asoc.peer_supports_strreset == 0) {
+ /*
+ * hmm, peer should have announced this, but
+ * we will turn it on since he is sending us
+ * a stream reset.
+ */
+ stcb->asoc.peer_supports_strreset = 1;
+ }
+ if (sctp_handle_stream_reset(stcb, m, *offset, (struct sctp_stream_reset_out_req *)ch)) {
+ /* stop processing */
+ *offset = length;
+ return (NULL);
+ }
+ break;
+ case SCTP_PACKET_DROPPED:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_PACKET_DROPPED\n");
+ /* re-get it all please */
+ if (chk_length < sizeof(struct sctp_pktdrop_chunk)) {
+			/* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if (ch && (stcb) && netp && (*netp)) {
+ sctp_handle_packet_dropped((struct sctp_pktdrop_chunk *)ch,
+ stcb, *netp,
+ min(chk_length, (sizeof(chunk_buf) - 4)));
+
+ }
+ break;
+
+ case SCTP_AUTHENTICATION:
+ SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_AUTHENTICATION\n");
+ if (sctp_auth_disable)
+ goto unknown_chunk;
+
+ if (stcb == NULL) {
+ /* save the first AUTH for later processing */
+ if (auth_skipped == 0) {
+ auth_offset = *offset;
+ auth_len = chk_length;
+ auth_skipped = 1;
+ }
+ /* skip this chunk (temporarily) */
+ goto next_chunk;
+ }
+ if ((chk_length < (sizeof(struct sctp_auth_chunk))) ||
+ (chk_length > (sizeof(struct sctp_auth_chunk) +
+ SCTP_AUTH_DIGEST_LEN_MAX))) {
+			/* It's not ours */
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ if (got_auth == 1) {
+ /* skip this chunk... it's already auth'd */
+ goto next_chunk;
+ }
+ got_auth = 1;
+ if ((ch == NULL) || sctp_handle_auth(stcb, (struct sctp_auth_chunk *)ch,
+ m, *offset)) {
+ /* auth HMAC failed so dump the packet */
+ *offset = length;
+ return (stcb);
+ } else {
+ /* remaining chunks are HMAC checked */
+ stcb->asoc.authenticated = 1;
+ }
+ break;
+
+ default:
+ unknown_chunk:
+ /* it's an unknown chunk! */
+ if ((ch->chunk_type & 0x40) && (stcb != NULL)) {
+ struct mbuf *mm;
+ struct sctp_paramhdr *phd;
+
+ mm = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (mm) {
+ phd = mtod(mm, struct sctp_paramhdr *);
+ /*
+ * We cheat and use param type since
+					 * we did not bother to define an
+ * error cause struct. They are the
+ * same basic format with different
+ * names.
+ */
+ phd->param_type = htons(SCTP_CAUSE_UNRECOG_CHUNK);
+ phd->param_length = htons(chk_length + sizeof(*phd));
+ SCTP_BUF_LEN(mm) = sizeof(*phd);
+ SCTP_BUF_NEXT(mm) = SCTP_M_COPYM(m, *offset, SCTP_SIZE32(chk_length),
+ M_DONTWAIT);
+ if (SCTP_BUF_NEXT(mm)) {
+ sctp_queue_op_err(stcb, mm);
+ } else {
+ sctp_m_freem(mm);
+ }
+ }
+ }
+ if ((ch->chunk_type & 0x80) == 0) {
+ /* discard this packet */
+ *offset = length;
+ return (stcb);
+ } /* else skip this bad chunk and continue... */
+ break;
+ } /* switch (ch->chunk_type) */
+
+
+next_chunk:
+ /* get the next chunk */
+ *offset += SCTP_SIZE32(chk_length);
+ if (*offset >= length) {
+ /* no more data left in the mbuf chain */
+ break;
+ }
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_chunkhdr), chunk_buf);
+ if (ch == NULL) {
+ if (locked_tcb) {
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ *offset = length;
+ return (NULL);
+ }
+ } /* while */
+
+ if (asconf_cnt > 0 && stcb != NULL) {
+ sctp_send_asconf_ack(stcb);
+ }
+ return (stcb);
+}
+
+
+/*
+ * Process the ECN bits. We have something set, so we must look to see
+ * whether it is ECN(0), ECN(1) or CE.
+ */
+static void
+sctp_process_ecn_marked_a(struct sctp_tcb *stcb, struct sctp_nets *net,
+ uint8_t ecn_bits)
+{
+ if ((ecn_bits & SCTP_CE_BITS) == SCTP_CE_BITS) {
+ ;
+ } else if ((ecn_bits & SCTP_ECT1_BIT) == SCTP_ECT1_BIT) {
+ /*
+		 * We only add to the nonce sum for ECT1; ECT0 does not
+		 * change the NS bit (which we have yet to find a way to
+		 * send).
+ */
+
+ /* ECN Nonce stuff */
+ stcb->asoc.receiver_nonce_sum++;
+ stcb->asoc.receiver_nonce_sum &= SCTP_SACK_NONCE_SUM;
+
+ /*
+ * Drag up the last_echo point if cumack is larger since we
+ * don't want the point falling way behind by more than
+		 * 2^31 and then having it be incorrect.
+ */
+ if (compare_with_wrap(stcb->asoc.cumulative_tsn,
+ stcb->asoc.last_echo_tsn, MAX_TSN)) {
+ stcb->asoc.last_echo_tsn = stcb->asoc.cumulative_tsn;
+ }
+ } else if ((ecn_bits & SCTP_ECT0_BIT) == SCTP_ECT0_BIT) {
+ /*
+ * Drag up the last_echo point if cumack is larger since we
+ * don't want the point falling way behind by more than
+		 * 2^31 and then having it be incorrect.
+ */
+ if (compare_with_wrap(stcb->asoc.cumulative_tsn,
+ stcb->asoc.last_echo_tsn, MAX_TSN)) {
+ stcb->asoc.last_echo_tsn = stcb->asoc.cumulative_tsn;
+ }
+ }
+}
+
+static void
+sctp_process_ecn_marked_b(struct sctp_tcb *stcb, struct sctp_nets *net,
+ uint32_t high_tsn, uint8_t ecn_bits)
+{
+ if ((ecn_bits & SCTP_CE_BITS) == SCTP_CE_BITS) {
+ /*
+ * we possibly must notify the sender that a congestion
+	 * window reduction is in order. We do this by adding an ECNE
+ * chunk to the output chunk queue. The incoming CWR will
+ * remove this chunk.
+ */
+ if (compare_with_wrap(high_tsn, stcb->asoc.last_echo_tsn,
+ MAX_TSN)) {
+ /* Yep, we need to add a ECNE */
+ sctp_send_ecn_echo(stcb, net, high_tsn);
+ stcb->asoc.last_echo_tsn = high_tsn;
+ }
+ }
+}
+
+#ifdef INVARIANTS
+static void
+sctp_validate_no_locks(struct sctp_inpcb *inp)
+{
+ struct sctp_tcb *stcb;
+
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ if (mtx_owned(&stcb->tcb_mtx)) {
+ panic("Own lock on stcb at return from input");
+ }
+ }
+}
+
+#endif
+
+/*
+ * common input chunk processing (v4 and v6)
+ */
+void
+sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset,
+ int length, struct sctphdr *sh, struct sctp_chunkhdr *ch,
+ struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net,
+ uint8_t ecn_bits, uint32_t vrf_id)
+{
+ /*
+ * Control chunk processing
+ */
+ uint32_t high_tsn;
+ int fwd_tsn_seen = 0, data_processed = 0;
+ struct mbuf *m = *mm;
+ int abort_flag = 0;
+ int un_sent;
+
+ SCTP_STAT_INCR(sctps_recvdatagrams);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xE0, 1);
+ sctp_auditing(0, inp, stcb, net);
+#endif
+
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Ok, Common input processing called, m:%p iphlen:%d offset:%d stcb:%p\n",
+ m, iphlen, offset, stcb);
+ if (stcb) {
+ /* always clear this before beginning a packet */
+ stcb->asoc.authenticated = 0;
+ stcb->asoc.seen_a_sack_this_pkt = 0;
+ SCTPDBG(SCTP_DEBUG_INPUT1, "stcb:%p state:%x\n",
+ stcb, stcb->asoc.state);
+
+ if ((stcb->asoc.state & SCTP_STATE_WAS_ABORTED) ||
+ (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED)) {
+ /*-
+ * If we hit here, we had a ref count
+ * up when the assoc was aborted and the
+			 * timer is clearing out the assoc; we should
+			 * NOT respond to any packet. It's OOTB.
+ */
+ SCTP_TCB_UNLOCK(stcb);
+ sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL,
+ vrf_id);
+ goto out_now;
+ }
+ }
+ if (IS_SCTP_CONTROL(ch)) {
+ /* process the control portion of the SCTP packet */
+ /* sa_ignore NO_NULL_CHK */
+ stcb = sctp_process_control(m, iphlen, &offset, length, sh, ch,
+ inp, stcb, &net, &fwd_tsn_seen, vrf_id);
+ if (stcb) {
+ /*
+ * This covers us if the cookie-echo was there and
+ * it changes our INP.
+ */
+ inp = stcb->sctp_ep;
+ }
+ } else {
+ /*
+ * no control chunks, so pre-process DATA chunks (these
+ * checks are taken care of by control processing)
+ */
+
+ /*
+ * if DATA only packet, and auth is required, then punt...
+ * can't have authenticated without any AUTH (control)
+ * chunks
+ */
+ if ((stcb != NULL) && !sctp_auth_disable &&
+ sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.local_auth_chunks)) {
+ /* "silently" ignore */
+ SCTP_STAT_INCR(sctps_recvauthmissing);
+ SCTP_TCB_UNLOCK(stcb);
+ goto out_now;
+ }
+ if (stcb == NULL) {
+ /* out of the blue DATA chunk */
+ sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL,
+ vrf_id);
+ goto out_now;
+ }
+ if (stcb->asoc.my_vtag != ntohl(sh->v_tag)) {
+ /* v_tag mismatch! */
+ SCTP_STAT_INCR(sctps_badvtag);
+ SCTP_TCB_UNLOCK(stcb);
+ goto out_now;
+ }
+ }
+
+ if (stcb == NULL) {
+ /*
+ * no valid TCB for this packet, or we found it's a bad
+ * packet while processing control, or we're done with this
+ * packet (done or skip rest of data), so we drop it...
+ */
+ goto out_now;
+ }
+ /*
+ * DATA chunk processing
+ */
+ /* plow through the data chunks while length > offset */
+
+ /*
+ * Rest should be DATA only. Check authentication state if AUTH for
+ * DATA is required.
+ */
+ if ((length > offset) && (stcb != NULL) && !sctp_auth_disable &&
+ sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.local_auth_chunks) &&
+ !stcb->asoc.authenticated) {
+ /* "silently" ignore */
+ SCTP_STAT_INCR(sctps_recvauthmissing);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "Data chunk requires AUTH, skipped\n");
+ goto trigger_send;
+ }
+ if (length > offset) {
+ int retval;
+
+ /*
+ * First check to make sure our state is correct. We would
+ * not get here unless we really did have a tag, so we don't
+ * abort if this happens, just dump the chunk silently.
+ */
+ switch (SCTP_GET_STATE(&stcb->asoc)) {
+ case SCTP_STATE_COOKIE_ECHOED:
+ /*
+			 * We consider data arriving with a valid tag in
+			 * this state to show us that the cookie-ack was
+			 * lost. Imply it was there.
+ */
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INPUT,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb, net);
+ break;
+ case SCTP_STATE_COOKIE_WAIT:
+ /*
+ * We consider OOTB any data sent during asoc setup.
+ */
+ sctp_handle_ootb(m, iphlen, offset, sh, inp, NULL,
+ vrf_id);
+ SCTP_TCB_UNLOCK(stcb);
+ goto out_now;
+ /* sa_ignore NOTREACHED */
+ break;
+ case SCTP_STATE_EMPTY: /* should not happen */
+ case SCTP_STATE_INUSE: /* should not happen */
+ case SCTP_STATE_SHUTDOWN_RECEIVED: /* This is a peer error */
+ case SCTP_STATE_SHUTDOWN_ACK_SENT:
+ default:
+ SCTP_TCB_UNLOCK(stcb);
+ goto out_now;
+ /* sa_ignore NOTREACHED */
+ break;
+ case SCTP_STATE_OPEN:
+ case SCTP_STATE_SHUTDOWN_SENT:
+ break;
+ }
+ /* take care of ECN, part 1. */
+ if (stcb->asoc.ecn_allowed &&
+ (ecn_bits & (SCTP_ECT0_BIT | SCTP_ECT1_BIT))) {
+ sctp_process_ecn_marked_a(stcb, net, ecn_bits);
+ }
+ /* plow through the data chunks while length > offset */
+ retval = sctp_process_data(mm, iphlen, &offset, length, sh,
+ inp, stcb, net, &high_tsn);
+ if (retval == 2) {
+ /*
+ * The association aborted, NO UNLOCK needed since
+ * the association is destroyed.
+ */
+ goto out_now;
+ }
+ data_processed = 1;
+ if (retval == 0) {
+ /* take care of ecn part 2. */
+ if (stcb->asoc.ecn_allowed &&
+ (ecn_bits & (SCTP_ECT0_BIT | SCTP_ECT1_BIT))) {
+ sctp_process_ecn_marked_b(stcb, net, high_tsn,
+ ecn_bits);
+ }
+ }
+ /*
+ * Anything important needs to have been m_copy'ed in
+ * process_data
+ */
+ }
+ if ((data_processed == 0) && (fwd_tsn_seen)) {
+ int was_a_gap = 0;
+
+ if (compare_with_wrap(stcb->asoc.highest_tsn_inside_map,
+ stcb->asoc.cumulative_tsn, MAX_TSN)) {
+ /* there was a gap before this data was processed */
+ was_a_gap = 1;
+ }
+ sctp_sack_check(stcb, 1, was_a_gap, &abort_flag);
+ if (abort_flag) {
+ /* Again, we aborted so NO UNLOCK needed */
+ goto out_now;
+ }
+ }
+ /* trigger send of any chunks in queue... */
+trigger_send:
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xE0, 2);
+ sctp_auditing(1, inp, stcb, net);
+#endif
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "Check for chunk output prw:%d tqe:%d tf=%d\n",
+ stcb->asoc.peers_rwnd,
+ TAILQ_EMPTY(&stcb->asoc.control_send_queue),
+ stcb->asoc.total_flight);
+ un_sent = (stcb->asoc.total_output_queue_size - stcb->asoc.total_flight);
+
+ if (!TAILQ_EMPTY(&stcb->asoc.control_send_queue) ||
+ ((un_sent) &&
+ (stcb->asoc.peers_rwnd > 0 ||
+ (stcb->asoc.peers_rwnd <= 0 && stcb->asoc.total_flight == 0)))) {
+ SCTPDBG(SCTP_DEBUG_INPUT3, "Calling chunk OUTPUT\n");
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
+ SCTPDBG(SCTP_DEBUG_INPUT3, "chunk OUTPUT returns\n");
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xE0, 3);
+ sctp_auditing(2, inp, stcb, net);
+#endif
+ SCTP_TCB_UNLOCK(stcb);
+out_now:
+#ifdef INVARIANTS
+ sctp_validate_no_locks(inp);
+#endif
+ return;
+}
+
+
+
+void
+sctp_input(i_pak, off)
+ struct mbuf *i_pak;
+ int off;
+
+{
+#ifdef SCTP_MBUF_LOGGING
+ struct mbuf *mat;
+
+#endif
+ struct mbuf *m;
+ int iphlen;
+ uint32_t vrf_id = 0;
+ uint8_t ecn_bits;
+ struct ip *ip;
+ struct sctphdr *sh;
+ struct sctp_inpcb *inp = NULL;
+
+ uint32_t check, calc_check;
+ struct sctp_nets *net;
+ struct sctp_tcb *stcb = NULL;
+ struct sctp_chunkhdr *ch;
+ int refcount_up = 0;
+ int length, mlen, offset;
+
+
+ if (SCTP_GET_PKT_VRFID(i_pak, vrf_id)) {
+ SCTP_RELEASE_PKT(i_pak);
+ return;
+ }
+ mlen = SCTP_HEADER_LEN(i_pak);
+ iphlen = off;
+ m = SCTP_HEADER_TO_CHAIN(i_pak);
+
+ net = NULL;
+ SCTP_STAT_INCR(sctps_recvpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
+
+
+#ifdef SCTP_MBUF_LOGGING
+ /* Log in any input mbufs */
+ if (sctp_logging_level & SCTP_MBUF_LOGGING_ENABLE) {
+ mat = m;
+ while (mat) {
+ if (SCTP_BUF_IS_EXTENDED(mat)) {
+ sctp_log_mb(mat, SCTP_MBUF_INPUT);
+ }
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ }
+#endif
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(m, mlen);
+#endif
+ /*
+	 * Must take out the iphlen, since mlen expects this (only affects
+	 * the loopback case)
+ */
+ mlen -= iphlen;
+
+ /*
+ * Get IP, SCTP, and first chunk header together in first mbuf.
+ */
+ ip = mtod(m, struct ip *);
+ offset = iphlen + sizeof(*sh) + sizeof(*ch);
+ if (SCTP_BUF_LEN(m) < offset) {
+ if ((m = m_pullup(m, offset)) == 0) {
+ SCTP_STAT_INCR(sctps_hdrops);
+ return;
+ }
+ ip = mtod(m, struct ip *);
+ }
+ sh = (struct sctphdr *)((caddr_t)ip + iphlen);
+ ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(*sh));
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+ "sctp_input() length:%d iphlen:%d\n", mlen, iphlen);
+
+ /* SCTP does not allow broadcasts or multicasts */
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
+ goto bad;
+ }
+ if (SCTP_IS_IT_BROADCAST(ip->ip_dst, m)) {
+ /*
+		 * We only look at broadcast if it's a front state; all
+		 * others we will not have a tcb for anyway.
+ */
+ goto bad;
+ }
+ /* validate SCTP checksum */
+ check = sh->checksum; /* save incoming checksum */
+ if ((check == 0) && (sctp_no_csum_on_loopback) &&
+ ((ip->ip_src.s_addr == ip->ip_dst.s_addr) ||
+ (SCTP_IS_IT_LOOPBACK(m)))
+ ) {
+ goto sctp_skip_csum_4;
+ }
+ sh->checksum = 0; /* prepare for calc */
+ calc_check = sctp_calculate_sum(m, &mlen, iphlen);
+ if (calc_check != check) {
+ SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x m:%p mlen:%d iphlen:%d\n",
+ calc_check, check, m, mlen, iphlen);
+
+ stcb = sctp_findassociation_addr(m, iphlen,
+ offset - sizeof(*ch),
+ sh, ch, &inp, &net,
+ vrf_id);
+ if ((inp) && (stcb)) {
+ sctp_send_packet_dropped(stcb, net, m, iphlen, 1);
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED);
+ } else if ((inp != NULL) && (stcb == NULL)) {
+ refcount_up = 1;
+ }
+ SCTP_STAT_INCR(sctps_badsum);
+ SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors);
+ goto bad;
+ }
+ sh->checksum = calc_check;
+sctp_skip_csum_4:
+ /* destination port of 0 is illegal, based on RFC2960. */
+ if (sh->dest_port == 0) {
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto bad;
+ }
+ /* validate mbuf chain length with IP payload length */
+ if (mlen < (ip->ip_len - iphlen)) {
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto bad;
+ }
+ /*
+	 * Locate pcb and tcb for datagram; sctp_findassociation_addr() wants
+ * IP/SCTP/first chunk header...
+ */
+ stcb = sctp_findassociation_addr(m, iphlen, offset - sizeof(*ch),
+ sh, ch, &inp, &net, vrf_id);
+ /* inp's ref-count increased && stcb locked */
+ if (inp == NULL) {
+ struct sctp_init_chunk *init_chk, chunk_buf;
+
+ SCTP_STAT_INCR(sctps_noport);
+#ifdef ICMP_BANDLIM
+ /*
+ * we use the bandwidth limiting to protect against sending
+ * too many ABORTS all at once. In this case these count the
+ * same as an ICMP message.
+ */
+ if (badport_bandlim(0) < 0)
+ goto bad;
+#endif /* ICMP_BANDLIM */
+ SCTPDBG(SCTP_DEBUG_INPUT1,
+	    "Sending an ABORT from packet entry!\n");
+ if (ch->chunk_type == SCTP_INITIATION) {
+ /*
+ * we do a trick here to get the INIT tag, dig in
+ * and get the tag from the INIT and put it in the
+ * common header.
+ */
+ init_chk = (struct sctp_init_chunk *)sctp_m_getptr(m,
+ iphlen + sizeof(*sh), sizeof(*init_chk),
+ (uint8_t *) & chunk_buf);
+ if (init_chk != NULL)
+ sh->v_tag = init_chk->init.initiate_tag;
+ }
+ if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
+ sctp_send_shutdown_complete2(m, iphlen, sh, vrf_id);
+ goto bad;
+ }
+ if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) {
+ goto bad;
+ }
+ if (ch->chunk_type != SCTP_ABORT_ASSOCIATION)
+ sctp_send_abort(m, iphlen, sh, 0, NULL, vrf_id);
+ goto bad;
+ } else if (stcb == NULL) {
+ refcount_up = 1;
+ }
+#ifdef IPSEC
+ /*
+ * I very much doubt any of the IPSEC stuff will work but I have no
+ * idea, so I will leave it in place.
+ */
+ if (inp && ipsec4_in_reject(m, &inp->ip_inp.inp)) {
+ ipsec4stat.in_polvio++;
+ SCTP_STAT_INCR(sctps_hdrops);
+ goto bad;
+ }
+#endif /* IPSEC */
+
+ /*
+ * common chunk processing
+ */
+ length = ip->ip_len + iphlen;
+ offset -= sizeof(struct sctp_chunkhdr);
+
+ ecn_bits = ip->ip_tos;
+
+ /* sa_ignore NO_NULL_CHK */
+ sctp_common_input_processing(&m, iphlen, offset, length, sh, ch,
+ inp, stcb, net, ecn_bits, vrf_id);
+ /* inp's ref-count reduced && stcb unlocked */
+ if (m) {
+ sctp_m_freem(m);
+ }
+ if ((inp) && (refcount_up)) {
+ /* reduce ref-count */
+ SCTP_INP_DECR_REF(inp);
+ }
+ return;
+bad:
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ if ((inp) && (refcount_up)) {
+ /* reduce ref-count */
+ SCTP_INP_DECR_REF(inp);
+ }
+ if (m) {
+ sctp_m_freem(m);
+ }
+ return;
+}
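
For readers following the ECN handling in sctp_process_ecn_marked_a()/_b()
above: the classification keys off the two low-order ECN bits of the IP TOS
byte as defined in RFC 3168, and CE is tested first because it sets both
bits. Below is a minimal standalone sketch of that classification; the
constant and function names are local to the example, not the kernel's
SCTP_* macros.

#include <stdint.h>
#include <stdio.h>

#define EX_ECN_MASK	0x03	/* low two bits of the TOS byte */
#define EX_ECT1_BIT	0x01	/* ECT(1) */
#define EX_ECT0_BIT	0x02	/* ECT(0) */
#define EX_CE_BITS	0x03	/* Congestion Experienced */

static const char *
ex_ecn_classify(uint8_t tos)
{
	uint8_t bits = tos & EX_ECN_MASK;

	if (bits == EX_CE_BITS)		/* both bits set: CE */
		return ("CE");
	else if (bits == EX_ECT1_BIT)
		return ("ECT(1)");
	else if (bits == EX_ECT0_BIT)
		return ("ECT(0)");
	return ("Not-ECT");
}

int
main(void)
{
	uint8_t samples[] = { 0x00, 0x01, 0x02, 0x03 };
	size_t i;

	for (i = 0; i < sizeof(samples); i++)
		printf("tos 0x%02x -> %s\n", samples[i], ex_ecn_classify(samples[i]));
	return (0);
}

Built with any C99 compiler, this prints the ECN class for a handful of
sample TOS values.
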
Index: raw_ip.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/raw_ip.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/raw_ip.c -L sys/netinet/raw_ip.c -u -r1.1.1.1 -r1.2
--- sys/netinet/raw_ip.c
+++ sys/netinet/raw_ip.c
@@ -27,9 +27,11 @@
* SUCH DAMAGE.
*
* @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
- * $FreeBSD: src/sys/netinet/raw_ip.c,v 1.150.2.3 2005/11/04 18:34:45 maxim Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/raw_ip.c,v 1.180 2007/10/07 20:44:23 silby Exp $");
+
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_mac.h"
@@ -38,9 +40,9 @@
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
@@ -66,14 +68,12 @@
#include <netinet/ip_fw.h>
#include <netinet/ip_dummynet.h>
-#ifdef FAST_IPSEC
-#include <netipsec/ipsec.h>
-#endif /*FAST_IPSEC*/
-
#ifdef IPSEC
-#include <netinet6/ipsec.h>
+#include <netipsec/ipsec.h>
#endif /*IPSEC*/
+#include <security/mac/mac_framework.h>
+
struct inpcbhead ripcb;
struct inpcbinfo ripcbinfo;
@@ -104,34 +104,48 @@
void (*ip_rsvp_force_done)(struct socket *);
/*
- * Nominal space allocated to a raw ip socket.
- */
-#define RIPSNDQ 8192
-#define RIPRCVQ 8192
-
-/*
* Raw interface to IP protocol.
*/
/*
* Initialize raw connection block q.
*/
+static void
+rip_zone_change(void *tag)
+{
+
+ uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets);
+}
+
+static int
+rip_inpcb_init(void *mem, int size, int flags)
+{
+ struct inpcb *inp = mem;
+
+ INP_LOCK_INIT(inp, "inp", "rawinp");
+ return (0);
+}
+
void
-rip_init()
+rip_init(void)
{
+
INP_INFO_LOCK_INIT(&ripcbinfo, "rip");
LIST_INIT(&ripcb);
- ripcbinfo.listhead = &ripcb;
+ ripcbinfo.ipi_listhead = &ripcb;
/*
* XXX We don't use the hash list for raw IP, but it's easier
* to allocate a one entry hash list than it is to check all
* over the place for hashbase == NULL.
*/
- ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask);
- ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask);
+ ripcbinfo.ipi_hashbase = hashinit(1, M_PCB, &ripcbinfo.ipi_hashmask);
+ ripcbinfo.ipi_porthashbase = hashinit(1, M_PCB,
+ &ripcbinfo.ipi_porthashmask);
ripcbinfo.ipi_zone = uma_zcreate("ripcb", sizeof(struct inpcb),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ NULL, NULL, rip_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets);
+ EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change,
+ NULL, EVENTHANDLER_PRI_ANY);
}
static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
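
The rip_init() hunk above shows the pattern this change applies to both the
raw-IP and UDP PCB zones: a UMA init routine that sets up the per-inpcb lock
once per item, a zone cap taken from maxsockets, and an event handler so the
cap follows later changes to kern.ipc.maxsockets. A condensed sketch of the
pattern follows; it only compiles inside the kernel tree, and the proto_*
names are placeholders rather than anything in this diff.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/eventhandler.h>
#include <sys/socketvar.h>	/* maxsockets */
#include <vm/uma.h>
#include <netinet/in_pcb.h>

static struct inpcbinfo proto_pcbinfo;

/* UMA init: runs once per item; the inpcb lock survives free/alloc cycles. */
static int
proto_inpcb_init(void *mem, int size, int flags)
{
	struct inpcb *inp = mem;

	INP_LOCK_INIT(inp, "inp", "protoinp");
	return (0);
}

/* Re-apply the zone cap whenever maxsockets is raised at runtime. */
static void
proto_zone_change(void *tag)
{

	uma_zone_set_max(proto_pcbinfo.ipi_zone, maxsockets);
}

void
proto_init(void)
{

	proto_pcbinfo.ipi_zone = uma_zcreate("protocb", sizeof(struct inpcb),
	    NULL, NULL, proto_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	uma_zone_set_max(proto_pcbinfo.ipi_zone, maxsockets);
	EVENTHANDLER_REGISTER(maxsockets_change, proto_zone_change, NULL,
	    EVENTHANDLER_PRI_ANY);
}
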
@@ -143,16 +157,12 @@
INP_LOCK_ASSERT(last);
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
/* check AH/ESP integrity. */
if (ipsec4_in_reject(n, last)) {
policyfail = 1;
-#ifdef IPSEC
- ipsecstat.in_polvio++;
-#endif /*IPSEC*/
- /* do not inject data to pcb */
}
-#endif /*IPSEC || FAST_IPSEC*/
+#endif /* IPSEC */
#ifdef MAC
if (!policyfail && mac_check_inpcb_deliver(last, n) != 0)
policyfail = 1;
@@ -166,7 +176,7 @@
so = last->inp_socket;
if ((last->inp_flags & INP_CONTROLOPTS) ||
- (so->so_options & SO_TIMESTAMP))
+ (so->so_options & (SO_TIMESTAMP | SO_BINTIME)))
ip_savecontrol(last, &opts, ip, n);
SOCKBUF_LOCK(&so->so_rcv);
if (sbappendaddr_locked(&so->so_rcv,
@@ -316,7 +326,7 @@
ipstat.ips_rawout++;
}
- if (inp->inp_vflag & INP_ONESBCAST)
+ if (inp->inp_flags & INP_ONESBCAST)
flags |= IP_SENDONES;
#ifdef MAC
@@ -341,7 +351,7 @@
* after it gives up privilege, so some caution is required. For options
* passed down to the IP layer via ip_ctloutput(), checks are assumed to be
* performed in ip_ctloutput() and therefore no check occurs here.
- * Unilaterally checking suser() here breaks normal IP socket option
+ * Unilaterally checking priv_check() here breaks normal IP socket option
* operations on raw sockets.
*
* When adding new socket options here, make sure to add access control
@@ -369,7 +379,13 @@
case IP_FW_GET:
case IP_FW_TABLE_GETSIZE:
case IP_FW_TABLE_LIST:
- error = suser(curthread);
+ case IP_FW_NAT_GET_CONFIG:
+ case IP_FW_NAT_GET_LOG:
+ /*
+ * XXXRW: Isn't this checked one layer down? Yes, it
+ * is.
+ */
+ error = priv_check(curthread, PRIV_NETINET_IPFW);
if (error != 0)
return (error);
if (ip_fw_ctl_ptr != NULL)
@@ -379,7 +395,7 @@
break;
case IP_DUMMYNET_GET:
- error = suser(curthread);
+ error = priv_check(curthread, PRIV_NETINET_DUMMYNET);
if (error != 0)
return (error);
if (ip_dn_ctl_ptr != NULL)
@@ -400,7 +416,7 @@
case MRT_API_CONFIG:
case MRT_ADD_BW_UPCALL:
case MRT_DEL_BW_UPCALL:
- error = suser(curthread);
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
if (error != 0)
return (error);
error = ip_mrouter_get ? ip_mrouter_get(so, sopt) :
@@ -434,7 +450,12 @@
case IP_FW_TABLE_ADD:
case IP_FW_TABLE_DEL:
case IP_FW_TABLE_FLUSH:
- error = suser(curthread);
+ case IP_FW_NAT_CFG:
+ case IP_FW_NAT_DEL:
+ /*
+ * XXXRW: Isn't this checked one layer down?
+ */
+ error = priv_check(curthread, PRIV_NETINET_IPFW);
if (error != 0)
return (error);
if (ip_fw_ctl_ptr != NULL)
@@ -446,7 +467,7 @@
case IP_DUMMYNET_CONFIGURE:
case IP_DUMMYNET_DEL:
case IP_DUMMYNET_FLUSH:
- error = suser(curthread);
+ error = priv_check(curthread, PRIV_NETINET_DUMMYNET);
if (error != 0)
return (error);
if (ip_dn_ctl_ptr != NULL)
@@ -456,14 +477,14 @@
break ;
case IP_RSVP_ON:
- error = suser(curthread);
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
if (error != 0)
return (error);
error = ip_rsvp_init(so);
break;
case IP_RSVP_OFF:
- error = suser(curthread);
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
if (error != 0)
return (error);
error = ip_rsvp_done();
@@ -471,7 +492,7 @@
case IP_RSVP_VIF_ON:
case IP_RSVP_VIF_OFF:
- error = suser(curthread);
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
if (error != 0)
return (error);
error = ip_rsvp_vif ?
@@ -490,7 +511,7 @@
case MRT_API_CONFIG:
case MRT_ADD_BW_UPCALL:
case MRT_DEL_BW_UPCALL:
- error = suser(curthread);
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
if (error != 0)
return (error);
error = ip_mrouter_set ? ip_mrouter_set(so, sopt) :
@@ -564,12 +585,12 @@
}
}
-u_long rip_sendspace = RIPSNDQ;
-u_long rip_recvspace = RIPRCVQ;
+u_long rip_sendspace = 9216;
+u_long rip_recvspace = 9216;
-SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
+SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
&rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
-SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
+SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
&rip_recvspace, 0, "Maximum space for incoming raw IP datagrams");
static int
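
A small but easy-to-miss fix in the hunk above: rip_sendspace and
rip_recvspace are u_long, yet they were exported through SYSCTL_INT, which
describes a 4-byte object to the sysctl machinery and is therefore wrong on
LP64 platforms where u_long is 8 bytes. Switching to SYSCTL_ULONG keeps the
declared width in sync with the variable. A minimal sketch of the corrected
idiom (kernel context; the example_* names are illustrative):

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

/*
 * A u_long tunable must be exported with the matching _ULONG macro;
 * exporting it via SYSCTL_INT would describe only 4 of its 8 bytes on LP64.
 */
static u_long example_sendspace = 9216;
SYSCTL_ULONG(_net_inet_raw, OID_AUTO, example_maxdgram, CTLFLAG_RW,
    &example_sendspace, 0, "Maximum outgoing datagram size (example)");
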
@@ -578,39 +599,24 @@
struct inpcb *inp;
int error;
- /* XXX why not lower? */
- INP_INFO_WLOCK(&ripcbinfo);
inp = sotoinpcb(so);
- if (inp) {
- /* XXX counter, printf */
- INP_INFO_WUNLOCK(&ripcbinfo);
- return EINVAL;
- }
- if (jailed(td->td_ucred) && !jail_allow_raw_sockets) {
- INP_INFO_WUNLOCK(&ripcbinfo);
- return (EPERM);
- }
- if ((error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL)) != 0) {
- INP_INFO_WUNLOCK(&ripcbinfo);
+ KASSERT(inp == NULL, ("rip_attach: inp != NULL"));
+
+ error = priv_check(td, PRIV_NETINET_RAW);
+ if (error)
return error;
- }
- if (proto >= IPPROTO_MAX || proto < 0) {
- INP_INFO_WUNLOCK(&ripcbinfo);
+ if (proto >= IPPROTO_MAX || proto < 0)
return EPROTONOSUPPORT;
- }
-
error = soreserve(so, rip_sendspace, rip_recvspace);
- if (error) {
- INP_INFO_WUNLOCK(&ripcbinfo);
+ if (error)
return error;
- }
- error = in_pcballoc(so, &ripcbinfo, "rawinp");
+ INP_INFO_WLOCK(&ripcbinfo);
+ error = in_pcballoc(so, &ripcbinfo);
if (error) {
INP_INFO_WUNLOCK(&ripcbinfo);
return error;
}
inp = (struct inpcb *)so->so_pcb;
- INP_LOCK(inp);
INP_INFO_WUNLOCK(&ripcbinfo);
inp->inp_vflag |= INP_IPV4;
inp->inp_ip_p = proto;
@@ -620,12 +626,17 @@
}
static void
-rip_pcbdetach(struct socket *so, struct inpcb *inp)
+rip_detach(struct socket *so)
{
+ struct inpcb *inp;
- INP_INFO_WLOCK_ASSERT(&ripcbinfo);
- INP_LOCK_ASSERT(inp);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip_detach: inp == NULL"));
+ KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
+ ("rip_detach: not closed"));
+ INP_INFO_WLOCK(&ripcbinfo);
+ INP_LOCK(inp);
if (so == ip_mrouter && ip_mrouter_done)
ip_mrouter_done();
if (ip_rsvp_force_done)
@@ -633,53 +644,68 @@
if (so == ip_rsvpd)
ip_rsvp_done();
in_pcbdetach(inp);
+ in_pcbfree(inp);
+ INP_INFO_WUNLOCK(&ripcbinfo);
}
-static int
-rip_detach(struct socket *so)
+static void
+rip_dodisconnect(struct socket *so, struct inpcb *inp)
+{
+
+ INP_LOCK_ASSERT(inp);
+
+ inp->inp_faddr.s_addr = INADDR_ANY;
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_ISCONNECTED;
+ SOCK_UNLOCK(so);
+}
+
+static void
+rip_abort(struct socket *so)
{
struct inpcb *inp;
- INP_INFO_WLOCK(&ripcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- /* XXX counter, printf */
- INP_INFO_WUNLOCK(&ripcbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("rip_abort: inp == NULL"));
+
+ INP_INFO_WLOCK(&ripcbinfo);
INP_LOCK(inp);
- rip_pcbdetach(so, inp);
+ rip_dodisconnect(so, inp);
+ INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&ripcbinfo);
- return 0;
}
-static int
-rip_abort(struct socket *so)
+static void
+rip_close(struct socket *so)
{
struct inpcb *inp;
- INP_INFO_WLOCK(&ripcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&ripcbinfo);
- return EINVAL; /* ??? possible? panic instead? */
- }
+ KASSERT(inp != NULL, ("rip_close: inp == NULL"));
+
+ INP_INFO_WLOCK(&ripcbinfo);
INP_LOCK(inp);
- soisdisconnected(so);
- if (so->so_state & SS_NOFDREF)
- rip_pcbdetach(so, inp);
- else
- INP_UNLOCK(inp);
+ rip_dodisconnect(so, inp);
+ INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&ripcbinfo);
- return 0;
}
static int
rip_disconnect(struct socket *so)
{
+ struct inpcb *inp;
+
if ((so->so_state & SS_ISCONNECTED) == 0)
return ENOTCONN;
- return rip_abort(so);
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip_disconnect: inp == NULL"));
+ INP_INFO_WLOCK(&ripcbinfo);
+ INP_LOCK(inp);
+ rip_dodisconnect(so, inp);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&ripcbinfo);
+ return (0);
}
static int
@@ -705,12 +731,9 @@
ifa_ifwithaddr((struct sockaddr *)addr) == 0))
return EADDRNOTAVAIL;
- INP_INFO_WLOCK(&ripcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&ripcbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("rip_bind: inp == NULL"));
+ INP_INFO_WLOCK(&ripcbinfo);
INP_LOCK(inp);
inp->inp_laddr = addr->sin_addr;
INP_UNLOCK(inp);
@@ -731,12 +754,9 @@
if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK)
return EAFNOSUPPORT;
- INP_INFO_WLOCK(&ripcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&ripcbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("rip_connect: inp == NULL"));
+ INP_INFO_WLOCK(&ripcbinfo);
INP_LOCK(inp);
inp->inp_faddr = addr->sin_addr;
soisconnected(so);
@@ -750,14 +770,9 @@
{
struct inpcb *inp;
- INP_INFO_RLOCK(&ripcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_RUNLOCK(&ripcbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("rip_shutdown: inp == NULL"));
INP_LOCK(inp);
- INP_INFO_RUNLOCK(&ripcbinfo);
socantsendmore(so);
INP_UNLOCK(inp);
return 0;
@@ -765,32 +780,30 @@
static int
rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
- struct mbuf *control, struct thread *td)
+ struct mbuf *control, struct thread *td)
{
struct inpcb *inp;
u_long dst;
- int ret;
- INP_INFO_WLOCK(&ripcbinfo);
inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip_send: inp == NULL"));
+ /*
+ * Note: 'dst' reads below are unlocked.
+ */
if (so->so_state & SS_ISCONNECTED) {
if (nam) {
- INP_INFO_WUNLOCK(&ripcbinfo);
m_freem(m);
return EISCONN;
}
- dst = inp->inp_faddr.s_addr;
+ dst = inp->inp_faddr.s_addr; /* Unlocked read. */
} else {
if (nam == NULL) {
- INP_INFO_WUNLOCK(&ripcbinfo);
m_freem(m);
return ENOTCONN;
}
dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
}
- ret = rip_output(m, so, dst);
- INP_INFO_WUNLOCK(&ripcbinfo);
- return ret;
+ return rip_output(m, so, dst);
}
static int
@@ -836,7 +849,7 @@
return ENOMEM;
INP_INFO_RLOCK(&ripcbinfo);
- for (inp = LIST_FIRST(ripcbinfo.listhead), i = 0; inp && i < n;
+ for (inp = LIST_FIRST(ripcbinfo.ipi_listhead), i = 0; inp && i < n;
inp = LIST_NEXT(inp, inp_list)) {
INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt &&
@@ -852,6 +865,7 @@
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
+ INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt) {
struct xinpcb xi;
bzero(&xi, sizeof(xi));
@@ -860,8 +874,10 @@
bcopy(inp, &xi.xi_inp, sizeof *inp);
if (inp->inp_socket)
sotoxsocket(inp->inp_socket, &xi.xi_socket);
+ INP_UNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
- }
+ } else
+ INP_UNLOCK(inp);
}
if (!error) {
/*
@@ -882,27 +898,6 @@
return error;
}
-/*
- * This is the wrapper function for in_setsockaddr. We just pass down
- * the pcbinfo for in_setpeeraddr to lock.
- */
-static int
-rip_sockaddr(struct socket *so, struct sockaddr **nam)
-{
- return (in_setsockaddr(so, nam, &ripcbinfo));
-}
-
-/*
- * This is the wrapper function for in_setpeeraddr. We just pass down
- * the pcbinfo for in_setpeeraddr to lock.
- */
-static int
-rip_peeraddr(struct socket *so, struct sockaddr **nam)
-{
- return (in_setpeeraddr(so, nam, &ripcbinfo));
-}
-
-
SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD, 0, 0,
rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
@@ -914,9 +909,10 @@
.pru_control = in_control,
.pru_detach = rip_detach,
.pru_disconnect = rip_disconnect,
- .pru_peeraddr = rip_peeraddr,
+ .pru_peeraddr = in_getpeeraddr,
.pru_send = rip_send,
.pru_shutdown = rip_shutdown,
- .pru_sockaddr = rip_sockaddr,
- .pru_sosetlabel = in_pcbsosetlabel
+ .pru_sockaddr = in_getsockaddr,
+ .pru_sosetlabel = in_pcbsosetlabel,
+ .pru_close = rip_close,
};
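
The recurring theme of the raw_ip.c change above is replacing blanket
suser() checks with priv_check() against specific named privileges
(PRIV_NETINET_IPFW, PRIV_NETINET_DUMMYNET, PRIV_NETINET_MROUTE), so
privilege policy can distinguish firewall, dummynet and multicast-routing
control instead of requiring full superuser. A hedged sketch of the
resulting shape of such a check, using option names that appear in the
diff; the function itself is illustrative and compiles only in kernel
context.

#include <sys/param.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <netinet/in.h>
#include <netinet/ip_fw.h>
#include <netinet/ip_dummynet.h>
#include <netinet/ip_mroute.h>

/* Illustrative: map privileged raw-IP socket options to named privileges. */
static int
example_priv_gate(struct thread *td, int sopt_name)
{
	int error;

	switch (sopt_name) {
	case IP_FW_TABLE_ADD:		/* firewall control */
		error = priv_check(td, PRIV_NETINET_IPFW);
		break;
	case IP_DUMMYNET_CONFIGURE:	/* dummynet control */
		error = priv_check(td, PRIV_NETINET_DUMMYNET);
		break;
	case MRT_ADD_BW_UPCALL:		/* multicast routing control */
		error = priv_check(td, PRIV_NETINET_MROUTE);
		break;
	default:
		error = 0;		/* unprivileged options pass through */
		break;
	}
	return (error);
}
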
Index: udp_usrreq.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/udp_usrreq.c -L sys/netinet/udp_usrreq.c -u -r1.2 -r1.3
--- sys/netinet/udp_usrreq.c
+++ sys/netinet/udp_usrreq.c
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,23 +28,25 @@
* SUCH DAMAGE.
*
* @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
- * $FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.175.2.5 2006/02/14 21:40:21 rwatson Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/udp_usrreq.c,v 1.218 2007/10/07 20:44:24 silby Exp $");
+
#include "opt_ipfw.h"
-#include "opt_ipsec.h"
#include "opt_inet6.h"
+#include "opt_ipsec.h"
#include "opt_mac.h"
#include <sys/param.h>
-#include <sys/systm.h>
#include <sys/domain.h>
+#include <sys/eventhandler.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
@@ -52,6 +55,7 @@
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
+#include <sys/systm.h>
#include <vm/uma.h>
@@ -59,8 +63,8 @@
#include <net/route.h>
#include <netinet/in.h>
-#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
+#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#ifdef INET6
@@ -69,88 +73,190 @@
#include <netinet/ip_icmp.h>
#include <netinet/icmp_var.h>
#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
#ifdef INET6
#include <netinet6/ip6_var.h>
#endif
#include <netinet/udp.h>
#include <netinet/udp_var.h>
-#ifdef FAST_IPSEC
-#include <netipsec/ipsec.h>
-#endif /*FAST_IPSEC*/
-
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#endif /*IPSEC*/
+#include <netipsec/ipsec.h>
+#endif
#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
/*
* UDP protocol implementation.
* Per RFC 768, August, 1980.
*/
-#ifndef COMPAT_42
-static int udpcksum = 1;
+
+/*
+ * BSD 4.2 defaulted the udp checksum to be off. Turning off udp checksums
+ * removes the only data integrity mechanism for packets; malformed packets
+ * that would otherwise be discarded due to bad checksums are then delivered
+ * and may cause problems (especially for NFS data blocks).
+ */
+static int udp_cksum = 1;
+SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW, &udp_cksum,
+ 0, "");
+
+int udp_log_in_vain = 0;
+SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
+ &udp_log_in_vain, 0, "Log all incoming UDP packets");
+
+int udp_blackhole = 0;
+SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW, &udp_blackhole, 0,
+ "Do not send port unreachables for refused connects");
+
+u_long udp_sendspace = 9216; /* really max datagram size */
+ /* 40 1K datagrams */
+SYSCTL_ULONG(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
+ &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
+
+u_long udp_recvspace = 40 * (1024 +
+#ifdef INET6
+ sizeof(struct sockaddr_in6)
#else
-static int udpcksum = 0; /* XXX */
+ sizeof(struct sockaddr_in)
#endif
-SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW,
- &udpcksum, 0, "");
+ );
-int log_in_vain = 0;
-SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
- &log_in_vain, 0, "Log all incoming UDP packets");
+SYSCTL_ULONG(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
+ &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
-static int blackhole = 0;
-SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW,
- &blackhole, 0, "Do not send port unreachables for refused connects");
-
-static int strict_mcast_mship = 0;
-SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW,
- &strict_mcast_mship, 0, "Only send multicast to member sockets");
-
-struct inpcbhead udb; /* from udp_var.h */
-#define udb6 udb /* for KAME src sync over BSD*'s */
-struct inpcbinfo udbinfo;
+struct inpcbhead udb; /* from udp_var.h */
+struct inpcbinfo udbinfo;
#ifndef UDBHASHSIZE
-#define UDBHASHSIZE 16
+#define UDBHASHSIZE 16
#endif
-struct udpstat udpstat; /* from udp_var.h */
-SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW,
- &udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
-
-static void udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
- int off, struct sockaddr_in *udp_in);
-
-static int udp_detach(struct socket *so);
-static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
- struct mbuf *, struct thread *);
+struct udpstat udpstat; /* from udp_var.h */
+SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RW, &udpstat,
+ udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)");
+
+static void udp_detach(struct socket *so);
+static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *,
+ struct mbuf *, struct thread *);
+
+static void
+udp_zone_change(void *tag)
+{
+
+ uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
+}
+
+static int
+udp_inpcb_init(void *mem, int size, int flags)
+{
+ struct inpcb *inp;
+
+ inp = mem;
+ INP_LOCK_INIT(inp, "inp", "udpinp");
+ return (0);
+}
void
-udp_init()
+udp_init(void)
{
+
INP_INFO_LOCK_INIT(&udbinfo, "udp");
LIST_INIT(&udb);
- udbinfo.listhead = &udb;
- udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask);
- udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB,
- &udbinfo.porthashmask);
+ udbinfo.ipi_listhead = &udb;
+ udbinfo.ipi_hashbase = hashinit(UDBHASHSIZE, M_PCB,
+ &udbinfo.ipi_hashmask);
+ udbinfo.ipi_porthashbase = hashinit(UDBHASHSIZE, M_PCB,
+ &udbinfo.ipi_porthashmask);
udbinfo.ipi_zone = uma_zcreate("udpcb", sizeof(struct inpcb), NULL,
- NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ NULL, udp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
+ EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
+ EVENTHANDLER_PRI_ANY);
+}
+
+/*
+ * Subroutine of udp_input(), which appends the provided mbuf chain to the
+ * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that
+ * contains the source address. If the socket ends up being an IPv6 socket,
+ * udp_append() will convert to a sockaddr_in6 before passing the address
+ * into the socket code.
+ */
+static void
+udp_append(struct inpcb *inp, struct ip *ip, struct mbuf *n, int off,
+ struct sockaddr_in *udp_in)
+{
+ struct sockaddr *append_sa;
+ struct socket *so;
+ struct mbuf *opts = 0;
+#ifdef INET6
+ struct sockaddr_in6 udp_in6;
+#endif
+
+ INP_LOCK_ASSERT(inp);
+
+#ifdef IPSEC
+ /* Check AH/ESP integrity. */
+ if (ipsec4_in_reject(n, inp)) {
+ m_freem(n);
+ ipsec4stat.in_polvio++;
+ return;
+ }
+#endif /* IPSEC */
+#ifdef MAC
+ if (mac_check_inpcb_deliver(inp, n) != 0) {
+ m_freem(n);
+ return;
+ }
+#endif
+ if (inp->inp_flags & INP_CONTROLOPTS ||
+ inp->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6) {
+ int savedflags;
+
+ savedflags = inp->inp_flags;
+ inp->inp_flags &= ~INP_UNMAPPABLEOPTS;
+ ip6_savecontrol(inp, n, &opts);
+ inp->inp_flags = savedflags;
+ } else
+#endif
+ ip_savecontrol(inp, &opts, ip, n);
+ }
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6) {
+ bzero(&udp_in6, sizeof(udp_in6));
+ udp_in6.sin6_len = sizeof(udp_in6);
+ udp_in6.sin6_family = AF_INET6;
+ in6_sin_2_v4mapsin6(udp_in, &udp_in6);
+ append_sa = (struct sockaddr *)&udp_in6;
+ } else
+#endif
+ append_sa = (struct sockaddr *)udp_in;
+ m_adj(n, off);
+
+ so = inp->inp_socket;
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ m_freem(n);
+ if (opts)
+ m_freem(opts);
+ udpstat.udps_fullsock++;
+ } else
+ sorwakeup_locked(so);
}
void
-udp_input(m, off)
- register struct mbuf *m;
- int off;
+udp_input(struct mbuf *m, int off)
{
int iphlen = off;
- register struct ip *ip;
- register struct udphdr *uh;
- register struct inpcb *inp;
+ struct ip *ip;
+ struct udphdr *uh;
+ struct ifnet *ifp;
+ struct inpcb *inp;
int len;
struct ip save_ip;
struct sockaddr_in udp_in;
@@ -158,13 +264,13 @@
struct m_tag *fwd_tag;
#endif
+ ifp = m->m_pkthdr.rcvif;
udpstat.udps_ipackets++;
/*
- * Strip IP options, if any; should skip this,
- * make available to user, and use on returned packets,
- * but we don't yet have a way to check the checksum
- * with options still present.
+ * Strip IP options, if any; should skip this, make available to
+ * user, and use on returned packets, but we don't yet have a way to
+ * check the checksum with options still present.
*/
if (iphlen > sizeof (struct ip)) {
ip_stripoptions(m, (struct mbuf *)0);
@@ -184,13 +290,15 @@
}
uh = (struct udphdr *)((caddr_t)ip + iphlen);
- /* destination port of 0 is illegal, based on RFC768. */
+ /*
+ * Destination port of 0 is illegal, based on RFC768.
+ */
if (uh->uh_dport == 0)
goto badunlocked;
/*
- * Construct sockaddr format source address.
- * Stuff source address and datagram in user buffer.
+ * Construct sockaddr format source address. Stuff source address
+ * and datagram in user buffer.
*/
bzero(&udp_in, sizeof(udp_in));
udp_in.sin_len = sizeof(udp_in);
@@ -199,8 +307,8 @@
udp_in.sin_addr = ip->ip_src;
/*
- * Make mbuf data length reflect UDP length.
- * If not enough data to reflect UDP length, drop.
+ * Make mbuf data length reflect UDP length. If not enough data to
+ * reflect UDP length, drop.
*/
len = ntohs((u_short)uh->uh_ulen);
if (ip->ip_len != len) {
@@ -211,18 +319,22 @@
m_adj(m, len - ip->ip_len);
/* ip->ip_len = len; */
}
+
/*
- * Save a copy of the IP header in case we want restore it
- * for sending an ICMP error message in response.
+	 * Save a copy of the IP header in case we want to restore it for
+ * sending an ICMP error message in response.
*/
- if (!blackhole)
+ if (!udp_blackhole)
save_ip = *ip;
+ else
+ memset(&save_ip, 0, sizeof(save_ip));
/*
* Checksum extended UDP header and data.
*/
if (uh->uh_sum) {
u_short uh_sum;
+
if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
uh_sum = m->m_pkthdr.csum_data;
@@ -233,6 +345,7 @@
uh_sum ^= 0xffff;
} else {
char b[9];
+
bcopy(((struct ipovly *)ip)->ih_x1, b, 9);
bzero(((struct ipovly *)ip)->ih_x1, 9);
((struct ipovly *)ip)->ih_len = uh->uh_ulen;
@@ -248,46 +361,33 @@
udpstat.udps_nosum++;
#ifdef IPFIREWALL_FORWARD
- /* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */
+ /*
+ * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
+ */
fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
-
if (fwd_tag != NULL) {
struct sockaddr_in *next_hop;
- /* Do the hack. */
+ /*
+ * Do the hack.
+ */
next_hop = (struct sockaddr_in *)(fwd_tag + 1);
ip->ip_dst = next_hop->sin_addr;
uh->uh_dport = ntohs(next_hop->sin_port);
- /* Remove the tag from the packet. We don't need it anymore. */
+
+ /*
+ * Remove the tag from the packet. We don't need it anymore.
+ */
m_tag_delete(m, fwd_tag);
}
#endif
INP_INFO_RLOCK(&udbinfo);
-
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
- in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
+ in_broadcast(ip->ip_dst, ifp)) {
struct inpcb *last;
- /*
- * Deliver a multicast or broadcast datagram to *all* sockets
- * for which the local and remote addresses and ports match
- * those of the incoming datagram. This allows more than
- * one process to receive multi/broadcasts on the same port.
- * (This really ought to be done for unicast datagrams as
- * well, but that would cause problems with existing
- * applications that open both address-specific sockets and
- * a wildcard socket listening to the same port -- they would
- * end up receiving duplicates of every unicast datagram.
- * Those applications open the multiple sockets to overcome an
- * inadequacy of the UDP socket interface, but for backwards
- * compatibility we avoid the problem here rather than
- * fixing the interface. Maybe 4.5BSD will remedy this?)
- */
+ struct ip_moptions *imo;
- /*
- * Locate pcb(s) for datagram.
- * (Algorithm copied from raw_intr().)
- */
last = NULL;
LIST_FOREACH(inp, &udb, inp_list) {
if (inp->inp_lport != uh->uh_dport)
@@ -296,73 +396,111 @@
if ((inp->inp_vflag & INP_IPV4) == 0)
continue;
#endif
- if (inp->inp_laddr.s_addr != INADDR_ANY) {
- if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
- continue;
- }
- if (inp->inp_faddr.s_addr != INADDR_ANY) {
- if (inp->inp_faddr.s_addr !=
- ip->ip_src.s_addr ||
- inp->inp_fport != uh->uh_sport)
- continue;
- }
+ if (inp->inp_laddr.s_addr != INADDR_ANY &&
+ inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
+ continue;
+ if (inp->inp_faddr.s_addr != INADDR_ANY &&
+ inp->inp_faddr.s_addr != ip->ip_src.s_addr)
+ continue;
+ /*
+ * XXX: Do not check source port of incoming datagram
+ * unless inp_connect() has been called to bind the
+ * fport part of the 4-tuple; the source could be
+ * trying to talk to us with an ephemeral port.
+ */
+ if (inp->inp_fport != 0 &&
+ inp->inp_fport != uh->uh_sport)
+ continue;
+
INP_LOCK(inp);
/*
- * Check multicast packets to make sure they are only
- * sent to sockets with multicast memberships for the
- * packet's destination address and arrival interface
+ * Handle socket delivery policy for any-source
+ * and source-specific multicast. [RFC3678]
*/
-#define MSHIP(_inp, n) ((_inp)->inp_moptions->imo_membership[(n)])
-#define NMSHIPS(_inp) ((_inp)->inp_moptions->imo_num_memberships)
- if (strict_mcast_mship && inp->inp_moptions != NULL) {
- int mship, foundmship = 0;
-
- for (mship = 0; mship < NMSHIPS(inp); mship++) {
- if (MSHIP(inp, mship)->inm_addr.s_addr
- == ip->ip_dst.s_addr &&
- MSHIP(inp, mship)->inm_ifp
- == m->m_pkthdr.rcvif) {
- foundmship = 1;
- break;
+ imo = inp->inp_moptions;
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
+ imo != NULL) {
+ struct sockaddr_in sin;
+ struct in_msource *ims;
+ int blocked, mode;
+ size_t idx;
+
+ bzero(&sin, sizeof(struct sockaddr_in));
+ sin.sin_len = sizeof(struct sockaddr_in);
+ sin.sin_family = AF_INET;
+ sin.sin_addr = ip->ip_dst;
+
+ blocked = 0;
+ idx = imo_match_group(imo, ifp,
+ (struct sockaddr *)&sin);
+ if (idx == -1) {
+ /*
+ * No group membership for this socket.
+ * Do not bump udps_noportbcast, as
+ * this will happen further down.
+ */
+ blocked++;
+ } else {
+ /*
+ * Check for a multicast source filter
+ * entry on this socket for this group.
+ * MCAST_EXCLUDE is the default
+ * behaviour. It means default accept;
+ * entries, if present, denote sources
+ * to be excluded from delivery.
+ */
+ ims = imo_match_source(imo, idx,
+ (struct sockaddr *)&udp_in);
+ mode = imo->imo_mfilters[idx].imf_fmode;
+ if ((ims != NULL &&
+ mode == MCAST_EXCLUDE) ||
+ (ims == NULL &&
+ mode == MCAST_INCLUDE)) {
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: blocked by"
+ " source filter\n",
+ __func__);
+ }
+#endif
+ udpstat.udps_filtermcast++;
+ blocked++;
}
}
- if (foundmship == 0) {
+ if (blocked != 0) {
INP_UNLOCK(inp);
continue;
}
}
-#undef NMSHIPS
-#undef MSHIP
if (last != NULL) {
struct mbuf *n;
n = m_copy(m, 0, M_COPYALL);
if (n != NULL)
- udp_append(last, ip, n,
- iphlen +
- sizeof(struct udphdr),
- &udp_in);
+ udp_append(last, ip, n, iphlen +
+ sizeof(struct udphdr), &udp_in);
INP_UNLOCK(last);
}
last = inp;
/*
* Don't look for additional matches if this one does
* not have either the SO_REUSEPORT or SO_REUSEADDR
- * socket options set. This heuristic avoids searching
- * through all pcbs in the common case of a non-shared
- * port. It * assumes that an application will never
- * clear these options after setting them.
+ * socket options set. This heuristic avoids
+ * searching through all pcbs in the common case of a
+ * non-shared port. It assumes that an application
+ * will never clear these options after setting them.
*/
- if ((last->inp_socket->so_options&(SO_REUSEPORT|SO_REUSEADDR)) == 0)
+ if ((last->inp_socket->so_options &
+ (SO_REUSEPORT|SO_REUSEADDR)) == 0)
break;
}
if (last == NULL) {
/*
- * No matching pcb found; discard datagram.
- * (No need to send an ICMP Port Unreachable
- * for a broadcast or multicast datgram.)
+ * No matching pcb found; discard datagram. (No need
+ * to send an ICMP Port Unreachable for a broadcast
+		 * or multicast datagram.)
*/
udpstat.udps_noportbcast++;
goto badheadlocked;
@@ -373,13 +511,14 @@
INP_INFO_RUNLOCK(&udbinfo);
return;
}
+
/*
* Locate pcb for datagram.
*/
inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport,
- ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif);
+ ip->ip_dst, uh->uh_dport, 1, ifp);
if (inp == NULL) {
- if (log_in_vain) {
+ if (udp_log_in_vain) {
char buf[4*sizeof "123"];
strcpy(buf, inet_ntoa(ip->ip_dst));
@@ -393,7 +532,7 @@
udpstat.udps_noportbcast++;
goto badheadlocked;
}
- if (blackhole)
+ if (udp_blackhole)
goto badheadlocked;
if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0)
goto badheadlocked;
@@ -403,8 +542,11 @@
INP_INFO_RUNLOCK(&udbinfo);
return;
}
+
+ /*
+ * Check the minimum TTL for socket.
+ */
INP_LOCK(inp);
- /* Check the minimum TTL for socket. */
if (inp->inp_ip_minttl && inp->inp_ip_minttl > ip->ip_ttl)
goto badheadlocked;
udp_append(inp, ip, m, iphlen + sizeof(struct udphdr), &udp_in);
@@ -418,111 +560,27 @@
INP_INFO_RUNLOCK(&udbinfo);
badunlocked:
m_freem(m);
- return;
}
/*
- * Subroutine of udp_input(), which appends the provided mbuf chain to the
- * passed pcb/socket. The caller must provide a sockaddr_in via udp_in that
- * contains the source address. If the socket ends up being an IPv6 socket,
- * udp_append() will convert to a sockaddr_in6 before passing the address
- * into the socket code.
- */
-static void
-udp_append(last, ip, n, off, udp_in)
- struct inpcb *last;
- struct ip *ip;
- struct mbuf *n;
- int off;
- struct sockaddr_in *udp_in;
-{
- struct sockaddr *append_sa;
- struct socket *so;
- struct mbuf *opts = 0;
-#ifdef INET6
- struct sockaddr_in6 udp_in6;
-#endif
-
- INP_LOCK_ASSERT(last);
-
-#if defined(IPSEC) || defined(FAST_IPSEC)
- /* check AH/ESP integrity. */
- if (ipsec4_in_reject(n, last)) {
-#ifdef IPSEC
- ipsecstat.in_polvio++;
-#endif /*IPSEC*/
- m_freem(n);
- return;
- }
-#endif /*IPSEC || FAST_IPSEC*/
-#ifdef MAC
- if (mac_check_inpcb_deliver(last, n) != 0) {
- m_freem(n);
- return;
- }
-#endif
- if (last->inp_flags & INP_CONTROLOPTS ||
- last->inp_socket->so_options & (SO_TIMESTAMP | SO_BINTIME)) {
-#ifdef INET6
- if (last->inp_vflag & INP_IPV6) {
- int savedflags;
-
- savedflags = last->inp_flags;
- last->inp_flags &= ~INP_UNMAPPABLEOPTS;
- ip6_savecontrol(last, n, &opts);
- last->inp_flags = savedflags;
- } else
-#endif
- ip_savecontrol(last, &opts, ip, n);
- }
-#ifdef INET6
- if (last->inp_vflag & INP_IPV6) {
- bzero(&udp_in6, sizeof(udp_in6));
- udp_in6.sin6_len = sizeof(udp_in6);
- udp_in6.sin6_family = AF_INET6;
- in6_sin_2_v4mapsin6(udp_in, &udp_in6);
- append_sa = (struct sockaddr *)&udp_in6;
- } else
-#endif
- append_sa = (struct sockaddr *)udp_in;
- m_adj(n, off);
-
- so = last->inp_socket;
- SOCKBUF_LOCK(&so->so_rcv);
- if (sbappendaddr_locked(&so->so_rcv, append_sa, n, opts) == 0) {
- m_freem(n);
- if (opts)
- m_freem(opts);
- udpstat.udps_fullsock++;
- SOCKBUF_UNLOCK(&so->so_rcv);
- } else
- sorwakeup_locked(so);
-}
-
-/*
- * Notify a udp user of an asynchronous error;
- * just wake up so that he can collect error status.
+ * Notify a udp user of an asynchronous error; just wake up so that they can
+ * collect error status.
*/
struct inpcb *
-udp_notify(inp, errno)
- register struct inpcb *inp;
- int errno;
+udp_notify(struct inpcb *inp, int errno)
{
+
inp->inp_socket->so_error = errno;
sorwakeup(inp->inp_socket);
sowwakeup(inp->inp_socket);
- return inp;
+ return (inp);
}
void
-udp_ctlinput(cmd, sa, vip)
- int cmd;
- struct sockaddr *sa;
- void *vip;
+udp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
{
struct ip *ip = vip;
struct udphdr *uh;
- struct inpcb *(*notify)(struct inpcb *, int) = udp_notify;
struct in_addr faddr;
struct inpcb *inp;
@@ -535,16 +593,18 @@
*/
if (PRC_IS_REDIRECT(cmd))
return;
+
/*
* Hostdead is ugly because it goes linearly through all PCBs.
- * XXX: We never get this from ICMP, otherwise it makes an
- * excellent DoS attack on machines with many connections.
+ *
+ * XXX: We never get this from ICMP, otherwise it makes an excellent
+ * DoS attack on machines with many connections.
*/
if (cmd == PRC_HOSTDEAD)
- ip = 0;
+ ip = NULL;
else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
return;
- if (ip) {
+ if (ip != NULL) {
uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
INP_INFO_RLOCK(&udbinfo);
inp = in_pcblookup_hash(&udbinfo, faddr, uh->uh_dport,
@@ -552,13 +612,14 @@
if (inp != NULL) {
INP_LOCK(inp);
if (inp->inp_socket != NULL) {
- (*notify)(inp, inetctlerrmap[cmd]);
+ udp_notify(inp, inetctlerrmap[cmd]);
}
INP_UNLOCK(inp);
}
INP_INFO_RUNLOCK(&udbinfo);
} else
- in_pcbnotifyall(&udbinfo, faddr, inetctlerrmap[cmd], notify);
+ in_pcbnotifyall(&udbinfo, faddr, inetctlerrmap[cmd],
+ udp_notify);
}
static int
@@ -570,18 +631,18 @@
struct xinpgen xig;
/*
- * The process of preparing the TCB list is too time-consuming and
+ * The process of preparing the PCB list is too time-consuming and
* resource-intensive to repeat twice on every request.
*/
if (req->oldptr == 0) {
n = udbinfo.ipi_count;
req->oldidx = 2 * (sizeof xig)
+ (n + n/8) * sizeof(struct xinpcb);
- return 0;
+ return (0);
}
if (req->newptr != 0)
- return EPERM;
+ return (EPERM);
/*
* OK, now we're committed to doing something.
@@ -602,14 +663,14 @@
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
- return error;
+ return (error);
inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
if (inp_list == 0)
- return ENOMEM;
+ return (ENOMEM);
INP_INFO_RLOCK(&udbinfo);
- for (inp = LIST_FIRST(udbinfo.listhead), i = 0; inp && i < n;
+ for (inp = LIST_FIRST(udbinfo.ipi_listhead), i = 0; inp && i < n;
inp = LIST_NEXT(inp, inp_list)) {
INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt &&
@@ -623,6 +684,7 @@
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
+ INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt) {
struct xinpcb xi;
bzero(&xi, sizeof(xi));
@@ -632,16 +694,17 @@
if (inp->inp_socket)
sotoxsocket(inp->inp_socket, &xi.xi_socket);
xi.xi_inp.inp_gencnt = inp->inp_gencnt;
+ INP_UNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
- }
+ } else
+ INP_UNLOCK(inp);
}
if (!error) {
/*
- * Give the user an updated idea of our state.
- * If the generation differs from what we told
- * her before, she knows that something happened
- * while we were processing this request, and it
- * might be necessary to retry.
+ * Give the user an updated idea of our state. If the
+ * generation differs from what we told her before, she knows
+ * that something happened while we were processing this
+ * request, and it might be necessary to retry.
*/
INP_INFO_RLOCK(&udbinfo);
xig.xig_gen = udbinfo.ipi_gencnt;
@@ -651,11 +714,11 @@
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
free(inp_list, M_TEMP);
- return error;
+ return (error);
}
SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
- udp_pcblist, "S,xinpcb", "List of active UDP sockets");
+ udp_pcblist, "S,xinpcb", "List of active UDP sockets");
static int
udp_getcred(SYSCTL_HANDLER_ARGS)
@@ -665,7 +728,7 @@
struct inpcb *inp;
int error;
- error = suser_cred(req->td->td_ucred, SUSER_ALLOWJAIL);
+ error = priv_check(req->td, PRIV_NETINET_GETCRED);
if (error)
return (error);
error = SYSCTL_IN(req, addrs, sizeof(addrs));
@@ -694,15 +757,11 @@
udp_getcred, "S,xucred", "Get the xucred of a UDP connection");
static int
-udp_output(inp, m, addr, control, td)
- register struct inpcb *inp;
- struct mbuf *m;
- struct sockaddr *addr;
- struct mbuf *control;
- struct thread *td;
+udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
+ struct mbuf *control, struct thread *td)
{
- register struct udpiphdr *ui;
- register int len = m->m_pkthdr.len;
+ struct udpiphdr *ui;
+ int len = m->m_pkthdr.len;
struct in_addr faddr, laddr;
struct cmsghdr *cm;
struct sockaddr_in *sin, src;
@@ -713,34 +772,34 @@
/*
* udp_output() may need to temporarily bind or connect the current
- * inpcb. As such, we don't know up front what inpcb locks we will
- * need. Do any work to decide what is needed up front before
- * acquiring locks.
+ * inpcb. As such, we don't know up front whether we will need the
+ * pcbinfo lock or not. Do any work to decide what is needed up
+ * front before acquiring any locks.
*/
if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
if (control)
m_freem(control);
m_freem(m);
- return EMSGSIZE;
+ return (EMSGSIZE);
}
- src.sin_addr.s_addr = INADDR_ANY;
+ src.sin_family = 0;
if (control != NULL) {
/*
- * XXX: Currently, we assume all the optional information
- * is stored in a single mbuf.
+ * XXX: Currently, we assume all the optional information is
+ * stored in a single mbuf.
*/
if (control->m_next) {
m_freem(control);
m_freem(m);
- return EINVAL;
+ return (EINVAL);
}
for (; control->m_len > 0;
control->m_data += CMSG_ALIGN(cm->cmsg_len),
control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
cm = mtod(control, struct cmsghdr *);
- if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0 ||
- cm->cmsg_len > control->m_len) {
+ if (control->m_len < sizeof(*cm) || cm->cmsg_len == 0
+ || cm->cmsg_len > control->m_len) {
error = EINVAL;
break;
}
@@ -758,8 +817,10 @@
src.sin_family = AF_INET;
src.sin_len = sizeof(src);
src.sin_port = inp->inp_lport;
- src.sin_addr = *(struct in_addr *)CMSG_DATA(cm);
+ src.sin_addr =
+ *(struct in_addr *)CMSG_DATA(cm);
break;
+
default:
error = ENOPROTOOPT;
break;
@@ -771,11 +832,10 @@
}
if (error) {
m_freem(m);
- return error;
+ return (error);
}
- if (src.sin_addr.s_addr != INADDR_ANY ||
- addr != NULL) {
+ if (src.sin_family == AF_INET || addr != NULL) {
INP_INFO_WLOCK(&udbinfo);
unlock_udbinfo = 1;
} else
@@ -786,10 +846,17 @@
mac_create_mbuf_from_inpcb(inp, m);
#endif
+ /*
+ * If the IP_SENDSRCADDR control message was specified, override the
+ * source address for this datagram. Its use is invalidated if the
+ * address thus specified is incomplete or clobbers other inpcbs.
+ */
laddr = inp->inp_laddr;
lport = inp->inp_lport;
- if (src.sin_addr.s_addr != INADDR_ANY) {
- if (lport == 0) {
+ if (src.sin_family == AF_INET) {
+ if ((lport == 0) ||
+ (laddr.s_addr == INADDR_ANY &&
+ src.sin_addr.s_addr == INADDR_ANY)) {
error = EINVAL;
goto release;
}
@@ -802,7 +869,8 @@
if (addr) {
sin = (struct sockaddr_in *)addr;
if (jailed(td->td_ucred))
- prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr);
+ prison_remote_ip(td->td_ucred, 0,
+ &sin->sin_addr.s_addr);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
error = EISCONN;
goto release;
@@ -852,8 +920,8 @@
m->m_pkthdr.len -= max_linkhdr;
/*
- * Fill in mbuf with extended UDP header
- * and addresses and length put into network format.
+ * Fill in mbuf with extended UDP header and addresses and length put
+ * into network format.
*/
ui = mtod(m, struct udpiphdr *);
bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */
@@ -869,6 +937,7 @@
*/
if (inp->inp_flags & INP_DONTFRAG) {
struct ip *ip;
+
ip = (struct ip *)&ui->ui_i;
ip->ip_off |= IP_DF;
}
@@ -878,22 +947,21 @@
ipflags |= IP_ROUTETOIF;
if (inp->inp_socket->so_options & SO_BROADCAST)
ipflags |= IP_ALLOWBROADCAST;
- if (inp->inp_vflag & INP_ONESBCAST)
+ if (inp->inp_flags & INP_ONESBCAST)
ipflags |= IP_SENDONES;
/*
* Set up checksum and output datagram.
*/
- if (udpcksum) {
- if (inp->inp_vflag & INP_ONESBCAST)
+ if (udp_cksum) {
+ if (inp->inp_flags & INP_ONESBCAST)
faddr.s_addr = INADDR_BROADCAST;
ui->ui_sum = in_pseudo(ui->ui_src.s_addr, faddr.s_addr,
htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP));
m->m_pkthdr.csum_flags = CSUM_UDP;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
- } else {
+ } else
ui->ui_sum = 0;
- }
((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len;
((struct ip *)ui)->ip_ttl = inp->inp_ip_ttl; /* XXX */
((struct ip *)ui)->ip_tos = inp->inp_ip_tos; /* XXX */
@@ -914,37 +982,22 @@
return (error);
}
-u_long udp_sendspace = 9216; /* really max datagram size */
- /* 40 1K datagrams */
-SYSCTL_INT(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLFLAG_RW,
- &udp_sendspace, 0, "Maximum outgoing UDP datagram size");
-
-u_long udp_recvspace = 40 * (1024 +
-#ifdef INET6
- sizeof(struct sockaddr_in6)
-#else
- sizeof(struct sockaddr_in)
-#endif
- );
-SYSCTL_INT(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
- &udp_recvspace, 0, "Maximum space for incoming UDP datagrams");
-
-static int
+static void
udp_abort(struct socket *so)
{
struct inpcb *inp;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL; /* ??? possible? panic instead? */
- }
+ KASSERT(inp != NULL, ("udp_abort: inp == NULL"));
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
- soisdisconnected(so);
- in_pcbdetach(inp);
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ in_pcbdisconnect(inp);
+ inp->inp_laddr.s_addr = INADDR_ANY;
+ soisdisconnected(so);
+ }
+ INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return 0;
}
static int
@@ -953,30 +1006,24 @@
struct inpcb *inp;
int error;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp != 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp == NULL, ("udp_attach: inp != NULL"));
error = soreserve(so, udp_sendspace, udp_recvspace);
+ if (error)
+ return (error);
+ INP_INFO_WLOCK(&udbinfo);
+ error = in_pcballoc(so, &udbinfo);
if (error) {
INP_INFO_WUNLOCK(&udbinfo);
- return error;
- }
- error = in_pcballoc(so, &udbinfo, "udpinp");
- if (error) {
- INP_INFO_WUNLOCK(&udbinfo);
- return error;
+ return (error);
}
inp = (struct inpcb *)so->so_pcb;
- INP_LOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
inp->inp_vflag |= INP_IPV4;
inp->inp_ip_ttl = ip_defttl;
INP_UNLOCK(inp);
- return 0;
+ return (0);
}
static int
@@ -985,17 +1032,32 @@
struct inpcb *inp;
int error;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("udp_bind: inp == NULL"));
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
error = in_pcbbind(inp, nam, td->td_ucred);
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return error;
+ return (error);
+}
+
+static void
+udp_close(struct socket *so)
+{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("udp_close: inp == NULL"));
+ INP_INFO_WLOCK(&udbinfo);
+ INP_LOCK(inp);
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ in_pcbdisconnect(inp);
+ inp->inp_laddr.s_addr = INADDR_ANY;
+ soisdisconnected(so);
+ }
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&udbinfo);
}
static int
@@ -1005,17 +1067,14 @@
int error;
struct sockaddr_in *sin;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("udp_connect: inp == NULL"));
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
if (inp->inp_faddr.s_addr != INADDR_ANY) {
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return EISCONN;
+ return (EISCONN);
}
sin = (struct sockaddr_in *)nam;
if (jailed(td->td_ucred))
@@ -1025,24 +1084,23 @@
soisconnected(so);
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return error;
+ return (error);
}
-static int
+static void
udp_detach(struct socket *so)
{
struct inpcb *inp;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("udp_detach: inp == NULL"));
+ KASSERT(inp->inp_faddr.s_addr == INADDR_ANY,
+ ("udp_detach: not disconnected"));
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
in_pcbdetach(inp);
+ in_pcbfree(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return 0;
}
static int
@@ -1050,35 +1108,35 @@
{
struct inpcb *inp;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("udp_disconnect: inp == NULL"));
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
if (inp->inp_faddr.s_addr == INADDR_ANY) {
INP_INFO_WUNLOCK(&udbinfo);
INP_UNLOCK(inp);
- return ENOTCONN;
+ return (ENOTCONN);
}
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_ISCONNECTED; /* XXX */
+ SOCK_UNLOCK(so);
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- so->so_state &= ~SS_ISCONNECTED; /* XXX */
- return 0;
+ return (0);
}
static int
udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
- struct mbuf *control, struct thread *td)
+ struct mbuf *control, struct thread *td)
{
struct inpcb *inp;
inp = sotoinpcb(so);
- return udp_output(inp, m, addr, control, td);
+ KASSERT(inp != NULL, ("udp_send: inp == NULL"));
+ return (udp_output(inp, m, addr, control, td));
}
int
@@ -1086,38 +1144,12 @@
{
struct inpcb *inp;
- INP_INFO_RLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_RUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("udp_shutdown: inp == NULL"));
INP_LOCK(inp);
- INP_INFO_RUNLOCK(&udbinfo);
socantsendmore(so);
INP_UNLOCK(inp);
- return 0;
-}
-
-/*
- * This is the wrapper function for in_setsockaddr. We just pass down
- * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking
- * here because in_setsockaddr will call malloc and might block.
- */
-static int
-udp_sockaddr(struct socket *so, struct sockaddr **nam)
-{
- return (in_setsockaddr(so, nam, &udbinfo));
-}
-
-/*
- * This is the wrapper function for in_setpeeraddr. We just pass down
- * the pcbinfo for in_setpeeraddr to lock.
- */
-static int
-udp_peeraddr(struct socket *so, struct sockaddr **nam)
-{
- return (in_setpeeraddr(so, nam, &udbinfo));
+ return (0);
}
struct pr_usrreqs udp_usrreqs = {
@@ -1128,9 +1160,11 @@
.pru_control = in_control,
.pru_detach = udp_detach,
.pru_disconnect = udp_disconnect,
- .pru_peeraddr = udp_peeraddr,
+ .pru_peeraddr = in_getpeeraddr,
.pru_send = udp_send,
+ .pru_sosend = sosend_dgram,
.pru_shutdown = udp_shutdown,
- .pru_sockaddr = udp_sockaddr,
- .pru_sosetlabel = in_pcbsosetlabel
+ .pru_sockaddr = in_getsockaddr,
+ .pru_sosetlabel = in_pcbsosetlabel,
+ .pru_close = udp_close,
};
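
For reference, the delivery-policy check added to udp_input() above (imo_match_group()/imo_match_source(), per RFC 3678) only matters once a receiver has joined a source-specific group from userland. A minimal userland sketch of such a join, not part of this diff and using placeholder group/source/port values, could look like:

/*
 * Illustrative userland sketch (not part of this commit): joining a
 * source-specific IPv4 multicast group with the RFC 3678 API so that
 * the new imo_match_group()/imo_match_source() checks in udp_input()
 * apply.  Group, source and port below are placeholders.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <err.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct sockaddr_in sin;
	struct ip_mreq_source mreqs;
	int s;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	if (s == -1)
		err(1, "socket");

	/* Bind the UDP port the datagrams will arrive on. */
	memset(&sin, 0, sizeof(sin));
	sin.sin_len = sizeof(sin);
	sin.sin_family = AF_INET;
	sin.sin_port = htons(5001);		/* placeholder port */
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) == -1)
		err(1, "bind");

	/* Accept group 232.1.1.1 only from source 192.0.2.1 (placeholders). */
	memset(&mreqs, 0, sizeof(mreqs));
	inet_pton(AF_INET, "232.1.1.1", &mreqs.imr_multiaddr);
	inet_pton(AF_INET, "192.0.2.1", &mreqs.imr_sourceaddr);
	mreqs.imr_interface.s_addr = htonl(INADDR_ANY);
	if (setsockopt(s, IPPROTO_IP, IP_ADD_SOURCE_MEMBERSHIP,
	    &mreqs, sizeof(mreqs)) == -1)
		err(1, "IP_ADD_SOURCE_MEMBERSHIP");

	/* ... recvfrom() loop would go here ... */
	close(s);
	return (0);
}

Datagrams from other sources to that group then fall into the MCAST_INCLUDE "blocked" path above and bump udps_filtermcast rather than being delivered.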
--- /dev/null
+++ sys/netinet/tcp_reass.c
@@ -0,0 +1,285 @@
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_reass.c,v 1.353 2007/10/07 20:44:24 silby Exp $");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+
+#include <vm/uma.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
+#include <netinet/ip6.h>
+#include <netinet6/in6_pcb.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/nd6.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet6/tcp6_var.h>
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif /* TCPDEBUG */
+
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
+ "TCP Segment Reassembly Queue");
+
+static int tcp_reass_maxseg = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
+ &tcp_reass_maxseg, 0,
+ "Global maximum number of TCP Segments in Reassembly Queue");
+
+int tcp_reass_qsize = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
+ &tcp_reass_qsize, 0,
+ "Global number of TCP Segments currently in Reassembly Queue");
+
+static int tcp_reass_maxqlen = 48;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxqlen, CTLFLAG_RW,
+ &tcp_reass_maxqlen, 0,
+ "Maximum number of TCP Segments per individual Reassembly Queue");
+
+static int tcp_reass_overflows = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
+ &tcp_reass_overflows, 0,
+ "Global number of TCP Segment Reassembly Queue Overflows");
+
+/* Initialize TCP reassembly queue */
+static void
+tcp_reass_zone_change(void *tag)
+{
+
+ tcp_reass_maxseg = nmbclusters / 16;
+ uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
+}
+
+uma_zone_t tcp_reass_zone;
+
+void
+tcp_reass_init(void)
+{
+
+ tcp_reass_maxseg = nmbclusters / 16;
+ TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
+ &tcp_reass_maxseg);
+ tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
+ EVENTHANDLER_REGISTER(nmbclusters_change,
+ tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
+}
+
+int
+tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
+{
+ struct tseg_qent *q;
+ struct tseg_qent *p = NULL;
+ struct tseg_qent *nq;
+ struct tseg_qent *te = NULL;
+ struct socket *so = tp->t_inpcb->inp_socket;
+ int flags;
+
+ INP_LOCK_ASSERT(tp->t_inpcb);
+
+ /*
+ * XXX: tcp_reass() is rather inefficient with its data structures
+ * and should be rewritten (see NetBSD for optimizations). While
+ * doing that it should move to its own file tcp_reass.c.
+ */
+
+ /*
+	 * Call with th==NULL after becoming established to
+ * force pre-ESTABLISHED data up to user socket.
+ */
+ if (th == NULL)
+ goto present;
+
+ /*
+ * Limit the number of segments in the reassembly queue to prevent
+ * holding on to too many segments (and thus running out of mbufs).
+ * Make sure to let the missing segment through which caused this
+ * queue. Always keep one global queue entry spare to be able to
+ * process the missing segment.
+ */
+ if (th->th_seq != tp->rcv_nxt &&
+ (tcp_reass_qsize + 1 >= tcp_reass_maxseg ||
+ tp->t_segqlen >= tcp_reass_maxqlen)) {
+ tcp_reass_overflows++;
+ tcpstat.tcps_rcvmemdrop++;
+ m_freem(m);
+ *tlenp = 0;
+ return (0);
+ }
+
+ /*
+ * Allocate a new queue entry. If we can't, or hit the zone limit
+ * just drop the pkt.
+ */
+ te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
+ if (te == NULL) {
+ tcpstat.tcps_rcvmemdrop++;
+ m_freem(m);
+ *tlenp = 0;
+ return (0);
+ }
+ tp->t_segqlen++;
+ tcp_reass_qsize++;
+
+ /*
+ * Find a segment which begins after this one does.
+ */
+ LIST_FOREACH(q, &tp->t_segq, tqe_q) {
+ if (SEQ_GT(q->tqe_th->th_seq, th->th_seq))
+ break;
+ p = q;
+ }
+
+ /*
+ * If there is a preceding segment, it may provide some of
+ * our data already. If so, drop the data from the incoming
+ * segment. If it provides all of our data, drop us.
+ */
+ if (p != NULL) {
+ int i;
+ /* conversion to int (in i) handles seq wraparound */
+ i = p->tqe_th->th_seq + p->tqe_len - th->th_seq;
+ if (i > 0) {
+ if (i >= *tlenp) {
+ tcpstat.tcps_rcvduppack++;
+ tcpstat.tcps_rcvdupbyte += *tlenp;
+ m_freem(m);
+ uma_zfree(tcp_reass_zone, te);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
+ /*
+ * Try to present any queued data
+ * at the left window edge to the user.
+ * This is needed after the 3-WHS
+ * completes.
+ */
+ goto present; /* ??? */
+ }
+ m_adj(m, i);
+ *tlenp -= i;
+ th->th_seq += i;
+ }
+ }
+ tcpstat.tcps_rcvoopack++;
+ tcpstat.tcps_rcvoobyte += *tlenp;
+
+ /*
+ * While we overlap succeeding segments trim them or,
+ * if they are completely covered, dequeue them.
+ */
+ while (q) {
+ int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
+ if (i <= 0)
+ break;
+ if (i < q->tqe_len) {
+ q->tqe_th->th_seq += i;
+ q->tqe_len -= i;
+ m_adj(q->tqe_m, i);
+ break;
+ }
+
+ nq = LIST_NEXT(q, tqe_q);
+ LIST_REMOVE(q, tqe_q);
+ m_freem(q->tqe_m);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
+ q = nq;
+ }
+
+ /* Insert the new segment queue entry into place. */
+ te->tqe_m = m;
+ te->tqe_th = th;
+ te->tqe_len = *tlenp;
+
+ if (p == NULL) {
+ LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q);
+ } else {
+ LIST_INSERT_AFTER(p, te, tqe_q);
+ }
+
+present:
+ /*
+ * Present data to user, advancing rcv_nxt through
+ * completed sequence space.
+ */
+ if (!TCPS_HAVEESTABLISHED(tp->t_state))
+ return (0);
+ q = LIST_FIRST(&tp->t_segq);
+ if (!q || q->tqe_th->th_seq != tp->rcv_nxt)
+ return (0);
+ SOCKBUF_LOCK(&so->so_rcv);
+ do {
+ tp->rcv_nxt += q->tqe_len;
+ flags = q->tqe_th->th_flags & TH_FIN;
+ nq = LIST_NEXT(q, tqe_q);
+ LIST_REMOVE(q, tqe_q);
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
+ m_freem(q->tqe_m);
+ else
+ sbappendstream_locked(&so->so_rcv, q->tqe_m);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
+ q = nq;
+ } while (q && q->tqe_th->th_seq == tp->rcv_nxt);
+ ND6_HINT(tp);
+ sorwakeup_locked(so);
+ return (flags);
+}
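
The queue limits above are exported as sysctls under net.inet.tcp.reass. A small userland sketch (not part of this diff) that reads the current and maximum segment counts through sysctlbyname(3) could look like:

/*
 * Illustrative userland sketch (not part of this commit): reading the
 * reassembly-queue counters exported by tcp_reass.c.  The OID names
 * come straight from the SYSCTL_INT() declarations above.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	int maxseg, cursegs;
	size_t len;

	len = sizeof(maxseg);
	if (sysctlbyname("net.inet.tcp.reass.maxsegments", &maxseg, &len,
	    NULL, 0) == -1)
		err(1, "net.inet.tcp.reass.maxsegments");

	len = sizeof(cursegs);
	if (sysctlbyname("net.inet.tcp.reass.cursegments", &cursegs, &len,
	    NULL, 0) == -1)
		err(1, "net.inet.tcp.reass.cursegments");

	printf("reassembly segments in use: %d of %d\n", cursegs, maxseg);
	return (0);
}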
--- /dev/null
+++ sys/netinet/sctp_peeloff.c
@@ -0,0 +1,236 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+/* $KAME: sctp_peeloff.c,v 1.13 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_peeloff.c,v 1.16.4.1 2008/02/02 12:44:13 rwatson Exp $");
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp.h>
+#include <netinet/sctp_uio.h>
+#include <netinet/sctp_peeloff.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_auth.h>
+
+
+int
+sctp_can_peel_off(struct socket *head, sctp_assoc_t assoc_id)
+{
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ uint32_t state;
+
+ inp = (struct sctp_inpcb *)head->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT);
+ return (EFAULT);
+ }
+ stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1);
+ if (stcb == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOENT);
+ return (ENOENT);
+ }
+ state = SCTP_GET_STATE((&stcb->asoc));
+ if ((state == SCTP_STATE_EMPTY) ||
+ (state == SCTP_STATE_INUSE) ||
+ (state == SCTP_STATE_COOKIE_WAIT) ||
+ (state == SCTP_STATE_COOKIE_ECHOED)) {
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
+ return (ENOTCONN);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ /* We are clear to peel this one off */
+ return (0);
+}
+
+int
+sctp_do_peeloff(struct socket *head, struct socket *so, sctp_assoc_t assoc_id)
+{
+ struct sctp_inpcb *inp, *n_inp;
+ struct sctp_tcb *stcb;
+ uint32_t state;
+
+ inp = (struct sctp_inpcb *)head->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT);
+ return (EFAULT);
+ }
+ stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1);
+ if (stcb == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
+ return (ENOTCONN);
+ }
+ state = SCTP_GET_STATE((&stcb->asoc));
+ if ((state == SCTP_STATE_EMPTY) ||
+ (state == SCTP_STATE_INUSE) ||
+ (state == SCTP_STATE_COOKIE_WAIT) ||
+ (state == SCTP_STATE_COOKIE_ECHOED)) {
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
+ return (ENOTCONN);
+ }
+ n_inp = (struct sctp_inpcb *)so->so_pcb;
+ n_inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
+ SCTP_PCB_FLAGS_CONNECTED |
+ SCTP_PCB_FLAGS_IN_TCPPOOL | /* Turn on Blocking IO */
+ (SCTP_PCB_COPY_FLAGS & inp->sctp_flags));
+ n_inp->sctp_socket = so;
+ n_inp->sctp_features = inp->sctp_features;
+ n_inp->sctp_mobility_features = inp->sctp_mobility_features;
+ n_inp->sctp_frag_point = inp->sctp_frag_point;
+ n_inp->partial_delivery_point = inp->partial_delivery_point;
+ n_inp->sctp_context = inp->sctp_context;
+ n_inp->inp_starting_point_for_iterator = NULL;
+ /* copy in the authentication parameters from the original endpoint */
+ if (n_inp->sctp_ep.local_hmacs)
+ sctp_free_hmaclist(n_inp->sctp_ep.local_hmacs);
+ n_inp->sctp_ep.local_hmacs =
+ sctp_copy_hmaclist(inp->sctp_ep.local_hmacs);
+ if (n_inp->sctp_ep.local_auth_chunks)
+ sctp_free_chunklist(n_inp->sctp_ep.local_auth_chunks);
+ n_inp->sctp_ep.local_auth_chunks =
+ sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks);
+ (void)sctp_copy_skeylist(&inp->sctp_ep.shared_keys,
+ &n_inp->sctp_ep.shared_keys);
+ /*
+ * Now we must move it from one hash table to another and get the
+ * stcb in the right place.
+ */
+ sctp_move_pcb_and_assoc(inp, n_inp, stcb);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+
+ sctp_pull_off_control_to_new_inp(inp, n_inp, stcb, SBL_WAIT);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+
+ return (0);
+}
+
+
+struct socket *
+sctp_get_peeloff(struct socket *head, sctp_assoc_t assoc_id, int *error)
+{
+ struct socket *newso;
+ struct sctp_inpcb *inp, *n_inp;
+ struct sctp_tcb *stcb;
+
+ SCTPDBG(SCTP_DEBUG_PEEL1, "SCTP peel-off called\n");
+ inp = (struct sctp_inpcb *)head->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, EFAULT);
+ *error = EFAULT;
+ return (NULL);
+ }
+ stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1);
+ if (stcb == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PEELOFF, ENOTCONN);
+ *error = ENOTCONN;
+ return (NULL);
+ }
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ newso = sonewconn(head, SS_ISCONNECTED
+ );
+ if (newso == NULL) {
+ SCTPDBG(SCTP_DEBUG_PEEL1, "sctp_peeloff:sonewconn failed\n");
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_PEELOFF, ENOMEM);
+ *error = ENOMEM;
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (NULL);
+
+ }
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ n_inp = (struct sctp_inpcb *)newso->so_pcb;
+ SOCK_LOCK(head);
+ n_inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
+ SCTP_PCB_FLAGS_CONNECTED |
+ SCTP_PCB_FLAGS_IN_TCPPOOL | /* Turn on Blocking IO */
+ (SCTP_PCB_COPY_FLAGS & inp->sctp_flags));
+ n_inp->sctp_features = inp->sctp_features;
+ n_inp->sctp_frag_point = inp->sctp_frag_point;
+ n_inp->partial_delivery_point = inp->partial_delivery_point;
+ n_inp->sctp_context = inp->sctp_context;
+ n_inp->inp_starting_point_for_iterator = NULL;
+
+ /* copy in the authentication parameters from the original endpoint */
+ if (n_inp->sctp_ep.local_hmacs)
+ sctp_free_hmaclist(n_inp->sctp_ep.local_hmacs);
+ n_inp->sctp_ep.local_hmacs =
+ sctp_copy_hmaclist(inp->sctp_ep.local_hmacs);
+ if (n_inp->sctp_ep.local_auth_chunks)
+ sctp_free_chunklist(n_inp->sctp_ep.local_auth_chunks);
+ n_inp->sctp_ep.local_auth_chunks =
+ sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks);
+ (void)sctp_copy_skeylist(&inp->sctp_ep.shared_keys,
+ &n_inp->sctp_ep.shared_keys);
+
+ n_inp->sctp_socket = newso;
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
+ sctp_feature_off(n_inp, SCTP_PCB_FLAGS_AUTOCLOSE);
+ n_inp->sctp_ep.auto_close_time = 0;
+ sctp_timer_stop(SCTP_TIMER_TYPE_AUTOCLOSE, n_inp, stcb, NULL,
+ SCTP_FROM_SCTP_PEELOFF + SCTP_LOC_1);
+ }
+ /* Turn off any non-blocking semantic. */
+ SCTP_CLEAR_SO_NBIO(newso);
+ newso->so_state |= SS_ISCONNECTED;
+ /* We remove it right away */
+
+#ifdef SCTP_LOCK_LOGGING
+ if (sctp_logging_level & SCTP_LOCK_LOGGING_ENABLE) {
+ sctp_log_lock(inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_SOCK);
+ }
+#endif
+ TAILQ_REMOVE(&head->so_comp, newso, so_list);
+ head->so_qlen--;
+ SOCK_UNLOCK(head);
+ /*
+ * Now we must move it from one hash table to another and get the
+ * stcb in the right place.
+ */
+ sctp_move_pcb_and_assoc(inp, n_inp, stcb);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ /*
+ * And now the final hack. We move data in the pending side i.e.
+ * head to the new socket buffer. Let the GRUBBING begin :-0
+ */
+ sctp_pull_off_control_to_new_inp(inp, n_inp, stcb, SBL_WAIT);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (newso);
+}
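
The kernel-side peeloff above backs the userland sctp_peeloff(2) call on a one-to-many SCTP socket. A minimal userland sketch follows; it is not part of this diff, the port is a placeholder, error handling is kept short, and it assumes the usual <netinet/sctp.h>/<netinet/sctp_uio.h> declarations of sctp_recvmsg() and sctp_peeloff():

/*
 * Illustrative userland sketch (not part of this commit): receive one
 * message on a one-to-many SCTP socket, then peel that association off
 * onto its own descriptor, which ends up in sctp_do_peeloff()/
 * sctp_get_peeloff() above.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <netinet/sctp_uio.h>
#include <err.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct sockaddr_in sin;
	struct sctp_event_subscribe events;
	struct sctp_sndrcvinfo sinfo;
	char buf[1024];
	socklen_t slen;
	ssize_t n;
	int s, afd, flags;

	s = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
	if (s == -1)
		err(1, "socket");

	memset(&sin, 0, sizeof(sin));
	sin.sin_len = sizeof(sin);
	sin.sin_family = AF_INET;
	sin.sin_port = htons(5002);		/* placeholder port */
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) == -1)
		err(1, "bind");
	if (listen(s, 1) == -1)
		err(1, "listen");

	/* Ask for per-message sndrcvinfo so the association id is filled in. */
	memset(&events, 0, sizeof(events));
	events.sctp_data_io_event = 1;
	if (setsockopt(s, IPPROTO_SCTP, SCTP_EVENTS, &events,
	    sizeof(events)) == -1)
		err(1, "SCTP_EVENTS");

	memset(&sinfo, 0, sizeof(sinfo));
	flags = 0;
	slen = sizeof(sin);
	n = sctp_recvmsg(s, buf, sizeof(buf), (struct sockaddr *)&sin,
	    &slen, &sinfo, &flags);
	if (n == -1)
		err(1, "sctp_recvmsg");

	/* Give that association its own one-to-one style descriptor. */
	afd = sctp_peeloff(s, sinfo.sinfo_assoc_id);
	if (afd == -1)
		err(1, "sctp_peeloff");

	/* ... use afd like a connected socket ... */
	close(afd);
	close(s);
	return (0);
}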
--- /dev/null
+++ sys/netinet/sctp_os.h
@@ -0,0 +1,72 @@
+/*-
+ * Copyright (c) 2006-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_os.h,v 1.9 2007/08/24 00:53:52 rrs Exp $");
+#ifndef __sctp_os_h__
+#define __sctp_os_h__
+
+/*
+ * General kernel memory allocation:
+ * SCTP_MALLOC(element, type, size, name)
+ * SCTP_FREE(element)
+ * Kernel memory allocation for "soname"- memory must be zeroed.
+ * SCTP_MALLOC_SONAME(name, type, size)
+ * SCTP_FREE_SONAME(name)
+ */
+
+/*
+ * Zone(pool) allocation routines: MUST be defined for each OS.
+ * zone = zone/pool pointer.
+ * name = string name of the zone/pool.
+ * size = size of each zone/pool element.
+ * number = number of elements in zone/pool.
+ * type = structure type to allocate
+ *
+ * sctp_zone_t
+ * SCTP_ZONE_INIT(zone, name, size, number)
+ * SCTP_ZONE_GET(zone, type)
+ * SCTP_ZONE_FREE(zone, element)
+ * SCTP_ZONE_DESTROY(zone)
+ */
+
+#include <netinet/sctp_os_bsd.h>
+
+
+
+
+
+/* All os's must implement this address gatherer. If
+ * no VRF's exist, then vrf 0 is the only one and all
+ * addresses and ifn's live here.
+ */
+#define SCTP_DEFAULT_VRF 0
+void sctp_init_vrf_list(int vrfid);
+
+#endif
--- /dev/null
+++ sys/netinet/ip_options.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California.
+ * Copyright (c) 2005 Andre Oppermann, Internet Business Solutions AG.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/netinet/ip_options.h,v 1.3 2007/05/11 10:48:30 rwatson Exp $
+ */
+
+#ifndef _NETINET_IP_OPTIONS_H_
+#define _NETINET_IP_OPTIONS_H_
+
+struct ipoptrt {
+ struct in_addr dst; /* final destination */
+ char nop; /* one NOP to align */
+ char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
+ struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
+};
+
+struct ipopt_tag {
+ struct m_tag tag; /* m_tag */
+ int ip_nhops;
+ struct ipoptrt ip_srcrt;
+};
+
+extern int ip_doopts; /* process or ignore IP options */
+
+int ip_dooptions(struct mbuf *, int);
+struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
+int ip_optcopy(struct ip *, struct ip *);
+int ip_pcbopts(struct inpcb *, int, struct mbuf *);
+void ip_stripoptions(struct mbuf *, struct mbuf *);
+struct mbuf *ip_srcroute(struct mbuf *);
+
+#endif /* !_NETINET_IP_OPTIONS_H_ */
Index: igmp.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/igmp.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/igmp.c -L sys/netinet/igmp.c -u -r1.1.1.1 -r1.2
--- sys/netinet/igmp.c
+++ sys/netinet/igmp.c
@@ -31,7 +31,6 @@
* SUCH DAMAGE.
*
* @(#)igmp.c 8.1 (Berkeley) 7/19/93
- * $FreeBSD: src/sys/netinet/igmp.c,v 1.48.2.1 2005/08/24 17:30:44 rwatson Exp $
*/
/*
@@ -45,11 +44,13 @@
* MULTICAST Revision: 3.5.1.4
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/igmp.c,v 1.54 2007/10/07 20:44:22 silby Exp $");
+
#include "opt_mac.h"
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
@@ -65,11 +66,14 @@
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
#include <netinet/igmp.h>
#include <netinet/igmp_var.h>
#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
static MALLOC_DEFINE(M_IGMP, "igmp", "igmp state");
static struct router_info *find_rti(struct ifnet *ifp);
@@ -81,11 +85,11 @@
igmpstat, "");
/*
- * igmp_mtx protects all mutable global variables in igmp.c, as well as
- * the data fields in struct router_info. In general, a router_info
- * structure will be valid as long as the referencing struct in_multi is
- * valid, so no reference counting is used. We allow unlocked reads of
- * router_info data when accessed via an in_multi read-only.
+ * igmp_mtx protects all mutable global variables in igmp.c, as well as the
+ * data fields in struct router_info. In general, a router_info structure
+ * will be valid as long as the referencing struct in_multi is valid, so no
+ * reference counting is used. We allow unlocked reads of router_info data
+ * when accessed via an in_multi read-only.
*/
static struct mtx igmp_mtx;
static SLIST_HEAD(, router_info) router_info_head;
@@ -122,7 +126,7 @@
igmp_timers_are_running = 0;
/*
- * Construct a Router Alert option to use in outgoing packets
+ * Construct a Router Alert option to use in outgoing packets.
*/
MGET(router_alert, M_DONTWAIT, MT_DATA);
ra = mtod(router_alert, struct ipoption *);
@@ -148,21 +152,20 @@
if (rti->rti_ifp == ifp) {
IGMP_PRINTF(
"[igmp.c, _find_rti] --> found old entry \n");
- return rti;
+ return (rti);
}
}
MALLOC(rti, struct router_info *, sizeof *rti, M_IGMP, M_NOWAIT);
if (rti == NULL) {
- IGMP_PRINTF( "[igmp.c, _find_rti] --> no memory for entry\n");
- return NULL;
+ IGMP_PRINTF("[igmp.c, _find_rti] --> no memory for entry\n");
+ return (NULL);
}
rti->rti_ifp = ifp;
rti->rti_type = IGMP_V2_ROUTER;
rti->rti_time = 0;
SLIST_INSERT_HEAD(&router_info_head, rti, rti_list);
-
IGMP_PRINTF("[igmp.c, _find_rti] --> created an entry \n");
- return rti;
+ return (rti);
}
void
@@ -186,7 +189,7 @@
igmplen = ip->ip_len;
/*
- * Validate lengths
+ * Validate lengths.
*/
if (igmplen < IGMP_MINLEN) {
++igmpstat.igps_rcv_tooshort;
@@ -201,7 +204,7 @@
}
/*
- * Validate checksum
+ * Validate checksum.
*/
m->m_data += iphlen;
m->m_len -= iphlen;
@@ -223,12 +226,12 @@
* In the IGMPv2 specification, there are 3 states and a flag.
*
* In Non-Member state, we simply don't have a membership record.
- * In Delaying Member state, our timer is running (inm->inm_timer)
- * In Idle Member state, our timer is not running (inm->inm_timer==0)
+ * In Delaying Member state, our timer is running (inm->inm_timer).
+ * In Idle Member state, our timer is not running (inm->inm_timer==0).
*
- * The flag is inm->inm_state, it is set to IGMP_OTHERMEMBER if
- * we have heard a report from another member, or IGMP_IREPORTEDLAST
- * if I sent the last report.
+ * The flag is inm->inm_state, it is set to IGMP_OTHERMEMBER if we
+ * have heard a report from another member, or IGMP_IREPORTEDLAST if
+ * I sent the last report.
*/
switch (igmp->igmp_type) {
case IGMP_MEMBERSHIP_QUERY:
@@ -240,8 +243,8 @@
if (igmp->igmp_code == 0) {
/*
* Old router. Remember that the querier on this
- * interface is old, and set the timer to the
- * value in RFC 1112.
+ * interface is old, and set the timer to the value
+ * in RFC 1112.
*/
mtx_lock(&igmp_mtx);
@@ -277,14 +280,14 @@
}
/*
- * - Start the timers in all of our membership records
- * that the query applies to for the interface on
- * which the query arrived excl. those that belong
- * to the "all-hosts" group (224.0.0.1).
- * - Restart any timer that is already running but has
- * a value longer than the requested timeout.
- * - Use the value specified in the query message as
- * the maximum timeout.
+ * - Start the timers in all of our membership records that
+ * the query applies to for the interface on which the
+ * query arrived excl. those that belong to the "all-hosts"
+ * group (224.0.0.1).
+ * - Restart any timer that is already running but has a
+ * value longer than the requested timeout.
+ * - Use the value specified in the query message as the
+ * maximum timeout.
*/
IN_MULTI_LOCK();
IN_FIRST_MULTI(step, inm);
@@ -303,19 +306,19 @@
IN_NEXT_MULTI(step, inm);
}
IN_MULTI_UNLOCK();
-
break;
case IGMP_V1_MEMBERSHIP_REPORT:
case IGMP_V2_MEMBERSHIP_REPORT:
/*
* For fast leave to work, we have to know that we are the
- * last person to send a report for this group. Reports
- * can potentially get looped back if we are a multicast
- * router, so discard reports sourced by me.
+ * last person to send a report for this group. Reports can
+ * potentially get looped back if we are a multicast router,
+ * so discard reports sourced by me.
*/
IFP_TO_IA(ifp, ia);
- if (ia && ip->ip_src.s_addr == IA_SIN(ia)->sin_addr.s_addr)
+ if (ia != NULL &&
+ ip->ip_src.s_addr == IA_SIN(ia)->sin_addr.s_addr)
break;
++igmpstat.igps_rcv_reports;
@@ -338,29 +341,29 @@
* to compensate for the lack of any way for a process to
* determine the arrival interface of an incoming packet.
*/
- if ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) == 0)
- if (ia) ip->ip_src.s_addr = htonl(ia->ia_subnet);
+ if ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) == 0) {
+ if (ia != NULL)
+ ip->ip_src.s_addr = htonl(ia->ia_subnet);
+ }
/*
- * If we belong to the group being reported, stop
- * our timer for that group.
+ * If we belong to the group being reported, stop our timer
+ * for that group.
*/
IN_MULTI_LOCK();
IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm);
if (inm != NULL) {
inm->inm_timer = 0;
++igmpstat.igps_rcv_ourreports;
-
inm->inm_state = IGMP_OTHERMEMBER;
}
IN_MULTI_UNLOCK();
-
break;
}
/*
- * Pass all valid IGMP packets up to any process(es) listening
- * on a raw IGMP socket.
+ * Pass all valid IGMP packets up to any process(es) listening on a
+ * raw IGMP socket.
*/
rip_input(m, off);
}
@@ -410,8 +413,8 @@
struct in_multistep step;
/*
- * Quick check to see if any work needs to be done, in order
- * to minimize the overhead of fasttimo processing.
+ * Quick check to see if any work needs to be done, in order to
+ * minimize the overhead of fasttimo processing.
*/
if (!igmp_timers_are_running)
@@ -462,7 +465,7 @@
IN_MULTI_LOCK_ASSERT();
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL)
return;
@@ -501,8 +504,7 @@
imo.imo_multicast_loop = (ip_mrouter != NULL);
/*
- * XXX
- * Do we have to worry about reentrancy here? Don't think so.
+ * XXX: Do we have to worry about reentrancy here? Don't think so.
*/
ip_output(m, router_alert, &igmprt, 0, &imo, NULL);
Index: if_ether.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -L sys/netinet/if_ether.c -L sys/netinet/if_ether.c -u -r1.7 -r1.8
--- sys/netinet/if_ether.c
+++ sys/netinet/if_ether.c
@@ -27,7 +27,6 @@
* SUCH DAMAGE.
*
* @(#)if_ether.c 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/if_ether.c,v 1.137.2.12 2006/03/22 07:48:31 glebius Exp $
*/
/*
@@ -36,8 +35,10 @@
* add "inuse/lock" bit (or ref. count) along with valid bit
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/if_ether.c,v 1.162 2007/10/07 20:44:22 silby Exp $");
+
#include "opt_inet.h"
-#include "opt_bdg.h"
#include "opt_mac.h"
#include "opt_carp.h"
@@ -46,7 +47,6 @@
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
-#include <sys/mac.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/socket.h>
@@ -59,7 +59,6 @@
#include <net/netisr.h>
#include <net/if_llc.h>
#include <net/ethernet.h>
-#include <net/bridge.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
@@ -72,6 +71,8 @@
#include <netinet/ip_carp.h>
#endif
+#include <security/mac/mac_framework.h>
+
#define SIN(s) ((struct sockaddr_in *)s)
#define SDL(s) ((struct sockaddr_dl *)s)
@@ -79,40 +80,34 @@
SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
/* timer values */
-static int arpt_prune = (5*60*1); /* walk list every 5 minutes */
static int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */
-SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl, CTLFLAG_RW,
- &arpt_prune, 0, "");
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW,
- &arpt_keep, 0, "");
+ &arpt_keep, 0, "ARP entry lifetime in seconds");
#define rt_expire rt_rmx.rmx_expire
struct llinfo_arp {
- LIST_ENTRY(llinfo_arp) la_le;
+ struct callout la_timer;
struct rtentry *la_rt;
struct mbuf *la_hold; /* last packet until resolved/timeout */
u_short la_preempt; /* countdown for pre-expiry arps */
u_short la_asked; /* # requests sent */
};
-static LIST_HEAD(, llinfo_arp) llinfo_arp;
-
static struct ifqueue arpintrq;
static int arp_allocated;
static int arp_maxtries = 5;
static int useloopback = 1; /* use loopback interface for local traffic */
static int arp_proxyall = 0;
-static struct callout arp_callout;
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW,
- &arp_maxtries, 0, "");
+ &arp_maxtries, 0, "ARP resolution attempts before returning error");
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW,
- &useloopback, 0, "");
+ &useloopback, 0, "Use the loopback interface for local traffic");
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW,
- &arp_proxyall, 0, "");
+ &arp_proxyall, 0, "Enable proxy ARP for all suitable requests");
static void arp_init(void);
static void arp_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
@@ -127,53 +122,30 @@
#endif
/*
- * Timeout routine. Age arp_tab entries periodically.
+ * Timeout routine.
*/
-/* ARGSUSED */
static void
-arptimer(void * __unused unused)
+arptimer(void *arg)
{
- struct llinfo_arp *la, *ola;
+ struct rtentry *rt = (struct rtentry *)arg;
- RADIX_NODE_HEAD_LOCK(rt_tables[AF_INET]);
- LIST_FOREACH_SAFE(la, &llinfo_arp, la_le, ola) {
- struct rtentry *rt = la->la_rt;
-
- RT_LOCK(rt);
- if (rt->rt_expire && rt->rt_expire <= time_second) {
- struct sockaddr_dl *sdl = SDL(rt->rt_gateway);
-
- KASSERT(sdl->sdl_family == AF_LINK, ("sdl_family %d",
- sdl->sdl_family));
- if (rt->rt_refcnt > 1) {
- sdl->sdl_alen = 0;
- la->la_preempt = la->la_asked = 0;
- RT_UNLOCK(rt);
- continue;
- }
- RT_UNLOCK(rt);
- /*
- * XXX: LIST_REMOVE() is deep inside rtrequest().
- */
- rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), 0,
- NULL);
- continue;
- }
- RT_UNLOCK(rt);
- }
- RADIX_NODE_HEAD_UNLOCK(rt_tables[AF_INET]);
+ RT_LOCK_ASSERT(rt);
+ /*
+ * The lock is needed to close a theoretical race
+ * between spontaneous expiry and intentional removal.
+ * We still got an extra reference on rtentry, so can
+ * safely pass pointers to its contents.
+ */
+ RT_UNLOCK(rt);
- callout_reset(&arp_callout, arpt_prune * hz, arptimer, NULL);
+ rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), 0, NULL);
}
/*
* Parallel to llc_rtrequest.
*/
static void
-arp_rtrequest(req, rt, info)
- int req;
- struct rtentry *rt;
- struct rt_addrinfo *info;
+arp_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
{
struct sockaddr *gate;
struct llinfo_arp *la;
@@ -208,7 +180,7 @@
gate = rt->rt_gateway;
SDL(gate)->sdl_type = rt->rt_ifp->if_type;
SDL(gate)->sdl_index = rt->rt_ifp->if_index;
- rt->rt_expire = time_second;
+ rt->rt_expire = time_uptime;
break;
}
/* Announce a new entry if requested. */
@@ -252,8 +224,8 @@
RT_ADDREF(rt);
la->la_rt = rt;
rt->rt_flags |= RTF_LLINFO;
- RADIX_NODE_HEAD_LOCK_ASSERT(rt_tables[AF_INET]);
- LIST_INSERT_HEAD(&llinfo_arp, la, la_le);
+ callout_init_mtx(&la->la_timer, &rt->rt_mtx,
+ CALLOUT_RETURNUNLOCKED);
#ifdef INET
/*
@@ -296,8 +268,10 @@
rt->rt_expire = 0;
bcopy(IF_LLADDR(rt->rt_ifp), LLADDR(SDL(gate)),
SDL(gate)->sdl_alen = rt->rt_ifp->if_addrlen);
- if (useloopback)
+ if (useloopback) {
rt->rt_ifp = loif;
+ rt->rt_rmx.rmx_mtu = loif->if_mtu;
+ }
/*
* make sure to set rt->rt_ifa to the interface
@@ -314,13 +288,12 @@
break;
case RTM_DELETE:
- if (la == 0)
+ if (la == NULL) /* XXX: at least CARP does this. */
break;
- RADIX_NODE_HEAD_LOCK_ASSERT(rt_tables[AF_INET]);
- LIST_REMOVE(la, la_le);
- RT_REMREF(rt);
- rt->rt_llinfo = 0;
+ callout_stop(&la->la_timer);
+ rt->rt_llinfo = NULL;
rt->rt_flags &= ~RTF_LLINFO;
+ RT_REMREF(rt);
if (la->la_hold)
m_freem(la->la_hold);
Free((caddr_t)la);
@@ -334,10 +307,8 @@
* - arp header source ethernet address
*/
static void
-arprequest(ifp, sip, tip, enaddr)
- struct ifnet *ifp;
- struct in_addr *sip, *tip;
- u_char *enaddr;
+arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip,
+ u_char *enaddr)
{
struct mbuf *m;
struct arphdr *ah;
@@ -384,7 +355,7 @@
*/
int
arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
- struct sockaddr *dst, u_char *desten)
+ struct sockaddr *dst, u_char *desten)
{
struct llinfo_arp *la = NULL;
struct rtentry *rt = NULL;
@@ -438,7 +409,7 @@
* Check the address family and length is valid, the address
* is resolved; otherwise, try to resolve.
*/
- if ((rt->rt_expire == 0 || rt->rt_expire > time_second) &&
+ if ((rt->rt_expire == 0 || rt->rt_expire > time_uptime) &&
sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) {
bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
@@ -448,7 +419,7 @@
* send an ARP request.
*/
if ((rt->rt_expire != 0) &&
- (time_second + la->la_preempt > rt->rt_expire)) {
+ (time_uptime + la->la_preempt > rt->rt_expire)) {
struct in_addr sin =
SIN(rt->rt_ifa->ifa_addr)->sin_addr;
@@ -495,11 +466,12 @@
else
error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
- if (la->la_asked == 0 || rt->rt_expire != time_second) {
+ if (la->la_asked == 0 || rt->rt_expire != time_uptime) {
struct in_addr sin =
SIN(rt->rt_ifa->ifa_addr)->sin_addr;
- rt->rt_expire = time_second;
+ rt->rt_expire = time_uptime;
+ callout_reset(&la->la_timer, hz, arptimer, rt);
la->la_asked++;
RT_UNLOCK(rt);
@@ -587,13 +559,10 @@
static void
-in_arpinput(m)
- struct mbuf *m;
+in_arpinput(struct mbuf *m)
{
struct arphdr *ah;
struct ifnet *ifp = m->m_pkthdr.rcvif;
- struct iso88025_header *th = (struct iso88025_header *)0;
- struct iso88025_sockaddr_dl_data *trld;
struct llinfo_arp *la;
struct rtentry *rt;
struct ifaddr *ifa;
@@ -610,7 +579,7 @@
int carp_match = 0;
#endif
- if (do_bridge || ifp->if_bridge)
+ if (ifp->if_bridge)
bridged = 1;
req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
@@ -634,7 +603,7 @@
* XXX: This is really ugly!
*/
LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
- if (((bridged && ia->ia_ifp->if_bridge != NULL) || do_bridge ||
+ if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
(ia->ia_ifp == ifp)) &&
itaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
goto match;
@@ -648,7 +617,7 @@
#endif
}
LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash)
- if (((bridged && ia->ia_ifp->if_bridge != NULL) || do_bridge ||
+ if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
(ia->ia_ifp == ifp)) &&
isaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
goto match;
@@ -657,7 +626,7 @@
* as a dummy address for the rest of the function.
*/
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
- if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
+ if (ifa->ifa_addr->sa_family == AF_INET) {
ia = ifatoia(ifa);
goto match;
}
@@ -767,6 +736,9 @@
* routing info.
*/
if (ifp->if_type == IFT_ISO88025) {
+ struct iso88025_header *th = NULL;
+ struct iso88025_sockaddr_dl_data *trld;
+
th = (struct iso88025_header *)m->m_pkthdr.header;
trld = SDL_ISO88025(sdl);
rif_len = TR_RCF_RIFLEN(th->rcf);
@@ -792,8 +764,10 @@
m->m_pkthdr.len += 8;
th->rcf = trld->trld_rcf;
}
- if (rt->rt_expire)
- rt->rt_expire = time_second + arpt_keep;
+ if (rt->rt_expire) {
+ rt->rt_expire = time_uptime + arpt_keep;
+ callout_reset(&la->la_timer, hz * arpt_keep, arptimer, rt);
+ }
la->la_asked = 0;
la->la_preempt = arp_maxtries;
hold = la->la_hold;
@@ -866,13 +840,13 @@
} else {
/*
* Return proxied ARP replies only on the interface
- * or bridge cluster where this network resides.
+ * or bridge cluster where this network resides.
* Otherwise we may conflict with the host we are
* proxying for.
*/
if (rt->rt_ifp != ifp &&
- (rt->rt_ifp->if_bridge != ifp->if_bridge ||
- ifp->if_bridge == NULL)) {
+ (rt->rt_ifp->if_bridge != ifp->if_bridge ||
+ ifp->if_bridge == NULL)) {
RT_UNLOCK(rt);
goto drop;
}
@@ -884,19 +858,18 @@
}
if (itaddr.s_addr == myaddr.s_addr &&
- IN_LINKLOCAL(ntohl(itaddr.s_addr))) {
+ IN_LINKLOCAL(ntohl(itaddr.s_addr))) {
/* RFC 3927 link-local IPv4; always reply by broadcast. */
#ifdef DEBUG_LINKLOCAL
printf("arp: sending reply for link-local addr %s\n",
- inet_ntoa(itaddr));
+ inet_ntoa(itaddr));
#endif
-
m->m_flags |= M_BCAST;
m->m_flags &= ~M_MCAST;
} else {
/* default behaviour; never reply by broadcast. */
m->m_flags &= ~(M_BCAST|M_MCAST);
- }
+ }
(void)memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
(void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
ah->ar_op = htons(ARPOP_REPLY);
@@ -917,9 +890,7 @@
* Lookup or enter a new address in arptab.
*/
static struct rtentry *
-arplookup(addr, create, proxy)
- u_long addr;
- int create, proxy;
+arplookup(u_long addr, int create, int proxy)
{
struct rtentry *rt;
struct sockaddr_inarp sin;
@@ -966,9 +937,7 @@
}
void
-arp_ifinit(ifp, ifa)
- struct ifnet *ifp;
- struct ifaddr *ifa;
+arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
{
if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY)
arprequest(ifp, &IA_SIN(ifa)->sin_addr,
@@ -978,10 +947,7 @@
}
void
-arp_ifinit2(ifp, ifa, enaddr)
- struct ifnet *ifp;
- struct ifaddr *ifa;
- u_char *enaddr;
+arp_ifinit2(struct ifnet *ifp, struct ifaddr *ifa, u_char *enaddr)
{
if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY)
arprequest(ifp, &IA_SIN(ifa)->sin_addr,
@@ -996,9 +962,6 @@
arpintrq.ifq_maxlen = 50;
mtx_init(&arpintrq.ifq_mtx, "arp_inq", NULL, MTX_DEF);
- LIST_INIT(&llinfo_arp);
- callout_init(&arp_callout, CALLOUT_MPSAFE);
netisr_register(NETISR_ARP, arpintr, &arpintrq, NETISR_MPSAFE);
- callout_reset(&arp_callout, hz, arptimer, NULL);
}
SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0);
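
The if_ether.c hunks above drop the single global ARP scan timer (the removed arp_callout walking llinfo_arp) and instead arm a per-entry callout, la_timer, bound to the route mutex via callout_init_mtx() and re-armed with callout_reset() whenever an entry is created or refreshed. A rough sketch of that callout(9) pattern follows; the names (struct lle_like, lle_expire, lle_start) are made up for illustration only and this is not code from the patch:

    #include <sys/param.h>
    #include <sys/kernel.h>
    #include <sys/lock.h>
    #include <sys/mutex.h>
    #include <sys/callout.h>

    struct lle_like {
            struct mtx      e_mtx;          /* protects this entry */
            struct callout  e_timer;        /* per-entry expiry timer */
    };

    static void
    lle_expire(void *arg)
    {
            struct lle_like *e = arg;

            /* Entered with e_mtx held because of callout_init_mtx(). */
            /* ... expire or refresh the entry here ... */
            mtx_unlock(&e->e_mtx);          /* CALLOUT_RETURNUNLOCKED */
    }

    static void
    lle_start(struct lle_like *e)
    {
            mtx_init(&e->e_mtx, "lle_like", NULL, MTX_DEF);
            /* Tie the callout to the entry lock, as the ARP code now does. */
            callout_init_mtx(&e->e_timer, &e->e_mtx, CALLOUT_RETURNUNLOCKED);
            /* One-second initial shot, like callout_reset(&la->la_timer, hz, ...). */
            callout_reset(&e->e_timer, hz, lle_expire, e);
    }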
Index: if_atm.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/if_atm.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/if_atm.c -L sys/netinet/if_atm.c -u -r1.1.1.1 -r1.2
--- sys/netinet/if_atm.c
+++ sys/netinet/if_atm.c
@@ -32,7 +32,7 @@
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/if_atm.c,v 1.19.2.2 2005/08/31 13:58:28 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/if_atm.c,v 1.21 2005/08/26 15:27:18 glebius Exp $");
/*
* IP <=> ATM address resolution.
Index: tcp_debug.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_debug.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/tcp_debug.c -L sys/netinet/tcp_debug.c -u -r1.1.1.1 -r1.2
--- sys/netinet/tcp_debug.c
+++ sys/netinet/tcp_debug.c
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 1982, 1986, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,9 +28,11 @@
* SUCH DAMAGE.
*
* @(#)tcp_debug.c 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/tcp_debug.c,v 1.26 2005/01/07 01:45:45 imp Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_debug.c,v 1.29 2007/10/07 20:44:23 silby Exp $");
+
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_tcpdebug.h"
@@ -48,7 +51,10 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/mbuf.h>
+#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/socket.h>
@@ -67,107 +73,95 @@
#include <netinet/tcp_debug.h>
#ifdef TCPDEBUG
-static int tcpconsdebug = 0;
+static int tcpconsdebug = 0;
#endif
-static struct tcp_debug tcp_debug[TCP_NDEBUG];
-static int tcp_debx;
+/*
+ * Global ring buffer of TCP debugging state. Each entry captures a snapshot
+ * of TCP connection state at any given moment. tcp_debx indexes the
+ * next available slot. There is no explicit export of this data structure;
+ * it will be read via /dev/kmem by debugging tools.
+ */
+static struct tcp_debug tcp_debug[TCP_NDEBUG];
+static int tcp_debx;
/*
- * Tcp debug routines
+ * All global state is protected by tcp_debug_mtx; tcp_trace() is split into
+ * two parts, one of which saves connection and other state into the global
+ * array (locked by tcp_debug_mtx), while the other optionally prints that
+ * state to the console under TCPDEBUG after the mutex has been released.
+ */
+struct mtx tcp_debug_mtx;
+MTX_SYSINIT(tcp_debug_mtx, &tcp_debug_mtx, "tcp_debug_mtx", MTX_DEF);
+
+/*
+ * Save TCP state at a given moment; optionally, both tcpcb and TCP packet
+ * header state will be saved.
*/
void
-tcp_trace(act, ostate, tp, ipgen, th, req)
- short act, ostate;
- struct tcpcb *tp;
- void *ipgen;
- struct tcphdr *th;
- int req;
+tcp_trace(short act, short ostate, struct tcpcb *tp, void *ipgen,
+ struct tcphdr *th, int req)
{
#ifdef INET6
int isipv6;
#endif /* INET6 */
tcp_seq seq, ack;
int len, flags;
- struct tcp_debug *td = &tcp_debug[tcp_debx++];
+ struct tcp_debug *td;
+ mtx_lock(&tcp_debug_mtx);
+ td = &tcp_debug[tcp_debx++];
+ if (tcp_debx == TCP_NDEBUG)
+ tcp_debx = 0;
+ bzero(td, sizeof(*td));
#ifdef INET6
isipv6 = (ipgen != NULL && ((struct ip *)ipgen)->ip_v == 6) ? 1 : 0;
#endif /* INET6 */
td->td_family =
#ifdef INET6
- (isipv6 != 0) ? AF_INET6 :
+ (isipv6 != 0) ? AF_INET6 :
#endif
- AF_INET;
- if (tcp_debx == TCP_NDEBUG)
- tcp_debx = 0;
+ AF_INET;
td->td_time = iptime();
td->td_act = act;
td->td_ostate = ostate;
td->td_tcb = (caddr_t)tp;
- if (tp)
+ if (tp != NULL)
td->td_cb = *tp;
- else
- bzero((caddr_t)&td->td_cb, sizeof (*tp));
- if (ipgen) {
+ if (ipgen != NULL) {
switch (td->td_family) {
case AF_INET:
- bcopy((caddr_t)ipgen, (caddr_t)&td->td_ti.ti_i,
- sizeof(td->td_ti.ti_i));
- bzero((caddr_t)td->td_ip6buf, sizeof(td->td_ip6buf));
+ bcopy(ipgen, &td->td_ti.ti_i, sizeof(td->td_ti.ti_i));
break;
#ifdef INET6
case AF_INET6:
- bcopy((caddr_t)ipgen, (caddr_t)td->td_ip6buf,
- sizeof(td->td_ip6buf));
- bzero((caddr_t)&td->td_ti.ti_i,
- sizeof(td->td_ti.ti_i));
+ bcopy(ipgen, td->td_ip6buf, sizeof(td->td_ip6buf));
break;
#endif
- default:
- bzero((caddr_t)td->td_ip6buf, sizeof(td->td_ip6buf));
- bzero((caddr_t)&td->td_ti.ti_i,
- sizeof(td->td_ti.ti_i));
- break;
}
- } else {
- bzero((caddr_t)&td->td_ti.ti_i, sizeof(td->td_ti.ti_i));
- bzero((caddr_t)td->td_ip6buf, sizeof(td->td_ip6buf));
}
- if (th) {
+ if (th != NULL) {
switch (td->td_family) {
case AF_INET:
td->td_ti.ti_t = *th;
- bzero((caddr_t)&td->td_ti6.th, sizeof(td->td_ti6.th));
break;
#ifdef INET6
case AF_INET6:
td->td_ti6.th = *th;
- bzero((caddr_t)&td->td_ti.ti_t,
- sizeof(td->td_ti.ti_t));
break;
#endif
- default:
- bzero((caddr_t)&td->td_ti.ti_t,
- sizeof(td->td_ti.ti_t));
- bzero((caddr_t)&td->td_ti6.th, sizeof(td->td_ti6.th));
- break;
}
- } else {
- bzero((caddr_t)&td->td_ti.ti_t, sizeof(td->td_ti.ti_t));
- bzero((caddr_t)&td->td_ti6.th, sizeof(td->td_ti6.th));
}
td->td_req = req;
+ mtx_unlock(&tcp_debug_mtx);
#ifdef TCPDEBUG
if (tcpconsdebug == 0)
return;
- if (tp)
+ if (tp != NULL)
printf("%p %s:", tp, tcpstates[ostate]);
else
printf("???????? ");
printf("%s ", tanames[act]);
switch (act) {
-
case TA_INPUT:
case TA_OUTPUT:
case TA_DROP:
@@ -177,9 +171,9 @@
ack = th->th_ack;
len =
#ifdef INET6
- isipv6 ? ((struct ip6_hdr *)ipgen)->ip6_plen :
+ isipv6 ? ((struct ip6_hdr *)ipgen)->ip6_plen :
#endif
- ((struct ip *)ipgen)->ip_len;
+ ((struct ip *)ipgen)->ip_len;
if (act == TA_OUTPUT) {
seq = ntohl(seq);
ack = ntohl(ack);
@@ -212,11 +206,11 @@
printf("<%s>", tcptimers[req>>8]);
break;
}
- if (tp)
+ if (tp != NULL)
printf(" -> %s", tcpstates[tp->t_state]);
/* print out internal state of tp !?! */
printf("\n");
- if (tp == 0)
+ if (tp == NULL)
return;
printf(
"\trcv_(nxt,wnd,up) (%lx,%lx,%lx) snd_(una,nxt,max) (%lx,%lx,%lx)\n",
--- /dev/null
+++ sys/netinet/sctp.h
@@ -0,0 +1,541 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/* $KAME: sctp.h,v 1.18 2005/03/06 16:04:16 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp.h,v 1.21.2.1 2007/12/09 20:23:47 rrs Exp $");
+
+#ifndef _NETINET_SCTP_H_
+#define _NETINET_SCTP_H_
+
+#include <sys/types.h>
+
+/*
+ * SCTP protocol - RFC2960.
+ */
+struct sctphdr {
+ uint16_t src_port; /* source port */
+ uint16_t dest_port; /* destination port */
+ uint32_t v_tag; /* verification tag of packet */
+ uint32_t checksum; /* Adler32 C-Sum */
+ /* chunks follow... */
+} __attribute__((packed));
+
+/*
+ * SCTP Chunks
+ */
+ struct sctp_chunkhdr {
+ uint8_t chunk_type; /* chunk type */
+ uint8_t chunk_flags; /* chunk flags */
+ uint16_t chunk_length; /* chunk length */
+ /* optional params follow */
+ } __attribute__((packed));
+
+/*
+ * SCTP chunk parameters
+ */
+ struct sctp_paramhdr {
+ uint16_t param_type; /* parameter type */
+ uint16_t param_length; /* parameter length */
+ } __attribute__((packed));
+
+/*
+ * user socket options: socket API defined
+ */
+/*
+ * read-write options
+ */
+#define SCTP_RTOINFO 0x00000001
+#define SCTP_ASSOCINFO 0x00000002
+#define SCTP_INITMSG 0x00000003
+#define SCTP_NODELAY 0x00000004
+#define SCTP_AUTOCLOSE 0x00000005
+#define SCTP_SET_PEER_PRIMARY_ADDR 0x00000006
+#define SCTP_PRIMARY_ADDR 0x00000007
+#define SCTP_ADAPTATION_LAYER 0x00000008
+/* same as above */
+#define SCTP_ADAPTION_LAYER 0x00000008
+#define SCTP_DISABLE_FRAGMENTS 0x00000009
+#define SCTP_PEER_ADDR_PARAMS 0x0000000a
+#define SCTP_DEFAULT_SEND_PARAM 0x0000000b
+/* ancillary data/notification interest options */
+#define SCTP_EVENTS 0x0000000c
+/* Without this applied we will give V4 and V6 addresses on a V6 socket */
+#define SCTP_I_WANT_MAPPED_V4_ADDR 0x0000000d
+#define SCTP_MAXSEG 0x0000000e
+#define SCTP_DELAYED_SACK 0x0000000f
+#define SCTP_FRAGMENT_INTERLEAVE 0x00000010
+#define SCTP_PARTIAL_DELIVERY_POINT 0x00000011
+/* authentication support */
+#define SCTP_AUTH_CHUNK 0x00000012
+#define SCTP_AUTH_KEY 0x00000013
+#define SCTP_HMAC_IDENT 0x00000014
+#define SCTP_AUTH_ACTIVE_KEY 0x00000015
+#define SCTP_AUTH_DELETE_KEY 0x00000016
+#define SCTP_USE_EXT_RCVINFO 0x00000017
+#define SCTP_AUTO_ASCONF 0x00000018 /* rw */
+#define SCTP_MAXBURST 0x00000019 /* rw */
+#define SCTP_MAX_BURST 0x00000019 /* rw */
+/* assoc level context */
+#define SCTP_CONTEXT 0x0000001a /* rw */
+/* explicit EOR signalling */
+#define SCTP_EXPLICIT_EOR 0x0000001b
+
+/*
+ * read-only options
+ */
+#define SCTP_STATUS 0x00000100
+#define SCTP_GET_PEER_ADDR_INFO 0x00000101
+/* authentication support */
+#define SCTP_PEER_AUTH_CHUNKS 0x00000102
+#define SCTP_LOCAL_AUTH_CHUNKS 0x00000103
+#define SCTP_GET_ASSOC_NUMBER 0x00000104 /* ro */
+#define SCTP_GET_ASSOC_ID_LIST 0x00000105 /* ro */
+
+/*
+ * user socket options: BSD implementation specific
+ */
+/*
+ * Blocking I/O is enabled on any TCP-type socket by default. For the UDP
+ * model, if this is turned on then the socket buffer is shared for send
+ * resources amongst all associations. The default for the UDP model is
+ * that SS_NBIO is set. This means all associations have a separate send
+ * limit, BUT they will NOT ever BLOCK; instead you will get EAGAIN back
+ * if you try to send too much. If you want the blocking semantics you
+ * set this option at the cost of sharing one socket send buffer size amongst
+ * all associations. Peeled-off sockets turn this option off and block. But
+ * since both TCP and peeled-off sockets have only one assoc per socket this
+ * is fine. It probably does NOT make sense to set SS_NBIO on a TCP
+ * model OR peeled-off UDP model, but we do allow you to do so. You just use
+ * the normal syscall to toggle SS_NBIO the way you want.
+ *
+ * Blocking I/O is controlled by the SS_NBIO flag on the socket state so_state
+ * field.
+ */
+
+/* these should probably go into sockets API */
+#define SCTP_RESET_STREAMS 0x00001004 /* wo */
+
+
+/* here on down are more implementation specific */
+#define SCTP_SET_DEBUG_LEVEL 0x00001005
+#define SCTP_CLR_STAT_LOG 0x00001007
+/* CMT ON/OFF socket option */
+#define SCTP_CMT_ON_OFF 0x00001200
+#define SCTP_CMT_USE_DAC 0x00001201
+/* JRS - Pluggable Congestion Control Socket option */
+#define SCTP_PLUGGABLE_CC 0x00001202
+
+/* read only */
+#define SCTP_GET_SNDBUF_USE 0x00001101
+#define SCTP_GET_STAT_LOG 0x00001103
+#define SCTP_PCB_STATUS 0x00001104
+#define SCTP_GET_NONCE_VALUES 0x00001105
+
+
+/* Special hook for dynamically setting primary for all assoc's,
+ * this is a write-only option that requires root privilege.
+ */
+#define SCTP_SET_DYNAMIC_PRIMARY 0x00002001
+
+/* VRF (virtual router feature) and multi-VRF support
+ * options. VRF's provide splits within a router
+ * that give the views of multiple routers. A
+ * standard host, without VRF support, is just
+ * a single VRF. If VRF's are supported then
+ * the transport must be VRF aware. This means
+ * that every socket call coming in must be directed
+ * within the endpoint to one of the VRF's it belongs
+ * to. The endpoint, before binding, may select
+ * the "default" VRF it is in by using a set socket
+ * option with SCTP_VRF_ID. This will also
+ * get propagated to the default VRF. Once the
+ * endpoint binds an address then it CANNOT add
+ * additional VRF's to become a Multi-VRF endpoint.
+ *
+ * Before BINDING additional VRF's can be added with
+ * the SCTP_ADD_VRF_ID call or deleted with
+ * SCTP_DEL_VRF_ID.
+ *
+ * Associations are ALWAYS contained inside a single
+ * VRF. They cannot reside in two (or more) VRF's. Incoming
+ * packets, assuming the router is VRF aware, can always
+ * tell us what VRF they arrived on. A host not supporting
+ * any VRF's will find that the packets always arrived on the
+ * single VRF that the host has.
+ *
+ */
+
+#define SCTP_VRF_ID 0x00003001
+#define SCTP_ADD_VRF_ID 0x00003002
+#define SCTP_GET_VRF_IDS 0x00003003
+#define SCTP_GET_ASOC_VRF 0x00003004
+#define SCTP_DEL_VRF_ID 0x00003005
+
+/*
+ * If you enable packet logging you can get
+ * a poor man's ethereal output in binary
+ * form. Note this is a compile option to
+ * the kernel, SCTP_PACKET_LOGGING, and
+ * without it in your kernel you
+ * will get an EOPNOTSUPP error.
+ */
+#define SCTP_GET_PACKET_LOG 0x00004001
+
+/*
+ * hidden implementation-specific options; these are NOT user visible (should
+ * move out of sctp.h)
+ */
+/* sctp_bindx() flags as hidden socket options */
+#define SCTP_BINDX_ADD_ADDR 0x00008001
+#define SCTP_BINDX_REM_ADDR 0x00008002
+/* Hidden socket option that gets the addresses */
+#define SCTP_GET_PEER_ADDRESSES 0x00008003
+#define SCTP_GET_LOCAL_ADDRESSES 0x00008004
+/* return the total count in bytes needed to hold all local addresses bound */
+#define SCTP_GET_LOCAL_ADDR_SIZE 0x00008005
+/* Return the total count in bytes needed to hold the remote address */
+#define SCTP_GET_REMOTE_ADDR_SIZE 0x00008006
+/* hidden option for connectx */
+#define SCTP_CONNECT_X 0x00008007
+/* hidden option for connectx_delayed, part of sendx */
+#define SCTP_CONNECT_X_DELAYED 0x00008008
+#define SCTP_CONNECT_X_COMPLETE 0x00008009
+/* hidden socket option based sctp_peeloff */
+#define SCTP_PEELOFF 0x0000800a
+/* the real worker for sctp_getaddrlen() */
+#define SCTP_GET_ADDR_LEN 0x0000800b
+/* temporary workaround for Apple listen() issue, no args used */
+#define SCTP_LISTEN_FIX 0x0000800c
+/* Debug things that need to be purged */
+#define SCTP_SET_INITIAL_DBG_SEQ 0x00009f00
+
+/* JRS - Supported congestion control modules for pluggable
+ * congestion control
+ */
+/* Standard TCP Congestion Control */
+#define SCTP_CC_RFC2581 0x00000000
+/* High Speed TCP Congestion Control (Floyd) */
+#define SCTP_CC_HSTCP 0x00000001
+/* HTCP Congestion Control */
+#define SCTP_CC_HTCP 0x00000002
+
+
+/* fragment interleave constants
+ * the setting must be one of these or
+ * EINVAL is returned.
+ */
+#define SCTP_FRAG_LEVEL_0 0x00000000
+#define SCTP_FRAG_LEVEL_1 0x00000001
+#define SCTP_FRAG_LEVEL_2 0x00000002
+
+/*
+ * user state values
+ */
+#define SCTP_CLOSED 0x0000
+#define SCTP_BOUND 0x1000
+#define SCTP_LISTEN 0x2000
+#define SCTP_COOKIE_WAIT 0x0002
+#define SCTP_COOKIE_ECHOED 0x0004
+#define SCTP_ESTABLISHED 0x0008
+#define SCTP_SHUTDOWN_SENT 0x0010
+#define SCTP_SHUTDOWN_RECEIVED 0x0020
+#define SCTP_SHUTDOWN_ACK_SENT 0x0040
+#define SCTP_SHUTDOWN_PENDING 0x0080
+
+/*
+ * SCTP operational error codes (user visible)
+ */
+#define SCTP_CAUSE_NO_ERROR 0x0000
+#define SCTP_CAUSE_INVALID_STREAM 0x0001
+#define SCTP_CAUSE_MISSING_PARAM 0x0002
+#define SCTP_CAUSE_STALE_COOKIE 0x0003
+#define SCTP_CAUSE_OUT_OF_RESC 0x0004
+#define SCTP_CAUSE_UNRESOLVABLE_ADDR 0x0005
+#define SCTP_CAUSE_UNRECOG_CHUNK 0x0006
+#define SCTP_CAUSE_INVALID_PARAM 0x0007
+#define SCTP_CAUSE_UNRECOG_PARAM 0x0008
+#define SCTP_CAUSE_NO_USER_DATA 0x0009
+#define SCTP_CAUSE_COOKIE_IN_SHUTDOWN 0x000a
+#define SCTP_CAUSE_RESTART_W_NEWADDR 0x000b
+#define SCTP_CAUSE_USER_INITIATED_ABT 0x000c
+#define SCTP_CAUSE_PROTOCOL_VIOLATION 0x000d
+
+/* Error causes from RFC5061 */
+#define SCTP_CAUSE_DELETING_LAST_ADDR 0xa0
+#define SCTP_CAUSE_RESOURCE_SHORTAGE 0xa1
+#define SCTP_CAUSE_DELETING_SRC_ADDR 0xa2
+#define SCTP_CAUSE_ILLEGAL_ASCONF_ACK 0xa3
+#define SCTP_CAUSE_REQUEST_REFUSED 0xa4
+
+/* Error causes from RFC4895 */
+#define SCTP_CAUSE_UNSUPPORTED_HMACID 0x0105
+
+/*
+ * error cause parameters (user visible)
+ */
+ struct sctp_error_cause {
+ uint16_t code;
+ uint16_t length;
+ /* optional cause-specific info may follow */
+ } __attribute__((packed));
+
+ struct sctp_error_invalid_stream {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_INVALID_STRE
+ * AM */
+ uint16_t stream_id; /* stream id of the DATA in error */
+ uint16_t reserved;
+ } __attribute__((packed));
+
+ struct sctp_error_missing_param {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_MISSING_PARA
+ * M */
+ uint32_t num_missing_params; /* number of missing
+ * parameters */
+ /* uint16_t param_type's follow */
+ } __attribute__((packed));
+
+ struct sctp_error_stale_cookie {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_STALE_COOKIE
+ * */
+ uint32_t stale_time; /* time in usec of staleness */
+ } __attribute__((packed));
+
+ struct sctp_error_out_of_resource {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_OUT_OF_RESOU
+ * RCES */
+ } __attribute__((packed));
+
+ struct sctp_error_unresolv_addr {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_UNRESOLVABLE
+ * _ADDR */
+
+ } __attribute__((packed));
+
+ struct sctp_error_unrecognized_chunk {
+ struct sctp_error_cause cause; /* code=SCTP_ERROR_UNRECOG_CHUN
+ * K */
+ struct sctp_chunkhdr ch; /* header from chunk in error */
+ } __attribute__((packed));
+
+/*
+ * Main SCTP chunk types we place these here so natd and f/w's in user land
+ * can find them.
+ */
+/************0x00 series ***********/
+#define SCTP_DATA 0x00
+#define SCTP_INITIATION 0x01
+#define SCTP_INITIATION_ACK 0x02
+#define SCTP_SELECTIVE_ACK 0x03
+#define SCTP_HEARTBEAT_REQUEST 0x04
+#define SCTP_HEARTBEAT_ACK 0x05
+#define SCTP_ABORT_ASSOCIATION 0x06
+#define SCTP_SHUTDOWN 0x07
+#define SCTP_SHUTDOWN_ACK 0x08
+#define SCTP_OPERATION_ERROR 0x09
+#define SCTP_COOKIE_ECHO 0x0a
+#define SCTP_COOKIE_ACK 0x0b
+#define SCTP_ECN_ECHO 0x0c
+#define SCTP_ECN_CWR 0x0d
+#define SCTP_SHUTDOWN_COMPLETE 0x0e
+/* RFC4895 */
+#define SCTP_AUTHENTICATION 0x0f
+/************0x40 series ***********/
+/************0x80 series ***********/
+/* RFC5061 */
+#define SCTP_ASCONF_ACK 0x80
+/* draft-ietf-stewart-pktdrpsctp */
+#define SCTP_PACKET_DROPPED 0x81
+/* draft-ietf-stewart-strreset-xxx */
+#define SCTP_STREAM_RESET 0x82
+
+/* RFC4820 */
+#define SCTP_PAD_CHUNK 0x84
+/************0xc0 series ***********/
+/* RFC3758 */
+#define SCTP_FORWARD_CUM_TSN 0xc0
+/* RFC5061 */
+#define SCTP_ASCONF 0xc1
+
+
+/* ABORT and SHUTDOWN COMPLETE FLAG */
+#define SCTP_HAD_NO_TCB 0x01
+
+/* Packet dropped flags */
+#define SCTP_FROM_MIDDLE_BOX SCTP_HAD_NO_TCB
+#define SCTP_BADCRC 0x02
+#define SCTP_PACKET_TRUNCATED 0x04
+
+#define SCTP_SAT_NETWORK_MIN 400 /* min ms for RTT to set satellite
+ * time */
+#define SCTP_SAT_NETWORK_BURST_INCR 2 /* how many times to multiply maxburst
+ * in sat */
+
+/* Data Chunk Specific Flags */
+#define SCTP_DATA_FRAG_MASK 0x03
+#define SCTP_DATA_MIDDLE_FRAG 0x00
+#define SCTP_DATA_LAST_FRAG 0x01
+#define SCTP_DATA_FIRST_FRAG 0x02
+#define SCTP_DATA_NOT_FRAG 0x03
+#define SCTP_DATA_UNORDERED 0x04
+
+/* ECN Nonce: SACK Chunk Specific Flags */
+#define SCTP_SACK_NONCE_SUM 0x01
+
+/* CMT DAC algorithm SACK flag */
+#define SCTP_SACK_CMT_DAC 0x80
+
+/*
+ * PCB flags (in sctp_flags bitmask).
+ * Note the features and flags are meant
+ * for use by netstat.
+ */
+#define SCTP_PCB_FLAGS_UDPTYPE 0x00000001
+#define SCTP_PCB_FLAGS_TCPTYPE 0x00000002
+#define SCTP_PCB_FLAGS_BOUNDALL 0x00000004
+#define SCTP_PCB_FLAGS_ACCEPTING 0x00000008
+#define SCTP_PCB_FLAGS_UNBOUND 0x00000010
+#define SCTP_PCB_FLAGS_LISTENING 0x00000020
+#define SCTP_PCB_FLAGS_CLOSE_IP 0x00040000
+#define SCTP_PCB_FLAGS_WAS_CONNECTED 0x00080000
+#define SCTP_PCB_FLAGS_WAS_ABORTED 0x00100000
+/* TCP model support */
+
+#define SCTP_PCB_FLAGS_CONNECTED 0x00200000
+#define SCTP_PCB_FLAGS_IN_TCPPOOL 0x00400000
+#define SCTP_PCB_FLAGS_DONT_WAKE 0x00800000
+#define SCTP_PCB_FLAGS_WAKEOUTPUT 0x01000000
+#define SCTP_PCB_FLAGS_WAKEINPUT 0x02000000
+#define SCTP_PCB_FLAGS_BOUND_V6 0x04000000
+#define SCTP_PCB_FLAGS_NEEDS_MAPPED_V4 0x08000000
+#define SCTP_PCB_FLAGS_BLOCKING_IO 0x10000000
+#define SCTP_PCB_FLAGS_SOCKET_GONE 0x20000000
+#define SCTP_PCB_FLAGS_SOCKET_ALLGONE 0x40000000
+/* flags to copy to new PCB */
+#define SCTP_PCB_COPY_FLAGS 0x0e000004
+
+
+/*
+ * PCB Features (in sctp_features bitmask)
+ */
+#define SCTP_PCB_FLAGS_EXT_RCVINFO 0x00000002
+#define SCTP_PCB_FLAGS_DONOT_HEARTBEAT 0x00000004
+#define SCTP_PCB_FLAGS_FRAG_INTERLEAVE 0x00000008
+#define SCTP_PCB_FLAGS_INTERLEAVE_STRMS 0x00000010
+#define SCTP_PCB_FLAGS_DO_ASCONF 0x00000020
+#define SCTP_PCB_FLAGS_AUTO_ASCONF 0x00000040
+#define SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE 0x00000080
+
+/* socket options */
+#define SCTP_PCB_FLAGS_NODELAY 0x00000100
+#define SCTP_PCB_FLAGS_AUTOCLOSE 0x00000200
+#define SCTP_PCB_FLAGS_RECVDATAIOEVNT 0x00000400
+#define SCTP_PCB_FLAGS_RECVASSOCEVNT 0x00000800
+#define SCTP_PCB_FLAGS_RECVPADDREVNT 0x00001000
+#define SCTP_PCB_FLAGS_RECVPEERERR 0x00002000
+#define SCTP_PCB_FLAGS_RECVSENDFAILEVNT 0x00004000
+#define SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT 0x00008000
+#define SCTP_PCB_FLAGS_ADAPTATIONEVNT 0x00010000
+#define SCTP_PCB_FLAGS_PDAPIEVNT 0x00020000
+#define SCTP_PCB_FLAGS_AUTHEVNT 0x00040000
+#define SCTP_PCB_FLAGS_STREAM_RESETEVNT 0x00080000
+#define SCTP_PCB_FLAGS_NO_FRAGMENT 0x00100000
+#define SCTP_PCB_FLAGS_EXPLICIT_EOR 0x00400000
+
+/*-
+ * mobility_features parameters (by micchie). Note
+ * these features are applied against the
+ * sctp_mobility_features flags, not the sctp_features
+ * flags.
+ */
+#define SCTP_MOBILITY_BASE 0x00000001
+#define SCTP_MOBILITY_FASTHANDOFF 0x00000002
+#define SCTP_MOBILITY_PRIM_DELETED 0x00000004
+
+
+#define SCTP_SMALLEST_PMTU 512 /* smallest pmtu allowed when disabling PMTU
+ * discovery */
+
+#include <netinet/sctp_uio.h>
+
+/* This dictates the size of the packet
+ * collection buffer. This only applies
+ * if SCTP_PACKET_LOGGING is enabled in
+ * your config.
+ */
+#define SCTP_PACKET_LOG_SIZE 65536
+
+/* Maximum delays and such a user can set for options that
+ * take ms.
+ */
+#define SCTP_MAX_SACK_DELAY 500 /* per RFC4960 */
+#define SCTP_MAX_HB_INTERVAL 14400000 /* 4 hours in ms */
+#define SCTP_MAX_COOKIE_LIFE 3600000 /* 1 hour in ms */
+
+
+/* Types of logging/KTR tracing that can be enabled via the
+ * sysctl net.inet.sctp.sctp_logging. You must also enable
+ * SUBSYS tracing.
+ * Note that you must have the SCTP option in the kernel
+ * to enable these as well.
+ */
+#define SCTP_BLK_LOGGING_ENABLE 0x00000001
+#define SCTP_CWND_MONITOR_ENABLE 0x00000002
+#define SCTP_CWND_LOGGING_ENABLE 0x00000004
+#define SCTP_EARLYFR_LOGGING_ENABLE 0x00000010
+#define SCTP_FLIGHT_LOGGING_ENABLE 0x00000020
+#define SCTP_FR_LOGGING_ENABLE 0x00000040
+#define SCTP_LOCK_LOGGING_ENABLE 0x00000080
+#define SCTP_MAP_LOGGING_ENABLE 0x00000100
+#define SCTP_MBCNT_LOGGING_ENABLE 0x00000200
+#define SCTP_MBUF_LOGGING_ENABLE 0x00000400
+#define SCTP_NAGLE_LOGGING_ENABLE 0x00000800
+#define SCTP_RECV_RWND_LOGGING_ENABLE 0x00001000
+#define SCTP_RTTVAR_LOGGING_ENABLE 0x00002000
+#define SCTP_SACK_LOGGING_ENABLE 0x00004000
+#define SCTP_SACK_RWND_LOGGING_ENABLE 0x00008000
+#define SCTP_SB_LOGGING_ENABLE 0x00010000
+#define SCTP_STR_LOGGING_ENABLE 0x00020000
+#define SCTP_WAKE_LOGGING_ENABLE 0x00040000
+#define SCTP_LOG_MAXBURST_ENABLE 0x00080000
+#define SCTP_LOG_RWND_ENABLE 0x00100000
+#define SCTP_LOG_SACK_ARRIVALS_ENABLE 0x00200000
+#define SCTP_LTRACE_CHUNK_ENABLE 0x00400000
+#define SCTP_LTRACE_ERROR_ENABLE 0x00800000
+#define SCTP_LAST_PACKET_TRACING 0x01000000
+#define SCTP_THRESHOLD_LOGGING 0x02000000
+#define SCTP_LOG_AT_SEND_2_SCTP 0x04000000
+#define SCTP_LOG_AT_SEND_2_OUTQ 0x08000000
+
+
+
+#endif /* !_NETINET_SCTP_H_ */
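
Most of the read-write options declared in this header are consumed as plain int values through setsockopt(2)/getsockopt(2) at level IPPROTO_SCTP. The following is only a minimal userland sketch of that usage, assuming a kernel built with SCTP support and the header installed as <netinet/sctp.h>; it is not part of the imported sources:

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/sctp.h>
    #include <err.h>
    #include <unistd.h>

    int
    main(void)
    {
            int sd, on = 1, secs = 30;

            /* One-to-many ("UDP model") SCTP socket. */
            sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
            if (sd == -1)
                    err(1, "socket");

            /* SCTP_NODELAY: send chunks without waiting to bundle. */
            if (setsockopt(sd, IPPROTO_SCTP, SCTP_NODELAY,
                &on, sizeof(on)) == -1)
                    warn("SCTP_NODELAY");

            /* SCTP_AUTOCLOSE: idle associations close after 30 seconds. */
            if (setsockopt(sd, IPPROTO_SCTP, SCTP_AUTOCLOSE,
                &secs, sizeof(secs)) == -1)
                    warn("SCTP_AUTOCLOSE");

            close(sd);
            return (0);
    }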
Index: in_gif.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/in_gif.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/in_gif.c -L sys/netinet/in_gif.c -u -r1.1.1.2 -r1.2
--- sys/netinet/in_gif.c
+++ sys/netinet/in_gif.c
@@ -1,4 +1,3 @@
-/* $FreeBSD: src/sys/netinet/in_gif.c,v 1.31.2.3 2006/01/31 15:56:46 glebius Exp $ */
/* $KAME: in_gif.c,v 1.54 2001/05/14 14:02:16 itojun Exp $ */
/*-
@@ -30,6 +29,9 @@
* SUCH DAMAGE.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/in_gif.c,v 1.38 2007/10/07 20:44:22 silby Exp $");
+
#include "opt_mrouting.h"
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -68,8 +70,6 @@
#include <net/if_gif.h>
-#include <net/net_osdep.h>
-
static int gif_validate4(const struct ip *, struct gif_softc *,
struct ifnet *);
@@ -90,10 +90,7 @@
&ip_gif_ttl, 0, "");
int
-in_gif_output(ifp, family, m)
- struct ifnet *ifp;
- int family;
- struct mbuf *m;
+in_gif_output(struct ifnet *ifp, int family, struct mbuf *m)
{
struct gif_softc *sc = ifp->if_softc;
struct sockaddr_in *dst = (struct sockaddr_in *)&sc->gif_ro.ro_dst;
@@ -240,9 +237,7 @@
}
void
-in_gif_input(m, off)
- struct mbuf *m;
- int off;
+in_gif_input(struct mbuf *m, int off)
{
struct ifnet *gifp = NULL;
struct gif_softc *sc;
@@ -336,10 +331,7 @@
* validate outer address.
*/
static int
-gif_validate4(ip, sc, ifp)
- const struct ip *ip;
- struct gif_softc *sc;
- struct ifnet *ifp;
+gif_validate4(const struct ip *ip, struct gif_softc *sc, struct ifnet *ifp)
{
struct sockaddr_in *src, *dst;
struct in_ifaddr *ia4;
@@ -384,10 +376,10 @@
(u_int32_t)ntohl(sin.sin_addr.s_addr));
#endif
if (rt)
- rtfree(rt);
+ RTFREE_LOCKED(rt);
return 0;
}
- rtfree(rt);
+ RTFREE_LOCKED(rt);
}
return 32 * 2;
@@ -398,11 +390,7 @@
* matched the physical addr family. see gif_encapcheck().
*/
int
-gif_encapcheck4(m, off, proto, arg)
- const struct mbuf *m;
- int off;
- int proto;
- void *arg;
+gif_encapcheck4(const struct mbuf *m, int off, int proto, void *arg)
{
struct ip ip;
struct gif_softc *sc;
@@ -419,8 +407,7 @@
}
int
-in_gif_attach(sc)
- struct gif_softc *sc;
+in_gif_attach(struct gif_softc *sc)
{
sc->encap_cookie4 = encap_attach_func(AF_INET, -1, gif_encapcheck,
&in_gif_protosw, sc);
@@ -430,8 +417,7 @@
}
int
-in_gif_detach(sc)
- struct gif_softc *sc;
+in_gif_detach(struct gif_softc *sc)
{
int error;
--- /dev/null
+++ sys/netinet/sctp_asconf.c
@@ -0,0 +1,3062 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_asconf.c,v 1.24 2005/03/06 16:04:16 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_asconf.c,v 1.34 2007/10/01 03:22:28 rrs Exp $");
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_timer.h>
+
+/*
+ * debug flags:
+ * SCTP_DEBUG_ASCONF1: protocol info, general info and errors
+ * SCTP_DEBUG_ASCONF2: detailed info
+ */
+#ifdef SCTP_DEBUG
+#endif /* SCTP_DEBUG */
+
+
+static void
+sctp_asconf_get_source_ip(struct mbuf *m, struct sockaddr *sa)
+{
+ struct ip *iph;
+ struct sockaddr_in *sin;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+
+#endif
+
+ iph = mtod(m, struct ip *);
+ if (iph->ip_v == IPVERSION) {
+ /* IPv4 source */
+ sin = (struct sockaddr_in *)sa;
+ bzero(sin, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_port = 0;
+ sin->sin_addr.s_addr = iph->ip_src.s_addr;
+ return;
+ }
+#ifdef INET6
+ else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ /* IPv6 source */
+ struct ip6_hdr *ip6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ bzero(sin6, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ sin6->sin6_port = 0;
+ ip6 = mtod(m, struct ip6_hdr *);
+ sin6->sin6_addr = ip6->ip6_src;
+ return;
+ }
+#endif /* INET6 */
+ else
+ return;
+}
+
+/*
+ * draft-ietf-tsvwg-addip-sctp
+ *
+ * An ASCONF parameter queue exists per asoc which holds the pending address
+ * operations. Lists are updated upon receipt of ASCONF-ACK.
+ *
+ * A restricted_addrs list exists per assoc to hold local addresses that are
+ * not (yet) usable by the assoc as a source address. These addresses are
+ * either pending an ASCONF operation (and exist on the ASCONF parameter
+ * queue), or they are permanently restricted (the peer has returned an
+ * ERROR indication to an ASCONF(ADD), or the peer does not support ASCONF).
+ *
+ * Deleted addresses are always immediately removed from the lists as they will
+ * (shortly) no longer exist in the kernel. We send ASCONFs as a courtesy,
+ * only if allowed.
+ */
+
+/*
+ * ASCONF parameter processing.
+ * response_required: set if a reply is required (e.g. SUCCESS_REPORT).
+ * returns an mbuf with an "error" response parameter, or NULL ("success") if ok.
+ * FIX: allocating this many mbufs on the fly is pretty inefficient...
+ */
+static struct mbuf *
+sctp_asconf_success_response(uint32_t id)
+{
+ struct mbuf *m_reply = NULL;
+ struct sctp_asconf_paramhdr *aph;
+
+ m_reply = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_paramhdr),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_reply == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_success_response: couldn't get mbuf!\n");
+ return NULL;
+ }
+ aph = mtod(m_reply, struct sctp_asconf_paramhdr *);
+ aph->correlation_id = id;
+ aph->ph.param_type = htons(SCTP_SUCCESS_REPORT);
+ aph->ph.param_length = sizeof(struct sctp_asconf_paramhdr);
+ SCTP_BUF_LEN(m_reply) = aph->ph.param_length;
+ aph->ph.param_length = htons(aph->ph.param_length);
+
+ return m_reply;
+}
+
+static struct mbuf *
+sctp_asconf_error_response(uint32_t id, uint16_t cause, uint8_t * error_tlv,
+ uint16_t tlv_length)
+{
+ struct mbuf *m_reply = NULL;
+ struct sctp_asconf_paramhdr *aph;
+ struct sctp_error_cause *error;
+ uint8_t *tlv;
+
+ m_reply = sctp_get_mbuf_for_msg((sizeof(struct sctp_asconf_paramhdr) +
+ tlv_length +
+ sizeof(struct sctp_error_cause)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_reply == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_error_response: couldn't get mbuf!\n");
+ return NULL;
+ }
+ aph = mtod(m_reply, struct sctp_asconf_paramhdr *);
+ error = (struct sctp_error_cause *)(aph + 1);
+
+ aph->correlation_id = id;
+ aph->ph.param_type = htons(SCTP_ERROR_CAUSE_IND);
+ error->code = htons(cause);
+ error->length = tlv_length + sizeof(struct sctp_error_cause);
+ aph->ph.param_length = error->length +
+ sizeof(struct sctp_asconf_paramhdr);
+
+ if (aph->ph.param_length > MLEN) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_error_response: tlv_length (%xh) too big\n",
+ tlv_length);
+ sctp_m_freem(m_reply); /* discard */
+ return NULL;
+ }
+ if (error_tlv != NULL) {
+ tlv = (uint8_t *) (error + 1);
+ memcpy(tlv, error_tlv, tlv_length);
+ }
+ SCTP_BUF_LEN(m_reply) = aph->ph.param_length;
+ error->length = htons(error->length);
+ aph->ph.param_length = htons(aph->ph.param_length);
+
+ return m_reply;
+}
+
+static struct mbuf *
+sctp_process_asconf_add_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph,
+ struct sctp_tcb *stcb, int response_required)
+{
+ struct mbuf *m_reply = NULL;
+ struct sockaddr_storage sa_source, sa_store;
+ struct sctp_ipv4addr_param *v4addr;
+ uint16_t param_type, param_length, aparam_length;
+ struct sockaddr *sa;
+ struct sockaddr_in *sin;
+ int zero_address = 0;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+ struct sctp_ipv6addr_param *v6addr;
+
+#endif /* INET6 */
+
+ aparam_length = ntohs(aph->ph.param_length);
+ v4addr = (struct sctp_ipv4addr_param *)(aph + 1);
+#ifdef INET6
+ v6addr = (struct sctp_ipv6addr_param *)(aph + 1);
+#endif /* INET6 */
+ param_type = ntohs(v4addr->ph.param_type);
+ param_length = ntohs(v4addr->ph.param_length);
+
+ sa = (struct sockaddr *)&sa_store;
+ switch (param_type) {
+ case SCTP_IPV4_ADDRESS:
+ if (param_length != sizeof(struct sctp_ipv4addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+ sin = (struct sockaddr_in *)&sa_store;
+ bzero(sin, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_port = stcb->rport;
+ sin->sin_addr.s_addr = v4addr->addr;
+ if (sin->sin_addr.s_addr == INADDR_ANY)
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_add_ip: adding ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ break;
+ case SCTP_IPV6_ADDRESS:
+#ifdef INET6
+ if (param_length != sizeof(struct sctp_ipv6addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+ sin6 = (struct sockaddr_in6 *)&sa_store;
+ bzero(sin6, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ sin6->sin6_port = stcb->rport;
+ memcpy((caddr_t)&sin6->sin6_addr, v6addr->addr,
+ sizeof(struct in6_addr));
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_add_ip: adding ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+#else
+ /* IPv6 not enabled! */
+ /* FIX ME: currently sends back an invalid param error */
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_INVALID_PARAM, (uint8_t *) aph, aparam_length);
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_add_ip: v6 disabled- skipping ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ return m_reply;
+#endif
+ break;
+ default:
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_UNRESOLVABLE_ADDR, (uint8_t *) aph,
+ aparam_length);
+ return m_reply;
+ } /* end switch */
+
+ /* if 0.0.0.0/::0, add the source address instead */
+ if (zero_address && sctp_nat_friendly) {
+ sa = (struct sockaddr *)&sa_source;
+ sctp_asconf_get_source_ip(m, sa);
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_add_ip: using source addr ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ }
+ /* add the address */
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE,
+ SCTP_ADDR_DYNAMIC_ADDED) != 0) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_add_ip: error adding address\n");
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_RESOURCE_SHORTAGE, (uint8_t *) aph,
+ aparam_length);
+ } else {
+ /* notify upper layer */
+ sctp_ulp_notify(SCTP_NOTIFY_ASCONF_ADD_IP, stcb, 0, sa, SCTP_SO_NOT_LOCKED);
+ if (response_required) {
+ m_reply =
+ sctp_asconf_success_response(aph->correlation_id);
+ }
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb,
+ NULL, SCTP_FROM_SCTP_ASCONF + SCTP_LOC_1);
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep,
+ stcb, NULL);
+ }
+
+ return m_reply;
+}
+
+static int
+sctp_asconf_del_remote_addrs_except(struct sctp_tcb *stcb, struct sockaddr *src)
+{
+ struct sctp_nets *src_net, *net;
+
+ /* make sure the source address exists as a destination net */
+ src_net = sctp_findnet(stcb, src);
+ if (src_net == NULL) {
+ /* not found */
+ return -1;
+ }
+ /* delete all destination addresses except the source */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (net != src_net) {
+ /* delete this address */
+ sctp_remove_net(stcb, net);
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_del_remote_addrs_except: deleting ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1,
+ (struct sockaddr *)&net->ro._l_addr);
+ /* notify upper layer */
+ sctp_ulp_notify(SCTP_NOTIFY_ASCONF_DELETE_IP, stcb, 0,
+ (struct sockaddr *)&net->ro._l_addr, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ return 0;
+}
+
+static struct mbuf *
+sctp_process_asconf_delete_ip(struct mbuf *m, struct sctp_asconf_paramhdr *aph,
+ struct sctp_tcb *stcb, int response_required)
+{
+ struct mbuf *m_reply = NULL;
+ struct sockaddr_storage sa_source, sa_store;
+ struct sctp_ipv4addr_param *v4addr;
+ uint16_t param_type, param_length, aparam_length;
+ struct sockaddr *sa;
+ struct sockaddr_in *sin;
+ int zero_address = 0;
+ int result;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+ struct sctp_ipv6addr_param *v6addr;
+
+#endif /* INET6 */
+
+ /* get the source IP address for src and 0.0.0.0/::0 delete checks */
+ sctp_asconf_get_source_ip(m, (struct sockaddr *)&sa_source);
+
+ aparam_length = ntohs(aph->ph.param_length);
+ v4addr = (struct sctp_ipv4addr_param *)(aph + 1);
+#ifdef INET6
+ v6addr = (struct sctp_ipv6addr_param *)(aph + 1);
+#endif /* INET6 */
+ param_type = ntohs(v4addr->ph.param_type);
+ param_length = ntohs(v4addr->ph.param_length);
+
+ sa = (struct sockaddr *)&sa_store;
+ switch (param_type) {
+ case SCTP_IPV4_ADDRESS:
+ if (param_length != sizeof(struct sctp_ipv4addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+ sin = (struct sockaddr_in *)&sa_store;
+ bzero(sin, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_port = stcb->rport;
+ sin->sin_addr.s_addr = v4addr->addr;
+ if (sin->sin_addr.s_addr == INADDR_ANY)
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_delete_ip: deleting ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ break;
+ case SCTP_IPV6_ADDRESS:
+ if (param_length != sizeof(struct sctp_ipv6addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+#ifdef INET6
+ sin6 = (struct sockaddr_in6 *)&sa_store;
+ bzero(sin6, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ sin6->sin6_port = stcb->rport;
+ memcpy(&sin6->sin6_addr, v6addr->addr,
+ sizeof(struct in6_addr));
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_delete_ip: deleting ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+#else
+ /* IPv6 not enabled! No "action" needed; just ack it */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_delete_ip: v6 disabled- ignoring: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ /* just respond with a "success" ASCONF-ACK */
+ return NULL;
+#endif
+ break;
+ default:
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_UNRESOLVABLE_ADDR, (uint8_t *) aph,
+ aparam_length);
+ return m_reply;
+ }
+
+ /* make sure the source address is not being deleted */
+ if (sctp_cmpaddr(sa, (struct sockaddr *)&sa_source)) {
+ /* trying to delete the source address! */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_delete_ip: tried to delete source addr\n");
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_DELETING_SRC_ADDR, (uint8_t *) aph,
+ aparam_length);
+ return m_reply;
+ }
+ /* if deleting 0.0.0.0/::0, delete all addresses except src addr */
+ if (zero_address && sctp_nat_friendly) {
+ result = sctp_asconf_del_remote_addrs_except(stcb,
+ (struct sockaddr *)&sa_source);
+
+ if (result) {
+ /* src address did not exist? */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_delete_ip: src addr does not exist?\n");
+ /* what error to reply with?? */
+ m_reply =
+ sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_REQUEST_REFUSED, (uint8_t *) aph,
+ aparam_length);
+ } else if (response_required) {
+ m_reply =
+ sctp_asconf_success_response(aph->correlation_id);
+ }
+ return m_reply;
+ }
+ /* delete the address */
+ result = sctp_del_remote_addr(stcb, sa);
+ /*
+ * note if result == -2, the address doesn't exist in the asoc but
+ * since it's being deleted anyway, we just ack the delete -- but
+ * this probably means something has already gone awry
+ */
+ if (result == -1) {
+ /* only one address in the asoc */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_delete_ip: tried to delete last IP addr!\n");
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_DELETING_LAST_ADDR, (uint8_t *) aph,
+ aparam_length);
+ } else {
+ if (response_required) {
+ m_reply = sctp_asconf_success_response(aph->correlation_id);
+ }
+ /* notify upper layer */
+ sctp_ulp_notify(SCTP_NOTIFY_ASCONF_DELETE_IP, stcb, 0, sa, SCTP_SO_NOT_LOCKED);
+ }
+ return m_reply;
+}
+
+static struct mbuf *
+sctp_process_asconf_set_primary(struct mbuf *m,
+ struct sctp_asconf_paramhdr *aph,
+ struct sctp_tcb *stcb, int response_required)
+{
+ struct mbuf *m_reply = NULL;
+ struct sockaddr_storage sa_source, sa_store;
+ struct sctp_ipv4addr_param *v4addr;
+ uint16_t param_type, param_length, aparam_length;
+ struct sockaddr *sa;
+ struct sockaddr_in *sin;
+ int zero_address = 0;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+ struct sctp_ipv6addr_param *v6addr;
+
+#endif /* INET6 */
+
+ aparam_length = ntohs(aph->ph.param_length);
+ v4addr = (struct sctp_ipv4addr_param *)(aph + 1);
+#ifdef INET6
+ v6addr = (struct sctp_ipv6addr_param *)(aph + 1);
+#endif /* INET6 */
+ param_type = ntohs(v4addr->ph.param_type);
+ param_length = ntohs(v4addr->ph.param_length);
+
+ sa = (struct sockaddr *)&sa_store;
+ switch (param_type) {
+ case SCTP_IPV4_ADDRESS:
+ if (param_length != sizeof(struct sctp_ipv4addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+ sin = (struct sockaddr_in *)&sa_store;
+ bzero(sin, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_addr.s_addr = v4addr->addr;
+ if (sin->sin_addr.s_addr == INADDR_ANY)
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_set_primary: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ break;
+ case SCTP_IPV6_ADDRESS:
+ if (param_length != sizeof(struct sctp_ipv6addr_param)) {
+ /* invalid param size */
+ return NULL;
+ }
+#ifdef INET6
+ sin6 = (struct sockaddr_in6 *)&sa_store;
+ bzero(sin6, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ memcpy((caddr_t)&sin6->sin6_addr, v6addr->addr,
+ sizeof(struct in6_addr));
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ zero_address = 1;
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "process_asconf_set_primary: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+#else
+ /* IPv6 not enabled! No "action" needed; just ack it */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_set_primary: v6 disabled- ignoring: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ /* just respond with a "success" ASCONF-ACK */
+ return NULL;
+#endif
+ break;
+ default:
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_UNRESOLVABLE_ADDR, (uint8_t *) aph,
+ aparam_length);
+ return m_reply;
+ }
+
+ /* if 0.0.0.0/::0, use the source address instead */
+ if (zero_address && sctp_nat_friendly) {
+ sa = (struct sockaddr *)&sa_source;
+ sctp_asconf_get_source_ip(m, sa);
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_set_primary: using source addr ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ }
+ /* set the primary address */
+ if (sctp_set_primary_addr(stcb, sa, NULL) == 0) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_set_primary: primary address set\n");
+ /* notify upper layer */
+ sctp_ulp_notify(SCTP_NOTIFY_ASCONF_SET_PRIMARY, stcb, 0, sa, SCTP_SO_NOT_LOCKED);
+
+ if (response_required) {
+ m_reply = sctp_asconf_success_response(aph->correlation_id);
+ }
+ /*
+ * Mobility adaptation. Ideally, upon reception of a SET
+ * PRIMARY with DELETE IP ADDRESS of the previous primary
+ * destination, unacknowledged DATA is retransmitted
+ * immediately to the new primary destination for seamless
+ * handover. If the destination is UNCONFIRMED and marked
+ * REQ_PRIM, the retransmission occurs upon reception of
+ * the HEARTBEAT-ACK. (See sctp_handle_heartbeat_ack in
+ * sctp_input.c.) Also, on a change of the primary
+ * destination, it is better that all subsequent new DATA,
+ * along with already queued DATA, is transmitted to the new
+ * primary destination. (by micchie)
+ */
+ if ((sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE) ||
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) &&
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_PRIM_DELETED) &&
+ (stcb->asoc.primary_destination->dest_state &
+ SCTP_ADDR_UNCONFIRMED) == 0) {
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_TIMER + SCTP_LOC_7);
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ sctp_assoc_immediate_retrans(stcb,
+ stcb->asoc.primary_destination);
+ }
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE)) {
+ sctp_move_chunks_from_deleted_prim(stcb,
+ stcb->asoc.primary_destination);
+ }
+ sctp_delete_prim_timer(stcb->sctp_ep, stcb,
+ stcb->asoc.deleted_primary);
+ }
+ } else {
+ /* couldn't set the requested primary address! */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_asconf_set_primary: set primary failed!\n");
+ /* must have been an invalid address, so report */
+ m_reply = sctp_asconf_error_response(aph->correlation_id,
+ SCTP_CAUSE_UNRESOLVABLE_ADDR, (uint8_t *) aph,
+ aparam_length);
+ }
+
+ return m_reply;
+}
+
+/*
+ * handles an ASCONF chunk.
+ * if all parameters are processed ok, send a plain (empty) ASCONF-ACK
+ */
+void
+sctp_handle_asconf(struct mbuf *m, unsigned int offset,
+ struct sctp_asconf_chunk *cp, struct sctp_tcb *stcb,
+ int first)
+{
+ struct sctp_association *asoc;
+ uint32_t serial_num;
+ struct mbuf *n, *m_ack, *m_result, *m_tail;
+ struct sctp_asconf_ack_chunk *ack_cp;
+ struct sctp_asconf_paramhdr *aph, *ack_aph;
+ struct sctp_ipv6addr_param *p_addr;
+ unsigned int asconf_limit;
+ int error = 0; /* did an error occur? */
+
+ /* asconf param buffer */
+ uint8_t aparam_buf[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_asconf_ack *ack, *ack_next;
+
+ /* verify minimum length */
+ if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_asconf_chunk)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf: chunk too small = %xh\n",
+ ntohs(cp->ch.chunk_length));
+ return;
+ }
+ asoc = &stcb->asoc;
+ serial_num = ntohl(cp->serial_number);
+
+ if (compare_with_wrap(asoc->asconf_seq_in, serial_num, MAX_SEQ) ||
+ serial_num == asoc->asconf_seq_in) {
+ /* got a duplicate ASCONF */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf: got duplicate serial number = %xh\n",
+ serial_num);
+ return;
+ } else if (serial_num != (asoc->asconf_seq_in + 1)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: incorrect serial number = %xh (expected next = %xh)\n",
+ serial_num, asoc->asconf_seq_in + 1);
+ return;
+ }
+ /* it's the expected "next" sequence number, so process it */
+ asoc->asconf_seq_in = serial_num; /* update sequence */
+ /* get length of all the param's in the ASCONF */
+ asconf_limit = offset + ntohs(cp->ch.chunk_length);
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf: asconf_limit=%u, sequence=%xh\n",
+ asconf_limit, serial_num);
+
+ if (first) {
+ /* delete old cache */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: Now processing firstASCONF. Try to delte old cache\n");
+
+ ack = TAILQ_FIRST(&stcb->asoc.asconf_ack_sent);
+ while (ack != NULL) {
+ ack_next = TAILQ_NEXT(ack, next);
+ if (ack->serial_number == serial_num)
+ break;
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: delete old(%u) < first(%u)\n",
+ ack->serial_number, serial_num);
+ TAILQ_REMOVE(&stcb->asoc.asconf_ack_sent, ack, next);
+ if (ack->data != NULL) {
+ sctp_m_freem(ack->data);
+ }
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_asconf_ack, ack);
+ ack = ack_next;
+ }
+ }
+ m_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_ack_chunk), 0,
+ M_DONTWAIT, 1, MT_DATA);
+ if (m_ack == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf: couldn't get mbuf!\n");
+ return;
+ }
+ m_tail = m_ack; /* current reply chain's tail */
+
+ /* fill in ASCONF-ACK header */
+ ack_cp = mtod(m_ack, struct sctp_asconf_ack_chunk *);
+ ack_cp->ch.chunk_type = SCTP_ASCONF_ACK;
+ ack_cp->ch.chunk_flags = 0;
+ ack_cp->serial_number = htonl(serial_num);
+ /* set initial lengths (eg. just an ASCONF-ACK), ntohx at the end! */
+ SCTP_BUF_LEN(m_ack) = sizeof(struct sctp_asconf_ack_chunk);
+ ack_cp->ch.chunk_length = sizeof(struct sctp_asconf_ack_chunk);
+
+ /* skip the lookup address parameter */
+ offset += sizeof(struct sctp_asconf_chunk);
+ p_addr = (struct sctp_ipv6addr_param *)sctp_m_getptr(m, offset, sizeof(struct sctp_paramhdr), (uint8_t *) & aparam_buf);
+ if (p_addr == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf: couldn't get lookup addr!\n");
+ /* respond with a missing/invalid mandatory parameter error */
+ return;
+ }
+ /* param_length is already validated in process_control... */
+ offset += ntohs(p_addr->ph.param_length); /* skip lookup addr */
+
+ /* get pointer to first asconf param in ASCONF-ACK */
+ ack_aph = (struct sctp_asconf_paramhdr *)(mtod(m_ack, caddr_t)+sizeof(struct sctp_asconf_ack_chunk));
+ if (ack_aph == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "Gak in asconf2\n");
+ return;
+ }
+ /* get pointer to first asconf param in ASCONF */
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset, sizeof(struct sctp_asconf_paramhdr), (uint8_t *) & aparam_buf);
+ if (aph == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "Empty ASCONF received?\n");
+ goto send_reply;
+ }
+ /* process through all parameters */
+ while (aph != NULL) {
+ unsigned int param_length, param_type;
+
+ param_type = ntohs(aph->ph.param_type);
+ param_length = ntohs(aph->ph.param_length);
+ if (offset + param_length > asconf_limit) {
+ /* parameter goes beyond end of chunk! */
+ sctp_m_freem(m_ack);
+ return;
+ }
+ m_result = NULL;
+
+ if (param_length > sizeof(aparam_buf)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: param length (%u) larger than buffer size!\n", param_length);
+ sctp_m_freem(m_ack);
+ return;
+ }
+ if (param_length <= sizeof(struct sctp_paramhdr)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: param length (%u) too short\n", param_length);
+ sctp_m_freem(m_ack);
+ /* the reply chain was freed; stop processing rather than reuse it */
+ return;
+ }
+ /* get the entire parameter */
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset, param_length, aparam_buf);
+ if (aph == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: couldn't get entire param\n");
+ sctp_m_freem(m_ack);
+ return;
+ }
+ switch (param_type) {
+ case SCTP_ADD_IP_ADDRESS:
+ asoc->peer_supports_asconf = 1;
+ m_result = sctp_process_asconf_add_ip(m, aph, stcb,
+ error);
+ break;
+ case SCTP_DEL_IP_ADDRESS:
+ asoc->peer_supports_asconf = 1;
+ m_result = sctp_process_asconf_delete_ip(m, aph, stcb,
+ error);
+ break;
+ case SCTP_ERROR_CAUSE_IND:
+ /* not valid in an ASCONF chunk */
+ break;
+ case SCTP_SET_PRIM_ADDR:
+ asoc->peer_supports_asconf = 1;
+ m_result = sctp_process_asconf_set_primary(m, aph,
+ stcb, error);
+ break;
+ case SCTP_SUCCESS_REPORT:
+ /* not valid in an ASCONF chunk */
+ break;
+ case SCTP_ULP_ADAPTATION:
+ /* FIX */
+ break;
+ default:
+ if ((param_type & 0x8000) == 0) {
+ /* Been told to STOP at this param */
+ asconf_limit = offset;
+ /*
+ * FIX FIX - We need to call
+ * sctp_arethere_unrecognized_parameters()
+ * to get an operr and send it for any
+ * param's with the 0x4000 bit set OR do it
+ * here ourselves... note we still must STOP
+ * if the 0x8000 bit is clear.
+ */
+ }
+ /* unknown/invalid param type */
+ break;
+ } /* switch */
+
+ /* add any (error) result to the reply mbuf chain */
+ if (m_result != NULL) {
+ SCTP_BUF_NEXT(m_tail) = m_result;
+ m_tail = m_result;
+ /* update lengths, make sure it's aligned too */
+ SCTP_BUF_LEN(m_result) = SCTP_SIZE32(SCTP_BUF_LEN(m_result));
+ ack_cp->ch.chunk_length += SCTP_BUF_LEN(m_result);
+ /* set flag to force success reports */
+ error = 1;
+ }
+ offset += SCTP_SIZE32(param_length);
+ /* update remaining ASCONF message length to process */
+ if (offset >= asconf_limit) {
+ /* no more data in the mbuf chain */
+ break;
+ }
+ /* get pointer to next asconf param */
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_asconf_paramhdr),
+ (uint8_t *) & aparam_buf);
+ if (aph == NULL) {
+ /* can't get an asconf paramhdr */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: can't get asconf param hdr!\n");
+ /* FIX ME - add error here... */
+ }
+ }
+
+send_reply:
+ ack_cp->ch.chunk_length = htons(ack_cp->ch.chunk_length);
+ /* save the ASCONF-ACK reply */
+ ack = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_asconf_ack,
+ struct sctp_asconf_ack);
+ if (ack == NULL) {
+ sctp_m_freem(m_ack);
+ return;
+ }
+ ack->serial_number = serial_num;
+ ack->last_sent_to = NULL;
+ ack->data = m_ack;
+ n = m_ack;
+ while (n) {
+ ack->len += SCTP_BUF_LEN(n);
+ n = SCTP_BUF_NEXT(n);
+ }
+ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_ack_sent, ack, next);
+
+ /* see if last_control_chunk_from is set properly (use IP src addr) */
+ if (stcb->asoc.last_control_chunk_from == NULL) {
+ /*
+ * this could happen if the source address was just newly
+ * added
+ */
+ struct ip *iph;
+ struct sctphdr *sh;
+ struct sockaddr_storage from_store;
+ struct sockaddr *from = (struct sockaddr *)&from_store;
+
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: looking up net for IP source address\n");
+ /* pullup already done, IP options already stripped */
+ iph = mtod(m, struct ip *);
+ sh = (struct sctphdr *)((caddr_t)iph + sizeof(*iph));
+ if (iph->ip_v == IPVERSION) {
+ struct sockaddr_in *from4;
+
+ from4 = (struct sockaddr_in *)&from_store;
+ bzero(from4, sizeof(*from4));
+ from4->sin_family = AF_INET;
+ from4->sin_len = sizeof(struct sockaddr_in);
+ from4->sin_addr.s_addr = iph->ip_src.s_addr;
+ from4->sin_port = sh->src_port;
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 *from6;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ from6 = (struct sockaddr_in6 *)&from_store;
+ bzero(from6, sizeof(*from6));
+ from6->sin6_family = AF_INET6;
+ from6->sin6_len = sizeof(struct sockaddr_in6);
+ from6->sin6_addr = ip6->ip6_src;
+ from6->sin6_port = sh->src_port;
+ /* Get the scopes set properly in the sin6 addrs */
+ /* we probably don't need these operations */
+ (void)sa6_recoverscope(from6);
+ sa6_embedscope(from6, ip6_use_defzone);
+ } else {
+ /* unknown address type */
+ from = NULL;
+ }
+ if (from != NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "Looking for IP source: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, from);
+ /* look up the from address */
+ stcb->asoc.last_control_chunk_from = sctp_findnet(stcb, from);
+#ifdef SCTP_DEBUG
+ if (stcb->asoc.last_control_chunk_from == NULL)
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: IP source address not found?!\n");
+#endif
+ }
+ }
+}
+
+/*
+ * does the address match? returns 0 if not, 1 if so
+ */
+static uint32_t
+sctp_asconf_addr_match(struct sctp_asconf_addr *aa, struct sockaddr *sa)
+{
+#ifdef INET6
+ if (sa->sa_family == AF_INET6) {
+ /* IPv6 sa address */
+ /* XXX scopeid */
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
+
+ if ((aa->ap.addrp.ph.param_type == SCTP_IPV6_ADDRESS) &&
+ (memcmp(&aa->ap.addrp.addr, &sin6->sin6_addr,
+ sizeof(struct in6_addr)) == 0)) {
+ return (1);
+ }
+ } else
+#endif /* INET6 */
+ if (sa->sa_family == AF_INET) {
+ /* IPv4 sa address */
+ struct sockaddr_in *sin = (struct sockaddr_in *)sa;
+
+ if ((aa->ap.addrp.ph.param_type == SCTP_IPV4_ADDRESS) &&
+ (memcmp(&aa->ap.addrp.addr, &sin->sin_addr,
+ sizeof(struct in_addr)) == 0)) {
+ return (1);
+ }
+ }
+ return (0);
+}
+
+/*
+ * Cleanup for non-responded/OP ERR'd ASCONF
+ */
+void
+sctp_asconf_cleanup(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /* mark peer as ASCONF incapable */
+ stcb->asoc.peer_supports_asconf = 0;
+ /*
+ * clear out any existing asconfs going out
+ */
+ sctp_timer_stop(SCTP_TIMER_TYPE_ASCONF, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_ASCONF + SCTP_LOC_2);
+ stcb->asoc.asconf_seq_out++;
+ /* remove the old ASCONF on our outbound queue */
+ sctp_toss_old_asconf(stcb);
+}
+
+/*
+ * cleanup any cached source addresses that may be topologically
+ * incorrect after a new address has been added to this interface.
+ */
+static void
+sctp_asconf_nets_cleanup(struct sctp_tcb *stcb, struct sctp_ifn *ifn)
+{
+ struct sctp_nets *net;
+
+ /*
+ * Ideally, we want to only clear cached routes and source addresses
+ * that are topologically incorrect. But since there is no easy way
+ * to know whether the newly added address on the ifn would cause a
+ * routing change (i.e. a new egress interface would be chosen)
+ * without doing a new routing lookup and source address selection,
+ * we will (for now) just flush any cached route using a different
+ * ifn (and cached source addrs) and let output re-choose them
+ * during the next send on that net.
+ */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ /*
+ * clear any cached route (and cached source address) if the
+ * route's interface is NOT the same as the address change.
+ * If it's the same interface, just clear the cached source
+ * address.
+ */
+ if (SCTP_ROUTE_HAS_VALID_IFN(&net->ro) &&
+ SCTP_GET_IF_INDEX_FROM_ROUTE(&net->ro) != ifn->ifn_index) {
+ /* clear any cached route */
+ RTFREE(net->ro.ro_rt);
+ net->ro.ro_rt = NULL;
+ }
+ /* clear any cached source address */
+ if (net->src_addr_selected) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ }
+}
+
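+/*
+ * move any queued chunks still addressed to the deleted primary over to the
+ * given alternate destination, transferring the net reference counts.
+ */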
+void
+sctp_move_chunks_from_deleted_prim(struct sctp_tcb *stcb, struct sctp_nets *dst)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_out *outs;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_stream_queue_pending *sp;
+
+ if (dst->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ return;
+ }
+ if (stcb->asoc.deleted_primary == NULL) {
+ return;
+ }
+ asoc = &stcb->asoc;
+
+ /*
+ * now go through all the streams, checking for chunks sent to our
+ * bad network.
+ */
+ TAILQ_FOREACH(outs, &asoc->out_wheel, next_spoke) {
+ /* now clean up any chunks here */
+ TAILQ_FOREACH(sp, &outs->outqueue, next) {
+ if (sp->net == asoc->deleted_primary) {
+ sctp_free_remote_addr(sp->net);
+ sp->net = dst;
+ atomic_add_int(&dst->ref_count, 1);
+ }
+ }
+ }
+ /* Now check the pending queue */
+ TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) {
+ if (chk->whoTo == asoc->deleted_primary) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = dst;
+ atomic_add_int(&dst->ref_count, 1);
+ }
+ }
+
+}
+
+
+void
+sctp_assoc_immediate_retrans(struct sctp_tcb *stcb, struct sctp_nets *dstnet)
+{
+ int error;
+
+ if (dstnet->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ return;
+ }
+ if (stcb->asoc.deleted_primary == NULL) {
+ return;
+ }
+ if (!TAILQ_EMPTY(&stcb->asoc.sent_queue)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "assoc_immediate_retrans: Deleted primary is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &stcb->asoc.deleted_primary->ro._l_addr.sa);
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "Current Primary is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &stcb->asoc.primary_destination->ro._l_addr.sa);
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb,
+ stcb->asoc.deleted_primary,
+ SCTP_FROM_SCTP_TIMER + SCTP_LOC_8);
+ stcb->asoc.num_send_timers_up--;
+ if (stcb->asoc.num_send_timers_up < 0) {
+ stcb->asoc.num_send_timers_up = 0;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ error = sctp_t3rxt_timer(stcb->sctp_ep, stcb,
+ stcb->asoc.deleted_primary);
+ if (error) {
+ SCTP_INP_DECR_REF(stcb->sctp_ep);
+ return;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, stcb->sctp_ep, stcb->asoc.deleted_primary);
+#endif
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+ if ((stcb->asoc.num_send_timers_up == 0) &&
+ (stcb->asoc.sent_queue_cnt > 0)) {
+ struct sctp_tmit_chunk *chk;
+
+ chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, chk->whoTo);
+ }
+ }
+ return;
+}
+
+static int
+ sctp_asconf_queue_mgmt(struct sctp_tcb *, struct sctp_ifa *, uint16_t);
+
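+/*
+ * mark all unacked chunks queued to the given net for immediate
+ * retransmission, reset its congestion state, and push them out now
+ * (used by the mobility fast-handoff handling below).
+ */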
+void
+sctp_net_immediate_retrans(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ struct sctp_tmit_chunk *chk;
+
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "net_immediate_retrans: RTO is %d\n", net->RTO);
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_TIMER + SCTP_LOC_5);
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
+ net->error_count = 0;
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if (chk->whoTo == net) {
+ if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ sctp_flight_size_decrease(chk);
+ sctp_total_flight_decrease(stcb, chk);
+ net->marked_retrans++;
+ stcb->asoc.marked_retrans++;
+ }
+ }
+ }
+ if (net->marked_retrans) {
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+ }
+}
+
+static void
+sctp_path_check_and_react(struct sctp_tcb *stcb, struct sctp_ifa *newifa)
+{
+ struct sctp_nets *net;
+ int addrnum, changed;
+
+ /*
+ * If the number of local valid addresses is 1, the valid address is
+ * probably the newly added address. If several valid addresses exist
+ * in this association, a source address may not be changed.
+ * Additionally, they can be configured on the same interface as
+ * "alias" addresses. (by micchie)
+ */
+ addrnum = sctp_local_addr_count(stcb);
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "p_check_react(): %d local addresses\n",
+ addrnum);
+ if (addrnum == 1) {
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ /* clear any cached route and source address */
+ if (net->ro.ro_rt) {
+ RTFREE(net->ro.ro_rt);
+ net->ro.ro_rt = NULL;
+ }
+ if (net->src_addr_selected) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ /* Retransmit unacknowledged DATA chunks immediately */
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ sctp_net_immediate_retrans(stcb, net);
+ }
+ /* also, a SET PRIMARY may already have been sent */
+ }
+ return;
+ }
+ /* Multiple local addresses exist in the association. */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ /* clear any cached route and source address */
+ if (net->ro.ro_rt) {
+ RTFREE(net->ro.ro_rt);
+ net->ro.ro_rt = NULL;
+ }
+ if (net->src_addr_selected) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ /*
+ * Check whether the nexthop corresponds to the new address.
+ * If the new address corresponds to the current nexthop, the
+ * path will be changed. If the new address does NOT
+ * correspond to the current nexthop, the path will not be
+ * changed.
+ */
+ SCTP_RTALLOC((sctp_route_t *) & net->ro,
+ stcb->sctp_ep->def_vrf_id);
+ if (net->ro.ro_rt == NULL)
+ continue;
+
+ changed = 0;
+ if (net->ro._l_addr.sa.sa_family == AF_INET) {
+ if (sctp_v4src_match_nexthop(newifa, (sctp_route_t *) & net->ro))
+ changed = 1;
+ }
+ if (net->ro._l_addr.sa.sa_family == AF_INET6) {
+ if (sctp_v6src_match_nexthop(
+ &newifa->address.sin6, (sctp_route_t *) & net->ro))
+ changed = 1;
+ }
+ /*
+ * if the newly added address does not affect the routing
+ * information, we skip it.
+ */
+ if (changed == 0)
+ continue;
+ /* Retransmit unacknowledged DATA chunks immediately */
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ sctp_net_immediate_retrans(stcb, net);
+ }
+ /* Send SET PRIMARY for this new address */
+ if (net == stcb->asoc.primary_destination) {
+ (void)sctp_asconf_queue_mgmt(stcb, newifa,
+ SCTP_SET_PRIM_ADDR);
+ }
+ }
+}
+
+/*
+ * process an ADD/DELETE IP ack from peer.
+ * addr: corresponding sctp_ifa to the address being added/deleted.
+ * type: SCTP_ADD_IP_ADDRESS or SCTP_DEL_IP_ADDRESS.
+ * flag: 1=success, 0=failure.
+ */
+static void
+sctp_asconf_addr_mgmt_ack(struct sctp_tcb *stcb, struct sctp_ifa *addr,
+ uint16_t type, uint32_t flag)
+{
+ /*
+ * do the necessary asoc list work- if we get a failure indication,
+ * leave the address on the assoc's restricted list. If we get a
+ * success indication, remove the address from the restricted list.
+ */
+ /*
+ * Note: this will only occur for ADD_IP_ADDRESS, since
+ * DEL_IP_ADDRESS is never actually added to the list...
+ */
+ if (flag) {
+ /* success case, so remove from the restricted list */
+ sctp_del_local_addr_restricted(stcb, addr);
+
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE) ||
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ sctp_path_check_and_react(stcb, addr);
+ return;
+ }
+ /* clear any cached/topologically incorrect source addresses */
+ sctp_asconf_nets_cleanup(stcb, addr->ifn_p);
+ }
+ /* else, leave it on the list */
+}
+
+/*
+ * add an asconf add/delete/set primary IP address parameter to the queue.
+ * type = SCTP_ADD_IP_ADDRESS, SCTP_DEL_IP_ADDRESS, SCTP_SET_PRIM_ADDR.
+ * returns 0 if queued, -1 if not queued/removed.
+ * NOTE: if adding, but a delete for the same address is already scheduled
+ * (and not yet sent out), simply remove it from queue. Same for deleting
+ * an address already scheduled for add. If a duplicate operation is found,
+ * ignore the new one.
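+ * For example: if an unsent delete for an address is still queued and an add
+ * for that same address is requested, the queued delete is simply removed and
+ * -1 is returned without queueing anything new.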
+ */
+static int
+sctp_asconf_queue_mgmt(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
+ uint16_t type)
+{
+ struct sctp_asconf_addr *aa, *aa_next;
+ struct sockaddr *sa;
+
+ /* make sure the request isn't already in the queue */
+ for (aa = TAILQ_FIRST(&stcb->asoc.asconf_queue); aa != NULL;
+ aa = aa_next) {
+ aa_next = TAILQ_NEXT(aa, next);
+ /* address match? */
+ if (sctp_asconf_addr_match(aa, &ifa->address.sa) == 0)
+ continue;
+ /* is the request already in queue (sent or not) */
+ if (aa->ap.aph.ph.param_type == type) {
+ return (-1);
+ }
+ /* is the opposite request already in queue, and not sent */
+ if ((aa->sent == 0) && (type == SCTP_ADD_IP_ADDRESS) &&
+ (aa->ap.aph.ph.param_type == SCTP_DEL_IP_ADDRESS)) {
+ /* add requested, delete already queued */
+ TAILQ_REMOVE(&stcb->asoc.asconf_queue, aa, next);
+ /* remove the ifa from the restricted list */
+ sctp_del_local_addr_restricted(stcb, ifa);
+ /* free the asconf param */
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ SCTPDBG(SCTP_DEBUG_ASCONF2, "asconf_queue_mgmt: add removes queued entry\n");
+ return (-1);
+ }
+ if ((aa->sent == 0) && (type == SCTP_DEL_IP_ADDRESS) &&
+ (aa->ap.aph.ph.param_type == SCTP_ADD_IP_ADDRESS)) {
+ /* delete requested, add already queued */
+ TAILQ_REMOVE(&stcb->asoc.asconf_queue, aa, next);
+ /* remove the aa->ifa from the restricted list */
+ sctp_del_local_addr_restricted(stcb, aa->ifa);
+ /* free the asconf param */
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ SCTPDBG(SCTP_DEBUG_ASCONF2, "asconf_queue_mgmt: delete removes queued entry\n");
+ return (-1);
+ }
+ } /* for each aa */
+
+ /* adding new request to the queue */
+ SCTP_MALLOC(aa, struct sctp_asconf_addr *, sizeof(*aa),
+ SCTP_M_ASC_ADDR);
+ if (aa == NULL) {
+ /* didn't get memory */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "asconf_queue_mgmt: failed to get memory!\n");
+ return (-1);
+ }
+ /* fill in asconf address parameter fields */
+ /* top level elements are "networked" during send */
+ aa->ap.aph.ph.param_type = type;
+ aa->ifa = ifa;
+ atomic_add_int(&ifa->refcount, 1);
+ /* correlation_id filled in during send routine later... */
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ /* IPv6 address */
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&ifa->address.sa;
+ sa = (struct sockaddr *)sin6;
+ aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS;
+ aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv6addr_param));
+ aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) +
+ sizeof(struct sctp_ipv6addr_param);
+ memcpy(&aa->ap.addrp.addr, &sin6->sin6_addr,
+ sizeof(struct in6_addr));
+ } else if (ifa->address.sa.sa_family == AF_INET) {
+ /* IPv4 address */
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&ifa->address.sa;
+ sa = (struct sockaddr *)sin;
+ aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS;
+ aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv4addr_param));
+ aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) +
+ sizeof(struct sctp_ipv4addr_param);
+ memcpy(&aa->ap.addrp.addr, &sin->sin_addr,
+ sizeof(struct in_addr));
+ } else {
+ /* invalid family! */
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ sctp_free_ifa(ifa);
+ return (-1);
+ }
+ aa->sent = 0; /* clear sent flag */
+
+ /*
+ * if we are deleting an address, it should go out last; otherwise,
+ * add it to the front of the pending queue
+ */
+ if (type == SCTP_ADD_IP_ADDRESS) {
+ /* add goes to the front of the queue */
+ TAILQ_INSERT_HEAD(&stcb->asoc.asconf_queue, aa, next);
+ SCTPDBG(SCTP_DEBUG_ASCONF2,
+ "asconf_queue_mgmt: inserted asconf ADD_IP_ADDRESS: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+ } else {
+ /* delete and set primary goes to the back of the queue */
+ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
+#ifdef SCTP_DEBUG
+ if (sctp_debug_on & SCTP_DEBUG_ASCONF2) {
+ if (type == SCTP_DEL_IP_ADDRESS) {
+ SCTP_PRINTF("asconf_queue_mgmt: appended asconf DEL_IP_ADDRESS: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+ } else {
+ SCTP_PRINTF("asconf_queue_mgmt: appended asconf SET_PRIM_ADDR: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+ }
+ }
+#endif
+ }
+
+ return (0);
+}
+
+
+/*
+ * add an asconf operation for the given ifa and type.
+ * type = SCTP_ADD_IP_ADDRESS, SCTP_DEL_IP_ADDRESS, SCTP_SET_PRIM_ADDR.
+ * returns 0 if completed, -1 if not completed, 1 if immediate send is
+ * advisable.
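+ * (A return of 1 only happens when a pending "delete last address" was queued
+ * together with this add, i.e. the sole local address is being replaced, so
+ * the caller should send the ASCONF out immediately.)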
+ */
+static int
+sctp_asconf_queue_add(struct sctp_tcb *stcb, struct sctp_ifa *ifa,
+ uint16_t type)
+{
+ uint32_t status;
+ int pending_delete_queued = 0;
+
+ /* see if peer supports ASCONF */
+ if (stcb->asoc.peer_supports_asconf == 0) {
+ return (-1);
+ }
+ /*
+ * if this is deleting the last address from the assoc, mark it as
+ * pending.
+ */
+ if ((type == SCTP_DEL_IP_ADDRESS) && !stcb->asoc.asconf_del_pending &&
+ (sctp_local_addr_count(stcb) < 2)) {
+ /* set the pending delete info only */
+ stcb->asoc.asconf_del_pending = 1;
+ stcb->asoc.asconf_addr_del_pending = ifa;
+ atomic_add_int(&ifa->refcount, 1);
+ SCTPDBG(SCTP_DEBUG_ASCONF2,
+ "asconf_queue_add: mark delete last address pending\n");
+ return (-1);
+ }
+ /*
+ * if this is an add, and there is a delete also pending (i.e. the
+ * last local address is being changed), queue the pending delete
+ * too.
+ */
+ if ((type == SCTP_ADD_IP_ADDRESS) && stcb->asoc.asconf_del_pending) {
+ /* queue in the pending delete */
+ if (sctp_asconf_queue_mgmt(stcb,
+ stcb->asoc.asconf_addr_del_pending,
+ SCTP_DEL_IP_ADDRESS) == 0) {
+ SCTPDBG(SCTP_DEBUG_ASCONF2, "asconf_queue_add: queing pending delete\n");
+ pending_delete_queued = 1;
+ /* clear out the pending delete info */
+ stcb->asoc.asconf_del_pending = 0;
+ sctp_free_ifa(stcb->asoc.asconf_addr_del_pending);
+ stcb->asoc.asconf_addr_del_pending = NULL;
+ }
+ }
+ /* queue an asconf parameter */
+ status = sctp_asconf_queue_mgmt(stcb, ifa, type);
+
+ if (pending_delete_queued && (status == 0)) {
+ struct sctp_nets *net;
+
+ /*
+ * since we know that the only/last address is now being
+ * changed in this case, reset the cwnd/rto on all nets to
+ * start as a new address and path. Also clear the error
+ * counts to give the assoc the best chance to complete the
+ * address change.
+ */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb,
+ net);
+ net->RTO = 0;
+ net->error_count = 0;
+ }
+ stcb->asoc.overall_error_count = 0;
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_ASCONF,
+ __LINE__);
+ }
+ /* queue in an advisory set primary too */
+ (void)sctp_asconf_queue_mgmt(stcb, ifa, SCTP_SET_PRIM_ADDR);
+ /* let caller know we should send this out immediately */
+ status = 1;
+ }
+ return (status);
+}
+
+/*-
+ * add an asconf delete IP address parameter to the queue by sockaddr and
+ * possibly with no sctp_ifa available. This is only called by the routine
+ * that checks the addresses in an INIT-ACK against the current address list.
+ * returns 0 if completed, non-zero if not completed.
+ * NOTE: if an add is already scheduled (and not yet sent out), simply
+ * remove it from queue. If a duplicate operation is found, ignore the
+ * new one.
+ */
+static int
+sctp_asconf_queue_sa_delete(struct sctp_tcb *stcb, struct sockaddr *sa)
+{
+ struct sctp_ifa *ifa;
+ struct sctp_asconf_addr *aa, *aa_next;
+ uint32_t vrf_id;
+
+ if (stcb == NULL) {
+ return (-1);
+ }
+ /* see if peer supports ASCONF */
+ if (stcb->asoc.peer_supports_asconf == 0) {
+ return (-1);
+ }
+ /* make sure the request isn't already in the queue */
+ for (aa = TAILQ_FIRST(&stcb->asoc.asconf_queue); aa != NULL;
+ aa = aa_next) {
+ aa_next = TAILQ_NEXT(aa, next);
+ /* address match? */
+ if (sctp_asconf_addr_match(aa, sa) == 0)
+ continue;
+ /* is the request already in queue (sent or not) */
+ if (aa->ap.aph.ph.param_type == SCTP_DEL_IP_ADDRESS) {
+ return (-1);
+ }
+ /* is the opposite request already in queue, and not sent */
+ if (aa->sent == 1)
+ continue;
+ if (aa->ap.aph.ph.param_type == SCTP_ADD_IP_ADDRESS) {
+ /* add already queued, so remove existing entry */
+ TAILQ_REMOVE(&stcb->asoc.asconf_queue, aa, next);
+ sctp_del_local_addr_restricted(stcb, aa->ifa);
+ /* free the entry */
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ return (-1);
+ }
+ } /* for each aa */
+
+ /* find any existing ifa-- NOTE ifa CAN be allowed to be NULL */
+ if (stcb) {
+ vrf_id = stcb->asoc.vrf_id;
+ } else {
+ vrf_id = SCTP_DEFAULT_VRFID;
+ }
+ ifa = sctp_find_ifa_by_addr(sa, vrf_id, SCTP_ADDR_NOT_LOCKED);
+
+ /* adding new request to the queue */
+ SCTP_MALLOC(aa, struct sctp_asconf_addr *, sizeof(*aa),
+ SCTP_M_ASC_ADDR);
+ if (aa == NULL) {
+ /* didn't get memory */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "sctp_asconf_queue_sa_delete: failed to get memory!\n");
+ return (-1);
+ }
+ /* fill in asconf address parameter fields */
+ /* top level elements are "networked" during send */
+ aa->ap.aph.ph.param_type = SCTP_DEL_IP_ADDRESS;
+ aa->ifa = ifa;
+ if (ifa)
+ atomic_add_int(&ifa->refcount, 1);
+ /* correlation_id filled in during send routine later... */
+ if (sa->sa_family == AF_INET6) {
+ /* IPv6 address */
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ aa->ap.addrp.ph.param_type = SCTP_IPV6_ADDRESS;
+ aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv6addr_param));
+ aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) + sizeof(struct sctp_ipv6addr_param);
+ memcpy(&aa->ap.addrp.addr, &sin6->sin6_addr,
+ sizeof(struct in6_addr));
+ } else if (sa->sa_family == AF_INET) {
+ /* IPv4 address */
+ struct sockaddr_in *sin = (struct sockaddr_in *)sa;
+
+ aa->ap.addrp.ph.param_type = SCTP_IPV4_ADDRESS;
+ aa->ap.addrp.ph.param_length = (sizeof(struct sctp_ipv4addr_param));
+ aa->ap.aph.ph.param_length = sizeof(struct sctp_asconf_paramhdr) + sizeof(struct sctp_ipv4addr_param);
+ memcpy(&aa->ap.addrp.addr, &sin->sin_addr,
+ sizeof(struct in_addr));
+ } else {
+ /* invalid family! */
+ SCTP_FREE(aa, SCTP_M_ASC_ADDR);
+ if (ifa)
+ sctp_free_ifa(ifa);
+ return (-1);
+ }
+ aa->sent = 0; /* clear sent flag */
+
+ /* delete goes to the back of the queue */
+ TAILQ_INSERT_TAIL(&stcb->asoc.asconf_queue, aa, next);
+
+ /* sa_ignore MEMLEAK {memory is put on the tailq} */
+ return (0);
+}
+
+/*
+ * find a specific asconf param on our "sent" queue
+ */
+static struct sctp_asconf_addr *
+sctp_asconf_find_param(struct sctp_tcb *stcb, uint32_t correlation_id)
+{
+ struct sctp_asconf_addr *aa;
+
+ TAILQ_FOREACH(aa, &stcb->asoc.asconf_queue, next) {
+ if (aa->ap.aph.correlation_id == correlation_id &&
+ aa->sent == 1) {
+ /* found it */
+ return (aa);
+ }
+ }
+ /* didn't find it */
+ return (NULL);
+}
+
+/*
+ * process an SCTP_ERROR_CAUSE_IND for a ASCONF-ACK parameter and do
+ * notifications based on the error response
+ */
+static void
+sctp_asconf_process_error(struct sctp_tcb *stcb,
+ struct sctp_asconf_paramhdr *aph)
+{
+ struct sctp_error_cause *eh;
+ struct sctp_paramhdr *ph;
+ uint16_t param_type;
+ uint16_t error_code;
+
+ eh = (struct sctp_error_cause *)(aph + 1);
+ ph = (struct sctp_paramhdr *)(eh + 1);
+ /* validate lengths */
+ if (htons(eh->length) + sizeof(struct sctp_error_cause) >
+ htons(aph->ph.param_length)) {
+ /* invalid error cause length */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_process_error: cause element too long\n");
+ return;
+ }
+ if (htons(ph->param_length) + sizeof(struct sctp_paramhdr) >
+ htons(eh->length)) {
+ /* invalid included TLV length */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_process_error: included TLV too long\n");
+ return;
+ }
+ /* which error code ? */
+ error_code = ntohs(eh->code);
+ param_type = ntohs(aph->ph.param_type);
+ /* FIX: this should go back up the REMOTE_ERROR ULP notify */
+ switch (error_code) {
+ case SCTP_CAUSE_RESOURCE_SHORTAGE:
+ /* we allow ourselves to "try again" for this error */
+ break;
+ default:
+ /* peer can't handle it... */
+ switch (param_type) {
+ case SCTP_ADD_IP_ADDRESS:
+ case SCTP_DEL_IP_ADDRESS:
+ stcb->asoc.peer_supports_asconf = 0;
+ break;
+ case SCTP_SET_PRIM_ADDR:
+ stcb->asoc.peer_supports_asconf = 0;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+/*
+ * process an asconf queue param.
+ * aparam: parameter to process, will be removed from the queue.
+ * flag: 1=success case, 0=failure case
+ */
+static void
+sctp_asconf_process_param_ack(struct sctp_tcb *stcb,
+ struct sctp_asconf_addr *aparam, uint32_t flag)
+{
+ uint16_t param_type;
+
+ /* process this param */
+ param_type = aparam->ap.aph.ph.param_type;
+ switch (param_type) {
+ case SCTP_ADD_IP_ADDRESS:
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_param_ack: added IP address\n");
+ sctp_asconf_addr_mgmt_ack(stcb, aparam->ifa, param_type, flag);
+ break;
+ case SCTP_DEL_IP_ADDRESS:
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "process_param_ack: deleted IP address\n");
+ /* nothing really to do... lists already updated */
+ break;
+ case SCTP_SET_PRIM_ADDR:
+ /* nothing to do... peer may start using this addr */
+ if (flag == 0)
+ stcb->asoc.peer_supports_asconf = 0;
+ break;
+ default:
+ /* should NEVER happen */
+ break;
+ }
+
+ /* remove the param and free it */
+ TAILQ_REMOVE(&stcb->asoc.asconf_queue, aparam, next);
+ if (aparam->ifa)
+ sctp_free_ifa(aparam->ifa);
+ SCTP_FREE(aparam, SCTP_M_ASC_ADDR);
+}
+
+/*
+ * cleanup from a bad asconf ack parameter
+ */
+static void
+sctp_asconf_ack_clear(struct sctp_tcb *stcb)
+{
+ /* assume peer doesn't really know how to do asconfs */
+ stcb->asoc.peer_supports_asconf = 0;
+ /* XXX we could free the pending queue here */
+}
+
+void
+sctp_handle_asconf_ack(struct mbuf *m, int offset,
+ struct sctp_asconf_ack_chunk *cp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int *abort_no_unlock)
+{
+ struct sctp_association *asoc;
+ uint32_t serial_num;
+ uint16_t ack_length;
+ struct sctp_asconf_paramhdr *aph;
+ struct sctp_asconf_addr *aa, *aa_next;
+ uint32_t last_error_id = 0; /* last error correlation id */
+ uint32_t id;
+ struct sctp_asconf_addr *ap;
+
+ /* asconf param buffer */
+ uint8_t aparam_buf[SCTP_PARAM_BUFFER_SIZE];
+
+ /* verify minimum length */
+ if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_asconf_ack_chunk)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "handle_asconf_ack: chunk too small = %xh\n",
+ ntohs(cp->ch.chunk_length));
+ return;
+ }
+ asoc = &stcb->asoc;
+ serial_num = ntohl(cp->serial_number);
+
+ /*
+ * NOTE: we may want to handle this differently- currently, we will
+ * abort when we get an ack for the expected serial number + 1 (eg.
+ * we didn't send it), process an ack normally if it is the expected
+ * serial number, and re-send the previous ack for *ALL* other
+ * serial numbers
+ */
+
+ /*
+ * if the serial number is the next expected, but I didn't send it,
+ * abort the asoc, since someone probably just hijacked us...
+ */
+ if (serial_num == (asoc->asconf_seq_out + 1)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf_ack: got unexpected next serial number! Aborting asoc!\n");
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_CAUSE_ILLEGAL_ASCONF_ACK, NULL, SCTP_SO_NOT_LOCKED);
+ *abort_no_unlock = 1;
+ return;
+ }
+ if (serial_num != asoc->asconf_seq_out) {
+ /* got a duplicate/unexpected ASCONF-ACK */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf_ack: got duplicate/unexpected serial number = %xh (expected = %xh)\n",
+ serial_num, asoc->asconf_seq_out);
+ return;
+ }
+ if (stcb->asoc.asconf_sent == 0) {
+ /* got an unexpected ASCONF-ACK for serial not in flight */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf_ack: got serial number = %xh but not in flight\n",
+ serial_num);
+ /* nothing to do... duplicate ACK received */
+ return;
+ }
+ /* stop our timer */
+ sctp_timer_stop(SCTP_TIMER_TYPE_ASCONF, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_ASCONF + SCTP_LOC_3);
+
+ /* process the ASCONF-ACK contents */
+ ack_length = ntohs(cp->ch.chunk_length) -
+ sizeof(struct sctp_asconf_ack_chunk);
+ offset += sizeof(struct sctp_asconf_ack_chunk);
+ /* process through all parameters */
+ while (ack_length >= sizeof(struct sctp_asconf_paramhdr)) {
+ unsigned int param_length, param_type;
+
+ /* get pointer to next asconf parameter */
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_asconf_paramhdr), aparam_buf);
+ if (aph == NULL) {
+ /* can't get an asconf paramhdr */
+ sctp_asconf_ack_clear(stcb);
+ return;
+ }
+ param_type = ntohs(aph->ph.param_type);
+ param_length = ntohs(aph->ph.param_length);
+ if (param_length > ack_length) {
+ sctp_asconf_ack_clear(stcb);
+ return;
+ }
+ if (param_length < sizeof(struct sctp_paramhdr)) {
+ sctp_asconf_ack_clear(stcb);
+ return;
+ }
+ /* get the complete parameter... */
+ if (param_length > sizeof(aparam_buf)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "param length (%u) larger than buffer size!\n", param_length);
+ sctp_asconf_ack_clear(stcb);
+ return;
+ }
+ aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset, param_length, aparam_buf);
+ if (aph == NULL) {
+ sctp_asconf_ack_clear(stcb);
+ return;
+ }
+ /* correlation_id is transparent to peer, no ntohl needed */
+ id = aph->correlation_id;
+
+ switch (param_type) {
+ case SCTP_ERROR_CAUSE_IND:
+ last_error_id = id;
+ /* find the corresponding asconf param in our queue */
+ ap = sctp_asconf_find_param(stcb, id);
+ if (ap == NULL) {
+ /* hmm... can't find this in our queue! */
+ break;
+ }
+ /* process the parameter, failed flag */
+ sctp_asconf_process_param_ack(stcb, ap, 0);
+ /* process the error response */
+ sctp_asconf_process_error(stcb, aph);
+ break;
+ case SCTP_SUCCESS_REPORT:
+ /* find the corresponding asconf param in our queue */
+ ap = sctp_asconf_find_param(stcb, id);
+ if (ap == NULL) {
+ /* hmm... can't find this in our queue! */
+ break;
+ }
+ /* process the parameter, success flag */
+ sctp_asconf_process_param_ack(stcb, ap, 1);
+ break;
+ default:
+ break;
+ } /* switch */
+
+ /* update remaining ASCONF-ACK message length to process */
+ ack_length -= SCTP_SIZE32(param_length);
+ if (ack_length <= 0) {
+ /* no more data in the mbuf chain */
+ break;
+ }
+ offset += SCTP_SIZE32(param_length);
+ } /* while */
+
+ /*
+ * if there are any "sent" params still on the queue, these are
+ * implicitly "success", or "failed" (if we got an error back) ...
+ * so process these appropriately
+ *
+ * we assume that the correlation_id's are monotonically increasing
+ * beginning from 1 and that we don't have *that* many outstanding
+ * at any given time
+ */
+ if (last_error_id == 0)
+ last_error_id--; /* unsigned wrap: set to "max" value */
+ for (aa = TAILQ_FIRST(&stcb->asoc.asconf_queue); aa != NULL;
+ aa = aa_next) {
+ aa_next = TAILQ_NEXT(aa, next);
+ if (aa->sent == 1) {
+ /*
+ * implicitly successful or failed: if correlation_id
+ * < last_error_id, then success; else, failure
+ */
+ if (aa->ap.aph.correlation_id < last_error_id)
+ sctp_asconf_process_param_ack(stcb, aa, 1);
+ else
+ sctp_asconf_process_param_ack(stcb, aa, 0);
+ } else {
+ /*
+ * since we always process in order (FIFO queue) if
+ * we reach one that hasn't been sent, the rest
+ * should not have been sent either. so, we're
+ * done...
+ */
+ break;
+ }
+ }
+
+ /* update the next sequence number to use */
+ asoc->asconf_seq_out++;
+ /* remove the old ASCONF on our outbound queue */
+ sctp_toss_old_asconf(stcb);
+ /* clear the sent flag to allow new ASCONFs */
+ asoc->asconf_sent = 0;
+ if (!TAILQ_EMPTY(&stcb->asoc.asconf_queue)) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ /* we have more params, so restart our timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, stcb->sctp_ep,
+ stcb, net);
+#else
+ /* we have more params, so send out more */
+ sctp_send_asconf(stcb, net, SCTP_ADDR_NOT_LOCKED);
+#endif
+ }
+}
+
+static uint32_t
+sctp_is_scopeid_in_nets(struct sctp_tcb *stcb, struct sockaddr *sa)
+{
+ struct sockaddr_in6 *sin6, *net6;
+ struct sctp_nets *net;
+
+ if (sa->sa_family != AF_INET6) {
+ /* wrong family */
+ return (0);
+ }
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) == 0) {
+ /* not link local address */
+ return (0);
+ }
+ /* hunt through our destination nets list for this scope_id */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (((struct sockaddr *)(&net->ro._l_addr))->sa_family !=
+ AF_INET6)
+ continue;
+ net6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ if (IN6_IS_ADDR_LINKLOCAL(&net6->sin6_addr) == 0)
+ continue;
+ if (sctp_is_same_scope(sin6, net6)) {
+ /* found one */
+ return (1);
+ }
+ }
+ /* didn't find one */
+ return (0);
+}
+
+/*
+ * address management functions
+ */
+static void
+sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_ifa *ifa, uint16_t type, int addr_locked)
+{
+ int status;
+
+
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0 &&
+ sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF)) {
+ /* subset bound, no ASCONF allowed case, so ignore */
+ return;
+ }
+ /*
+ * note: we know this is not the subset-bound, no-ASCONF case; i.e.
+ * this is either boundall or subset bound with ASCONF allowed
+ */
+
+ /* first, make sure it's a good address family */
+ if (ifa->address.sa.sa_family != AF_INET6 &&
+ ifa->address.sa.sa_family != AF_INET) {
+ return;
+ }
+ /* make sure we're "allowed" to add this type of addr */
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ /* invalid if we're not a v6 endpoint */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0)
+ return;
+ /* is the v6 addr really valid ? */
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ return;
+ }
+ }
+ /* put this address on the "pending/do not use yet" list */
+ sctp_add_local_addr_restricted(stcb, ifa);
+ /*
+ * check address scope; if the address is out of scope, don't queue
+ * anything... note: this would leave the address on both inp and
+ * asoc lists
+ */
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /* we skip unspecified addresses */
+ return;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (stcb->asoc.local_scope == 0) {
+ return;
+ }
+ /* is it the right link local scope? */
+ if (sctp_is_scopeid_in_nets(stcb, &ifa->address.sa) == 0) {
+ return;
+ }
+ }
+ if (stcb->asoc.site_scope == 0 &&
+ IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
+ return;
+ }
+ } else if (ifa->address.sa.sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+ struct in6pcb *inp6;
+
+ inp6 = (struct in6pcb *)&inp->ip_inp.inp;
+ /* invalid if we are a v6 only endpoint */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp6))
+ return;
+
+ sin = (struct sockaddr_in *)&ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0) {
+ /* we skip unspecified addresses */
+ return;
+ }
+ if (stcb->asoc.ipv4_local_scope == 0 &&
+ IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+ return;
+ }
+ } else {
+ /* else, not AF_INET or AF_INET6, so skip */
+ return;
+ }
+
+ /* queue an asconf for this address add/delete */
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF)) {
+ /* does the peer do asconf? */
+ if (stcb->asoc.peer_supports_asconf) {
+ /* queue an asconf for this addr */
+ status = sctp_asconf_queue_add(stcb, ifa, type);
+
+ /*
+ * if queued ok, and in the open state, send out the
+ * ASCONF. If in the non-open state, these will be
+ * sent when the state goes open.
+ */
+ if (status == 0 &&
+ SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp,
+ stcb, stcb->asoc.primary_destination);
+#else
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ addr_locked);
+#endif
+ }
+ }
+ }
+}
+
+
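+/*
+ * endpoint check used by the asconf iterator: returns 1 once every address on
+ * the work list has been found invalid for this endpoint's bindings, 0
+ * otherwise.
+ */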
+int
+sctp_asconf_iterator_ep(struct sctp_inpcb *inp, void *ptr, uint32_t val)
+{
+ struct sctp_asconf_iterator *asc;
+ struct sctp_ifa *ifa;
+ struct sctp_laddr *l;
+ int type;
+ int cnt_invalid = 0;
+
+ asc = (struct sctp_asconf_iterator *)ptr;
+ LIST_FOREACH(l, &asc->list_of_work, sctp_nxt_addr) {
+ ifa = l->ifa;
+ type = l->action;
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ /* invalid if we're not a v6 endpoint */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return (1);
+ else
+ continue;
+ }
+ } else if (ifa->address.sa.sa_family == AF_INET) {
+ /* invalid if we are a v6 only endpoint */
+ struct in6pcb *inp6;
+
+ inp6 = (struct in6pcb *)&inp->ip_inp.inp;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp6)) {
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return (1);
+ else
+ continue;
+ }
+ } else {
+ /* invalid address family */
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return (1);
+ else
+ continue;
+ }
+ }
+ return (0);
+}
+
+static int
+sctp_asconf_iterator_ep_end(struct sctp_inpcb *inp, void *ptr, uint32_t val)
+{
+ struct sctp_ifa *ifa;
+ struct sctp_asconf_iterator *asc;
+ struct sctp_laddr *laddr, *nladdr, *l;
+
+ /* Only for the specific case of not bound-all */
+ asc = (struct sctp_asconf_iterator *)ptr;
+ LIST_FOREACH(l, &asc->list_of_work, sctp_nxt_addr) {
+ ifa = l->ifa;
+ if (l->action == SCTP_ADD_IP_ADDRESS) {
+ LIST_FOREACH(laddr, &inp->sctp_addr_list,
+ sctp_nxt_addr) {
+ if (laddr->ifa == ifa) {
+ laddr->action = 0;
+ break;
+ }
+ }
+ } else if (l->action == SCTP_DEL_IP_ADDRESS) {
+ laddr = LIST_FIRST(&inp->sctp_addr_list);
+ while (laddr) {
+ nladdr = LIST_NEXT(laddr, sctp_nxt_addr);
+ /* remove only after all guys are done */
+ if (laddr->ifa == ifa) {
+ sctp_del_local_addr_ep(inp, ifa);
+ }
+ laddr = nladdr;
+ }
+ }
+ }
+ return (0);
+}
+
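+/*
+ * per-association worker for the asconf iterator: queues an ASCONF
+ * add/delete/set-primary for each applicable address on the work list and, if
+ * anything was queued while the association is open, sends the ASCONF out.
+ */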
+void
+sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ void *ptr, uint32_t val)
+{
+ struct sctp_asconf_iterator *asc;
+ struct sctp_ifa *ifa;
+ struct sctp_laddr *l;
+ int cnt_invalid = 0;
+ int type, status;
+ int num_queued = 0;
+
+ asc = (struct sctp_asconf_iterator *)ptr;
+ LIST_FOREACH(l, &asc->list_of_work, sctp_nxt_addr) {
+ ifa = l->ifa;
+ type = l->action;
+
+ /* address's vrf_id must be the vrf_id of the assoc */
+ if (ifa->vrf_id != stcb->asoc.vrf_id) {
+ continue;
+ }
+ /* Same checks again for assoc */
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ /* invalid if we're not a v6 endpoint */
+ struct sockaddr_in6 *sin6;
+
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return;
+ else
+ continue;
+ }
+ sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /* we skip unspecified addresses */
+ continue;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (stcb->asoc.local_scope == 0) {
+ continue;
+ }
+ /* is it the right link local scope? */
+ if (sctp_is_scopeid_in_nets(stcb, &ifa->address.sa) == 0) {
+ continue;
+ }
+ }
+ } else if (ifa->address.sa.sa_family == AF_INET) {
+ /* invalid if we are a v6 only endpoint */
+ struct in6pcb *inp6;
+ struct sockaddr_in *sin;
+
+ inp6 = (struct in6pcb *)&inp->ip_inp.inp;
+ /* invalid if we are a v6 only endpoint */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp6))
+ continue;
+
+ sin = (struct sockaddr_in *)&ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0) {
+ /* we skip unspecified addresses */
+ continue;
+ }
+ if (stcb->asoc.ipv4_local_scope == 0 &&
+ IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+ continue;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp6)) {
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return;
+ else
+ continue;
+ }
+ } else {
+ /* invalid address family */
+ cnt_invalid++;
+ if (asc->cnt == cnt_invalid)
+ return;
+ else
+ continue;
+ }
+
+ if (type == SCTP_ADD_IP_ADDRESS) {
+ /* prevent this address from being used as a source */
+ sctp_add_local_addr_restricted(stcb, ifa);
+ } else if (type == SCTP_DEL_IP_ADDRESS) {
+ struct sctp_nets *net;
+
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sctp_rtentry_t *rt;
+
+ /* delete this address if cached */
+ if (net->ro._s_addr == ifa) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ rt = net->ro.ro_rt;
+ if (rt) {
+ RTFREE(rt);
+ net->ro.ro_rt = NULL;
+ }
+ /*
+ * Now that we deleted our src address,
+ * should we not also now reset the
+ * cwnd/rto to start as if it's a new
+ * address?
+ */
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
+ net->RTO = 0;
+
+ }
+ }
+ } else if (type == SCTP_SET_PRIM_ADDR) {
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
+ /* must validate the ifa is in the ep */
+ if (sctp_is_addr_in_ep(stcb->sctp_ep, ifa) == 0) {
+ continue;
+ }
+ } else {
+ /* Need to check scopes for this guy */
+ if (sctp_is_address_in_scope(ifa,
+ stcb->asoc.ipv4_addr_legal,
+ stcb->asoc.ipv6_addr_legal,
+ stcb->asoc.loopback_scope,
+ stcb->asoc.ipv4_local_scope,
+ stcb->asoc.local_scope,
+ stcb->asoc.site_scope, 0) == 0) {
+ continue;
+ }
+ }
+ }
+ /* queue an asconf for this address add/delete */
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF) &&
+ stcb->asoc.peer_supports_asconf) {
+ /* queue an asconf for this addr */
+ status = sctp_asconf_queue_add(stcb, ifa, type);
+ /*
+ * if queued ok, and in the open state, update the
+ * count of queued params. If in the non-open
+ * state, these get sent when the assoc goes open.
+ */
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ if (status >= 0) {
+ num_queued++;
+ }
+ }
+ }
+ }
+ /*
+ * If we have queued params in the open state, send out an ASCONF.
+ */
+ if (num_queued > 0) {
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ SCTP_ADDR_NOT_LOCKED);
+ }
+}
+
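+/*
+ * iterator completion: clears the defer-use flag on added addresses, then
+ * releases each ifa reference and laddr entry on the work list and frees the
+ * iterator itself.
+ */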
+void
+sctp_asconf_iterator_end(void *ptr, uint32_t val)
+{
+ struct sctp_asconf_iterator *asc;
+ struct sctp_ifa *ifa;
+ struct sctp_laddr *l, *l_next;
+
+ asc = (struct sctp_asconf_iterator *)ptr;
+ l = LIST_FIRST(&asc->list_of_work);
+ while (l != NULL) {
+ l_next = LIST_NEXT(l, sctp_nxt_addr);
+ ifa = l->ifa;
+ if (l->action == SCTP_ADD_IP_ADDRESS) {
+ /* Clear the defer use flag */
+ ifa->localifa_flags &= ~SCTP_ADDR_DEFER_USE;
+ }
+ sctp_free_ifa(ifa);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_laddr, l);
+ SCTP_DECR_LADDR_COUNT();
+ l = l_next;
+ }
+ SCTP_FREE(asc, SCTP_M_ASC_IT);
+}
+
+/*
+ * sa is the sockaddr to ask the peer to set primary to.
+ * returns: 0 = completed, -1 = error
+ */
+int32_t
+sctp_set_primary_ip_address_sa(struct sctp_tcb *stcb, struct sockaddr *sa)
+{
+ uint32_t vrf_id;
+ struct sctp_ifa *ifa;
+
+ /* find the ifa for the desired set primary */
+ vrf_id = stcb->asoc.vrf_id;
+ ifa = sctp_find_ifa_by_addr(sa, vrf_id, SCTP_ADDR_NOT_LOCKED);
+ if (ifa == NULL) {
+ /* Invalid address */
+ return (-1);
+ }
+ /* queue an ASCONF:SET_PRIM_ADDR to be sent */
+ if (!sctp_asconf_queue_add(stcb, ifa, SCTP_SET_PRIM_ADDR)) {
+ /* set primary queuing succeeded */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "set_primary_ip_address_sa: queued on tcb=%p, ",
+ stcb);
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
+ stcb->sctp_ep, stcb,
+ stcb->asoc.primary_destination);
+#else
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ SCTP_ADDR_NOT_LOCKED);
+#endif
+ }
+ } else {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "set_primary_ip_address_sa: failed to add to queue on tcb=%p, ",
+ stcb);
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, sa);
+ return (-1);
+ }
+ return (0);
+}
+
+void
+sctp_set_primary_ip_address(struct sctp_ifa *ifa)
+{
+ struct sctp_inpcb *inp;
+
+ /* go through all our PCB's */
+ LIST_FOREACH(inp, &sctppcbinfo.listhead, sctp_list) {
+ struct sctp_tcb *stcb;
+
+ /* process for all associations for this endpoint */
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ /* queue an ASCONF:SET_PRIM_ADDR to be sent */
+ if (!sctp_asconf_queue_add(stcb, ifa,
+ SCTP_SET_PRIM_ADDR)) {
+ /* set primary queuing succeeded */
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "set_primary_ip_address: queued on stcb=%p, ",
+ stcb);
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &ifa->address.sa);
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
+ stcb->sctp_ep, stcb,
+ stcb->asoc.primary_destination);
+#else
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ SCTP_ADDR_NOT_LOCKED);
+#endif
+ }
+ }
+ } /* for each stcb */
+ } /* for each inp */
+}
+
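+/*
+ * find a usable (in-scope, unrestricted) local address to offer as the ASCONF
+ * lookup address in the bound-all case.
+ */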
+static struct sockaddr *
+sctp_find_valid_localaddr(struct sctp_tcb *stcb, int addr_locked)
+{
+ struct sctp_vrf *vrf = NULL;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+
+ if (addr_locked == SCTP_ADDR_NOT_LOCKED)
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(stcb->asoc.vrf_id);
+ if (vrf == NULL) {
+ if (addr_locked == SCTP_ADDR_NOT_LOCKED)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (NULL);
+ }
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if (stcb->asoc.loopback_scope == 0 &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ /* Skip if loopback_scope not set */
+ continue;
+ }
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if (sctp_ifa->address.sa.sa_family == AF_INET &&
+ stcb->asoc.ipv4_addr_legal) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0) {
+ /* skip unspecified addresses */
+ continue;
+ }
+ if (stcb->asoc.ipv4_local_scope == 0 &&
+ IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))
+ continue;
+
+ if (sctp_is_addr_restricted(stcb, sctp_ifa))
+ continue;
+ /* found a valid local v4 address to use */
+ if (addr_locked == SCTP_ADDR_NOT_LOCKED)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (&sctp_ifa->address.sa);
+ } else if (sctp_ifa->address.sa.sa_family == AF_INET6 &&
+ stcb->asoc.ipv6_addr_legal) {
+ struct sockaddr_in6 *sin6;
+
+ if (sctp_ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ continue;
+ }
+ sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /* we skip unspecified addresses */
+ continue;
+ }
+ if (stcb->asoc.local_scope == 0 &&
+ IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
+ continue;
+ if (stcb->asoc.site_scope == 0 &&
+ IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))
+ continue;
+
+ /* found a valid local v6 address to use */
+ if (addr_locked == SCTP_ADDR_NOT_LOCKED)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (&sctp_ifa->address.sa);
+ }
+ }
+ }
+ /* no valid addresses found */
+ if (addr_locked == SCTP_ADDR_NOT_LOCKED)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (NULL);
+}
+
+static struct sockaddr *
+sctp_find_valid_localaddr_ep(struct sctp_tcb *stcb)
+{
+ struct sctp_laddr *laddr;
+
+ LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ continue;
+ }
+ /* is the address restricted ? */
+ if (sctp_is_addr_restricted(stcb, laddr->ifa))
+ continue;
+
+ /* found a valid local address to use */
+ return (&laddr->ifa->address.sa);
+ }
+ /* no valid addresses found */
+ return (NULL);
+}
+
+/*
+ * builds an ASCONF chunk from queued ASCONF params.
+ * returns NULL on error (no mbuf, no ASCONF params queued, etc).
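+ * The chunk is assembled as two mbufs: one for the chunk header plus the
+ * (optional) lookup address, and a cluster for the ASCONF parameters; the two
+ * are chained together just before returning.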
+ */
+struct mbuf *
+sctp_compose_asconf(struct sctp_tcb *stcb, int *retlen, int addr_locked)
+{
+ struct mbuf *m_asconf, *m_asconf_chk;
+ struct sctp_asconf_addr *aa;
+ struct sctp_asconf_chunk *acp;
+ struct sctp_asconf_paramhdr *aph;
+ struct sctp_asconf_addr_param *aap;
+ uint32_t p_length;
+ uint32_t correlation_id = 1; /* 0 is reserved... */
+ caddr_t ptr, lookup_ptr;
+ uint8_t lookup_used = 0;
+
+ /* are there any asconf params to send? */
+ if (TAILQ_EMPTY(&stcb->asoc.asconf_queue)) {
+ return (NULL);
+ }
+ /* can't send a new one if there is one in flight already */
+ if (stcb->asoc.asconf_sent > 0) {
+ return (NULL);
+ }
+ /*
+ * get a chunk header mbuf and a cluster for the asconf params since
+ * it's simpler to fill in the asconf chunk header lookup address on
+ * the fly
+ */
+ m_asconf_chk = sctp_get_mbuf_for_msg(sizeof(struct sctp_asconf_chunk), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_asconf_chk == NULL) {
+ /* no mbuf's */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "compose_asconf: couldn't get chunk mbuf!\n");
+ return (NULL);
+ }
+ m_asconf = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_asconf == NULL) {
+ /* no mbuf's */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "compose_asconf: couldn't get mbuf!\n");
+ sctp_m_freem(m_asconf_chk);
+ return (NULL);
+ }
+ SCTP_BUF_LEN(m_asconf_chk) = sizeof(struct sctp_asconf_chunk);
+ SCTP_BUF_LEN(m_asconf) = 0;
+ acp = mtod(m_asconf_chk, struct sctp_asconf_chunk *);
+ bzero(acp, sizeof(struct sctp_asconf_chunk));
+ /* save pointers to lookup address and asconf params */
+ lookup_ptr = (caddr_t)(acp + 1); /* after the header */
+ ptr = mtod(m_asconf, caddr_t); /* beginning of cluster */
+
+ /* fill in chunk header info */
+ acp->ch.chunk_type = SCTP_ASCONF;
+ acp->ch.chunk_flags = 0;
+ acp->serial_number = htonl(stcb->asoc.asconf_seq_out);
+
+ /* add parameters... up to smallest MTU allowed */
+ TAILQ_FOREACH(aa, &stcb->asoc.asconf_queue, next) {
+ /* get the parameter length */
+ p_length = SCTP_SIZE32(aa->ap.aph.ph.param_length);
+ /* will it fit in current chunk? */
+ if (SCTP_BUF_LEN(m_asconf) + p_length > stcb->asoc.smallest_mtu) {
+ /* won't fit, so we're done with this chunk */
+ break;
+ }
+ /* assign (and store) a correlation id */
+ aa->ap.aph.correlation_id = correlation_id++;
+
+ /*
+ * fill in the address if we're doing a delete; this is a simple
+ * way for us to fill in the correlation address, which
+ * should only be used by the peer if we're deleting our
+ * source address and adding a new address (e.g. renumbering
+ * case)
+ */
+ if (lookup_used == 0 &&
+ aa->ap.aph.ph.param_type == SCTP_DEL_IP_ADDRESS) {
+ struct sctp_ipv6addr_param *lookup;
+ uint16_t p_size, addr_size;
+
+ lookup = (struct sctp_ipv6addr_param *)lookup_ptr;
+ lookup->ph.param_type =
+ htons(aa->ap.addrp.ph.param_type);
+ if (aa->ap.addrp.ph.param_type == SCTP_IPV6_ADDRESS) {
+ /* copy IPv6 address */
+ p_size = sizeof(struct sctp_ipv6addr_param);
+ addr_size = sizeof(struct in6_addr);
+ } else {
+ /* copy IPv4 address */
+ p_size = sizeof(struct sctp_ipv4addr_param);
+ addr_size = sizeof(struct in_addr);
+ }
+ lookup->ph.param_length = htons(SCTP_SIZE32(p_size));
+ memcpy(lookup->addr, &aa->ap.addrp.addr, addr_size);
+ SCTP_BUF_LEN(m_asconf_chk) += SCTP_SIZE32(p_size);
+ lookup_used = 1;
+ }
+ /* copy into current space */
+ memcpy(ptr, &aa->ap, p_length);
+
+ /* convert elements to network byte order and update lengths */
+ aph = (struct sctp_asconf_paramhdr *)ptr;
+ aap = (struct sctp_asconf_addr_param *)ptr;
+ /* correlation_id is transparent to peer, no htonl needed */
+ aph->ph.param_type = htons(aph->ph.param_type);
+ aph->ph.param_length = htons(aph->ph.param_length);
+ aap->addrp.ph.param_type = htons(aap->addrp.ph.param_type);
+ aap->addrp.ph.param_length = htons(aap->addrp.ph.param_length);
+
+ SCTP_BUF_LEN(m_asconf) += SCTP_SIZE32(p_length);
+ ptr += SCTP_SIZE32(p_length);
+
+ /*
+ * these params are removed from the pending list upon
+ * getting an ASCONF-ACK back from the peer; just set the flag
+ */
+ aa->sent = 1;
+ }
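+ /*
+ * any params that did not fit remain queued with sent == 0 and will go
+ * out in a later ASCONF once this one has been acknowledged
+ */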
+ /* check to see if the lookup addr has been populated yet */
+ if (lookup_used == 0) {
+ /* NOTE: if the address param is optional, can skip this... */
+ /* add any valid (existing) address... */
+ struct sctp_ipv6addr_param *lookup;
+ uint16_t p_size, addr_size;
+ struct sockaddr *found_addr;
+ caddr_t addr_ptr;
+
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL)
+ found_addr = sctp_find_valid_localaddr(stcb,
+ addr_locked);
+ else
+ found_addr = sctp_find_valid_localaddr_ep(stcb);
+
+ lookup = (struct sctp_ipv6addr_param *)lookup_ptr;
+ if (found_addr != NULL) {
+ if (found_addr->sa_family == AF_INET6) {
+ /* copy IPv6 address */
+ lookup->ph.param_type =
+ htons(SCTP_IPV6_ADDRESS);
+ p_size = sizeof(struct sctp_ipv6addr_param);
+ addr_size = sizeof(struct in6_addr);
+ addr_ptr = (caddr_t)&((struct sockaddr_in6 *)
+ found_addr)->sin6_addr;
+ } else {
+ /* copy IPv4 address */
+ lookup->ph.param_type =
+ htons(SCTP_IPV4_ADDRESS);
+ p_size = sizeof(struct sctp_ipv4addr_param);
+ addr_size = sizeof(struct in_addr);
+ addr_ptr = (caddr_t)&((struct sockaddr_in *)
+ found_addr)->sin_addr;
+ }
+ lookup->ph.param_length = htons(SCTP_SIZE32(p_size));
+ memcpy(lookup->addr, addr_ptr, addr_size);
+ SCTP_BUF_LEN(m_asconf_chk) += SCTP_SIZE32(p_size);
+ lookup_used = 1;
+ } else {
+ /* uh oh... don't have any address?? */
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "compose_asconf: no lookup addr!\n");
+ /* for now, we send an IPv4 address of 0.0.0.0 */
+ lookup->ph.param_type = htons(SCTP_IPV4_ADDRESS);
+ lookup->ph.param_length = htons(SCTP_SIZE32(sizeof(struct sctp_ipv4addr_param)));
+ bzero(lookup->addr, sizeof(struct in_addr));
+ SCTP_BUF_LEN(m_asconf_chk) += SCTP_SIZE32(sizeof(struct sctp_ipv4addr_param));
+ lookup_used = 1;
+ }
+ }
+ /* chain it all together */
+ SCTP_BUF_NEXT(m_asconf_chk) = m_asconf;
+ *retlen = SCTP_BUF_LEN(m_asconf_chk) + SCTP_BUF_LEN(m_asconf);
+ acp->ch.chunk_length = ntohs(*retlen);
+
+ /* update "sent" flag */
+ stcb->asoc.asconf_sent++;
+
+ return (m_asconf_chk);
+}
+
+/*
+ * section to handle address changes before an association is up, e.g. changes
+ * during INIT/INIT-ACK/COOKIE-ECHO handshake
+ */
+
+/*
+ * processes the (local) addresses in the INIT-ACK chunk
+ */
+static void
+sctp_process_initack_addresses(struct sctp_tcb *stcb, struct mbuf *m,
+ unsigned int offset, unsigned int length)
+{
+ struct sctp_paramhdr tmp_param, *ph;
+ uint16_t plen, ptype;
+ struct sctp_ifa *sctp_ifa;
+ struct sctp_ipv6addr_param addr_store;
+ struct sockaddr_in6 sin6;
+ struct sockaddr_in sin;
+ struct sockaddr *sa;
+ uint32_t vrf_id;
+
+ SCTPDBG(SCTP_DEBUG_ASCONF2, "processing init-ack addresses\n");
+ if (stcb == NULL) /* Un-needed check for SA */
+ return;
+
+ /* convert to upper bound */
+ length += offset;
+
+ if ((offset + sizeof(struct sctp_paramhdr)) > length) {
+ return;
+ }
+ /* init the addresses */
+ bzero(&sin6, sizeof(sin6));
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(sin6);
+ sin6.sin6_port = stcb->rport;
+
+ bzero(&sin, sizeof(sin));
+ sin.sin_len = sizeof(sin);
+ sin.sin_family = AF_INET;
+ sin.sin_port = stcb->rport;
+
+ /* go through the addresses in the init-ack */
+ ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_paramhdr), (uint8_t *) & tmp_param);
+ while (ph != NULL) {
+ ptype = ntohs(ph->param_type);
+ plen = ntohs(ph->param_length);
+ if (ptype == SCTP_IPV6_ADDRESS) {
+ struct sctp_ipv6addr_param *a6p;
+
+ /* get the entire IPv6 address param */
+ a6p = (struct sctp_ipv6addr_param *)
+ sctp_m_getptr(m, offset,
+ sizeof(struct sctp_ipv6addr_param),
+ (uint8_t *) & addr_store);
+ if (plen != sizeof(struct sctp_ipv6addr_param) ||
+ a6p == NULL) {
+ return;
+ }
+ memcpy(&sin6.sin6_addr, a6p->addr,
+ sizeof(struct in6_addr));
+ sa = (struct sockaddr *)&sin6;
+ } else if (ptype == SCTP_IPV4_ADDRESS) {
+ struct sctp_ipv4addr_param *a4p;
+
+ /* get the entire IPv4 address param */
+ a4p = (struct sctp_ipv4addr_param *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_ipv4addr_param),
+ (uint8_t *) & addr_store);
+ if (plen != sizeof(struct sctp_ipv4addr_param) ||
+ a4p == NULL) {
+ return;
+ }
+ sin.sin_addr.s_addr = a4p->addr;
+ sa = (struct sockaddr *)&sin;
+ } else {
+ goto next_addr;
+ }
+
+ /* see if this address really (still) exists */
+ if (stcb) {
+ vrf_id = stcb->asoc.vrf_id;
+ } else {
+ vrf_id = SCTP_DEFAULT_VRFID;
+ }
+ sctp_ifa = sctp_find_ifa_by_addr(sa, vrf_id,
+ SCTP_ADDR_NOT_LOCKED);
+ if (sctp_ifa == NULL) {
+ /* address doesn't exist anymore */
+ int status;
+
+ /* are ASCONFs allowed ? */
+ if ((sctp_is_feature_on(stcb->sctp_ep,
+ SCTP_PCB_FLAGS_DO_ASCONF)) &&
+ stcb->asoc.peer_supports_asconf) {
+ /* queue an ASCONF DEL_IP_ADDRESS */
+ status = sctp_asconf_queue_sa_delete(stcb, sa);
+ /*
+ * if queued ok, and in correct state, send
+ * out the ASCONF.
+ */
+ if (status == 0 &&
+ SCTP_GET_STATE(&stcb->asoc) ==
+ SCTP_STATE_OPEN) {
+#ifdef SCTP_TIMER_BASED_ASCONF
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
+ stcb->sctp_ep, stcb,
+ stcb->asoc.primary_destination);
+#else
+ sctp_send_asconf(stcb, stcb->asoc.primary_destination,
+ SCTP_ADDR_NOT_LOCKED);
+#endif
+ }
+ }
+ }
+next_addr:
+ /*
+ * Sanity check: Make sure the length isn't 0, otherwise
+ * we'll be stuck in this loop for a long time...
+ */
+ if (SCTP_SIZE32(plen) == 0) {
+ SCTP_PRINTF("process_initack_addrs: bad len (%d) type=%xh\n",
+ plen, ptype);
+ return;
+ }
+ /* get next parameter */
+ offset += SCTP_SIZE32(plen);
+ if ((offset + sizeof(struct sctp_paramhdr)) > length)
+ return;
+ ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_paramhdr), (uint8_t *) & tmp_param);
+ } /* while */
+}
+
+/* FIX ME: need to verify return result for v6 address type if v6 disabled */
+/*
+ * checks to see if a specific address is in the initack address list returns
+ * 1 if found, 0 if not
+ */
+static uint32_t
+sctp_addr_in_initack(struct sctp_tcb *stcb, struct mbuf *m, uint32_t offset,
+ uint32_t length, struct sockaddr *sa)
+{
+ struct sctp_paramhdr tmp_param, *ph;
+ uint16_t plen, ptype;
+ struct sctp_ipv6addr_param addr_store;
+ struct sockaddr_in *sin;
+ struct sctp_ipv4addr_param *a4p;
+
+#ifdef INET6
+ struct sockaddr_in6 *sin6;
+ struct sctp_ipv6addr_param *a6p;
+ struct sockaddr_in6 sin6_tmp;
+
+#endif /* INET6 */
+
+ if (
+#ifdef INET6
+ (sa->sa_family != AF_INET6) &&
+#endif /* INET6 */
+ (sa->sa_family != AF_INET))
+ return (0);
+
+ SCTPDBG(SCTP_DEBUG_ASCONF2, "find_initack_addr: starting search for ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF2, sa);
+ /* convert to upper bound */
+ length += offset;
+
+ if ((offset + sizeof(struct sctp_paramhdr)) > length) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "find_initack_addr: invalid offset?\n");
+ return (0);
+ }
+ /* go through the addresses in the init-ack */
+ ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_paramhdr), (uint8_t *) & tmp_param);
+ while (ph != NULL) {
+ ptype = ntohs(ph->param_type);
+ plen = ntohs(ph->param_length);
+#ifdef INET6
+ if (ptype == SCTP_IPV6_ADDRESS && sa->sa_family == AF_INET6) {
+ /* get the entire IPv6 address param */
+ a6p = (struct sctp_ipv6addr_param *)
+ sctp_m_getptr(m, offset,
+ sizeof(struct sctp_ipv6addr_param),
+ (uint8_t *) & addr_store);
+ if (plen != sizeof(struct sctp_ipv6addr_param) ||
+ (ph == NULL) ||
+ (a6p == NULL)) {
+ return (0);
+ }
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) {
+ /* create a copy and clear scope */
+ memcpy(&sin6_tmp, sin6,
+ sizeof(struct sockaddr_in6));
+ sin6 = &sin6_tmp;
+ in6_clearscope(&sin6->sin6_addr);
+ }
+ if (memcmp(&sin6->sin6_addr, a6p->addr,
+ sizeof(struct in6_addr)) == 0) {
+ /* found it */
+ return (1);
+ }
+ } else
+#endif /* INET6 */
+
+ if (ptype == SCTP_IPV4_ADDRESS &&
+ sa->sa_family == AF_INET) {
+ /* get the entire IPv4 address param */
+ a4p = (struct sctp_ipv4addr_param *)sctp_m_getptr(m,
+ offset, sizeof(struct sctp_ipv4addr_param),
+ (uint8_t *) & addr_store);
+ if (plen != sizeof(struct sctp_ipv4addr_param) ||
+ (ph == NULL) ||
+ (a4p == NULL)) {
+ return (0);
+ }
+ sin = (struct sockaddr_in *)sa;
+ if (sin->sin_addr.s_addr == a4p->addr) {
+ /* found it */
+ return (1);
+ }
+ }
+ /* get next parameter */
+ offset += SCTP_SIZE32(plen);
+ if (offset + sizeof(struct sctp_paramhdr) > length)
+ return (0);
+ ph = (struct sctp_paramhdr *)
+ sctp_m_getptr(m, offset, sizeof(struct sctp_paramhdr),
+ (uint8_t *) & tmp_param);
+ } /* while */
+ /* not found! */
+ return (0);
+}
+
+/*
+ * makes sure that the current endpoint local addr list is consistent with
+ * the new association (e.g. subset bound, asconf allowed); adds addresses as
+ * necessary
+ */
+static void
+sctp_check_address_list_ep(struct sctp_tcb *stcb, struct mbuf *m, int offset,
+ int length, struct sockaddr *init_addr)
+{
+ struct sctp_laddr *laddr;
+
+ /* go through the endpoint list */
+ LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list, sctp_nxt_addr) {
+ /* be paranoid and validate the laddr */
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "check_addr_list_ep: laddr->ifa is NULL");
+ continue;
+ }
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "check_addr_list_ep: laddr->ifa->ifa_addr is NULL");
+ continue;
+ }
+ /* do i have it implicitly? */
+ if (sctp_cmpaddr(&laddr->ifa->address.sa, init_addr)) {
+ continue;
+ }
+ /* check to see if in the init-ack */
+ if (!sctp_addr_in_initack(stcb, m, offset, length,
+ &laddr->ifa->address.sa)) {
+ /* try to add it */
+ sctp_addr_mgmt_assoc(stcb->sctp_ep, stcb, laddr->ifa,
+ SCTP_ADD_IP_ADDRESS, SCTP_ADDR_NOT_LOCKED);
+ }
+ }
+}
+
+/*
+ * makes sure that the current kernel address list is consistent with the new
+ * association (with all addrs bound); adds addresses as necessary
+ */
+static void
+sctp_check_address_list_all(struct sctp_tcb *stcb, struct mbuf *m, int offset,
+ int length, struct sockaddr *init_addr,
+ uint16_t local_scope, uint16_t site_scope,
+ uint16_t ipv4_scope, uint16_t loopback_scope)
+{
+ struct sctp_vrf *vrf = NULL;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+ uint32_t vrf_id;
+
+ if (stcb) {
+ vrf_id = stcb->asoc.vrf_id;
+ } else {
+ return;
+ }
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTP_IPI_ADDR_RUNLOCK();
+ return;
+ }
+ /* go through all our known interfaces */
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if (loopback_scope == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ /* skip loopback interface */
+ continue;
+ }
+ /* go through each interface address */
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ /* do i have it implicitly? */
+ if (sctp_cmpaddr(&sctp_ifa->address.sa, init_addr)) {
+ continue;
+ }
+ /* check to see if in the init-ack */
+ if (!sctp_addr_in_initack(stcb, m, offset, length,
+ &sctp_ifa->address.sa)) {
+ /* try to add it */
+ sctp_addr_mgmt_assoc(stcb->sctp_ep, stcb,
+ sctp_ifa, SCTP_ADD_IP_ADDRESS,
+ SCTP_ADDR_LOCKED);
+ }
+ } /* end foreach ifa */
+ } /* end foreach ifn */
+ SCTP_IPI_ADDR_RUNLOCK();
+}
+
+/*
+ * validates an init-ack chunk (from a cookie-echo) with current addresses;
+ * adds addresses from the init-ack into our local address list, if needed;
+ * queues asconf adds/deletes addresses as needed and makes appropriate list
+ * changes for source address selection.
+ * m, offset: points to the start of the address list in an init-ack chunk
+ * length: total length of the address params only
+ * init_addr: address where my INIT-ACK was sent from
+ */
+void
+sctp_check_address_list(struct sctp_tcb *stcb, struct mbuf *m, int offset,
+ int length, struct sockaddr *init_addr,
+ uint16_t local_scope, uint16_t site_scope,
+ uint16_t ipv4_scope, uint16_t loopback_scope)
+{
+ /* process the local addresses in the initack */
+ sctp_process_initack_addresses(stcb, m, offset, length);
+
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* bound all case */
+ sctp_check_address_list_all(stcb, m, offset, length, init_addr,
+ local_scope, site_scope, ipv4_scope, loopback_scope);
+ } else {
+ /* subset bound case */
+ if (sctp_is_feature_on(stcb->sctp_ep,
+ SCTP_PCB_FLAGS_DO_ASCONF)) {
+ /* asconf's allowed */
+ sctp_check_address_list_ep(stcb, m, offset, length,
+ init_addr);
+ }
+ /* else, no asconfs allowed, so what we sent is what we get */
+ }
+}
+
+/*
+ * sctp_bindx() support
+ */
+uint32_t
+sctp_addr_mgmt_ep_sa(struct sctp_inpcb *inp, struct sockaddr *sa,
+ uint32_t type, uint32_t vrf_id, struct sctp_ifa *sctp_ifap)
+{
+ struct sctp_ifa *ifa;
+
+ if (sa->sa_len == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, EINVAL);
+ return (EINVAL);
+ }
+ if (sctp_ifap) {
+ ifa = sctp_ifap;
+ } else if (type == SCTP_ADD_IP_ADDRESS) {
+ /* For an add the address MUST be on the system */
+ ifa = sctp_find_ifa_by_addr(sa, vrf_id, SCTP_ADDR_NOT_LOCKED);
+ } else if (type == SCTP_DEL_IP_ADDRESS) {
+ /* For a delete we need to find it in the inp */
+ ifa = sctp_find_ifa_in_ep(inp, sa, SCTP_ADDR_NOT_LOCKED);
+ } else {
+ ifa = NULL;
+ }
+ if (ifa != NULL) {
+ /* add this address */
+ struct sctp_asconf_iterator *asc;
+ struct sctp_laddr *wi;
+
+ SCTP_MALLOC(asc, struct sctp_asconf_iterator *,
+ sizeof(struct sctp_asconf_iterator),
+ SCTP_M_ASC_IT);
+ if (asc == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, ENOMEM);
+ return (ENOMEM);
+ }
+ wi = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_laddr,
+ struct sctp_laddr);
+ if (wi == NULL) {
+ SCTP_FREE(asc, SCTP_M_ASC_IT);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, ENOMEM);
+ return (ENOMEM);
+ }
+ if (type == SCTP_ADD_IP_ADDRESS) {
+ sctp_add_local_addr_ep(inp, ifa, type);
+ } else if (type == SCTP_DEL_IP_ADDRESS) {
+ struct sctp_laddr *laddr;
+
+ if (inp->laddr_count < 2) {
+ /* can't delete the last local address */
+ SCTP_FREE(asc, SCTP_M_ASC_IT);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_laddr, wi);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_ASCONF, EINVAL);
+ return (EINVAL);
+ }
+ LIST_FOREACH(laddr, &inp->sctp_addr_list,
+ sctp_nxt_addr) {
+ if (ifa == laddr->ifa) {
+ /* Mark in the delete */
+ laddr->action = type;
+ }
+ }
+ }
+ LIST_INIT(&asc->list_of_work);
+ asc->cnt = 1;
+ SCTP_INCR_LADDR_COUNT();
+ wi->ifa = ifa;
+ wi->action = type;
+ atomic_add_int(&ifa->refcount, 1);
+ LIST_INSERT_HEAD(&asc->list_of_work, wi, sctp_nxt_addr);
+ (void)sctp_initiate_iterator(sctp_asconf_iterator_ep,
+ sctp_asconf_iterator_stcb,
+ sctp_asconf_iterator_ep_end,
+ SCTP_PCB_ANY_FLAGS,
+ SCTP_PCB_ANY_FEATURES,
+ SCTP_ASOC_ANY_STATE,
+ (void *)asc, 0,
+ sctp_asconf_iterator_end, inp, 0);
+ } else {
+ /* invalid address! */
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_ASCONF, EADDRNOTAVAIL);
+ return (EADDRNOTAVAIL);
+ }
+ return (0);
+}
Index: ip_encap.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_encap.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/ip_encap.c -L sys/netinet/ip_encap.c -u -r1.1.1.1 -r1.2
--- sys/netinet/ip_encap.c
+++ sys/netinet/ip_encap.c
@@ -1,4 +1,3 @@
-/* $FreeBSD: src/sys/netinet/ip_encap.c,v 1.20 2005/01/07 01:45:44 imp Exp $ */
/* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */
/*-
@@ -57,6 +56,9 @@
*/
/* XXX is M_NETADDR correct? */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_encap.c,v 1.24 2007/10/07 20:44:23 silby Exp $");
+
#include "opt_mrouting.h"
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -87,11 +89,9 @@
#include <machine/stdarg.h>
-#include <net/net_osdep.h>
-
#include <sys/kernel.h>
#include <sys/malloc.h>
-static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
+static MALLOC_DEFINE(M_NETADDR, "encap_export_host", "Export host address structure");
static void encap_add(struct encaptab *);
static int mask_match(const struct encaptab *, const struct sockaddr *,
@@ -110,15 +110,13 @@
* it's referenced by KAME pieces in netinet6.
*/
void
-encap_init()
+encap_init(void)
{
}
#ifdef INET
void
-encap4_input(m, off)
- struct mbuf *m;
- int off;
+encap4_input(struct mbuf *m, int off)
{
struct ip *ip;
int proto;
@@ -203,10 +201,7 @@
#ifdef INET6
int
-encap6_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp;
- int proto;
+encap6_input(struct mbuf **mp, int *offp, int proto)
{
struct mbuf *m = *mp;
struct ip6_hdr *ip6;
@@ -274,8 +269,7 @@
/*lint -sem(encap_add, custodial(1)) */
static void
-encap_add(ep)
- struct encaptab *ep;
+encap_add(struct encaptab *ep)
{
mtx_assert(&encapmtx, MA_OWNED);
@@ -288,13 +282,9 @@
* Return value will be necessary as input (cookie) for encap_detach().
*/
const struct encaptab *
-encap_attach(af, proto, sp, sm, dp, dm, psw, arg)
- int af;
- int proto;
- const struct sockaddr *sp, *sm;
- const struct sockaddr *dp, *dm;
- const struct protosw *psw;
- void *arg;
+encap_attach(int af, int proto, const struct sockaddr *sp,
+ const struct sockaddr *sm, const struct sockaddr *dp,
+ const struct sockaddr *dm, const struct protosw *psw, void *arg)
{
struct encaptab *ep;
@@ -348,12 +338,9 @@
}
const struct encaptab *
-encap_attach_func(af, proto, func, psw, arg)
- int af;
- int proto;
- int (*func)(const struct mbuf *, int, int, void *);
- const struct protosw *psw;
- void *arg;
+encap_attach_func(int af, int proto,
+ int (*func)(const struct mbuf *, int, int, void *),
+ const struct protosw *psw, void *arg)
{
struct encaptab *ep;
@@ -379,8 +366,7 @@
}
int
-encap_detach(cookie)
- const struct encaptab *cookie;
+encap_detach(const struct encaptab *cookie)
{
const struct encaptab *ep = cookie;
struct encaptab *p;
@@ -400,10 +386,8 @@
}
static int
-mask_match(ep, sp, dp)
- const struct encaptab *ep;
- const struct sockaddr *sp;
- const struct sockaddr *dp;
+mask_match(const struct encaptab *ep, const struct sockaddr *sp,
+ const struct sockaddr *dp)
{
struct sockaddr_storage s;
struct sockaddr_storage d;
@@ -453,9 +437,7 @@
}
static void
-encap_fillarg(m, ep)
- struct mbuf *m;
- const struct encaptab *ep;
+encap_fillarg(struct mbuf *m, const struct encaptab *ep)
{
struct m_tag *tag;
@@ -467,8 +449,7 @@
}
void *
-encap_getarg(m)
- struct mbuf *m;
+encap_getarg(struct mbuf *m)
{
void *p = NULL;
struct m_tag *tag;
--- /dev/null
+++ sys/netinet/sctp_constants.h
@@ -0,0 +1,1115 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_constants.h,v 1.17 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_constants.h,v 1.32.2.2 2007/11/06 02:48:03 rrs Exp $");
+
+#ifndef __sctp_constants_h__
+#define __sctp_constants_h__
+
+/* Number of packets to get before sack sent by default */
+#define SCTP_DEFAULT_SACK_FREQ 2
+
+/* Address limit - This variable is calculated
+ * based on a 65535 byte max ip packet. We take out 100 bytes
+ * for the cookie, 40 bytes for a v6 header and 32
+ * bytes for the init structure. A second init structure
+ * for the init-ack and then finally a third one for the
+ * embedded init. This yields 100+40+(3 * 32) = 236 bytes.
+ * This leaves 65299 bytes for addresses. We throw out the 299 bytes.
+ * Now whatever we send in the INIT() we need to allow to get back in the
+ * INIT-ACK plus all the values from INIT and INIT-ACK
+ * listed in the cookie. Plus we need some overhead for
+ * maybe copied parameters in the COOKIE. If we
+ * allow 1080 addresses, and each side has 1080 V6 addresses
+ * that will be 21600 bytes. In the INIT-ACK we will
+ * see the INIT-ACK 21600 + 43200 in the cookie. This leaves
+ * about 500 bytes slack for misc things in the cookie.
+ */
+#define SCTP_ADDRESS_LIMIT 1080
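
A quick arithmetic sketch of the budget described in the comment above (illustrative only; the 20 bytes per IPv6 address parameter -- a 4-byte TLV header plus a 16-byte address -- and the EX_* names are assumptions, not part of the file):

    /* fixed overhead: cookie + v6 header + three init structures */
    #define EX_OVERHEAD        (100 + 40 + (3 * 32))               /* 236   */
    #define EX_ADDR_BUDGET     (65535 - EX_OVERHEAD)               /* 65299 */
    #define EX_INIT_ADDR_BYTES (SCTP_ADDRESS_LIMIT * 20)           /* 21600 */
    #define EX_COOKIE_ECHOED   (EX_INIT_ADDR_BYTES + 43200)        /* 64800 */
    #define EX_SLACK           (EX_ADDR_BUDGET - EX_COOKIE_ECHOED) /* ~499  */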
+
+/* We need at least 2k of space for ourselves; inits
+ * larger than that, let's abort.
+ */
+#define SCTP_LARGEST_INIT_ACCEPTED (65535 - 2048)
+
+/* Number of addresses where we just skip the counting */
+#define SCTP_COUNT_LIMIT 40
+
+#define SCTP_ZERO_COPY_TICK_DELAY (((100 * hz) + 999) / 1000)
+#define SCTP_ZERO_COPY_SENDQ_TICK_DELAY (((100 * hz) + 999) / 1000)
+
+/* Number of ticks to delay before running
+ * iterator on an address change.
+ */
+#define SCTP_ADDRESS_TICK_DELAY 2
+
+#define SCTP_VERSION_STRING "KAME-BSD 1.1"
+/* #define SCTP_AUDITING_ENABLED 1 used for debug/auditing */
+#define SCTP_AUDIT_SIZE 256
+
+#define SCTP_USE_THREAD_BASED_ITERATOR 1
+
+#define SCTP_KTRHEAD_NAME "sctp_iterator"
+#define SCTP_KTHREAD_PAGES 2
+
+
+/* If you support Multi-VRF, how big to
+ * make the initial array of VRFs.
+ */
+#define SCTP_DEFAULT_VRF_SIZE 4
+
+/* constants for rto calc */
+#define sctp_align_safe_nocopy 0
+#define sctp_align_unsafe_makecopy 1
+
+/* JRS - Values defined for the HTCP algorithm */
+#define ALPHA_BASE (1<<7) /* 1.0 with shift << 7 */
+#define BETA_MIN (1<<6) /* 0.5 with shift << 7 */
+#define BETA_MAX 102 /* 0.8 with shift << 7 */
+
+/* Places that CWND log can happen from */
+#define SCTP_CWND_LOG_FROM_FR 1
+#define SCTP_CWND_LOG_FROM_RTX 2
+#define SCTP_CWND_LOG_FROM_BRST 3
+#define SCTP_CWND_LOG_FROM_SS 4
+#define SCTP_CWND_LOG_FROM_CA 5
+#define SCTP_CWND_LOG_FROM_SAT 6
+#define SCTP_BLOCK_LOG_INTO_BLK 7
+#define SCTP_BLOCK_LOG_OUTOF_BLK 8
+#define SCTP_BLOCK_LOG_CHECK 9
+#define SCTP_STR_LOG_FROM_INTO_STRD 10
+#define SCTP_STR_LOG_FROM_IMMED_DEL 11
+#define SCTP_STR_LOG_FROM_INSERT_HD 12
+#define SCTP_STR_LOG_FROM_INSERT_MD 13
+#define SCTP_STR_LOG_FROM_INSERT_TL 14
+#define SCTP_STR_LOG_FROM_MARK_TSN 15
+#define SCTP_STR_LOG_FROM_EXPRS_DEL 16
+#define SCTP_FR_LOG_BIGGEST_TSNS 17
+#define SCTP_FR_LOG_STRIKE_TEST 18
+#define SCTP_FR_LOG_STRIKE_CHUNK 19
+#define SCTP_FR_T3_TIMEOUT 20
+#define SCTP_MAP_PREPARE_SLIDE 21
+#define SCTP_MAP_SLIDE_FROM 22
+#define SCTP_MAP_SLIDE_RESULT 23
+#define SCTP_MAP_SLIDE_CLEARED 24
+#define SCTP_MAP_SLIDE_NONE 25
+#define SCTP_FR_T3_MARK_TIME 26
+#define SCTP_FR_T3_MARKED 27
+#define SCTP_FR_T3_STOPPED 28
+#define SCTP_FR_MARKED 30
+#define SCTP_CWND_LOG_NOADV_SS 31
+#define SCTP_CWND_LOG_NOADV_CA 32
+#define SCTP_MAX_BURST_APPLIED 33
+#define SCTP_MAX_IFP_APPLIED 34
+#define SCTP_MAX_BURST_ERROR_STOP 35
+#define SCTP_INCREASE_PEER_RWND 36
+#define SCTP_DECREASE_PEER_RWND 37
+#define SCTP_SET_PEER_RWND_VIA_SACK 38
+#define SCTP_LOG_MBCNT_INCREASE 39
+#define SCTP_LOG_MBCNT_DECREASE 40
+#define SCTP_LOG_MBCNT_CHKSET 41
+#define SCTP_LOG_NEW_SACK 42
+#define SCTP_LOG_TSN_ACKED 43
+#define SCTP_LOG_TSN_REVOKED 44
+#define SCTP_LOG_LOCK_TCB 45
+#define SCTP_LOG_LOCK_INP 46
+#define SCTP_LOG_LOCK_SOCK 47
+#define SCTP_LOG_LOCK_SOCKBUF_R 48
+#define SCTP_LOG_LOCK_SOCKBUF_S 49
+#define SCTP_LOG_LOCK_CREATE 50
+#define SCTP_LOG_INITIAL_RTT 51
+#define SCTP_LOG_RTTVAR 52
+#define SCTP_LOG_SBALLOC 53
+#define SCTP_LOG_SBFREE 54
+#define SCTP_LOG_SBRESULT 55
+#define SCTP_FR_DUPED 56
+#define SCTP_FR_MARKED_EARLY 57
+#define SCTP_FR_CWND_REPORT 58
+#define SCTP_FR_CWND_REPORT_START 59
+#define SCTP_FR_CWND_REPORT_STOP 60
+#define SCTP_CWND_LOG_FROM_SEND 61
+#define SCTP_CWND_INITIALIZATION 62
+#define SCTP_CWND_LOG_FROM_T3 63
+#define SCTP_CWND_LOG_FROM_SACK 64
+#define SCTP_CWND_LOG_NO_CUMACK 65
+#define SCTP_CWND_LOG_FROM_RESEND 66
+#define SCTP_FR_LOG_CHECK_STRIKE 67
+#define SCTP_SEND_NOW_COMPLETES 68
+#define SCTP_CWND_LOG_FILL_OUTQ_CALLED 69
+#define SCTP_CWND_LOG_FILL_OUTQ_FILLS 70
+#define SCTP_LOG_FREE_SENT 71
+#define SCTP_NAGLE_APPLIED 72
+#define SCTP_NAGLE_SKIPPED 73
+#define SCTP_WAKESND_FROM_SACK 74
+#define SCTP_WAKESND_FROM_FWDTSN 75
+#define SCTP_NOWAKE_FROM_SACK 76
+#define SCTP_CWNDLOG_PRESEND 77
+#define SCTP_CWNDLOG_ENDSEND 78
+#define SCTP_AT_END_OF_SACK 79
+#define SCTP_REASON_FOR_SC 80
+#define SCTP_BLOCK_LOG_INTO_BLKA 81
+#define SCTP_ENTER_USER_RECV 82
+#define SCTP_USER_RECV_SACKS 83
+#define SCTP_SORECV_BLOCKSA 84
+#define SCTP_SORECV_BLOCKSB 85
+#define SCTP_SORECV_DONE 86
+#define SCTP_SACK_RWND_UPDATE 87
+#define SCTP_SORECV_ENTER 88
+#define SCTP_SORECV_ENTERPL 89
+#define SCTP_MBUF_INPUT 90
+#define SCTP_MBUF_IALLOC 91
+#define SCTP_MBUF_IFREE 92
+#define SCTP_MBUF_ICOPY 93
+#define SCTP_SORCV_FREECTL 94
+#define SCTP_SORCV_DOESCPY 95
+#define SCTP_SORCV_DOESLCK 96
+#define SCTP_SORCV_DOESADJ 97
+#define SCTP_SORCV_BOTWHILE 98
+#define SCTP_SORCV_PASSBF 99
+#define SCTP_SORCV_ADJD 100
+#define SCTP_UNKNOWN_MAX 101
+#define SCTP_RANDY_STUFF 102
+#define SCTP_RANDY_STUFF1 103
+#define SCTP_STRMOUT_LOG_ASSIGN 104
+#define SCTP_STRMOUT_LOG_SEND 105
+#define SCTP_FLIGHT_LOG_DOWN_CA 106
+#define SCTP_FLIGHT_LOG_UP 107
+#define SCTP_FLIGHT_LOG_DOWN_GAP 108
+#define SCTP_FLIGHT_LOG_DOWN_RSND 109
+#define SCTP_FLIGHT_LOG_UP_RSND 110
+#define SCTP_FLIGHT_LOG_DOWN_RSND_TO 111
+#define SCTP_FLIGHT_LOG_DOWN_WP 112
+#define SCTP_FLIGHT_LOG_UP_REVOKE 113
+#define SCTP_FLIGHT_LOG_DOWN_PDRP 114
+#define SCTP_FLIGHT_LOG_DOWN_PMTU 115
+#define SCTP_SACK_LOG_NORMAL 116
+#define SCTP_SACK_LOG_EXPRESS 117
+#define SCTP_MAP_TSN_ENTERS 118
+#define SCTP_THRESHOLD_CLEAR 119
+#define SCTP_THRESHOLD_INCR 120
+
+#define SCTP_LOG_MAX_TYPES 121
+/*
+ * To turn on various logging, you must first enable 'options KTR' and
+ * you might want to bump the entries 'options KTR_ENTRIES=80000'.
+ * To get something to log you define one of the logging defines.
+ * (see LINT).
+ *
+ * This gets the compile in place, but you still need to turn the
+ * logging flag on too in the sysctl (see in sctp.h).
+ */
+
+#define SCTP_LOG_EVENT_UNKNOWN 0
+#define SCTP_LOG_EVENT_CWND 1
+#define SCTP_LOG_EVENT_BLOCK 2
+#define SCTP_LOG_EVENT_STRM 3
+#define SCTP_LOG_EVENT_FR 4
+#define SCTP_LOG_EVENT_MAP 5
+#define SCTP_LOG_EVENT_MAXBURST 6
+#define SCTP_LOG_EVENT_RWND 7
+#define SCTP_LOG_EVENT_MBCNT 8
+#define SCTP_LOG_EVENT_SACK 9
+#define SCTP_LOG_LOCK_EVENT 10
+#define SCTP_LOG_EVENT_RTT 11
+#define SCTP_LOG_EVENT_SB 12
+#define SCTP_LOG_EVENT_NAGLE 13
+#define SCTP_LOG_EVENT_WAKE 14
+#define SCTP_LOG_MISC_EVENT 15
+#define SCTP_LOG_EVENT_CLOSE 16
+#define SCTP_LOG_EVENT_MBUF 17
+#define SCTP_LOG_CHUNK_PROC 18
+#define SCTP_LOG_ERROR_RET 19
+
+#define SCTP_LOG_MAX_EVENT 20
+
+#define SCTP_LOCK_UNKNOWN 2
+
+
+/* number of associations by default for zone allocation */
+#define SCTP_MAX_NUM_OF_ASOC 40000
+/* how many addresses per assoc remote and local */
+#define SCTP_SCALE_FOR_ADDR 2
+
+/* default AUTO_ASCONF mode enable(1)/disable(0) value (sysctl) */
+#if defined (__APPLE__) && !defined(SCTP_APPLE_AUTO_ASCONF)
+#define SCTP_DEFAULT_AUTO_ASCONF 0
+#else
+#define SCTP_DEFAULT_AUTO_ASCONF 1
+#endif
+
+/* default MOBILITY_BASE mode enable(1)/disable(0) value (sysctl) */
+#if defined (__APPLE__) && !defined(SCTP_APPLE_MOBILITY_BASE)
+#define SCTP_DEFAULT_MOBILITY_BASE 0
+#else
+#define SCTP_DEFAULT_MOBILITY_BASE 0
+#endif
+
+/* default MOBILITY_FASTHANDOFF mode enable(1)/disable(0) value (sysctl) */
+#if defined (__APPLE__) && !defined(SCTP_APPLE_MOBILITY_FASTHANDOFF)
+#define SCTP_DEFAULT_MOBILITY_FASTHANDOFF 0
+#else
+#define SCTP_DEFAULT_MOBILITY_FASTHANDOFF 0
+#endif
+
+/*
+ * Threshold for rwnd updates, we have to read (sb_hiwat >>
+ * SCTP_RWND_HIWAT_SHIFT) before we will look to see if we need to send a
+ * window update sack. When we look, we compare the last rwnd we sent vs the
+ * current rwnd. It too must be greater than this value. Using 3 divides the
+ * hiwat by 8, so for 200k rwnd we need to read 24k. For a 64k rwnd we need
+ * to read 8k. This seems about right.. I hope :-D.. we do set a
+ * min of an MTU on it so if the rwnd is really small we will insist
+ * on a full MTU of 1500 bytes.
+ */
+#define SCTP_RWND_HIWAT_SHIFT 3
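
A minimal sketch of how the shift turns sb_hiwat into a read threshold (example_rwnd_update_due is a hypothetical helper, not part of the stack; the real check also compares the last advertised rwnd against the current one):

    static int
    example_rwnd_update_due(uint32_t sb_hiwat, uint32_t bytes_read, uint32_t mtu)
    {
    	/* e.g. a 64k hiwat with shift 3 gives an 8k read threshold */
    	uint32_t thresh = sb_hiwat >> SCTP_RWND_HIWAT_SHIFT;

    	if (thresh < mtu)
    		thresh = mtu;	/* never less than a full MTU */
    	return (bytes_read >= thresh);
    }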
+
+/* How much of the rwnd must the
+ * message be taking up to start partial delivery.
+ * We calculate this by shifting the hi_water (recv_win)
+ * left the following .. set to 1, when a message holds
+ * 1/2 the rwnd. If we set it to 2 when a message holds
+ * 1/4 the rwnd...etc..
+ */
+
+#define SCTP_PARTIAL_DELIVERY_SHIFT 1
+
+/* Minimum number of bytes read by user before we
+ * consider doing a rwnd update
+ */
+#define SCTP_MIN_READ_BEFORE_CONSIDERING 3000
+
+/*
+ * default HMAC for cookies, etc... use one of the AUTH HMAC id's
+ * SCTP_HMAC is the HMAC_ID to use
+ * SCTP_SIGNATURE_SIZE is the digest length
+ */
+#define SCTP_HMAC SCTP_AUTH_HMAC_ID_SHA1
+#define SCTP_SIGNATURE_SIZE SCTP_AUTH_DIGEST_LEN_SHA1
+#define SCTP_SIGNATURE_ALOC_SIZE SCTP_SIGNATURE_SIZE
+
+/* DEFINE HERE WHAT CRC YOU WANT TO USE */
+#define SCTP_USECRC_RFC2960 1
+/* #define SCTP_USECRC_FLETCHER 1 */
+/* #define SCTP_USECRC_SSHCRC32 1 */
+/* #define SCTP_USECRC_FASTCRC32 1 */
+/* #define SCTP_USECRC_CRC32 1 */
+/* #define SCTP_USECRC_TCP32 1 */
+/* #define SCTP_USECRC_CRC16SMAL 1 */
+/* #define SCTP_USECRC_CRC16 1 */
+/* #define SCTP_USECRC_MODADLER 1 */
+
+#ifndef SCTP_ADLER32_BASE
+#define SCTP_ADLER32_BASE 65521
+#endif
+
+/*
+ * the SCTP protocol signature this includes the version number encoded in
+ * the last 4 bits of the signature.
+ */
+#define PROTO_SIGNATURE_A 0x30000000
+#define SCTP_VERSION_NUMBER 0x3
+
+#define MAX_TSN 0xffffffff
+#define MAX_SEQ 0xffff
+
+/* how many executions every N ticks */
+#define SCTP_ITERATOR_MAX_AT_ONCE 20
+
+/* number of clock ticks between iterator executions */
+#define SCTP_ITERATOR_TICKS 1
+
+/*
+ * option: If you comment out the following you will receive the old behavior
+ * of obeying cwnd for the fast retransmit algorithm. With this defined a FR
+ * happens right away without waiting for the flightsize to drop below the
+ * cwnd value (which is reduced by the FR to 1/2 the inflight packets).
+ */
+#define SCTP_IGNORE_CWND_ON_FR 1
+
+/*
+ * Adds implementors guide behavior to only use newest highest update in SACK
+ * gap ack's to figure out if you need to stroke a chunk for FR.
+ */
+#define SCTP_NO_FR_UNLESS_SEGMENT_SMALLER 1
+
+/* default max I can burst out after a fast retransmit */
+#define SCTP_DEF_MAX_BURST 4
+/* IP hdr (20/40) + 12+2+2 (enet) + sctp common 12 */
+#define SCTP_FIRST_MBUF_RESV 68
+/* Packet transmit states in the sent field */
+#define SCTP_DATAGRAM_UNSENT 0
+#define SCTP_DATAGRAM_SENT 1
+#define SCTP_DATAGRAM_RESEND1 2 /* not used (in code, but may
+ * hit this value) */
+#define SCTP_DATAGRAM_RESEND2 3 /* not used (in code, but may
+ * hit this value) */
+#define SCTP_DATAGRAM_RESEND 4
+#define SCTP_DATAGRAM_ACKED 10010
+#define SCTP_DATAGRAM_INBOUND 10011
+#define SCTP_READY_TO_TRANSMIT 10012
+#define SCTP_DATAGRAM_MARKED 20010
+#define SCTP_FORWARD_TSN_SKIP 30010
+
+/* chunk output send from locations */
+#define SCTP_OUTPUT_FROM_USR_SEND 0
+#define SCTP_OUTPUT_FROM_T3 1
+#define SCTP_OUTPUT_FROM_INPUT_ERROR 2
+#define SCTP_OUTPUT_FROM_CONTROL_PROC 3
+#define SCTP_OUTPUT_FROM_SACK_TMR 4
+#define SCTP_OUTPUT_FROM_SHUT_TMR 5
+#define SCTP_OUTPUT_FROM_HB_TMR 6
+#define SCTP_OUTPUT_FROM_SHUT_ACK_TMR 7
+#define SCTP_OUTPUT_FROM_ASCONF_TMR 8
+#define SCTP_OUTPUT_FROM_STRRST_TMR 9
+#define SCTP_OUTPUT_FROM_AUTOCLOSE_TMR 10
+#define SCTP_OUTPUT_FROM_EARLY_FR_TMR 11
+#define SCTP_OUTPUT_FROM_STRRST_REQ 12
+#define SCTP_OUTPUT_FROM_USR_RCVD 13
+#define SCTP_OUTPUT_FROM_COOKIE_ACK 14
+#define SCTP_OUTPUT_FROM_DRAIN 15
+#define SCTP_OUTPUT_FROM_CLOSING 16
+/* SCTP chunk types are moved to sctp.h for application (NAT, FW) use */
+
+/* align to 32-bit sizes */
+#define SCTP_SIZE32(x) ((((x)+3) >> 2) << 2)
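
For example, the macro pads any TLV length up to the next 4-byte boundary:

    /* SCTP_SIZE32(4) == 4, SCTP_SIZE32(5) == 8, SCTP_SIZE32(9) == 12:
     * ((x + 3) >> 2) << 2 rounds a parameter length up to a multiple of 4. */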
+
+#define IS_SCTP_CONTROL(a) ((a)->chunk_type != SCTP_DATA)
+#define IS_SCTP_DATA(a) ((a)->chunk_type == SCTP_DATA)
+
+
+/* SCTP parameter types */
+/*************0x0000 series*************/
+#define SCTP_HEARTBEAT_INFO 0x0001
+#define SCTP_IPV4_ADDRESS 0x0005
+#define SCTP_IPV6_ADDRESS 0x0006
+#define SCTP_STATE_COOKIE 0x0007
+#define SCTP_UNRECOG_PARAM 0x0008
+#define SCTP_COOKIE_PRESERVE 0x0009
+#define SCTP_HOSTNAME_ADDRESS 0x000b
+#define SCTP_SUPPORTED_ADDRTYPE 0x000c
+
+/* draft-ietf-stewart-strreset-xxx */
+#define SCTP_STR_RESET_OUT_REQUEST 0x000d
+#define SCTP_STR_RESET_IN_REQUEST 0x000e
+#define SCTP_STR_RESET_TSN_REQUEST 0x000f
+#define SCTP_STR_RESET_RESPONSE 0x0010
+
+#define SCTP_MAX_RESET_PARAMS 2
+#define SCTP_STREAM_RESET_TSN_DELTA 0x1000
+
+/*************0x4000 series*************/
+
+/*************0x8000 series*************/
+#define SCTP_ECN_CAPABLE 0x8000
+/* ECN Nonce: draft-ladha-sctp-ecn-nonce */
+#define SCTP_ECN_NONCE_SUPPORTED 0x8001
+/* draft-ietf-tsvwg-auth-xxx */
+#define SCTP_RANDOM 0x8002
+#define SCTP_CHUNK_LIST 0x8003
+#define SCTP_HMAC_LIST 0x8004
+/*
+ * draft-ietf-tsvwg-addip-sctp-xx: param=0x8008, len=0xNNNN, followed by
+ * one byte per supported chunk type extension.
+ *
+ * Where each byte is a chunk type extension supported. For example, to support
+ * all chunks one would have (in hex):
+ *
+ * 80 08 00 09 C0 C1 80 81 82 00 00 00
+ *
+ * Has the parameter. C0 = PR-SCTP (RFC3758), C1/80 = ASCONF (addip draft),
+ * 81 = Packet Drop, 82 = Stream Reset, 83 = Authentication
+ */
+#define SCTP_SUPPORTED_CHUNK_EXT 0x8008
+
+/*************0xC000 series*************/
+#define SCTP_PRSCTP_SUPPORTED 0xc000
+/* draft-ietf-tsvwg-addip-sctp */
+#define SCTP_ADD_IP_ADDRESS 0xc001
+#define SCTP_DEL_IP_ADDRESS 0xc002
+#define SCTP_ERROR_CAUSE_IND 0xc003
+#define SCTP_SET_PRIM_ADDR 0xc004
+#define SCTP_SUCCESS_REPORT 0xc005
+#define SCTP_ULP_ADAPTATION 0xc006
+
+/* Notification error codes */
+#define SCTP_NOTIFY_DATAGRAM_UNSENT 0x0001
+#define SCTP_NOTIFY_DATAGRAM_SENT 0x0002
+#define SCTP_FAILED_THRESHOLD 0x0004
+#define SCTP_HEARTBEAT_SUCCESS 0x0008
+#define SCTP_RESPONSE_TO_USER_REQ 0x0010
+#define SCTP_INTERNAL_ERROR 0x0020
+#define SCTP_SHUTDOWN_GUARD_EXPIRES 0x0040
+#define SCTP_RECEIVED_SACK 0x0080
+#define SCTP_PEER_FAULTY 0x0100
+#define SCTP_ICMP_REFUSED 0x0200
+
+/* bits for TOS field */
+#define SCTP_ECT0_BIT 0x02
+#define SCTP_ECT1_BIT 0x01
+#define SCTP_CE_BITS 0x03
+
+/* below turns off above */
+#define SCTP_FLEXIBLE_ADDRESS 0x20
+#define SCTP_NO_HEARTBEAT 0x40
+
+/* mask to get sticky */
+#define SCTP_STICKY_OPTIONS_MASK 0x0c
+
+
+/*
+ * SCTP states for internal state machine XXX (should match "user" values)
+ */
+#define SCTP_STATE_EMPTY 0x0000
+#define SCTP_STATE_INUSE 0x0001
+#define SCTP_STATE_COOKIE_WAIT 0x0002
+#define SCTP_STATE_COOKIE_ECHOED 0x0004
+#define SCTP_STATE_OPEN 0x0008
+#define SCTP_STATE_SHUTDOWN_SENT 0x0010
+#define SCTP_STATE_SHUTDOWN_RECEIVED 0x0020
+#define SCTP_STATE_SHUTDOWN_ACK_SENT 0x0040
+#define SCTP_STATE_SHUTDOWN_PENDING 0x0080
+#define SCTP_STATE_CLOSED_SOCKET 0x0100
+#define SCTP_STATE_ABOUT_TO_BE_FREED 0x0200
+#define SCTP_STATE_PARTIAL_MSG_LEFT 0x0400
+#define SCTP_STATE_WAS_ABORTED 0x0800
+#define SCTP_STATE_MASK 0x007f
+
+#define SCTP_GET_STATE(asoc) ((asoc)->state & SCTP_STATE_MASK)
+#define SCTP_SET_STATE(asoc, newstate) ((asoc)->state = ((asoc)->state & ~SCTP_STATE_MASK) | newstate)
+#define SCTP_CLEAR_SUBSTATE(asoc, substate) ((asoc)->state &= ~substate)
+#define SCTP_ADD_SUBSTATE(asoc, substate) ((asoc)->state |= substate)
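
A small usage sketch (the asoc pointer is hypothetical): substate bits above the 0x007f mask survive a state change.

    /* SCTP_SET_STATE(asoc, SCTP_STATE_OPEN) clears only the bits under
     * SCTP_STATE_MASK and ORs in 0x0008, so a substate flag such as
     * SCTP_STATE_ABOUT_TO_BE_FREED (0x0200) is preserved;
     * SCTP_GET_STATE(asoc) afterwards returns SCTP_STATE_OPEN. */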
+
+/* SCTP reachability state for each address */
+#define SCTP_ADDR_REACHABLE 0x001
+#define SCTP_ADDR_NOT_REACHABLE 0x002
+#define SCTP_ADDR_NOHB 0x004
+#define SCTP_ADDR_BEING_DELETED 0x008
+#define SCTP_ADDR_NOT_IN_ASSOC 0x010
+#define SCTP_ADDR_WAS_PRIMARY 0x020
+#define SCTP_ADDR_SWITCH_PRIMARY 0x040
+#define SCTP_ADDR_OUT_OF_SCOPE 0x080
+#define SCTP_ADDR_DOUBLE_SWITCH 0x100
+#define SCTP_ADDR_UNCONFIRMED 0x200
+#define SCTP_ADDR_REQ_PRIMARY 0x400
+/* JRS 5/13/07 - Added potentially failed state for CMT PF */
+#define SCTP_ADDR_PF 0x800
+#define SCTP_REACHABLE_MASK 0x203
+
+/* bound address types (e.g. valid address types to allow) */
+#define SCTP_BOUND_V6 0x01
+#define SCTP_BOUND_V4 0x02
+
+/*
+ * what is the default number of mbufs in a chain I allow before switching to
+ * a cluster
+ */
+#define SCTP_DEFAULT_MBUFS_IN_CHAIN 5
+
+/* How long a cookie lives in milli-seconds */
+#define SCTP_DEFAULT_COOKIE_LIFE 60000
+
+/* resource limit of streams */
+#define MAX_SCTP_STREAMS 2048
+
+/* Maximum the mapping array will grow to (TSN mapping array) */
+#define SCTP_MAPPING_ARRAY 512
+
+/* size of the initial malloc on the mapping array */
+#define SCTP_INITIAL_MAPPING_ARRAY 16
+/* how much we grow the mapping array each call */
+#define SCTP_MAPPING_ARRAY_INCR 32
+
+/*
+ * Here we define the timer types used by the implementation as arguments in
+ * the set/get timer type calls.
+ */
+#define SCTP_TIMER_INIT 0
+#define SCTP_TIMER_RECV 1
+#define SCTP_TIMER_SEND 2
+#define SCTP_TIMER_HEARTBEAT 3
+#define SCTP_TIMER_PMTU 4
+#define SCTP_TIMER_MAXSHUTDOWN 5
+#define SCTP_TIMER_SIGNATURE 6
+/*
+ * number of timer types in the base SCTP structure used in the set/get and
+ * has the base default.
+ */
+#define SCTP_NUM_TMRS 7
+
+/* timer types */
+#define SCTP_TIMER_TYPE_NONE 0
+#define SCTP_TIMER_TYPE_SEND 1
+#define SCTP_TIMER_TYPE_INIT 2
+#define SCTP_TIMER_TYPE_RECV 3
+#define SCTP_TIMER_TYPE_SHUTDOWN 4
+#define SCTP_TIMER_TYPE_HEARTBEAT 5
+#define SCTP_TIMER_TYPE_COOKIE 6
+#define SCTP_TIMER_TYPE_NEWCOOKIE 7
+#define SCTP_TIMER_TYPE_PATHMTURAISE 8
+#define SCTP_TIMER_TYPE_SHUTDOWNACK 9
+#define SCTP_TIMER_TYPE_ASCONF 10
+#define SCTP_TIMER_TYPE_SHUTDOWNGUARD 11
+#define SCTP_TIMER_TYPE_AUTOCLOSE 12
+#define SCTP_TIMER_TYPE_EVENTWAKE 13
+#define SCTP_TIMER_TYPE_STRRESET 14
+#define SCTP_TIMER_TYPE_INPKILL 15
+#define SCTP_TIMER_TYPE_ITERATOR 16
+#define SCTP_TIMER_TYPE_EARLYFR 17
+#define SCTP_TIMER_TYPE_ASOCKILL 18
+#define SCTP_TIMER_TYPE_ADDR_WQ 19
+#define SCTP_TIMER_TYPE_ZERO_COPY 20
+#define SCTP_TIMER_TYPE_ZCOPY_SENDQ 21
+#define SCTP_TIMER_TYPE_PRIM_DELETED 22
+/* add new timers here - and increment LAST */
+#define SCTP_TIMER_TYPE_LAST 23
+
+#define SCTP_IS_TIMER_TYPE_VALID(t) (((t) > SCTP_TIMER_TYPE_NONE) && \
+ ((t) < SCTP_TIMER_TYPE_LAST))
+
+
+
+/*
+ * Number of ticks before the delayed soxwakeup() event is sent AFTER
+ * the accept() call
+ */
+#define SCTP_EVENTWAKEUP_WAIT_TICKS 3000
+
+/*
+ * Of course we really don't collect stale cookies, being folks of discerning
+ * taste. However we do count them, and if we get too many before the
+ * association comes up.. we give up. Below is the constant that dictates when
+ * we give it up...this is an implementation dependent treatment. In ours we
+ * do not ask for an extension of time, but just retry this many times...
+ */
+#define SCTP_MAX_STALE_COOKIES_I_COLLECT 10
+
+/* max number of TSN's dup'd that I will hold */
+#define SCTP_MAX_DUP_TSNS 20
+
+/*
+ * Here we define the types used when setting the retry amounts.
+ */
+/* constants for type of set */
+#define SCTP_MAXATTEMPT_INIT 2
+#define SCTP_MAXATTEMPT_SEND 3
+
+/* Maximum TSN's we will summarize in a drop report */
+#define SCTP_MAX_DROP_REPORT 16
+
+/* How many drop re-attempts we make on INIT/COOKIE-ECHO */
+#define SCTP_RETRY_DROPPED_THRESH 4
+
+/*
+ * And the max we will keep a history of in the tcb which MUST be lower than
+ * 256.
+ */
+#define SCTP_MAX_DROP_SAVE_REPORT 16
+
+/*
+ * Here we define the default timers and the default number of attempts we
+ * make for each respective side (send/init).
+ */
+
+/*
+ * Maximum number of chunks a single association can have on it. Note that
+ * this is a squishy number since the count can run over this if the user
+ * sends a large message down .. the fragmented chunks don't count until
+ * AFTER the message is on queue.. it would be the next send that blocks
+ * things. This number will get tuned up at boot in the sctp_init and use the
+ * number of clusters as a base. This way high bandwidth environments will
+ * not get impacted by the lower bandwidth sending a bunch of 1 byte chunks
+ */
+#define SCTP_ASOC_MAX_CHUNKS_ON_QUEUE 512
+
+
+/* The conversion from time to ticks and vice versa is done by rounding
+ * upwards. This way we can test in the code the time to be positive and
+ * know that this corresponds to a positive number of ticks.
+ */
+#define MSEC_TO_TICKS(x) ((hz == 1000) ? x : ((((x) * hz) + 999) / 1000))
+#define TICKS_TO_MSEC(x) ((hz == 1000) ? x : ((((x) * 1000) + (hz - 1)) / hz))
+
+#define SEC_TO_TICKS(x) ((x) * hz)
+#define TICKS_TO_SEC(x) (((x) + (hz - 1)) / hz)
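
A worked example of the round-up behaviour, assuming hz = 100 (10 ms ticks):

    /* MSEC_TO_TICKS(250) = ((250 * 100) + 999) / 1000 = 25 ticks
     * MSEC_TO_TICKS(5)   = ((5 * 100) + 999) / 1000   = 1 tick (never 0)
     * TICKS_TO_MSEC(1)   = ((1 * 1000) + 99) / 100    = 10 ms
     * SEC_TO_TICKS(3)    = 300, TICKS_TO_SEC(301)     = 4 (rounds up) */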
+
+/*
+ * Basically the minimum amount of time before I do an early FR. Making this
+ * value too low will cause duplicate retransmissions.
+ */
+#define SCTP_MINFR_MSEC_TIMER 250
+/* The floor this value is allowed to fall to when starting a timer. */
+#define SCTP_MINFR_MSEC_FLOOR 20
+
+/* init timer def = 1 sec */
+#define SCTP_INIT_SEC 1
+
+/* send timer def = 1 second */
+#define SCTP_SEND_SEC 1
+
+/* recv timer def = 200ms */
+#define SCTP_RECV_MSEC 200
+
+/* 30 seconds + RTO (in ms) */
+#define SCTP_HB_DEFAULT_MSEC 30000
+
+/* Max time I will wait for Shutdown to complete */
+#define SCTP_DEF_MAX_SHUTDOWN_SEC 180
+
+
+/*
+ * This is how long a secret lives, NOT how long a cookie lives: how many
+ * ticks the current secret will live.
+ */
+#define SCTP_DEFAULT_SECRET_LIFE_SEC 3600
+
+#define SCTP_RTO_UPPER_BOUND (60000) /* 60 sec in ms */
+#define SCTP_RTO_UPPER_BOUND_SEC 60 /* for the init timer */
+#define SCTP_RTO_LOWER_BOUND (1000) /* 1 sec in ms */
+#define SCTP_RTO_INITIAL (3000) /* 3 sec in ms */
+
+
+#define SCTP_INP_KILL_TIMEOUT 20 /* number of ms to retry kill of inpcb */
+#define SCTP_ASOC_KILL_TIMEOUT 10 /* number of ms to retry kill of asoc */
+
+#define SCTP_DEF_MAX_INIT 8
+#define SCTP_DEF_MAX_SEND 10
+#define SCTP_DEF_MAX_PATH_RTX 5
+
+#define SCTP_DEF_PMTU_RAISE_SEC 600 /* 10 min between raise attempts */
+#define SCTP_DEF_PMTU_MIN 600
+
+
+#define SCTP_MSEC_IN_A_SEC 1000
+#define SCTP_USEC_IN_A_SEC 1000000
+#define SCTP_NSEC_IN_A_SEC 1000000000
+
+#define SCTP_MAX_OUTSTANDING_DG 10000
+
+/* How many streams I request initially by default */
+#define SCTP_OSTREAM_INITIAL 10
+
+/*
+ * How many smallest_mtu's need to increase before a window update sack is
+ * sent (should be a power of 2).
+ */
+#define SCTP_SEG_TO_RWND_UPD 32
+/* Send window update (incr * this > hiwat). Should be a power of 2 */
+#define SCTP_SCALE_OF_RWND_TO_UPD 4
+#define SCTP_MINIMAL_RWND (4096) /* minimal rwnd */
+
+#define SCTP_ADDRMAX 24
+
+/* SCTP DEBUG Switch parameters */
+#define SCTP_DEBUG_TIMER1 0x00000001
+#define SCTP_DEBUG_TIMER2 0x00000002 /* unused */
+#define SCTP_DEBUG_TIMER3 0x00000004 /* unused */
+#define SCTP_DEBUG_TIMER4 0x00000008
+#define SCTP_DEBUG_OUTPUT1 0x00000010
+#define SCTP_DEBUG_OUTPUT2 0x00000020
+#define SCTP_DEBUG_OUTPUT3 0x00000040
+#define SCTP_DEBUG_OUTPUT4 0x00000080
+#define SCTP_DEBUG_UTIL1 0x00000100
+#define SCTP_DEBUG_UTIL2 0x00000200 /* unused */
+#define SCTP_DEBUG_AUTH1 0x00000400
+#define SCTP_DEBUG_AUTH2 0x00000800 /* unused */
+#define SCTP_DEBUG_INPUT1 0x00001000
+#define SCTP_DEBUG_INPUT2 0x00002000
+#define SCTP_DEBUG_INPUT3 0x00004000
+#define SCTP_DEBUG_INPUT4 0x00008000 /* unused */
+#define SCTP_DEBUG_ASCONF1 0x00010000
+#define SCTP_DEBUG_ASCONF2 0x00020000
+#define SCTP_DEBUG_OUTPUT5 0x00040000 /* unused */
+#define SCTP_DEBUG_XXX 0x00080000 /* unused */
+#define SCTP_DEBUG_PCB1 0x00100000
+#define SCTP_DEBUG_PCB2 0x00200000 /* unused */
+#define SCTP_DEBUG_PCB3 0x00400000
+#define SCTP_DEBUG_PCB4 0x00800000
+#define SCTP_DEBUG_INDATA1 0x01000000
+#define SCTP_DEBUG_INDATA2 0x02000000 /* unused */
+#define SCTP_DEBUG_INDATA3 0x04000000 /* unused */
+#define SCTP_DEBUG_INDATA4 0x08000000 /* unused */
+#define SCTP_DEBUG_USRREQ1 0x10000000 /* unused */
+#define SCTP_DEBUG_USRREQ2 0x20000000 /* unused */
+#define SCTP_DEBUG_PEEL1 0x40000000
+#define SCTP_DEBUG_XXXXX 0x80000000 /* unused */
+#define SCTP_DEBUG_ALL 0x7ff3ffff
+#define SCTP_DEBUG_NOISY 0x00040000
+
+/* What sender needs to see to avoid SWS or we consider peer's rwnd 0 */
+#define SCTP_SWS_SENDER_DEF 1420
+
+/*
+ * SWS is scaled to the sb_hiwat of the socket. A value of 2 is hiwat/4, 1
+ * would be hiwat/2 etc.
+ */
+/* What receiver needs to see in sockbuf or we tell peer it's 1 */
+#define SCTP_SWS_RECEIVER_DEF 3000
+
+#define SCTP_INITIAL_CWND 4380
+
+#define SCTP_DEFAULT_MTU 1500 /* emergency default MTU */
+/* amount peer is obligated to have in rwnd or I will abort */
+#define SCTP_MIN_RWND 1500
+
+#define SCTP_WINDOW_MIN 1500 /* smallest rwnd can be */
+#define SCTP_WINDOW_MAX 1048576 /* biggest I can grow rwnd to. My playing
+ * around suggests a value greater than 64k
+ * does not do much; I guess via the kernel
+ * limitations on the stream/socket. */
+
+/* I can handle a 1meg re-assembly */
+#define SCTP_DEFAULT_MAXMSGREASM 1048576
+
+#define SCTP_DEFAULT_MAXSEGMENT 65535
+
+#define SCTP_CHUNK_BUFFER_SIZE 512
+#define SCTP_PARAM_BUFFER_SIZE 512
+
+/* small chunk store for looking at chunk_list in auth */
+#define SCTP_SMALL_CHUNK_STORE 260
+
+#define SCTP_DEFAULT_MINSEGMENT 512 /* MTU size ... if no mtu disc */
+#define SCTP_HOW_MANY_SECRETS 2 /* how many secrets I keep */
+
+#define SCTP_NUMBER_OF_SECRETS 8 /* or 8 * 4 = 32 octets */
+#define SCTP_SECRET_SIZE 32 /* number of octets in 256 bits */
+
+
+/*
+ * SCTP upper layer notifications
+ */
+#define SCTP_NOTIFY_ASSOC_UP 1
+#define SCTP_NOTIFY_ASSOC_DOWN 2
+#define SCTP_NOTIFY_INTERFACE_DOWN 3
+#define SCTP_NOTIFY_INTERFACE_UP 4
+#define SCTP_NOTIFY_DG_FAIL 5
+#define SCTP_NOTIFY_STRDATA_ERR 6
+#define SCTP_NOTIFY_ASSOC_ABORTED 7
+#define SCTP_NOTIFY_PEER_OPENED_STREAM 8
+#define SCTP_NOTIFY_STREAM_OPENED_OK 9
+#define SCTP_NOTIFY_ASSOC_RESTART 10
+#define SCTP_NOTIFY_HB_RESP 11
+#define SCTP_NOTIFY_ASCONF_SUCCESS 12
+#define SCTP_NOTIFY_ASCONF_FAILED 13
+#define SCTP_NOTIFY_PEER_SHUTDOWN 14
+#define SCTP_NOTIFY_ASCONF_ADD_IP 15
+#define SCTP_NOTIFY_ASCONF_DELETE_IP 16
+#define SCTP_NOTIFY_ASCONF_SET_PRIMARY 17
+#define SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION 18
+#define SCTP_NOTIFY_INTERFACE_CONFIRMED 20
+#define SCTP_NOTIFY_STR_RESET_RECV 21
+#define SCTP_NOTIFY_STR_RESET_SEND 22
+#define SCTP_NOTIFY_STR_RESET_FAILED_OUT 23
+#define SCTP_NOTIFY_STR_RESET_FAILED_IN 24
+#define SCTP_NOTIFY_AUTH_NEW_KEY 25
+#define SCTP_NOTIFY_AUTH_KEY_CONFLICT 26
+#define SCTP_NOTIFY_SPECIAL_SP_FAIL 27
+#define SCTP_NOTIFY_MAX 27
+
+/* This is the value for messages that are NOT completely
+ * copied down where we will start to split the message.
+ * So, with our default, we split only if the piece we
+ * want to take will fill up a full MTU (assuming
+ * a 1500 byte MTU).
+ */
+#define SCTP_DEFAULT_SPLIT_POINT_MIN 2904
+
+/* ABORT CODES and other tell-tale location
+ * codes are generated by adding the below
+ * to the instance id.
+ */
+
+/* File defines */
+#define SCTP_FROM_SCTP_INPUT 0x10000000
+#define SCTP_FROM_SCTP_PCB 0x20000000
+#define SCTP_FROM_SCTP_INDATA 0x30000000
+#define SCTP_FROM_SCTP_TIMER 0x40000000
+#define SCTP_FROM_SCTP_USRREQ 0x50000000
+#define SCTP_FROM_SCTPUTIL 0x60000000
+#define SCTP_FROM_SCTP6_USRREQ 0x70000000
+#define SCTP_FROM_SCTP_ASCONF 0x80000000
+#define SCTP_FROM_SCTP_OUTPUT 0x90000000
+#define SCTP_FROM_SCTP_PEELOFF 0xa0000000
+#define SCTP_FROM_SCTP_PANDA 0xb0000000
+#define SCTP_FROM_SCTP_SYSCTL 0xc0000000
+
+/* Location ID's */
+#define SCTP_LOC_1 0x00000001
+#define SCTP_LOC_2 0x00000002
+#define SCTP_LOC_3 0x00000003
+#define SCTP_LOC_4 0x00000004
+#define SCTP_LOC_5 0x00000005
+#define SCTP_LOC_6 0x00000006
+#define SCTP_LOC_7 0x00000007
+#define SCTP_LOC_8 0x00000008
+#define SCTP_LOC_9 0x00000009
+#define SCTP_LOC_10 0x0000000a
+#define SCTP_LOC_11 0x0000000b
+#define SCTP_LOC_12 0x0000000c
+#define SCTP_LOC_13 0x0000000d
+#define SCTP_LOC_14 0x0000000e
+#define SCTP_LOC_15 0x0000000f
+#define SCTP_LOC_16 0x00000010
+#define SCTP_LOC_17 0x00000011
+#define SCTP_LOC_18 0x00000012
+#define SCTP_LOC_19 0x00000013
+#define SCTP_LOC_20 0x00000014
+#define SCTP_LOC_21 0x00000015
+#define SCTP_LOC_22 0x00000016
+#define SCTP_LOC_23 0x00000017
+#define SCTP_LOC_24 0x00000018
+#define SCTP_LOC_25 0x00000019
+#define SCTP_LOC_26 0x0000001a
+#define SCTP_LOC_27 0x0000001b
+#define SCTP_LOC_28 0x0000001c
+#define SCTP_LOC_29 0x0000001d
+#define SCTP_LOC_30 0x0000001e
+#define SCTP_LOC_31 0x0000001f
+#define SCTP_LOC_32 0x00000020
+#define SCTP_LOC_33 0x00000021
+
+
+/* Free assoc codes */
+#define SCTP_NORMAL_PROC 0
+#define SCTP_PCBFREE_NOFORCE 1
+#define SCTP_PCBFREE_FORCE 2
+
+/* From codes for adding addresses */
+#define SCTP_ADDR_IS_CONFIRMED 8
+#define SCTP_ADDR_DYNAMIC_ADDED 6
+#define SCTP_IN_COOKIE_PROC 100
+#define SCTP_ALLOC_ASOC 1
+#define SCTP_LOAD_ADDR_2 2
+#define SCTP_LOAD_ADDR_3 3
+#define SCTP_LOAD_ADDR_4 4
+#define SCTP_LOAD_ADDR_5 5
+
+#define SCTP_DONOT_SETSCOPE 0
+#define SCTP_DO_SETSCOPE 1
+
+
+/* This value determines the default for when
+ * we try to add more on the send queue, if
+ * there is room. This prevents us from cycling
+ * into the copy_resume routine too often if
+ * we have not got enough space to add a decent
+ * enough size message. Note that if we have enough
+ * space to complete the message copy we will always
+ * add to the message, no matter what the size. It's
+ * only when we reach the point that we have some left
+ * to add, and there is only room for part of it, that we
+ * will use this threshold. It's also a sysctl.
+ */
+#define SCTP_DEFAULT_ADD_MORE 1452
+
+#ifndef SCTP_PCBHASHSIZE
+/* default number of association hash buckets in each endpoint */
+#define SCTP_PCBHASHSIZE 256
+#endif
+#ifndef SCTP_TCBHASHSIZE
+#define SCTP_TCBHASHSIZE 1024
+#endif
+
+#ifndef SCTP_CHUNKQUEUE_SCALE
+#define SCTP_CHUNKQUEUE_SCALE 10
+#endif
+
+/* clock variance is 1 ms */
+#define SCTP_CLOCK_GRANULARITY 1
+#define IP_HDR_SIZE 40 /* we use the size of an IPv6 header here; this
+ * detracts a small amount for ipv4 but it
+ * simplifies the ipv6 addition */
+
+/* Argument magic number for sctp_inpcb_free() */
+
+/* third argument */
+#define SCTP_CALLED_DIRECTLY_NOCMPSET 0
+#define SCTP_CALLED_AFTER_CMPSET_OFCLOSE 1
+
+/* second argument */
+#define SCTP_FREE_SHOULD_USE_ABORT 1
+#define SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE 0
+
+#ifndef IPPROTO_SCTP
+#define IPPROTO_SCTP 132 /* the Official IANA number :-) */
+#endif /* !IPPROTO_SCTP */
+
+#define SCTP_MAX_DATA_BUNDLING 256
+#define SCTP_MAX_CONTROL_BUNDLING 20
+
+/* modular comparison */
+/* True if a > b (mod = M) */
+#define compare_with_wrap(a, b, M) (((a > b) && ((a - b) < ((M >> 1) + 1))) || \
+ ((b > a) && ((b - a) > ((M >> 1) + 1))))
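
Worked examples of the wrap-aware comparison with M = MAX_TSN (values picked only for illustration):

    /* compare_with_wrap(10, 5, MAX_TSN)         -> true  (10 is newer, gap small)
     * compare_with_wrap(5, 0xfffffffe, MAX_TSN) -> true  (5 is newer: the TSN
     *                                              space wrapped past 0xffffffff)
     * compare_with_wrap(5, 10, MAX_TSN)         -> false */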
+
+
+/* Mapping array manipulation routines */
+#define SCTP_IS_TSN_PRESENT(arry, gap) ((arry[(gap >> 3)] >> (gap & 0x07)) & 0x01)
+#define SCTP_SET_TSN_PRESENT(arry, gap) (arry[(gap >> 3)] |= (0x01 << ((gap & 0x07))))
+#define SCTP_UNSET_TSN_PRESENT(arry, gap) (arry[(gap >> 3)] &= ((~(0x01 << ((gap & 0x07)))) & 0xff))
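
For instance, with gap = 10 the macros touch byte 10 >> 3 = 1, bit 10 & 0x07 = 2 of the array:

    /* SCTP_SET_TSN_PRESENT(map, 10)   -> map[1] |= 0x04
     * SCTP_IS_TSN_PRESENT(map, 10)    -> (map[1] >> 2) & 0x01
     * SCTP_UNSET_TSN_PRESENT(map, 10) -> map[1] &= ~0x04 (masked to 8 bits) */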
+
+
+#define SCTP_RETRAN_DONE -1
+#define SCTP_RETRAN_EXIT -2
+
+/*
+ * This value defines the number of vtag block time wait entries per list
+ * element. Each entry will take 2 4-byte ints (and of course the overhead
+ * of the next pointer as well). Using 15 as an example will yield ((8 *
+ * 15) + 8) or 128 bytes of overhead for each timewait block that gets
+ * initialized. Increasing it to 31 would yield 256 bytes per block.
+ */
+#define SCTP_NUMBER_IN_VTAG_BLOCK 15
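
Spelling out the overhead figure from the comment (each entry is two 4-byte ints):

    /* (8 bytes/entry * 15 entries) + 8 bytes of next pointer = 128 bytes;
     * with 31 entries: (8 * 31) + 8 = 256 bytes per timewait block. */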
+/*
+ * If we use the STACK option, we have an array of this size head pointers.
+ * This array is mod'd with the size to find which bucket and then all
+ * entries must be searched to see if the tag is in timed wait. If so we
+ * reject it.
+ */
+#define SCTP_STACK_VTAG_HASH_SIZE 31
+#define SCTP_STACK_VTAG_HASH_SIZE_A 32
+
+
+/*
+ * If we use the per-endpoint model then we do not have a hash table of
+ * entries but instead have a single head pointer and we must crawl through
+ * the entire list.
+ */
+
+/*
+ * Number of seconds of time wait for a vtag.
+ */
+#define SCTP_TIME_WAIT 60
+
+/* This time wait is the same as the default cookie life
+ * since we now enter a tag every time we send a cookie.
+ * We want this shorter to avoid vtag depletion.
+ */
+#define SCTP_TIME_WAIT_SHORT 60
+
+/* The system retains a cache of free chunks to
+ * cut down on calls to the memory allocation system. There
+ * is a per-association limit of free items and an overall
+ * system limit. If either one gets hit then the resource
+ * stops being cached.
+ */
+
+#define SCTP_DEF_ASOC_RESC_LIMIT 10
+#define SCTP_DEF_SYSTEM_RESC_LIMIT 1000
+
+/*-
+ * defines for socket lock states.
+ * Used by __APPLE__ and SCTP_SO_LOCK_TESTING
+ */
+#define SCTP_SO_LOCKED 1
+#define SCTP_SO_NOT_LOCKED 0
+
+
+#define SCTP_HOLDS_LOCK 1
+#define SCTP_NOT_LOCKED 0
+
+/*-
+ * For address locks, do we hold the lock?
+ */
+#define SCTP_ADDR_LOCKED 1
+#define SCTP_ADDR_NOT_LOCKED 0
+
+#define IN4_ISPRIVATE_ADDRESS(a) \
+ ((((uint8_t *)&(a)->s_addr)[0] == 10) || \
+ ((((uint8_t *)&(a)->s_addr)[0] == 172) && \
+ (((uint8_t *)&(a)->s_addr)[1] >= 16) && \
+ (((uint8_t *)&(a)->s_addr)[1] <= 32)) || \
+ ((((uint8_t *)&(a)->s_addr)[0] == 192) && \
+ (((uint8_t *)&(a)->s_addr)[1] == 168)))
+
+#define IN4_ISLOOPBACK_ADDRESS(a) \
+ ((((uint8_t *)&(a)->s_addr)[0] == 127) && \
+ (((uint8_t *)&(a)->s_addr)[1] == 0) && \
+ (((uint8_t *)&(a)->s_addr)[2] == 0) && \
+ (((uint8_t *)&(a)->s_addr)[3] == 1))
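
A hedged usage sketch of the address-class macros (addresses chosen only for illustration; the bytes of s_addr are examined in network order, so the tests are endian-safe):

    struct in_addr ex_priv = { .s_addr = htonl(0x0a000001) };	/* 10.0.0.1  */
    struct in_addr ex_loop = { .s_addr = htonl(0x7f000001) };	/* 127.0.0.1 */
    /* IN4_ISPRIVATE_ADDRESS(&ex_priv)  -> non-zero (10/8 private range)
     * IN4_ISLOOPBACK_ADDRESS(&ex_loop) -> non-zero (127.0.0.1) */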
+
+
+#if defined(_KERNEL)
+
+#define SCTP_GETTIME_TIMEVAL(x) (getmicrouptime(x))
+#define SCTP_GETPTIME_TIMEVAL(x) (microuptime(x))
+/*#if defined(__FreeBSD__) || defined(__APPLE__)*/
+/*#define SCTP_GETTIME_TIMEVAL(x) { \*/
+/* (x)->tv_sec = ticks / 1000; \*/
+/* (x)->tv_usec = (ticks % 1000) * 1000; \*/
+/*}*/
+
+/*#else*/
+/*#define SCTP_GETTIME_TIMEVAL(x) (microtime(x))*/
+/*#endif __FreeBSD__ */
+
+#define sctp_sowwakeup(inp, so) \
+do { \
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) { \
+ inp->sctp_flags |= SCTP_PCB_FLAGS_WAKEOUTPUT; \
+ } else { \
+ sowwakeup(so); \
+ } \
+} while (0)
+
+#define sctp_sowwakeup_locked(inp, so) \
+do { \
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) { \
+ SOCKBUF_UNLOCK(&((so)->so_snd)); \
+ inp->sctp_flags |= SCTP_PCB_FLAGS_WAKEOUTPUT; \
+ } else { \
+ sowwakeup_locked(so); \
+ } \
+} while (0)
+
+#define sctp_sorwakeup(inp, so) \
+do { \
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) { \
+ inp->sctp_flags |= SCTP_PCB_FLAGS_WAKEINPUT; \
+ } else { \
+ sorwakeup(so); \
+ } \
+} while (0)
+
+#define sctp_sorwakeup_locked(inp, so) \
+do { \
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) { \
+ inp->sctp_flags |= SCTP_PCB_FLAGS_WAKEINPUT; \
+ SOCKBUF_UNLOCK(&((so)->so_rcv)); \
+ } else { \
+ sorwakeup_locked(so); \
+ } \
+} while (0)
+
+#endif /* _KERNEL */
+#endif
--- /dev/null
+++ sys/netinet/sctp_indata.c
@@ -0,0 +1,5605 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_indata.c,v 1.36 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_indata.c,v 1.45.2.2.2.1 2008/01/31 17:21:50 rrs Exp $");
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_input.h>
+#include <netinet/sctp_indata.h>
+#include <netinet/sctp_uio.h>
+#include <netinet/sctp_timer.h>
+
+
+/*
+ * NOTES: On the outbound side of things I need to check the sack timer to
+ * see if I should generate a sack into the chunk queue (if I have data to
+ * send, that is, and will be sending it) .. for bundling.
+ *
+ * The callback in sctp_usrreq.c will get called when the socket is read from.
+ * This will cause sctp_service_queues() to get called on the top entry in
+ * the list.
+ */
+
+void
+sctp_set_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ uint32_t calc, calc_save;
+
+ /*
+ * This is really set wrong with respect to a 1-2-m socket. Since
+ * the sb_cc is the count that everyone has put up. When we re-write
+ * sctp_soreceive then we will fix this so that ONLY this
+ * association's data is taken into account.
+ */
+ if (stcb->sctp_socket == NULL)
+ return;
+
+ if (stcb->asoc.sb_cc == 0 &&
+ asoc->size_on_reasm_queue == 0 &&
+ asoc->size_on_all_streams == 0) {
+ /* Full rwnd granted */
+ asoc->my_rwnd = max(SCTP_SB_LIMIT_RCV(stcb->sctp_socket),
+ SCTP_MINIMAL_RWND);
+ return;
+ }
+ /* get actual space */
+ calc = (uint32_t) sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv);
+
+ /*
+ * take out what has NOT been put on the socket queue and what we still hold
+ * for putting up.
+ */
+ calc = sctp_sbspace_sub(calc, (uint32_t) asoc->size_on_reasm_queue);
+ calc = sctp_sbspace_sub(calc, (uint32_t) asoc->size_on_all_streams);
+
+ if (calc == 0) {
+ /* out of space */
+ asoc->my_rwnd = 0;
+ return;
+ }
+ /* what is the overhead of all these rwnd's */
+
+ calc = sctp_sbspace_sub(calc, stcb->asoc.my_rwnd_control_len);
+ calc_save = calc;
+
+ asoc->my_rwnd = calc;
+ if ((asoc->my_rwnd == 0) &&
+ (calc < stcb->asoc.my_rwnd_control_len)) {
+ /*-
+ * If our rwnd == 0 && the overhead is greater than the
+ * data onqueue, we clamp the rwnd to 1. This lets us
+ * still accept inbound segments, but hopefully will shut
+ * the sender down when he finally gets the message. This
+ * hopefully will gracefully avoid discarding packets.
+ */
+ asoc->my_rwnd = 1;
+ }
+ if (asoc->my_rwnd &&
+ (asoc->my_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_receiver)) {
+ /* SWS engaged, tell peer none left */
+ asoc->my_rwnd = 1;
+ }
+}
+
+/* Calculate what the rwnd would be */
+uint32_t
+sctp_calc_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ uint32_t calc = 0, calc_save = 0, result = 0;
+
+ /*
+ * This is really set wrong with respect to a 1-2-m socket. Since
+ * the sb_cc is the count that everyone has put up. When we re-write
+ * sctp_soreceive then we will fix this so that ONLY this
+ * association's data is taken into account.
+ */
+ if (stcb->sctp_socket == NULL)
+ return (calc);
+
+ if (stcb->asoc.sb_cc == 0 &&
+ asoc->size_on_reasm_queue == 0 &&
+ asoc->size_on_all_streams == 0) {
+ /* Full rwnd granted */
+ calc = max(SCTP_SB_LIMIT_RCV(stcb->sctp_socket),
+ SCTP_MINIMAL_RWND);
+ return (calc);
+ }
+ /* get actual space */
+ calc = (uint32_t) sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv);
+
+ /*
+ * take out what has NOT been put on the socket queue and what we still hold
+ * for putting up.
+ */
+ calc = sctp_sbspace_sub(calc, (uint32_t) asoc->size_on_reasm_queue);
+ calc = sctp_sbspace_sub(calc, (uint32_t) asoc->size_on_all_streams);
+
+ if (calc == 0) {
+ /* out of space */
+ return (calc);
+ }
+ /* what is the overhead of all these rwnd's */
+ calc = sctp_sbspace_sub(calc, stcb->asoc.my_rwnd_control_len);
+ calc_save = calc;
+
+ result = calc;
+ if ((result == 0) &&
+ (calc < stcb->asoc.my_rwnd_control_len)) {
+ /*-
+ * If our rwnd == 0 && the overhead is greater than the
+ * data onqueue, we clamp the rwnd to 1. This lets us
+ * still accept inbound segments, but hopefully will shut
+ * the sender down when he finally gets the message. This
+ * hopefully will gracefully avoid discarding packets.
+ */
+ result = 1;
+ }
+ if (result &&
+ (result < stcb->sctp_ep->sctp_ep.sctp_sws_receiver)) {
+ /* SWS engaged, tell peer none left */
+ result = 1;
+ }
+ return (result);
+}
+
+
+
+/*
+ * Build out our readq entry based on the incoming packet.
+ */
+struct sctp_queued_to_read *
+sctp_build_readq_entry(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ uint32_t tsn, uint32_t ppid,
+ uint32_t context, uint16_t stream_no,
+ uint16_t stream_seq, uint8_t flags,
+ struct mbuf *dm)
+{
+ struct sctp_queued_to_read *read_queue_e = NULL;
+
+ sctp_alloc_a_readq(stcb, read_queue_e);
+ if (read_queue_e == NULL) {
+ goto failed_build;
+ }
+ read_queue_e->sinfo_stream = stream_no;
+ read_queue_e->sinfo_ssn = stream_seq;
+ read_queue_e->sinfo_flags = (flags << 8);
+ read_queue_e->sinfo_ppid = ppid;
+ read_queue_e->sinfo_context = stcb->asoc.context;
+ read_queue_e->sinfo_timetolive = 0;
+ read_queue_e->sinfo_tsn = tsn;
+ read_queue_e->sinfo_cumtsn = tsn;
+ read_queue_e->sinfo_assoc_id = sctp_get_associd(stcb);
+ read_queue_e->whoFrom = net;
+ read_queue_e->length = 0;
+ atomic_add_int(&net->ref_count, 1);
+ read_queue_e->data = dm;
+ read_queue_e->spec_flags = 0;
+ read_queue_e->tail_mbuf = NULL;
+ read_queue_e->aux_data = NULL;
+ read_queue_e->stcb = stcb;
+ read_queue_e->port_from = stcb->rport;
+ read_queue_e->do_not_ref_stcb = 0;
+ read_queue_e->end_added = 0;
+ read_queue_e->some_taken = 0;
+ read_queue_e->pdapi_aborted = 0;
+failed_build:
+ return (read_queue_e);
+}
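+
+/*
+ * Note that the builder above takes a reference on the net the data came
+ * from (atomic_add_int on ref_count); the matching release is done with
+ * sctp_free_remote_addr() before the entry is freed (see the
+ * sctp_free_a_readq() call sites below). The sinfo_context is filled from
+ * the association context rather than from the context argument.
+ */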
+
+
+/*
+ * Build out our readq entry based on the incoming packet.
+ */
+static struct sctp_queued_to_read *
+sctp_build_readq_entry_chk(struct sctp_tcb *stcb,
+ struct sctp_tmit_chunk *chk)
+{
+ struct sctp_queued_to_read *read_queue_e = NULL;
+
+ sctp_alloc_a_readq(stcb, read_queue_e);
+ if (read_queue_e == NULL) {
+ goto failed_build;
+ }
+ read_queue_e->sinfo_stream = chk->rec.data.stream_number;
+ read_queue_e->sinfo_ssn = chk->rec.data.stream_seq;
+ read_queue_e->sinfo_flags = (chk->rec.data.rcv_flags << 8);
+ read_queue_e->sinfo_ppid = chk->rec.data.payloadtype;
+ read_queue_e->sinfo_context = stcb->asoc.context;
+ read_queue_e->sinfo_timetolive = 0;
+ read_queue_e->sinfo_tsn = chk->rec.data.TSN_seq;
+ read_queue_e->sinfo_cumtsn = chk->rec.data.TSN_seq;
+ read_queue_e->sinfo_assoc_id = sctp_get_associd(stcb);
+ read_queue_e->whoFrom = chk->whoTo;
+ read_queue_e->aux_data = NULL;
+ read_queue_e->length = 0;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ read_queue_e->data = chk->data;
+ read_queue_e->tail_mbuf = NULL;
+ read_queue_e->stcb = stcb;
+ read_queue_e->port_from = stcb->rport;
+ read_queue_e->spec_flags = 0;
+ read_queue_e->do_not_ref_stcb = 0;
+ read_queue_e->end_added = 0;
+ read_queue_e->some_taken = 0;
+ read_queue_e->pdapi_aborted = 0;
+failed_build:
+ return (read_queue_e);
+}
+
+
+struct mbuf *
+sctp_build_ctl_nchunk(struct sctp_inpcb *inp,
+ struct sctp_sndrcvinfo *sinfo)
+{
+ struct sctp_sndrcvinfo *outinfo;
+ struct cmsghdr *cmh;
+ struct mbuf *ret;
+ int len;
+ int use_extended = 0;
+
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) {
+ /* user does not want the sndrcv ctl */
+ return (NULL);
+ }
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) {
+ use_extended = 1;
+ len = CMSG_LEN(sizeof(struct sctp_extrcvinfo));
+ } else {
+ len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
+ }
+
+
+ ret = sctp_get_mbuf_for_msg(len,
+ 0, M_DONTWAIT, 1, MT_DATA);
+
+ if (ret == NULL) {
+ /* No space */
+ return (ret);
+ }
+ /* We need a CMSG header followed by the struct */
+ cmh = mtod(ret, struct cmsghdr *);
+ outinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmh);
+ cmh->cmsg_level = IPPROTO_SCTP;
+ if (use_extended) {
+ cmh->cmsg_type = SCTP_EXTRCV;
+ cmh->cmsg_len = len;
+ memcpy(outinfo, sinfo, len);
+ } else {
+ cmh->cmsg_type = SCTP_SNDRCV;
+ cmh->cmsg_len = len;
+ *outinfo = *sinfo;
+ }
+ SCTP_BUF_LEN(ret) = cmh->cmsg_len;
+ return (ret);
+}
+
+
+char *
+sctp_build_ctl_cchunk(struct sctp_inpcb *inp,
+ int *control_len,
+ struct sctp_sndrcvinfo *sinfo)
+{
+ struct sctp_sndrcvinfo *outinfo;
+ struct cmsghdr *cmh;
+ char *buf;
+ int len;
+ int use_extended = 0;
+
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT)) {
+ /* user does not want the sndrcv ctl */
+ return (NULL);
+ }
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) {
+ use_extended = 1;
+ len = CMSG_LEN(sizeof(struct sctp_extrcvinfo));
+ } else {
+ len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
+ }
+ SCTP_MALLOC(buf, char *, len, SCTP_M_CMSG);
+ if (buf == NULL) {
+ /* No space */
+ return (buf);
+ }
+ /* We need a CMSG header followed by the struct */
+ cmh = (struct cmsghdr *)buf;
+ outinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmh);
+ cmh->cmsg_level = IPPROTO_SCTP;
+ if (use_extended) {
+ cmh->cmsg_type = SCTP_EXTRCV;
+ cmh->cmsg_len = len;
+ memcpy(outinfo, sinfo, len);
+ } else {
+ cmh->cmsg_type = SCTP_SNDRCV;
+ cmh->cmsg_len = len;
+ *outinfo = *sinfo;
+ }
+ *control_len = len;
+ return (buf);
+}
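+
+/*
+ * The two builders above produce the same control message: a cmsghdr with
+ * cmsg_level IPPROTO_SCTP and cmsg_type SCTP_SNDRCV (or SCTP_EXTRCV when
+ * the application enabled extended receive info) followed by the
+ * sndrcvinfo data. They differ only in the backing storage, an mbuf for
+ * sctp_build_ctl_nchunk() versus an SCTP_M_CMSG allocation for
+ * sctp_build_ctl_cchunk().
+ */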
+
+
+/*
+ * We are currently delivering from the reassembly queue. We must continue to
+ * deliver until we either: 1) run out of space, 2) run out of sequential
+ * TSNs, or 3) hit the SCTP_DATA_LAST_FRAG flag.
+ */
+static void
+sctp_service_reassembly(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+ uint16_t nxt_todel;
+ uint16_t stream_no;
+ int end = 0;
+ int cntDel;
+ struct sctp_queued_to_read *control, *ctl, *ctlat;
+
+ if (stcb == NULL)
+ return;
+
+ cntDel = stream_no = 0;
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
+ /* socket above is long gone or going.. */
+abandon:
+ asoc->fragmented_delivery_inprogress = 0;
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
+ asoc->size_on_reasm_queue -= chk->send_size;
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+ /*
+ * Lose the data pointer, since it's in the socket
+ * buffer
+ */
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ /* Now free the address and data */
+ sctp_free_a_chunk(stcb, chk);
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ }
+ return;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ do {
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ if (chk == NULL) {
+ return;
+ }
+ if (chk->rec.data.TSN_seq != (asoc->tsn_last_delivered + 1)) {
+ /* Can't deliver more :< */
+ return;
+ }
+ stream_no = chk->rec.data.stream_number;
+ nxt_todel = asoc->strmin[stream_no].last_sequence_delivered + 1;
+ if (nxt_todel != chk->rec.data.stream_seq &&
+ (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0) {
+ /*
+ * Not the next sequence to deliver in its stream OR
+ * unordered
+ */
+ return;
+ }
+ if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
+
+ control = sctp_build_readq_entry_chk(stcb, chk);
+ if (control == NULL) {
+ /* out of memory? */
+ return;
+ }
+ /* save it off for our future deliveries */
+ stcb->asoc.control_pdapi = control;
+ if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG)
+ end = 1;
+ else
+ end = 0;
+ sctp_add_to_readq(stcb->sctp_ep,
+ stcb, control, &stcb->sctp_socket->so_rcv, end, SCTP_SO_NOT_LOCKED);
+ cntDel++;
+ } else {
+ if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG)
+ end = 1;
+ else
+ end = 0;
+ if (sctp_append_to_readq(stcb->sctp_ep, stcb,
+ stcb->asoc.control_pdapi,
+ chk->data, end, chk->rec.data.TSN_seq,
+ &stcb->sctp_socket->so_rcv)) {
+ /*
+ * something is very wrong, either
+ * control_pdapi is NULL, or the tail_mbuf
+ * is corrupt, or there is an EOM already on
+ * the mbuf chain.
+ */
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ goto abandon;
+ } else {
+ if ((stcb->asoc.control_pdapi == NULL) || (stcb->asoc.control_pdapi->tail_mbuf == NULL)) {
+ panic("This should not happen control_pdapi NULL?");
+ }
+ /* if we did not panic, it was an EOM */
+ panic("Bad chunking ??");
+ return;
+ }
+ }
+ cntDel++;
+ }
+ /* pull it we did it */
+ TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
+ if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
+ asoc->fragmented_delivery_inprogress = 0;
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0) {
+ asoc->strmin[stream_no].last_sequence_delivered++;
+ }
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == 0) {
+ SCTP_STAT_INCR_COUNTER64(sctps_reasmusrmsgs);
+ }
+ } else if (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
+ /*
+ * turn the flag back on since we just delivered
+ * yet another one.
+ */
+ asoc->fragmented_delivery_inprogress = 1;
+ }
+ asoc->tsn_of_pdapi_last_delivered = chk->rec.data.TSN_seq;
+ asoc->last_flags_delivered = chk->rec.data.rcv_flags;
+ asoc->last_strm_seq_delivered = chk->rec.data.stream_seq;
+ asoc->last_strm_no_delivered = chk->rec.data.stream_number;
+
+ asoc->tsn_last_delivered = chk->rec.data.TSN_seq;
+ asoc->size_on_reasm_queue -= chk->send_size;
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+ /* free up the chk */
+ chk->data = NULL;
+ sctp_free_a_chunk(stcb, chk);
+
+ if (asoc->fragmented_delivery_inprogress == 0) {
+ /*
+ * Now lets see if we can deliver the next one on
+ * the stream
+ */
+ struct sctp_stream_in *strm;
+
+ strm = &asoc->strmin[stream_no];
+ nxt_todel = strm->last_sequence_delivered + 1;
+ ctl = TAILQ_FIRST(&strm->inqueue);
+ if (ctl && (nxt_todel == ctl->sinfo_ssn)) {
+ while (ctl != NULL) {
+ /* Deliver more if we can. */
+ if (nxt_todel == ctl->sinfo_ssn) {
+ ctlat = TAILQ_NEXT(ctl, next);
+ TAILQ_REMOVE(&strm->inqueue, ctl, next);
+ asoc->size_on_all_streams -= ctl->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ strm->last_sequence_delivered++;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ ctl,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+ ctl = ctlat;
+ } else {
+ break;
+ }
+ nxt_todel = strm->last_sequence_delivered + 1;
+ }
+ }
+ break;
+ }
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ } while (chk);
+}
+
+/*
+ * Queue the chunk either right into the socket buffer if it is the next one
+ * to go OR put it in the correct place in the delivery queue. If we do
+ * append to the so_buf, keep doing so until we are out of order. One big
+ * question still remains, what to do when the socket buffer is FULL??
+ */
+static void
+sctp_queue_data_to_stream(struct sctp_tcb *stcb, struct sctp_association *asoc,
+ struct sctp_queued_to_read *control, int *abort_flag)
+{
+ /*
+ * FIX-ME maybe? What happens when the ssn wraps? If we are getting
+ * all the data in one stream this could happen quite rapidly. One
+ * could use the TSN to keep track of things, but this scheme breaks
+ * down in the other type of stream usage that could occur. Send a
+ * single msg to stream 0, send 4 billion messages to stream 1, now
+ * send a message to stream 0. You have a situation where the TSN
+ * has wrapped but not in the stream. Is this worth worrying about
+ * or should we just change our queue sort at the bottom to be by
+ * TSN.
+ *
+ * Could it also be legal for a peer to send ssn 1 with TSN 2 and ssn 2
+ * with TSN 1? If the peer is doing some sort of funky TSN/SSN
+ * assignment this could happen... and I don't see how this would be
+ * a violation. So for now I am undecided and will leave the sort by
+ * SSN alone. Maybe a hybrid approach is the answer.
+ *
+ */
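+ /*
+ * Example of the wrap concern above: with last_sequence_delivered at
+ * 65535 the next in-order SSN is 0, and it is compare_with_wrap()
+ * with MAX_SEQ that keeps 0 from looking "behind" 65535 in the
+ * checks below.
+ */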
+ struct sctp_stream_in *strm;
+ struct sctp_queued_to_read *at;
+ int queue_needed;
+ uint16_t nxt_todel;
+ struct mbuf *oper;
+
+ queue_needed = 1;
+ asoc->size_on_all_streams += control->length;
+ sctp_ucount_incr(asoc->cnt_on_all_streams);
+ strm = &asoc->strmin[control->sinfo_stream];
+ nxt_todel = strm->last_sequence_delivered + 1;
+ if (sctp_logging_level & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_INTO_STRD);
+ }
+ SCTPDBG(SCTP_DEBUG_INDATA1,
+ "queue to stream called for ssn:%u lastdel:%u nxt:%u\n",
+ (uint32_t) control->sinfo_stream,
+ (uint32_t) strm->last_sequence_delivered,
+ (uint32_t) nxt_todel);
+ if (compare_with_wrap(strm->last_sequence_delivered,
+ control->sinfo_ssn, MAX_SEQ) ||
+ (strm->last_sequence_delivered == control->sinfo_ssn)) {
+ /* The incoming sseq is behind where we last delivered? */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Duplicate S-SEQ:%d delivered:%d from peer, Abort association\n",
+ control->sinfo_ssn, strm->last_sequence_delivered);
+ /*
+ * throw it in the stream so it gets cleaned up in
+ * association destruction
+ */
+ TAILQ_INSERT_HEAD(&strm->inqueue, control, next);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ (sizeof(uint32_t) * 3);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_1);
+ ippp++;
+ *ippp = control->sinfo_tsn;
+ ippp++;
+ *ippp = ((control->sinfo_stream << 16) | control->sinfo_ssn);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_1;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+
+ }
+ if (nxt_todel == control->sinfo_ssn) {
+ /* can be delivered right away? */
+ if (sctp_logging_level & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_IMMED_DEL);
+ }
+ queue_needed = 0;
+ asoc->size_on_all_streams -= control->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ strm->last_sequence_delivered++;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+ control = TAILQ_FIRST(&strm->inqueue);
+ while (control != NULL) {
+ /* all delivered */
+ nxt_todel = strm->last_sequence_delivered + 1;
+ if (nxt_todel == control->sinfo_ssn) {
+ at = TAILQ_NEXT(control, next);
+ TAILQ_REMOVE(&strm->inqueue, control, next);
+ asoc->size_on_all_streams -= control->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ strm->last_sequence_delivered++;
+ /*
+ * We ignore the return of deliver_data here
+ * since we always can hold the chunk on the
+ * d-queue. And we have a finite number that
+ * can be delivered from the strq.
+ */
+ if (sctp_logging_level & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, NULL,
+ SCTP_STR_LOG_FROM_IMMED_DEL);
+ }
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+ control = at;
+ continue;
+ }
+ break;
+ }
+ }
+ if (queue_needed) {
+ /*
+ * Ok, we did not deliver this guy, find the correct place
+ * to put it on the queue.
+ */
+ if (TAILQ_EMPTY(&strm->inqueue)) {
+ /* Empty queue */
+ if (sctp_logging_level & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, NULL, SCTP_STR_LOG_FROM_INSERT_HD);
+ }
+ TAILQ_INSERT_HEAD(&strm->inqueue, control, next);
+ } else {
+ TAILQ_FOREACH(at, &strm->inqueue, next) {
+ if (compare_with_wrap(at->sinfo_ssn,
+ control->sinfo_ssn, MAX_SEQ)) {
+ /*
+ * one in queue is bigger than the
+ * new one, insert before this one
+ */
+ if (sctp_logging_level & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, at,
+ SCTP_STR_LOG_FROM_INSERT_MD);
+ }
+ TAILQ_INSERT_BEFORE(at, control, next);
+ break;
+ } else if (at->sinfo_ssn == control->sinfo_ssn) {
+ /*
+ * Gak, He sent me a duplicate str
+ * seq number
+ */
+ /*
+ * foo bar, I guess I will just free
+ * this new guy, should we abort
+ * too? FIX ME MAYBE? Or it COULD be
+ * that the SSN's have wrapped.
+ * Maybe I should compare to TSN
+ * somehow... sigh for now just blow
+ * away the chunk!
+ */
+
+ if (control->data)
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ asoc->size_on_all_streams -= control->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ if (control->whoFrom)
+ sctp_free_remote_addr(control->whoFrom);
+ control->whoFrom = NULL;
+ sctp_free_a_readq(stcb, control);
+ return;
+ } else {
+ if (TAILQ_NEXT(at, next) == NULL) {
+ /*
+ * We are at the end, insert
+ * it after this one
+ */
+ if (sctp_logging_level & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del(control, at,
+ SCTP_STR_LOG_FROM_INSERT_TL);
+ }
+ TAILQ_INSERT_AFTER(&strm->inqueue,
+ at, control, next);
+ break;
+ }
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Returns two things: the total size of the deliverable parts of the
+ * first fragmented message on the reassembly queue, and a 1 if all of
+ * the message is ready or a 0 if the message is still incomplete.
+ */
+static int
+sctp_is_all_msg_on_reasm(struct sctp_association *asoc, uint32_t * t_size)
+{
+ struct sctp_tmit_chunk *chk;
+ uint32_t tsn;
+
+ *t_size = 0;
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ if (chk == NULL) {
+ /* nothing on the queue */
+ return (0);
+ }
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == 0) {
+ /* Not a first on the queue */
+ return (0);
+ }
+ tsn = chk->rec.data.TSN_seq;
+ while (chk) {
+ if (tsn != chk->rec.data.TSN_seq) {
+ return (0);
+ }
+ *t_size += chk->send_size;
+ if (chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
+ return (1);
+ }
+ tsn++;
+ chk = TAILQ_NEXT(chk, sctp_next);
+ }
+ return (0);
+}
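+
+/*
+ * For example, if the reassembly queue holds TSNs 1000..1003 with the
+ * FIRST flag on 1000 and the LAST flag on 1003, the scan above returns 1
+ * and *t_size is the sum of the four send_size values; a hole at 1002
+ * would make it return 0 instead.
+ */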
+
+static void
+sctp_deliver_reasm_check(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+ uint16_t nxt_todel;
+ uint32_t tsize;
+
+doit_again:
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ if (chk == NULL) {
+ /* Huh? */
+ asoc->size_on_reasm_queue = 0;
+ asoc->cnt_on_reasm_queue = 0;
+ return;
+ }
+ if (asoc->fragmented_delivery_inprogress == 0) {
+ nxt_todel =
+ asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered + 1;
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) &&
+ (nxt_todel == chk->rec.data.stream_seq ||
+ (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED))) {
+ /*
+ * Yep, the first one is here and it's ok to deliver
+ * but should we?
+ */
+ if ((sctp_is_all_msg_on_reasm(asoc, &tsize) ||
+ (tsize >= stcb->sctp_ep->partial_delivery_point))) {
+
+ /*
+ * Yes, we set up to start reception by
+ * backing down the TSN just in case we
+ * can't deliver.
+ */
+ asoc->fragmented_delivery_inprogress = 1;
+ asoc->tsn_last_delivered =
+ chk->rec.data.TSN_seq - 1;
+ asoc->str_of_pdapi =
+ chk->rec.data.stream_number;
+ asoc->ssn_of_pdapi = chk->rec.data.stream_seq;
+ asoc->pdapi_ppid = chk->rec.data.payloadtype;
+ asoc->fragment_flags = chk->rec.data.rcv_flags;
+ sctp_service_reassembly(stcb, asoc);
+ }
+ }
+ } else {
+ /*
+ * Service re-assembly will deliver stream data queued at
+ * the end of fragmented delivery, but it won't know to go
+ * back and call itself again... we do that here with the
+ * doit_again goto.
+ */
+ sctp_service_reassembly(stcb, asoc);
+ if (asoc->fragmented_delivery_inprogress == 0) {
+ /*
+ * finished our Fragmented delivery, could be more
+ * waiting?
+ */
+ goto doit_again;
+ }
+ }
+}
+
+/*
+ * Dump onto the re-assembly queue, in its proper place. After dumping on the
+ * queue, see if anything can be delivered. If so pull it off (or as much as
+ * we can). If we run out of space then we must dump what we can and set the
+ * appropriate flag to say we queued what we could.
+ */
+static void
+sctp_queue_data_for_reasm(struct sctp_tcb *stcb, struct sctp_association *asoc,
+ struct sctp_tmit_chunk *chk, int *abort_flag)
+{
+ struct mbuf *oper;
+ uint32_t cum_ackp1, last_tsn, prev_tsn, post_tsn;
+ u_char last_flags;
+ struct sctp_tmit_chunk *at, *prev, *next;
+
+ prev = next = NULL;
+ cum_ackp1 = asoc->tsn_last_delivered + 1;
+ if (TAILQ_EMPTY(&asoc->reasmqueue)) {
+ /* This is the first one on the queue */
+ TAILQ_INSERT_HEAD(&asoc->reasmqueue, chk, sctp_next);
+ /*
+ * we do not check for delivery of anything when only one
+ * fragment is here
+ */
+ asoc->size_on_reasm_queue = chk->send_size;
+ sctp_ucount_incr(asoc->cnt_on_reasm_queue);
+ if (chk->rec.data.TSN_seq == cum_ackp1) {
+ if (asoc->fragmented_delivery_inprogress == 0 &&
+ (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) !=
+ SCTP_DATA_FIRST_FRAG) {
+ /*
+ * An empty queue, no delivery in progress,
+ * we hit the next one and it does NOT have
+ * a FIRST fragment mark.
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, its not first, no fragmented delivery in progress\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (sizeof(uint32_t) * 3);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_2);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_2;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ } else if (asoc->fragmented_delivery_inprogress &&
+ (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == SCTP_DATA_FIRST_FRAG) {
+ /*
+ * We are doing a partial delivery and the
+ * NEXT chunk MUST be either the LAST or
+ * MIDDLE fragment NOT a FIRST
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS a first and fragmented delivery in progress\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_3);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_3;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ } else if (asoc->fragmented_delivery_inprogress) {
+ /*
+ * Here we are ok with a MIDDLE or LAST
+ * piece
+ */
+ if (chk->rec.data.stream_number !=
+ asoc->str_of_pdapi) {
+ /* Got to be the right STR No */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS not same stream number %d vs %d\n",
+ chk->rec.data.stream_number,
+ asoc->str_of_pdapi);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (sizeof(uint32_t) * 3);
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_4);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_4;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ } else if ((asoc->fragment_flags & SCTP_DATA_UNORDERED) !=
+ SCTP_DATA_UNORDERED &&
+ chk->rec.data.stream_seq !=
+ asoc->ssn_of_pdapi) {
+ /* Got to be the right STR Seq */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it IS not same stream seq %d vs %d\n",
+ chk->rec.data.stream_seq,
+ asoc->ssn_of_pdapi);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_5);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_5;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ }
+ }
+ }
+ return;
+ }
+ /* Find its place */
+ TAILQ_FOREACH(at, &asoc->reasmqueue, sctp_next) {
+ if (compare_with_wrap(at->rec.data.TSN_seq,
+ chk->rec.data.TSN_seq, MAX_TSN)) {
+ /*
+ * one in queue is bigger than the new one, insert
+ * before this one
+ */
+ /* A check */
+ asoc->size_on_reasm_queue += chk->send_size;
+ sctp_ucount_incr(asoc->cnt_on_reasm_queue);
+ next = at;
+ TAILQ_INSERT_BEFORE(at, chk, sctp_next);
+ break;
+ } else if (at->rec.data.TSN_seq == chk->rec.data.TSN_seq) {
+ /* Gak, He sent me a duplicate str seq number */
+ /*
+ * foo bar, I guess I will just free this new guy,
+ * should we abort too? FIX ME MAYBE? Or it COULD be
+ * that the SSN's have wrapped. Maybe I should
+ * compare to TSN somehow... sigh for now just blow
+ * away the chunk!
+ */
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ } else {
+ last_flags = at->rec.data.rcv_flags;
+ last_tsn = at->rec.data.TSN_seq;
+ prev = at;
+ if (TAILQ_NEXT(at, sctp_next) == NULL) {
+ /*
+ * We are at the end, insert it after this
+ * one
+ */
+ /* check it first */
+ asoc->size_on_reasm_queue += chk->send_size;
+ sctp_ucount_incr(asoc->cnt_on_reasm_queue);
+ TAILQ_INSERT_AFTER(&asoc->reasmqueue, at, chk, sctp_next);
+ break;
+ }
+ }
+ }
+ /* Now the audits */
+ if (prev) {
+ prev_tsn = chk->rec.data.TSN_seq - 1;
+ if (prev_tsn == prev->rec.data.TSN_seq) {
+ /*
+ * Ok the one I am dropping onto the end is the
+ * NEXT. A bit of validation here.
+ */
+ if ((prev->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_FIRST_FRAG ||
+ (prev->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_MIDDLE_FRAG) {
+ /*
+ * Insert chk MUST be a MIDDLE or LAST
+ * fragment
+ */
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_FIRST_FRAG) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - It can be a middle or last but not a first\n");
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, it's a FIRST!\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_6);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_6;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ return;
+ }
+ if (chk->rec.data.stream_number !=
+ prev->rec.data.stream_number) {
+ /*
+ * Huh, need the correct STR here,
+ * they must be the same.
+ */
+ SCTP_PRINTF("Prev check - Gak, Evil plot, ssn:%d not the same as at:%d\n",
+ chk->rec.data.stream_number,
+ prev->rec.data.stream_number);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_7);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_7;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ if ((prev->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0 &&
+ chk->rec.data.stream_seq !=
+ prev->rec.data.stream_seq) {
+ /*
+ * Huh, need the correct STR here,
+ * they must be the same.
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, Evil plot, sseq:%d not the same as at:%d\n",
+ chk->rec.data.stream_seq,
+ prev->rec.data.stream_seq);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_8);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_8;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ } else if ((prev->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_LAST_FRAG) {
+ /* Insert chk MUST be a FIRST */
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) !=
+ SCTP_DATA_FIRST_FRAG) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Prev check - Gak, evil plot, its not FIRST and it must be!\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_9);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_9;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ }
+ }
+ }
+ if (next) {
+ post_tsn = chk->rec.data.TSN_seq + 1;
+ if (post_tsn == next->rec.data.TSN_seq) {
+ /*
+ * Ok the one I am inserting ahead of is my NEXT
+ * one. A bit of validation here.
+ */
+ if (next->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) {
+ /* Insert chk MUST be a last fragment */
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK)
+ != SCTP_DATA_LAST_FRAG) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Next is FIRST, we must be LAST\n");
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, its not a last!\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_10);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_10;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ } else if ((next->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_MIDDLE_FRAG ||
+ (next->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_LAST_FRAG) {
+ /*
+ * Insert chk CAN be MIDDLE or FIRST NOT
+ * LAST
+ */
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) ==
+ SCTP_DATA_LAST_FRAG) {
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Next is a MIDDLE/LAST\n");
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Gak, Evil plot, new prev chunk is a LAST\n");
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_11);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_11;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ if (chk->rec.data.stream_number !=
+ next->rec.data.stream_number) {
+ /*
+ * Huh, need the correct STR here,
+ * they must be the same.
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Gak, Evil plot, ssn:%d not the same as at:%d\n",
+ chk->rec.data.stream_number,
+ next->rec.data.stream_number);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_12);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_12;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ if ((next->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0 &&
+ chk->rec.data.stream_seq !=
+ next->rec.data.stream_seq) {
+ /*
+ * Huh, need the correct STR here,
+ * they must be the same.
+ */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Next chk - Gak, Evil plot, sseq:%d not the same as at:%d\n",
+ chk->rec.data.stream_seq,
+ next->rec.data.stream_seq);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_13);
+ ippp++;
+ *ippp = chk->rec.data.TSN_seq;
+ ippp++;
+ *ippp = ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_13;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return;
+ }
+ }
+ }
+ }
+ /* Do we need to do some delivery? check */
+ sctp_deliver_reasm_check(stcb, asoc);
+}
+
+/*
+ * This is an unfortunate routine. It checks to make sure an evil guy is not
+ * stuffing us full of bad packet fragments. A broken peer could also do this
+ * but this is doubtful. It is too bad I must worry about evil crackers, sigh
+ * :< more cycles.
+ */
+static int
+sctp_does_tsn_belong_to_reasm(struct sctp_association *asoc,
+ uint32_t TSN_seq)
+{
+ struct sctp_tmit_chunk *at;
+ uint32_t tsn_est;
+
+ TAILQ_FOREACH(at, &asoc->reasmqueue, sctp_next) {
+ if (compare_with_wrap(TSN_seq,
+ at->rec.data.TSN_seq, MAX_TSN)) {
+ /* is it one bigger? */
+ tsn_est = at->rec.data.TSN_seq + 1;
+ if (tsn_est == TSN_seq) {
+ /* yep. It better be a last then */
+ if ((at->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) !=
+ SCTP_DATA_LAST_FRAG) {
+ /*
+ * Ok this guy belongs next to a guy
+ * that is NOT last, it should be a
+ * middle/last, not a complete
+ * chunk.
+ */
+ return (1);
+ } else {
+ /*
+ * This guy is ok since it's a LAST
+ * and the new chunk is a fully
+ * self-contained one.
+ */
+ return (0);
+ }
+ }
+ } else if (TSN_seq == at->rec.data.TSN_seq) {
+ /* Software error since I have a dup? */
+ return (1);
+ } else {
+ /*
+ * Ok, 'at' is larger than the new chunk but does it
+ * need to be right before it?
+ */
+ tsn_est = TSN_seq + 1;
+ if (tsn_est == at->rec.data.TSN_seq) {
+ /* Yep, It better be a first */
+ if ((at->rec.data.rcv_flags & SCTP_DATA_FRAG_MASK) !=
+ SCTP_DATA_FIRST_FRAG) {
+ return (1);
+ } else {
+ return (0);
+ }
+ }
+ }
+ }
+ return (0);
+}
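+
+/*
+ * In short: a TSN that lands immediately after a queued fragment that is
+ * not a LAST, or immediately before a queued fragment that is not a FIRST,
+ * is claimed by that partial message, so a self-contained chunk carrying
+ * such a TSN is treated as bogus by the caller.
+ */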
+
+
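+/*
+ * Process a single DATA chunk: returns 1 when the chunk was accepted and
+ * marked present in the mapping array, 0 when it was a duplicate, had to
+ * be dropped, or caused the association to be aborted (in which case
+ * *abort_flag is set). *m is cleared when the mbuf chain has been
+ * consumed, and *break_flag tells the caller to stop processing the rest
+ * of the packet when we are out of resources.
+ */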
+static int
+sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
+ struct mbuf **m, int offset, struct sctp_data_chunk *ch, int chk_length,
+ struct sctp_nets *net, uint32_t * high_tsn, int *abort_flag,
+ int *break_flag, int last_chunk)
+{
+ /* Process a data chunk */
+ /* struct sctp_tmit_chunk *chk; */
+ struct sctp_tmit_chunk *chk;
+ uint32_t tsn, gap;
+ struct mbuf *dmbuf;
+ int indx, the_len;
+ int need_reasm_check = 0;
+ uint16_t strmno, strmseq;
+ struct mbuf *oper;
+ struct sctp_queued_to_read *control;
+ int ordered;
+ uint32_t protocol_id;
+ uint8_t chunk_flags;
+ struct sctp_stream_reset_list *liste;
+
+ chk = NULL;
+ tsn = ntohl(ch->dp.tsn);
+ chunk_flags = ch->ch.chunk_flags;
+ protocol_id = ch->dp.protocol_id;
+ ordered = ((ch->ch.chunk_flags & SCTP_DATA_UNORDERED) == 0);
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(tsn, asoc->cumulative_tsn, asoc->highest_tsn_inside_map, SCTP_MAP_TSN_ENTERS);
+ }
+ if (stcb == NULL) {
+ return (0);
+ }
+ SCTP_LTRACE_CHK(stcb->sctp_ep, stcb, ch->ch.chunk_type, tsn);
+ if (compare_with_wrap(asoc->cumulative_tsn, tsn, MAX_TSN) ||
+ asoc->cumulative_tsn == tsn) {
+ /* It is a duplicate */
+ SCTP_STAT_INCR(sctps_recvdupdata);
+ if (asoc->numduptsns < SCTP_MAX_DUP_TSNS) {
+ /* Record a dup for the next outbound sack */
+ asoc->dup_tsns[asoc->numduptsns] = tsn;
+ asoc->numduptsns++;
+ }
+ asoc->send_sack = 1;
+ return (0);
+ }
+ /* Calculate the number of TSN's between the base and this TSN */
+ if (tsn >= asoc->mapping_array_base_tsn) {
+ gap = tsn - asoc->mapping_array_base_tsn;
+ } else {
+ gap = (MAX_TSN - asoc->mapping_array_base_tsn) + tsn + 1;
+ }
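+ /*
+ * gap is simply (tsn - mapping_array_base_tsn) modulo 2^32, assuming
+ * MAX_TSN is 0xffffffff; e.g. with base 0xfffffff0 and tsn 0x00000005
+ * the else branch yields (0xffffffff - 0xfffffff0) + 5 + 1 = 21.
+ */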
+ if (gap >= (SCTP_MAPPING_ARRAY << 3)) {
+ /* Can't hold the bit in the mapping at max array, toss it */
+ return (0);
+ }
+ if (gap >= (uint32_t) (asoc->mapping_array_size << 3)) {
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (sctp_expand_mapping_array(asoc, gap)) {
+ /* Can't expand, drop it */
+ return (0);
+ }
+ }
+ if (compare_with_wrap(tsn, *high_tsn, MAX_TSN)) {
+ *high_tsn = tsn;
+ }
+ /* See if we have received this one already */
+ if (SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap)) {
+ SCTP_STAT_INCR(sctps_recvdupdata);
+ if (asoc->numduptsns < SCTP_MAX_DUP_TSNS) {
+ /* Record a dup for the next outbound sack */
+ asoc->dup_tsns[asoc->numduptsns] = tsn;
+ asoc->numduptsns++;
+ }
+ asoc->send_sack = 1;
+ return (0);
+ }
+ /*
+ * Check to see about the GONE flag, duplicates would cause a sack
+ * to be sent up above
+ */
+ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET))
+ ) {
+ /*
+ * wait a minute, this guy is gone, there is no longer a
+ * receiver. Send peer an ABORT!
+ */
+ struct mbuf *op_err;
+
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_OUT_OF_RESC);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, 0, op_err, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ return (0);
+ }
+ /*
+ * Now before going further we see if there is room. If NOT then we
+ * MAY let one through only IF this TSN is the one we are waiting
+ * for on a partial delivery API.
+ */
+
+ /* now do the tests */
+ if (((asoc->cnt_on_all_streams +
+ asoc->cnt_on_reasm_queue +
+ asoc->cnt_msg_on_sb) > sctp_max_chunks_on_queue) ||
+ (((int)asoc->my_rwnd) <= 0)) {
+ /*
+ * When we have NO room in the rwnd we check to make sure
+ * the reader is doing its job...
+ */
+ if (stcb->sctp_socket->so_rcv.sb_cc) {
+ /* some to read, wake-up */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* assoc was freed while we were unlocked */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (0);
+ }
+#endif
+ sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ /* now is it in the mapping array of what we have accepted? */
+ if (compare_with_wrap(tsn,
+ asoc->highest_tsn_inside_map, MAX_TSN)) {
+
+ /* Nope not in the valid range dump it */
+ sctp_set_rwnd(stcb, asoc);
+ if ((asoc->cnt_on_all_streams +
+ asoc->cnt_on_reasm_queue +
+ asoc->cnt_msg_on_sb) > sctp_max_chunks_on_queue) {
+ SCTP_STAT_INCR(sctps_datadropchklmt);
+ } else {
+ SCTP_STAT_INCR(sctps_datadroprwnd);
+ }
+ indx = *break_flag;
+ *break_flag = 1;
+ return (0);
+ }
+ }
+ strmno = ntohs(ch->dp.stream_id);
+ if (strmno >= asoc->streamincnt) {
+ struct sctp_paramhdr *phdr;
+ struct mbuf *mb;
+
+ mb = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) * 2),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (mb != NULL) {
+ /* add some space up front so prepend will work well */
+ SCTP_BUF_RESV_UF(mb, sizeof(struct sctp_chunkhdr));
+ phdr = mtod(mb, struct sctp_paramhdr *);
+ /*
+ * Error causes are just params and this one has
+ * two back-to-back phdrs, one with the error type
+ * and size, the other with the streamid and a reserved field
+ */
+ SCTP_BUF_LEN(mb) = (sizeof(struct sctp_paramhdr) * 2);
+ phdr->param_type = htons(SCTP_CAUSE_INVALID_STREAM);
+ phdr->param_length =
+ htons(sizeof(struct sctp_paramhdr) * 2);
+ phdr++;
+ /* We insert the stream in the type field */
+ phdr->param_type = ch->dp.stream_id;
+ /* And set the length to 0 for the rsvd field */
+ phdr->param_length = 0;
+ sctp_queue_op_err(stcb, mb);
+ }
+ SCTP_STAT_INCR(sctps_badsid);
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ SCTP_SET_TSN_PRESENT(asoc->mapping_array, gap);
+ if (compare_with_wrap(tsn, asoc->highest_tsn_inside_map, MAX_TSN)) {
+ /* we have a new high score */
+ asoc->highest_tsn_inside_map = tsn;
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 2, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ }
+ if (tsn == (asoc->cumulative_tsn + 1)) {
+ /* Update cum-ack */
+ asoc->cumulative_tsn = tsn;
+ }
+ return (0);
+ }
+ /*
+ * Before we continue let's validate that we are not being fooled by
+ * an evil attacker. We can only have 4k chunks based on our TSN
+ * spread allowed by the mapping array 512 * 8 bits, so there is no
+ * way our stream sequence numbers could have wrapped. We of course
+ * only validate the FIRST fragment so the bit must be set.
+ */
+ strmseq = ntohs(ch->dp.stream_sequence);
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (asoc->tsn_in_at >= SCTP_TSN_LOG_SIZE) {
+ asoc->tsn_in_at = 0;
+ asoc->tsn_in_wrapped = 1;
+ }
+ asoc->in_tsnlog[asoc->tsn_in_at].tsn = tsn;
+ asoc->in_tsnlog[asoc->tsn_in_at].strm = strmno;
+ asoc->in_tsnlog[asoc->tsn_in_at].seq = strmseq;
+ asoc->in_tsnlog[asoc->tsn_in_at].sz = chk_length;
+ asoc->in_tsnlog[asoc->tsn_in_at].flgs = chunk_flags;
+ asoc->in_tsnlog[asoc->tsn_in_at].stcb = (void *)stcb;
+ asoc->in_tsnlog[asoc->tsn_in_at].in_pos = asoc->tsn_in_at;
+ asoc->in_tsnlog[asoc->tsn_in_at].in_out = 1;
+ asoc->tsn_in_at++;
+#endif
+ if ((chunk_flags & SCTP_DATA_FIRST_FRAG) &&
+ (TAILQ_EMPTY(&asoc->resetHead)) &&
+ (chunk_flags & SCTP_DATA_UNORDERED) == 0 &&
+ (compare_with_wrap(asoc->strmin[strmno].last_sequence_delivered,
+ strmseq, MAX_SEQ) ||
+ asoc->strmin[strmno].last_sequence_delivered == strmseq)) {
+ /* The incoming sseq is behind where we last delivered? */
+ SCTPDBG(SCTP_DEBUG_INDATA1, "EVIL/Broken-Dup S-SEQ:%d delivered:%d from peer, Abort!\n",
+ strmseq, asoc->strmin[strmno].last_sequence_delivered);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_14);
+ ippp++;
+ *ippp = tsn;
+ ippp++;
+ *ippp = ((strmno << 16) | strmseq);
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_14;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ *abort_flag = 1;
+ return (0);
+ }
+ /************************************
+ * From here down we may find ch-> invalid,
+ * so it's a good idea NOT to use it.
+ *************************************/
+
+ the_len = (chk_length - sizeof(struct sctp_data_chunk));
+ if (last_chunk == 0) {
+ dmbuf = SCTP_M_COPYM(*m,
+ (offset + sizeof(struct sctp_data_chunk)),
+ the_len, M_DONTWAIT);
+#ifdef SCTP_MBUF_LOGGING
+ if (sctp_logging_level & SCTP_MBUF_LOGGING_ENABLE) {
+ struct mbuf *mat;
+
+ mat = dmbuf;
+ while (mat) {
+ if (SCTP_BUF_IS_EXTENDED(mat)) {
+ sctp_log_mb(mat, SCTP_MBUF_ICOPY);
+ }
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ }
+#endif
+ } else {
+ /* We can steal the last chunk */
+ int l_len;
+
+ dmbuf = *m;
+ /* lop off the top part */
+ m_adj(dmbuf, (offset + sizeof(struct sctp_data_chunk)));
+ if (SCTP_BUF_NEXT(dmbuf) == NULL) {
+ l_len = SCTP_BUF_LEN(dmbuf);
+ } else {
+ /*
+ * need to count up the size; hopefully we do not hit
+ * this too often :-0
+ */
+ struct mbuf *lat;
+
+ l_len = 0;
+ lat = dmbuf;
+ while (lat) {
+ l_len += SCTP_BUF_LEN(lat);
+ lat = SCTP_BUF_NEXT(lat);
+ }
+ }
+ if (l_len > the_len) {
+ /* Trim the end round bytes off too */
+ m_adj(dmbuf, -(l_len - the_len));
+ }
+ }
+ if (dmbuf == NULL) {
+ SCTP_STAT_INCR(sctps_nomem);
+ return (0);
+ }
+ if ((chunk_flags & SCTP_DATA_NOT_FRAG) == SCTP_DATA_NOT_FRAG &&
+ asoc->fragmented_delivery_inprogress == 0 &&
+ TAILQ_EMPTY(&asoc->resetHead) &&
+ ((ordered == 0) ||
+ ((asoc->strmin[strmno].last_sequence_delivered + 1) == strmseq &&
+ TAILQ_EMPTY(&asoc->strmin[strmno].inqueue)))) {
+ /* Candidate for express delivery */
+ /*
+ * It's not fragmented, no PD-API is up, nothing is in the
+ * delivery queue, it's un-ordered OR ordered and the next to
+ * deliver AND nothing else is stuck on the stream queue,
+ * and there is room for it in the socket buffer. Let's just
+ * stuff it up the buffer....
+ */
+
+ /* It would be nice to avoid this copy if we could :< */
+ sctp_alloc_a_readq(stcb, control);
+ sctp_build_readq_entry_mac(control, stcb, asoc->context, net, tsn,
+ protocol_id,
+ stcb->asoc.context,
+ strmno, strmseq,
+ chunk_flags,
+ dmbuf);
+ if (control == NULL) {
+ goto failed_express_del;
+ }
+ sctp_add_to_readq(stcb->sctp_ep, stcb, control, &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+ if ((chunk_flags & SCTP_DATA_UNORDERED) == 0) {
+ /* for ordered, bump what we delivered */
+ asoc->strmin[strmno].last_sequence_delivered++;
+ }
+ SCTP_STAT_INCR(sctps_recvexpress);
+ if (sctp_logging_level & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del_alt(stcb, tsn, strmseq, strmno,
+ SCTP_STR_LOG_FROM_EXPRS_DEL);
+ }
+ control = NULL;
+ goto finish_express_del;
+ }
+failed_express_del:
+ /* If we reach here this is a new chunk */
+ chk = NULL;
+ control = NULL;
+ /* Express for fragmented delivery? */
+ if ((asoc->fragmented_delivery_inprogress) &&
+ (stcb->asoc.control_pdapi) &&
+ (asoc->str_of_pdapi == strmno) &&
+ (asoc->ssn_of_pdapi == strmseq)
+ ) {
+ control = stcb->asoc.control_pdapi;
+ if ((chunk_flags & SCTP_DATA_FIRST_FRAG) == SCTP_DATA_FIRST_FRAG) {
+ /* Can't be another first? */
+ goto failed_pdapi_express_del;
+ }
+ if (tsn == (control->sinfo_tsn + 1)) {
+ /* Yep, we can add it on */
+ int end = 0;
+ uint32_t cumack;
+
+ if (chunk_flags & SCTP_DATA_LAST_FRAG) {
+ end = 1;
+ }
+ cumack = asoc->cumulative_tsn;
+ if ((cumack + 1) == tsn)
+ cumack = tsn;
+
+ if (sctp_append_to_readq(stcb->sctp_ep, stcb, control, dmbuf, end,
+ tsn,
+ &stcb->sctp_socket->so_rcv)) {
+ SCTP_PRINTF("Append fails end:%d\n", end);
+ goto failed_pdapi_express_del;
+ }
+ SCTP_STAT_INCR(sctps_recvexpressm);
+ control->sinfo_tsn = tsn;
+ asoc->tsn_last_delivered = tsn;
+ asoc->fragment_flags = chunk_flags;
+ asoc->tsn_of_pdapi_last_delivered = tsn;
+ asoc->last_flags_delivered = chunk_flags;
+ asoc->last_strm_seq_delivered = strmseq;
+ asoc->last_strm_no_delivered = strmno;
+ if (end) {
+ /* clean up the flags and such */
+ asoc->fragmented_delivery_inprogress = 0;
+ if ((chunk_flags & SCTP_DATA_UNORDERED) == 0) {
+ asoc->strmin[strmno].last_sequence_delivered++;
+ }
+ stcb->asoc.control_pdapi = NULL;
+ if (TAILQ_EMPTY(&asoc->reasmqueue) == 0) {
+ /*
+ * There could be another message
+ * ready
+ */
+ need_reasm_check = 1;
+ }
+ }
+ control = NULL;
+ goto finish_express_del;
+ }
+ }
+failed_pdapi_express_del:
+ control = NULL;
+ if ((chunk_flags & SCTP_DATA_NOT_FRAG) != SCTP_DATA_NOT_FRAG) {
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* No memory so we drop the chunk */
+ SCTP_STAT_INCR(sctps_nomem);
+ if (last_chunk == 0) {
+ /* we copied it, free the copy */
+ sctp_m_freem(dmbuf);
+ }
+ return (0);
+ }
+ chk->rec.data.TSN_seq = tsn;
+ chk->no_fr_allowed = 0;
+ chk->rec.data.stream_seq = strmseq;
+ chk->rec.data.stream_number = strmno;
+ chk->rec.data.payloadtype = protocol_id;
+ chk->rec.data.context = stcb->asoc.context;
+ chk->rec.data.doing_fast_retransmit = 0;
+ chk->rec.data.rcv_flags = chunk_flags;
+ chk->asoc = asoc;
+ chk->send_size = the_len;
+ chk->whoTo = net;
+ atomic_add_int(&net->ref_count, 1);
+ chk->data = dmbuf;
+ } else {
+ sctp_alloc_a_readq(stcb, control);
+ sctp_build_readq_entry_mac(control, stcb, asoc->context, net, tsn,
+ protocol_id,
+ stcb->asoc.context,
+ strmno, strmseq,
+ chunk_flags,
+ dmbuf);
+ if (control == NULL) {
+ /* No memory so we drop the chunk */
+ SCTP_STAT_INCR(sctps_nomem);
+ if (last_chunk == 0) {
+ /* we copied it, free the copy */
+ sctp_m_freem(dmbuf);
+ }
+ return (0);
+ }
+ control->length = the_len;
+ }
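+ /*
+ * At this point exactly one of the two is set: control (a complete,
+ * self-contained message) or chk (a fragment that must go through
+ * the reassembly queue below).
+ */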
+
+ /* Mark it as received */
+ /* Now queue it where it belongs */
+ if (control != NULL) {
+ /* First a sanity check */
+ if (asoc->fragmented_delivery_inprogress) {
+ /*
+ * Ok, we have a fragmented delivery in progress if
+ * this chunk is next to deliver OR belongs in our
+ * view to the reassembly, the peer is evil or
+ * broken.
+ */
+ uint32_t estimate_tsn;
+
+ estimate_tsn = asoc->tsn_last_delivered + 1;
+ if (TAILQ_EMPTY(&asoc->reasmqueue) &&
+ (estimate_tsn == control->sinfo_tsn)) {
+ /* Evil/Broke peer */
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ if (control->whoFrom) {
+ sctp_free_remote_addr(control->whoFrom);
+ control->whoFrom = NULL;
+ }
+ sctp_free_a_readq(stcb, control);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_15);
+ ippp++;
+ *ippp = tsn;
+ ippp++;
+ *ippp = ((strmno << 16) | strmseq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_15;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return (0);
+ } else {
+ if (sctp_does_tsn_belong_to_reasm(asoc, control->sinfo_tsn)) {
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ if (control->whoFrom) {
+ sctp_free_remote_addr(control->whoFrom);
+ control->whoFrom = NULL;
+ }
+ sctp_free_a_readq(stcb, control);
+
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_16);
+ ippp++;
+ *ippp = tsn;
+ ippp++;
+ *ippp = ((strmno << 16) | strmseq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_16;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return (0);
+ }
+ }
+ } else {
+ /* No PDAPI running */
+ if (!TAILQ_EMPTY(&asoc->reasmqueue)) {
+ /*
+ * Reassembly queue is NOT empty; validate
+ * that this tsn does not need to be in the
+ * reassembly queue. If it does then our peer
+ * is broken or evil.
+ */
+ if (sctp_does_tsn_belong_to_reasm(asoc, control->sinfo_tsn)) {
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ if (control->whoFrom) {
+ sctp_free_remote_addr(control->whoFrom);
+ control->whoFrom = NULL;
+ }
+ sctp_free_a_readq(stcb, control);
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) =
+ sizeof(struct sctp_paramhdr) +
+ (3 * sizeof(uint32_t));
+ ph = mtod(oper,
+ struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length =
+ htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_17);
+ ippp++;
+ *ippp = tsn;
+ ippp++;
+ *ippp = ((strmno << 16) | strmseq);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_17;
+ sctp_abort_an_association(stcb->sctp_ep,
+ stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+
+ *abort_flag = 1;
+ return (0);
+ }
+ }
+ }
+ /* ok, if we reach here we have passed the sanity checks */
+ if (chunk_flags & SCTP_DATA_UNORDERED) {
+ /* queue directly into socket buffer */
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+ } else {
+ /*
+ * Special check for when streams are resetting. We
+ * could be smarter about this and check the
+ * actual stream to see if it is not being reset...
+ * that way we would not create a HOLB when amongst
+ * streams being reset and those not being reset.
+ *
+ * We take complete messages that have a stream reset
+ * intervening (aka the TSN is after where our
+ * cum-ack needs to be) off and put them on a
+ * pending_reply_queue. The reassembly ones we do
+ * not have to worry about since they are all sorted
+ * and processed by TSN order. It is only the
+ * singletons I must worry about.
+ */
+ if (((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) &&
+ ((compare_with_wrap(tsn, liste->tsn, MAX_TSN)))
+ ) {
+ /*
+ * yep its past where we need to reset... go
+ * ahead and queue it.
+ */
+ if (TAILQ_EMPTY(&asoc->pending_reply_queue)) {
+ /* first one on */
+ TAILQ_INSERT_TAIL(&asoc->pending_reply_queue, control, next);
+ } else {
+ struct sctp_queued_to_read *ctlOn;
+ unsigned char inserted = 0;
+
+ ctlOn = TAILQ_FIRST(&asoc->pending_reply_queue);
+ while (ctlOn) {
+ if (compare_with_wrap(control->sinfo_tsn,
+ ctlOn->sinfo_tsn, MAX_TSN)) {
+ ctlOn = TAILQ_NEXT(ctlOn, next);
+ } else {
+ /* found it */
+ TAILQ_INSERT_BEFORE(ctlOn, control, next);
+ inserted = 1;
+ break;
+ }
+ }
+ if (inserted == 0) {
+ /*
+ * must be put at the end;
+ * nothing already queued has
+ * a larger TSN.
+ */
+ TAILQ_INSERT_TAIL(&asoc->pending_reply_queue, control, next);
+ }
+ }
+ } else {
+ sctp_queue_data_to_stream(stcb, asoc, control, abort_flag);
+ if (*abort_flag) {
+ return (0);
+ }
+ }
+ }
+ } else {
+ /* Into the re-assembly queue */
+ sctp_queue_data_for_reasm(stcb, asoc, chk, abort_flag);
+ if (*abort_flag) {
+ /*
+ * the assoc is now gone and chk was put onto the
+ * reasm queue, which has all been freed.
+ */
+ *m = NULL;
+ return (0);
+ }
+ }
+finish_express_del:
+ if (compare_with_wrap(tsn, asoc->highest_tsn_inside_map, MAX_TSN)) {
+ /* we have a new high score */
+ asoc->highest_tsn_inside_map = tsn;
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 2, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ }
+ if (tsn == (asoc->cumulative_tsn + 1)) {
+ /* Update cum-ack */
+ asoc->cumulative_tsn = tsn;
+ }
+ if (last_chunk) {
+ *m = NULL;
+ }
+ if (ordered) {
+ SCTP_STAT_INCR_COUNTER64(sctps_inorderchunks);
+ } else {
+ SCTP_STAT_INCR_COUNTER64(sctps_inunorderchunks);
+ }
+ SCTP_STAT_INCR(sctps_recvdata);
+ /* Set it present please */
+ if (sctp_logging_level & SCTP_STR_LOGGING_ENABLE) {
+ sctp_log_strm_del_alt(stcb, tsn, strmseq, strmno, SCTP_STR_LOG_FROM_MARK_TSN);
+ }
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(asoc->mapping_array_base_tsn, asoc->cumulative_tsn,
+ asoc->highest_tsn_inside_map, SCTP_MAP_PREPARE_SLIDE);
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ SCTP_SET_TSN_PRESENT(asoc->mapping_array, gap);
+ /* check the special flag for stream resets */
+ if (((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) &&
+ ((compare_with_wrap(asoc->cumulative_tsn, liste->tsn, MAX_TSN)) ||
+ (asoc->cumulative_tsn == liste->tsn))
+ ) {
+ /*
+ * we have finished working through the backlogged TSNs; now
+ * it is time to reset streams. 1: call reset function. 2: free
+ * pending_reply space. 3: distribute any chunks in
+ * pending_reply_queue.
+ */
+ struct sctp_queued_to_read *ctl;
+
+ sctp_reset_in_stream(stcb, liste->number_entries, liste->req.list_of_streams);
+ TAILQ_REMOVE(&asoc->resetHead, liste, next_resp);
+ SCTP_FREE(liste, SCTP_M_STRESET);
+ /* sa_ignore FREED_MEMORY */
+ liste = TAILQ_FIRST(&asoc->resetHead);
+ ctl = TAILQ_FIRST(&asoc->pending_reply_queue);
+ if (ctl && (liste == NULL)) {
+ /* All can be removed */
+ while (ctl) {
+ TAILQ_REMOVE(&asoc->pending_reply_queue, ctl, next);
+ sctp_queue_data_to_stream(stcb, asoc, ctl, abort_flag);
+ if (*abort_flag) {
+ return (0);
+ }
+ ctl = TAILQ_FIRST(&asoc->pending_reply_queue);
+ }
+ } else if (ctl) {
+ /* more than one in queue */
+ while (!compare_with_wrap(ctl->sinfo_tsn, liste->tsn, MAX_TSN)) {
+ /*
+ * if ctl->sinfo_tsn is <= liste->tsn we can
+ * process it which is the NOT of
+ * ctl->sinfo_tsn > liste->tsn
+ */
+ TAILQ_REMOVE(&asoc->pending_reply_queue, ctl, next);
+ sctp_queue_data_to_stream(stcb, asoc, ctl, abort_flag);
+ if (*abort_flag) {
+ return (0);
+ }
+ ctl = TAILQ_FIRST(&asoc->pending_reply_queue);
+ }
+ }
+ /*
+ * Now service re-assembly to pick up anything that has been
+ * held on the reassembly queue.
+ */
+ sctp_deliver_reasm_check(stcb, asoc);
+ need_reasm_check = 0;
+ }
+ if (need_reasm_check) {
+ /* Another one waits ? */
+ sctp_deliver_reasm_check(stcb, asoc);
+ }
+ return (1);
+}
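+
+/*
+ * Note on TSN comparisons: TSNs are 32-bit serial numbers, so throughout
+ * this file compare_with_wrap(a, b, MAX_TSN) is used to ask "is a newer
+ * than b" modulo 2^32; a plain "a > b" would misbehave once the TSN space
+ * wraps. A minimal sketch of that style of comparison (illustrative helper
+ * only, not an existing function in this file):
+ */
+#if 0 /* example only */
+static int
+example_tsn_newer(uint32_t a, uint32_t b)
+{
+ /* true when a logically follows b, allowing for 32-bit wrap-around */
+ return ((a != b) && ((uint32_t)(a - b) < (1U << 31)));
+}
+#endif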
+
+int8_t sctp_map_lookup_tab[256] = {
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 3,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 4,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 3,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 5,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 3,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 4,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 3,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 6,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 3,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 4,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 3,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 5,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 3,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 4,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 3,
+ -1, 0, -1, 1, -1, 0, -1, 2,
+ -1, 0, -1, 1, -1, 0, -1, 7,
+};
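+
+/*
+ * How the table above is used (assumption drawn from sctp_sack_check()
+ * below): sctp_map_lookup_tab[b] is the zero-based index of the last bit
+ * in the run of consecutive 1-bits starting at bit 0 of byte b, or -1 when
+ * bit 0 is clear. E.g. 0x07 -> 2, 0x01 -> 0, 0x06 -> -1, 0xff -> 7.
+ * Walking the mapping array a byte at a time with this table yields the
+ * new cumulative TSN without testing individual bits. A minimal sketch
+ * (illustrative helper only, not an existing function):
+ */
+#if 0 /* example only */
+static uint32_t
+example_new_cumack(uint8_t *map, int len, uint32_t base_tsn)
+{
+ int i, at = 0;
+
+ for (i = 0; i < len; i++) {
+ if (map[i] == 0xff) {
+ at += 8; /* a whole byte worth of TSNs is present */
+ } else {
+ /* the run of present TSNs ends inside this byte */
+ return (base_tsn + at + sctp_map_lookup_tab[map[i]]);
+ }
+ }
+ return (base_tsn + at - 1); /* every byte was all ones */
+}
+#endif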
+
+
+void
+sctp_sack_check(struct sctp_tcb *stcb, int ok_to_sack, int was_a_gap, int *abort_flag)
+{
+ /*
+ * Now we also need to check the mapping array in a couple of ways.
+ * 1) Did we move the cum-ack point?
+ */
+ struct sctp_association *asoc;
+ int i, at;
+ int last_all_ones = 0;
+ int slide_from, slide_end, lgap, distance;
+ uint32_t old_cumack, old_base, old_highest;
+ unsigned char aux_array[64];
+
+
+ asoc = &stcb->asoc;
+ at = 0;
+
+ old_cumack = asoc->cumulative_tsn;
+ old_base = asoc->mapping_array_base_tsn;
+ old_highest = asoc->highest_tsn_inside_map;
+ if (asoc->mapping_array_size < 64)
+ memcpy(aux_array, asoc->mapping_array,
+ asoc->mapping_array_size);
+ else
+ memcpy(aux_array, asoc->mapping_array, 64);
+
+ /*
+ * We could probably improve this a small bit by calculating the
+ * offset of the current cum-ack as the starting point.
+ */
+ at = 0;
+ for (i = 0; i < stcb->asoc.mapping_array_size; i++) {
+
+ if (asoc->mapping_array[i] == 0xff) {
+ at += 8;
+ last_all_ones = 1;
+ } else {
+ /* there is a 0 bit */
+ at += sctp_map_lookup_tab[asoc->mapping_array[i]];
+ last_all_ones = 0;
+ break;
+ }
+ }
+ asoc->cumulative_tsn = asoc->mapping_array_base_tsn + (at - last_all_ones);
+ /* at is one off, since in the table an embedded -1 is present */
+ at++;
+
+ if (compare_with_wrap(asoc->cumulative_tsn,
+ asoc->highest_tsn_inside_map,
+ MAX_TSN)) {
+#ifdef INVARIANTS
+ panic("huh, cumack 0x%x greater than high-tsn 0x%x in map",
+ asoc->cumulative_tsn, asoc->highest_tsn_inside_map);
+#else
+ SCTP_PRINTF("huh, cumack 0x%x greater than high-tsn 0x%x in map - should panic?\n",
+ asoc->cumulative_tsn, asoc->highest_tsn_inside_map);
+ asoc->highest_tsn_inside_map = asoc->cumulative_tsn;
+#endif
+ }
+ if ((asoc->cumulative_tsn == asoc->highest_tsn_inside_map) && (at >= 8)) {
+ /* The complete array was completed by a single FR */
+ /* highest becomes the cum-ack */
+ int clr;
+
+ asoc->cumulative_tsn = asoc->highest_tsn_inside_map;
+ /* clear the array */
+ clr = (at >> 3) + 1;
+ if (clr > asoc->mapping_array_size) {
+ clr = asoc->mapping_array_size;
+ }
+ memset(asoc->mapping_array, 0, clr);
+ /* base becomes one ahead of the cum-ack */
+ asoc->mapping_array_base_tsn = asoc->cumulative_tsn + 1;
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(old_base, old_cumack, old_highest,
+ SCTP_MAP_PREPARE_SLIDE);
+ sctp_log_map(asoc->mapping_array_base_tsn, asoc->cumulative_tsn,
+ asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_CLEARED);
+ }
+ } else if (at >= 8) {
+ /* we can slide the mapping array down */
+ /* Calculate the new byte position we can move down */
+ slide_from = at >> 3;
+ /*
+ * now calculate the ceiling of the move using our highest
+ * TSN value
+ */
+ if (asoc->highest_tsn_inside_map >= asoc->mapping_array_base_tsn) {
+ lgap = asoc->highest_tsn_inside_map -
+ asoc->mapping_array_base_tsn;
+ } else {
+ lgap = (MAX_TSN - asoc->mapping_array_base_tsn) +
+ asoc->highest_tsn_inside_map + 1;
+ }
+ slide_end = lgap >> 3;
+ if (slide_end < slide_from) {
+#ifdef INVARIANTS
+ panic("impossible slide");
+#else
+ printf("impossible slide?\n");
+ return;
+#endif
+ }
+ distance = (slide_end - slide_from) + 1;
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(old_base, old_cumack, old_highest,
+ SCTP_MAP_PREPARE_SLIDE);
+ sctp_log_map((uint32_t) slide_from, (uint32_t) slide_end,
+ (uint32_t) lgap, SCTP_MAP_SLIDE_FROM);
+ }
+ if (distance + slide_from > asoc->mapping_array_size ||
+ distance < 0) {
+ /*
+ * Here we do NOT slide forward the array so that
+ * hopefully when more data comes in to fill it up
+ * we will be able to slide it forward. Really I
+ * don't think this should happen :-0
+ */
+
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map((uint32_t) distance, (uint32_t) slide_from,
+ (uint32_t) asoc->mapping_array_size,
+ SCTP_MAP_SLIDE_NONE);
+ }
+ } else {
+ int ii;
+
+ for (ii = 0; ii < distance; ii++) {
+ asoc->mapping_array[ii] =
+ asoc->mapping_array[slide_from + ii];
+ }
+ for (ii = distance; ii <= slide_end; ii++) {
+ asoc->mapping_array[ii] = 0;
+ }
+ asoc->mapping_array_base_tsn += (slide_from << 3);
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(asoc->mapping_array_base_tsn,
+ asoc->cumulative_tsn, asoc->highest_tsn_inside_map,
+ SCTP_MAP_SLIDE_RESULT);
+ }
+ }
+ }
+ /*
+ * Now we need to see if we need to queue a sack or just start the
+ * timer (if allowed).
+ */
+ if (ok_to_sack) {
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
+ /*
+ * Ok, special case for the SHUTDOWN-SENT state. Here we
+ * make sure the SACK timer is off and instead send a
+ * SHUTDOWN and a SACK.
+ */
+ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_INDATA + SCTP_LOC_18);
+ }
+ sctp_send_shutdown(stcb, stcb->asoc.primary_destination);
+ sctp_send_sack(stcb);
+ } else {
+ int is_a_gap;
+
+ /* is there a gap now ? */
+ is_a_gap = compare_with_wrap(stcb->asoc.highest_tsn_inside_map,
+ stcb->asoc.cumulative_tsn, MAX_TSN);
+
+ /*
+ * CMT DAC algorithm: increase number of packets
+ * received since last ack
+ */
+ stcb->asoc.cmt_dac_pkts_rcvd++;
+
+ if ((stcb->asoc.send_sack == 1) || /* We need to send a
+ * SACK */
+ ((was_a_gap) && (is_a_gap == 0)) || /* was a gap, but no
+ * longer is one */
+ (stcb->asoc.numduptsns) || /* we have dup's */
+ (is_a_gap) || /* is still a gap */
+ (stcb->asoc.delayed_ack == 0) || /* Delayed sack disabled */
+ (stcb->asoc.data_pkts_seen >= stcb->asoc.sack_freq) /* hit limit of pkts */
+ ) {
+
+ if ((sctp_cmt_on_off) && (sctp_cmt_use_dac) &&
+ (stcb->asoc.send_sack == 0) &&
+ (stcb->asoc.numduptsns == 0) &&
+ (stcb->asoc.delayed_ack) &&
+ (!SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer))) {
+
+ /*
+ * CMT DAC algorithm: With CMT,
+ * delay acks even in the face of
+ * reordering. Therefore, acks
+ * that do not have to be sent
+ * because of the above reasons
+ * will be delayed. That is, acks
+ * that would have been sent due
+ * to gap reports will be delayed
+ * with DAC. Start the delayed
+ * ack timer.
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL);
+ } else {
+ /*
+ * Ok we must build a SACK since the
+ * timer is pending, we got our
+ * first packet OR there are gaps or
+ * duplicates.
+ */
+ (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer);
+ sctp_send_sack(stcb);
+ }
+ } else {
+ if (!SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL);
+ }
+ }
+ }
+ }
+}
+
+void
+sctp_service_queues(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+ uint32_t tsize;
+ uint16_t nxt_todel;
+
+ if (asoc->fragmented_delivery_inprogress) {
+ sctp_service_reassembly(stcb, asoc);
+ }
+ /* Can we proceed further, i.e. the PD-API is complete */
+ if (asoc->fragmented_delivery_inprogress) {
+ /* no */
+ return;
+ }
+ /*
+ * Now is there some other chunk I can deliver from the reassembly
+ * queue.
+ */
+doit_again:
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ if (chk == NULL) {
+ asoc->size_on_reasm_queue = 0;
+ asoc->cnt_on_reasm_queue = 0;
+ return;
+ }
+ nxt_todel = asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered + 1;
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) &&
+ ((nxt_todel == chk->rec.data.stream_seq) ||
+ (chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED))) {
+ /*
+ * Yep the first one is here. We setup to start reception,
+ * by backing down the TSN just in case we can't deliver.
+ */
+
+ /*
+ * Before we start, though, either all of the message should
+ * be here, or 1/4 the socket buffer max, or there should be
+ * nothing on the delivery queue and something deliverable.
+ */
+ if ((sctp_is_all_msg_on_reasm(asoc, &tsize) ||
+ (tsize >= stcb->sctp_ep->partial_delivery_point))) {
+ asoc->fragmented_delivery_inprogress = 1;
+ asoc->tsn_last_delivered = chk->rec.data.TSN_seq - 1;
+ asoc->str_of_pdapi = chk->rec.data.stream_number;
+ asoc->ssn_of_pdapi = chk->rec.data.stream_seq;
+ asoc->pdapi_ppid = chk->rec.data.payloadtype;
+ asoc->fragment_flags = chk->rec.data.rcv_flags;
+ sctp_service_reassembly(stcb, asoc);
+ if (asoc->fragmented_delivery_inprogress == 0) {
+ goto doit_again;
+ }
+ }
+ }
+}
+
+int
+sctp_process_data(struct mbuf **mm, int iphlen, int *offset, int length,
+ struct sctphdr *sh, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, uint32_t * high_tsn)
+{
+ struct sctp_data_chunk *ch, chunk_buf;
+ struct sctp_association *asoc;
+ int num_chunks = 0; /* number of data chunks processed */
+ int stop_proc = 0;
+ int chk_length, break_flag, last_chunk;
+ int abort_flag = 0, was_a_gap = 0;
+ struct mbuf *m;
+
+ /* set the rwnd */
+ sctp_set_rwnd(stcb, &stcb->asoc);
+
+ m = *mm;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ asoc = &stcb->asoc;
+ if (compare_with_wrap(stcb->asoc.highest_tsn_inside_map,
+ stcb->asoc.cumulative_tsn, MAX_TSN)) {
+ /* there was a gap before this data was processed */
+ was_a_gap = 1;
+ }
+ /*
+ * setup where we got the last DATA packet from for any SACK that
+ * may need to go out. Don't bump the net. This is done ONLY when a
+ * chunk is assigned.
+ */
+ asoc->last_data_chunk_from = net;
+
+ /*-
+ * Now before we proceed we must figure out if this is a wasted
+ * cluster... i.e. it is a small packet sent in and yet the driver
+ * underneath allocated a full cluster for it. If so we must copy it
+ * to a smaller mbuf and free up the cluster mbuf. This will help
+ * with cluster starvation. Note for __Panda__ we don't do this
+ * since it has clusters all the way down to 64 bytes.
+ */
+ if (SCTP_BUF_LEN(m) < (long)MLEN && SCTP_BUF_NEXT(m) == NULL) {
+ /* we only handle mbufs that are singletons.. not chains */
+ m = sctp_get_mbuf_for_msg(SCTP_BUF_LEN(m), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m) {
+ /* ok lets see if we can copy the data up */
+ caddr_t *from, *to;
+
+ /* get the pointers and copy */
+ to = mtod(m, caddr_t *);
+ from = mtod((*mm), caddr_t *);
+ memcpy(to, from, SCTP_BUF_LEN((*mm)));
+ /* copy the length and free up the old */
+ SCTP_BUF_LEN(m) = SCTP_BUF_LEN((*mm));
+ sctp_m_freem(*mm);
+ /* success, back copy */
+ *mm = m;
+ } else {
+ /* We are in trouble in the mbuf world .. yikes */
+ m = *mm;
+ }
+ }
+ /* get pointer to the first chunk header */
+ ch = (struct sctp_data_chunk *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_data_chunk), (uint8_t *) & chunk_buf);
+ if (ch == NULL) {
+ return (1);
+ }
+ /*
+ * process all DATA chunks...
+ */
+ *high_tsn = asoc->cumulative_tsn;
+ break_flag = 0;
+ asoc->data_pkts_seen++;
+ while (stop_proc == 0) {
+ /* validate chunk length */
+ chk_length = ntohs(ch->ch.chunk_length);
+ if (length - *offset < chk_length) {
+ /* all done, mutilated chunk */
+ stop_proc = 1;
+ break;
+ }
+ if (ch->ch.chunk_type == SCTP_DATA) {
+ if ((size_t)chk_length < sizeof(struct sctp_data_chunk) + 1) {
+ /*
+ * Need to send an abort since we had an
+ * invalid data chunk.
+ */
+ struct mbuf *op_err;
+
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 2 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+
+ if (op_err) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(op_err) = sizeof(struct sctp_paramhdr) +
+ (2 * sizeof(uint32_t));
+ ph = mtod(op_err, struct sctp_paramhdr *);
+ ph->param_type =
+ htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_19);
+ ippp++;
+ *ippp = asoc->cumulative_tsn;
+
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_19;
+ sctp_abort_association(inp, stcb, m, iphlen, sh,
+ op_err, 0);
+ return (2);
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xB1, 0);
+#endif
+ if (SCTP_SIZE32(chk_length) == (length - *offset)) {
+ last_chunk = 1;
+ } else {
+ last_chunk = 0;
+ }
+ if (sctp_process_a_data_chunk(stcb, asoc, mm, *offset, ch,
+ chk_length, net, high_tsn, &abort_flag, &break_flag,
+ last_chunk)) {
+ num_chunks++;
+ }
+ if (abort_flag)
+ return (2);
+
+ if (break_flag) {
+ /*
+ * Set because of out of rwnd space and no
+ * drop rep space left.
+ */
+ stop_proc = 1;
+ break;
+ }
+ } else {
+ /* not a data chunk in the data region */
+ switch (ch->ch.chunk_type) {
+ case SCTP_INITIATION:
+ case SCTP_INITIATION_ACK:
+ case SCTP_SELECTIVE_ACK:
+ case SCTP_HEARTBEAT_REQUEST:
+ case SCTP_HEARTBEAT_ACK:
+ case SCTP_ABORT_ASSOCIATION:
+ case SCTP_SHUTDOWN:
+ case SCTP_SHUTDOWN_ACK:
+ case SCTP_OPERATION_ERROR:
+ case SCTP_COOKIE_ECHO:
+ case SCTP_COOKIE_ACK:
+ case SCTP_ECN_ECHO:
+ case SCTP_ECN_CWR:
+ case SCTP_SHUTDOWN_COMPLETE:
+ case SCTP_AUTHENTICATION:
+ case SCTP_ASCONF_ACK:
+ case SCTP_PACKET_DROPPED:
+ case SCTP_STREAM_RESET:
+ case SCTP_FORWARD_CUM_TSN:
+ case SCTP_ASCONF:
+ /*
+ * Now, what do we do with KNOWN chunks that
+ * are NOT in the right place?
+ *
+ * For now, I do nothing but ignore them. We
+ * may later want to add sysctl stuff to
+ * switch out and do either an ABORT() or
+ * possibly process them.
+ */
+ if (sctp_strict_data_order) {
+ struct mbuf *op_err;
+
+ op_err = sctp_generate_invmanparam(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ sctp_abort_association(inp, stcb, m, iphlen, sh, op_err, 0);
+ return (2);
+ }
+ break;
+ default:
+ /* unknown chunk type, use bit rules */
+ if (ch->ch.chunk_type & 0x40) {
+ /* Add an error report to the queue */
+ struct mbuf *merr;
+ struct sctp_paramhdr *phd;
+
+ merr = sctp_get_mbuf_for_msg(sizeof(*phd), 0, M_DONTWAIT, 1, MT_DATA);
+ if (merr) {
+ phd = mtod(merr, struct sctp_paramhdr *);
+ /*
+ * We cheat and use param
+ * type since we did not
+ * bother to define an error
+ * cause struct. They are
+ * the same basic format
+ * with different names.
+ */
+ phd->param_type =
+ htons(SCTP_CAUSE_UNRECOG_CHUNK);
+ phd->param_length =
+ htons(chk_length + sizeof(*phd));
+ SCTP_BUF_LEN(merr) = sizeof(*phd);
+ SCTP_BUF_NEXT(merr) = SCTP_M_COPYM(m, *offset,
+ SCTP_SIZE32(chk_length),
+ M_DONTWAIT);
+ if (SCTP_BUF_NEXT(merr)) {
+ sctp_queue_op_err(stcb, merr);
+ } else {
+ sctp_m_freem(merr);
+ }
+ }
+ }
+ if ((ch->ch.chunk_type & 0x80) == 0) {
+ /* discard the rest of this packet */
+ stop_proc = 1;
+ } /* else skip this bad chunk and
+ * continue... */
+ break;
+ } /* switch of chunk type */
+ }
+ *offset += SCTP_SIZE32(chk_length);
+ if ((*offset >= length) || stop_proc) {
+ /* no more data left in the mbuf chain */
+ stop_proc = 1;
+ continue;
+ }
+ ch = (struct sctp_data_chunk *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_data_chunk), (uint8_t *) & chunk_buf);
+ if (ch == NULL) {
+ *offset = length;
+ stop_proc = 1;
+ break;
+
+ }
+ } /* while */
+ if (break_flag) {
+ /*
+ * we need to report rwnd overrun drops.
+ */
+ sctp_send_packet_dropped(stcb, net, *mm, iphlen, 0);
+ }
+ if (num_chunks) {
+ /*
+ * Did we get data? If so, update the time for auto-close and
+ * give the peer credit for being alive.
+ */
+ SCTP_STAT_INCR(sctps_recvpktwithdata);
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INDATA,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_last_rcvd);
+ }
+ /* now service all of the reassm queue if needed */
+ if (!(TAILQ_EMPTY(&asoc->reasmqueue)))
+ sctp_service_queues(stcb, asoc);
+
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
+ /* Assure that we ack right away */
+ stcb->asoc.send_sack = 1;
+ }
+ /* Start a sack timer or QUEUE a SACK for sending */
+ if ((stcb->asoc.cumulative_tsn == stcb->asoc.highest_tsn_inside_map) &&
+ (stcb->asoc.mapping_array[0] != 0xff)) {
+ if ((stcb->asoc.data_pkts_seen >= stcb->asoc.sack_freq) ||
+ (stcb->asoc.delayed_ack == 0) ||
+ (stcb->asoc.numduptsns) ||
+ (stcb->asoc.send_sack == 1)) {
+ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer);
+ }
+ sctp_send_sack(stcb);
+ } else {
+ if (!SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL);
+ }
+ }
+ } else {
+ sctp_sack_check(stcb, 1, was_a_gap, &abort_flag);
+ }
+ if (abort_flag)
+ return (2);
+
+ return (0);
+}
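+
+/*
+ * Note on the "bit rules" default case above: per the SCTP base spec
+ * (RFC 4960, section 3.2) the two high-order bits of an unrecognized chunk
+ * type tell the receiver whether to keep processing the packet and whether
+ * to report the chunk in an ERROR. A minimal sketch of that decision
+ * (illustrative helper only, not an existing function):
+ */
+#if 0 /* example only */
+static void
+example_unknown_chunk_action(uint8_t chunk_type, int *report, int *stop)
+{
+ /* lower of the two high bits set: report the chunk in an ERROR */
+ *report = (chunk_type & 0x40) ? 1 : 0;
+ /* highest bit clear: stop processing the rest of this packet */
+ *stop = ((chunk_type & 0x80) == 0);
+}
+#endif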
+
+static void
+sctp_handle_segments(struct mbuf *m, int *offset, struct sctp_tcb *stcb, struct sctp_association *asoc,
+ struct sctp_sack_chunk *ch, uint32_t last_tsn, uint32_t * biggest_tsn_acked,
+ uint32_t * biggest_newly_acked_tsn, uint32_t * this_sack_lowest_newack,
+ int num_seg, int *ecn_seg_sums)
+{
+ /************************************************/
+ /* process fragments and update sendqueue */
+ /************************************************/
+ struct sctp_sack *sack;
+ struct sctp_gap_ack_block *frag, block;
+ struct sctp_tmit_chunk *tp1;
+ int i;
+ unsigned int j;
+ int num_frs = 0;
+
+ uint16_t frag_strt, frag_end, primary_flag_set;
+ u_long last_frag_high;
+
+ /*
+ * @@@ JRI : TODO: This flag is not used anywhere .. remove?
+ */
+ if (asoc->primary_destination->dest_state & SCTP_ADDR_SWITCH_PRIMARY) {
+ primary_flag_set = 1;
+ } else {
+ primary_flag_set = 0;
+ }
+ sack = &ch->sack;
+
+ frag = (struct sctp_gap_ack_block *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_gap_ack_block), (uint8_t *) & block);
+ *offset += sizeof(block);
+ if (frag == NULL) {
+ return;
+ }
+ tp1 = NULL;
+ last_frag_high = 0;
+ for (i = 0; i < num_seg; i++) {
+ frag_strt = ntohs(frag->start);
+ frag_end = ntohs(frag->end);
+ /* some sanity checks on the fragment offsets */
+ if (frag_strt > frag_end) {
+ /* this one is malformed, skip */
+ frag++;
+ continue;
+ }
+ if (compare_with_wrap((frag_end + last_tsn), *biggest_tsn_acked,
+ MAX_TSN))
+ *biggest_tsn_acked = frag_end + last_tsn;
+
+ /* mark acked dgs and find out the highest TSN being acked */
+ if (tp1 == NULL) {
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+
+ /* save the locations of the last frags */
+ last_frag_high = frag_end + last_tsn;
+ } else {
+ /*
+ * now let's see if we need to reset the queue due to
+ * an out-of-order SACK fragment
+ */
+ if (compare_with_wrap(frag_strt + last_tsn,
+ last_frag_high, MAX_TSN)) {
+ /*
+ * if the new frag starts after the last TSN
+ * frag covered, we are ok and this one is
+ * beyond the last one
+ */
+ ;
+ } else {
+ /*
+ * ok, they have reset us, so we need to
+ * reset the queue; this will cause extra
+ * hunting, but hey, they chose the
+ * performance hit when they failed to order
+ * their gaps..
+ */
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ }
+ last_frag_high = frag_end + last_tsn;
+ }
+ for (j = frag_strt + last_tsn; (compare_with_wrap((frag_end + last_tsn), j, MAX_TSN)); j++) {
+ while (tp1) {
+ if (tp1->rec.data.doing_fast_retransmit)
+ num_frs++;
+
+ /*
+ * CMT: CUCv2 algorithm. For each TSN being
+ * processed from the sent queue, track the
+ * next expected pseudo-cumack, or
+ * rtx_pseudo_cumack, if required. Separate
+ * cumack trackers for first transmissions,
+ * and retransmissions.
+ */
+ if ((tp1->whoTo->find_pseudo_cumack == 1) && (tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ (tp1->snd_count == 1)) {
+ tp1->whoTo->pseudo_cumack = tp1->rec.data.TSN_seq;
+ tp1->whoTo->find_pseudo_cumack = 0;
+ }
+ if ((tp1->whoTo->find_rtx_pseudo_cumack == 1) && (tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ (tp1->snd_count > 1)) {
+ tp1->whoTo->rtx_pseudo_cumack = tp1->rec.data.TSN_seq;
+ tp1->whoTo->find_rtx_pseudo_cumack = 0;
+ }
+ if (tp1->rec.data.TSN_seq == j) {
+ if (tp1->sent != SCTP_DATAGRAM_UNSENT) {
+ /*
+ * must be held until
+ * cum-ack passes
+ */
+ /*
+ * ECN Nonce: Add the nonce
+ * value to the sender's
+ * nonce sum
+ */
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ /*-
+ * If it is less than RESEND, it is
+ * now no-longer in flight.
+ * Higher values may already be set
+ * via previous Gap Ack Blocks...
+ * i.e. ACKED or RESEND.
+ */
+ if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ *biggest_newly_acked_tsn, MAX_TSN)) {
+ *biggest_newly_acked_tsn = tp1->rec.data.TSN_seq;
+ }
+ /*
+ * CMT: SFR algo (and HTNA) - set
+ * saw_newack to 1 for the dest
+ * being newly acked. Update
+ * this_sack_highest_newack if
+ * appropriate.
+ */
+ if (tp1->rec.data.chunk_was_revoked == 0)
+ tp1->whoTo->saw_newack = 1;
+
+ if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ tp1->whoTo->this_sack_highest_newack,
+ MAX_TSN)) {
+ tp1->whoTo->this_sack_highest_newack =
+ tp1->rec.data.TSN_seq;
+ }
+ /*
+ * CMT DAC algo: also update
+ * this_sack_lowest_newack
+ */
+ if (*this_sack_lowest_newack == 0) {
+ if (sctp_logging_level & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(*this_sack_lowest_newack,
+ last_tsn,
+ tp1->rec.data.TSN_seq,
+ 0,
+ 0,
+ SCTP_LOG_TSN_ACKED);
+ }
+ *this_sack_lowest_newack = tp1->rec.data.TSN_seq;
+ }
+ /*
+ * CMT: CUCv2 algorithm. If the
+ * (rtx-)pseudo-cumack for the
+ * corresponding dest is being
+ * acked, then we have a new
+ * (rtx-)pseudo-cumack. Set
+ * new_(rtx_)pseudo_cumack to
+ * TRUE so that the cwnd for
+ * this dest can be updated.
+ * Also trigger a search for
+ * the next expected
+ * (rtx-)pseudo-cumack. Separate
+ * pseudo_cumack trackers are
+ * kept for first transmissions
+ * and retransmissions.
+ */
+ if (tp1->rec.data.TSN_seq == tp1->whoTo->pseudo_cumack) {
+ if (tp1->rec.data.chunk_was_revoked == 0) {
+ tp1->whoTo->new_pseudo_cumack = 1;
+ }
+ tp1->whoTo->find_pseudo_cumack = 1;
+ }
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, tp1->whoTo, tp1->rec.data.TSN_seq, SCTP_CWND_LOG_FROM_SACK);
+ }
+ if (tp1->rec.data.TSN_seq == tp1->whoTo->rtx_pseudo_cumack) {
+ if (tp1->rec.data.chunk_was_revoked == 0) {
+ tp1->whoTo->new_pseudo_cumack = 1;
+ }
+ tp1->whoTo->find_rtx_pseudo_cumack = 1;
+ }
+ if (sctp_logging_level & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(*biggest_newly_acked_tsn,
+ last_tsn,
+ tp1->rec.data.TSN_seq,
+ frag_strt,
+ frag_end,
+ SCTP_LOG_TSN_ACKED);
+ }
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_GAP,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_decrease(tp1);
+ sctp_total_flight_decrease(stcb, tp1);
+
+ tp1->whoTo->net_ack += tp1->send_size;
+ if (tp1->snd_count < 2) {
+ /*
+ * True non-retransmitted
+ * chunk
+ */
+ tp1->whoTo->net_ack2 += tp1->send_size;
+
+ /*
+ * update RTO
+ * too ? */
+ if (tp1->do_rtt) {
+ tp1->whoTo->RTO =
+ sctp_calculate_rto(stcb,
+ asoc,
+ tp1->whoTo,
+ &tp1->sent_rcv_time,
+ sctp_align_safe_nocopy);
+ tp1->do_rtt = 0;
+ }
+ }
+ }
+ if (tp1->sent <= SCTP_DATAGRAM_RESEND) {
+ (*ecn_seg_sums) += tp1->rec.data.ect_nonce;
+ (*ecn_seg_sums) &= SCTP_SACK_NONCE_SUM;
+ if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ asoc->this_sack_highest_gap,
+ MAX_TSN)) {
+ asoc->this_sack_highest_gap =
+ tp1->rec.data.TSN_seq;
+ }
+ if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_decr(asoc->sent_queue_retran_cnt);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xB2,
+ (asoc->sent_queue_retran_cnt & 0x000000ff));
+#endif
+ }
+ }
+ /*
+ * All chunks NOT UNSENT
+ * fall through here and are
+ * marked
+ */
+ tp1->sent = SCTP_DATAGRAM_MARKED;
+ if (tp1->rec.data.chunk_was_revoked) {
+ /* deflate the cwnd */
+ tp1->whoTo->cwnd -= tp1->book_size;
+ tp1->rec.data.chunk_was_revoked = 0;
+ }
+ }
+ break;
+ } /* if (tp1->TSN_seq == j) */
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, j,
+ MAX_TSN))
+ break;
+
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ } /* end while (tp1) */
+ } /* end for (j = fragStart */
+ frag = (struct sctp_gap_ack_block *)sctp_m_getptr(m, *offset,
+ sizeof(struct sctp_gap_ack_block), (uint8_t *) & block);
+ *offset += sizeof(block);
+ if (frag == NULL) {
+ break;
+ }
+ }
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ if (num_frs)
+ sctp_log_fr(*biggest_tsn_acked,
+ *biggest_newly_acked_tsn,
+ last_tsn, SCTP_FR_LOG_BIGGEST_TSNS);
+ }
+}
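+
+/*
+ * Note on gap ack blocks: the start/end values carried in a SACK are
+ * 16-bit offsets relative to that SACK's cumulative TSN, which is why the
+ * routine above reconstructs the absolute range as (start + last_tsn) ..
+ * (end + last_tsn). For example, a cum-ack of 1000 with a block
+ * {start = 2, end = 4} acknowledges TSNs 1002 through 1004. A minimal
+ * sketch (illustrative helper only, not an existing function):
+ */
+#if 0 /* example only */
+static void
+example_gap_block_range(uint32_t cum_tsn, uint16_t start, uint16_t end,
+ uint32_t *lo, uint32_t *hi)
+{
+ *lo = cum_tsn + start; /* lowest TSN covered by this gap ack block */
+ *hi = cum_tsn + end; /* highest TSN covered by this gap ack block */
+}
+#endif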
+
+static void
+sctp_check_for_revoked(struct sctp_tcb *stcb,
+ struct sctp_association *asoc, uint32_t cumack,
+ u_long biggest_tsn_acked)
+{
+ struct sctp_tmit_chunk *tp1;
+ int tot_revoked = 0;
+
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ while (tp1) {
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, cumack,
+ MAX_TSN)) {
+ /*
+ * ok this guy is either ACKED or MARKED. If it is
+ * ACKED it has been previously acked but not this
+ * time i.e. revoked. If it is MARKED it was ACK'ed
+ * again.
+ */
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, biggest_tsn_acked,
+ MAX_TSN))
+ break;
+
+
+ if (tp1->sent == SCTP_DATAGRAM_ACKED) {
+ /* it has been revoked */
+ tp1->sent = SCTP_DATAGRAM_SENT;
+ tp1->rec.data.chunk_was_revoked = 1;
+ /*
+ * We must add this stuff back in to assure
+ * timers and such get started.
+ */
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_UP_REVOKE,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_increase(tp1);
+ sctp_total_flight_increase(stcb, tp1);
+ /*
+ * We inflate the cwnd to compensate for our
+ * artificial inflation of the flight_size.
+ */
+ tp1->whoTo->cwnd += tp1->book_size;
+ tot_revoked++;
+ if (sctp_logging_level & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(asoc->last_acked_seq,
+ cumack,
+ tp1->rec.data.TSN_seq,
+ 0,
+ 0,
+ SCTP_LOG_TSN_REVOKED);
+ }
+ } else if (tp1->sent == SCTP_DATAGRAM_MARKED) {
+ /* it has been re-acked in this SACK */
+ tp1->sent = SCTP_DATAGRAM_ACKED;
+ }
+ }
+ if (tp1->sent == SCTP_DATAGRAM_UNSENT)
+ break;
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ }
+ if (tot_revoked > 0) {
+ /*
+ * Setup the ecn nonce re-sync point. We do this since once
+ * data is revoked we begin to retransmit things, which do
+ * NOT have the ECN bits set. This means we are now out of
+ * sync and must wait until we get back in sync with the
+ * peer to check ECN bits.
+ */
+ tp1 = TAILQ_FIRST(&asoc->send_queue);
+ if (tp1 == NULL) {
+ asoc->nonce_resync_tsn = asoc->sending_seq;
+ } else {
+ asoc->nonce_resync_tsn = tp1->rec.data.TSN_seq;
+ }
+ asoc->nonce_wait_for_ecne = 0;
+ asoc->nonce_sum_check = 0;
+ }
+}
+
+static void
+sctp_strike_gap_ack_chunks(struct sctp_tcb *stcb, struct sctp_association *asoc,
+ u_long biggest_tsn_acked, u_long biggest_tsn_newly_acked, u_long this_sack_lowest_newack, int accum_moved)
+{
+ struct sctp_tmit_chunk *tp1;
+ int strike_flag = 0;
+ struct timeval now;
+ int tot_retrans = 0;
+ uint32_t sending_seq;
+ struct sctp_nets *net;
+ int num_dests_sacked = 0;
+
+ /*
+ * select the sending_seq, this is either the next thing ready to be
+ * sent but not transmitted, OR, the next seq we assign.
+ */
+ tp1 = TAILQ_FIRST(&stcb->asoc.send_queue);
+ if (tp1 == NULL) {
+ sending_seq = asoc->sending_seq;
+ } else {
+ sending_seq = tp1->rec.data.TSN_seq;
+ }
+
+ /* CMT DAC algo: finding out if SACK is a mixed SACK */
+ if (sctp_cmt_on_off && sctp_cmt_use_dac) {
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (net->saw_newack)
+ num_dests_sacked++;
+ }
+ }
+ if (stcb->asoc.peer_supports_prsctp) {
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ }
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ while (tp1) {
+ strike_flag = 0;
+ if (tp1->no_fr_allowed) {
+ /* this one had a timeout or something */
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ }
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ if (tp1->sent < SCTP_DATAGRAM_RESEND)
+ sctp_log_fr(biggest_tsn_newly_acked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_CHECK_STRIKE);
+ }
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, biggest_tsn_acked,
+ MAX_TSN) ||
+ tp1->sent == SCTP_DATAGRAM_UNSENT) {
+ /* done */
+ break;
+ }
+ if (stcb->asoc.peer_supports_prsctp) {
+ if ((PR_SCTP_TTL_ENABLED(tp1->flags)) && tp1->sent < SCTP_DATAGRAM_ACKED) {
+ /* Is it expired? */
+ if (
+ (timevalcmp(&now, &tp1->rec.data.timetodrop, >))
+ ) {
+ /* Yes so drop it */
+ if (tp1->data != NULL) {
+ (void)sctp_release_pr_sctp_chunk(stcb, tp1,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ &asoc->sent_queue, SCTP_SO_NOT_LOCKED);
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ }
+ }
+ if ((PR_SCTP_RTX_ENABLED(tp1->flags)) && tp1->sent < SCTP_DATAGRAM_ACKED) {
+ /* Has it been retransmitted tv_sec times? */
+ if (tp1->snd_count > tp1->rec.data.timetodrop.tv_sec) {
+ /* Yes, so drop it */
+ if (tp1->data != NULL) {
+ (void)sctp_release_pr_sctp_chunk(stcb, tp1,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ &asoc->sent_queue, SCTP_SO_NOT_LOCKED);
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ }
+ }
+ }
+ if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ asoc->this_sack_highest_gap, MAX_TSN)) {
+ /* we are beyond the tsn in the sack */
+ break;
+ }
+ if (tp1->sent >= SCTP_DATAGRAM_RESEND) {
+ /* either a RESEND, ACKED, or MARKED */
+ /* skip */
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ }
+ /*
+ * CMT : SFR algo (covers part of DAC and HTNA as well)
+ */
+ if (tp1->whoTo && tp1->whoTo->saw_newack == 0) {
+ /*
+ * No new acks were received for data sent to this
+ * dest. Therefore, according to the SFR algo for
+ * CMT, no data sent to this dest can be marked for
+ * FR using this SACK.
+ */
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ } else if (tp1->whoTo && compare_with_wrap(tp1->rec.data.TSN_seq,
+ tp1->whoTo->this_sack_highest_newack, MAX_TSN)) {
+ /*
+ * CMT: New acks were received for data sent to
+ * this dest. But no new acks were seen for data
+ * sent after tp1. Therefore, according to the SFR
+ * algo for CMT, tp1 cannot be marked for FR using
+ * this SACK. This step covers part of the DAC algo
+ * and the HTNA algo as well.
+ */
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ continue;
+ }
+ /*
+ * Here we check to see if we have already done a FR
+ * and if so we see if the biggest TSN we saw in the sack is
+ * smaller than the recovery point. If so we don't strike
+ * the tsn... otherwise we CAN strike the TSN.
+ */
+ /*
+ * @@@ JRI: Check for CMT if (accum_moved &&
+ * asoc->fast_retran_loss_recovery && (sctp_cmt_on_off ==
+ * 0)) {
+ */
+ if (accum_moved && asoc->fast_retran_loss_recovery) {
+ /*
+ * Strike the TSN if in fast-recovery and cum-ack
+ * moved.
+ */
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(biggest_tsn_newly_acked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ tp1->sent++;
+ }
+ if (sctp_cmt_on_off && sctp_cmt_use_dac) {
+ /*
+ * CMT DAC algorithm: If SACK flag is set to
+ * 0, then lowest_newack test will not pass
+ * because it would have been set to the
+ * cumack earlier. If not already to be
+ * rtx'd, If not a mixed sack and if tp1 is
+ * not between two sacked TSNs, then mark by
+ * one more. NOTE that we are marking by one
+ * additional time since the SACK DAC flag
+ * indicates that two packets have been
+ * received after this missing TSN.
+ */
+ if ((tp1->sent < SCTP_DATAGRAM_RESEND) && (num_dests_sacked == 1) &&
+ compare_with_wrap(this_sack_lowest_newack, tp1->rec.data.TSN_seq, MAX_TSN)) {
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(16 + num_dests_sacked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ tp1->sent++;
+ }
+ }
+ } else if (tp1->rec.data.doing_fast_retransmit) {
+ /*
+ * For those that have done a FR we must take
+ * special consideration if we strike. I.e the
+ * biggest_newly_acked must be higher than the
+ * sending_seq at the time we did the FR.
+ */
+ if (
+#ifdef SCTP_FR_TO_ALTERNATE
+ /*
+ * If FR's go to new networks, then we must only do
+ * this for singly homed asoc's. However if the FR's
+ * go to the same network (Armando's work) then it's
+ * ok to FR multiple times.
+ */
+ (asoc->numnets < 2)
+#else
+ (1)
+#endif
+ ) {
+
+ if ((compare_with_wrap(biggest_tsn_newly_acked,
+ tp1->rec.data.fast_retran_tsn, MAX_TSN)) ||
+ (biggest_tsn_newly_acked ==
+ tp1->rec.data.fast_retran_tsn)) {
+ /*
+ * Strike the TSN, since this ack is
+ * beyond where things were when we
+ * did a FR.
+ */
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(biggest_tsn_newly_acked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ tp1->sent++;
+ }
+ strike_flag = 1;
+ if (sctp_cmt_on_off && sctp_cmt_use_dac) {
+ /*
+ * CMT DAC algorithm: If
+ * SACK flag is set to 0,
+ * then lowest_newack test
+ * will not pass because it
+ * would have been set to
+ * the cumack earlier. If
+ * not already to be rtx'd,
+ * If not a mixed sack and
+ * if tp1 is not between two
+ * sacked TSNs, then mark by
+ * one more. NOTE that we
+ * are marking by one
+ * additional time since the
+ * SACK DAC flag indicates
+ * that two packets have
+ * been received after this
+ * missing TSN.
+ */
+ if ((tp1->sent < SCTP_DATAGRAM_RESEND) &&
+ (num_dests_sacked == 1) &&
+ compare_with_wrap(this_sack_lowest_newack,
+ tp1->rec.data.TSN_seq, MAX_TSN)) {
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(32 + num_dests_sacked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ tp1->sent++;
+ }
+ }
+ }
+ }
+ }
+ /*
+ * JRI: TODO: remove code for HTNA algo. CMT's SFR
+ * algo covers HTNA.
+ */
+ } else if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ biggest_tsn_newly_acked, MAX_TSN)) {
+ /*
+ * We don't strike these: This is the HTNA
+ * algorithm i.e. we don't strike If our TSN is
+ * larger than the Highest TSN Newly Acked.
+ */
+ ;
+ } else {
+ /* Strike the TSN */
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(biggest_tsn_newly_acked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ tp1->sent++;
+ }
+ if (sctp_cmt_on_off && sctp_cmt_use_dac) {
+ /*
+ * CMT DAC algorithm: If SACK flag is set to
+ * 0, then lowest_newack test will not pass
+ * because it would have been set to the
+ * cumack earlier. If not already to be
+ * rtx'd, If not a mixed sack and if tp1 is
+ * not between two sacked TSNs, then mark by
+ * one more. NOTE that we are marking by one
+ * additional time since the SACK DAC flag
+ * indicates that two packets have been
+ * received after this missing TSN.
+ */
+ if ((tp1->sent < SCTP_DATAGRAM_RESEND) && (num_dests_sacked == 1) &&
+ compare_with_wrap(this_sack_lowest_newack, tp1->rec.data.TSN_seq, MAX_TSN)) {
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(48 + num_dests_sacked,
+ tp1->rec.data.TSN_seq,
+ tp1->sent,
+ SCTP_FR_LOG_STRIKE_CHUNK);
+ }
+ tp1->sent++;
+ }
+ }
+ }
+ if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ /* Increment the count to resend */
+ struct sctp_nets *alt;
+
+ /* printf("OK, we are now ready to FR this guy\n"); */
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(tp1->rec.data.TSN_seq, tp1->snd_count,
+ 0, SCTP_FR_MARKED);
+ }
+ if (strike_flag) {
+ /* This is a subsequent FR */
+ SCTP_STAT_INCR(sctps_sendmultfastretrans);
+ }
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ if (sctp_cmt_on_off) {
+ /*
+ * CMT: Using RTX_SSTHRESH policy for CMT.
+ * If CMT is being used, then pick dest with
+ * largest ssthresh for any retransmission.
+ */
+ tp1->no_fr_allowed = 1;
+ alt = tp1->whoTo;
+ /* sa_ignore NO_NULL_CHK */
+ if (sctp_cmt_on_off && sctp_cmt_pf) {
+ /*
+ * JRS 5/18/07 - If CMT PF is on,
+ * use the PF version of
+ * find_alt_net()
+ */
+ alt = sctp_find_alternate_net(stcb, alt, 2);
+ } else {
+ /*
+ * JRS 5/18/07 - If only CMT is on,
+ * use the CMT version of
+ * find_alt_net()
+ */
+ /* sa_ignore NO_NULL_CHK */
+ alt = sctp_find_alternate_net(stcb, alt, 1);
+ }
+ if (alt == NULL) {
+ alt = tp1->whoTo;
+ }
+ /*
+ * CUCv2: If a different dest is picked for
+ * the retransmission, then new
+ * (rtx-)pseudo_cumack needs to be tracked
+ * for orig dest. Let CUCv2 track new (rtx-)
+ * pseudo-cumack always.
+ */
+ if (tp1->whoTo) {
+ tp1->whoTo->find_pseudo_cumack = 1;
+ tp1->whoTo->find_rtx_pseudo_cumack = 1;
+ }
+ } else {/* CMT is OFF */
+
+#ifdef SCTP_FR_TO_ALTERNATE
+ /* Can we find an alternate? */
+ alt = sctp_find_alternate_net(stcb, tp1->whoTo, 0);
+#else
+ /*
+ * default behavior is to NOT retransmit
+ * FR's to an alternate. Armando Caro's
+ * paper details why.
+ */
+ alt = tp1->whoTo;
+#endif
+ }
+
+ tp1->rec.data.doing_fast_retransmit = 1;
+ tot_retrans++;
+ /* mark the sending seq for possible subsequent FR's */
+ /*
+ * printf("Marking TSN for FR new value %x\n",
+ * (uint32_t)tpi->rec.data.TSN_seq);
+ */
+ if (TAILQ_EMPTY(&asoc->send_queue)) {
+ /*
+ * If the send queue is empty then it's
+ * the next sequence number that will be
+ * assigned so we subtract one from this to
+ * get the one we last sent.
+ */
+ tp1->rec.data.fast_retran_tsn = sending_seq;
+ } else {
+ /*
+ * If there are chunks on the send queue
+ * (unsent data that has made it from the
+ * stream queues but not out the door), we
+ * take the first one (which will have the
+ * lowest TSN) and subtract one to get the
+ * one we last sent.
+ */
+ struct sctp_tmit_chunk *ttt;
+
+ ttt = TAILQ_FIRST(&asoc->send_queue);
+ tp1->rec.data.fast_retran_tsn =
+ ttt->rec.data.TSN_seq;
+ }
+
+ if (tp1->do_rtt) {
+ /*
+ * this guy had a RTO calculation pending on
+ * it, cancel it
+ */
+ tp1->do_rtt = 0;
+ }
+ /* fix counts and things */
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_RSND,
+ (tp1->whoTo ? (tp1->whoTo->flight_size) : 0),
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ if (tp1->whoTo) {
+ tp1->whoTo->net_ack++;
+ sctp_flight_size_decrease(tp1);
+ }
+ if (sctp_logging_level & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd(SCTP_INCREASE_PEER_RWND,
+ asoc->peers_rwnd, tp1->send_size, sctp_peer_chunk_oh);
+ }
+ /* add back to the rwnd */
+ asoc->peers_rwnd += (tp1->send_size + sctp_peer_chunk_oh);
+
+ /* remove from the total flight */
+ sctp_total_flight_decrease(stcb, tp1);
+ if (alt != tp1->whoTo) {
+ /* yes, there is an alternate. */
+ sctp_free_remote_addr(tp1->whoTo);
+ /* sa_ignore FREED_MEMORY */
+ tp1->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ } /* while (tp1) */
+
+ if (tot_retrans > 0) {
+ /*
+ * Setup the ecn nonce re-sync point. We do this since once
+ * we fast-retransmit something, we introduce a Karn's rule scenario
+ * and won't know the totals for the ECN bits.
+ */
+ asoc->nonce_resync_tsn = sending_seq;
+ asoc->nonce_wait_for_ecne = 0;
+ asoc->nonce_sum_check = 0;
+ }
+}
+
+struct sctp_tmit_chunk *
+sctp_try_advance_peer_ack_point(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *tp1, *tp2, *a_adv = NULL;
+ struct timeval now;
+ int now_filled = 0;
+
+ if (asoc->peer_supports_prsctp == 0) {
+ return (NULL);
+ }
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ while (tp1) {
+ if (tp1->sent != SCTP_FORWARD_TSN_SKIP &&
+ tp1->sent != SCTP_DATAGRAM_RESEND) {
+ /* no chance to advance, out of here */
+ break;
+ }
+ if (!PR_SCTP_ENABLED(tp1->flags)) {
+ /*
+ * We can't fwd-tsn past any that are reliable aka
+ * retransmitted until the asoc fails.
+ */
+ break;
+ }
+ if (!now_filled) {
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ now_filled = 1;
+ }
+ tp2 = TAILQ_NEXT(tp1, sctp_next);
+ /*
+ * now we got a chunk which is marked for another
+ * retransmission to a PR-stream but has run out its chances
+ * already maybe OR has been marked to skip now. Can we skip
+ * it if it's a resend?
+ */
+ if (tp1->sent == SCTP_DATAGRAM_RESEND &&
+ (PR_SCTP_TTL_ENABLED(tp1->flags))) {
+ /*
+ * Now is this one marked for resend and its time is
+ * now up?
+ */
+ if (timevalcmp(&now, &tp1->rec.data.timetodrop, >)) {
+ /* Yes so drop it */
+ if (tp1->data) {
+ (void)sctp_release_pr_sctp_chunk(stcb, tp1,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ &asoc->sent_queue, SCTP_SO_NOT_LOCKED);
+ }
+ } else {
+ /*
+ * No, we are done when we hit one for resend
+ * whose time has not expired.
+ */
+ break;
+ }
+ }
+ /*
+ * Ok now if this chunk is marked to drop it we can clean up
+ * the chunk, advance our peer ack point and we can check
+ * the next chunk.
+ */
+ if (tp1->sent == SCTP_FORWARD_TSN_SKIP) {
+ /* advance PeerAckPoint goes forward */
+ asoc->advanced_peer_ack_point = tp1->rec.data.TSN_seq;
+ a_adv = tp1;
+ /*
+ * we don't want to de-queue it here. Just wait for
+ * the next peer SACK to come with a new cumTSN and
+ * then the chunk will be dropped in the normal
+ * fashion.
+ */
+ if (tp1->data) {
+ sctp_free_bufspace(stcb, asoc, tp1, 1);
+ /*
+ * Maybe there should be another
+ * notification type
+ */
+ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ tp1, SCTP_SO_NOT_LOCKED);
+ sctp_m_freem(tp1->data);
+ tp1->data = NULL;
+ if (stcb->sctp_socket) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /*
+ * assoc was freed while we
+ * were unlocked
+ */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (NULL);
+ }
+#endif
+ sctp_sowwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ if (sctp_logging_level & SCTP_WAKE_LOGGING_ENABLE) {
+ sctp_wakeup_log(stcb, tp1->rec.data.TSN_seq, 1, SCTP_WAKESND_FROM_FWDTSN);
+ }
+ }
+ }
+ } else {
+ /*
+ * If it is still in RESEND we can advance no
+ * further
+ */
+ break;
+ }
+ /*
+ * If we hit here we just dumped tp1, move to next tsn on
+ * sent queue.
+ */
+ tp1 = tp2;
+ }
+ return (a_adv);
+}
+
+static void
+sctp_fs_audit(struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+ int inflight = 0, resend = 0, inbetween = 0, acked = 0, above = 0;
+
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ inflight++;
+ } else if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ resend++;
+ } else if (chk->sent < SCTP_DATAGRAM_ACKED) {
+ inbetween++;
+ } else if (chk->sent > SCTP_DATAGRAM_ACKED) {
+ above++;
+ } else {
+ acked++;
+ }
+ }
+
+ if ((inflight > 0) || (inbetween > 0)) {
+#ifdef INVARIANTS
+ panic("Flight size-express incorrect? \n");
+#else
+ SCTP_PRINTF("Flight size-express incorrect inflight:%d inbetween:%d\n",
+ inflight, inbetween);
+#endif
+ }
+}
+
+
+static void
+sctp_window_probe_recovery(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_nets *net,
+ struct sctp_tmit_chunk *tp1)
+{
+ struct sctp_tmit_chunk *chk;
+
+ /* First setup this one and get it moved back */
+ tp1->sent = SCTP_DATAGRAM_UNSENT;
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_WP,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_decrease(tp1);
+ sctp_total_flight_decrease(stcb, tp1);
+ TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next);
+ TAILQ_INSERT_HEAD(&asoc->send_queue, tp1, sctp_next);
+ asoc->sent_queue_cnt--;
+ asoc->send_queue_cnt++;
+ /*
+ * Now all guys marked for RESEND on the sent_queue must be moved
+ * back too.
+ */
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ /* Another chunk to move */
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ /* It should not be in flight */
+ TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
+ TAILQ_INSERT_AFTER(&asoc->send_queue, tp1, chk, sctp_next);
+ asoc->sent_queue_cnt--;
+ asoc->send_queue_cnt++;
+ sctp_ucount_decr(asoc->sent_queue_retran_cnt);
+ }
+ }
+}
+
+void
+sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
+ uint32_t rwnd, int nonce_sum_flag, int *abort_now)
+{
+ struct sctp_nets *net;
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *tp1, *tp2;
+ uint32_t old_rwnd;
+ int win_probe_recovery = 0;
+ int win_probe_recovered = 0;
+ int j, done_once = 0;
+
+ if (sctp_logging_level & SCTP_LOG_SACK_ARRIVALS_ENABLE) {
+ sctp_misc_ints(SCTP_SACK_LOG_EXPRESS, cumack,
+ rwnd, stcb->asoc.last_acked_seq, stcb->asoc.peers_rwnd);
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ stcb->asoc.cumack_log[stcb->asoc.cumack_log_at] = cumack;
+ stcb->asoc.cumack_log_at++;
+ if (stcb->asoc.cumack_log_at > SCTP_TSN_LOG_SIZE) {
+ stcb->asoc.cumack_log_at = 0;
+ }
+#endif
+ asoc = &stcb->asoc;
+ old_rwnd = asoc->peers_rwnd;
+ if (compare_with_wrap(asoc->last_acked_seq, cumack, MAX_TSN)) {
+ /* old ack */
+ return;
+ } else if (asoc->last_acked_seq == cumack) {
+ /* Window update sack */
+ asoc->peers_rwnd = sctp_sbspace_sub(rwnd,
+ (uint32_t) (asoc->total_flight + (asoc->sent_queue_cnt * sctp_peer_chunk_oh)));
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ if (asoc->peers_rwnd > old_rwnd) {
+ goto again;
+ }
+ return;
+ }
+ /* First setup for CC stuff */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->prev_cwnd = net->cwnd;
+ net->net_ack = 0;
+ net->net_ack2 = 0;
+
+ /*
+ * CMT: Reset CUC and Fast recovery algo variables before
+ * SACK processing
+ */
+ net->new_pseudo_cumack = 0;
+ net->will_exit_fast_recovery = 0;
+ }
+ if (sctp_strict_sacks) {
+ uint32_t send_s;
+
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ tp1 = TAILQ_LAST(&asoc->sent_queue,
+ sctpchunk_listhead);
+ send_s = tp1->rec.data.TSN_seq + 1;
+ } else {
+ send_s = asoc->sending_seq;
+ }
+ if ((cumack == send_s) ||
+ compare_with_wrap(cumack, send_s, MAX_TSN)) {
+#ifndef INVARIANTS
+ struct mbuf *oper;
+
+#endif
+#ifdef INVARIANTS
+ panic("Impossible sack 1");
+#else
+ *abort_now = 1;
+ /* XXX */
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_25);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ return;
+#endif
+ }
+ }
+ asoc->this_sack_highest_gap = cumack;
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INDATA,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ if (compare_with_wrap(cumack, asoc->last_acked_seq, MAX_TSN)) {
+ /* process the new consecutive TSN first */
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ while (tp1) {
+ tp2 = TAILQ_NEXT(tp1, sctp_next);
+ if (compare_with_wrap(cumack, tp1->rec.data.TSN_seq,
+ MAX_TSN) ||
+ cumack == tp1->rec.data.TSN_seq) {
+ if (tp1->sent == SCTP_DATAGRAM_UNSENT) {
+ printf("Warning, an unsent is now acked?\n");
+ }
+ /*
+ * ECN Nonce: Add the nonce to the sender's
+ * nonce sum
+ */
+ asoc->nonce_sum_expect_base += tp1->rec.data.ect_nonce;
+ if (tp1->sent < SCTP_DATAGRAM_ACKED) {
+ /*
+ * If it is less than ACKED, it is
+ * now no-longer in flight. Higher
+ * values may occur during marking
+ */
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_CA,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_decrease(tp1);
+ /* sa_ignore NO_NULL_CHK */
+ sctp_total_flight_decrease(stcb, tp1);
+ }
+ tp1->whoTo->net_ack += tp1->send_size;
+ if (tp1->snd_count < 2) {
+ /*
+ * True non-retransmitted
+ * chunk
+ */
+ tp1->whoTo->net_ack2 +=
+ tp1->send_size;
+
+ /* update RTO too? */
+ if (tp1->do_rtt) {
+ tp1->whoTo->RTO =
+ /*
+ * sa_ignore
+ * NO_NULL_CHK
+ */
+ sctp_calculate_rto(stcb,
+ asoc, tp1->whoTo,
+ &tp1->sent_rcv_time,
+ sctp_align_safe_nocopy);
+ tp1->do_rtt = 0;
+ }
+ }
+ /*
+ * CMT: CUCv2 algorithm. From the
+ * cumack'd TSNs, for each TSN being
+ * acked for the first time, set the
+ * following variables for the
+ * corresp destination.
+ * new_pseudo_cumack will trigger a
+ * cwnd update.
+ * find_(rtx_)pseudo_cumack will
+ * trigger search for the next
+ * expected (rtx-)pseudo-cumack.
+ */
+ tp1->whoTo->new_pseudo_cumack = 1;
+ tp1->whoTo->find_pseudo_cumack = 1;
+ tp1->whoTo->find_rtx_pseudo_cumack = 1;
+
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ /* sa_ignore NO_NULL_CHK */
+ sctp_log_cwnd(stcb, tp1->whoTo, tp1->rec.data.TSN_seq, SCTP_CWND_LOG_FROM_SACK);
+ }
+ }
+ if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_decr(asoc->sent_queue_retran_cnt);
+ }
+ if (tp1->rec.data.chunk_was_revoked) {
+ /* deflate the cwnd */
+ tp1->whoTo->cwnd -= tp1->book_size;
+ tp1->rec.data.chunk_was_revoked = 0;
+ }
+ tp1->sent = SCTP_DATAGRAM_ACKED;
+ TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next);
+ if (tp1->data) {
+ /* sa_ignore NO_NULL_CHK */
+ sctp_free_bufspace(stcb, asoc, tp1, 1);
+ sctp_m_freem(tp1->data);
+ }
+ if (sctp_logging_level & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(asoc->last_acked_seq,
+ cumack,
+ tp1->rec.data.TSN_seq,
+ 0,
+ 0,
+ SCTP_LOG_FREE_SENT);
+ }
+ tp1->data = NULL;
+ asoc->sent_queue_cnt--;
+ sctp_free_a_chunk(stcb, tp1);
+ tp1 = tp2;
+ } else {
+ break;
+ }
+ }
+
+ }
+ /* sa_ignore NO_NULL_CHK */
+ if (stcb->sctp_socket) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ SOCKBUF_LOCK(&stcb->sctp_socket->so_snd);
+ if (sctp_logging_level & SCTP_WAKE_LOGGING_ENABLE) {
+ /* sa_ignore NO_NULL_CHK */
+ sctp_wakeup_log(stcb, cumack, 1, SCTP_WAKESND_FROM_SACK);
+ }
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* assoc was freed while we were unlocked */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ sctp_sowwakeup_locked(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ } else {
+ if (sctp_logging_level & SCTP_WAKE_LOGGING_ENABLE) {
+ sctp_wakeup_log(stcb, cumack, 1, SCTP_NOWAKE_FROM_SACK);
+ }
+ }
+
+ /* JRS - Use the congestion control given in the CC module */
+ if (asoc->last_acked_seq != cumack)
+ asoc->cc_functions.sctp_cwnd_update_after_sack(stcb, asoc, 1, 0, 0);
+
+ asoc->last_acked_seq = cumack;
+
+ if (TAILQ_EMPTY(&asoc->sent_queue)) {
+ /* nothing left in-flight */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->flight_size = 0;
+ net->partial_bytes_acked = 0;
+ }
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ }
+ /* Fix up the a-p-a-p for future PR-SCTP sends */
+ if (compare_with_wrap(cumack, asoc->advanced_peer_ack_point, MAX_TSN)) {
+ asoc->advanced_peer_ack_point = cumack;
+ }
+ /* ECN Nonce updates */
+ if (asoc->ecn_nonce_allowed) {
+ if (asoc->nonce_sum_check) {
+ if (nonce_sum_flag != ((asoc->nonce_sum_expect_base) & SCTP_SACK_NONCE_SUM)) {
+ if (asoc->nonce_wait_for_ecne == 0) {
+ struct sctp_tmit_chunk *lchk;
+
+ lchk = TAILQ_FIRST(&asoc->send_queue);
+ asoc->nonce_wait_for_ecne = 1;
+ if (lchk) {
+ asoc->nonce_wait_tsn = lchk->rec.data.TSN_seq;
+ } else {
+ asoc->nonce_wait_tsn = asoc->sending_seq;
+ }
+ } else {
+ if (compare_with_wrap(asoc->last_acked_seq, asoc->nonce_wait_tsn, MAX_TSN) ||
+ (asoc->last_acked_seq == asoc->nonce_wait_tsn)) {
+ /*
+ * Misbehaving peer. We need
+ * to react to this guy
+ */
+ asoc->ecn_allowed = 0;
+ asoc->ecn_nonce_allowed = 0;
+ }
+ }
+ }
+ } else {
+ /* See if Resynchronization Possible */
+ if (compare_with_wrap(asoc->last_acked_seq, asoc->nonce_resync_tsn, MAX_TSN)) {
+ asoc->nonce_sum_check = 1;
+ /*
+ * now we must calculate what the base is.
+ * We do this based on two things: we know
+ * the totals for all the segments
+ * gap-acked in the SACK (none), and we
+ * also know the SACK's nonce sum, it is in
+ * nonce_sum_flag. So we can build a truth
+ * table to back-calculate the new value of
+ * asoc->nonce_sum_expect_base:
+ *
+ * SACK-flag-Value   Seg-Sums   Base
+ *        0             0         0
+ *        1             0         1
+ *        0             1         1
+ *        1             1         0
+ */
+ asoc->nonce_sum_expect_base = (0 ^ nonce_sum_flag) & SCTP_SACK_NONCE_SUM;
+ }
+ }
+ }
+ /* RWND update */
+ asoc->peers_rwnd = sctp_sbspace_sub(rwnd,
+ (uint32_t) (asoc->total_flight + (asoc->sent_queue_cnt * sctp_peer_chunk_oh)));
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ if (asoc->peers_rwnd > old_rwnd) {
+ win_probe_recovery = 1;
+ }
+ /* Now assure a timer where data is queued at */
+again:
+ j = 0;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (win_probe_recovery && (net->window_probe)) {
+ net->window_probe = 0;
+ win_probe_recovered = 1;
+ /*
+ * Find first chunk that was used with window probe
+ * and clear the sent
+ */
+ /* sa_ignore FREED_MEMORY */
+ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
+ if (tp1->window_probe) {
+ /* move back to data send queue */
+ sctp_window_probe_recovery(stcb, asoc, net, tp1);
+ break;
+ }
+ }
+ }
+ if (net->flight_size) {
+ int to_ticks;
+
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ j++;
+ (void)SCTP_OS_TIMER_START(&net->rxt_timer.timer, to_ticks,
+ sctp_timeout_handler, &net->rxt_timer);
+ } else {
+ if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_22);
+ }
+ if (sctp_early_fr) {
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck4);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_23);
+ }
+ }
+ }
+ }
+ if ((j == 0) &&
+ (!TAILQ_EMPTY(&asoc->sent_queue)) &&
+ (asoc->sent_queue_retran_cnt == 0) &&
+ (win_probe_recovered == 0) &&
+ (done_once == 0)) {
+ /* huh, this should not happen */
+ sctp_fs_audit(asoc);
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->flight_size = 0;
+ }
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ asoc->sent_queue_retran_cnt = 0;
+ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ sctp_flight_size_increase(tp1);
+ sctp_total_flight_increase(stcb, tp1);
+ } else if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ asoc->sent_queue_retran_cnt++;
+ }
+ }
+ done_once = 1;
+ goto again;
+ }
+ /**********************************/
+ /* Now what about shutdown issues */
+ /**********************************/
+ if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue)) {
+ /* nothing left on sendqueue.. consider done */
+ /* clean up */
+ if ((asoc->stream_queue_cnt == 1) &&
+ ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
+ (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED)) &&
+ (asoc->locked_on_sending)
+ ) {
+ struct sctp_stream_queue_pending *sp;
+
+ /*
+ * I may be in a state where we got all across.. but
+ * cannot write more due to a shutdown... we abort
+ * since the user did not indicate EOR in this case.
+ * The sp will be cleaned during free of the asoc.
+ */
+ sp = TAILQ_LAST(&((asoc->locked_on_sending)->outqueue),
+ sctp_streamhead);
+ if ((sp) && (sp->length == 0)) {
+ /* Let cleanup code purge it */
+ if (sp->msg_is_complete) {
+ asoc->stream_queue_cnt--;
+ } else {
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ asoc->locked_on_sending = NULL;
+ asoc->stream_queue_cnt--;
+ }
+ }
+ }
+ if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) &&
+ (asoc->stream_queue_cnt == 0)) {
+ if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
+ /* Need to abort here */
+ struct mbuf *oper;
+
+ abort_out_now:
+ *abort_now = 1;
+ /* XXX */
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_24);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_24;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_RESPONSE_TO_USER_REQ, oper, SCTP_SO_NOT_LOCKED);
+ } else {
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown(stcb,
+ stcb->asoc.primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ } else if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (asoc->stream_queue_cnt == 0)) {
+ if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
+ goto abort_out_now;
+ }
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_send_shutdown_ack(stcb,
+ stcb->asoc.primary_destination);
+
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ }
+ if (sctp_logging_level & SCTP_SACK_RWND_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_SACK_RWND_UPDATE,
+ rwnd,
+ stcb->asoc.peers_rwnd,
+ stcb->asoc.total_flight,
+ stcb->asoc.total_output_queue_size);
+ }
+}
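
The tail of the express handler above recomputes the peer's window the same way the slow path does: subtract the outstanding flight plus a per-chunk overhead from the advertised rwnd, and clamp small results to zero (sender-side silly-window avoidance). A minimal userspace sketch of that arithmetic follows; the overhead constant and SWS floor are picked purely for illustration and are not the sysctl defaults.

#include <stdint.h>
#include <stdio.h>

#define PEER_CHUNK_OVERHEAD 256   /* illustrative stand-in for sctp_peer_chunk_oh */
#define SWS_SENDER_FLOOR    1420  /* illustrative stand-in for sctp_sws_sender */

static uint32_t
update_peers_rwnd(uint32_t a_rwnd, uint32_t total_flight, uint32_t sent_queue_cnt)
{
	uint32_t consumed = total_flight + sent_queue_cnt * PEER_CHUNK_OVERHEAD;
	uint32_t rwnd = (a_rwnd > consumed) ? a_rwnd - consumed : 0;

	/* Sender-side SWS avoidance: treat a tiny window as closed. */
	if (rwnd < SWS_SENDER_FLOOR)
		rwnd = 0;
	return (rwnd);
}

int
main(void)
{
	printf("%u\n", update_peers_rwnd(65535, 3000, 4));  /* plenty of room left */
	printf("%u\n", update_peers_rwnd(4000, 3000, 4));   /* clamped to zero */
	return (0);
}
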
+
+void
+sctp_handle_sack(struct mbuf *m, int offset,
+ struct sctp_sack_chunk *ch, struct sctp_tcb *stcb,
+ struct sctp_nets *net_from, int *abort_now, int sack_len, uint32_t rwnd)
+{
+ struct sctp_association *asoc;
+ struct sctp_sack *sack;
+ struct sctp_tmit_chunk *tp1, *tp2;
+ uint32_t cum_ack, last_tsn, biggest_tsn_acked, biggest_tsn_newly_acked,
+ this_sack_lowest_newack;
+ uint32_t sav_cum_ack;
+ uint16_t num_seg, num_dup;
+ uint16_t wake_him = 0;
+ unsigned int sack_length;
+ uint32_t send_s = 0;
+ long j;
+ int accum_moved = 0;
+ int will_exit_fast_recovery = 0;
+ uint32_t a_rwnd, old_rwnd;
+ int win_probe_recovery = 0;
+ int win_probe_recovered = 0;
+ struct sctp_nets *net = NULL;
+ int nonce_sum_flag, ecn_seg_sums = 0;
+ int done_once;
+ uint8_t reneged_all = 0;
+ uint8_t cmt_dac_flag;
+
+ /*
+ * we take any chance we can to service our queues since we cannot
+ * get awoken when the socket is read from :<
+ */
+ /*
+ * Now perform the actual SACK handling:
+ * 1) Verify that it is not an old sack; if so, discard.
+ * 2) If there is nothing left in the send queue (cum-ack is equal
+ *    to last acked) then you have a duplicate too; update any rwnd
+ *    change, verify no timers are running, then return.
+ * 3) Process any new consecutive data, i.e. cum-ack moved; process
+ *    these first and note that it moved.
+ * 4) Process any sack blocks.
+ * 5) Drop any acked chunks from the queue.
+ * 6) Check for any revoked blocks and mark them.
+ * 7) Update the cwnd.
+ * 8) Nothing left: sync up flightsizes and things, stop all timers
+ *    and also check for shutdown_pending state. If so then go ahead
+ *    and send off the shutdown. If in shutdown recv, send off the
+ *    shutdown-ack and start that timer, then return.
+ * 9) Strike any non-acked things and do FR procedure if needed,
+ *    being sure to set the FR flag.
+ * 10) Do pr-sctp procedures.
+ * 11) Apply any FR penalties.
+ * 12) Assure we will SACK if in shutdown_recv state.
+ */
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ sack = &ch->sack;
+ /* CMT DAC algo */
+ this_sack_lowest_newack = 0;
+ j = 0;
+ sack_length = (unsigned int)sack_len;
+ /* ECN Nonce */
+ SCTP_STAT_INCR(sctps_slowpath_sack);
+ nonce_sum_flag = ch->ch.chunk_flags & SCTP_SACK_NONCE_SUM;
+ cum_ack = last_tsn = ntohl(sack->cum_tsn_ack);
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ stcb->asoc.cumack_log[stcb->asoc.cumack_log_at] = cum_ack;
+ stcb->asoc.cumack_log_at++;
+ if (stcb->asoc.cumack_log_at > SCTP_TSN_LOG_SIZE) {
+ stcb->asoc.cumack_log_at = 0;
+ }
+#endif
+ num_seg = ntohs(sack->num_gap_ack_blks);
+ a_rwnd = rwnd;
+
+ if (sctp_logging_level & SCTP_LOG_SACK_ARRIVALS_ENABLE) {
+ sctp_misc_ints(SCTP_SACK_LOG_NORMAL, cum_ack,
+ rwnd, stcb->asoc.last_acked_seq, stcb->asoc.peers_rwnd);
+ }
+ /* CMT DAC algo */
+ cmt_dac_flag = ch->ch.chunk_flags & SCTP_SACK_CMT_DAC;
+ num_dup = ntohs(sack->num_dup_tsns);
+
+ old_rwnd = stcb->asoc.peers_rwnd;
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
+ stcb->asoc.overall_error_count,
+ 0,
+ SCTP_FROM_SCTP_INDATA,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count = 0;
+ asoc = &stcb->asoc;
+ if (sctp_logging_level & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(asoc->last_acked_seq,
+ cum_ack,
+ 0,
+ num_seg,
+ num_dup,
+ SCTP_LOG_NEW_SACK);
+ }
+ if ((num_dup) && (sctp_logging_level & (SCTP_FR_LOGGING_ENABLE | SCTP_EARLYFR_LOGGING_ENABLE))) {
+ int off_to_dup, iii;
+ uint32_t *dupdata, dblock;
+
+ off_to_dup = (num_seg * sizeof(struct sctp_gap_ack_block)) + sizeof(struct sctp_sack_chunk);
+ if ((off_to_dup + (num_dup * sizeof(uint32_t))) <= sack_length) {
+ dupdata = (uint32_t *) sctp_m_getptr(m, off_to_dup,
+ sizeof(uint32_t), (uint8_t *) & dblock);
+ off_to_dup += sizeof(uint32_t);
+ if (dupdata) {
+ for (iii = 0; iii < num_dup; iii++) {
+ sctp_log_fr(*dupdata, 0, 0, SCTP_FR_DUPED);
+ dupdata = (uint32_t *) sctp_m_getptr(m, off_to_dup,
+ sizeof(uint32_t), (uint8_t *) & dblock);
+ if (dupdata == NULL)
+ break;
+ off_to_dup += sizeof(uint32_t);
+ }
+ }
+ } else {
+ SCTP_PRINTF("Size invalid offset to dups:%d number dups:%d sack_len:%d num gaps:%d\n",
+ off_to_dup, num_dup, sack_length, num_seg);
+ }
+ }
+ if (sctp_strict_sacks) {
+ /* reality check */
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ tp1 = TAILQ_LAST(&asoc->sent_queue,
+ sctpchunk_listhead);
+ send_s = tp1->rec.data.TSN_seq + 1;
+ } else {
+ send_s = asoc->sending_seq;
+ }
+ if (cum_ack == send_s ||
+ compare_with_wrap(cum_ack, send_s, MAX_TSN)) {
+#ifndef INVARIANTS
+ struct mbuf *oper;
+
+#endif
+#ifdef INVARIANTS
+ hopeless_peer:
+ panic("Impossible sack 1");
+#else
+
+
+ /*
+ * no way, we have not even sent this TSN out yet.
+ * Peer is hopelessly messed up with us.
+ */
+ hopeless_peer:
+ *abort_now = 1;
+ /* XXX */
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_25);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_25;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ return;
+#endif
+ }
+ }
+ /**********************/
+ /* 1) check the range */
+ /**********************/
+ if (compare_with_wrap(asoc->last_acked_seq, last_tsn, MAX_TSN)) {
+ /* acking something behind */
+ return;
+ }
+ sav_cum_ack = asoc->last_acked_seq;
+
+ /* update the Rwnd of the peer */
+ if (TAILQ_EMPTY(&asoc->sent_queue) &&
+ TAILQ_EMPTY(&asoc->send_queue) &&
+ (asoc->stream_queue_cnt == 0)
+ ) {
+ /* nothing left on send/sent and strmq */
+ if (sctp_logging_level & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd_set(SCTP_SET_PEER_RWND_VIA_SACK,
+ asoc->peers_rwnd, 0, 0, a_rwnd);
+ }
+ asoc->peers_rwnd = a_rwnd;
+ if (asoc->sent_queue_retran_cnt) {
+ asoc->sent_queue_retran_cnt = 0;
+ }
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ /* stop any timers */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_26);
+ if (sctp_early_fr) {
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck1);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_26);
+ }
+ }
+ net->partial_bytes_acked = 0;
+ net->flight_size = 0;
+ }
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ return;
+ }
+ /*
+ * We init net_ack and net_ack2 to 0. These are used to track 2
+ * things. The total byte count acked is tracked in net_ack AND
+ * net_ack2 is used to track the total bytes acked that are
+ * unambiguous and were never retransmitted. We track these on a
+ * per destination address basis.
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->prev_cwnd = net->cwnd;
+ net->net_ack = 0;
+ net->net_ack2 = 0;
+
+ /*
+ * CMT: Reset CUC and Fast recovery algo variables before
+ * SACK processing
+ */
+ net->new_pseudo_cumack = 0;
+ net->will_exit_fast_recovery = 0;
+ }
+ /* process the new consecutive TSN first */
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ while (tp1) {
+ if (compare_with_wrap(last_tsn, tp1->rec.data.TSN_seq,
+ MAX_TSN) ||
+ last_tsn == tp1->rec.data.TSN_seq) {
+ if (tp1->sent != SCTP_DATAGRAM_UNSENT) {
+ /*
+ * ECN Nonce: Add the nonce to the sender's
+ * nonce sum
+ */
+ asoc->nonce_sum_expect_base += tp1->rec.data.ect_nonce;
+ accum_moved = 1;
+ if (tp1->sent < SCTP_DATAGRAM_ACKED) {
+ /*
+ * If it is less than ACKED, it is
+ * now no longer in flight. Higher
+ * values may occur during marking
+ */
+ if ((tp1->whoTo->dest_state &
+ SCTP_ADDR_UNCONFIRMED) &&
+ (tp1->snd_count < 2)) {
+ /*
+ * If there was no retran
+ * and the address is
+ * un-confirmed and we sent
+ * there and are now
+ * sacked.. its confirmed,
+ * mark it so.
+ */
+ tp1->whoTo->dest_state &=
+ ~SCTP_ADDR_UNCONFIRMED;
+ }
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_CA,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_decrease(tp1);
+ sctp_total_flight_decrease(stcb, tp1);
+ }
+ tp1->whoTo->net_ack += tp1->send_size;
+
+ /* CMT SFR and DAC algos */
+ this_sack_lowest_newack = tp1->rec.data.TSN_seq;
+ tp1->whoTo->saw_newack = 1;
+
+ if (tp1->snd_count < 2) {
+ /*
+ * True non-retransmitted
+ * chunk
+ */
+ tp1->whoTo->net_ack2 +=
+ tp1->send_size;
+
+ /* update RTO too? */
+ if (tp1->do_rtt) {
+ tp1->whoTo->RTO =
+ sctp_calculate_rto(stcb,
+ asoc, tp1->whoTo,
+ &tp1->sent_rcv_time,
+ sctp_align_safe_nocopy);
+ tp1->do_rtt = 0;
+ }
+ }
+ /*
+ * CMT: CUCv2 algorithm. From the
+ * cumack'd TSNs, for each TSN being
+ * acked for the first time, set the
+ * following variables for the
+ * corresp destination.
+ * new_pseudo_cumack will trigger a
+ * cwnd update.
+ * find_(rtx_)pseudo_cumack will
+ * trigger search for the next
+ * expected (rtx-)pseudo-cumack.
+ */
+ tp1->whoTo->new_pseudo_cumack = 1;
+ tp1->whoTo->find_pseudo_cumack = 1;
+ tp1->whoTo->find_rtx_pseudo_cumack = 1;
+
+
+ if (sctp_logging_level & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(asoc->last_acked_seq,
+ cum_ack,
+ tp1->rec.data.TSN_seq,
+ 0,
+ 0,
+ SCTP_LOG_TSN_ACKED);
+ }
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, tp1->whoTo, tp1->rec.data.TSN_seq, SCTP_CWND_LOG_FROM_SACK);
+ }
+ }
+ if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_decr(asoc->sent_queue_retran_cnt);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xB3,
+ (asoc->sent_queue_retran_cnt & 0x000000ff));
+#endif
+ }
+ if (tp1->rec.data.chunk_was_revoked) {
+ /* deflate the cwnd */
+ tp1->whoTo->cwnd -= tp1->book_size;
+ tp1->rec.data.chunk_was_revoked = 0;
+ }
+ tp1->sent = SCTP_DATAGRAM_ACKED;
+ }
+ } else {
+ break;
+ }
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ }
+ biggest_tsn_newly_acked = biggest_tsn_acked = last_tsn;
+ /* always set this up to cum-ack */
+ asoc->this_sack_highest_gap = last_tsn;
+
+ /* Move offset up to point to gaps/dups */
+ offset += sizeof(struct sctp_sack_chunk);
+ if (((num_seg * (sizeof(struct sctp_gap_ack_block))) + sizeof(struct sctp_sack_chunk)) > sack_length) {
+
+ /* skip corrupt segments */
+ goto skip_segments;
+ }
+ if (num_seg > 0) {
+
+ /*
+ * CMT: SFR algo (and HTNA) - this_sack_highest_newack has
+ * to be greater than the cumack. Also reset saw_newack to 0
+ * for all dests.
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->saw_newack = 0;
+ net->this_sack_highest_newack = last_tsn;
+ }
+
+ /*
+ * thisSackHighestGap will increase while handling NEW
+ * segments this_sack_highest_newack will increase while
+ * handling NEWLY ACKED chunks. this_sack_lowest_newack is
+ * used for CMT DAC algo. saw_newack will also change.
+ */
+ sctp_handle_segments(m, &offset, stcb, asoc, ch, last_tsn,
+ &biggest_tsn_acked, &biggest_tsn_newly_acked, &this_sack_lowest_newack,
+ num_seg, &ecn_seg_sums);
+
+ if (sctp_strict_sacks) {
+ /*
+ * validate the biggest_tsn_acked in the gap acks if
+ * strict adherence is wanted.
+ */
+ if ((biggest_tsn_acked == send_s) ||
+ (compare_with_wrap(biggest_tsn_acked, send_s, MAX_TSN))) {
+ /*
+ * peer is either confused or we are under
+ * attack. We must abort.
+ */
+ goto hopeless_peer;
+ }
+ }
+ }
+skip_segments:
+ /*******************************************/
+ /* cancel ALL T3-send timer if accum moved */
+ /*******************************************/
+ if (sctp_cmt_on_off) {
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (net->new_pseudo_cumack)
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_27);
+
+ }
+ } else {
+ if (accum_moved) {
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_28);
+ }
+ }
+ }
+ /********************************************/
+ /* drop the acked chunks from the sendqueue */
+ /********************************************/
+ asoc->last_acked_seq = cum_ack;
+
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ if (tp1 == NULL)
+ goto done_with_it;
+ do {
+ if (compare_with_wrap(tp1->rec.data.TSN_seq, cum_ack,
+ MAX_TSN)) {
+ break;
+ }
+ if (tp1->sent == SCTP_DATAGRAM_UNSENT) {
+ /* no more sent on list */
+ printf("Warning, tp1->sent == %d and its now acked?\n",
+ tp1->sent);
+ }
+ tp2 = TAILQ_NEXT(tp1, sctp_next);
+ TAILQ_REMOVE(&asoc->sent_queue, tp1, sctp_next);
+ if (tp1->pr_sctp_on) {
+ if (asoc->pr_sctp_cnt != 0)
+ asoc->pr_sctp_cnt--;
+ }
+ if ((TAILQ_FIRST(&asoc->sent_queue) == NULL) &&
+ (asoc->total_flight > 0)) {
+#ifdef INVARIANTS
+ panic("Warning flight size is postive and should be 0");
+#else
+ SCTP_PRINTF("Warning flight size incorrect should be 0 is %d\n",
+ asoc->total_flight);
+#endif
+ asoc->total_flight = 0;
+ }
+ if (tp1->data) {
+ /* sa_ignore NO_NULL_CHK */
+ sctp_free_bufspace(stcb, asoc, tp1, 1);
+ sctp_m_freem(tp1->data);
+ if (PR_SCTP_BUF_ENABLED(tp1->flags)) {
+ asoc->sent_queue_cnt_removeable--;
+ }
+ }
+ if (sctp_logging_level & SCTP_SACK_LOGGING_ENABLE) {
+ sctp_log_sack(asoc->last_acked_seq,
+ cum_ack,
+ tp1->rec.data.TSN_seq,
+ 0,
+ 0,
+ SCTP_LOG_FREE_SENT);
+ }
+ tp1->data = NULL;
+ asoc->sent_queue_cnt--;
+ sctp_free_a_chunk(stcb, tp1);
+ wake_him++;
+ tp1 = tp2;
+ } while (tp1 != NULL);
+
+done_with_it:
+ /* sa_ignore NO_NULL_CHK */
+ if ((wake_him) && (stcb->sctp_socket)) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ SOCKBUF_LOCK(&stcb->sctp_socket->so_snd);
+ if (sctp_logging_level & SCTP_WAKE_LOGGING_ENABLE) {
+ sctp_wakeup_log(stcb, cum_ack, wake_him, SCTP_WAKESND_FROM_SACK);
+ }
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* assoc was freed while we were unlocked */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ sctp_sowwakeup_locked(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ } else {
+ if (sctp_logging_level & SCTP_WAKE_LOGGING_ENABLE) {
+ sctp_wakeup_log(stcb, cum_ack, wake_him, SCTP_NOWAKE_FROM_SACK);
+ }
+ }
+
+ if (asoc->fast_retran_loss_recovery && accum_moved) {
+ if (compare_with_wrap(asoc->last_acked_seq,
+ asoc->fast_recovery_tsn, MAX_TSN) ||
+ asoc->last_acked_seq == asoc->fast_recovery_tsn) {
+ /* Setup so we will exit RFC2582 fast recovery */
+ will_exit_fast_recovery = 1;
+ }
+ }
+ /*
+ * Check for revoked fragments:
+ *
+ * if Previous sack - Had no frags then we can't have any revoked if
+ * Previous sack - Had frag's then - If we now have frags aka
+ * num_seg > 0 call sctp_check_for_revoked() to tell if peer revoked
+ * some of them. else - The peer revoked all ACKED fragments, since
+ * we had some before and now we have NONE.
+ */
+
+ if (num_seg)
+ sctp_check_for_revoked(stcb, asoc, cum_ack, biggest_tsn_acked);
+ else if (asoc->saw_sack_with_frags) {
+ int cnt_revoked = 0;
+
+ tp1 = TAILQ_FIRST(&asoc->sent_queue);
+ if (tp1 != NULL) {
+ /* Peer revoked all dg's marked or acked */
+ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
+ if ((tp1->sent > SCTP_DATAGRAM_RESEND) &&
+ (tp1->sent < SCTP_FORWARD_TSN_SKIP)) {
+ tp1->sent = SCTP_DATAGRAM_SENT;
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_UP_REVOKE,
+ tp1->whoTo->flight_size,
+ tp1->book_size,
+ (uintptr_t) tp1->whoTo,
+ tp1->rec.data.TSN_seq);
+ }
+ sctp_flight_size_increase(tp1);
+ sctp_total_flight_increase(stcb, tp1);
+ tp1->rec.data.chunk_was_revoked = 1;
+ /*
+ * To ensure that this increase in
+ * flightsize, which is artificial,
+ * does not throttle the sender, we
+ * also increase the cwnd
+ * artificially.
+ */
+ tp1->whoTo->cwnd += tp1->book_size;
+ cnt_revoked++;
+ }
+ }
+ if (cnt_revoked) {
+ reneged_all = 1;
+ }
+ }
+ asoc->saw_sack_with_frags = 0;
+ }
+ if (num_seg)
+ asoc->saw_sack_with_frags = 1;
+ else
+ asoc->saw_sack_with_frags = 0;
+
+ /* JRS - Use the congestion control given in the CC module */
+ asoc->cc_functions.sctp_cwnd_update_after_sack(stcb, asoc, accum_moved, reneged_all, will_exit_fast_recovery);
+
+ if (TAILQ_EMPTY(&asoc->sent_queue)) {
+ /* nothing left in-flight */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ /* stop all timers */
+ if (sctp_early_fr) {
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck4);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_29);
+ }
+ }
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_30);
+ net->flight_size = 0;
+ net->partial_bytes_acked = 0;
+ }
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ }
+ /**********************************/
+ /* Now what about shutdown issues */
+ /**********************************/
+ if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue)) {
+ /* nothing left on sendqueue.. consider done */
+ if (sctp_logging_level & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd_set(SCTP_SET_PEER_RWND_VIA_SACK,
+ asoc->peers_rwnd, 0, 0, a_rwnd);
+ }
+ asoc->peers_rwnd = a_rwnd;
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ /* clean up */
+ if ((asoc->stream_queue_cnt == 1) &&
+ ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
+ (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED)) &&
+ (asoc->locked_on_sending)
+ ) {
+ struct sctp_stream_queue_pending *sp;
+
+ /*
+ * I may be in a state where we got all across.. but
+ * cannot write more due to a shutdown... we abort
+ * since the user did not indicate EOR in this case.
+ */
+ sp = TAILQ_LAST(&((asoc->locked_on_sending)->outqueue),
+ sctp_streamhead);
+ if ((sp) && (sp->length == 0)) {
+ asoc->locked_on_sending = NULL;
+ if (sp->msg_is_complete) {
+ asoc->stream_queue_cnt--;
+ } else {
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ asoc->stream_queue_cnt--;
+ }
+ }
+ }
+ if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) &&
+ (asoc->stream_queue_cnt == 0)) {
+ if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
+ /* Need to abort here */
+ struct mbuf *oper;
+
+ abort_out_now:
+ *abort_now = 1;
+ /* XXX */
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_31);
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_31;
+ sctp_abort_an_association(stcb->sctp_ep, stcb, SCTP_RESPONSE_TO_USER_REQ, oper, SCTP_SO_NOT_LOCKED);
+ return;
+ } else {
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown(stcb,
+ stcb->asoc.primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ }
+ return;
+ } else if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (asoc->stream_queue_cnt == 0)) {
+ if (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT) {
+ goto abort_out_now;
+ }
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_send_shutdown_ack(stcb,
+ stcb->asoc.primary_destination);
+
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK,
+ stcb->sctp_ep, stcb, asoc->primary_destination);
+ return;
+ }
+ }
+ /*
+ * Now here we are going to recycle net_ack for a different use...
+ * HEADS UP.
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->net_ack = 0;
+ }
+
+ /*
+ * CMT DAC algorithm: If SACK DAC flag was 0, then no extra marking
+ * to be done. Setting this_sack_lowest_newack to the cum_ack will
+ * automatically ensure that.
+ */
+ if (sctp_cmt_on_off && sctp_cmt_use_dac && (cmt_dac_flag == 0)) {
+ this_sack_lowest_newack = cum_ack;
+ }
+ if (num_seg > 0) {
+ sctp_strike_gap_ack_chunks(stcb, asoc, biggest_tsn_acked,
+ biggest_tsn_newly_acked, this_sack_lowest_newack, accum_moved);
+ }
+ /*********************************************/
+ /* Here we perform PR-SCTP procedures */
+ /* (section 4.2) */
+ /*********************************************/
+ /* C1. update advancedPeerAckPoint */
+ if (compare_with_wrap(cum_ack, asoc->advanced_peer_ack_point, MAX_TSN)) {
+ asoc->advanced_peer_ack_point = cum_ack;
+ }
+ /* C2. try to further move advancedPeerAckPoint ahead */
+ if ((asoc->peer_supports_prsctp) && (asoc->pr_sctp_cnt > 0)) {
+ struct sctp_tmit_chunk *lchk;
+
+ lchk = sctp_try_advance_peer_ack_point(stcb, asoc);
+ /* C3. See if we need to send a Fwd-TSN */
+ if (compare_with_wrap(asoc->advanced_peer_ack_point, cum_ack,
+ MAX_TSN)) {
+ /*
+ * ISSUE with ECN, see FWD-TSN processing for notes
+ * on issues that will occur when the ECN NONCE
+ * stuff is put into SCTP for cross checking.
+ */
+ send_forward_tsn(stcb, asoc);
+
+ /*
+ * ECN Nonce: Disable Nonce Sum check when FWD TSN
+ * is sent and store resync tsn
+ */
+ asoc->nonce_sum_check = 0;
+ asoc->nonce_resync_tsn = asoc->advanced_peer_ack_point;
+ if (lchk) {
+ /* Assure a timer is up */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, lchk->whoTo);
+ }
+ }
+ }
+ /* JRS - Use the congestion control given in the CC module */
+ asoc->cc_functions.sctp_cwnd_update_after_fr(stcb, asoc);
+
+ /******************************************************************
+ * Here we do the stuff with ECN Nonce checking.
+ * We basically check to see if the nonce sum flag was incorrect
+ * or if resynchronization needs to be done. Also if we catch a
+ * misbehaving receiver we give him the kick.
+ ******************************************************************/
+
+ if (asoc->ecn_nonce_allowed) {
+ if (asoc->nonce_sum_check) {
+ if (nonce_sum_flag != ((asoc->nonce_sum_expect_base + ecn_seg_sums) & SCTP_SACK_NONCE_SUM)) {
+ if (asoc->nonce_wait_for_ecne == 0) {
+ struct sctp_tmit_chunk *lchk;
+
+ lchk = TAILQ_FIRST(&asoc->send_queue);
+ asoc->nonce_wait_for_ecne = 1;
+ if (lchk) {
+ asoc->nonce_wait_tsn = lchk->rec.data.TSN_seq;
+ } else {
+ asoc->nonce_wait_tsn = asoc->sending_seq;
+ }
+ } else {
+ if (compare_with_wrap(asoc->last_acked_seq, asoc->nonce_wait_tsn, MAX_TSN) ||
+ (asoc->last_acked_seq == asoc->nonce_wait_tsn)) {
+ /*
+ * Misbehaving peer. We need
+ * to react to this guy
+ */
+ asoc->ecn_allowed = 0;
+ asoc->ecn_nonce_allowed = 0;
+ }
+ }
+ }
+ } else {
+ /* See if Resynchronization Possible */
+ if (compare_with_wrap(asoc->last_acked_seq, asoc->nonce_resync_tsn, MAX_TSN)) {
+ asoc->nonce_sum_check = 1;
+ /*
+ * Now we must calculate what the base is.
+ * We do this based on two things: we know
+ * the totals for all the segments
+ * gap-acked in the SACK, stored in
+ * ecn_seg_sums. We also know the SACK's
+ * nonce sum, which is in nonce_sum_flag. So
+ * we can build a truth table to
+ * back-calculate the new value of
+ * asoc->nonce_sum_expect_base:
+ *
+ * SACK-flag-Value   Seg-Sums   Base
+ *        0             0         0
+ *        1             0         1
+ *        0             1         1
+ *        1             1         0
+ */
+ asoc->nonce_sum_expect_base = (ecn_seg_sums ^ nonce_sum_flag) & SCTP_SACK_NONCE_SUM;
+ }
+ }
+ }
+ /* Now are we exiting loss recovery ? */
+ if (will_exit_fast_recovery) {
+ /* Ok, we must exit fast recovery */
+ asoc->fast_retran_loss_recovery = 0;
+ }
+ if ((asoc->sat_t3_loss_recovery) &&
+ ((compare_with_wrap(asoc->last_acked_seq, asoc->sat_t3_recovery_tsn,
+ MAX_TSN) ||
+ (asoc->last_acked_seq == asoc->sat_t3_recovery_tsn)))) {
+ /* end satellite t3 loss recovery */
+ asoc->sat_t3_loss_recovery = 0;
+ }
+ /*
+ * CMT Fast recovery
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (net->will_exit_fast_recovery) {
+ /* Ok, we must exit fast recovery */
+ net->fast_retran_loss_recovery = 0;
+ }
+ }
+
+ /* Adjust and set the new rwnd value */
+ if (sctp_logging_level & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd_set(SCTP_SET_PEER_RWND_VIA_SACK,
+ asoc->peers_rwnd, asoc->total_flight, (asoc->sent_queue_cnt * sctp_peer_chunk_oh), a_rwnd);
+ }
+ asoc->peers_rwnd = sctp_sbspace_sub(a_rwnd,
+ (uint32_t) (asoc->total_flight + (asoc->sent_queue_cnt * sctp_peer_chunk_oh)));
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ if (asoc->peers_rwnd > old_rwnd) {
+ win_probe_recovery = 1;
+ }
+ /*
+ * Now we must setup so we have a timer up for anyone with
+ * outstanding data.
+ */
+ done_once = 0;
+again:
+ j = 0;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (win_probe_recovery && (net->window_probe)) {
+ net->window_probe = 0;
+ win_probe_recovered = 1;
+ /*-
+ * Find first chunk that was used with
+ * window probe and clear the event. Put
+ * it back into the send queue as if has
+ * not been sent.
+ */
+ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
+ if (tp1->window_probe) {
+ sctp_window_probe_recovery(stcb, asoc, net, tp1);
+ break;
+ }
+ }
+ }
+ if (net->flight_size) {
+ j++;
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net);
+ } else {
+ if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_22);
+ }
+ if (sctp_early_fr) {
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck4);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_23);
+ }
+ }
+ }
+ }
+ if ((j == 0) &&
+ (!TAILQ_EMPTY(&asoc->sent_queue)) &&
+ (asoc->sent_queue_retran_cnt == 0) &&
+ (win_probe_recovered == 0) &&
+ (done_once == 0)) {
+ /* huh, this should not happen */
+ sctp_fs_audit(asoc);
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ net->flight_size = 0;
+ }
+ asoc->total_flight = 0;
+ asoc->total_flight_count = 0;
+ asoc->sent_queue_retran_cnt = 0;
+ TAILQ_FOREACH(tp1, &asoc->sent_queue, sctp_next) {
+ if (tp1->sent < SCTP_DATAGRAM_RESEND) {
+ sctp_flight_size_increase(tp1);
+ sctp_total_flight_increase(stcb, tp1);
+ } else if (tp1->sent == SCTP_DATAGRAM_RESEND) {
+ asoc->sent_queue_retran_cnt++;
+ }
+ }
+ done_once = 1;
+ goto again;
+ }
+ if (sctp_logging_level & SCTP_SACK_RWND_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_SACK_RWND_UPDATE,
+ a_rwnd,
+ stcb->asoc.peers_rwnd,
+ stcb->asoc.total_flight,
+ stcb->asoc.total_output_queue_size);
+ }
+}
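
The ECN-nonce resynchronization near the end of sctp_handle_sack() back-calculates nonce_sum_expect_base from the truth table in the comment: the new base is just the XOR of the gap-acked segment sums and the nonce-sum bit carried in the SACK chunk flags, masked down to one bit. A small self-checking sketch of that relation (the macro name below is a stand-in, not the kernel's):

#include <assert.h>
#include <stdio.h>

#define SACK_NONCE_SUM 0x01	/* low-order nonce-sum bit, as carried in the SACK flags */

/* Back-calculate the expected nonce-sum base from the SACK flag bit and
 * the sum of nonces over the gap-acked segments (see the truth table). */
static int
nonce_base(int sack_flag, int seg_sums)
{
	return ((seg_sums ^ sack_flag) & SACK_NONCE_SUM);
}

int
main(void)
{
	/* Exhaustively check the four rows of the truth table. */
	assert(nonce_base(0, 0) == 0);
	assert(nonce_base(1, 0) == 1);
	assert(nonce_base(0, 1) == 1);
	assert(nonce_base(1, 1) == 0);
	printf("nonce truth table holds\n");
	return (0);
}
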
+
+void
+sctp_update_acked(struct sctp_tcb *stcb, struct sctp_shutdown_chunk *cp,
+ struct sctp_nets *netp, int *abort_flag)
+{
+ /* Copy cum-ack */
+ uint32_t cum_ack, a_rwnd;
+
+ cum_ack = ntohl(cp->cumulative_tsn_ack);
+ /* Arrange so a_rwnd does NOT change */
+ a_rwnd = stcb->asoc.peers_rwnd + stcb->asoc.total_flight;
+
+ /* Now call the express sack handling */
+ sctp_express_handle_sack(stcb, cum_ack, a_rwnd, 0, abort_flag);
+}
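
sctp_update_acked() deliberately passes peers_rwnd plus total_flight as a_rwnd, so that when the express handler subtracts the flight back out the peer's window is left unchanged. A one-line sanity check of that arithmetic, ignoring the per-chunk overhead term for simplicity:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint32_t peers_rwnd = 20000, total_flight = 5000;
	uint32_t a_rwnd = peers_rwnd + total_flight;	/* what sctp_update_acked passes */

	/* The SACK handler recomputes rwnd as a_rwnd - total_flight (+ overhead). */
	assert(a_rwnd - total_flight == peers_rwnd);
	return (0);
}
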
+
+static void
+sctp_kick_prsctp_reorder_queue(struct sctp_tcb *stcb,
+ struct sctp_stream_in *strmin)
+{
+ struct sctp_queued_to_read *ctl, *nctl;
+ struct sctp_association *asoc;
+ int tt;
+
+ asoc = &stcb->asoc;
+ tt = strmin->last_sequence_delivered;
+ /*
+ * First deliver anything prior to and including the stream no that
+ * came in
+ */
+ ctl = TAILQ_FIRST(&strmin->inqueue);
+ while (ctl) {
+ nctl = TAILQ_NEXT(ctl, next);
+ if (compare_with_wrap(tt, ctl->sinfo_ssn, MAX_SEQ) ||
+ (tt == ctl->sinfo_ssn)) {
+ /* this is deliverable now */
+ TAILQ_REMOVE(&strmin->inqueue, ctl, next);
+ /* subtract pending on streams */
+ asoc->size_on_all_streams -= ctl->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ /* deliver it to at least the delivery-q */
+ if (stcb->sctp_socket) {
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ ctl,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+ }
+ } else {
+ /* no more delivery now. */
+ break;
+ }
+ ctl = nctl;
+ }
+ /*
+ * now we must deliver things in queue the normal way if any are
+ * now ready.
+ */
+ tt = strmin->last_sequence_delivered + 1;
+ ctl = TAILQ_FIRST(&strmin->inqueue);
+ while (ctl) {
+ nctl = TAILQ_NEXT(ctl, next);
+ if (tt == ctl->sinfo_ssn) {
+ /* this is deliverable now */
+ TAILQ_REMOVE(&strmin->inqueue, ctl, next);
+ /* subtract pending on streams */
+ asoc->size_on_all_streams -= ctl->length;
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+ /* deliver it to at least the delivery-q */
+ strmin->last_sequence_delivered = ctl->sinfo_ssn;
+ if (stcb->sctp_socket) {
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ ctl,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+ }
+ tt = strmin->last_sequence_delivered + 1;
+ } else {
+ break;
+ }
+ ctl = nctl;
+ }
+}
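
sctp_kick_prsctp_reorder_queue() delivers in two passes: first everything at or before the stream sequence number the FWD-TSN skipped to, then whatever has become strictly in-order behind it. The sketch below mimics that two-phase sweep over a sorted array standing in for the stream's inqueue; it ignores 16-bit sequence wrap for brevity and all names are illustrative:

#include <stdint.h>
#include <stdio.h>

/* Deliver queued SSNs: first all <= skip_to, then only strictly consecutive ones. */
static void
kick_reorder_queue(const uint16_t *queued, int n, uint16_t skip_to)
{
	uint16_t last_delivered = skip_to;
	int i = 0;

	/* Phase 1: anything at or before the forwarded-to sequence number. */
	while (i < n && queued[i] <= skip_to) {
		printf("deliver ssn %u (skipped region)\n", queued[i]);
		i++;
	}
	/* Phase 2: keep going only while the queue stays strictly in order. */
	while (i < n && queued[i] == (uint16_t)(last_delivered + 1)) {
		last_delivered = queued[i];
		printf("deliver ssn %u (in order)\n", queued[i]);
		i++;
	}
}

int
main(void)
{
	uint16_t q[] = { 3, 4, 6, 7, 9 };

	/* Skips to 5: delivers 3 and 4, then 6 and 7; 9 waits for 8. */
	kick_reorder_queue(q, 5, 5);
	return (0);
}
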
+
+void
+sctp_handle_forward_tsn(struct sctp_tcb *stcb,
+ struct sctp_forward_tsn_chunk *fwd, int *abort_flag, struct mbuf *m, int offset)
+{
+ /*
+ * ISSUES that MUST be fixed for ECN! When we are the sender of the
+ * forward TSN, when the SACK comes back that acknowledges the
+ * FWD-TSN we must reset the NONCE sum to match correctly. This will
+ * get quite tricky since we may have sent more data intervening
+ * and must carefully account for what the SACK says on the nonce
+ * and any gaps that are reported. This work will NOT be done here,
+ * but I note it here since it is really related to PR-SCTP and
+ * FWD-TSN's
+ */
+
+ /* The pr-sctp fwd tsn */
+ /*
+ * Here we will perform all the data receiver side steps for
+ * processing FwdTSN, as required by the pr-sctp draft:
+ *
+ * Assume we get FwdTSN(x):
+ *
+ * 1) update local cumTSN to x
+ * 2) try to further advance cumTSN to x plus others we have
+ * 3) examine and update re-ordering queue on pr-in-streams
+ * 4) clean up re-assembly queue
+ * 5) Send a sack to report where we are.
+ */
+ struct sctp_association *asoc;
+ uint32_t new_cum_tsn, gap;
+ unsigned int i, cnt_gone, fwd_sz, cumack_set_flag, m_size;
+ struct sctp_stream_in *strm;
+ struct sctp_tmit_chunk *chk, *at;
+
+ cumack_set_flag = 0;
+ asoc = &stcb->asoc;
+ cnt_gone = 0;
+ if ((fwd_sz = ntohs(fwd->ch.chunk_length)) < sizeof(struct sctp_forward_tsn_chunk)) {
+ SCTPDBG(SCTP_DEBUG_INDATA1,
+ "Bad size too small/big fwd-tsn\n");
+ return;
+ }
+ m_size = (stcb->asoc.mapping_array_size << 3);
+ /*************************************************************/
+ /* 1. Here we update local cumTSN and shift the bitmap array */
+ /*************************************************************/
+ new_cum_tsn = ntohl(fwd->new_cumulative_tsn);
+
+ if (compare_with_wrap(asoc->cumulative_tsn, new_cum_tsn, MAX_TSN) ||
+ asoc->cumulative_tsn == new_cum_tsn) {
+ /* Already got there ... */
+ return;
+ }
+ if (compare_with_wrap(new_cum_tsn, asoc->highest_tsn_inside_map,
+ MAX_TSN)) {
+ asoc->highest_tsn_inside_map = new_cum_tsn;
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 0, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ }
+ /*
+ * now we know the new TSN is more advanced, let's find the actual
+ * gap
+ */
+ if ((compare_with_wrap(new_cum_tsn, asoc->mapping_array_base_tsn,
+ MAX_TSN)) ||
+ (new_cum_tsn == asoc->mapping_array_base_tsn)) {
+ gap = new_cum_tsn - asoc->mapping_array_base_tsn;
+ } else {
+ /* try to prevent underflow here */
+ gap = new_cum_tsn + (MAX_TSN - asoc->mapping_array_base_tsn) + 1;
+ }
+
+ if (gap >= m_size) {
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 0, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ if ((long)gap > sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv)) {
+ struct mbuf *oper;
+
+ /*
+ * out of range (of single byte chunks in the rwnd I
+ * give out). This must be an attacker.
+ */
+ *abort_flag = 1;
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + 3 * sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ (sizeof(uint32_t) * 3);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_INDATA + SCTP_LOC_33);
+ ippp++;
+ *ippp = asoc->highest_tsn_inside_map;
+ ippp++;
+ *ippp = new_cum_tsn;
+ }
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_INDATA + SCTP_LOC_33;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_PEER_FAULTY, oper, SCTP_SO_NOT_LOCKED);
+ return;
+ }
+ SCTP_STAT_INCR(sctps_fwdtsn_map_over);
+slide_out:
+ memset(stcb->asoc.mapping_array, 0, stcb->asoc.mapping_array_size);
+ cumack_set_flag = 1;
+ asoc->mapping_array_base_tsn = new_cum_tsn + 1;
+ asoc->cumulative_tsn = asoc->highest_tsn_inside_map = new_cum_tsn;
+
+ if (sctp_logging_level & SCTP_MAP_LOGGING_ENABLE) {
+ sctp_log_map(0, 3, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
+ }
+ asoc->last_echo_tsn = asoc->highest_tsn_inside_map;
+ } else {
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if ((compare_with_wrap(((uint32_t) asoc->cumulative_tsn + gap), asoc->highest_tsn_inside_map, MAX_TSN)) ||
+ (((uint32_t) asoc->cumulative_tsn + gap) == asoc->highest_tsn_inside_map)) {
+ goto slide_out;
+ } else {
+ for (i = 0; i <= gap; i++) {
+ SCTP_SET_TSN_PRESENT(asoc->mapping_array, i);
+ }
+ }
+ /*
+ * Now after marking all, slide thing forward but no sack
+ * please.
+ */
+ sctp_sack_check(stcb, 0, 0, abort_flag);
+ if (*abort_flag)
+ return;
+ }
+
+ /*************************************************************/
+ /* 2. Clear up re-assembly queue */
+ /*************************************************************/
+ /*
+ * First service it if pd-api is up, just in case we can progress it
+ * forward
+ */
+ if (asoc->fragmented_delivery_inprogress) {
+ sctp_service_reassembly(stcb, asoc);
+ }
+ if (!TAILQ_EMPTY(&asoc->reasmqueue)) {
+ /* For each one on here see if we need to toss it */
+ /*
+ * For now large messages held on the reasmqueue that are
+ * complete will be tossed too. We could in theory do more
+ * work to spin through and stop after dumping one msg aka
+ * seeing the start of a new msg at the head, and call the
+ * delivery function... to see if it can be delivered... But
+ * for now we just dump everything on the queue.
+ */
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ while (chk) {
+ at = TAILQ_NEXT(chk, sctp_next);
+ if (compare_with_wrap(asoc->cumulative_tsn,
+ chk->rec.data.TSN_seq, MAX_TSN) ||
+ asoc->cumulative_tsn == chk->rec.data.TSN_seq) {
+ /* It needs to be tossed */
+ TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
+ if (compare_with_wrap(chk->rec.data.TSN_seq,
+ asoc->tsn_last_delivered, MAX_TSN)) {
+ asoc->tsn_last_delivered =
+ chk->rec.data.TSN_seq;
+ asoc->str_of_pdapi =
+ chk->rec.data.stream_number;
+ asoc->ssn_of_pdapi =
+ chk->rec.data.stream_seq;
+ asoc->fragment_flags =
+ chk->rec.data.rcv_flags;
+ }
+ asoc->size_on_reasm_queue -= chk->send_size;
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+ cnt_gone++;
+
+ /* Clear up any stream problem */
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) !=
+ SCTP_DATA_UNORDERED &&
+ (compare_with_wrap(chk->rec.data.stream_seq,
+ asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered,
+ MAX_SEQ))) {
+ /*
+ * We must dump forward this stream's
+ * sequence number if the chunk is
+ * not unordered that is being
+ * skipped. There is a chance that
+ * if the peer does not include the
+ * last fragment in its FWD-TSN we
+ * WILL have a problem here since
+ * you would have a partial chunk in
+ * queue that may not be
+ * deliverable. Also if a Partial
+ * delivery API has started, the user
+ * may get a partial chunk. The next
+ * read returning a new chunk...
+ * really ugly but I see no way
+ * around it! Maybe a notify??
+ */
+ asoc->strmin[chk->rec.data.stream_number].last_sequence_delivered =
+ chk->rec.data.stream_seq;
+ }
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ } else {
+ /*
+ * Ok we have gone beyond the end of the
+ * fwd-tsn's mark. Some checks...
+ */
+ if ((asoc->fragmented_delivery_inprogress) &&
+ (chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG)) {
+ uint32_t str_seq;
+
+ /*
+ * Special case PD-API is up and
+ * what we fwd-tsn' over includes
+ * one that had the LAST_FRAG. We no
+ * longer need to do the PD-API.
+ */
+ asoc->fragmented_delivery_inprogress = 0;
+
+ str_seq = (asoc->str_of_pdapi << 16) | asoc->ssn_of_pdapi;
+ sctp_ulp_notify(SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION,
+ stcb, SCTP_PARTIAL_DELIVERY_ABORTED, (void *)&str_seq, SCTP_SO_NOT_LOCKED);
+
+ }
+ break;
+ }
+ chk = at;
+ }
+ }
+ if (asoc->fragmented_delivery_inprogress) {
+ /*
+ * Ok we removed cnt_gone chunks in the PD-API queue that
+ * were being delivered. So now we must turn off the flag.
+ */
+ uint32_t str_seq;
+
+ str_seq = (asoc->str_of_pdapi << 16) | asoc->ssn_of_pdapi;
+ sctp_ulp_notify(SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION,
+ stcb, SCTP_PARTIAL_DELIVERY_ABORTED, (void *)&str_seq, SCTP_SO_NOT_LOCKED);
+ asoc->fragmented_delivery_inprogress = 0;
+ }
+ /*************************************************************/
+ /* 3. Update the PR-stream re-ordering queues */
+ /*************************************************************/
+ fwd_sz -= sizeof(*fwd);
+ if (m && fwd_sz) {
+ /* New method. */
+ unsigned int num_str;
+ struct sctp_strseq *stseq, strseqbuf;
+
+ offset += sizeof(*fwd);
+
+ num_str = fwd_sz / sizeof(struct sctp_strseq);
+ for (i = 0; i < num_str; i++) {
+ uint16_t st;
+
+ stseq = (struct sctp_strseq *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_strseq),
+ (uint8_t *) & strseqbuf);
+ offset += sizeof(struct sctp_strseq);
+ if (stseq == NULL) {
+ break;
+ }
+ /* Convert */
+ st = ntohs(stseq->stream);
+ stseq->stream = st;
+ st = ntohs(stseq->sequence);
+ stseq->sequence = st;
+ /* now process */
+ if (stseq->stream >= asoc->streamincnt) {
+ /* screwed up streams, stop! */
+ break;
+ }
+ strm = &asoc->strmin[stseq->stream];
+ if (compare_with_wrap(stseq->sequence,
+ strm->last_sequence_delivered, MAX_SEQ)) {
+ /* Update the sequence number */
+ strm->last_sequence_delivered =
+ stseq->sequence;
+ }
+ /* now kick the stream the new way */
+ /* sa_ignore NO_NULL_CHK */
+ sctp_kick_prsctp_reorder_queue(stcb, strm);
+ }
+ }
+ if (TAILQ_FIRST(&asoc->reasmqueue)) {
+ /* now lets kick out and check for more fragmented delivery */
+ /* sa_ignore NO_NULL_CHK */
+ sctp_deliver_reasm_check(stcb, &stcb->asoc);
+ }
+}
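
Step 1 of sctp_handle_forward_tsn() converts the new cumulative TSN into an offset ("gap") from the mapping-array base, adding MAX_TSN+1 when the TSN space has wrapped. A standalone sketch of that distance calculation, simplified to decide wrap by a plain numeric comparison rather than compare_with_wrap():

#include <stdint.h>
#include <stdio.h>

#define MAX_TSN 0xffffffffU

/* Distance from the mapping-array base TSN to a new cumulative TSN,
 * guarding against wraparound the way the FWD-TSN handler does. */
static uint32_t
tsn_gap(uint32_t new_cum_tsn, uint32_t base_tsn)
{
	if (new_cum_tsn >= base_tsn)
		return (new_cum_tsn - base_tsn);
	/* The base is numerically "ahead", so the TSN space wrapped. */
	return (new_cum_tsn + (MAX_TSN - base_tsn) + 1);
}

int
main(void)
{
	printf("%u\n", tsn_gap(1005, 1000));		/* 5 */
	printf("%u\n", tsn_gap(3, 0xfffffffeU));	/* 5, across the wrap */
	return (0);
}
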
Index: in_pcb.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/in_pcb.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/in_pcb.h -L sys/netinet/in_pcb.h -u -r1.1.1.1 -r1.2
--- sys/netinet/in_pcb.h
+++ sys/netinet/in_pcb.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)in_pcb.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/in_pcb.h,v 1.80.2.2 2005/10/02 15:45:47 andre Exp $
+ * $FreeBSD: src/sys/netinet/in_pcb.h,v 1.100.2.1 2007/12/07 05:46:08 kmacy Exp $
*/
#ifndef _NETINET_IN_PCB_H_
@@ -44,11 +44,12 @@
struct inpcbpolicy;
/*
- * Common structure pcb for internet protocol implementation.
- * Here are stored pointers to local and foreign host table
- * entries, local and foreign socket numbers, and pointers
- * up (to a socket structure) and down (to a protocol-specific)
- * control block.
+ * Struct inpcb is the common structure pcb for the Internet Protocol
+ * implementation.
+ *
+ * Pointers to local and foreign host table entries, local and foreign socket
+ * numbers, and pointers up (to a socket structure) and down (to a
+ * protocol-specific control block) are stored here.
*/
LIST_HEAD(inpcbhead, inpcb);
LIST_HEAD(inpcbporthead, inpcbport);
@@ -56,8 +57,8 @@
/*
* PCB with AF_INET6 null bind'ed laddr can receive AF_INET input packet.
- * So, AF_INET6 null laddr is also used as AF_INET null laddr,
- * by utilize following structure. (At last, same as INRIA)
+ * So, AF_INET6 null laddr is also used as AF_INET null laddr, by utilizing
+ * the following structure.
*/
struct in_addr_4in6 {
u_int32_t ia46_pad32[3];
@@ -65,8 +66,8 @@
};
/*
- * NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553.
- * in_conninfo has some extra padding to accomplish this.
+ * NOTE: ipv6 addrs should be 64-bit aligned, per RFC 2553. in_conninfo has
+ * some extra padding to accomplish this.
*/
struct in_endpoints {
u_int16_t ie_fport; /* foreign port */
@@ -89,8 +90,8 @@
};
/*
- * XXX
- * the defines for inc_* are hacks and should be changed to direct references
+ * XXX The defines for inc_* are hacks and should be changed to direct
+ * references.
*/
struct in_conninfo {
u_int8_t inc_flags;
@@ -110,39 +111,44 @@
struct icmp6_filter;
struct inpcb {
- LIST_ENTRY(inpcb) inp_hash; /* hash list */
- LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */
- u_int32_t inp_flow;
-
- /* local and foreign ports, local and foreign addr */
- struct in_conninfo inp_inc;
-
- caddr_t inp_ppcb; /* pointer to per-protocol pcb */
+ LIST_ENTRY(inpcb) inp_hash; /* hash list */
+ LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */
+ void *inp_ppcb; /* pointer to per-protocol pcb */
struct inpcbinfo *inp_pcbinfo; /* PCB list info */
struct socket *inp_socket; /* back pointer to socket */
- /* list for this PCB's local port */
- struct label *inp_label; /* MAC label */
+
+ u_int32_t inp_flow;
int inp_flags; /* generic IP/datagram flags */
- struct inpcbpolicy *inp_sp; /* for IPSEC */
u_char inp_vflag; /* IP version flag (v4/v6) */
#define INP_IPV4 0x1
#define INP_IPV6 0x2
-#define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */
-#define INP_TIMEWAIT 0x8 /* .. probably doesn't go here */
+#define INP_IPV6PROTO 0x4 /* opened under IPv6 protocol */
+#define INP_TIMEWAIT 0x8 /* .. probably doesn't go here */
#define INP_ONESBCAST 0x10 /* send all-ones broadcast */
+#define INP_DROPPED 0x20 /* protocol drop flag */
+#define INP_SOCKREF 0x40 /* strong socket reference */
u_char inp_ip_ttl; /* time to live proto */
u_char inp_ip_p; /* protocol proto */
u_char inp_ip_minttl; /* minimum TTL or drop */
+ uint32_t inp_ispare1; /* connection id / queue id */
+ void *inp_pspare[2]; /* rtentry / general use */
- /* protocol dependent part; options */
+ /* Local and foreign ports, local and foreign addr. */
+ struct in_conninfo inp_inc;
+
+ /* list for this PCB's local port */
+ struct label *inp_label; /* MAC label */
+ struct inpcbpolicy *inp_sp; /* for IPSEC */
+
+ /* Protocol-dependent part; options. */
struct {
u_char inp4_ip_tos; /* type of service proto */
struct mbuf *inp4_options; /* IP options */
struct ip_moptions *inp4_moptions; /* IP multicast options */
} inp_depend4;
-#define inp_fport inp_inc.inc_fport
-#define inp_lport inp_inc.inc_lport
+#define inp_fport inp_inc.inc_fport
+#define inp_lport inp_inc.inc_lport
#define inp_faddr inp_inc.inc_faddr
#define inp_laddr inp_inc.inc_laddr
#define inp_ip_tos inp_depend4.inp4_ip_tos
@@ -159,17 +165,16 @@
struct icmp6_filter *inp6_icmp6filt;
/* IPV6_CHECKSUM setsockopt */
int inp6_cksum;
- u_short inp6_ifindex;
short inp6_hops;
} inp_depend6;
LIST_ENTRY(inpcb) inp_portlist;
struct inpcbport *inp_phd; /* head of this list */
+#define inp_zero_size offsetof(struct inpcb, inp_gencnt)
inp_gen_t inp_gencnt; /* generation count of this instance */
struct mtx inp_mtx;
#define in6p_faddr inp_inc.inc6_faddr
#define in6p_laddr inp_inc.inc6_laddr
-#define in6p_ip6_hlim inp_depend6.inp6_hlim
#define in6p_hops inp_depend6.inp6_hops /* default hop limit */
#define in6p_ip6_nxt inp_ip_p
#define in6p_flowinfo inp_flow
@@ -179,7 +184,6 @@
#define in6p_moptions inp_depend6.inp6_moptions
#define in6p_icmp6filt inp_depend6.inp6_icmp6filt
#define in6p_cksum inp_depend6.inp6_cksum
-#define inp6_ifindex inp_depend6.inp6_ifindex
#define in6p_flags inp_flags /* for KAME src sync over BSD*'s */
#define in6p_socket inp_socket /* for KAME src sync over BSD*'s */
#define in6p_lport inp_lport /* for KAME src sync over BSD*'s */
@@ -187,15 +191,15 @@
#define in6p_ppcb inp_ppcb /* for KAME src sync over BSD*'s */
};
/*
- * The range of the generation count, as used in this implementation,
- * is 9e19. We would have to create 300 billion connections per
- * second for this number to roll over in a year. This seems sufficiently
- * unlikely that we simply don't concern ourselves with that possibility.
+ * The range of the generation count, as used in this implementation, is 9e19.
+ * We would have to create 300 billion connections per second for this number
+ * to roll over in a year. This seems sufficiently unlikely that we simply
+ * don't concern ourselves with that possibility.
*/
/*
- * Interface exported to userland by various protocols which use
- * inpcbs. Hack alert -- only define if struct xsocket is in scope.
+ * Interface exported to userland by various protocols which use inpcbs. Hack
+ * alert -- only define if struct xsocket is in scope.
*/
#ifdef _SYS_SOCKETVAR_H_
struct xinpcb {
@@ -219,35 +223,62 @@
u_short phd_port;
};
-struct inpcbinfo { /* XXX documentation, prefixes */
- struct inpcbhead *hashbase;
- u_long hashmask;
- struct inpcbporthead *porthashbase;
- u_long porthashmask;
- struct inpcbhead *listhead;
- u_short lastport;
- u_short lastlow;
- u_short lasthi;
- struct uma_zone *ipi_zone; /* zone to allocate pcbs from */
- u_int ipi_count; /* number of pcbs in this list */
- u_quad_t ipi_gencnt; /* current generation count */
- struct mtx ipi_mtx;
-};
-
/*
- * NB: We cannot enable assertions when IPv6 is configured as
- * this code is shared by both IPv4 and IPv6 and IPv6 is
- * not properly locked.
+ * Global data structure for each high-level protocol (UDP, TCP, ...) in both
+ * IPv4 and IPv6. Holds inpcb lists and information for managing them.
*/
+struct inpcbinfo {
+ /*
+ * Global list of inpcbs on the protocol.
+ */
+ struct inpcbhead *ipi_listhead;
+ u_int ipi_count;
+
+ /*
+ * Global hash of inpcbs, hashed by local and foreign addresses and
+ * port numbers.
+ */
+ struct inpcbhead *ipi_hashbase;
+ u_long ipi_hashmask;
+
+ /*
+ * Global hash of inpcbs, hashed by only local port number.
+ */
+ struct inpcbporthead *ipi_porthashbase;
+ u_long ipi_porthashmask;
+
+ /*
+ * Fields associated with port lookup and allocation.
+ */
+ u_short ipi_lastport;
+ u_short ipi_lastlow;
+ u_short ipi_lasthi;
+
+ /*
+ * UMA zone from which inpcbs are allocated for this protocol.
+ */
+ struct uma_zone *ipi_zone;
+
+ /*
+ * Generation count--incremented each time a connection is allocated
+ * or freed.
+ */
+ u_quad_t ipi_gencnt;
+ struct mtx ipi_mtx;
+
+ /*
+ * vimage 1
+ * general use 1
+ */
+ void *ipi_pspare[2];
+};
+
#define INP_LOCK_INIT(inp, d, t) \
mtx_init(&(inp)->inp_mtx, (d), (t), MTX_DEF | MTX_RECURSE | MTX_DUPOK)
#define INP_LOCK_DESTROY(inp) mtx_destroy(&(inp)->inp_mtx)
#define INP_LOCK(inp) mtx_lock(&(inp)->inp_mtx)
#define INP_UNLOCK(inp) mtx_unlock(&(inp)->inp_mtx)
-#define INP_LOCK_ASSERT(inp) do { \
- mtx_assert(&(inp)->inp_mtx, MA_OWNED); \
- NET_ASSERT_GIANT(); \
-} while (0)
+#define INP_LOCK_ASSERT(inp) mtx_assert(&(inp)->inp_mtx, MA_OWNED)
#define INP_UNLOCK_ASSERT(inp) mtx_assert(&(inp)->inp_mtx, MA_NOTOWNED)
#define INP_INFO_LOCK_INIT(ipi, d) \
@@ -257,14 +288,9 @@
#define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_mtx)
#define INP_INFO_RUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_mtx)
#define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_mtx)
-#define INP_INFO_RLOCK_ASSERT(ipi) do { \
- mtx_assert(&(ipi)->ipi_mtx, MA_OWNED); \
- NET_ASSERT_GIANT(); \
-} while (0)
-#define INP_INFO_WLOCK_ASSERT(ipi) do { \
- mtx_assert(&(ipi)->ipi_mtx, MA_OWNED); \
- NET_ASSERT_GIANT(); \
-} while (0)
+#define INP_INFO_RLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_mtx, MA_OWNED)
+#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_mtx, MA_OWNED)
+#define INP_INFO_UNLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_mtx, MA_NOTOWNED)
#define INP_PCBHASH(faddr, lport, fport, mask) \
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
@@ -329,6 +355,8 @@
#define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af)
#ifdef _KERNEL
+extern int ipport_reservedhigh;
+extern int ipport_reservedlow;
extern int ipport_lowfirstauto;
extern int ipport_lowlastauto;
extern int ipport_firstauto;
@@ -338,7 +366,7 @@
extern struct callout ipport_tick_callout;
void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
-int in_pcballoc(struct socket *, struct inpcbinfo *, const char *);
+int in_pcballoc(struct socket *, struct inpcbinfo *);
int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *);
int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
u_short *, struct ucred *);
@@ -348,6 +376,8 @@
struct ucred *);
void in_pcbdetach(struct inpcb *);
void in_pcbdisconnect(struct inpcb *);
+void in_pcbdrop(struct inpcb *);
+void in_pcbfree(struct inpcb *);
int in_pcbinshash(struct inpcb *);
struct inpcb *
in_pcblookup_local(struct inpcbinfo *,
@@ -359,13 +389,19 @@
int, struct inpcb *(*)(struct inpcb *, int));
void in_pcbrehash(struct inpcb *);
void in_pcbsetsolabel(struct socket *so);
-int in_setpeeraddr(struct socket *so, struct sockaddr **nam, struct inpcbinfo *pcbinfo);
-int in_setsockaddr(struct socket *so, struct sockaddr **nam, struct inpcbinfo *pcbinfo);
+int in_getpeeraddr(struct socket *so, struct sockaddr **nam);
+int in_getsockaddr(struct socket *so, struct sockaddr **nam);
struct sockaddr *
in_sockaddr(in_port_t port, struct in_addr *addr);
void in_pcbsosetlabel(struct socket *so);
void in_pcbremlists(struct inpcb *inp);
void ipport_tick(void *xtp);
+
+/*
+ * Debugging routines compiled in when DDB is present.
+ */
+void db_print_inpcb(struct inpcb *inp, const char *name, int indent);
+
#endif /* _KERNEL */
#endif /* !_NETINET_IN_PCB_H_ */
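
The INP_PCBHASH() macro kept in this header folds the foreign address with the XOR of the two ports and masks the result into the bucket count. The following userspace snippet reproduces that folding so the distribution can be poked at outside the kernel; the sample address, ports, and table size are arbitrary:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

/* Same folding as INP_PCBHASH: mix the foreign address with both ports,
 * then mask down to the bucket count (mask = nbuckets - 1). */
static unsigned long
pcb_hash(uint32_t faddr, uint16_t lport, uint16_t fport, unsigned long mask)
{
	return ((faddr ^ (faddr >> 16) ^ ntohs(lport ^ fport)) & mask);
}

int
main(void)
{
	uint32_t faddr = inet_addr("192.0.2.33");
	uint16_t lport = htons(80), fport = htons(54321);

	printf("bucket %lu of 512\n", pcb_hash(faddr, lport, fport, 511));
	return (0);
}
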
--- /dev/null
+++ sys/netinet/sctp_bsd_addr.c
@@ -0,0 +1,529 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_output.c,v 1.46 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_bsd_addr.c,v 1.14.4.2 2008/01/31 17:21:50 rrs Exp $");
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_bsd_addr.h>
+#include <netinet/sctp_uio.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_timer.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_indata.h>
+#include <sys/unistd.h>
+
+
+/* Declare all of our malloc named types */
+
+/* Note to Michael/Peter for mac-os:
+ * I think mac has this too, since I
+ * do see the M_PCB type, so I
+ * will also put the MALLOC_DECLARE
+ * in the mac file. If this does not
+ * work for mac, uncomment the defines for
+ * the strings that we use in Panda; I put
+ * them in comments in the mac-os file.
+ */
+MALLOC_DEFINE(SCTP_M_MAP, "sctp_map", "sctp asoc map descriptor");
+MALLOC_DEFINE(SCTP_M_STRMI, "sctp_stri", "sctp stream in array");
+MALLOC_DEFINE(SCTP_M_STRMO, "sctp_stro", "sctp stream out array");
+MALLOC_DEFINE(SCTP_M_ASC_ADDR, "sctp_aadr", "sctp asconf address");
+MALLOC_DEFINE(SCTP_M_ASC_IT, "sctp_a_it", "sctp asconf iterator");
+MALLOC_DEFINE(SCTP_M_AUTH_CL, "sctp_atcl", "sctp auth chunklist");
+MALLOC_DEFINE(SCTP_M_AUTH_KY, "sctp_atky", "sctp auth key");
+MALLOC_DEFINE(SCTP_M_AUTH_HL, "sctp_athm", "sctp auth hmac list");
+MALLOC_DEFINE(SCTP_M_AUTH_IF, "sctp_athi", "sctp auth info");
+MALLOC_DEFINE(SCTP_M_STRESET, "sctp_stre", "sctp stream reset");
+MALLOC_DEFINE(SCTP_M_CMSG, "sctp_cmsg", "sctp CMSG buffer");
+MALLOC_DEFINE(SCTP_M_COPYAL, "sctp_cpal", "sctp copy all");
+MALLOC_DEFINE(SCTP_M_VRF, "sctp_vrf", "sctp vrf struct");
+MALLOC_DEFINE(SCTP_M_IFA, "sctp_ifa", "sctp ifa struct");
+MALLOC_DEFINE(SCTP_M_IFN, "sctp_ifn", "sctp ifn struct");
+MALLOC_DEFINE(SCTP_M_TIMW, "sctp_timw", "sctp time block");
+MALLOC_DEFINE(SCTP_M_MVRF, "sctp_mvrf", "sctp mvrf pcb list");
+MALLOC_DEFINE(SCTP_M_ITER, "sctp_iter", "sctp iterator control");
+MALLOC_DEFINE(SCTP_M_SOCKOPT, "sctp_socko", "sctp socket option");
+
+
+#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
+void
+sctp_wakeup_iterator(void)
+{
+ wakeup(&sctppcbinfo.iterator_running);
+}
+
+static void
+sctp_iterator_thread(void *v)
+{
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ sctppcbinfo.iterator_running = 0;
+ while (1) {
+ msleep(&sctppcbinfo.iterator_running,
+ &sctppcbinfo.ipi_iterator_wq_mtx,
+ 0, "waiting_for_work", 0);
+ sctp_iterator_worker();
+ }
+}
+
+void
+sctp_startup_iterator(void)
+{
+ int ret;
+
+ ret = kthread_create(sctp_iterator_thread,
+ (void *)NULL,
+ &sctppcbinfo.thread_proc,
+ RFPROC,
+ SCTP_KTHREAD_PAGES,
+ SCTP_KTRHEAD_NAME);
+}
+
+#endif
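
The thread-based iterator above parks in msleep() on the iterator_running channel and is kicked by wakeup() from sctp_wakeup_iterator(). A minimal userland analog of that handshake, using POSIX condition variables and invented names (work_pending, kick_worker), is sketched below; it illustrates the sleep/wakeup pattern only and is not the kernel code.

#include <pthread.h>
#include <stdio.h>

/* Userland sketch of the msleep()/wakeup() handshake used by the iterator. */
static pthread_mutex_t wq_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wq_cv  = PTHREAD_COND_INITIALIZER;
static int work_pending;		/* invented predicate for the example */

static void *
worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&wq_mtx);
	while (work_pending == 0)	/* analogous to msleep() on the channel */
		pthread_cond_wait(&wq_cv, &wq_mtx);
	work_pending = 0;
	pthread_mutex_unlock(&wq_mtx);
	printf("worker: running queued work\n");	/* sctp_iterator_worker() would run here */
	return (NULL);
}

static void
kick_worker(void)			/* analogous to wakeup(chan) */
{
	pthread_mutex_lock(&wq_mtx);
	work_pending = 1;
	pthread_cond_signal(&wq_cv);
	pthread_mutex_unlock(&wq_mtx);
}

int
main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, worker, NULL);
	kick_worker();
	pthread_join(tid, NULL);
	return (0);
}

The kernel thread differs in that it loops forever and sleeps with the iterator work-queue mutex held; msleep() drops and reacquires it atomically, which is what pthread_cond_wait() mimics here.
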
+
+
+void
+sctp_gather_internal_ifa_flags(struct sctp_ifa *ifa)
+{
+ struct in6_ifaddr *ifa6;
+
+ ifa6 = (struct in6_ifaddr *)ifa->ifa;
+ ifa->flags = ifa6->ia6_flags;
+ if (!ip6_use_deprecated) {
+ if (ifa->flags &
+ IN6_IFF_DEPRECATED) {
+ ifa->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE;
+ } else {
+ ifa->localifa_flags &= ~SCTP_ADDR_IFA_UNUSEABLE;
+ }
+ } else {
+ ifa->localifa_flags &= ~SCTP_ADDR_IFA_UNUSEABLE;
+ }
+ if (ifa->flags &
+ (IN6_IFF_DETACHED |
+ IN6_IFF_ANYCAST |
+ IN6_IFF_NOTREADY)) {
+ ifa->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE;
+ } else {
+ ifa->localifa_flags &= ~SCTP_ADDR_IFA_UNUSEABLE;
+ }
+}
+
+
+
+static uint32_t
+sctp_is_desired_interface_type(struct ifaddr *ifa)
+{
+ int result;
+
+ /* check the interface type to see if it's one we care about */
+ switch (ifa->ifa_ifp->if_type) {
+ case IFT_ETHER:
+ case IFT_ISO88023:
+ case IFT_ISO88024:
+ case IFT_ISO88025:
+ case IFT_ISO88026:
+ case IFT_STARLAN:
+ case IFT_P10:
+ case IFT_P80:
+ case IFT_HY:
+ case IFT_FDDI:
+ case IFT_XETHER:
+ case IFT_ISDNBASIC:
+ case IFT_ISDNPRIMARY:
+ case IFT_PTPSERIAL:
+ case IFT_OTHER:
+ case IFT_PPP:
+ case IFT_LOOP:
+ case IFT_SLIP:
+ case IFT_IP:
+ case IFT_IPOVERCDLC:
+ case IFT_IPOVERCLAW:
+ case IFT_VIRTUALIPADDRESS:
+ result = 1;
+ break;
+ default:
+ result = 0;
+ }
+
+ return (result);
+}
+
+static void
+sctp_init_ifns_for_vrf(int vrfid)
+{
+ /*
+ * Here we must apply ANY locks needed by the IFN we access and also
+ * make sure we lock any IFA that exists as we float through the
+ * list of IFA's
+ */
+ struct ifnet *ifn;
+ struct ifaddr *ifa;
+ struct in6_ifaddr *ifa6;
+ struct sctp_ifa *sctp_ifa;
+ uint32_t ifa_flags;
+
+ TAILQ_FOREACH(ifn, &ifnet, if_list) {
+ TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) {
+ if (ifa->ifa_addr == NULL) {
+ continue;
+ }
+ if ((ifa->ifa_addr->sa_family != AF_INET) &&
+ (ifa->ifa_addr->sa_family != AF_INET6)
+ ) {
+ /* non inet/inet6 skip */
+ continue;
+ }
+ if (ifa->ifa_addr->sa_family == AF_INET6) {
+ ifa6 = (struct in6_ifaddr *)ifa;
+ ifa_flags = ifa6->ia6_flags;
+ if (IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr)) {
+					/* skip unspecified addresses */
+ continue;
+ }
+ } else if (ifa->ifa_addr->sa_family == AF_INET) {
+ if (((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr == 0) {
+ continue;
+ }
+ }
+ if (sctp_is_desired_interface_type(ifa) == 0) {
+ /* non desired type */
+ continue;
+ }
+ if ((ifa->ifa_addr->sa_family == AF_INET6) ||
+ (ifa->ifa_addr->sa_family == AF_INET)) {
+ if (ifa->ifa_addr->sa_family == AF_INET6) {
+ ifa6 = (struct in6_ifaddr *)ifa;
+ ifa_flags = ifa6->ia6_flags;
+ } else {
+ ifa_flags = 0;
+ }
+ sctp_ifa = sctp_add_addr_to_vrf(vrfid,
+ (void *)ifn,
+ ifn->if_index,
+ ifn->if_type,
+ ifn->if_xname,
+ (void *)ifa,
+ ifa->ifa_addr,
+ ifa_flags, 0
+ );
+ if (sctp_ifa) {
+ sctp_ifa->localifa_flags &= ~SCTP_ADDR_DEFER_USE;
+ }
+ }
+ }
+ }
+}
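
In the kernel, the walk above runs over the global ifnet list under the appropriate locks. From userland the same filtering can be illustrated with getifaddrs(3); the sketch below mirrors only the address filtering (skip non-INET/INET6 and unspecified addresses) and simply prints what a real implementation would register, so it is illustrative rather than part of SCTP.

#include <sys/types.h>
#include <sys/socket.h>
#include <ifaddrs.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdio.h>

int
main(void)
{
	struct ifaddrs *ifap, *ifa;
	char buf[INET6_ADDRSTRLEN];

	if (getifaddrs(&ifap) != 0)
		return (1);
	for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
		if (ifa->ifa_addr == NULL)
			continue;
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *sin = (struct sockaddr_in *)ifa->ifa_addr;

			if (sin->sin_addr.s_addr == INADDR_ANY)
				continue;	/* skip 0.0.0.0 */
			inet_ntop(AF_INET, &sin->sin_addr, buf, sizeof(buf));
		} else if (ifa->ifa_addr->sa_family == AF_INET6) {
			struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;

			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
				continue;	/* skip :: */
			inet_ntop(AF_INET6, &sin6->sin6_addr, buf, sizeof(buf));
		} else
			continue;		/* not inet/inet6 */
		printf("%s: %s\n", ifa->ifa_name, buf);
	}
	freeifaddrs(ifap);
	return (0);
}
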
+
+
+void
+sctp_init_vrf_list(int vrfid)
+{
+ if (vrfid > SCTP_MAX_VRF_ID)
+ /* can't do that */
+ return;
+
+ /* Don't care about return here */
+ (void)sctp_allocate_vrf(vrfid);
+
+ /*
+	 * Now we need to build all the ifn's for this vrf and their
+	 * addresses.
+ */
+ sctp_init_ifns_for_vrf(vrfid);
+}
+
+static uint8_t first_time = 0;
+
+
+void
+sctp_addr_change(struct ifaddr *ifa, int cmd)
+{
+ struct sctp_ifa *ifap = NULL;
+ uint32_t ifa_flags = 0;
+ struct in6_ifaddr *ifa6;
+
+ /*
+	 * BSD only has one VRF; if this changes we will need to hook in the
+	 * right things here to get the id to pass to the address management
+ * routine.
+ */
+ if (first_time == 0) {
+		/* Special test to see if my ::1 will show up with this */
+ first_time = 1;
+ sctp_init_ifns_for_vrf(SCTP_DEFAULT_VRFID);
+ }
+ if ((cmd != RTM_ADD) && (cmd != RTM_DELETE)) {
+ /* don't know what to do with this */
+ return;
+ }
+ if (ifa->ifa_addr == NULL) {
+ return;
+ }
+ if ((ifa->ifa_addr->sa_family != AF_INET) &&
+ (ifa->ifa_addr->sa_family != AF_INET6)
+ ) {
+ /* non inet/inet6 skip */
+ return;
+ }
+ if (ifa->ifa_addr->sa_family == AF_INET6) {
+ ifa6 = (struct in6_ifaddr *)ifa;
+ ifa_flags = ifa6->ia6_flags;
+ if (IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr)) {
+			/* skip unspecified addresses */
+ return;
+ }
+ } else if (ifa->ifa_addr->sa_family == AF_INET) {
+ if (((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr == 0) {
+ return;
+ }
+ }
+ if (sctp_is_desired_interface_type(ifa) == 0) {
+ /* non desired type */
+ return;
+ }
+ if (cmd == RTM_ADD) {
+ ifap = sctp_add_addr_to_vrf(SCTP_DEFAULT_VRFID, (void *)ifa->ifa_ifp,
+ ifa->ifa_ifp->if_index, ifa->ifa_ifp->if_type,
+ ifa->ifa_ifp->if_xname,
+ (void *)ifa, ifa->ifa_addr, ifa_flags, 1);
+ } else if (cmd == RTM_DELETE) {
+
+ sctp_del_addr_from_vrf(SCTP_DEFAULT_VRFID, ifa->ifa_addr,
+ ifa->ifa_ifp->if_index,
+ ifa->ifa_ifp->if_xname
+ );
+ /*
+		 * We don't bump the refcount here, so the final delete will
+		 * happen when it completes.
+ */
+ }
+}
+
+struct mbuf *
+sctp_get_mbuf_for_msg(unsigned int space_needed, int want_header,
+ int how, int allonebuf, int type)
+{
+ struct mbuf *m = NULL;
+
+ m = m_getm2(NULL, space_needed, how, type, want_header ? M_PKTHDR : 0);
+ if (m == NULL) {
+ /* bad, no memory */
+ return (m);
+ }
+ if (allonebuf) {
+ int siz;
+
+ if (SCTP_BUF_IS_EXTENDED(m)) {
+ siz = SCTP_BUF_EXTEND_SIZE(m);
+ } else {
+ if (want_header)
+ siz = MHLEN;
+ else
+ siz = MLEN;
+ }
+ if (siz < space_needed) {
+ m_freem(m);
+ return (NULL);
+ }
+ }
+ if (SCTP_BUF_NEXT(m)) {
+ sctp_m_freem(SCTP_BUF_NEXT(m));
+ SCTP_BUF_NEXT(m) = NULL;
+ }
+#ifdef SCTP_MBUF_LOGGING
+ if (sctp_logging_level & SCTP_MBUF_LOGGING_ENABLE) {
+ if (SCTP_BUF_IS_EXTENDED(m)) {
+ sctp_log_mb(m, SCTP_MBUF_IALLOC);
+ }
+ }
+#endif
+ return (m);
+}
+
+
+#ifdef SCTP_PACKET_LOGGING
+
+int packet_log_writers = 0;
+int packet_log_end = 0;
+uint8_t packet_log_buffer[SCTP_PACKET_LOG_SIZE];
+
+
+void
+sctp_packet_log(struct mbuf *m, int length)
+{
+ int *lenat, thisone;
+ void *copyto;
+ uint32_t *tick_tock;
+ int total_len;
+ int grabbed_lock = 0;
+ int value, newval, thisend, thisbegin;
+
+	/*
+	 * Buffer layout:
+	 *  - sizeof this entry   (total_len)
+	 *  - previous end        (value)
+	 *  - ticks of log        (ticks)
+	 *  o  ip packet
+	 *  o  as logged
+	 *  - where this started  (thisbegin)
+	 *  x <-- end points here
+	 */
+ total_len = SCTP_SIZE32((length + (4 * sizeof(int))));
+ /* Log a packet to the buffer. */
+ if (total_len > SCTP_PACKET_LOG_SIZE) {
+		/* Can't log this packet, the buffer is not big enough. */
+ return;
+ }
+ if (length < (SCTP_MIN_V4_OVERHEAD + sizeof(struct sctp_cookie_ack_chunk))) {
+ return;
+ }
+ atomic_add_int(&packet_log_writers, 1);
+try_again:
+ if (packet_log_writers > SCTP_PKTLOG_WRITERS_NEED_LOCK) {
+ SCTP_IP_PKTLOG_LOCK();
+ grabbed_lock = 1;
+again_locked:
+ value = packet_log_end;
+ newval = packet_log_end + total_len;
+ if (newval >= SCTP_PACKET_LOG_SIZE) {
+ /* we wrapped */
+ thisbegin = 0;
+ thisend = total_len;
+ } else {
+ thisbegin = packet_log_end;
+ thisend = newval;
+ }
+ if (!(atomic_cmpset_int(&packet_log_end, value, thisend))) {
+ goto again_locked;
+ }
+ } else {
+ value = packet_log_end;
+ newval = packet_log_end + total_len;
+ if (newval >= SCTP_PACKET_LOG_SIZE) {
+ /* we wrapped */
+ thisbegin = 0;
+ thisend = total_len;
+ } else {
+ thisbegin = packet_log_end;
+ thisend = newval;
+ }
+ if (!(atomic_cmpset_int(&packet_log_end, value, thisend))) {
+ goto try_again;
+ }
+ }
+ /* Sanity check */
+ if (thisend >= SCTP_PACKET_LOG_SIZE) {
+ printf("Insanity stops a log thisbegin:%d thisend:%d writers:%d lock:%d end:%d\n",
+ thisbegin,
+ thisend,
+ packet_log_writers,
+ grabbed_lock,
+ packet_log_end);
+ packet_log_end = 0;
+ goto no_log;
+
+ }
+ lenat = (int *)&packet_log_buffer[thisbegin];
+ *lenat = total_len;
+ lenat++;
+ *lenat = value;
+ lenat++;
+ tick_tock = (uint32_t *) lenat;
+ lenat++;
+ *tick_tock = sctp_get_tick_count();
+ copyto = (void *)lenat;
+ thisone = thisend - sizeof(int);
+ lenat = (int *)&packet_log_buffer[thisone];
+ *lenat = thisbegin;
+ if (grabbed_lock) {
+ SCTP_IP_PKTLOG_UNLOCK();
+ grabbed_lock = 0;
+ }
+ m_copydata(m, 0, length, (caddr_t)copyto);
+no_log:
+ if (grabbed_lock) {
+ SCTP_IP_PKTLOG_UNLOCK();
+ }
+ atomic_subtract_int(&packet_log_writers, 1);
+}
+
+
+int
+sctp_copy_out_packet_log(uint8_t * target, int length)
+{
+ /*
+ * We wind through the packet log starting at start copying up to
+ * length bytes out. We return the number of bytes copied.
+ */
+ int tocopy, this_copy;
+ int *lenat;
+ int did_delay = 0;
+
+ tocopy = length;
+ if (length < (2 * sizeof(int))) {
+ /* not enough room */
+ return (0);
+ }
+ if (SCTP_PKTLOG_WRITERS_NEED_LOCK) {
+ atomic_add_int(&packet_log_writers, SCTP_PKTLOG_WRITERS_NEED_LOCK);
+again:
+ if ((did_delay == 0) && (packet_log_writers != SCTP_PKTLOG_WRITERS_NEED_LOCK)) {
+ /*
+			 * We delay here for just a moment, hoping the
+			 * writer(s) that were present when we entered will
+			 * have left and only locking writers will remain to
+			 * contend with us for the lock. This does not
+			 * assure 100% access, but it's good enough for a
+			 * logging facility like this.
+ */
+ did_delay = 1;
+ DELAY(10);
+ goto again;
+ }
+ }
+ SCTP_IP_PKTLOG_LOCK();
+ lenat = (int *)target;
+ *lenat = packet_log_end;
+ lenat++;
+ this_copy = min((length - sizeof(int)), SCTP_PACKET_LOG_SIZE);
+ memcpy((void *)lenat, (void *)packet_log_buffer, this_copy);
+ if (SCTP_PKTLOG_WRITERS_NEED_LOCK) {
+ atomic_subtract_int(&packet_log_writers,
+ SCTP_PKTLOG_WRITERS_NEED_LOCK);
+ }
+ SCTP_IP_PKTLOG_UNLOCK();
+ return (this_copy + sizeof(int));
+}
+
+#endif
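
sctp_packet_log() above reserves space in the fixed packet_log_buffer by compare-and-swapping the shared end offset, wrapping to the start when a record would run past SCTP_PACKET_LOG_SIZE, and escalates to SCTP_IP_PKTLOG_LOCK() only when enough concurrent writers are present. The userland sketch below shows just the lock-free reservation step with C11 atomics; the buffer size, record contents and function names are invented for the illustration.

#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

#define LOG_SIZE 4096			/* illustrative, not SCTP_PACKET_LOG_SIZE */

static unsigned char log_buffer[LOG_SIZE];
static _Atomic int log_end;		/* next free offset, shared by writers */

/* Reserve rec_len bytes; returns the start offset, or -1 if it can never fit. */
static int
reserve(int rec_len)
{
	int oldend, newbegin, newend;

	if (rec_len > LOG_SIZE)
		return (-1);
	do {
		oldend = atomic_load(&log_end);
		if (oldend + rec_len >= LOG_SIZE) {
			newbegin = 0;		/* wrap to the start */
			newend = rec_len;
		} else {
			newbegin = oldend;
			newend = oldend + rec_len;
		}
	} while (!atomic_compare_exchange_weak(&log_end, &oldend, newend));
	return (newbegin);
}

int
main(void)
{
	const char pkt[] = "example packet bytes";
	int off;

	if ((off = reserve((int)sizeof(pkt))) >= 0) {
		memcpy(&log_buffer[off], pkt, sizeof(pkt));
		printf("logged %zu bytes at offset %d\n", sizeof(pkt), off);
	}
	return (0);
}

The kernel version additionally stores the record length, the previous end and a tick count in front of each logged packet and the record's begin offset at its tail, so sctp_copy_out_packet_log() can hand the raw region to a reader for decoding.
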
--- /dev/null
+++ sys/netinet/sctp_asconf.h
@@ -0,0 +1,91 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_asconf.h,v 1.8 2005/03/06 16:04:16 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_asconf.h,v 1.10 2007/09/15 19:07:42 rrs Exp $");
+
+#ifndef _NETINET_SCTP_ASCONF_H_
+#define _NETINET_SCTP_ASCONF_H_
+
+#if defined(_KERNEL)
+
+/*
+ * function prototypes
+ */
+extern void sctp_asconf_cleanup(struct sctp_tcb *, struct sctp_nets *);
+
+extern struct mbuf *sctp_compose_asconf(struct sctp_tcb *, int *, int);
+
+extern void
+sctp_handle_asconf(struct mbuf *, unsigned int, struct sctp_asconf_chunk *,
+ struct sctp_tcb *, int i);
+
+extern void
+sctp_handle_asconf_ack(struct mbuf *, int, struct sctp_asconf_ack_chunk *,
+ struct sctp_tcb *, struct sctp_nets *, int *);
+
+extern uint32_t
+sctp_addr_mgmt_ep_sa(struct sctp_inpcb *, struct sockaddr *,
+ uint32_t, uint32_t, struct sctp_ifa *);
+
+
+extern int
+sctp_asconf_iterator_ep(struct sctp_inpcb *inp, void *ptr,
+ uint32_t val);
+extern void
+sctp_asconf_iterator_stcb(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ void *ptr, uint32_t type);
+extern void sctp_asconf_iterator_end(void *ptr, uint32_t val);
+
+
+extern int32_t
+sctp_set_primary_ip_address_sa(struct sctp_tcb *,
+ struct sockaddr *);
+
+extern void
+ sctp_set_primary_ip_address(struct sctp_ifa *ifa);
+
+extern void
+sctp_check_address_list(struct sctp_tcb *, struct mbuf *, int, int,
+ struct sockaddr *, uint16_t, uint16_t, uint16_t, uint16_t);
+
+extern void
+ sctp_move_chunks_from_deleted_prim(struct sctp_tcb *, struct sctp_nets *);
+extern void
+ sctp_assoc_immediate_retrans(struct sctp_tcb *, struct sctp_nets *);
+extern void
+ sctp_net_immediate_retrans(struct sctp_tcb *, struct sctp_nets *);
+
+#endif /* _KERNEL */
+
+#endif /* !_NETINET_SCTP_ASCONF_H_ */
Index: tcp_timer.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/tcp_timer.c -L sys/netinet/tcp_timer.c -u -r1.2 -r1.3
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -27,12 +27,13 @@
* SUCH DAMAGE.
*
* @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.74.2.2 2006/03/01 21:08:53 andre Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_timer.c,v 1.99 2007/10/07 20:44:24 silby Exp $");
+
#include "opt_inet6.h"
#include "opt_tcpdebug.h"
-#include "opt_tcp_sack.h"
#include <sys/param.h>
#include <sys/kernel.h>
@@ -76,8 +77,8 @@
&tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "");
int tcp_delacktime;
-SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime,
- CTLTYPE_INT|CTLFLAG_RW, &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
+SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
"Time before a delayed ACK is sent");
int tcp_msl;
@@ -86,16 +87,28 @@
int tcp_rexmit_min;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
- &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", "Minimum Retransmission Timeout");
+ &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
+ "Minimum Retransmission Timeout");
int tcp_rexmit_slop;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
- &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", "Retransmission Timer Slop");
+ &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
+ "Retransmission Timer Slop");
static int always_keepalive = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
&always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
+int tcp_fast_finwait2_recycle = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
+ &tcp_fast_finwait2_recycle, 0,
+ "Recycle closed FIN_WAIT_2 connections faster");
+
+int tcp_finwait2_timeout;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
+ &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
+
+
static int tcp_keepcnt = TCPTV_KEEPCNT;
/* max idle probes */
int tcp_maxpersistidle;
@@ -108,12 +121,12 @@
* causes finite state machine actions if timers expire.
*/
void
-tcp_slowtimo()
+tcp_slowtimo(void)
{
tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
INP_INFO_WLOCK(&tcbinfo);
- (void) tcp_timer_2msl_tw(0);
+ (void) tcp_tw_2msl_scan(0);
INP_INFO_WUNLOCK(&tcbinfo);
}
@@ -125,30 +138,42 @@
static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */
+static int tcp_timer_race;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
+ 0, "Count of t_inpcb races on tcp_discardcb");
+
/*
* TCP timer processing.
*/
void
-tcp_timer_delack(xtp)
- void *xtp;
+tcp_timer_delack(void *xtp)
{
struct tcpcb *tp = xtp;
struct inpcb *inp;
INP_INFO_RLOCK(&tcbinfo);
inp = tp->t_inpcb;
+ /*
+ * XXXRW: While this assert is in fact correct, bugs in the tcpcb
+ * tear-down mean we need it as a work-around for races between
+ * timers and tcp_discardcb().
+ *
+ * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
+ */
if (inp == NULL) {
+ tcp_timer_race++;
INP_INFO_RUNLOCK(&tcbinfo);
return;
}
INP_LOCK(inp);
INP_INFO_RUNLOCK(&tcbinfo);
- if (callout_pending(tp->tt_delack) || !callout_active(tp->tt_delack)) {
+ if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_delack)
+ || !callout_active(&tp->t_timers->tt_delack)) {
INP_UNLOCK(inp);
return;
}
- callout_deactivate(tp->tt_delack);
+ callout_deactivate(&tp->t_timers->tt_delack);
tp->t_flags |= TF_ACKNOW;
tcpstat.tcps_delack++;
@@ -157,8 +182,7 @@
}
void
-tcp_timer_2msl(xtp)
- void *xtp;
+tcp_timer_2msl(void *xtp)
{
struct tcpcb *tp = xtp;
struct inpcb *inp;
@@ -167,121 +191,68 @@
ostate = tp->t_state;
#endif
+ /*
+ * XXXRW: Does this actually happen?
+ */
INP_INFO_WLOCK(&tcbinfo);
inp = tp->t_inpcb;
+ /*
+ * XXXRW: While this assert is in fact correct, bugs in the tcpcb
+ * tear-down mean we need it as a work-around for races between
+ * timers and tcp_discardcb().
+ *
+ * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
+ */
if (inp == NULL) {
+ tcp_timer_race++;
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
INP_LOCK(inp);
tcp_free_sackholes(tp);
- if (callout_pending(tp->tt_2msl) || !callout_active(tp->tt_2msl)) {
+ if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_2msl) ||
+ !callout_active(&tp->t_timers->tt_2msl)) {
INP_UNLOCK(tp->t_inpcb);
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
- callout_deactivate(tp->tt_2msl);
+ callout_deactivate(&tp->t_timers->tt_2msl);
/*
* 2 MSL timeout in shutdown went off. If we're closed but
* still waiting for peer to close and connection has been idle
* too long, or if 2MSL time is up from TIME_WAIT, delete connection
* control block. Otherwise, check again in a bit.
+ *
+	 * If fast recycling of FIN_WAIT_2 is enabled, we are in FIN_WAIT_2 and
+	 * the receiver has closed, there's no point in hanging onto the
+	 * FIN_WAIT_2 socket; just close it, ignoring any recent incoming segments.
*/
- if (tp->t_state != TCPS_TIME_WAIT &&
- (ticks - tp->t_rcvtime) <= tcp_maxidle)
- callout_reset(tp->tt_2msl, tcp_keepintvl,
- tcp_timer_2msl, tp);
- else
- tp = tcp_close(tp);
+ if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
+ tp->t_inpcb && tp->t_inpcb->inp_socket &&
+ (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
+ tcpstat.tcps_finwait2_drops++;
+ tp = tcp_close(tp);
+ } else {
+ if (tp->t_state != TCPS_TIME_WAIT &&
+ (ticks - tp->t_rcvtime) <= tcp_maxidle)
+ callout_reset(&tp->t_timers->tt_2msl, tcp_keepintvl,
+ tcp_timer_2msl, tp);
+ else
+ tp = tcp_close(tp);
+ }
#ifdef TCPDEBUG
- if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
PRU_SLOWTIMO);
#endif
- if (tp)
+ if (tp != NULL)
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&tcbinfo);
}
-/*
- * The timed wait lists contain references to each of the TCP sessions
- * currently TIME_WAIT state. The list pointers, including the list pointers
- * in each tcptw structure, are protected using the global tcbinfo lock,
- * which must be held over list iteration and modification.
- */
-struct twlist {
- LIST_HEAD(, tcptw) tw_list;
- struct tcptw tw_tail;
-};
-#define TWLIST_NLISTS 2
-static struct twlist twl_2msl[TWLIST_NLISTS];
-static struct twlist *tw_2msl_list[] = { &twl_2msl[0], &twl_2msl[1], NULL };
-
-void
-tcp_timer_init(void)
-{
- int i;
- struct twlist *twl;
-
- for (i = 0; i < TWLIST_NLISTS; i++) {
- twl = &twl_2msl[i];
- LIST_INIT(&twl->tw_list);
- LIST_INSERT_HEAD(&twl->tw_list, &twl->tw_tail, tw_2msl);
- }
-}
-
-void
-tcp_timer_2msl_reset(struct tcptw *tw, int timeo)
-{
- int i;
- struct tcptw *tw_tail;
-
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
- INP_LOCK_ASSERT(tw->tw_inpcb);
- if (tw->tw_time != 0)
- LIST_REMOVE(tw, tw_2msl);
- tw->tw_time = timeo + ticks;
- i = timeo > tcp_msl ? 1 : 0;
- tw_tail = &twl_2msl[i].tw_tail;
- LIST_INSERT_BEFORE(tw_tail, tw, tw_2msl);
-}
-
-void
-tcp_timer_2msl_stop(struct tcptw *tw)
-{
-
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
- if (tw->tw_time != 0)
- LIST_REMOVE(tw, tw_2msl);
-}
-
-struct tcptw *
-tcp_timer_2msl_tw(int reuse)
-{
- struct tcptw *tw, *tw_tail;
- struct twlist *twl;
- int i;
-
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
- for (i = 0; i < 2; i++) {
- twl = tw_2msl_list[i];
- tw_tail = &twl->tw_tail;
- for (;;) {
- tw = LIST_FIRST(&twl->tw_list);
- if (tw == tw_tail || (!reuse && tw->tw_time > ticks))
- break;
- INP_LOCK(tw->tw_inpcb);
- if (tcp_twclose(tw, reuse) != NULL)
- return (tw);
- }
- }
- return (NULL);
-}
-
void
-tcp_timer_keep(xtp)
- void *xtp;
+tcp_timer_keep(void *xtp)
{
struct tcpcb *tp = xtp;
struct tcptemp *t_template;
@@ -293,17 +264,26 @@
#endif
INP_INFO_WLOCK(&tcbinfo);
inp = tp->t_inpcb;
- if (!inp) {
+ /*
+ * XXXRW: While this assert is in fact correct, bugs in the tcpcb
+ * tear-down mean we need it as a work-around for races between
+ * timers and tcp_discardcb().
+ *
+ * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
+ */
+ if (inp == NULL) {
+ tcp_timer_race++;
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
INP_LOCK(inp);
- if (callout_pending(tp->tt_keep) || !callout_active(tp->tt_keep)) {
+ if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_keep)
+ || !callout_active(&tp->t_timers->tt_keep)) {
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
- callout_deactivate(tp->tt_keep);
+ callout_deactivate(&tp->t_timers->tt_keep);
/*
* Keep-alive timer went off; send something
* or drop connection if idle for too long.
@@ -335,9 +315,9 @@
tp->rcv_nxt, tp->snd_una - 1, 0);
(void) m_free(dtom(t_template));
}
- callout_reset(tp->tt_keep, tcp_keepintvl, tcp_timer_keep, tp);
+ callout_reset(&tp->t_timers->tt_keep, tcp_keepintvl, tcp_timer_keep, tp);
} else
- callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
+ callout_reset(&tp->t_timers->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
#ifdef TCPDEBUG
if (inp->inp_socket->so_options & SO_DEBUG)
@@ -353,18 +333,17 @@
tp = tcp_drop(tp, ETIMEDOUT);
#ifdef TCPDEBUG
- if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
PRU_SLOWTIMO);
#endif
- if (tp)
+ if (tp != NULL)
INP_UNLOCK(tp->t_inpcb);
INP_INFO_WUNLOCK(&tcbinfo);
}
void
-tcp_timer_persist(xtp)
- void *xtp;
+tcp_timer_persist(void *xtp)
{
struct tcpcb *tp = xtp;
struct inpcb *inp;
@@ -375,17 +354,26 @@
#endif
INP_INFO_WLOCK(&tcbinfo);
inp = tp->t_inpcb;
- if (!inp) {
+ /*
+ * XXXRW: While this assert is in fact correct, bugs in the tcpcb
+ * tear-down mean we need it as a work-around for races between
+ * timers and tcp_discardcb().
+ *
+ * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
+ */
+ if (inp == NULL) {
+ tcp_timer_race++;
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
INP_LOCK(inp);
- if (callout_pending(tp->tt_persist) || !callout_active(tp->tt_persist)){
+ if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_persist)
+ || !callout_active(&tp->t_timers->tt_persist)) {
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
- callout_deactivate(tp->tt_persist);
+ callout_deactivate(&tp->t_timers->tt_persist);
/*
	 * Persistence timer into zero window.
* Force a byte to be output, if possible.
@@ -412,18 +400,16 @@
out:
#ifdef TCPDEBUG
- if (tp && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
- tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
- PRU_SLOWTIMO);
+ if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
+ tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
- if (tp)
+ if (tp != NULL)
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&tcbinfo);
}
void
-tcp_timer_rexmt(xtp)
- void *xtp;
+tcp_timer_rexmt(void *xtp)
{
struct tcpcb *tp = xtp;
int rexmt;
@@ -437,17 +423,26 @@
INP_INFO_WLOCK(&tcbinfo);
headlocked = 1;
inp = tp->t_inpcb;
- if (!inp) {
+ /*
+ * XXXRW: While this assert is in fact correct, bugs in the tcpcb
+ * tear-down mean we need it as a work-around for races between
+ * timers and tcp_discardcb().
+ *
+ * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
+ */
+ if (inp == NULL) {
+ tcp_timer_race++;
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
INP_LOCK(inp);
- if (callout_pending(tp->tt_rexmt) || !callout_active(tp->tt_rexmt)) {
+ if ((inp->inp_vflag & INP_DROPPED) || callout_pending(&tp->t_timers->tt_rexmt)
+ || !callout_active(&tp->t_timers->tt_rexmt)) {
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
- callout_deactivate(tp->tt_rexmt);
+ callout_deactivate(&tp->t_timers->tt_rexmt);
tcp_free_sackholes(tp);
/*
* Retransmission timer went off. Message has not
@@ -560,12 +555,76 @@
out:
#ifdef TCPDEBUG
- if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
PRU_SLOWTIMO);
#endif
- if (tp)
+ if (tp != NULL)
INP_UNLOCK(inp);
if (headlocked)
INP_INFO_WUNLOCK(&tcbinfo);
}
+
+void
+tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
+{
+ struct callout *t_callout;
+ void *f_callout;
+
+ switch (timer_type) {
+ case TT_DELACK:
+ t_callout = &tp->t_timers->tt_delack;
+ f_callout = tcp_timer_delack;
+ break;
+ case TT_REXMT:
+ t_callout = &tp->t_timers->tt_rexmt;
+ f_callout = tcp_timer_rexmt;
+ break;
+ case TT_PERSIST:
+ t_callout = &tp->t_timers->tt_persist;
+ f_callout = tcp_timer_persist;
+ break;
+ case TT_KEEP:
+ t_callout = &tp->t_timers->tt_keep;
+ f_callout = tcp_timer_keep;
+ break;
+ case TT_2MSL:
+ t_callout = &tp->t_timers->tt_2msl;
+ f_callout = tcp_timer_2msl;
+ break;
+ default:
+ panic("bad timer_type");
+ }
+ if (delta == 0) {
+ callout_stop(t_callout);
+ } else {
+ callout_reset(t_callout, delta, f_callout, tp);
+ }
+}
+
+int
+tcp_timer_active(struct tcpcb *tp, int timer_type)
+{
+ struct callout *t_callout;
+
+ switch (timer_type) {
+ case TT_DELACK:
+ t_callout = &tp->t_timers->tt_delack;
+ break;
+ case TT_REXMT:
+ t_callout = &tp->t_timers->tt_rexmt;
+ break;
+ case TT_PERSIST:
+ t_callout = &tp->t_timers->tt_persist;
+ break;
+ case TT_KEEP:
+ t_callout = &tp->t_timers->tt_keep;
+ break;
+ case TT_2MSL:
+ t_callout = &tp->t_timers->tt_2msl;
+ break;
+ default:
+ panic("bad timer_type");
+ }
+ return callout_active(t_callout);
+}
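
The new tcp_timer_activate()/tcp_timer_active() pair centralizes all callout handling behind a timer-type switch, so call sites elsewhere in this patch change from, e.g., callout_stop(tp->tt_rexmt) to tcp_timer_activate(tp, TT_REXMT, 0) (see tcp_sack_partialack() in the tcp_sack.c diff below). A small userland sketch of the same dispatch idea follows; the toy callout structure and names are invented for the example and are not the kernel interface.

#include <stdio.h>

enum timer_type { T_DELACK, T_REXMT, T_PERSIST, T_KEEP, T_2MSL, T_NTYPES };

struct fake_callout {
	int	active;		/* is the timer armed? */
	int	ticks_left;	/* pretend expiry time */
	void	(*handler)(void);
};

static void delack_fires(void)	{ printf("delayed ACK timer fired\n"); }
static void rexmt_fires(void)	{ printf("retransmit timer fired\n"); }

static struct fake_callout timers[T_NTYPES] = {
	[T_DELACK] = { 0, 0, delack_fires },
	[T_REXMT]  = { 0, 0, rexmt_fires },
};

/* delta == 0 stops the timer, anything else (re)arms it. */
static void
timer_activate(enum timer_type type, int delta)
{
	struct fake_callout *c = &timers[type];

	if (delta == 0) {
		c->active = 0;
	} else {
		c->active = 1;
		c->ticks_left = delta;
	}
}

static int
timer_active(enum timer_type type)
{
	return (timers[type].active);
}

int
main(void)
{
	timer_activate(T_REXMT, 3);		/* arm the retransmit timer */
	timer_activate(T_DELACK, 0);		/* make sure delack is off */
	printf("rexmt active: %d, delack active: %d\n",
	    timer_active(T_REXMT), timer_active(T_DELACK));
	return (0);
}

The point of the indirection is the same as in the kernel function: callers name a timer by type, and only the helper knows which callout and handler that type maps to.
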
Index: tcp_sack.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_sack.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/tcp_sack.c -L sys/netinet/tcp_sack.c -u -r1.1.1.1 -r1.2
--- sys/netinet/tcp_sack.c
+++ sys/netinet/tcp_sack.c
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,37 +28,9 @@
* SUCH DAMAGE.
*
* @(#)tcp_sack.c 8.12 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_sack.c,v 1.26.2.1 2005/10/09 03:17:41 delphij Exp $
*/
/*-
- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
* @@(#)COPYRIGHT 1.1 (NRL) 17 January 1995
*
* NRL grants permission for redistribution and use in source and binary
@@ -96,11 +69,13 @@
* official policies, either expressed or implied, of the US Naval
* Research Laboratory (NRL).
*/
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_sack.c,v 1.40 2007/05/11 11:21:43 rwatson Exp $");
+
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_tcpdebug.h"
-#include "opt_tcp_input.h"
-#include "opt_tcp_sack.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -125,9 +100,7 @@
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
-#include <netinet/ip_icmp.h> /* for ICMP_BANDLIM */
#include <netinet/in_var.h>
-#include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
@@ -153,17 +126,17 @@
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK");
int tcp_do_sack = 1;
SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW,
- &tcp_do_sack, 0, "Enable/Disable TCP SACK support");
+ &tcp_do_sack, 0, "Enable/Disable TCP SACK support");
TUNABLE_INT("net.inet.tcp.sack.enable", &tcp_do_sack);
static int tcp_sack_maxholes = 128;
SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, maxholes, CTLFLAG_RW,
- &tcp_sack_maxholes, 0,
+ &tcp_sack_maxholes, 0,
"Maximum number of TCP SACK holes allowed per connection");
static int tcp_sack_globalmaxholes = 65536;
SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalmaxholes, CTLFLAG_RW,
- &tcp_sack_globalmaxholes, 0,
+ &tcp_sack_globalmaxholes, 0,
"Global maximum number of TCP SACK holes");
static int tcp_sack_globalholes = 0;
@@ -172,8 +145,8 @@
"Global number of TCP SACK holes currently allocated");
/*
- * This function is called upon receipt of new valid data (while not in header
- * prediction mode), and it updates the ordered list of sacks.
+ * This function is called upon receipt of new valid data (while not in
+ * header prediction mode), and it updates the ordered list of sacks.
*/
void
tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
@@ -189,7 +162,7 @@
INP_LOCK_ASSERT(tp->t_inpcb);
- /* Check arguments */
+ /* Check arguments. */
KASSERT(SEQ_LT(rcv_start, rcv_end), ("rcv_start < rcv_end"));
/* SACK block for the received segment. */
@@ -197,9 +170,9 @@
head_blk.end = rcv_end;
/*
- * Merge updated SACK blocks into head_blk, and
- * save unchanged SACK blocks into saved_blks[].
- * num_saved will have the number of the saved SACK blocks.
+ * Merge updated SACK blocks into head_blk, and save unchanged SACK
+ * blocks into saved_blks[]. num_saved will have the number of the
+ * saved SACK blocks.
*/
num_saved = 0;
for (i = 0; i < tp->rcv_numsacks; i++) {
@@ -212,8 +185,8 @@
} else if (SEQ_LEQ(head_blk.start, end) &&
SEQ_GEQ(head_blk.end, start)) {
/*
- * Merge this SACK block into head_blk.
- * This SACK block itself will be discarded.
+ * Merge this SACK block into head_blk. This SACK
+ * block itself will be discarded.
*/
if (SEQ_GT(head_blk.start, start))
head_blk.start = start;
@@ -235,8 +208,8 @@
num_head = 0;
if (SEQ_GT(head_blk.start, tp->rcv_nxt)) {
/*
- * The received data segment is an out-of-order segment.
- * Put head_blk at the top of SACK list.
+ * The received data segment is an out-of-order segment. Put
+ * head_blk at the top of SACK list.
*/
tp->sackblks[0] = head_blk;
num_head = 1;
@@ -263,8 +236,7 @@
* Delete all receiver-side SACK information.
*/
void
-tcp_clean_sackreport(tp)
- struct tcpcb *tp;
+tcp_clean_sackreport(struct tcpcb *tp)
{
int i;
@@ -308,6 +280,7 @@
static void
tcp_sackhole_free(struct tcpcb *tp, struct sackhole *hole)
{
+
uma_zfree(sack_hole_zone, hole);
tp->snd_numholes--;
@@ -322,7 +295,7 @@
*/
static struct sackhole *
tcp_sackhole_insert(struct tcpcb *tp, tcp_seq start, tcp_seq end,
- struct sackhole *after)
+ struct sackhole *after)
{
struct sackhole *hole;
@@ -331,7 +304,7 @@
if (hole == NULL)
return NULL;
- /* Insert the new SACK hole into scoreboard */
+ /* Insert the new SACK hole into scoreboard. */
if (after != NULL)
TAILQ_INSERT_AFTER(&tp->snd_holes, after, hole, scblink);
else
@@ -350,6 +323,7 @@
static void
tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole)
{
+
/* Update SACK hint. */
if (tp->sackhint.nexthole == hole)
tp->sackhint.nexthole = TAILQ_NEXT(hole, scblink);
@@ -385,30 +359,35 @@
sack_blocks[num_sack_blks++].end = th_ack;
}
/*
- * Append received valid SACK blocks to sack_blocks[].
+ * Append received valid SACK blocks to sack_blocks[], but only if we
+ * received new blocks from the other side.
*/
- for (i = 0; i < to->to_nsacks; i++) {
- bcopy((to->to_sacks + i * TCPOLEN_SACK), &sack, sizeof(sack));
- sack.start = ntohl(sack.start);
- sack.end = ntohl(sack.end);
- if (SEQ_GT(sack.end, sack.start) &&
- SEQ_GT(sack.start, tp->snd_una) &&
- SEQ_GT(sack.start, th_ack) &&
- SEQ_LEQ(sack.end, tp->snd_max))
- sack_blocks[num_sack_blks++] = sack;
+ if (to->to_flags & TOF_SACK) {
+ for (i = 0; i < to->to_nsacks; i++) {
+ bcopy((to->to_sacks + i * TCPOLEN_SACK),
+ &sack, sizeof(sack));
+ sack.start = ntohl(sack.start);
+ sack.end = ntohl(sack.end);
+ if (SEQ_GT(sack.end, sack.start) &&
+ SEQ_GT(sack.start, tp->snd_una) &&
+ SEQ_GT(sack.start, th_ack) &&
+ SEQ_LT(sack.start, tp->snd_max) &&
+ SEQ_GT(sack.end, tp->snd_una) &&
+ SEQ_LEQ(sack.end, tp->snd_max))
+ sack_blocks[num_sack_blks++] = sack;
+ }
}
-
/*
- * Return if SND.UNA is not advanced and no valid SACK block
- * is received.
+ * Return if SND.UNA is not advanced and no valid SACK block is
+ * received.
*/
if (num_sack_blks == 0)
return;
/*
- * Sort the SACK blocks so we can update the scoreboard
- * with just one pass. The overhead of sorting upto 4+1 elements
- * is less than making upto 4+1 passes over the scoreboard.
+ * Sort the SACK blocks so we can update the scoreboard with just one
+	 * pass. The overhead of sorting up to 4+1 elements is less than
+	 * making up to 4+1 passes over the scoreboard.
*/
for (i = 0; i < num_sack_blks; i++) {
for (j = i + 1; j < num_sack_blks; j++) {
@@ -423,15 +402,17 @@
/*
* Empty scoreboard. Need to initialize snd_fack (it may be
* uninitialized or have a bogus value). Scoreboard holes
- * (from the sack blocks received) are created later below (in
- * the logic that adds holes to the tail of the scoreboard).
+ * (from the sack blocks received) are created later below
+ * (in the logic that adds holes to the tail of the
+ * scoreboard).
*/
tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack);
/*
- * In the while-loop below, incoming SACK blocks (sack_blocks[])
- * and SACK holes (snd_holes) are traversed from their tails with
- * just one pass in order to reduce the number of compares especially
- * when the bandwidth-delay product is large.
+ * In the while-loop below, incoming SACK blocks (sack_blocks[]) and
+ * SACK holes (snd_holes) are traversed from their tails with just
+ * one pass in order to reduce the number of compares especially when
+ * the bandwidth-delay product is large.
+ *
* Note: Typically, in the first RTT of SACK recovery, the highest
* three or four SACK blocks with the same ack number are received.
* In the second RTT, if retransmitted data segments are not lost,
@@ -441,88 +422,104 @@
sblkp = &sack_blocks[num_sack_blks - 1]; /* Last SACK block */
if (SEQ_LT(tp->snd_fack, sblkp->start)) {
/*
- * The highest SACK block is beyond fack.
- * Append new SACK hole at the tail.
- * If the second or later highest SACK blocks are also
- * beyond the current fack, they will be inserted by
- * way of hole splitting in the while-loop below.
+ * The highest SACK block is beyond fack. Append new SACK
+ * hole at the tail. If the second or later highest SACK
+ * blocks are also beyond the current fack, they will be
+ * inserted by way of hole splitting in the while-loop below.
*/
temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL);
- if (temp == NULL)
- return;
- tp->snd_fack = sblkp->end;
- /* Go to the previous sack block. */
- sblkp--;
+ if (temp != NULL) {
+ tp->snd_fack = sblkp->end;
+ /* Go to the previous sack block. */
+ sblkp--;
+ } else {
+ /*
+ * We failed to add a new hole based on the current
+ * sack block. Skip over all the sack blocks that
+ * fall completely to the right of snd_fack and
+ * proceed to trim the scoreboard based on the
+ * remaining sack blocks. This also trims the
+ * scoreboard for th_ack (which is sack_blocks[0]).
+ */
+ while (sblkp >= sack_blocks &&
+ SEQ_LT(tp->snd_fack, sblkp->start))
+ sblkp--;
+ if (sblkp >= sack_blocks &&
+ SEQ_LT(tp->snd_fack, sblkp->end))
+ tp->snd_fack = sblkp->end;
+ }
} else if (SEQ_LT(tp->snd_fack, sblkp->end))
/* fack is advanced. */
tp->snd_fack = sblkp->end;
- /* We must have at least one SACK hole in scoreboard */
- KASSERT(!TAILQ_EMPTY(&tp->snd_holes), ("SACK scoreboard must not be empty"));
- cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole */
+ /* We must have at least one SACK hole in scoreboard. */
+ KASSERT(!TAILQ_EMPTY(&tp->snd_holes),
+ ("SACK scoreboard must not be empty"));
+ cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */
/*
* Since the incoming sack blocks are sorted, we can process them
* making one sweep of the scoreboard.
*/
- while (sblkp - sack_blocks >= 0 && cur != NULL) {
+ while (sblkp >= sack_blocks && cur != NULL) {
if (SEQ_GEQ(sblkp->start, cur->end)) {
/*
- * SACKs data beyond the current hole.
- * Go to the previous sack block.
+ * SACKs data beyond the current hole. Go to the
+ * previous sack block.
*/
sblkp--;
continue;
}
if (SEQ_LEQ(sblkp->end, cur->start)) {
/*
- * SACKs data before the current hole.
- * Go to the previous hole.
+ * SACKs data before the current hole. Go to the
+ * previous hole.
*/
cur = TAILQ_PREV(cur, sackhole_head, scblink);
continue;
}
tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
- ("sackhint bytes rtx >= 0"));
+ ("sackhint bytes rtx >= 0"));
if (SEQ_LEQ(sblkp->start, cur->start)) {
- /* Data acks at least the beginning of hole */
+ /* Data acks at least the beginning of hole. */
if (SEQ_GEQ(sblkp->end, cur->end)) {
- /* Acks entire hole, so delete hole */
+ /* Acks entire hole, so delete hole. */
temp = cur;
cur = TAILQ_PREV(cur, sackhole_head, scblink);
tcp_sackhole_remove(tp, temp);
/*
- * The sack block may ack all or part of the next
- * hole too, so continue onto the next hole.
+ * The sack block may ack all or part of the
+ * next hole too, so continue onto the next
+ * hole.
*/
continue;
} else {
- /* Move start of hole forward */
+ /* Move start of hole forward. */
cur->start = sblkp->end;
cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
}
} else {
- /* Data acks at least the end of hole */
+ /* Data acks at least the end of hole. */
if (SEQ_GEQ(sblkp->end, cur->end)) {
- /* Move end of hole backward */
+ /* Move end of hole backward. */
cur->end = sblkp->start;
cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
} else {
/*
- * ACKs some data in middle of a hole; need to
- * split current hole
+ * ACKs some data in middle of a hole; need
+ * to split current hole
*/
temp = tcp_sackhole_insert(tp, sblkp->end,
- cur->end, cur);
+ cur->end, cur);
if (temp != NULL) {
if (SEQ_GT(cur->rxmit, temp->rxmit)) {
temp->rxmit = cur->rxmit;
tp->sackhint.sack_bytes_rexmit
- += (temp->rxmit
- - temp->start);
+ += (temp->rxmit
+ - temp->start);
}
cur->end = sblkp->start;
cur->rxmit = SEQ_MIN(cur->rxmit,
- cur->end);
+ cur->end);
}
}
}
@@ -558,39 +555,35 @@
}
/*
- * Partial ack handling within a sack recovery episode.
- * Keeping this very simple for now. When a partial ack
- * is received, force snd_cwnd to a value that will allow
- * the sender to transmit no more than 2 segments.
- * If necessary, a better scheme can be adopted at a
- * later point, but for now, the goal is to prevent the
- * sender from bursting a large amount of data in the midst
- * of sack recovery.
+ * Partial ack handling within a sack recovery episode. Keeping this very
+ * simple for now. When a partial ack is received, force snd_cwnd to a value
+ * that will allow the sender to transmit no more than 2 segments. If
+ * necessary, a better scheme can be adopted at a later point, but for now,
+ * the goal is to prevent the sender from bursting a large amount of data in
+ * the midst of sack recovery.
*/
void
-tcp_sack_partialack(tp, th)
- struct tcpcb *tp;
- struct tcphdr *th;
+tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th)
{
int num_segs = 1;
INP_LOCK_ASSERT(tp->t_inpcb);
- callout_stop(tp->tt_rexmt);
+ tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rtttime = 0;
- /* send one or 2 segments based on how much new data was acked */
+ /* Send one or 2 segments based on how much new data was acked. */
if (((th->th_ack - tp->snd_una) / tp->t_maxseg) > 2)
num_segs = 2;
tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit +
- (tp->snd_nxt - tp->sack_newdata) +
- num_segs * tp->t_maxseg);
+ (tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_maxseg);
if (tp->snd_cwnd > tp->snd_ssthresh)
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_flags |= TF_ACKNOW;
(void) tcp_output(tp);
}
+#if 0
/*
- * Debug version of tcp_sack_output() that walks the scoreboard. Used for
+ * Debug version of tcp_sack_output() that walks the scoreboard. Used for
* now to sanity check the hint.
*/
static struct sackhole *
@@ -612,32 +605,31 @@
}
return (p);
}
+#endif
/*
* Returns the next hole to retransmit and the number of retransmitted bytes
- * from the scoreboard. We store both the next hole and the number of
+ * from the scoreboard. We store both the next hole and the number of
* retransmitted bytes as hints (and recompute these on the fly upon SACK/ACK
- * reception). This avoids scoreboard traversals completely.
+ * reception). This avoids scoreboard traversals completely.
*
- * The loop here will traverse *at most* one link. Here's the argument.
- * For the loop to traverse more than 1 link before finding the next hole to
- * retransmit, we would need to have at least 1 node following the current hint
- * with (rxmit == end). But, for all holes following the current hint,
- * (start == rxmit), since we have not yet retransmitted from them. Therefore,
- * in order to traverse more 1 link in the loop below, we need to have at least
- * one node following the current hint with (start == rxmit == end).
- * But that can't happen, (start == end) means that all the data in that hole
- * has been sacked, in which case, the hole would have been removed from the
- * scoreboard.
+ * The loop here will traverse *at most* one link. Here's the argument. For
+ * the loop to traverse more than 1 link before finding the next hole to
+ * retransmit, we would need to have at least 1 node following the current
+ * hint with (rxmit == end). But, for all holes following the current hint,
+ * (start == rxmit), since we have not yet retransmitted from them.
+ * Therefore, in order to traverse more than 1 link in the loop below, we need to
+ * have at least one node following the current hint with (start == rxmit ==
+ * end). But that can't happen, (start == end) means that all the data in
+ * that hole has been sacked, in which case, the hole would have been removed
+ * from the scoreboard.
*/
struct sackhole *
tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
{
- struct sackhole *hole = NULL, *dbg_hole = NULL;
- int dbg_bytes_rexmt;
+ struct sackhole *hole = NULL;
INP_LOCK_ASSERT(tp->t_inpcb);
- dbg_hole = tcp_sack_output_debug(tp, &dbg_bytes_rexmt);
*sack_bytes_rexmt = tp->sackhint.sack_bytes_rexmit;
hole = tp->sackhint.nexthole;
if (hole == NULL || SEQ_LT(hole->rxmit, hole->end))
@@ -649,16 +641,6 @@
}
}
out:
- if (dbg_hole != hole) {
- printf("%s: Computed sack hole not the same as cached value\n", __func__);
- hole = dbg_hole;
- }
- if (*sack_bytes_rexmt != dbg_bytes_rexmt) {
- printf("%s: Computed sack_bytes_retransmitted (%d) not "
- "the same as cached value (%d)\n",
- __func__, dbg_bytes_rexmt, *sack_bytes_rexmt);
- *sack_bytes_rexmt = dbg_bytes_rexmt;
- }
return (hole);
}
@@ -677,7 +659,7 @@
return; /* No holes */
if (SEQ_GEQ(tp->snd_nxt, tp->snd_fack))
return; /* We're already beyond any SACKed blocks */
- /*
+ /*-
* Two cases for which we want to advance snd_nxt:
* i) snd_nxt lies between end of one hole and beginning of another
* ii) snd_nxt lies between end of last hole and snd_fack
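
The stricter test now applied before an incoming SACK block is appended to sack_blocks[] relies on the wraparound-safe sequence comparisons from tcp_seq.h (signed 32-bit differences). The userland sketch below reproduces that acceptance predicate with macros written to match the usual SEQ_* forms; the sample sequence numbers are invented and chosen to straddle the 2^32 wrap.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t tcp_seq;

#define SEQ_LT(a, b)	((int32_t)((a) - (b)) < 0)
#define SEQ_LEQ(a, b)	((int32_t)((a) - (b)) <= 0)
#define SEQ_GT(a, b)	((int32_t)((a) - (b)) > 0)

/* Mirror of the validity checks applied to each received SACK block. */
static int
sack_block_ok(tcp_seq start, tcp_seq end, tcp_seq snd_una, tcp_seq snd_max,
    tcp_seq th_ack)
{
	return (SEQ_GT(end, start) &&
	    SEQ_GT(start, snd_una) &&
	    SEQ_GT(start, th_ack) &&
	    SEQ_LT(start, snd_max) &&
	    SEQ_GT(end, snd_una) &&
	    SEQ_LEQ(end, snd_max));
}

int
main(void)
{
	/* A block that straddles the 2^32 wrap is still compared correctly. */
	tcp_seq snd_una = 0xfffffff0u, snd_max = 0x00000200u, th_ack = snd_una;

	printf("in-window block: %d\n",
	    sack_block_ok(0xfffffff8u, 0x00000100u, snd_una, snd_max, th_ack));
	printf("stale block:     %d\n",
	    sack_block_ok(0xffffff00u, 0xffffff80u, snd_una, snd_max, th_ack));
	return (0);
}
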
Index: ip_mroute.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_mroute.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_mroute.h -L sys/netinet/ip_mroute.h -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_mroute.h
+++ sys/netinet/ip_mroute.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)ip_mroute.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/ip_mroute.h,v 1.24.2.1 2006/02/03 15:51:17 ru Exp $
+ * $FreeBSD: src/sys/netinet/ip_mroute.h,v 1.31 2007/02/08 23:05:08 bms Exp $
*/
#ifndef _NETINET_IP_MROUTE_H_
@@ -102,8 +102,8 @@
struct in_addr vifc_rmt_addr; /* remote address (tunnels only) */
};
-#define VIFF_TUNNEL 0x1 /* vif represents a tunnel end-point */
-#define VIFF_SRCRT 0x2 /* tunnel uses IP source routing */
+#define VIFF_TUNNEL 0x1 /* no-op; retained for old source */
+#define VIFF_SRCRT 0x2 /* no-op; retained for old source */
#define VIFF_REGISTER 0x4 /* used for PIM Register encap/decap */
/*
@@ -211,7 +211,7 @@
struct mrtstat {
u_long mrts_mfc_lookups; /* # forw. cache hash table hits */
u_long mrts_mfc_misses; /* # forw. cache hash table misses */
- u_long mrts_upcalls; /* # calls to mrouted */
+ u_long mrts_upcalls; /* # calls to multicast routing daemon */
u_long mrts_no_route; /* no route for packet's origin */
u_long mrts_bad_tunnel; /* malformed tunnel options */
u_long mrts_cant_tunnel; /* no room for tunnel options */
@@ -253,8 +253,8 @@
struct vif {
u_char v_flags; /* VIFF_ flags defined above */
u_char v_threshold; /* min ttl required to forward on vif*/
- u_int v_rate_limit; /* max rate */
- struct tbf *v_tbf; /* token bucket structure at intf. */
+ u_int v_rate_limit; /* ignored; kept for compatibility */
+ struct tbf *v_tbf; /* ignored; kept for compatibility */
struct in_addr v_lcl_addr; /* local interface address */
struct in_addr v_rmt_addr; /* remote address (tunnels only) */
struct ifnet *v_ifp; /* pointer to interface */
@@ -262,7 +262,7 @@
u_long v_pkt_out; /* # pkts out on interface */
u_long v_bytes_in; /* # bytes in on interface */
u_long v_bytes_out; /* # bytes out on interface */
- struct route v_route; /* cached route if this is a tunnel */
+ struct route v_route; /* cached route */
u_int v_rsvp_on; /* RSVP listening on this vif */
struct socket *v_rsvpd; /* RSVP daemon socket */
};
@@ -327,25 +327,6 @@
#define MAX_UPQ 4 /* max. no of pkts in upcall Q */
/*
- * Token Bucket filter code
- */
-#define MAX_BKT_SIZE 10000 /* 10K bytes size */
-#define MAXQSIZE 10 /* max # of pkts in queue */
-
-/*
- * the token bucket filter at each vif
- */
-struct tbf
-{
- struct timeval tbf_last_pkt_t; /* arr. time of last pkt */
- u_long tbf_n_tok; /* no of tokens in bucket */
- u_long tbf_q_len; /* length of queue at this vif */
- u_long tbf_max_q_len; /* max. queue length */
- struct mbuf *tbf_q; /* Packet queue */
- struct mbuf *tbf_t; /* tail-insertion pointer */
-};
-
-/*
* Structure for measuring the bandwidth and sending an upcall if the
* measured bandwidth is above or below a threshold.
*/
Index: tcp_hostcache.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_hostcache.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/tcp_hostcache.c -L sys/netinet/tcp_hostcache.c -u -r1.1.1.1 -r1.2
--- sys/netinet/tcp_hostcache.c
+++ sys/netinet/tcp_hostcache.c
@@ -25,39 +25,36 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/tcp_hostcache.c,v 1.10 2005/01/07 01:45:45 imp Exp $
*/
/*
- * The tcp_hostcache moves the tcp specific cached metrics from the routing
- * table into a dedicated structure indexed by the remote IP address. It
- * keeps information on the measured tcp parameters of past tcp sessions
- * to have better initial start values for following connections from the
- * same source. Depending on the network parameters (delay, bandwidth, max
- * MTU, congestion window) between local and remote site this can lead to
- * significant speedups for new tcp connections after the first one.
+ * The tcp_hostcache moves the tcp-specific cached metrics from the routing
+ * table to a dedicated structure indexed by the remote IP address. It keeps
+ * information on the measured TCP parameters of past TCP sessions to allow
+ * better initial start values to be used with later connections to/from the
+ * same source. Depending on the network parameters (delay, bandwidth, max
+ * MTU, congestion window) between local and remote sites, this can lead to
+ * significant speed-ups for new TCP connections after the first one.
*
- * Due to this new tcp_hostcache all tcp specific metrics information in
- * the routing table has been removed. The INPCB no longer keeps a pointer
- * to the routing entry and protocol initiated route cloning has been
- * removed as well. With these changes the routing table has gone back
- * to being more lightwight and only carries information related to packet
- * forwarding.
+ * Due to the tcp_hostcache, all TCP-specific metrics information in the
+ * routing table has been removed. The inpcb no longer keeps a pointer to
+ * the routing entry, and protocol-initiated route cloning has been removed
+ * as well. With these changes, the routing table has gone back to being
+ * more lightweight and only carries information related to packet forwarding.
*
- * Tcp_hostcache is designed for multiple concurrent access in SMP
- * environments and high contention. All bucket rows have their own
- * lock and thus multiple lookups and modifies can be done at the same
- * time as long as they are in different bucket rows. If a request for
- * insertion of a new record can't be satisfied it simply returns an
- * empty structure. Nobody and nothing shall ever point directly to
- * any entry in tcp_hostcache. All communication is done in an object
- * oriented way and only funtions of tcp_hostcache will manipulate hostcache
- * entries. Otherwise we are unable to achieve good behaviour in concurrent
- * access situations. Since tcp_hostcache is only caching information there
- * are no fatal consequences if we either can't satisfy any particular request
- * or have to drop/overwrite an existing entry because of bucket limit
- * memory constrains.
+ * tcp_hostcache is designed for multiple concurrent access in SMP
+ * environments and high contention. All bucket rows have their own lock and
+ * thus multiple lookups and modifies can be done at the same time as long as
+ * they are in different bucket rows. If a request for insertion of a new
+ * record can't be satisfied, it simply returns an empty structure. Nobody
+ * and nothing outside of tcp_hostcache.c will ever point directly to any
+ * entry in the tcp_hostcache. All communication is done in an
+ * object-oriented way and only functions of tcp_hostcache will manipulate
+ * hostcache entries. Otherwise, we are unable to achieve good behaviour in
+ * concurrent access situations. Since tcp_hostcache is only caching
+ * information, there are no fatal consequences if we either can't satisfy
+ * any particular request or have to drop/overwrite an existing entry because
+ * of bucket limit memory constraints.
*/
/*
@@ -65,6 +62,9 @@
* followed here.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_hostcache.c,v 1.17 2007/10/07 20:44:23 silby Exp $");
+
#include "opt_inet6.h"
#include <sys/param.h>
@@ -112,7 +112,7 @@
struct hc_head *rmx_head; /* head of bucket tail queue */
struct in_addr ip4; /* IP address */
struct in6_addr ip6; /* IP6 address */
- /* endpoint specific values for tcp */
+ /* endpoint specific values for TCP */
u_long rmx_mtu; /* MTU for this path */
u_long rmx_ssthresh; /* outbound gateway buffer limit */
u_long rmx_rtt; /* estimated round trip time */
@@ -121,7 +121,7 @@
u_long rmx_cwnd; /* congestion window */
u_long rmx_sendpipe; /* outbound delay-bandwidth product */
u_long rmx_recvpipe; /* inbound delay-bandwidth product */
- /* tcp hostcache internal data */
+ /* TCP hostcache internal data */
int rmx_expire; /* lifetime for object */
u_long rmx_hits; /* number of hits */
u_long rmx_updates; /* number of updates */
@@ -142,6 +142,7 @@
u_int cache_count;
u_int cache_limit;
int expire;
+ int prune;
int purgeall;
};
static struct tcp_hostcache tcp_hostcache;
@@ -156,26 +157,29 @@
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0, "TCP Host cache");
SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, cachelimit, CTLFLAG_RDTUN,
- &tcp_hostcache.cache_limit, 0, "Overall entry limit for hostcache");
+ &tcp_hostcache.cache_limit, 0, "Overall entry limit for hostcache");
SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, hashsize, CTLFLAG_RDTUN,
- &tcp_hostcache.hashsize, 0, "Size of TCP hostcache hashtable");
+ &tcp_hostcache.hashsize, 0, "Size of TCP hostcache hashtable");
SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN,
- &tcp_hostcache.bucket_limit, 0, "Per-bucket hash limit for hostcache");
+ &tcp_hostcache.bucket_limit, 0, "Per-bucket hash limit for hostcache");
SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, count, CTLFLAG_RD,
- &tcp_hostcache.cache_count, 0, "Current number of entries in hostcache");
+ &tcp_hostcache.cache_count, 0, "Current number of entries in hostcache");
SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, expire, CTLFLAG_RW,
- &tcp_hostcache.expire, 0, "Expire time of TCP hostcache entries");
+ &tcp_hostcache.expire, 0, "Expire time of TCP hostcache entries");
+
+SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, prune, CTLFLAG_RW,
+ &tcp_hostcache.prune, 0, "Time between purge runs");
SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, purge, CTLFLAG_RW,
- &tcp_hostcache.purgeall, 0, "Expire all entires on next purge run");
+ &tcp_hostcache.purgeall, 0, "Expire all entires on next purge run");
SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list,
- CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0,
- sysctl_tcp_hc_list, "A", "List of all hostcache entries");
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0,
+ sysctl_tcp_hc_list, "A", "List of all hostcache entries");
static MALLOC_DEFINE(M_HOSTCACHE, "hostcache", "TCP hostcache");
@@ -201,7 +205,7 @@
int i;
/*
- * Initialize hostcache structures
+ * Initialize hostcache structures.
*/
tcp_hostcache.cache_count = 0;
tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE;
@@ -209,6 +213,7 @@
tcp_hostcache.cache_limit =
tcp_hostcache.hashsize * tcp_hostcache.bucket_limit;
tcp_hostcache.expire = TCP_HOSTCACHE_EXPIRE;
+ tcp_hostcache.prune = TCP_HOSTCACHE_PRUNE;
TUNABLE_INT_FETCH("net.inet.tcp.hostcache.hashsize",
&tcp_hostcache.hashsize);
@@ -218,19 +223,19 @@
&tcp_hostcache.bucket_limit);
if (!powerof2(tcp_hostcache.hashsize)) {
printf("WARNING: hostcache hash size is not a power of 2.\n");
- tcp_hostcache.hashsize = 512; /* safe default */
+ tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE; /* default */
}
tcp_hostcache.hashmask = tcp_hostcache.hashsize - 1;
/*
- * Allocate the hash table
+ * Allocate the hash table.
*/
tcp_hostcache.hashbase = (struct hc_head *)
malloc(tcp_hostcache.hashsize * sizeof(struct hc_head),
M_HOSTCACHE, M_WAITOK | M_ZERO);
/*
- * Initialize the hash buckets
+ * Initialize the hash buckets.
*/
for (i = 0; i < tcp_hostcache.hashsize; i++) {
TAILQ_INIT(&tcp_hostcache.hashbase[i].hch_bucket);
@@ -250,11 +255,11 @@
* Set up periodic cache cleanup.
*/
callout_init(&tcp_hc_callout, CALLOUT_MPSAFE);
- callout_reset(&tcp_hc_callout, TCP_HOSTCACHE_PRUNE * hz, tcp_hc_purge, 0);
+ callout_reset(&tcp_hc_callout, tcp_hostcache.prune * hz, tcp_hc_purge, 0);
}
/*
- * Internal function: lookup an entry in the hostcache or return NULL.
+ * Internal function: look up an entry in the hostcache or return NULL.
*
* If an entry has been returned, the caller becomes responsible for
* unlocking the bucket row after he is done reading/modifying the entry.
@@ -279,14 +284,14 @@
hc_head = &tcp_hostcache.hashbase[hash];
/*
- * aquire lock for this bucket row
- * we release the lock if we don't find an entry,
- * otherwise the caller has to unlock after he is done
+ * Acquire lock for this bucket row; we release the lock if we don't
+ * find an entry, otherwise the caller has to unlock after he is
+ * done.
*/
THC_LOCK(&hc_head->hch_mtx);
/*
- * circle through entries in bucket row looking for a match
+ * Iterate through entries in bucket row looking for a match.
*/
TAILQ_FOREACH(hc_entry, &hc_head->hch_bucket, rmx_q) {
if (inc->inc_isipv6) {
@@ -301,15 +306,15 @@
}
/*
- * We were unsuccessful and didn't find anything
+ * We were unsuccessful and didn't find anything.
*/
THC_UNLOCK(&hc_head->hch_mtx);
return NULL;
}
/*
- * Internal function: insert an entry into the hostcache or return NULL
- * if unable to allocate a new one.
+ * Internal function: insert an entry into the hostcache or return NULL if
+ * unable to allocate a new one.
*
* If an entry has been returned, the caller becomes responsible for
* unlocking the bucket row after he is done reading/modifying the entry.
@@ -324,7 +329,7 @@
KASSERT(inc != NULL, ("tcp_hc_insert with NULL in_conninfo pointer"));
/*
- * Hash the foreign ip address
+ * Hash the foreign ip address.
*/
if (inc->inc_isipv6)
hash = HOSTCACHE_HASH6(&inc->inc6_faddr);
@@ -334,25 +339,31 @@
hc_head = &tcp_hostcache.hashbase[hash];
/*
- * aquire lock for this bucket row
- * we release the lock if we don't find an entry,
- * otherwise the caller has to unlock after he is done
+ * Acquire lock for this bucket row; we release the lock if we don't
+ * find an entry, otherwise the caller has to unlock after he is
+ * done.
*/
THC_LOCK(&hc_head->hch_mtx);
/*
- * If the bucket limit is reached reuse the least used element
+ * If the bucket limit is reached, reuse the least-used element.
*/
if (hc_head->hch_length >= tcp_hostcache.bucket_limit ||
tcp_hostcache.cache_count >= tcp_hostcache.cache_limit) {
hc_entry = TAILQ_LAST(&hc_head->hch_bucket, hc_qhead);
/*
* At first we were dropping the last element, just to
- * reaquire it in the next two lines again which ain't
- * very efficient. Instead just reuse the least used element.
- * Maybe we drop something that is still "in-use" but we can
- * be "lossy".
+ * reacquire it in the next two lines again, which isn't very
+ * efficient. Instead just reuse the least used element.
+ * We may drop something that is still "in-use" but we can be
+ * "lossy".
+ * Just give up if this bucket row is empty and we don't have
+ * anything to replace.
*/
+ if (hc_entry == NULL) {
+ THC_UNLOCK(&hc_head->hch_mtx);
+ return NULL;
+ }
TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q);
tcp_hostcache.hashbase[hash].hch_length--;
tcp_hostcache.cache_count--;
@@ -362,7 +373,7 @@
#endif
} else {
/*
- * Allocate a new entry, or balk if not possible
+ * Allocate a new entry, or balk if not possible.
*/
hc_entry = uma_zalloc(tcp_hostcache.zone, M_NOWAIT);
if (hc_entry == NULL) {
@@ -372,7 +383,7 @@
}
/*
- * Initialize basic information of hostcache entry
+ * Initialize basic information of hostcache entry.
*/
bzero(hc_entry, sizeof(*hc_entry));
if (inc->inc_isipv6)
@@ -383,7 +394,7 @@
hc_entry->rmx_expire = tcp_hostcache.expire;
/*
- * Put it upfront
+ * Put it upfront.
*/
TAILQ_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q);
tcp_hostcache.hashbase[hash].hch_length++;
@@ -394,9 +405,9 @@
}
/*
- * External function: lookup an entry in the hostcache and fill out the
- * supplied tcp metrics structure. Fills in null when no entry was found
- * or a value is not set.
+ * External function: look up an entry in the hostcache and fill out the
+ * supplied TCP metrics structure. Fills in NULL when no entry was found or
+ * a value is not set.
*/
void
tcp_hc_get(struct in_conninfo *inc, struct hc_metrics_lite *hc_metrics_lite)
@@ -404,12 +415,12 @@
struct hc_metrics *hc_entry;
/*
- * Find the right bucket
+ * Find the right bucket.
*/
hc_entry = tcp_hc_lookup(inc);
/*
- * If we don't have an existing object
+ * If we don't have an existing object.
*/
if (hc_entry == NULL) {
bzero(hc_metrics_lite, sizeof(*hc_metrics_lite));
@@ -428,14 +439,14 @@
hc_metrics_lite->rmx_recvpipe = hc_entry->rmx_recvpipe;
/*
- * unlock bucket row
+ * Unlock bucket row.
*/
THC_UNLOCK(&hc_entry->rmx_head->hch_mtx);
}
/*
- * External function: lookup an entry in the hostcache and return the
- * discovered path mtu. Returns null if no entry is found or value is not
+ * External function: look up an entry in the hostcache and return the
+ * discovered path MTU. Returns NULL if no entry is found or value is not
* set.
*/
u_long
@@ -457,7 +468,7 @@
}
/*
- * External function: update the mtu value of an entry in the hostcache.
+ * External function: update the MTU value of an entry in the hostcache.
* Creates a new entry if none was found.
*/
void
@@ -466,12 +477,12 @@
struct hc_metrics *hc_entry;
/*
- * Find the right bucket
+ * Find the right bucket.
*/
hc_entry = tcp_hc_lookup(inc);
/*
- * If we don't have an existing object try to insert a new one
+ * If we don't have an existing object, try to insert a new one.
*/
if (hc_entry == NULL) {
hc_entry = tcp_hc_insert(inc);
@@ -484,19 +495,19 @@
hc_entry->rmx_mtu = mtu;
/*
- * put it upfront so we find it faster next time
+ * Put it upfront so we find it faster next time.
*/
TAILQ_REMOVE(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q);
TAILQ_INSERT_HEAD(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q);
/*
- * unlock bucket row
+ * Unlock bucket row.
*/
THC_UNLOCK(&hc_entry->rmx_head->hch_mtx);
}
/*
- * External function: update the tcp metrics of an entry in the hostcache.
+ * External function: update the TCP metrics of an entry in the hostcache.
* Creates a new entry if none was found.
*/
void
@@ -587,6 +598,9 @@
char *p, *buf;
int len, i, error;
struct hc_metrics *hc_entry;
+#ifdef INET6
+ char ip6buf[INET6_ADDRSTRLEN];
+#endif
bufsize = linesize * (tcp_hostcache.cache_count + 1);
@@ -607,7 +621,7 @@
"%4lu %4lu %4i\n",
hc_entry->ip4.s_addr ? inet_ntoa(hc_entry->ip4) :
#ifdef INET6
- ip6_sprintf(&hc_entry->ip6),
+ ip6_sprintf(ip6buf, &hc_entry->ip6),
#else
"IPv6?",
#endif
@@ -635,8 +649,8 @@
}
/*
- * Expire and purge (old|all) entries in the tcp_hostcache. Runs periodically
- * from the callout.
+ * Expire and purge (old|all) entries in the tcp_hostcache. Runs
+ * periodically from the callout.
*/
static void
tcp_hc_purge(void *arg)
@@ -661,9 +675,9 @@
tcp_hostcache.hashbase[i].hch_length--;
tcp_hostcache.cache_count--;
} else
- hc_entry->rmx_expire -= TCP_HOSTCACHE_PRUNE;
+ hc_entry->rmx_expire -= tcp_hostcache.prune;
}
THC_UNLOCK(&tcp_hostcache.hashbase[i].hch_mtx);
}
- callout_reset(&tcp_hc_callout, TCP_HOSTCACHE_PRUNE * hz, tcp_hc_purge, 0);
+ callout_reset(&tcp_hc_callout, tcp_hostcache.prune * hz, tcp_hc_purge, 0);
}
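
The hostcache hunks above replace the compile-time TCP_HOSTCACHE_PRUNE interval with the new run-time tunable tcp_hostcache.prune, so the purge callout period can now be changed through net.inet.tcp.hostcache.prune. The user-space sketch below is illustrative only (not part of the patch; every name in it is invented) and restates the pattern the cache uses: hash the address into a bucket, age each entry by the prune interval on every purge pass, and free it once its lifetime reaches zero.

/* Illustrative sketch, not from the patch: hostcache-style bucket
 * hashing and expiry driven by a tunable "prune" interval. */
#include <stdio.h>
#include <stdlib.h>

#define HASHSIZE  16            /* must be a power of 2, like the real cache */
#define EXPIRE    (60 * 60)     /* seconds an idle entry lives */

struct entry {
	unsigned long addr;     /* stand-in for the foreign IP address */
	int expire;             /* remaining lifetime in seconds */
	struct entry *next;
};

static struct entry *buckets[HASHSIZE];
static int prune = 5 * 60;      /* plays the role of net.inet.tcp.hostcache.prune */

static unsigned hash(unsigned long addr) {
	return addr & (HASHSIZE - 1);   /* works because HASHSIZE is a power of 2 */
}

static void insert(unsigned long addr) {
	struct entry *e = calloc(1, sizeof(*e));
	e->addr = addr;
	e->expire = EXPIRE;
	e->next = buckets[hash(addr)];
	buckets[hash(addr)] = e;        /* "put it upfront" */
}

/* One purge pass: age every entry by the prune interval, drop dead ones. */
static void purge(void) {
	for (unsigned i = 0; i < HASHSIZE; i++) {
		struct entry **pp = &buckets[i];
		while (*pp != NULL) {
			if (((*pp)->expire -= prune) <= 0) {
				struct entry *dead = *pp;
				*pp = dead->next;
				free(dead);
			} else
				pp = &(*pp)->next;
		}
	}
}

int main(void) {
	insert(0x0a000001UL);
	for (int pass = 0; pass < EXPIRE / prune + 1; pass++)
		purge();
	printf("entry expired: %s\n",
	    buckets[hash(0x0a000001UL)] == NULL ? "yes" : "no");
	return 0;
}
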
Index: in_proto.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/in_proto.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/in_proto.c -L sys/netinet/in_proto.c -u -r1.1.1.2 -r1.2
--- sys/netinet/in_proto.c
+++ sys/netinet/in_proto.c
@@ -27,15 +27,18 @@
* SUCH DAMAGE.
*
* @(#)in_proto.c 8.2 (Berkeley) 2/9/95
- * $FreeBSD: src/sys/netinet/in_proto.c,v 1.77.2.3 2006/01/03 08:15:32 thompsa Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/in_proto.c,v 1.87 2007/10/07 20:44:22 silby Exp $");
+
#include "opt_ipx.h"
#include "opt_mrouting.h"
#include "opt_ipsec.h"
#include "opt_inet6.h"
#include "opt_pf.h"
#include "opt_carp.h"
+#include "opt_sctp.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -55,9 +58,6 @@
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/igmp_var.h>
-#ifdef PIM
-#include <netinet/pim_var.h>
-#endif
#include <netinet/tcp.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
@@ -69,22 +69,18 @@
* TCP/IP protocol family: IP, ICMP, UDP, TCP.
*/
-#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#include <netinet6/ah.h>
-#ifdef IPSEC_ESP
-#include <netinet6/esp.h>
-#endif
-#include <netinet6/ipcomp.h>
-#endif /* IPSEC */
+static struct pr_usrreqs nousrreqs;
-#ifdef FAST_IPSEC
+#ifdef IPSEC
#include <netipsec/ipsec.h>
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
-#ifdef IPXIP
-#include <netipx/ipx_ip.h>
-#endif
+#ifdef SCTP
+#include <netinet/in_pcb.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp.h>
+#include <netinet/sctp_var.h>
+#endif /* SCTP */
#ifdef DEV_PFSYNC
#include <net/pfvar.h>
@@ -139,6 +135,43 @@
.pr_drain = tcp_drain,
.pr_usrreqs = &tcp_usrreqs
},
+#ifdef SCTP
+{
+ .pr_type = SOCK_DGRAM,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_SCTP,
+ .pr_flags = PR_WANTRCVD,
+ .pr_input = sctp_input,
+ .pr_ctlinput = sctp_ctlinput,
+ .pr_ctloutput = sctp_ctloutput,
+ .pr_init = sctp_init,
+ .pr_drain = sctp_drain,
+ .pr_usrreqs = &sctp_usrreqs
+},
+{
+ .pr_type = SOCK_SEQPACKET,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_SCTP,
+ .pr_flags = PR_WANTRCVD,
+ .pr_input = sctp_input,
+ .pr_ctlinput = sctp_ctlinput,
+ .pr_ctloutput = sctp_ctloutput,
+ .pr_drain = sctp_drain,
+ .pr_usrreqs = &sctp_usrreqs
+},
+
+{
+ .pr_type = SOCK_STREAM,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_SCTP,
+ .pr_flags = PR_WANTRCVD,
+ .pr_input = sctp_input,
+ .pr_ctlinput = sctp_ctlinput,
+ .pr_ctloutput = sctp_ctloutput,
+ .pr_drain = sctp_drain,
+ .pr_usrreqs = &sctp_usrreqs
+},
+#endif /* SCTP */
{
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
@@ -186,34 +219,6 @@
.pr_protocol = IPPROTO_AH,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = ah4_input,
- .pr_usrreqs = &nousrreqs
-},
-#ifdef IPSEC_ESP
-{
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_ESP,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = esp4_input,
- .pr_usrreqs = &nousrreqs
-},
-#endif
-{
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_IPCOMP,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = ipcomp4_input,
- .pr_usrreqs = &nousrreqs
-},
-#endif /* IPSEC */
-#ifdef FAST_IPSEC
-{
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_AH,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = ah4_input,
.pr_ctlinput = ah4_ctlinput,
.pr_usrreqs = &nousrreqs
},
@@ -234,7 +239,7 @@
.pr_input = ipcomp4_input,
.pr_usrreqs = &nousrreqs
},
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
{
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
@@ -287,28 +292,15 @@
.pr_usrreqs = &rip_usrreqs
},
#endif
-#ifdef IPXIP
-{
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_IDP,
- .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
- .pr_input = ipxip_input,
- .pr_ctlinput = ipxip_ctlinput,
- .pr_usrreqs = &rip_usrreqs
-},
-#endif
-#ifdef PIM
{
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_PIM,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
- .pr_input = pim_input,
+ .pr_input = encap4_input,
.pr_ctloutput = rip_ctloutput,
.pr_usrreqs = &rip_usrreqs
},
-#endif /* PIM */
#ifdef DEV_PFSYNC
{
.pr_type = SOCK_RAW,
@@ -374,23 +366,19 @@
SYSCTL_NODE(_net_inet, IPPROTO_ICMP, icmp, CTLFLAG_RW, 0, "ICMP");
SYSCTL_NODE(_net_inet, IPPROTO_UDP, udp, CTLFLAG_RW, 0, "UDP");
SYSCTL_NODE(_net_inet, IPPROTO_TCP, tcp, CTLFLAG_RW, 0, "TCP");
+#ifdef SCTP
+SYSCTL_NODE(_net_inet, IPPROTO_SCTP, sctp, CTLFLAG_RW, 0, "SCTP");
+#endif
SYSCTL_NODE(_net_inet, IPPROTO_IGMP, igmp, CTLFLAG_RW, 0, "IGMP");
-#ifdef FAST_IPSEC
+#ifdef IPSEC
/* XXX no protocol # to use, pick something "reserved" */
SYSCTL_NODE(_net_inet, 253, ipsec, CTLFLAG_RW, 0, "IPSEC");
SYSCTL_NODE(_net_inet, IPPROTO_AH, ah, CTLFLAG_RW, 0, "AH");
SYSCTL_NODE(_net_inet, IPPROTO_ESP, esp, CTLFLAG_RW, 0, "ESP");
SYSCTL_NODE(_net_inet, IPPROTO_IPCOMP, ipcomp, CTLFLAG_RW, 0, "IPCOMP");
SYSCTL_NODE(_net_inet, IPPROTO_IPIP, ipip, CTLFLAG_RW, 0, "IPIP");
-#else
-#ifdef IPSEC
-SYSCTL_NODE(_net_inet, IPPROTO_AH, ipsec, CTLFLAG_RW, 0, "IPSEC");
#endif /* IPSEC */
-#endif /* !FAST_IPSEC */
SYSCTL_NODE(_net_inet, IPPROTO_RAW, raw, CTLFLAG_RW, 0, "RAW");
-#ifdef PIM
-SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
-#endif
#ifdef DEV_PFSYNC
SYSCTL_NODE(_net_inet, IPPROTO_PFSYNC, pfsync, CTLFLAG_RW, 0, "PFSYNC");
#endif
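
The in_proto.c hunks register SCTP protocol-switch entries for SOCK_DGRAM, SOCK_SEQPACKET and SOCK_STREAM on IPPROTO_SCTP only when the kernel is built with "options SCTP", and fold the old IPSEC/FAST_IPSEC split into a single IPSEC block. The sketch below shows the general idea of such a conditionally compiled protocol table in ordinary user-space C; it is not the kernel protosw API, and all constants and handler names in it are invented.

/* Illustrative only: a static table maps (socket type, protocol) to
 * handlers, with optional protocols guarded by an #ifdef, the same
 * way the SCTP entries above sit behind "options SCTP". */
#include <stdio.h>

#define MY_SOCK_DGRAM   1
#define MY_SOCK_STREAM  2
#define MY_PROTO_TCP    6
#define MY_PROTO_SCTP   132

struct proto_entry {
	int type;
	int protocol;
	const char *name;
	void (*input)(void);            /* stand-in for pr_input */
};

static void tcp_input_stub(void)  { puts("tcp input");  }
#ifdef WITH_SCTP
static void sctp_input_stub(void) { puts("sctp input"); }
#endif

static const struct proto_entry protosw_sketch[] = {
	{ MY_SOCK_STREAM, MY_PROTO_TCP,  "tcp",  tcp_input_stub  },
#ifdef WITH_SCTP
	{ MY_SOCK_DGRAM,  MY_PROTO_SCTP, "sctp", sctp_input_stub },
	{ MY_SOCK_STREAM, MY_PROTO_SCTP, "sctp", sctp_input_stub },
#endif
};

int main(void) {
	size_t n = sizeof(protosw_sketch) / sizeof(protosw_sketch[0]);
	for (size_t i = 0; i < n; i++)
		printf("%s (proto %d)\n", protosw_sketch[i].name,
		    protosw_sketch[i].protocol);
	return 0;
}
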
--- /dev/null
+++ sys/netinet/sctp_cc_functions.c
@@ -0,0 +1,1631 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_input.h>
+#include <netinet/sctp_indata.h>
+#include <netinet/sctp_uio.h>
+#include <netinet/sctp_timer.h>
+#include <netinet/sctp_auth.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_cc_functions.h>
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_cc_functions.c,v 1.3 2007/09/08 11:35:10 rrs Exp $");
+void
+sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /*
+ * We take the max of the burst limit times a MTU or the
+ * INITIAL_CWND. We then limit this to 4 MTU's of sending.
+ */
+ net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
+ /* we always get at LEAST 2 MTU's */
+ if (net->cwnd < (2 * net->mtu)) {
+ net->cwnd = 2 * net->mtu;
+ }
+ net->ssthresh = stcb->asoc.peers_rwnd;
+
+ if (sctp_logging_level & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
+ }
+}
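
sctp_set_initial_cc_param() above clamps the initial window to min(4 * MTU, max(2 * MTU, SCTP_INITIAL_CWND)) and then re-applies the two-MTU floor. A small worked restatement follows; the 8192-byte "initial" value is hypothetical and is only there to show the 4 * MTU cap taking effect.

/* Illustrative restatement of the initial-cwnd clamp above. */
#include <stdio.h>

static unsigned long initial_cwnd(unsigned long mtu, unsigned long initial)
{
	unsigned long hi = 4 * mtu;
	unsigned long lo = 2 * mtu;
	unsigned long cwnd = initial > lo ? initial : lo;   /* max(2*MTU, initial) */

	if (cwnd > hi)
		cwnd = hi;              /* min(4*MTU, ...) */
	if (cwnd < lo)
		cwnd = lo;              /* "we always get at LEAST 2 MTU's" */
	return cwnd;
}

int main(void)
{
	/* MTU 1500: max(3000, 8192) = 8192, capped at 4 * 1500 = 6000. */
	printf("%lu\n", initial_cwnd(1500, 8192));
	return 0;
}
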
+
+void
+sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ struct sctp_nets *net;
+
+ /*-
+ * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) &&
+ * (net->fast_retran_loss_recovery == 0)))
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if ((asoc->fast_retran_loss_recovery == 0) || (sctp_cmt_on_off == 1)) {
+ /* out of a RFC2582 Fast recovery window? */
+ if (net->net_ack > 0) {
+ /*
+ * per section 7.2.3, are there any
+ * destinations that had a fast retransmit
+ * to them. If so what we need to do is
+ * adjust ssthresh and cwnd.
+ */
+ struct sctp_tmit_chunk *lchk;
+ int old_cwnd = net->cwnd;
+
+ net->ssthresh = net->cwnd / 2;
+ if (net->ssthresh < (net->mtu * 2)) {
+ net->ssthresh = 2 * net->mtu;
+ }
+ net->cwnd = net->ssthresh;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
+ SCTP_CWND_LOG_FROM_FR);
+ }
+ lchk = TAILQ_FIRST(&asoc->send_queue);
+
+ net->partial_bytes_acked = 0;
+ /* Turn on fast recovery window */
+ asoc->fast_retran_loss_recovery = 1;
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ asoc->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * CMT fast recovery -- per destination
+ * recovery variable.
+ */
+ net->fast_retran_loss_recovery = 1;
+
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ net->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * Disable Nonce Sum Checking and store the
+ * resync tsn
+ */
+ asoc->nonce_sum_check = 0;
+ asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net);
+ }
+ } else if (net->net_ack > 0) {
+ /*
+ * Mark a peg that we WOULD have done a cwnd
+ * reduction but RFC2582 prevented this action.
+ */
+ SCTP_STAT_INCR(sctps_fastretransinrtt);
+ }
+ }
+}
+
+void
+sctp_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit)
+{
+ struct sctp_nets *net;
+
+ /******************************/
+ /* update cwnd and Early FR */
+ /******************************/
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code. Need to debug.
+ */
+ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
+ if (compare_with_wrap(asoc->last_acked_seq,
+ net->fast_recovery_tsn, MAX_TSN) ||
+ (asoc->last_acked_seq == net->fast_recovery_tsn) ||
+ compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
+ (net->pseudo_cumack == net->fast_recovery_tsn)) {
+ net->will_exit_fast_recovery = 1;
+ }
+ }
+#endif
+ if (sctp_early_fr) {
+ /*
+ * So, first of all do we need to have a Early FR
+ * timer running?
+ */
+ if (((TAILQ_FIRST(&asoc->sent_queue)) &&
+ (net->ref_count > 1) &&
+ (net->flight_size < net->cwnd)) ||
+ (reneged_all)) {
+ /*
+ * yes, so in this case stop it if its
+ * running, and then restart it. Reneging
+ * all is a special case where we want to
+ * run the Early FR timer and then force the
+ * last few unacked to be sent, causing us
+ * to illicit a sack with gaps to force out
+ * the others.
+ */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
+ }
+ SCTP_STAT_INCR(sctps_earlyfrstrid);
+ sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
+ } else {
+ /* No, stop it if its running */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
+ }
+ }
+ }
+ /* if nothing was acked on this destination skip it */
+ if (net->net_ack == 0) {
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
+ }
+ continue;
+ }
+ if (net->net_ack2 > 0) {
+ /*
+ * Karn's rule applies to clearing error count, this
+ * is optional.
+ */
+ net->error_count = 0;
+ if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
+ SCTP_ADDR_NOT_REACHABLE) {
+ /* addr came good */
+ net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
+ net->dest_state |= SCTP_ADDR_REACHABLE;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
+ SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
+ /* now was it the primary? if so restore */
+ if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
+ (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
+ }
+ }
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the destination
+ * is in PF state, set the destination to active
+ * state and set the cwnd to one or two MTU's based
+ * on whether PF1 or PF2 is being used.
+ *
+ * Should we stop any running T3 timer here?
+ */
+ if (sctp_cmt_on_off && sctp_cmt_pf && ((net->dest_state & SCTP_ADDR_PF) ==
+ SCTP_ADDR_PF)) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ net->cwnd = net->mtu * sctp_cmt_pf;
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
+ net, net->cwnd);
+ /*
+ * Since the cwnd value is explicitly set,
+ * skip the code that updates the cwnd
+ * value.
+ */
+ goto skip_cwnd_update;
+ }
+ }
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code
+ */
+ /*
+ * if (sctp_cmt_on_off == 1 &&
+ * net->fast_retran_loss_recovery &&
+ * net->will_exit_fast_recovery == 0) { @@@ Do something }
+ * else if (sctp_cmt_on_off == 0 &&
+ * asoc->fast_retran_loss_recovery && will_exit == 0) {
+ */
+#endif
+
+ if (asoc->fast_retran_loss_recovery && will_exit == 0 && sctp_cmt_on_off == 0) {
+ /*
+ * If we are in loss recovery we skip any cwnd
+ * update
+ */
+ goto skip_cwnd_update;
+ }
+ /*
+ * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
+ * moved.
+ */
+ if (accum_moved || (sctp_cmt_on_off && net->new_pseudo_cumack)) {
+ /* If the cumulative ack moved we can proceed */
+ if (net->cwnd <= net->ssthresh) {
+ /* We are in slow start */
+ if (net->flight_size + net->net_ack >=
+ net->cwnd) {
+ if (net->net_ack > (net->mtu * sctp_L2_abc_variable)) {
+ net->cwnd += (net->mtu * sctp_L2_abc_variable);
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_FROM_SS);
+ }
+ } else {
+ net->cwnd += net->net_ack;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_FROM_SS);
+ }
+ }
+ } else {
+ unsigned int dif;
+
+ dif = net->cwnd - (net->flight_size +
+ net->net_ack);
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_SS);
+ }
+ }
+ } else {
+ /* We are in congestion avoidance */
+ if (net->flight_size + net->net_ack >=
+ net->cwnd) {
+ /*
+ * add to pba only if we had a
+ * cwnd's worth (or so) in flight OR
+ * the burst limit was applied.
+ */
+ net->partial_bytes_acked +=
+ net->net_ack;
+
+ /*
+ * Do we need to increase (if pba is
+ * > cwnd)?
+ */
+ if (net->partial_bytes_acked >=
+ net->cwnd) {
+ if (net->cwnd <
+ net->partial_bytes_acked) {
+ net->partial_bytes_acked -=
+ net->cwnd;
+ } else {
+ net->partial_bytes_acked =
+ 0;
+ }
+ net->cwnd += net->mtu;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_FROM_CA);
+ }
+ } else {
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_CA);
+ }
+ }
+ } else {
+ unsigned int dif;
+
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_CA);
+ }
+ dif = net->cwnd - (net->flight_size +
+ net->net_ack);
+ }
+ }
+ } else {
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_NO_CUMACK);
+ }
+ }
+skip_cwnd_update:
+ /*
+ * NOW, according to Karn's rule do we need to restore the
+ * RTO timer back? Check our net_ack2. If not set then we
+ * have a ambiguity.. i.e. all data ack'd was sent to more
+ * than one place.
+ */
+ if (net->net_ack2) {
+ /* restore any doubled timers */
+ net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ if (net->RTO < stcb->asoc.minrto) {
+ net->RTO = stcb->asoc.minrto;
+ }
+ if (net->RTO > stcb->asoc.maxrto) {
+ net->RTO = stcb->asoc.maxrto;
+ }
+ }
+ }
+}
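
The SACK handler above has two regimes: in slow start (cwnd <= ssthresh) the window grows by the newly acked bytes, bounded by sctp_L2_abc_variable MTUs, while in congestion avoidance it grows by one MTU only after a full cwnd's worth of bytes has accumulated in partial_bytes_acked. The sketch below restates just that split; it is illustrative and omits the flight-size check, Early FR and CMT handling that the real function also performs ("abc_l" stands in for sctp_L2_abc_variable).

/* Illustrative slow-start / congestion-avoidance split (user space). */
#include <stdio.h>

struct path {
	unsigned long cwnd, ssthresh, mtu;
	unsigned long partial_bytes_acked;
};

static void on_sack(struct path *p, unsigned long net_ack, unsigned long abc_l)
{
	if (p->cwnd <= p->ssthresh) {
		/* slow start: grow by the acked bytes, bounded by L * MTU */
		unsigned long incr = net_ack;
		if (incr > abc_l * p->mtu)
			incr = abc_l * p->mtu;
		p->cwnd += incr;
	} else {
		/* congestion avoidance: one MTU per cwnd's worth of acks */
		p->partial_bytes_acked += net_ack;
		if (p->partial_bytes_acked >= p->cwnd) {
			p->partial_bytes_acked -= p->cwnd;
			p->cwnd += p->mtu;
		}
	}
}

int main(void)
{
	struct path p = { .cwnd = 3000, .ssthresh = 6000, .mtu = 1500,
	                  .partial_bytes_acked = 0 };

	on_sack(&p, 3000, 2);   /* slow start: +3000 -> 6000 */
	on_sack(&p, 3000, 2);   /* still <= ssthresh: +3000 -> 9000 */
	on_sack(&p, 6000, 2);   /* congestion avoidance: pba 6000 < cwnd 9000, no growth yet */
	printf("cwnd=%lu pba=%lu\n", p.cwnd, p.partial_bytes_acked);
	return 0;
}
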
+
+void
+sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ int old_cwnd = net->cwnd;
+
+ net->ssthresh = net->cwnd >> 1;
+ if (net->ssthresh < (net->mtu << 1)) {
+ net->ssthresh = (net->mtu << 1);
+ }
+ net->cwnd = net->mtu;
+ /* floor of 1 mtu */
+ if (net->cwnd < net->mtu)
+ net->cwnd = net->mtu;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
+ }
+ net->partial_bytes_acked = 0;
+}
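
On a retransmission timeout the code above halves ssthresh, floors it at two MTUs, and collapses cwnd back to a single MTU. A minimal restatement with illustrative values:

/* Illustrative restatement of the timeout handling above. */
#include <stdio.h>

static void on_timeout(unsigned long *cwnd, unsigned long *ssthresh,
    unsigned long mtu)
{
	*ssthresh = *cwnd / 2;
	if (*ssthresh < 2 * mtu)
		*ssthresh = 2 * mtu;    /* floor of two MTUs */
	*cwnd = mtu;                    /* restart from one MTU */
}

int main(void)
{
	unsigned long cwnd = 12000, ssthresh = 20000;

	on_timeout(&cwnd, &ssthresh, 1500);
	printf("cwnd=%lu ssthresh=%lu\n", cwnd, ssthresh);  /* 1500, 6000 */
	return 0;
}
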
+
+struct sctp_hs_raise_drop {
+ int32_t cwnd;
+ int32_t increase;
+ int32_t drop_percent;
+};
+
+#define SCTP_HS_TABLE_SIZE 73
+
+struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = {
+ {38, 1, 50}, /* 0 */
+ {118, 2, 44}, /* 1 */
+ {221, 3, 41}, /* 2 */
+ {347, 4, 38}, /* 3 */
+ {495, 5, 37}, /* 4 */
+ {663, 6, 35}, /* 5 */
+ {851, 7, 34}, /* 6 */
+ {1058, 8, 33}, /* 7 */
+ {1284, 9, 32}, /* 8 */
+ {1529, 10, 31}, /* 9 */
+ {1793, 11, 30}, /* 10 */
+ {2076, 12, 29}, /* 11 */
+ {2378, 13, 28}, /* 12 */
+ {2699, 14, 28}, /* 13 */
+ {3039, 15, 27}, /* 14 */
+ {3399, 16, 27}, /* 15 */
+ {3778, 17, 26}, /* 16 */
+ {4177, 18, 26}, /* 17 */
+ {4596, 19, 25}, /* 18 */
+ {5036, 20, 25}, /* 19 */
+ {5497, 21, 24}, /* 20 */
+ {5979, 22, 24}, /* 21 */
+ {6483, 23, 23}, /* 22 */
+ {7009, 24, 23}, /* 23 */
+ {7558, 25, 22}, /* 24 */
+ {8130, 26, 22}, /* 25 */
+ {8726, 27, 22}, /* 26 */
+ {9346, 28, 21}, /* 27 */
+ {9991, 29, 21}, /* 28 */
+ {10661, 30, 21}, /* 29 */
+ {11358, 31, 20}, /* 30 */
+ {12082, 32, 20}, /* 31 */
+ {12834, 33, 20}, /* 32 */
+ {13614, 34, 19}, /* 33 */
+ {14424, 35, 19}, /* 34 */
+ {15265, 36, 19}, /* 35 */
+ {16137, 37, 19}, /* 36 */
+ {17042, 38, 18}, /* 37 */
+ {17981, 39, 18}, /* 38 */
+ {18955, 40, 18}, /* 39 */
+ {19965, 41, 17}, /* 40 */
+ {21013, 42, 17}, /* 41 */
+ {22101, 43, 17}, /* 42 */
+ {23230, 44, 17}, /* 43 */
+ {24402, 45, 16}, /* 44 */
+ {25618, 46, 16}, /* 45 */
+ {26881, 47, 16}, /* 46 */
+ {28193, 48, 16}, /* 47 */
+ {29557, 49, 15}, /* 48 */
+ {30975, 50, 15}, /* 49 */
+ {32450, 51, 15}, /* 50 */
+ {33986, 52, 15}, /* 51 */
+ {35586, 53, 14}, /* 52 */
+ {37253, 54, 14}, /* 53 */
+ {38992, 55, 14}, /* 54 */
+ {40808, 56, 14}, /* 55 */
+ {42707, 57, 13}, /* 56 */
+ {44694, 58, 13}, /* 57 */
+ {46776, 59, 13}, /* 58 */
+ {48961, 60, 13}, /* 59 */
+ {51258, 61, 13}, /* 60 */
+ {53677, 62, 12}, /* 61 */
+ {56230, 63, 12}, /* 62 */
+ {58932, 64, 12}, /* 63 */
+ {61799, 65, 12}, /* 64 */
+ {64851, 66, 11}, /* 65 */
+ {68113, 67, 11}, /* 66 */
+ {71617, 68, 11}, /* 67 */
+ {75401, 69, 10}, /* 68 */
+ {79517, 70, 10}, /* 69 */
+ {84035, 71, 10}, /* 70 */
+ {89053, 72, 10}, /* 71 */
+ {94717, 73, 9} /* 72 */
+};
+
+static void
+sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int cur_val, i, indx, incr;
+
+ cur_val = net->cwnd >> 10;
+ indx = SCTP_HS_TABLE_SIZE - 1;
+#ifdef SCTP_DEBUG
+ printf("HS CC Called.\n");
+#endif
+ if (cur_val < sctp_cwnd_adjust[0].cwnd) {
+ /* normal mode */
+ if (net->net_ack > net->mtu) {
+ net->cwnd += net->mtu;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS);
+ }
+ } else {
+ net->cwnd += net->net_ack;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS);
+ }
+ }
+ } else {
+ for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) {
+ if (cur_val < sctp_cwnd_adjust[i].cwnd) {
+ indx = i;
+ break;
+ }
+ }
+ net->last_hs_used = indx;
+ incr = ((sctp_cwnd_adjust[indx].increase) << 10);
+ net->cwnd += incr;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS);
+ }
+ }
+}
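
The HighSpeed variant indexes the sctp_cwnd_adjust table by the current cwnd in kilobytes (cwnd >> 10) and adds the matching row's "increase", also in kilobytes. The sketch below uses a four-row excerpt of the table to show the lookup; below the first threshold it falls back to ordinary one-MTU growth, which is a simplification of the real net_ack handling.

/* Illustrative HighSpeed table lookup (four-row excerpt only). */
#include <stdio.h>

struct hs_row { long cwnd_kb; long increase_kb; };

static const struct hs_row table[] = {
	{ 38, 1 }, { 118, 2 }, { 221, 3 }, { 347, 4 },
};
#define ROWS (sizeof(table) / sizeof(table[0]))

static long hs_increase(long cwnd_bytes, long mtu)
{
	long cur = cwnd_bytes >> 10;    /* bytes -> kilobytes */
	size_t idx = ROWS - 1;

	if (cur < table[0].cwnd_kb)
		return mtu;             /* "normal mode": one MTU (simplified) */
	for (size_t i = 0; i < ROWS; i++) {
		if (cur < table[i].cwnd_kb) {
			idx = i;
			break;
		}
	}
	return table[idx].increase_kb << 10;    /* kilobytes -> bytes */
}

int main(void)
{
	/* A 200 KB window falls in the {221, 3} row: increase by 3 KB. */
	printf("%ld\n", hs_increase(200 * 1024, 1500));
	return 0;
}
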
+
+static void
+sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int cur_val, i, indx;
+ int old_cwnd = net->cwnd;
+
+ cur_val = net->cwnd >> 10;
+ indx = net->last_hs_used;
+ if (cur_val < sctp_cwnd_adjust[0].cwnd) {
+ /* normal mode */
+ net->ssthresh = net->cwnd / 2;
+ if (net->ssthresh < (net->mtu * 2)) {
+ net->ssthresh = 2 * net->mtu;
+ }
+ net->cwnd = net->ssthresh;
+ } else {
+ /* drop by the proper amount */
+ net->ssthresh = net->cwnd - (int)((net->cwnd / 100) *
+ sctp_cwnd_adjust[net->last_hs_used].drop_percent);
+ net->cwnd = net->ssthresh;
+ /* now where are we */
+ indx = net->last_hs_used;
+ cur_val = net->cwnd >> 10;
+ /* reset where we are in the table */
+ if (cur_val < sctp_cwnd_adjust[0].cwnd) {
+ /* feel out of hs */
+ net->last_hs_used = 0;
+ } else {
+ for (i = indx; i >= 1; i--) {
+ if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) {
+ break;
+ }
+ }
+ net->last_hs_used = indx;
+ }
+ }
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR);
+ }
+}
+
+void
+sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ struct sctp_nets *net;
+
+ /*
+ * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) &&
+ * (net->fast_retran_loss_recovery == 0)))
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if ((asoc->fast_retran_loss_recovery == 0) || (sctp_cmt_on_off == 1)) {
+ /* out of a RFC2582 Fast recovery window? */
+ if (net->net_ack > 0) {
+ /*
+ * per section 7.2.3, are there any
+ * destinations that had a fast retransmit
+ * to them. If so what we need to do is
+ * adjust ssthresh and cwnd.
+ */
+ struct sctp_tmit_chunk *lchk;
+
+ sctp_hs_cwnd_decrease(stcb, net);
+
+ lchk = TAILQ_FIRST(&asoc->send_queue);
+
+ net->partial_bytes_acked = 0;
+ /* Turn on fast recovery window */
+ asoc->fast_retran_loss_recovery = 1;
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ asoc->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * CMT fast recovery -- per destination
+ * recovery variable.
+ */
+ net->fast_retran_loss_recovery = 1;
+
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ net->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * Disable Nonce Sum Checking and store the
+ * resync tsn
+ */
+ asoc->nonce_sum_check = 0;
+ asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net);
+ }
+ } else if (net->net_ack > 0) {
+ /*
+ * Mark a peg that we WOULD have done a cwnd
+ * reduction but RFC2582 prevented this action.
+ */
+ SCTP_STAT_INCR(sctps_fastretransinrtt);
+ }
+ }
+}
+
+void
+sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit)
+{
+ struct sctp_nets *net;
+
+ /******************************/
+ /* update cwnd and Early FR */
+ /******************************/
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code. Need to debug.
+ */
+ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
+ if (compare_with_wrap(asoc->last_acked_seq,
+ net->fast_recovery_tsn, MAX_TSN) ||
+ (asoc->last_acked_seq == net->fast_recovery_tsn) ||
+ compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
+ (net->pseudo_cumack == net->fast_recovery_tsn)) {
+ net->will_exit_fast_recovery = 1;
+ }
+ }
+#endif
+ if (sctp_early_fr) {
+ /*
+ * So, first of all do we need to have a Early FR
+ * timer running?
+ */
+ if (((TAILQ_FIRST(&asoc->sent_queue)) &&
+ (net->ref_count > 1) &&
+ (net->flight_size < net->cwnd)) ||
+ (reneged_all)) {
+ /*
+ * yes, so in this case stop it if its
+ * running, and then restart it. Reneging
+ * all is a special case where we want to
+ * run the Early FR timer and then force the
+ * last few unacked to be sent, causing us
+ * to illicit a sack with gaps to force out
+ * the others.
+ */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
+ }
+ SCTP_STAT_INCR(sctps_earlyfrstrid);
+ sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
+ } else {
+ /* No, stop it if its running */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
+ }
+ }
+ }
+ /* if nothing was acked on this destination skip it */
+ if (net->net_ack == 0) {
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
+ }
+ continue;
+ }
+ if (net->net_ack2 > 0) {
+ /*
+ * Karn's rule applies to clearing error count, this
+ * is optional.
+ */
+ net->error_count = 0;
+ if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
+ SCTP_ADDR_NOT_REACHABLE) {
+ /* addr came good */
+ net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
+ net->dest_state |= SCTP_ADDR_REACHABLE;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
+ SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
+ /* now was it the primary? if so restore */
+ if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
+ (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
+ }
+ }
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the destination
+ * is in PF state, set the destination to active
+ * state and set the cwnd to one or two MTU's based
+ * on whether PF1 or PF2 is being used.
+ *
+ * Should we stop any running T3 timer here?
+ */
+ if (sctp_cmt_on_off && sctp_cmt_pf && ((net->dest_state & SCTP_ADDR_PF) ==
+ SCTP_ADDR_PF)) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ net->cwnd = net->mtu * sctp_cmt_pf;
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
+ net, net->cwnd);
+ /*
+ * Since the cwnd value is explicitly set,
+ * skip the code that updates the cwnd
+ * value.
+ */
+ goto skip_cwnd_update;
+ }
+ }
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code
+ */
+ /*
+ * if (sctp_cmt_on_off == 1 &&
+ * net->fast_retran_loss_recovery &&
+ * net->will_exit_fast_recovery == 0) { @@@ Do something }
+ * else if (sctp_cmt_on_off == 0 &&
+ * asoc->fast_retran_loss_recovery && will_exit == 0) {
+ */
+#endif
+
+ if (asoc->fast_retran_loss_recovery && will_exit == 0 && sctp_cmt_on_off == 0) {
+ /*
+ * If we are in loss recovery we skip any cwnd
+ * update
+ */
+ goto skip_cwnd_update;
+ }
+ /*
+ * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
+ * moved.
+ */
+ if (accum_moved || (sctp_cmt_on_off && net->new_pseudo_cumack)) {
+ /* If the cumulative ack moved we can proceed */
+ if (net->cwnd <= net->ssthresh) {
+ /* We are in slow start */
+ if (net->flight_size + net->net_ack >=
+ net->cwnd) {
+
+ sctp_hs_cwnd_increase(stcb, net);
+
+ } else {
+ unsigned int dif;
+
+ dif = net->cwnd - (net->flight_size +
+ net->net_ack);
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_SS);
+ }
+ }
+ } else {
+ /* We are in congestion avoidance */
+ if (net->flight_size + net->net_ack >=
+ net->cwnd) {
+ /*
+ * add to pba only if we had a
+ * cwnd's worth (or so) in flight OR
+ * the burst limit was applied.
+ */
+ net->partial_bytes_acked +=
+ net->net_ack;
+
+ /*
+ * Do we need to increase (if pba is
+ * > cwnd)?
+ */
+ if (net->partial_bytes_acked >=
+ net->cwnd) {
+ if (net->cwnd <
+ net->partial_bytes_acked) {
+ net->partial_bytes_acked -=
+ net->cwnd;
+ } else {
+ net->partial_bytes_acked =
+ 0;
+ }
+ net->cwnd += net->mtu;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_FROM_CA);
+ }
+ } else {
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_CA);
+ }
+ }
+ } else {
+ unsigned int dif;
+
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_CA);
+ }
+ dif = net->cwnd - (net->flight_size +
+ net->net_ack);
+ }
+ }
+ } else {
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_NO_CUMACK);
+ }
+ }
+skip_cwnd_update:
+ /*
+ * NOW, according to Karn's rule do we need to restore the
+ * RTO timer back? Check our net_ack2. If not set then we
+ * have a ambiguity.. i.e. all data ack'd was sent to more
+ * than one place.
+ */
+ if (net->net_ack2) {
+ /* restore any doubled timers */
+ net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ if (net->RTO < stcb->asoc.minrto) {
+ net->RTO = stcb->asoc.minrto;
+ }
+ if (net->RTO > stcb->asoc.maxrto) {
+ net->RTO = stcb->asoc.maxrto;
+ }
+ }
+ }
+}
+
+void
+sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ int old_cwnd;
+
+ old_cwnd = net->cwnd;
+
+ SCTP_STAT_INCR(sctps_ecnereducedcwnd);
+ net->ssthresh = net->cwnd / 2;
+ if (net->ssthresh < net->mtu) {
+ net->ssthresh = net->mtu;
+ /* here back off the timer as well, to slow us down */
+ net->RTO <<= 1;
+ }
+ net->cwnd = net->ssthresh;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
+ }
+}
+
+void
+sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
+ struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
+ uint32_t * bottle_bw, uint32_t * on_queue)
+{
+ uint32_t bw_avail;
+ int rtt, incr;
+ int old_cwnd = net->cwnd;
+
+ /* need real RTT for this calc */
+ rtt = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ /* get bottle neck bw */
+ *bottle_bw = ntohl(cp->bottle_bw);
+ /* and whats on queue */
+ *on_queue = ntohl(cp->current_onq);
+ /*
+ * adjust the on-queue if our flight is more it could be that the
+ * router has not yet gotten data "in-flight" to it
+ */
+ if (*on_queue < net->flight_size)
+ *on_queue = net->flight_size;
+ /* calculate the available space */
+ bw_avail = (*bottle_bw * rtt) / 1000;
+ if (bw_avail > *bottle_bw) {
+ /*
+ * Cap the growth to no more than the bottle neck. This can
+ * happen as RTT slides up due to queues. It also means if
+ * you have more than a 1 second RTT with a empty queue you
+ * will be limited to the bottle_bw per second no matter if
+ * other points have 1/2 the RTT and you could get more
+ * out...
+ */
+ bw_avail = *bottle_bw;
+ }
+ if (*on_queue > bw_avail) {
+ /*
+ * No room for anything else don't allow anything else to be
+ * "added to the fire".
+ */
+ int seg_inflight, seg_onqueue, my_portion;
+
+ net->partial_bytes_acked = 0;
+
+ /* how much are we over queue size? */
+ incr = *on_queue - bw_avail;
+ if (stcb->asoc.seen_a_sack_this_pkt) {
+ /*
+ * undo any cwnd adjustment that the sack might have
+ * made
+ */
+ net->cwnd = net->prev_cwnd;
+ }
+ /* Now how much of that is mine? */
+ seg_inflight = net->flight_size / net->mtu;
+ seg_onqueue = *on_queue / net->mtu;
+ my_portion = (incr * seg_inflight) / seg_onqueue;
+
+ /* Have I made an adjustment already */
+ if (net->cwnd > net->flight_size) {
+ /*
+ * for this flight I made an adjustment we need to
+ * decrease the portion by a share our previous
+ * adjustment.
+ */
+ int diff_adj;
+
+ diff_adj = net->cwnd - net->flight_size;
+ if (diff_adj > my_portion)
+ my_portion = 0;
+ else
+ my_portion -= diff_adj;
+ }
+ /*
+ * back down to the previous cwnd (assume we have had a sack
+ * before this packet). minus what ever portion of the
+ * overage is my fault.
+ */
+ net->cwnd -= my_portion;
+
+ /* we will NOT back down more than 1 MTU */
+ if (net->cwnd <= net->mtu) {
+ net->cwnd = net->mtu;
+ }
+ /* force into CA */
+ net->ssthresh = net->cwnd - 1;
+ } else {
+ /*
+ * Take 1/4 of the space left or max burst up .. whichever
+ * is less.
+ */
+ incr = min((bw_avail - *on_queue) >> 2,
+ stcb->asoc.max_burst * net->mtu);
+ net->cwnd += incr;
+ }
+ if (net->cwnd > bw_avail) {
+ /* We can't exceed the pipe size */
+ net->cwnd = bw_avail;
+ }
+ if (net->cwnd < net->mtu) {
+ /* We always have 1 MTU */
+ net->cwnd = net->mtu;
+ }
+ if (net->cwnd - old_cwnd != 0) {
+ /* log only changes */
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
+ SCTP_CWND_LOG_FROM_SAT);
+ }
+ }
+}
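
The packet-drop handler above sizes the pipe as bottle_bw * rtt / 1000, treats whatever the peer reports queued beyond that as overshoot, and gives back the sender's proportional share of it, never going below one MTU or above the pipe. The sketch below covers only that overshoot branch; it is illustrative and leaves out the prev_cwnd undo, the max_burst growth branch and the ssthresh adjustment. Units follow the code above: bottle_bw in bytes per second, rtt in milliseconds.

/* Illustrative restatement of the overshoot branch above. */
#include <stdio.h>

static unsigned long shrink_on_drop(unsigned long cwnd, unsigned long mtu,
    unsigned long flight, unsigned long bottle_bw, unsigned long on_queue,
    unsigned long rtt_ms)
{
	unsigned long bw_avail = bottle_bw * rtt_ms / 1000;

	if (bw_avail > bottle_bw)
		bw_avail = bottle_bw;   /* cap at one second's worth of data */
	if (on_queue < flight)
		on_queue = flight;      /* router hasn't seen everything yet */

	if (on_queue > bw_avail) {
		unsigned long over = on_queue - bw_avail;
		unsigned long seg_inflight = flight / mtu;
		unsigned long seg_onqueue = on_queue / mtu;
		unsigned long my_share = over * seg_inflight / seg_onqueue;

		cwnd = cwnd > my_share ? cwnd - my_share : mtu;
	}
	if (cwnd > bw_avail)
		cwnd = bw_avail;        /* cannot exceed the pipe */
	if (cwnd < mtu)
		cwnd = mtu;             /* always keep at least one MTU */
	return cwnd;
}

int main(void)
{
	/* 1 MB/s bottleneck, 100 ms RTT -> 100000-byte pipe; 150000 bytes
	 * reported queued, 90000 of them ours: give back 30000 bytes. */
	printf("%lu\n", shrink_on_drop(120000, 1500, 90000, 1000000, 150000, 100));
	return 0;
}
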
+
+void
+sctp_cwnd_update_after_output(struct sctp_tcb *stcb,
+ struct sctp_nets *net, int burst_limit)
+{
+ int old_cwnd;
+
+ if (net->ssthresh < net->cwnd)
+ net->ssthresh = net->cwnd;
+ old_cwnd = net->cwnd;
+ net->cwnd = (net->flight_size + (burst_limit * net->mtu));
+
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST);
+ }
+}
+
+void
+sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int old_cwnd;
+
+ old_cwnd = net->cwnd;
+
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
+ /*
+ * make a small adjustment to cwnd and force to CA.
+ */
+ if (net->cwnd > net->mtu)
+ /* drop down one MTU after sending */
+ net->cwnd -= net->mtu;
+ if (net->cwnd < net->ssthresh)
+ /* still in SS move to CA */
+ net->ssthresh = net->cwnd - 1;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR);
+ }
+}
+
+/*
+ * H-TCP congestion control. The algorithm is detailed in:
+ * R.N.Shorten, D.J.Leith:
+ * "H-TCP: TCP for high-speed and long-distance networks"
+ * Proc. PFLDnet, Argonne, 2004.
+ * http://www.hamilton.ie/net/htcp3.pdf
+ */
+
+
+static int use_rtt_scaling = 1;
+static int use_bandwidth_switch = 1;
+
+static inline int
+between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
+{
+ return seq3 - seq2 >= seq1 - seq2;
+}
+
+static inline uint32_t
+htcp_cong_time(struct htcp *ca)
+{
+ return sctp_get_tick_count() - ca->last_cong;
+}
+
+static inline uint32_t
+htcp_ccount(struct htcp *ca)
+{
+ return htcp_cong_time(ca) / ca->minRTT;
+}
+
+static inline void
+htcp_reset(struct htcp *ca)
+{
+ ca->undo_last_cong = ca->last_cong;
+ ca->undo_maxRTT = ca->maxRTT;
+ ca->undo_old_maxB = ca->old_maxB;
+ ca->last_cong = sctp_get_tick_count();
+}
+
+#ifdef SCTP_NOT_USED
+
+static uint32_t
+htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ net->htcp_ca.last_cong = net->htcp_ca.undo_last_cong;
+ net->htcp_ca.maxRTT = net->htcp_ca.undo_maxRTT;
+ net->htcp_ca.old_maxB = net->htcp_ca.undo_old_maxB;
+ return max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->htcp_ca.beta) * net->mtu);
+}
+
+#endif
+
+static inline void
+measure_rtt(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ uint32_t srtt = net->lastsa >> 3;
+
+ /* keep track of minimum RTT seen so far, minRTT is zero at first */
+ if (net->htcp_ca.minRTT > srtt || !net->htcp_ca.minRTT)
+ net->htcp_ca.minRTT = srtt;
+
+ /* max RTT */
+ if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->htcp_ca) > 3) {
+ if (net->htcp_ca.maxRTT < net->htcp_ca.minRTT)
+ net->htcp_ca.maxRTT = net->htcp_ca.minRTT;
+ if (net->htcp_ca.maxRTT < srtt && srtt <= net->htcp_ca.maxRTT + MSEC_TO_TICKS(20))
+ net->htcp_ca.maxRTT = srtt;
+ }
+}
+
+static void
+measure_achieved_throughput(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ uint32_t now = sctp_get_tick_count();
+
+ if (net->fast_retran_ip == 0)
+ net->htcp_ca.bytes_acked = net->net_ack;
+
+ if (!use_bandwidth_switch)
+ return;
+
+ /* achieved throughput calculations */
+ /* JRS - not 100% sure of this statement */
+ if (net->fast_retran_ip == 1) {
+ net->htcp_ca.bytecount = 0;
+ net->htcp_ca.lasttime = now;
+ return;
+ }
+ net->htcp_ca.bytecount += net->net_ack;
+
+ if (net->htcp_ca.bytecount >= net->cwnd - ((net->htcp_ca.alpha >> 7 ? : 1) * net->mtu)
+ && now - net->htcp_ca.lasttime >= net->htcp_ca.minRTT
+ && net->htcp_ca.minRTT > 0) {
+ uint32_t cur_Bi = net->htcp_ca.bytecount / net->mtu * hz / (now - net->htcp_ca.lasttime);
+
+ if (htcp_ccount(&net->htcp_ca) <= 3) {
+ /* just after backoff */
+ net->htcp_ca.minB = net->htcp_ca.maxB = net->htcp_ca.Bi = cur_Bi;
+ } else {
+ net->htcp_ca.Bi = (3 * net->htcp_ca.Bi + cur_Bi) / 4;
+ if (net->htcp_ca.Bi > net->htcp_ca.maxB)
+ net->htcp_ca.maxB = net->htcp_ca.Bi;
+ if (net->htcp_ca.minB > net->htcp_ca.maxB)
+ net->htcp_ca.minB = net->htcp_ca.maxB;
+ }
+ net->htcp_ca.bytecount = 0;
+ net->htcp_ca.lasttime = now;
+ }
+}
+
+static inline void
+htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT)
+{
+ if (use_bandwidth_switch) {
+ uint32_t maxB = ca->maxB;
+ uint32_t old_maxB = ca->old_maxB;
+
+ ca->old_maxB = ca->maxB;
+
+ if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
+ ca->beta = BETA_MIN;
+ ca->modeswitch = 0;
+ return;
+ }
+ }
+ if (ca->modeswitch && minRTT > (uint32_t) MSEC_TO_TICKS(10) && maxRTT) {
+ ca->beta = (minRTT << 7) / maxRTT;
+ if (ca->beta < BETA_MIN)
+ ca->beta = BETA_MIN;
+ else if (ca->beta > BETA_MAX)
+ ca->beta = BETA_MAX;
+ } else {
+ ca->beta = BETA_MIN;
+ ca->modeswitch = 1;
+ }
+}
+
+static inline void
+htcp_alpha_update(struct htcp *ca)
+{
+ uint32_t minRTT = ca->minRTT;
+ uint32_t factor = 1;
+ uint32_t diff = htcp_cong_time(ca);
+
+ if (diff > (uint32_t) hz) {
+ diff -= hz;
+ factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / hz)) / hz;
+ }
+ if (use_rtt_scaling && minRTT) {
+ uint32_t scale = (hz << 3) / (10 * minRTT);
+
+ scale = min(max(scale, 1U << 2), 10U << 3); /* clamping ratio to
+ * interval [0.5,10]<<3 */
+ factor = (factor << 3) / scale;
+ if (!factor)
+ factor = 1;
+ }
+ ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
+ if (!ca->alpha)
+ ca->alpha = ALPHA_BASE;
+}
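
htcp_alpha_update() above implements the H-TCP growth function in fixed point (beta scaled by 2^7, the RTT scale by 2^3): once more than a second has passed since the last congestion event, factor = 1 + 10*d + (d/2)^2 with d measured in seconds, and alpha = 2 * factor * (1 - beta). The floating-point restatement below omits the optional RTT scaling and is only meant to make the arithmetic easier to follow.

/* Floating-point restatement of the H-TCP alpha computation above. */
#include <stdio.h>

static double htcp_alpha(double secs_since_cong, double beta)
{
	double factor = 1.0;

	if (secs_since_cong > 1.0) {
		double d = secs_since_cong - 1.0;
		/* 1 + 10*d + (d/2)^2: the H-TCP growth polynomial */
		factor = 1.0 + 10.0 * d + (d / 2.0) * (d / 2.0);
	}
	return 2.0 * factor * (1.0 - beta);
}

int main(void)
{
	/* beta = 0.5, three seconds since the last congestion event:
	 * d = 2, factor = 1 + 20 + 1 = 22, alpha = 2 * 22 * 0.5 = 22. */
	printf("%.1f\n", htcp_alpha(3.0, 0.5));
	return 0;
}
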
+
+/* After we have the rtt data to calculate beta, we'd still prefer to wait one
+ * rtt before we adjust our beta to ensure we are working from a consistent
+ * data.
+ *
+ * This function should be called when we hit a congestion event since only at
+ * that point do we really have a real sense of maxRTT (the queues en route
+ * were getting just too full now).
+ */
+static void
+htcp_param_update(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ uint32_t minRTT = net->htcp_ca.minRTT;
+ uint32_t maxRTT = net->htcp_ca.maxRTT;
+
+ htcp_beta_update(&net->htcp_ca, minRTT, maxRTT);
+ htcp_alpha_update(&net->htcp_ca);
+
+ /*
+ * add slowly fading memory for maxRTT to accommodate routing
+ * changes etc
+ */
+ if (minRTT > 0 && maxRTT > minRTT)
+ net->htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
+}
+
+static uint32_t
+htcp_recalc_ssthresh(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ htcp_param_update(stcb, net);
+ return max(((net->cwnd / net->mtu * net->htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu);
+}
+
+static void
+htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /*-
+ * How to handle these functions?
+ * if (!tcp_is_cwnd_limited(sk, in_flight)) RRS - good question.
+ * return;
+ */
+ if (net->cwnd <= net->ssthresh) {
+ /* We are in slow start */
+ if (net->flight_size + net->net_ack >= net->cwnd) {
+ if (net->net_ack > (net->mtu * sctp_L2_abc_variable)) {
+ net->cwnd += (net->mtu * sctp_L2_abc_variable);
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_FROM_SS);
+ }
+ } else {
+ net->cwnd += net->net_ack;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_FROM_SS);
+ }
+ }
+ } else {
+ unsigned int dif;
+
+ dif = net->cwnd - (net->flight_size +
+ net->net_ack);
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_SS);
+ }
+ }
+ } else {
+ measure_rtt(stcb, net);
+
+ /*
+ * In dangerous area, increase slowly. In theory this is
+ * net->cwnd += alpha / net->cwnd
+ */
+ /* What is snd_cwnd_cnt?? */
+ if (((net->partial_bytes_acked / net->mtu * net->htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) {
+ /*-
+ * Does SCTP have a cwnd clamp?
+ * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS).
+ */
+ net->cwnd += net->mtu;
+ net->partial_bytes_acked = 0;
+ htcp_alpha_update(&net->htcp_ca);
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_FROM_CA);
+ }
+ } else {
+ net->partial_bytes_acked += net->net_ack;
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->net_ack,
+ SCTP_CWND_LOG_NOADV_CA);
+ }
+ }
+
+ net->htcp_ca.bytes_acked = net->mtu;
+ }
+}
+
+#ifdef SCTP_NOT_USED
+/* Lower bound on congestion window. */
+static uint32_t
+htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ return net->ssthresh;
+}
+
+#endif
+
+static void
+htcp_init(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ memset(&net->htcp_ca, 0, sizeof(struct htcp));
+ net->htcp_ca.alpha = ALPHA_BASE;
+ net->htcp_ca.beta = BETA_MIN;
+ net->htcp_ca.bytes_acked = net->mtu;
+ net->htcp_ca.last_cong = sctp_get_tick_count();
+}
+
+void
+sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /*
+ * We take the max of the burst limit times a MTU or the
+ * INITIAL_CWND. We then limit this to 4 MTU's of sending.
+ */
+ net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
+ /* we always get at LEAST 2 MTU's */
+ if (net->cwnd < (2 * net->mtu)) {
+ net->cwnd = 2 * net->mtu;
+ }
+ net->ssthresh = stcb->asoc.peers_rwnd;
+ htcp_init(stcb, net);
+
+ if (sctp_logging_level & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
+ }
+}
+
+void
+sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit)
+{
+ struct sctp_nets *net;
+
+ /******************************/
+ /* update cwnd and Early FR */
+ /******************************/
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code. Need to debug.
+ */
+ if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
+ if (compare_with_wrap(asoc->last_acked_seq,
+ net->fast_recovery_tsn, MAX_TSN) ||
+ (asoc->last_acked_seq == net->fast_recovery_tsn) ||
+ compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
+ (net->pseudo_cumack == net->fast_recovery_tsn)) {
+ net->will_exit_fast_recovery = 1;
+ }
+ }
+#endif
+ if (sctp_early_fr) {
+ /*
+ * So, first of all do we need to have a Early FR
+ * timer running?
+ */
+ if (((TAILQ_FIRST(&asoc->sent_queue)) &&
+ (net->ref_count > 1) &&
+ (net->flight_size < net->cwnd)) ||
+ (reneged_all)) {
+ /*
+ * yes, so in this case stop it if its
+ * running, and then restart it. Reneging
+ * all is a special case where we want to
+ * run the Early FR timer and then force the
+ * last few unacked to be sent, causing us
+ * to illicit a sack with gaps to force out
+ * the others.
+ */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
+ }
+ SCTP_STAT_INCR(sctps_earlyfrstrid);
+ sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
+ } else {
+ /* No, stop it if its running */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
+ SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
+ }
+ }
+ }
+ /* if nothing was acked on this destination skip it */
+ if (net->net_ack == 0) {
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
+ }
+ continue;
+ }
+ if (net->net_ack2 > 0) {
+ /*
+ * Karn's rule applies to clearing error count, this
+ * is optional.
+ */
+ net->error_count = 0;
+ if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
+ SCTP_ADDR_NOT_REACHABLE) {
+ /* addr came good */
+ net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
+ net->dest_state |= SCTP_ADDR_REACHABLE;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
+ SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
+ /* now was it the primary? if so restore */
+ if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
+ (void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
+ }
+ }
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the destination
+ * is in PF state, set the destination to active
+ * state and set the cwnd to one or two MTU's based
+ * on whether PF1 or PF2 is being used.
+ *
+ * Should we stop any running T3 timer here?
+ */
+ if (sctp_cmt_on_off && sctp_cmt_pf && ((net->dest_state & SCTP_ADDR_PF) ==
+ SCTP_ADDR_PF)) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ net->cwnd = net->mtu * sctp_cmt_pf;
+ SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
+ net, net->cwnd);
+ /*
+ * Since the cwnd value is explicitly set,
+ * skip the code that updates the cwnd
+ * value.
+ */
+ goto skip_cwnd_update;
+ }
+ }
+#ifdef JANA_CMT_FAST_RECOVERY
+ /*
+ * CMT fast recovery code
+ */
+ /*
+ * if (sctp_cmt_on_off == 1 &&
+ * net->fast_retran_loss_recovery &&
+ * net->will_exit_fast_recovery == 0) { @@@ Do something }
+ * else if (sctp_cmt_on_off == 0 &&
+ * asoc->fast_retran_loss_recovery && will_exit == 0) {
+ */
+#endif
+
+ if (asoc->fast_retran_loss_recovery && will_exit == 0 && sctp_cmt_on_off == 0) {
+ /*
+ * If we are in loss recovery we skip any cwnd
+ * update
+ */
+ goto skip_cwnd_update;
+ }
+ /*
+ * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
+ * moved.
+ */
+ if (accum_moved || (sctp_cmt_on_off && net->new_pseudo_cumack)) {
+ htcp_cong_avoid(stcb, net);
+ measure_achieved_throughput(stcb, net);
+ } else {
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->mtu,
+ SCTP_CWND_LOG_NO_CUMACK);
+ }
+ }
+skip_cwnd_update:
+ /*
+ * NOW, according to Karn's rule do we need to restore the
+ * RTO timer back? Check our net_ack2. If not set then we
+ * have a ambiguity.. i.e. all data ack'd was sent to more
+ * than one place.
+ */
+ if (net->net_ack2) {
+ /* restore any doubled timers */
+ net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ if (net->RTO < stcb->asoc.minrto) {
+ net->RTO = stcb->asoc.minrto;
+ }
+ if (net->RTO > stcb->asoc.maxrto) {
+ net->RTO = stcb->asoc.maxrto;
+ }
+ }
+ }
+}
+
+void
+sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ struct sctp_nets *net;
+
+ /*
+ * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) &&
+ * (net->fast_retran_loss_recovery == 0)))
+ */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if ((asoc->fast_retran_loss_recovery == 0) || (sctp_cmt_on_off == 1)) {
+ /* out of a RFC2582 Fast recovery window? */
+ if (net->net_ack > 0) {
+ /*
+ * per section 7.2.3, are there any
+ * destinations that had a fast retransmit
+ * to them. If so what we need to do is
+ * adjust ssthresh and cwnd.
+ */
+ struct sctp_tmit_chunk *lchk;
+ int old_cwnd = net->cwnd;
+
+ /* JRS - reset as if state were changed */
+ htcp_reset(&net->htcp_ca);
+ net->ssthresh = htcp_recalc_ssthresh(stcb, net);
+ net->cwnd = net->ssthresh;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
+ SCTP_CWND_LOG_FROM_FR);
+ }
+ lchk = TAILQ_FIRST(&asoc->send_queue);
+
+ net->partial_bytes_acked = 0;
+ /* Turn on fast recovery window */
+ asoc->fast_retran_loss_recovery = 1;
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ asoc->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * CMT fast recovery -- per destination
+ * recovery variable.
+ */
+ net->fast_retran_loss_recovery = 1;
+
+ if (lchk == NULL) {
+ /* Mark end of the window */
+ net->fast_recovery_tsn = asoc->sending_seq - 1;
+ } else {
+ net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
+ }
+
+ /*
+ * Disable Nonce Sum Checking and store the
+ * resync tsn
+ */
+ asoc->nonce_sum_check = 0;
+ asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;
+
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND,
+ stcb->sctp_ep, stcb, net);
+ }
+ } else if (net->net_ack > 0) {
+ /*
+ * Mark a peg that we WOULD have done a cwnd
+ * reduction but RFC2582 prevented this action.
+ */
+ SCTP_STAT_INCR(sctps_fastretransinrtt);
+ }
+ }
+}
+
+void
+sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ int old_cwnd = net->cwnd;
+
+ /* JRS - reset as if the state were being changed to timeout */
+ htcp_reset(&net->htcp_ca);
+ net->ssthresh = htcp_recalc_ssthresh(stcb, net);
+ net->cwnd = net->mtu;
+ /* floor of 1 mtu */
+ if (net->cwnd < net->mtu)
+ net->cwnd = net->mtu;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
+ }
+ net->partial_bytes_acked = 0;
+}
+
+void
+sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ int old_cwnd;
+
+ old_cwnd = net->cwnd;
+
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
+ net->htcp_ca.last_cong = sctp_get_tick_count();
+ /*
+ * make a small adjustment to cwnd and force to CA.
+ */
+ if (net->cwnd > net->mtu)
+ /* drop down one MTU after sending */
+ net->cwnd -= net->mtu;
+ if (net->cwnd < net->ssthresh)
+ /* still in SS move to CA */
+ net->ssthresh = net->cwnd - 1;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR);
+ }
+}
+
+void
+sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ int old_cwnd;
+
+ old_cwnd = net->cwnd;
+
+ /* JRS - reset hctp as if state changed */
+ htcp_reset(&net->htcp_ca);
+ SCTP_STAT_INCR(sctps_ecnereducedcwnd);
+ net->ssthresh = htcp_recalc_ssthresh(stcb, net);
+ if (net->ssthresh < net->mtu) {
+ net->ssthresh = net->mtu;
+ /* here back off the timer as well, to slow us down */
+ net->RTO <<= 1;
+ }
+ net->cwnd = net->ssthresh;
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
+ }
+}
--- /dev/null
+++ sys/netinet/in_mcast.c
@@ -0,0 +1,1844 @@
+/*-
+ * Copyright (c) 2007 Bruce M. Simpson.
+ * Copyright (c) 2005 Robert N. M. Watson.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * IPv4 multicast socket, group, and socket option processing module.
+ * Until further notice, this file requires INET to compile.
+ * TODO: Make this infrastructure independent of address family.
+ * TODO: Teach netinet6 to use this code.
+ * TODO: Hook up SSM logic to IGMPv3/MLDv2.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/in_mcast.c,v 1.3.2.1 2007/11/29 20:16:42 rwatson Exp $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/igmp_var.h>
+
+#ifndef __SOCKUNION_DECLARED
+union sockunion {
+ struct sockaddr_storage ss;
+ struct sockaddr sa;
+ struct sockaddr_dl sdl;
+ struct sockaddr_in sin;
+#ifdef INET6
+ struct sockaddr_in6 sin6;
+#endif
+};
+typedef union sockunion sockunion_t;
+#define __SOCKUNION_DECLARED
+#endif /* __SOCKUNION_DECLARED */
+
+static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
+static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
+static MALLOC_DEFINE(M_IPMSOURCE, "in_msource", "IPv4 multicast source filter");
+
+/*
+ * The IPv4 multicast list (in_multihead and associated structures) are
+ * protected by the global in_multi_mtx. See in_var.h for more details. For
+ * now, in_multi_mtx is marked as recursible due to IGMP's calling back into
+ * ip_output() to send IGMP packets while holding the lock; this probably is
+ * not quite desirable.
+ */
+struct in_multihead in_multihead; /* XXX BSS initialization */
+struct mtx in_multi_mtx;
+MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF | MTX_RECURSE);
+
+/*
+ * Functions with non-static linkage defined in this file should be
+ * declared in in_var.h:
+ * imo_match_group()
+ * imo_match_source()
+ * in_addmulti()
+ * in_delmulti()
+ * in_delmulti_locked()
+ * and ip_var.h:
+ * inp_freemoptions()
+ * inp_getmoptions()
+ * inp_setmoptions()
+ */
+static int imo_grow(struct ip_moptions *);
+static int imo_join_source(struct ip_moptions *, size_t, sockunion_t *);
+static int imo_leave_source(struct ip_moptions *, size_t, sockunion_t *);
+static int inp_change_source_filter(struct inpcb *, struct sockopt *);
+static struct ip_moptions *
+ inp_findmoptions(struct inpcb *);
+static int inp_get_source_filters(struct inpcb *, struct sockopt *);
+static int inp_join_group(struct inpcb *, struct sockopt *);
+static int inp_leave_group(struct inpcb *, struct sockopt *);
+static int inp_set_multicast_if(struct inpcb *, struct sockopt *);
+static int inp_set_source_filters(struct inpcb *, struct sockopt *);
+static struct ifnet *
+ ip_multicast_if(struct in_addr *a);
+
+/*
+ * Resize the ip_moptions vector to the next power-of-two minus 1.
+ * May be called with locks held; do not sleep.
+ */
+static int
+imo_grow(struct ip_moptions *imo)
+{
+ struct in_multi **nmships;
+ struct in_multi **omships;
+ struct in_mfilter *nmfilters;
+ struct in_mfilter *omfilters;
+ size_t idx;
+ size_t newmax;
+ size_t oldmax;
+
+ nmships = NULL;
+ nmfilters = NULL;
+ omships = imo->imo_membership;
+ omfilters = imo->imo_mfilters;
+ oldmax = imo->imo_max_memberships;
+ newmax = ((oldmax + 1) * 2) - 1;
+
+ if (newmax <= IP_MAX_MEMBERSHIPS) {
+ nmships = (struct in_multi **)realloc(omships,
+ sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
+ nmfilters = (struct in_mfilter *)realloc(omfilters,
+ sizeof(struct in_mfilter) * newmax, M_IPMSOURCE, M_NOWAIT);
+ if (nmships != NULL && nmfilters != NULL) {
+ /* Initialize newly allocated source filter heads. */
+ for (idx = oldmax; idx < newmax; idx++) {
+ nmfilters[idx].imf_fmode = MCAST_EXCLUDE;
+ nmfilters[idx].imf_nsources = 0;
+ TAILQ_INIT(&nmfilters[idx].imf_sources);
+ }
+ imo->imo_max_memberships = newmax;
+ imo->imo_membership = nmships;
+ imo->imo_mfilters = nmfilters;
+ }
+ }
+
+ if (nmships == NULL || nmfilters == NULL) {
+ if (nmships != NULL)
+ free(nmships, M_IPMOPTS);
+ if (nmfilters != NULL)
+ free(nmfilters, M_IPMSOURCE);
+ return (ETOOMANYREFS);
+ }
+
+ return (0);
+}
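
The resize rule above, newmax = ((oldmax + 1) * 2) - 1, steps the vector length through successive power-of-two-minus-one sizes, and imo_grow() refuses to grow past IP_MAX_MEMBERSHIPS. A one-line sketch of the progression (the helper name is illustrative only):

    /* Illustrative only: next membership-vector size as computed by imo_grow(). */
    static size_t
    next_memberships(size_t oldmax)
    {
            return (((oldmax + 1) * 2) - 1);        /* e.g. 7 -> 15 -> 31 -> 63 */
    }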
+
+/*
+ * Add a source to a multicast filter list.
+ * Assumes the associated inpcb is locked.
+ */
+static int
+imo_join_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
+{
+ struct in_msource *ims, *nims;
+ struct in_mfilter *imf;
+
+ KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
+ KASSERT(imo->imo_mfilters != NULL,
+ ("%s: imo_mfilters vector not allocated", __func__));
+
+ imf = &imo->imo_mfilters[gidx];
+ if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
+ return (ENOBUFS);
+
+ ims = imo_match_source(imo, gidx, &src->sa);
+ if (ims != NULL)
+ return (EADDRNOTAVAIL);
+
+ /* Do not sleep with inp lock held. */
+ MALLOC(nims, struct in_msource *, sizeof(struct in_msource),
+ M_IPMSOURCE, M_NOWAIT | M_ZERO);
+ if (nims == NULL)
+ return (ENOBUFS);
+
+ nims->ims_addr = src->ss;
+ TAILQ_INSERT_TAIL(&imf->imf_sources, nims, ims_next);
+ imf->imf_nsources++;
+
+ return (0);
+}
+
+static int
+imo_leave_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
+{
+ struct in_msource *ims;
+ struct in_mfilter *imf;
+
+ KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
+ KASSERT(imo->imo_mfilters != NULL,
+ ("%s: imo_mfilters vector not allocated", __func__));
+
+ imf = &imo->imo_mfilters[gidx];
+ if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
+ return (ENOBUFS);
+
+ ims = imo_match_source(imo, gidx, &src->sa);
+ if (ims == NULL)
+ return (EADDRNOTAVAIL);
+
+ TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
+ FREE(ims, M_IPMSOURCE);
+ imf->imf_nsources--;
+
+ return (0);
+}
+
+/*
+ * Find an IPv4 multicast group entry for this ip_moptions instance
+ * which matches the specified group, and optionally an interface.
+ * Return its index into the array, or -1 if not found.
+ */
+size_t
+imo_match_group(struct ip_moptions *imo, struct ifnet *ifp,
+ struct sockaddr *group)
+{
+ sockunion_t *gsa;
+ struct in_multi **pinm;
+ int idx;
+ int nmships;
+
+ gsa = (sockunion_t *)group;
+
+ /* The imo_membership array may be lazily allocated. */
+ if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
+ return (-1);
+
+ nmships = imo->imo_num_memberships;
+ pinm = &imo->imo_membership[0];
+ for (idx = 0; idx < nmships; idx++, pinm++) {
+ if (*pinm == NULL)
+ continue;
+#if 0
+ printf("%s: trying ifp = %p, inaddr = %s ", __func__,
+ ifp, inet_ntoa(gsa->sin.sin_addr));
+ printf("against %p, %s\n",
+ (*pinm)->inm_ifp, inet_ntoa((*pinm)->inm_addr));
+#endif
+ if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
+ (*pinm)->inm_addr.s_addr == gsa->sin.sin_addr.s_addr) {
+ break;
+ }
+ }
+ if (idx >= nmships)
+ idx = -1;
+
+ return (idx);
+}
+
+/*
+ * Find a multicast source entry for this imo which matches
+ * the given group index for this socket, and source address.
+ */
+struct in_msource *
+imo_match_source(struct ip_moptions *imo, size_t gidx, struct sockaddr *src)
+{
+ struct in_mfilter *imf;
+ struct in_msource *ims, *pims;
+
+ KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
+ KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
+ ("%s: invalid index %d\n", __func__, (int)gidx));
+
+ /* The imo_mfilters array may be lazily allocated. */
+ if (imo->imo_mfilters == NULL)
+ return (NULL);
+
+ pims = NULL;
+ imf = &imo->imo_mfilters[gidx];
+ TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
+ /*
+ * Perform bitwise comparison of two IPv4 addresses.
+ * TODO: Do the same for IPv6.
+ * Do not use sa_equal() for this as it is not aware of
+ * deeper structure in sockaddr_in or sockaddr_in6.
+ */
+ if (((struct sockaddr_in *)&ims->ims_addr)->sin_addr.s_addr ==
+ ((struct sockaddr_in *)src)->sin_addr.s_addr) {
+ pims = ims;
+ break;
+ }
+ }
+
+ return (pims);
+}
+
+/*
+ * Join an IPv4 multicast group.
+ */
+struct in_multi *
+in_addmulti(struct in_addr *ap, struct ifnet *ifp)
+{
+ struct in_multi *inm;
+
+ inm = NULL;
+
+ IFF_LOCKGIANT(ifp);
+ IN_MULTI_LOCK();
+
+ IN_LOOKUP_MULTI(*ap, ifp, inm);
+ if (inm != NULL) {
+ /*
+ * If we already joined this group, just bump the
+ * refcount and return it.
+ */
+ KASSERT(inm->inm_refcount >= 1,
+ ("%s: bad refcount %d", __func__, inm->inm_refcount));
+ ++inm->inm_refcount;
+ } else do {
+ sockunion_t gsa;
+ struct ifmultiaddr *ifma;
+ struct in_multi *ninm;
+ int error;
+
+ memset(&gsa, 0, sizeof(gsa));
+ gsa.sin.sin_family = AF_INET;
+ gsa.sin.sin_len = sizeof(struct sockaddr_in);
+ gsa.sin.sin_addr = *ap;
+
+ /*
+ * Check if a link-layer group is already associated
+ * with this network-layer group on the given ifnet.
+ * If so, bump the refcount on the existing network-layer
+ * group association and return it.
+ */
+ error = if_addmulti(ifp, &gsa.sa, &ifma);
+ if (error)
+ break;
+ if (ifma->ifma_protospec != NULL) {
+ inm = (struct in_multi *)ifma->ifma_protospec;
+#ifdef INVARIANTS
+ if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
+ inm->inm_addr.s_addr != ap->s_addr)
+ panic("%s: ifma is inconsistent", __func__);
+#endif
+ ++inm->inm_refcount;
+ break;
+ }
+
+ /*
+ * A new membership is needed; construct it and
+ * perform the IGMP join.
+ */
+ ninm = malloc(sizeof(*ninm), M_IPMADDR, M_NOWAIT | M_ZERO);
+ if (ninm == NULL) {
+ if_delmulti_ifma(ifma);
+ break;
+ }
+ ninm->inm_addr = *ap;
+ ninm->inm_ifp = ifp;
+ ninm->inm_ifma = ifma;
+ ninm->inm_refcount = 1;
+ ifma->ifma_protospec = ninm;
+ LIST_INSERT_HEAD(&in_multihead, ninm, inm_link);
+
+ igmp_joingroup(ninm);
+
+ inm = ninm;
+ } while (0);
+
+ IN_MULTI_UNLOCK();
+ IFF_UNLOCKGIANT(ifp);
+
+ return (inm);
+}
+
+/*
+ * Leave an IPv4 multicast group.
+ * It is OK to call this routine if the underlying ifnet went away.
+ *
+ * XXX: To deal with the ifp going away, we cheat; the link-layer code in net
+ * will set ifma_ifp to NULL when the associated ifnet instance is detached
+ * from the system.
+ *
+ * The only reason we need to violate layers and check ifma_ifp here at all
+ * is because certain hardware drivers still require Giant to be held,
+ * and it must always be taken before other locks.
+ */
+void
+in_delmulti(struct in_multi *inm)
+{
+ struct ifnet *ifp;
+
+ KASSERT(inm != NULL, ("%s: inm is NULL", __func__));
+ KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
+ ifp = inm->inm_ifma->ifma_ifp;
+
+ if (ifp != NULL) {
+ /*
+ * Sanity check that netinet's notion of ifp is the
+ * same as net's.
+ */
+ KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
+ IFF_LOCKGIANT(ifp);
+ }
+
+ IN_MULTI_LOCK();
+ in_delmulti_locked(inm);
+ IN_MULTI_UNLOCK();
+
+ if (ifp != NULL)
+ IFF_UNLOCKGIANT(ifp);
+}
+
+/*
+ * Delete a multicast address record, with locks held.
+ *
+ * It is OK to call this routine if the ifp went away.
+ * Assumes that caller holds the IN_MULTI lock, and that
+ * Giant was taken before other locks if required by the hardware.
+ */
+void
+in_delmulti_locked(struct in_multi *inm)
+{
+ struct ifmultiaddr *ifma;
+
+ IN_MULTI_LOCK_ASSERT();
+ KASSERT(inm->inm_refcount >= 1, ("%s: freeing freed inm", __func__));
+
+ if (--inm->inm_refcount == 0) {
+ igmp_leavegroup(inm);
+
+ ifma = inm->inm_ifma;
+#ifdef DIAGNOSTIC
+ if (bootverbose)
+ printf("%s: purging ifma %p\n", __func__, ifma);
+#endif
+ KASSERT(ifma->ifma_protospec == inm,
+ ("%s: ifma_protospec != inm", __func__));
+ ifma->ifma_protospec = NULL;
+
+ LIST_REMOVE(inm, inm_link);
+ free(inm, M_IPMADDR);
+
+ if_delmulti_ifma(ifma);
+ }
+}
+
+/*
+ * Block or unblock an ASM/SSM multicast source on an inpcb.
+ */
+static int
+inp_change_source_filter(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct group_source_req gsr;
+ sockunion_t *gsa, *ssa;
+ struct ifnet *ifp;
+ struct in_mfilter *imf;
+ struct ip_moptions *imo;
+ struct in_msource *ims;
+ size_t idx;
+ int error;
+ int block;
+
+ ifp = NULL;
+ error = 0;
+ block = 0;
+
+ memset(&gsr, 0, sizeof(struct group_source_req));
+ gsa = (sockunion_t *)&gsr.gsr_group;
+ ssa = (sockunion_t *)&gsr.gsr_source;
+
+ switch (sopt->sopt_name) {
+ case IP_BLOCK_SOURCE:
+ case IP_UNBLOCK_SOURCE: {
+ struct ip_mreq_source mreqs;
+
+ error = sooptcopyin(sopt, &mreqs,
+ sizeof(struct ip_mreq_source),
+ sizeof(struct ip_mreq_source));
+ if (error)
+ return (error);
+
+ gsa->sin.sin_family = AF_INET;
+ gsa->sin.sin_len = sizeof(struct sockaddr_in);
+ gsa->sin.sin_addr = mreqs.imr_multiaddr;
+
+ ssa->sin.sin_family = AF_INET;
+ ssa->sin.sin_len = sizeof(struct sockaddr_in);
+ ssa->sin.sin_addr = mreqs.imr_sourceaddr;
+
+ if (mreqs.imr_interface.s_addr != INADDR_ANY)
+ INADDR_TO_IFP(mreqs.imr_interface, ifp);
+
+ if (sopt->sopt_name == IP_BLOCK_SOURCE)
+ block = 1;
+
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: imr_interface = %s, ifp = %p\n",
+ __func__, inet_ntoa(mreqs.imr_interface), ifp);
+ }
+#endif
+ break;
+ }
+
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ error = sooptcopyin(sopt, &gsr,
+ sizeof(struct group_source_req),
+ sizeof(struct group_source_req));
+ if (error)
+ return (error);
+
+ if (gsa->sin.sin_family != AF_INET ||
+ gsa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+
+ if (ssa->sin.sin_family != AF_INET ||
+ ssa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+
+ if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
+ return (EADDRNOTAVAIL);
+
+ ifp = ifnet_byindex(gsr.gsr_interface);
+
+ if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
+ block = 1;
+ break;
+
+ default:
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: unknown sopt_name %d\n", __func__,
+ sopt->sopt_name);
+ }
+#endif
+ return (EOPNOTSUPP);
+ break;
+ }
+
+ /* XXX INET6 */
+ if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+ return (EINVAL);
+
+ /*
+ * Check if we are actually a member of this group.
+ */
+ imo = inp_findmoptions(inp);
+ idx = imo_match_group(imo, ifp, &gsa->sa);
+ if (idx == -1 || imo->imo_mfilters == NULL) {
+ error = EADDRNOTAVAIL;
+ goto out_locked;
+ }
+
+ KASSERT(imo->imo_mfilters != NULL,
+ ("%s: imo_mfilters not allocated", __func__));
+ imf = &imo->imo_mfilters[idx];
+
+ /*
+ * SSM multicast truth table for block/unblock operations.
+ *
+ * Operation Filter Mode Entry exists? Action
+ *
+ * block exclude no add source to filter
+ * unblock include no add source to filter
+ * block include no EINVAL
+ * unblock exclude no EINVAL
+ * block exclude yes EADDRNOTAVAIL
+ * unblock include yes EADDRNOTAVAIL
+ * block include yes remove source from filter
+ * unblock exclude yes remove source from filter
+ *
+ * FreeBSD does not explicitly distinguish between ASM and SSM
+ * mode sockets; all sockets are assumed to have a filter list.
+ */
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: imf_fmode is %s\n", __func__,
+ imf->imf_fmode == MCAST_INCLUDE ? "include" : "exclude");
+ }
+#endif
+ ims = imo_match_source(imo, idx, &ssa->sa);
+ if (ims == NULL) {
+ if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
+ (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: adding %s to filter list\n",
+ __func__, inet_ntoa(ssa->sin.sin_addr));
+ }
+#endif
+ error = imo_join_source(imo, idx, ssa);
+ }
+ if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
+ (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
+ /*
+ * If the socket is in inclusive mode:
+ * the source is already blocked as it has no entry.
+ * If the socket is in exclusive mode:
+ * the source is already unblocked as it has no entry.
+ */
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: ims %p; %s already [un]blocked\n",
+ __func__, ims,
+ inet_ntoa(ssa->sin.sin_addr));
+ }
+#endif
+ error = EINVAL;
+ }
+ } else {
+ if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
+ (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
+ /*
+ * If the socket is in exclusive mode:
+ * the source is already blocked as it has an entry.
+ * If the socket is in inclusive mode:
+ * the source is already unblocked as it has an entry.
+ */
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: ims %p; %s already [un]blocked\n",
+ __func__, ims,
+ inet_ntoa(ssa->sin.sin_addr));
+ }
+#endif
+ error = EADDRNOTAVAIL;
+ }
+ if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
+ (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: removing %s from filter list\n",
+ __func__, inet_ntoa(ssa->sin.sin_addr));
+ }
+#endif
+ error = imo_leave_source(imo, idx, ssa);
+ }
+ }
+
+out_locked:
+ INP_UNLOCK(inp);
+ return (error);
+}
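
The truth table above can be read as a pure decision on three inputs: the requested operation, the membership's filter mode, and whether a source entry already exists. A compact restatement of that table (the enum and helper below are illustrative only, not part of the file):

    /* Illustrative only: the block/unblock truth table as a decision function. */
    enum ssm_action { SSM_ADD, SSM_REMOVE, SSM_EINVAL, SSM_EADDRNOTAVAIL };

    static enum ssm_action
    ssm_filter_action(int block, int fmode_exclude, int entry_exists)
    {
            if (!entry_exists) {
                    if ((block && fmode_exclude) || (!block && !fmode_exclude))
                            return (SSM_ADD);           /* add source to filter */
                    return (SSM_EINVAL);                /* already [un]blocked */
            }
            if ((block && fmode_exclude) || (!block && !fmode_exclude))
                    return (SSM_EADDRNOTAVAIL);         /* entry already present */
            return (SSM_REMOVE);                        /* remove source from filter */
    }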
+
+/*
+ * Given an inpcb, return its multicast options structure pointer. Accepts
+ * an unlocked inpcb pointer, but will return it locked. May sleep.
+ */
+static struct ip_moptions *
+inp_findmoptions(struct inpcb *inp)
+{
+ struct ip_moptions *imo;
+ struct in_multi **immp;
+ struct in_mfilter *imfp;
+ size_t idx;
+
+ INP_LOCK(inp);
+ if (inp->inp_moptions != NULL)
+ return (inp->inp_moptions);
+
+ INP_UNLOCK(inp);
+
+ imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
+ M_WAITOK);
+ immp = (struct in_multi **)malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS,
+ M_IPMOPTS, M_WAITOK | M_ZERO);
+ imfp = (struct in_mfilter *)malloc(
+ sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
+ M_IPMSOURCE, M_WAITOK);
+
+ imo->imo_multicast_ifp = NULL;
+ imo->imo_multicast_addr.s_addr = INADDR_ANY;
+ imo->imo_multicast_vif = -1;
+ imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
+ imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
+ imo->imo_num_memberships = 0;
+ imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
+ imo->imo_membership = immp;
+
+ /* Initialize per-group source filters. */
+ for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) {
+ imfp[idx].imf_fmode = MCAST_EXCLUDE;
+ imfp[idx].imf_nsources = 0;
+ TAILQ_INIT(&imfp[idx].imf_sources);
+ }
+ imo->imo_mfilters = imfp;
+
+ INP_LOCK(inp);
+ if (inp->inp_moptions != NULL) {
+ free(imfp, M_IPMSOURCE);
+ free(immp, M_IPMOPTS);
+ free(imo, M_IPMOPTS);
+ return (inp->inp_moptions);
+ }
+ inp->inp_moptions = imo;
+ return (imo);
+}
+
+/*
+ * Discard the IP multicast options (and source filters).
+ */
+void
+inp_freemoptions(struct ip_moptions *imo)
+{
+ struct in_mfilter *imf;
+ struct in_msource *ims, *tims;
+ size_t idx, nmships;
+
+ KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
+
+ nmships = imo->imo_num_memberships;
+ for (idx = 0; idx < nmships; ++idx) {
+ in_delmulti(imo->imo_membership[idx]);
+
+ if (imo->imo_mfilters != NULL) {
+ imf = &imo->imo_mfilters[idx];
+ TAILQ_FOREACH_SAFE(ims, &imf->imf_sources,
+ ims_next, tims) {
+ TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
+ FREE(ims, M_IPMSOURCE);
+ imf->imf_nsources--;
+ }
+ KASSERT(imf->imf_nsources == 0,
+ ("%s: did not free all imf_nsources", __func__));
+ }
+ }
+
+ if (imo->imo_mfilters != NULL)
+ free(imo->imo_mfilters, M_IPMSOURCE);
+ free(imo->imo_membership, M_IPMOPTS);
+ free(imo, M_IPMOPTS);
+}
+
+/*
+ * Atomically get source filters on a socket for an IPv4 multicast group.
+ * Called with INP lock held; returns with lock released.
+ */
+static int
+inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct __msfilterreq msfr;
+ sockunion_t *gsa;
+ struct ifnet *ifp;
+ struct ip_moptions *imo;
+ struct in_mfilter *imf;
+ struct in_msource *ims;
+ struct sockaddr_storage *ptss;
+ struct sockaddr_storage *tss;
+ int error;
+ size_t idx;
+
+ INP_LOCK_ASSERT(inp);
+
+ imo = inp->inp_moptions;
+ KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
+
+ INP_UNLOCK(inp);
+
+ error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
+ sizeof(struct __msfilterreq));
+ if (error)
+ return (error);
+
+ if (msfr.msfr_ifindex == 0 || if_index < msfr.msfr_ifindex)
+ return (EINVAL);
+
+ ifp = ifnet_byindex(msfr.msfr_ifindex);
+ if (ifp == NULL)
+ return (EINVAL);
+
+ INP_LOCK(inp);
+
+ /*
+ * Lookup group on the socket.
+ */
+ gsa = (sockunion_t *)&msfr.msfr_group;
+ idx = imo_match_group(imo, ifp, &gsa->sa);
+ if (idx == -1 || imo->imo_mfilters == NULL) {
+ INP_UNLOCK(inp);
+ return (EADDRNOTAVAIL);
+ }
+
+ imf = &imo->imo_mfilters[idx];
+ msfr.msfr_fmode = imf->imf_fmode;
+ msfr.msfr_nsrcs = imf->imf_nsources;
+
+ /*
+ * If the user specified a buffer, copy out the source filter
+ * entries to userland gracefully.
+ * msfr.msfr_nsrcs is always set to the total number of filter
+ * entries which the kernel currently has for this group.
+ */
+ tss = NULL;
+ if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
+ /*
+ * Make a copy of the source vector so that we do not
+ * thrash the inpcb lock whilst copying it out.
+ * We only copy out the number of entries which userland
+ * has asked for, but we always tell userland how big the
+ * buffer really needs to be.
+ */
+ MALLOC(tss, struct sockaddr_storage *,
+ sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
+ M_TEMP, M_NOWAIT);
+ if (tss == NULL) {
+ error = ENOBUFS;
+ } else {
+ ptss = tss;
+ TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
+ memcpy(ptss++, &ims->ims_addr,
+ sizeof(struct sockaddr_storage));
+ }
+ }
+ }
+
+ INP_UNLOCK(inp);
+
+ if (tss != NULL) {
+ error = copyout(tss, msfr.msfr_srcs,
+ sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
+ FREE(tss, M_TEMP);
+ }
+
+ if (error)
+ return (error);
+
+ error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
+
+ return (error);
+}
+
+/*
+ * Return the IP multicast options in response to user getsockopt().
+ */
+int
+inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct ip_mreqn mreqn;
+ struct ip_moptions *imo;
+ struct ifnet *ifp;
+ struct in_ifaddr *ia;
+ int error, optval;
+ u_char coptval;
+
+ INP_LOCK(inp);
+ imo = inp->inp_moptions;
+ /*
+ * If the socket is neither of type SOCK_RAW nor SOCK_DGRAM,
+ * or is a divert socket, reject it.
+ */
+ if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
+ (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
+ inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
+ INP_UNLOCK(inp);
+ return (EOPNOTSUPP);
+ }
+
+ error = 0;
+ switch (sopt->sopt_name) {
+ case IP_MULTICAST_VIF:
+ if (imo != NULL)
+ optval = imo->imo_multicast_vif;
+ else
+ optval = -1;
+ INP_UNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof(int));
+ break;
+
+ case IP_MULTICAST_IF:
+ memset(&mreqn, 0, sizeof(struct ip_mreqn));
+ if (imo != NULL) {
+ ifp = imo->imo_multicast_ifp;
+ if (imo->imo_multicast_addr.s_addr != INADDR_ANY) {
+ mreqn.imr_address = imo->imo_multicast_addr;
+ } else if (ifp != NULL) {
+ mreqn.imr_ifindex = ifp->if_index;
+ IFP_TO_IA(ifp, ia);
+ if (ia != NULL) {
+ mreqn.imr_address =
+ IA_SIN(ia)->sin_addr;
+ }
+ }
+ }
+ INP_UNLOCK(inp);
+ if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
+ error = sooptcopyout(sopt, &mreqn,
+ sizeof(struct ip_mreqn));
+ } else {
+ error = sooptcopyout(sopt, &mreqn.imr_address,
+ sizeof(struct in_addr));
+ }
+ break;
+
+ case IP_MULTICAST_TTL:
+ if (imo == 0)
+ optval = coptval = IP_DEFAULT_MULTICAST_TTL;
+ else
+ optval = coptval = imo->imo_multicast_ttl;
+ INP_UNLOCK(inp);
+ if (sopt->sopt_valsize == sizeof(u_char))
+ error = sooptcopyout(sopt, &coptval, sizeof(u_char));
+ else
+ error = sooptcopyout(sopt, &optval, sizeof(int));
+ break;
+
+ case IP_MULTICAST_LOOP:
+ if (imo == 0)
+ optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
+ else
+ optval = coptval = imo->imo_multicast_loop;
+ INP_UNLOCK(inp);
+ if (sopt->sopt_valsize == sizeof(u_char))
+ error = sooptcopyout(sopt, &coptval, sizeof(u_char));
+ else
+ error = sooptcopyout(sopt, &optval, sizeof(int));
+ break;
+
+ case IP_MSFILTER:
+ if (imo == NULL) {
+ error = EADDRNOTAVAIL;
+ INP_UNLOCK(inp);
+ } else {
+ error = inp_get_source_filters(inp, sopt);
+ }
+ break;
+
+ default:
+ INP_UNLOCK(inp);
+ error = ENOPROTOOPT;
+ break;
+ }
+
+ INP_UNLOCK_ASSERT(inp);
+
+ return (error);
+}
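
Because the copyout above checks sopt_valsize, IP_MULTICAST_TTL and IP_MULTICAST_LOOP may be read with either a u_char or an int buffer. A minimal userland sketch, assuming s is an open SOCK_DGRAM socket (error handling omitted):

    u_char ttl8;
    int ttl32;
    socklen_t len;

    len = sizeof(ttl8);                 /* byte-sized form */
    (void)getsockopt(s, IPPROTO_IP, IP_MULTICAST_TTL, &ttl8, &len);

    len = sizeof(ttl32);                /* int-sized form */
    (void)getsockopt(s, IPPROTO_IP, IP_MULTICAST_TTL, &ttl32, &len);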
+
+/*
+ * Join an IPv4 multicast group, possibly with a source.
+ */
+static int
+inp_join_group(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct group_source_req gsr;
+ sockunion_t *gsa, *ssa;
+ struct ifnet *ifp;
+ struct in_mfilter *imf;
+ struct ip_moptions *imo;
+ struct in_multi *inm;
+ size_t idx;
+ int error;
+
+ ifp = NULL;
+ error = 0;
+
+ memset(&gsr, 0, sizeof(struct group_source_req));
+ gsa = (sockunion_t *)&gsr.gsr_group;
+ gsa->ss.ss_family = AF_UNSPEC;
+ ssa = (sockunion_t *)&gsr.gsr_source;
+ ssa->ss.ss_family = AF_UNSPEC;
+
+ switch (sopt->sopt_name) {
+ case IP_ADD_MEMBERSHIP:
+ case IP_ADD_SOURCE_MEMBERSHIP: {
+ struct ip_mreq_source mreqs;
+
+ if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
+ error = sooptcopyin(sopt, &mreqs,
+ sizeof(struct ip_mreq),
+ sizeof(struct ip_mreq));
+ /*
+ * Do argument switcharoo from ip_mreq into
+ * ip_mreq_source to avoid using two instances.
+ */
+ mreqs.imr_interface = mreqs.imr_sourceaddr;
+ mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
+ } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
+ error = sooptcopyin(sopt, &mreqs,
+ sizeof(struct ip_mreq_source),
+ sizeof(struct ip_mreq_source));
+ }
+ if (error)
+ return (error);
+
+ gsa->sin.sin_family = AF_INET;
+ gsa->sin.sin_len = sizeof(struct sockaddr_in);
+ gsa->sin.sin_addr = mreqs.imr_multiaddr;
+
+ if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
+ ssa->sin.sin_family = AF_INET;
+ ssa->sin.sin_len = sizeof(struct sockaddr_in);
+ ssa->sin.sin_addr = mreqs.imr_sourceaddr;
+ }
+
+ /*
+ * Obtain ifp. If no interface address was provided,
+ * use the interface of the route in the unicast FIB for
+ * the given multicast destination; usually, this is the
+ * default route.
+ * If this lookup fails, attempt to use the first non-loopback
+ * interface with multicast capability in the system as a
+ * last resort. The legacy IPv4 ASM API requires that we do
+ * this in order to allow groups to be joined when the routing
+ * table has not yet been populated during boot.
+ * If all of these conditions fail, return EADDRNOTAVAIL, and
+ * reject the IPv4 multicast join.
+ */
+ if (mreqs.imr_interface.s_addr != INADDR_ANY) {
+ ifp = ip_multicast_if(&mreqs.imr_interface);
+ } else {
+ struct route ro;
+
+ ro.ro_rt = NULL;
+ *(struct sockaddr_in *)&ro.ro_dst = gsa->sin;
+ rtalloc_ign(&ro, RTF_CLONING);
+ if (ro.ro_rt != NULL) {
+ ifp = ro.ro_rt->rt_ifp;
+ KASSERT(ifp != NULL, ("%s: null ifp",
+ __func__));
+ RTFREE(ro.ro_rt);
+ } else {
+ struct in_ifaddr *ia;
+ struct ifnet *mfp = NULL;
+ TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
+ mfp = ia->ia_ifp;
+ if (!(mfp->if_flags & IFF_LOOPBACK) &&
+ (mfp->if_flags & IFF_MULTICAST)) {
+ ifp = mfp;
+ break;
+ }
+ }
+ }
+ }
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: imr_interface = %s, ifp = %p\n",
+ __func__, inet_ntoa(mreqs.imr_interface), ifp);
+ }
+#endif
+ break;
+ }
+
+ case MCAST_JOIN_GROUP:
+ case MCAST_JOIN_SOURCE_GROUP:
+ if (sopt->sopt_name == MCAST_JOIN_GROUP) {
+ error = sooptcopyin(sopt, &gsr,
+ sizeof(struct group_req),
+ sizeof(struct group_req));
+ } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
+ error = sooptcopyin(sopt, &gsr,
+ sizeof(struct group_source_req),
+ sizeof(struct group_source_req));
+ }
+ if (error)
+ return (error);
+
+ if (gsa->sin.sin_family != AF_INET ||
+ gsa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+
+ /*
+ * Overwrite the port field if present, as the sockaddr
+ * being copied in may be matched with a binary comparison.
+ * XXX INET6
+ */
+ gsa->sin.sin_port = 0;
+ if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
+ if (ssa->sin.sin_family != AF_INET ||
+ ssa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+ ssa->sin.sin_port = 0;
+ }
+
+ /*
+ * Obtain the ifp.
+ */
+ if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
+ return (EADDRNOTAVAIL);
+ ifp = ifnet_byindex(gsr.gsr_interface);
+
+ break;
+
+ default:
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: unknown sopt_name %d\n", __func__,
+ sopt->sopt_name);
+ }
+#endif
+ return (EOPNOTSUPP);
+ break;
+ }
+
+ if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+ return (EINVAL);
+
+ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
+ return (EADDRNOTAVAIL);
+
+ /*
+ * Check if we already hold membership of this group for this inpcb.
+ * If so, we do not need to perform the initial join.
+ */
+ imo = inp_findmoptions(inp);
+ idx = imo_match_group(imo, ifp, &gsa->sa);
+ if (idx != -1) {
+ if (ssa->ss.ss_family != AF_UNSPEC) {
+ /*
+ * Attempting to join an ASM group (when already
+ * an ASM or SSM member) is an error.
+ */
+ error = EADDRNOTAVAIL;
+ } else {
+ imf = &imo->imo_mfilters[idx];
+ if (imf->imf_nsources == 0) {
+ /*
+ * Attempting to join an SSM group (when
+ * already an ASM member) is an error.
+ */
+ error = EINVAL;
+ } else {
+ /*
+ * Attempting to join an SSM group (when
+ * already an SSM member) means "add this
+ * source to the inclusive filter list".
+ */
+ error = imo_join_source(imo, idx, ssa);
+ }
+ }
+ goto out_locked;
+ }
+
+ /*
+ * Call imo_grow() to reallocate the membership and source filter
+ * vectors if they are full. If the size would exceed the hard limit,
+ * then we know we've really run out of entries. We keep the INP
+ * lock held to avoid introducing a race condition.
+ */
+ if (imo->imo_num_memberships == imo->imo_max_memberships) {
+ error = imo_grow(imo);
+ if (error)
+ goto out_locked;
+ }
+
+ /*
+ * So far, so good: perform the layer 3 join, layer 2 join,
+ * and make an IGMP announcement if needed.
+ */
+ inm = in_addmulti(&gsa->sin.sin_addr, ifp);
+ if (inm == NULL) {
+ error = ENOBUFS;
+ goto out_locked;
+ }
+ idx = imo->imo_num_memberships;
+ imo->imo_membership[idx] = inm;
+ imo->imo_num_memberships++;
+
+ KASSERT(imo->imo_mfilters != NULL,
+ ("%s: imf_mfilters vector was not allocated", __func__));
+ imf = &imo->imo_mfilters[idx];
+ KASSERT(TAILQ_EMPTY(&imf->imf_sources),
+ ("%s: imf_sources not empty", __func__));
+
+ /*
+ * If this is a new SSM group join (i.e. a source was specified
+ * with this group), add this source to the filter list.
+ */
+ if (ssa->ss.ss_family != AF_UNSPEC) {
+ /*
+ * An initial SSM join implies that this socket's membership
+ * of the multicast group is now in inclusive mode.
+ */
+ imf->imf_fmode = MCAST_INCLUDE;
+
+ error = imo_join_source(imo, idx, ssa);
+ if (error) {
+ /*
+ * Drop inp lock before calling in_delmulti(),
+ * to prevent a lock order reversal.
+ */
+ --imo->imo_num_memberships;
+ INP_UNLOCK(inp);
+ in_delmulti(inm);
+ return (error);
+ }
+ }
+
+out_locked:
+ INP_UNLOCK(inp);
+ return (error);
+}
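
For the MCAST_JOIN_SOURCE_GROUP case handled above, userland fills a struct group_source_req with the interface index, the group, and the source address. A minimal sketch, assuming s is an open SOCK_DGRAM socket and ifindex names a multicast-capable interface (error handling omitted):

    struct group_source_req gsr;
    struct sockaddr_in *grp = (struct sockaddr_in *)&gsr.gsr_group;
    struct sockaddr_in *src = (struct sockaddr_in *)&gsr.gsr_source;

    memset(&gsr, 0, sizeof(gsr));
    gsr.gsr_interface = ifindex;
    grp->sin_family = AF_INET;
    grp->sin_len = sizeof(*grp);
    inet_pton(AF_INET, "232.1.1.1", &grp->sin_addr);    /* SSM group */
    src->sin_family = AF_INET;
    src->sin_len = sizeof(*src);
    inet_pton(AF_INET, "192.0.2.1", &src->sin_addr);    /* desired source */

    (void)setsockopt(s, IPPROTO_IP, MCAST_JOIN_SOURCE_GROUP, &gsr, sizeof(gsr));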
+
+/*
+ * Leave an IPv4 multicast group on an inpcb, possibly with a source.
+ */
+static int
+inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct group_source_req gsr;
+ struct ip_mreq_source mreqs;
+ sockunion_t *gsa, *ssa;
+ struct ifnet *ifp;
+ struct in_mfilter *imf;
+ struct ip_moptions *imo;
+ struct in_msource *ims, *tims;
+ struct in_multi *inm;
+ size_t idx;
+ int error;
+
+ ifp = NULL;
+ error = 0;
+
+ memset(&gsr, 0, sizeof(struct group_source_req));
+ gsa = (sockunion_t *)&gsr.gsr_group;
+ gsa->ss.ss_family = AF_UNSPEC;
+ ssa = (sockunion_t *)&gsr.gsr_source;
+ ssa->ss.ss_family = AF_UNSPEC;
+
+ switch (sopt->sopt_name) {
+ case IP_DROP_MEMBERSHIP:
+ case IP_DROP_SOURCE_MEMBERSHIP:
+ if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
+ error = sooptcopyin(sopt, &mreqs,
+ sizeof(struct ip_mreq),
+ sizeof(struct ip_mreq));
+ /*
+ * Swap interface and sourceaddr arguments,
+ * as ip_mreq and ip_mreq_source are laid
+ * out differently.
+ */
+ mreqs.imr_interface = mreqs.imr_sourceaddr;
+ mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
+ } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
+ error = sooptcopyin(sopt, &mreqs,
+ sizeof(struct ip_mreq_source),
+ sizeof(struct ip_mreq_source));
+ }
+ if (error)
+ return (error);
+
+ gsa->sin.sin_family = AF_INET;
+ gsa->sin.sin_len = sizeof(struct sockaddr_in);
+ gsa->sin.sin_addr = mreqs.imr_multiaddr;
+
+ if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
+ ssa->sin.sin_family = AF_INET;
+ ssa->sin.sin_len = sizeof(struct sockaddr_in);
+ ssa->sin.sin_addr = mreqs.imr_sourceaddr;
+ }
+
+ if (gsa->sin.sin_addr.s_addr != INADDR_ANY)
+ INADDR_TO_IFP(mreqs.imr_interface, ifp);
+
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: imr_interface = %s, ifp = %p\n",
+ __func__, inet_ntoa(mreqs.imr_interface), ifp);
+ }
+#endif
+ break;
+
+ case MCAST_LEAVE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
+ error = sooptcopyin(sopt, &gsr,
+ sizeof(struct group_req),
+ sizeof(struct group_req));
+ } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
+ error = sooptcopyin(sopt, &gsr,
+ sizeof(struct group_source_req),
+ sizeof(struct group_source_req));
+ }
+ if (error)
+ return (error);
+
+ if (gsa->sin.sin_family != AF_INET ||
+ gsa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+
+ if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
+ if (ssa->sin.sin_family != AF_INET ||
+ ssa->sin.sin_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+ }
+
+ if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
+ return (EADDRNOTAVAIL);
+
+ ifp = ifnet_byindex(gsr.gsr_interface);
+ break;
+
+ default:
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: unknown sopt_name %d\n", __func__,
+ sopt->sopt_name);
+ }
+#endif
+ return (EOPNOTSUPP);
+ break;
+ }
+
+ if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+ return (EINVAL);
+
+ /*
+ * Find the membership in the membership array.
+ */
+ imo = inp_findmoptions(inp);
+ idx = imo_match_group(imo, ifp, &gsa->sa);
+ if (idx == -1) {
+ error = EADDRNOTAVAIL;
+ goto out_locked;
+ }
+ imf = &imo->imo_mfilters[idx];
+
+ /*
+ * If we were instructed only to leave a given source, do so.
+ */
+ if (ssa->ss.ss_family != AF_UNSPEC) {
+ if (imf->imf_nsources == 0 ||
+ imf->imf_fmode == MCAST_EXCLUDE) {
+ /*
+ * Attempting to SSM leave an ASM group
+ * is an error; should use *_BLOCK_SOURCE instead.
+ * Attempting to SSM leave a source in a group when
+ * the socket is in 'exclude mode' is also an error.
+ */
+ error = EINVAL;
+ } else {
+ error = imo_leave_source(imo, idx, ssa);
+ }
+ /*
+ * If an error occurred, or this source is not the last
+ * source in the group, do not leave the whole group.
+ */
+ if (error || imf->imf_nsources > 0)
+ goto out_locked;
+ }
+
+ /*
+ * Give up the multicast address record to which the membership points.
+ */
+ inm = imo->imo_membership[idx];
+ in_delmulti(inm);
+
+ /*
+ * Free any source filters for this group if they exist.
+ * Revert inpcb to the default MCAST_EXCLUDE state.
+ */
+ if (imo->imo_mfilters != NULL) {
+ TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
+ TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
+ FREE(ims, M_IPMSOURCE);
+ imf->imf_nsources--;
+ }
+ KASSERT(imf->imf_nsources == 0,
+ ("%s: imf_nsources not 0", __func__));
+ KASSERT(TAILQ_EMPTY(&imf->imf_sources),
+ ("%s: imf_sources not empty", __func__));
+ imf->imf_fmode = MCAST_EXCLUDE;
+ }
+
+ /*
+ * Remove the gap in the membership array.
+ */
+ for (++idx; idx < imo->imo_num_memberships; ++idx)
+ imo->imo_membership[idx-1] = imo->imo_membership[idx];
+ imo->imo_num_memberships--;
+
+out_locked:
+ INP_UNLOCK(inp);
+ return (error);
+}
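
Leaving is symmetric with joining; the IP_DROP_MEMBERSHIP case above takes the same struct ip_mreq used for IP_ADD_MEMBERSHIP. A minimal userland sketch, assuming s previously joined the group (error handling omitted):

    struct ip_mreq mreq;

    memset(&mreq, 0, sizeof(mreq));
    inet_pton(AF_INET, "239.1.1.1", &mreq.imr_multiaddr);
    mreq.imr_interface.s_addr = htonl(INADDR_ANY);      /* let the kernel choose */
    (void)setsockopt(s, IPPROTO_IP, IP_DROP_MEMBERSHIP, &mreq, sizeof(mreq));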
+
+/*
+ * Select the interface for transmitting IPv4 multicast datagrams.
+ *
+ * Either an instance of struct in_addr or an instance of struct ip_mreqn
+ * may be passed to this socket option. An address of INADDR_ANY or an
+ * interface index of 0 is used to remove a previous selection.
+ * When no interface is selected, one is chosen for every send.
+ */
+static int
+inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct in_addr addr;
+ struct ip_mreqn mreqn;
+ struct ifnet *ifp;
+ struct ip_moptions *imo;
+ int error;
+
+ if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
+ /*
+ * An interface index was specified using the
+ * Linux-derived ip_mreqn structure.
+ */
+ error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
+ sizeof(struct ip_mreqn));
+ if (error)
+ return (error);
+
+ if (mreqn.imr_ifindex < 0 || if_index < mreqn.imr_ifindex)
+ return (EINVAL);
+
+ if (mreqn.imr_ifindex == 0) {
+ ifp = NULL;
+ } else {
+ ifp = ifnet_byindex(mreqn.imr_ifindex);
+ if (ifp == NULL)
+ return (EADDRNOTAVAIL);
+ }
+ } else {
+ /*
+ * An interface was specified by IPv4 address.
+ * This is the traditional BSD usage.
+ */
+ error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
+ sizeof(struct in_addr));
+ if (error)
+ return (error);
+ if (addr.s_addr == INADDR_ANY) {
+ ifp = NULL;
+ } else {
+ ifp = ip_multicast_if(&addr);
+ if (ifp == NULL)
+ return (EADDRNOTAVAIL);
+ }
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: ifp = %p, addr = %s\n",
+ __func__, ifp, inet_ntoa(addr)); /* XXX INET6 */
+ }
+#endif
+ }
+
+ /* Reject interfaces which do not support multicast. */
+ if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
+ return (EOPNOTSUPP);
+
+ imo = inp_findmoptions(inp);
+ imo->imo_multicast_ifp = ifp;
+ imo->imo_multicast_addr.s_addr = INADDR_ANY;
+ INP_UNLOCK(inp);
+
+ return (0);
+}
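
Both input forms accepted above are set the same way from userland; the index-based form uses the Linux-style struct ip_mreqn, the traditional form a bare struct in_addr. A minimal sketch, assuming s is an open SOCK_DGRAM socket and ifindex is a valid interface index (error handling omitted):

    struct ip_mreqn mreqn;
    struct in_addr ifaddr;

    /* Select by interface index (struct ip_mreqn form). */
    memset(&mreqn, 0, sizeof(mreqn));
    mreqn.imr_ifindex = ifindex;
    (void)setsockopt(s, IPPROTO_IP, IP_MULTICAST_IF, &mreqn, sizeof(mreqn));

    /* Select by interface address (traditional BSD form). */
    inet_pton(AF_INET, "192.0.2.10", &ifaddr);
    (void)setsockopt(s, IPPROTO_IP, IP_MULTICAST_IF, &ifaddr, sizeof(ifaddr));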
+
+/*
+ * Atomically set source filters on a socket for an IPv4 multicast group.
+ */
+static int
+inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct __msfilterreq msfr;
+ sockunion_t *gsa;
+ struct ifnet *ifp;
+ struct in_mfilter *imf;
+ struct ip_moptions *imo;
+ struct in_msource *ims, *tims;
+ size_t idx;
+ int error;
+
+ error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
+ sizeof(struct __msfilterreq));
+ if (error)
+ return (error);
+
+ if (msfr.msfr_nsrcs > IP_MAX_SOURCE_FILTER ||
+ (msfr.msfr_fmode != MCAST_EXCLUDE &&
+ msfr.msfr_fmode != MCAST_INCLUDE))
+ return (EINVAL);
+
+ if (msfr.msfr_group.ss_family != AF_INET ||
+ msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
+ return (EINVAL);
+
+ gsa = (sockunion_t *)&msfr.msfr_group;
+ if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
+ return (EINVAL);
+
+ gsa->sin.sin_port = 0; /* ignore port */
+
+ if (msfr.msfr_ifindex == 0 || if_index < msfr.msfr_ifindex)
+ return (EADDRNOTAVAIL);
+
+ ifp = ifnet_byindex(msfr.msfr_ifindex);
+ if (ifp == NULL)
+ return (EADDRNOTAVAIL);
+
+ /*
+ * Take the INP lock.
+ * Check if this socket is a member of this group.
+ */
+ imo = inp_findmoptions(inp);
+ idx = imo_match_group(imo, ifp, &gsa->sa);
+ if (idx == -1 || imo->imo_mfilters == NULL) {
+ error = EADDRNOTAVAIL;
+ goto out_locked;
+ }
+ imf = &imo->imo_mfilters[idx];
+
+#ifdef DIAGNOSTIC
+ if (bootverbose)
+ printf("%s: clearing source list\n", __func__);
+#endif
+
+ /*
+ * Remove any existing source filters.
+ */
+ TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
+ TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
+ FREE(ims, M_IPMSOURCE);
+ imf->imf_nsources--;
+ }
+ KASSERT(imf->imf_nsources == 0,
+ ("%s: source list not cleared", __func__));
+
+ /*
+ * Apply any new source filters, if present.
+ */
+ if (msfr.msfr_nsrcs > 0) {
+ struct in_msource **pnims;
+ struct in_msource *nims;
+ struct sockaddr_storage *kss;
+ struct sockaddr_storage *pkss;
+ sockunion_t *psu;
+ int i, j;
+
+ /*
+ * Drop the inp lock so we may sleep if we need to
+ * in order to satisfy a malloc request.
+ * We will re-take it before changing socket state.
+ */
+ INP_UNLOCK(inp);
+#ifdef DIAGNOSTIC
+ if (bootverbose) {
+ printf("%s: loading %lu source list entries\n",
+ __func__, (unsigned long)msfr.msfr_nsrcs);
+ }
+#endif
+ /*
+ * Make a copy of the user-space source vector so
+ * that we may copy it in with a single copyin. This
+ * allows us to deal with page faults up-front.
+ */
+ MALLOC(kss, struct sockaddr_storage *,
+ sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
+ M_TEMP, M_WAITOK);
+ error = copyin(msfr.msfr_srcs, kss,
+ sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
+ if (error) {
+ FREE(kss, M_TEMP);
+ return (error);
+ }
+
+ /*
+ * Perform argument checking on every sockaddr_storage
+ * structure in the vector provided to us. Overwrite
+ * fields which should not apply to source entries.
+ * TODO: Check for duplicate sources on this pass.
+ */
+ psu = (sockunion_t *)kss;
+ for (i = 0; i < msfr.msfr_nsrcs; i++, psu++) {
+ switch (psu->ss.ss_family) {
+ case AF_INET:
+ if (psu->sin.sin_len !=
+ sizeof(struct sockaddr_in)) {
+ error = EINVAL;
+ } else {
+ psu->sin.sin_port = 0;
+ }
+ break;
+#ifdef notyet
+ case AF_INET6:
+ if (psu->sin6.sin6_len !=
+ sizeof(struct sockaddr_in6)) {
+ error = EINVAL;
+ } else {
+ psu->sin6.sin6_port = 0;
+ psu->sin6.sin6_flowinfo = 0;
+ }
+ break;
+#endif
+ default:
+ error = EAFNOSUPPORT;
+ break;
+ }
+ if (error)
+ break;
+ }
+ if (error) {
+ FREE(kss, M_TEMP);
+ return (error);
+ }
+
+ /*
+ * Allocate a block to track all the in_msource
+ * entries we are about to allocate, in case we
+ * abruptly need to free them.
+ */
+ MALLOC(pnims, struct in_msource **,
+ sizeof(struct in_msource *) * msfr.msfr_nsrcs,
+ M_TEMP, M_WAITOK | M_ZERO);
+
+ /*
+ * Allocate up to nsrcs individual chunks.
+ * If we encounter an error, backtrack out of
+ * all allocations cleanly; updates must be atomic.
+ */
+ pkss = kss;
+ nims = NULL;
+ for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
+ MALLOC(nims, struct in_msource *,
+ sizeof(struct in_msource) * msfr.msfr_nsrcs,
+ M_IPMSOURCE, M_WAITOK | M_ZERO);
+ pnims[i] = nims;
+ }
+ if (i < msfr.msfr_nsrcs) {
+ for (j = 0; j < i; j++) {
+ if (pnims[j] != NULL)
+ FREE(pnims[j], M_IPMSOURCE);
+ }
+ FREE(pnims, M_TEMP);
+ FREE(kss, M_TEMP);
+ return (ENOBUFS);
+ }
+
+ INP_UNLOCK_ASSERT(inp);
+
+ /*
+ * Finally, apply the filters to the socket.
+ * Re-take the inp lock; we are changing socket state.
+ */
+ pkss = kss;
+ INP_LOCK(inp);
+ for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
+ memcpy(&(pnims[i]->ims_addr), pkss,
+ sizeof(struct sockaddr_storage));
+ TAILQ_INSERT_TAIL(&imf->imf_sources, pnims[i],
+ ims_next);
+ imf->imf_nsources++;
+ }
+ FREE(pnims, M_TEMP);
+ FREE(kss, M_TEMP);
+ }
+
+ /*
+ * Update the filter mode on the socket before releasing the inpcb.
+ */
+ INP_LOCK_ASSERT(inp);
+ imf->imf_fmode = msfr.msfr_fmode;
+
+out_locked:
+ INP_UNLOCK(inp);
+ return (error);
+}
+
+/*
+ * Set the IP multicast options in response to user setsockopt().
+ *
+ * Many of the socket options handled in this function duplicate the
+ * functionality of socket options in the regular unicast API. However,
+ * it is not possible to merge the duplicate code, because the idempotence
+ * of the IPv4 multicast part of the BSD Sockets API must be preserved;
+ * the effects of these options must be treated as separate and distinct.
+ */
+int
+inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
+{
+ struct ip_moptions *imo;
+ int error;
+
+ error = 0;
+
+ /*
+ * If the socket is neither of type SOCK_RAW nor SOCK_DGRAM,
+ * or is a divert socket, reject it.
+ * XXX Unlocked read of inp_socket believed OK.
+ */
+ if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
+ (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
+ inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
+ return (EOPNOTSUPP);
+
+ switch (sopt->sopt_name) {
+ case IP_MULTICAST_VIF: {
+ int vifi;
+ /*
+ * Select a multicast VIF for transmission.
+ * Only useful if multicast forwarding is active.
+ */
+ if (legal_vif_num == NULL) {
+ error = EOPNOTSUPP;
+ break;
+ }
+ error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
+ if (error)
+ break;
+ if (!legal_vif_num(vifi) && (vifi != -1)) {
+ error = EINVAL;
+ break;
+ }
+ imo = inp_findmoptions(inp);
+ imo->imo_multicast_vif = vifi;
+ INP_UNLOCK(inp);
+ break;
+ }
+
+ case IP_MULTICAST_IF:
+ error = inp_set_multicast_if(inp, sopt);
+ break;
+
+ case IP_MULTICAST_TTL: {
+ u_char ttl;
+
+ /*
+ * Set the IP time-to-live for outgoing multicast packets.
+ * The original multicast API required a char argument,
+ * which is inconsistent with the rest of the socket API.
+ * We allow either a char or an int.
+ */
+ if (sopt->sopt_valsize == sizeof(u_char)) {
+ error = sooptcopyin(sopt, &ttl, sizeof(u_char),
+ sizeof(u_char));
+ if (error)
+ break;
+ } else {
+ u_int ittl;
+
+ error = sooptcopyin(sopt, &ittl, sizeof(u_int),
+ sizeof(u_int));
+ if (error)
+ break;
+ if (ittl > 255) {
+ error = EINVAL;
+ break;
+ }
+ ttl = (u_char)ittl;
+ }
+ imo = inp_findmoptions(inp);
+ imo->imo_multicast_ttl = ttl;
+ INP_UNLOCK(inp);
+ break;
+ }
+
+ case IP_MULTICAST_LOOP: {
+ u_char loop;
+
+ /*
+ * Set the loopback flag for outgoing multicast packets.
+ * Must be zero or one. The original multicast API required a
+ * char argument, which is inconsistent with the rest
+ * of the socket API. We allow either a char or an int.
+ */
+ if (sopt->sopt_valsize == sizeof(u_char)) {
+ error = sooptcopyin(sopt, &loop, sizeof(u_char),
+ sizeof(u_char));
+ if (error)
+ break;
+ } else {
+ u_int iloop;
+
+ error = sooptcopyin(sopt, &iloop, sizeof(u_int),
+ sizeof(u_int));
+ if (error)
+ break;
+ loop = (u_char)iloop;
+ }
+ imo = inp_findmoptions(inp);
+ imo->imo_multicast_loop = !!loop;
+ INP_UNLOCK(inp);
+ break;
+ }
+
+ case IP_ADD_MEMBERSHIP:
+ case IP_ADD_SOURCE_MEMBERSHIP:
+ case MCAST_JOIN_GROUP:
+ case MCAST_JOIN_SOURCE_GROUP:
+ error = inp_join_group(inp, sopt);
+ break;
+
+ case IP_DROP_MEMBERSHIP:
+ case IP_DROP_SOURCE_MEMBERSHIP:
+ case MCAST_LEAVE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ error = inp_leave_group(inp, sopt);
+ break;
+
+ case IP_BLOCK_SOURCE:
+ case IP_UNBLOCK_SOURCE:
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ error = inp_change_source_filter(inp, sopt);
+ break;
+
+ case IP_MSFILTER:
+ error = inp_set_source_filters(inp, sopt);
+ break;
+
+ default:
+ error = EOPNOTSUPP;
+ break;
+ }
+
+ INP_UNLOCK_ASSERT(inp);
+
+ return (error);
+}
+
+/*
+ * Following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as an interface index.
+ */
+static struct ifnet *
+ip_multicast_if(struct in_addr *a)
+{
+ int ifindex;
+ struct ifnet *ifp;
+
+ if (ntohl(a->s_addr) >> 24 == 0) {
+ ifindex = ntohl(a->s_addr) & 0xffffff;
+ if (ifindex < 0 || if_index < ifindex)
+ return (NULL);
+ ifp = ifnet_byindex(ifindex);
+ } else
+ INADDR_TO_IFP(*a, ifp);
+ return (ifp);
+}
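
The 0.0.0.0/8 convention decoded above also gives applications a way to pick the outgoing interface by index when no convenient IPv4 address names it: the index goes in the low 24 bits of an otherwise-zero address. A minimal userland sketch (ifindex is assumed valid, error handling omitted):

    struct in_addr ifsel;

    ifsel.s_addr = htonl(ifindex & 0x00ffffff);         /* high octet stays zero */
    (void)setsockopt(s, IPPROTO_IP, IP_MULTICAST_IF, &ifsel, sizeof(ifsel));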
+
Index: igmp_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/igmp_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/igmp_var.h -L sys/netinet/igmp_var.h -u -r1.1.1.1 -r1.2
--- sys/netinet/igmp_var.h
+++ sys/netinet/igmp_var.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* from: @(#)igmp_var.h 8.1 (Berkeley) 7/19/93
- * $FreeBSD: src/sys/netinet/igmp_var.h,v 1.21 2005/01/07 01:45:44 imp Exp $
+ * $FreeBSD: src/sys/netinet/igmp_var.h,v 1.22 2007/06/12 16:24:53 bms Exp $
*/
#ifndef _NETINET_IGMP_VAR_H_
@@ -56,6 +56,7 @@
u_int igps_rcv_badreports; /* received invalid reports */
u_int igps_rcv_ourreports; /* received reports for our groups */
u_int igps_snd_reports; /* sent membership reports */
+ u_int igps_rcv_toolong; /* received with too many bytes */
};
#ifdef _KERNEL
@@ -68,12 +69,20 @@
#define IGMP_IREPORTEDLAST 1
/*
+ * State masks for IGMPv3
+ */
+#define IGMP_V3_NONEXISTENT 0x01
+#define IGMP_V3_OTHERMEMBER 0x02
+#define IGMP_V3_IREPORTEDLAST 0x04
+
+/*
* We must remember what version the subnet's querier is.
* We conveniently use the IGMP message type for the proper
* membership report to keep this state.
*/
#define IGMP_V1_ROUTER IGMP_V1_MEMBERSHIP_REPORT
#define IGMP_V2_ROUTER IGMP_V2_MEMBERSHIP_REPORT
+#define IGMP_V3_ROUTER IGMP_V3_MEMBERSHIP_REPORT
/*
* Revert to new router if we haven't heard from an old router in
@@ -81,6 +90,51 @@
*/
#define IGMP_AGE_THRESHOLD 540
+/*
+ * IGMPv3 protocol defaults
+ */
+#define IGMP_INIT_ROBVAR 2 /* Robustness */
+#define IGMP_MAX_ROBVAR 7
+#define IGMP_INIT_QRYINT 125 /* Querier's Query interval */
+#define IGMP_MAX_QRYINT 255
+#define IGMP_INIT_QRYRSP 10 /* Query Response interval */
+#define IGMP_DEF_QRYMRT 10
+#define IGMP_UNSOL_INT 1 /* Unsolicited Report interval */
+
+/*
+ * IGMPv3 report types
+ */
+#define IGMP_REPORT_MODE_IN 1 /* mode-is-include */
+#define IGMP_REPORT_MODE_EX 2 /* mode-is-exclude */
+#define IGMP_REPORT_TO_IN 3 /* change-to-include */
+#define IGMP_REPORT_TO_EX 4 /* change-to-exclude */
+#define IGMP_REPORT_ALLOW_NEW 5 /* allow-new-sources */
+#define IGMP_REPORT_BLOCK_OLD 6 /* block-old-sources */
+
+/*
+ * Report types
+ */
+#define IGMP_MASK_CUR_STATE 0x01 /* Report current-state */
+#define IGMP_MASK_ALLOW_NEW 0x02 /* Report source as allow-new */
+#define IGMP_MASK_BLOCK_OLD 0x04 /* Report source as block-old */
+#define IGMP_MASK_TO_IN 0x08 /* Report source as to_in */
+#define IGMP_MASK_TO_EX 0x10 /* Report source as to_ex */
+#define IGMP_MASK_STATE_T1 0x20 /* State at T1 */
+#define IGMP_MASK_STATE_T2 0x40 /* State at T2 */
+#define IGMP_MASK_IF_STATE 0x80 /* Report current-state per interface */
+
+#define IGMP_MASK_STATE_TX (IGMP_MASK_STATE_T1 | IGMP_MASK_STATE_T2)
+#define IGMP_MASK_PENDING (IGMP_MASK_CUR_STATE | \
+ IGMP_MASK_ALLOW_NEW | \
+ IGMP_MASK_BLOCK_OLD)
+
+/*
+ * List identifiers
+ */
+#define IGMP_EXCLUDE_LIST 1 /* exclude list used to tag report */
+#define IGMP_INCLUDE_LIST 2 /* include list used to tag report */
+#define IGMP_RECORDED_LIST 3 /* recorded list used to tag report */
+
void igmp_init(void);
void igmp_input(struct mbuf *, int);
void igmp_joingroup(struct in_multi *);
@@ -100,6 +154,6 @@
#define IGMPCTL_NAMES { \
{ 0, 0 }, \
- { "stats", CTLTYPE_STRUCT }, \
+ { "stats", CTLTYPE_STRUCT } \
}
#endif
--- /dev/null
+++ sys/netinet/ip_options.c
@@ -0,0 +1,683 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California.
+ * Copyright (c) 2005 Andre Oppermann, Internet Business Solutions AG.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_options.c,v 1.6 2007/10/07 20:44:23 silby Exp $");
+
+#include "opt_ipstealth.h"
+#include "opt_mac.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <net/netisr.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
+#include <netinet/ip_icmp.h>
+#include <machine/in_cksum.h>
+
+#include <sys/socketvar.h>
+
+#include <security/mac/mac_framework.h>
+
+static int ip_dosourceroute = 0;
+SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
+ &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
+
+static int ip_acceptsourceroute = 0;
+SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
+ CTLFLAG_RW, &ip_acceptsourceroute, 0,
+ "Enable accepting source routed IP packets");
+
+int ip_doopts = 1; /* 0 = ignore, 1 = process, 2 = reject */
+SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_RW,
+ &ip_doopts, 0, "Enable IP options processing ([LS]SRR, RR, TS)");
+
+static void save_rte(struct mbuf *m, u_char *, struct in_addr);
+
+/*
+ * Do option processing on a datagram, possibly discarding it if bad options
+ * are encountered, or forwarding it if source-routed.
+ *
+ * The pass argument is used when operating in the IPSTEALTH mode to tell
+ * what options to process: [LS]SRR (pass 0) or the others (pass 1). The
+ * reason for as many as two passes is that when doing IPSTEALTH, non-routing
+ * options should be processed only if the packet is for us.
+ *
+ * Returns 1 if packet has been forwarded/freed, 0 if the packet should be
+ * processed further.
+ */
+int
+ip_dooptions(struct mbuf *m, int pass)
+{
+ struct ip *ip = mtod(m, struct ip *);
+ u_char *cp;
+ struct in_ifaddr *ia;
+ int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
+ struct in_addr *sin, dst;
+ n_time ntime;
+ struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
+
+ /* Ignore or reject packets with IP options. */
+ if (ip_doopts == 0)
+ return 0;
+ else if (ip_doopts == 2) {
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_FILTER_PROHIB;
+ goto bad;
+ }
+
+ dst = ip->ip_dst;
+ cp = (u_char *)(ip + 1);
+ cnt = (ip->ip_hl << 2) - sizeof (struct ip);
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[IPOPT_OPTVAL];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP)
+ optlen = 1;
+ else {
+ if (cnt < IPOPT_OLEN + sizeof(*cp)) {
+ code = &cp[IPOPT_OLEN] - (u_char *)ip;
+ goto bad;
+ }
+ optlen = cp[IPOPT_OLEN];
+ if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
+ code = &cp[IPOPT_OLEN] - (u_char *)ip;
+ goto bad;
+ }
+ }
+ switch (opt) {
+
+ default:
+ break;
+
+ /*
+ * Source routing with record. Find interface with current
+ * destination address. If none on this machine then drop if
+ * strictly routed, or do nothing if loosely routed. Record
+ * interface address and bring up next address component. If
+ * strictly routed make sure next address is on directly
+ * accessible net.
+ */
+ case IPOPT_LSRR:
+ case IPOPT_SSRR:
+#ifdef IPSTEALTH
+ if (ipstealth && pass > 0)
+ break;
+#endif
+ if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
+ code = &cp[IPOPT_OLEN] - (u_char *)ip;
+ goto bad;
+ }
+ if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ ipaddr.sin_addr = ip->ip_dst;
+ ia = (struct in_ifaddr *)
+ ifa_ifwithaddr((struct sockaddr *)&ipaddr);
+ if (ia == NULL) {
+ if (opt == IPOPT_SSRR) {
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_SRCFAIL;
+ goto bad;
+ }
+ if (!ip_dosourceroute)
+ goto nosourcerouting;
+ /*
+ * Loose routing, and not at next destination
+ * yet; nothing to do except forward.
+ */
+ break;
+ }
+ off--; /* 0 origin */
+ if (off > optlen - (int)sizeof(struct in_addr)) {
+ /*
+ * End of source route. Should be for us.
+ */
+ if (!ip_acceptsourceroute)
+ goto nosourcerouting;
+ save_rte(m, cp, ip->ip_src);
+ break;
+ }
+#ifdef IPSTEALTH
+ if (ipstealth)
+ goto dropit;
+#endif
+ if (!ip_dosourceroute) {
+ if (ipforwarding) {
+ char buf[16]; /* aaa.bbb.ccc.ddd\0 */
+ /*
+ * Acting as a router, so generate
+ * ICMP
+ */
+nosourcerouting:
+ strcpy(buf, inet_ntoa(ip->ip_dst));
+ log(LOG_WARNING,
+ "attempted source route from %s to %s\n",
+ inet_ntoa(ip->ip_src), buf);
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_SRCFAIL;
+ goto bad;
+ } else {
+ /*
+ * Not acting as a router, so
+ * silently drop.
+ */
+#ifdef IPSTEALTH
+dropit:
+#endif
+ ipstat.ips_cantforward++;
+ m_freem(m);
+ return (1);
+ }
+ }
+
+ /*
+ * locate outgoing interface
+ */
+ (void)memcpy(&ipaddr.sin_addr, cp + off,
+ sizeof(ipaddr.sin_addr));
+
+ if (opt == IPOPT_SSRR) {
+#define INA struct in_ifaddr *
+#define SA struct sockaddr *
+ if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == NULL)
+ ia = (INA)ifa_ifwithnet((SA)&ipaddr);
+ } else
+ ia = ip_rtaddr(ipaddr.sin_addr);
+ if (ia == NULL) {
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_SRCFAIL;
+ goto bad;
+ }
+ ip->ip_dst = ipaddr.sin_addr;
+ (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
+ sizeof(struct in_addr));
+ cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+ /*
+ * Let ip_intr's mcast routing check handle mcast pkts
+ */
+ forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
+ break;
+
+ case IPOPT_RR:
+#ifdef IPSTEALTH
+ if (ipstealth && pass == 0)
+ break;
+#endif
+ if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ /*
+ * If no space remains, ignore.
+ */
+ off--; /* 0 origin */
+ if (off > optlen - (int)sizeof(struct in_addr))
+ break;
+ (void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
+ sizeof(ipaddr.sin_addr));
+ /*
+ * Locate outgoing interface; if we're the
+ * destination, use the incoming interface (should be
+ * same).
+ */
+ if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == NULL &&
+ (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) {
+ type = ICMP_UNREACH;
+ code = ICMP_UNREACH_HOST;
+ goto bad;
+ }
+ (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
+ sizeof(struct in_addr));
+ cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+ break;
+
+ case IPOPT_TS:
+#ifdef IPSTEALTH
+ if (ipstealth && pass == 0)
+ break;
+#endif
+ code = cp - (u_char *)ip;
+ if (optlen < 4 || optlen > 40) {
+ code = &cp[IPOPT_OLEN] - (u_char *)ip;
+ goto bad;
+ }
+ if ((off = cp[IPOPT_OFFSET]) < 5) {
+ code = &cp[IPOPT_OLEN] - (u_char *)ip;
+ goto bad;
+ }
+ if (off > optlen - (int)sizeof(int32_t)) {
+ cp[IPOPT_OFFSET + 1] += (1 << 4);
+ if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ break;
+ }
+ off--; /* 0 origin */
+ sin = (struct in_addr *)(cp + off);
+ switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
+
+ case IPOPT_TS_TSONLY:
+ break;
+
+ case IPOPT_TS_TSANDADDR:
+ if (off + sizeof(n_time) +
+ sizeof(struct in_addr) > optlen) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ ipaddr.sin_addr = dst;
+ ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
+ m->m_pkthdr.rcvif);
+ if (ia == NULL)
+ continue;
+ (void)memcpy(sin, &IA_SIN(ia)->sin_addr,
+ sizeof(struct in_addr));
+ cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+ off += sizeof(struct in_addr);
+ break;
+
+ case IPOPT_TS_PRESPEC:
+ if (off + sizeof(n_time) +
+ sizeof(struct in_addr) > optlen) {
+ code = &cp[IPOPT_OFFSET] - (u_char *)ip;
+ goto bad;
+ }
+ (void)memcpy(&ipaddr.sin_addr, sin,
+ sizeof(struct in_addr));
+ if (ifa_ifwithaddr((SA)&ipaddr) == NULL)
+ continue;
+ cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+ off += sizeof(struct in_addr);
+ break;
+
+ default:
+ code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
+ goto bad;
+ }
+ ntime = iptime();
+ (void)memcpy(cp + off, &ntime, sizeof(n_time));
+ cp[IPOPT_OFFSET] += sizeof(n_time);
+ }
+ }
+ if (forward && ipforwarding) {
+ ip_forward(m, 1);
+ return (1);
+ }
+ return (0);
+bad:
+ icmp_error(m, type, code, 0, 0);
+ ipstat.ips_badoptions++;
+ return (1);
+}
+
+/*
+ * Save incoming source route for use in replies, to be picked up later by
+ * ip_srcroute if the receiver is interested.
+ */
+static void
+save_rte(struct mbuf *m, u_char *option, struct in_addr dst)
+{
+ unsigned olen;
+ struct ipopt_tag *opts;
+
+ opts = (struct ipopt_tag *)m_tag_get(PACKET_TAG_IPOPTIONS,
+ sizeof(struct ipopt_tag), M_NOWAIT);
+ if (opts == NULL)
+ return;
+
+ olen = option[IPOPT_OLEN];
+ if (olen > sizeof(opts->ip_srcrt) - (1 + sizeof(dst))) {
+ m_tag_free((struct m_tag *)opts);
+ return;
+ }
+ bcopy(option, opts->ip_srcrt.srcopt, olen);
+ opts->ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
+ opts->ip_srcrt.dst = dst;
+ m_tag_prepend(m, (struct m_tag *)opts);
+}
+
+/*
+ * Retrieve incoming source route for use in replies, in the same form used
+ * by setsockopt. The first hop is placed before the options and will be
+ * removed later.
+ */
+struct mbuf *
+ip_srcroute(struct mbuf *m0)
+{
+ struct in_addr *p, *q;
+ struct mbuf *m;
+ struct ipopt_tag *opts;
+
+ opts = (struct ipopt_tag *)m_tag_find(m0, PACKET_TAG_IPOPTIONS, NULL);
+ if (opts == NULL)
+ return (NULL);
+
+ if (opts->ip_nhops == 0)
+ return (NULL);
+ m = m_get(M_DONTWAIT, MT_DATA);
+ if (m == NULL)
+ return (NULL);
+
+#define OPTSIZ (sizeof(opts->ip_srcrt.nop) + sizeof(opts->ip_srcrt.srcopt))
+
+ /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
+ m->m_len = opts->ip_nhops * sizeof(struct in_addr) +
+ sizeof(struct in_addr) + OPTSIZ;
+
+ /*
+ * First, save first hop for return route.
+ */
+ p = &(opts->ip_srcrt.route[opts->ip_nhops - 1]);
+ *(mtod(m, struct in_addr *)) = *p--;
+
+ /*
+ * Copy option fields and padding (nop) to mbuf.
+ */
+ opts->ip_srcrt.nop = IPOPT_NOP;
+ opts->ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
+ (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
+ &(opts->ip_srcrt.nop), OPTSIZ);
+ q = (struct in_addr *)(mtod(m, caddr_t) +
+ sizeof(struct in_addr) + OPTSIZ);
+#undef OPTSIZ
+ /*
+ * Record return path as an IP source route, reversing the path
+ * (pointers are now aligned).
+ */
+ while (p >= opts->ip_srcrt.route) {
+ *q++ = *p--;
+ }
+ /*
+ * Last hop goes to final destination.
+ */
+ *q = opts->ip_srcrt.dst;
+ m_tag_delete(m0, (struct m_tag *)opts);
+ return (m);
+}
+
+/*
+ * Strip out IP options, at higher level protocol in the kernel. Second
+ * argument is buffer to which options will be moved, and return value is
+ * their length.
+ *
+ * XXX should be deleted; last arg currently ignored.
+ */
+void
+ip_stripoptions(struct mbuf *m, struct mbuf *mopt)
+{
+ int i;
+ struct ip *ip = mtod(m, struct ip *);
+ caddr_t opts;
+ int olen;
+
+ olen = (ip->ip_hl << 2) - sizeof (struct ip);
+ opts = (caddr_t)(ip + 1);
+ i = m->m_len - (sizeof (struct ip) + olen);
+ bcopy(opts + olen, opts, (unsigned)i);
+ m->m_len -= olen;
+ if (m->m_flags & M_PKTHDR)
+ m->m_pkthdr.len -= olen;
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = sizeof(struct ip) >> 2;
+}
+
+/*
+ * Insert IP options into preformed packet. Adjust IP destination as
+ * required for IP source routing, as indicated by a non-zero in_addr at the
+ * start of the options.
+ *
+ * XXX This routine assumes that the packet has no options in place.
+ */
+struct mbuf *
+ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
+{
+ struct ipoption *p = mtod(opt, struct ipoption *);
+ struct mbuf *n;
+ struct ip *ip = mtod(m, struct ip *);
+ unsigned optlen;
+
+ optlen = opt->m_len - sizeof(p->ipopt_dst);
+ if (optlen + ip->ip_len > IP_MAXPACKET) {
+ *phlen = 0;
+ return (m); /* XXX should fail */
+ }
+ if (p->ipopt_dst.s_addr)
+ ip->ip_dst = p->ipopt_dst;
+ if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
+ MGETHDR(n, M_DONTWAIT, MT_DATA);
+ if (n == NULL) {
+ *phlen = 0;
+ return (m);
+ }
+ M_MOVE_PKTHDR(n, m);
+ n->m_pkthdr.rcvif = NULL;
+#ifdef MAC
+ mac_copy_mbuf(m, n);
+#endif
+ n->m_pkthdr.len += optlen;
+ m->m_len -= sizeof(struct ip);
+ m->m_data += sizeof(struct ip);
+ n->m_next = m;
+ m = n;
+ m->m_len = optlen + sizeof(struct ip);
+ m->m_data += max_linkhdr;
+ bcopy(ip, mtod(m, void *), sizeof(struct ip));
+ } else {
+ m->m_data -= optlen;
+ m->m_len += optlen;
+ m->m_pkthdr.len += optlen;
+ bcopy(ip, mtod(m, void *), sizeof(struct ip));
+ }
+ ip = mtod(m, struct ip *);
+ bcopy(p->ipopt_list, ip + 1, optlen);
+ *phlen = sizeof(struct ip) + optlen;
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = *phlen >> 2;
+ ip->ip_len += optlen;
+ return (m);
+}
+
+/*
+ * Copy options from ip to jp, omitting those not copied during
+ * fragmentation.
+ */
+int
+ip_optcopy(struct ip *ip, struct ip *jp)
+{
+ u_char *cp, *dp;
+ int opt, optlen, cnt;
+
+ cp = (u_char *)(ip + 1);
+ dp = (u_char *)(jp + 1);
+ cnt = (ip->ip_hl << 2) - sizeof (struct ip);
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[0];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP) {
+ /* Preserve for IP mcast tunnel's LSRR alignment. */
+ *dp++ = IPOPT_NOP;
+ optlen = 1;
+ continue;
+ }
+
+ KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
+ ("ip_optcopy: malformed ipv4 option"));
+ optlen = cp[IPOPT_OLEN];
+ KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
+ ("ip_optcopy: malformed ipv4 option"));
+
+ /* Bogus lengths should have been caught by ip_dooptions. */
+ if (optlen > cnt)
+ optlen = cnt;
+ if (IPOPT_COPIED(opt)) {
+ bcopy(cp, dp, optlen);
+ dp += optlen;
+ }
+ }
+ for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
+ *dp++ = IPOPT_EOL;
+ return (optlen);
+}
+
+/*
+ * Set up IP options in pcb for insertion in output packets. Store in mbuf
+ * with pointer in pcbopt, adding pseudo-option with destination address if
+ * source routed.
+ */
+int
+ip_pcbopts(struct inpcb *inp, int optname, struct mbuf *m)
+{
+ int cnt, optlen;
+ u_char *cp;
+ struct mbuf **pcbopt;
+ u_char opt;
+
+ INP_LOCK_ASSERT(inp);
+
+ pcbopt = &inp->inp_options;
+
+ /* turn off any old options */
+ if (*pcbopt)
+ (void)m_free(*pcbopt);
+ *pcbopt = 0;
+ if (m == NULL || m->m_len == 0) {
+ /*
+ * Only turning off any previous options.
+ */
+ if (m != NULL)
+ (void)m_free(m);
+ return (0);
+ }
+
+ if (m->m_len % sizeof(int32_t))
+ goto bad;
+ /*
+ * IP first-hop destination address will be stored before actual
+ * options; move other options back and clear it when none present.
+ */
+ if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
+ goto bad;
+ cnt = m->m_len;
+ m->m_len += sizeof(struct in_addr);
+ cp = mtod(m, u_char *) + sizeof(struct in_addr);
+ bcopy(mtod(m, void *), cp, (unsigned)cnt);
+ bzero(mtod(m, void *), sizeof(struct in_addr));
+
+ for (; cnt > 0; cnt -= optlen, cp += optlen) {
+ opt = cp[IPOPT_OPTVAL];
+ if (opt == IPOPT_EOL)
+ break;
+ if (opt == IPOPT_NOP)
+ optlen = 1;
+ else {
+ if (cnt < IPOPT_OLEN + sizeof(*cp))
+ goto bad;
+ optlen = cp[IPOPT_OLEN];
+ if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
+ goto bad;
+ }
+ switch (opt) {
+
+ default:
+ break;
+
+ case IPOPT_LSRR:
+ case IPOPT_SSRR:
+ /*
+ * User process specifies route as:
+ *
+ * ->A->B->C->D
+ *
+ * D must be our final destination (but we can't
+ * check that since we may not have connected yet).
+ * A is first hop destination, which doesn't appear
+ * in actual IP option, but is stored before the
+ * options.
+ */
+ if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
+ goto bad;
+ m->m_len -= sizeof(struct in_addr);
+ cnt -= sizeof(struct in_addr);
+ optlen -= sizeof(struct in_addr);
+ cp[IPOPT_OLEN] = optlen;
+ /*
+ * Move first hop before start of options.
+ */
+ bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
+ sizeof(struct in_addr));
+ /*
+ * Then copy rest of options back
+ * to close up the deleted entry.
+ */
+ bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
+ &cp[IPOPT_OFFSET+1],
+ (unsigned)cnt - (IPOPT_MINOFF - 1));
+ break;
+ }
+ }
+ if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
+ goto bad;
+ *pcbopt = m;
+ return (0);
+
+bad:
+ (void)m_free(m);
+ return (EINVAL);
+}
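
ip_dooptions(), ip_optcopy() and ip_pcbopts() above all walk the options area with the
same type/length loop: the single-byte EOL and NOP options are special-cased, and every
other option must carry a length byte that stays within the remaining option space. A
stand-alone user-space sketch of that walk (walk_ipopts() and the sample buffer are
hypothetical; the IPOPT_* values mirror <netinet/ip.h>):

#include <stdio.h>

#define IPOPT_EOL	0	/* end of option list */
#define IPOPT_NOP	1	/* no operation */
#define IPOPT_OPTVAL	0	/* offset of option type */
#define IPOPT_OLEN	1	/* offset of option length */

/*
 * Walk an IPv4 options area of 'cnt' bytes, printing each option.
 * Returns 0 on success, -1 on a malformed length (the condition
 * ip_dooptions() answers with an ICMP parameter problem).
 */
static int
walk_ipopts(const unsigned char *cp, int cnt)
{
	int opt, optlen;

	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[IPOPT_OPTVAL];
		if (opt == IPOPT_EOL)
			break;
		if (opt == IPOPT_NOP) {
			optlen = 1;
			continue;
		}
		if (cnt < 2)
			return (-1);
		optlen = cp[IPOPT_OLEN];
		if (optlen < 2 || optlen > cnt)
			return (-1);
		printf("option %d, length %d\n", opt, optlen);
	}
	return (0);
}

int
main(void)
{
	/* NOP, then a record-route option (type 7, length 7), then EOL. */
	unsigned char opts[] = { 1, 7, 7, 4, 0, 0, 0, 0, 0 };

	return (walk_ipopts(opts, sizeof(opts)) == 0 ? 0 : 1);
}
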
Index: in.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/in.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/in.h -L sys/netinet/in.h -u -r1.2 -r1.3
--- sys/netinet/in.h
+++ sys/netinet/in.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)in.h 8.3 (Berkeley) 1/3/94
- * $FreeBSD: src/sys/netinet/in.h,v 1.90.2.4 2005/12/27 00:52:24 gnn Exp $
+ * $FreeBSD: src/sys/netinet/in.h,v 1.100 2007/06/12 16:24:53 bms Exp $
*/
#ifndef _NETINET_IN_H_
@@ -84,6 +84,33 @@
#define _STRUCT_IN_ADDR_DECLARED
#endif
+#ifndef _SOCKLEN_T_DECLARED
+typedef __socklen_t socklen_t;
+#define _SOCKLEN_T_DECLARED
+#endif
+
+/* Avoid collision with original definition in sys/socket.h. */
+#ifndef _STRUCT_SOCKADDR_STORAGE_DECLARED
+/*
+ * RFC 2553: protocol-independent placeholder for socket addresses
+ */
+#define _SS_MAXSIZE 128U
+#define _SS_ALIGNSIZE (sizeof(__int64_t))
+#define _SS_PAD1SIZE (_SS_ALIGNSIZE - sizeof(unsigned char) - \
+ sizeof(sa_family_t))
+#define _SS_PAD2SIZE (_SS_MAXSIZE - sizeof(unsigned char) - \
+ sizeof(sa_family_t) - _SS_PAD1SIZE - _SS_ALIGNSIZE)
+
+struct sockaddr_storage {
+ unsigned char ss_len; /* address length */
+ sa_family_t ss_family; /* address family */
+ char __ss_pad1[_SS_PAD1SIZE];
+ __int64_t __ss_align; /* force desired struct alignment */
+ char __ss_pad2[_SS_PAD2SIZE];
+};
+#define _STRUCT_SOCKADDR_STORAGE_DECLARED
+#endif
+
/* Socket address, internet style. */
struct sockaddr_in {
uint8_t sin_len;
@@ -228,7 +255,7 @@
#define IPPROTO_APES 99 /* any private encr. scheme */
#define IPPROTO_GMTP 100 /* GMTP*/
#define IPPROTO_IPCOMP 108 /* payload compression (IPComp) */
-#define IPPROTO_SCTP 132 /* SCTP */
+#define IPPROTO_SCTP 132 /* SCTP */
/* 101-254: Partly Unassigned */
#define IPPROTO_PIM 103 /* Protocol Independent Mcast */
#define IPPROTO_CARP 112 /* CARP */
@@ -351,15 +378,15 @@
#define IN_EXPERIMENTAL(i) (((u_int32_t)(i) & 0xf0000000) == 0xf0000000)
#define IN_BADCLASS(i) (((u_int32_t)(i) & 0xf0000000) == 0xf0000000)
-#define IN_LINKLOCAL(i) (((u_int32_t)(i) & 0xffff0000) == 0xa9fe0000)
+#define IN_LINKLOCAL(i) (((u_int32_t)(i) & 0xffff0000) == 0xa9fe0000)
-#define IN_PRIVATE(i) ((((u_int32_t)(i) & 0xff000000) == 0x0a000000) || \
- (((u_int32_t)(i) & 0xfff00000) == 0xac100000) || \
- (((u_int32_t)(i) & 0xffff0000) == 0xc0a80000))
-
-#define IN_LOCAL_GROUP(i) (((u_int32_t)(i) & 0xffffff00) == 0xe0000000)
+#define IN_PRIVATE(i) ((((u_int32_t)(i) & 0xff000000) == 0x0a000000) || \
+ (((u_int32_t)(i) & 0xfff00000) == 0xac100000) || \
+ (((u_int32_t)(i) & 0xffff0000) == 0xc0a80000))
+
+#define IN_LOCAL_GROUP(i) (((u_int32_t)(i) & 0xffffff00) == 0xe0000000)
-#define IN_ANY_LOCAL(i) (IN_LINKLOCAL(i) || IN_LOCAL_GROUP(i))
+#define IN_ANY_LOCAL(i) (IN_LINKLOCAL(i) || IN_LOCAL_GROUP(i))
#define INADDR_LOOPBACK (u_int32_t)0x7f000001
#ifndef _KERNEL
@@ -369,6 +396,7 @@
#define INADDR_UNSPEC_GROUP (u_int32_t)0xe0000000 /* 224.0.0.0 */
#define INADDR_ALLHOSTS_GROUP (u_int32_t)0xe0000001 /* 224.0.0.1 */
#define INADDR_ALLRTRS_GROUP (u_int32_t)0xe0000002 /* 224.0.0.2 */
+#define INADDR_ALLRPTS_GROUP (u_int32_t)0xe0000016 /* 224.0.0.22, IGMPv3 */
#define INADDR_CARP_GROUP (u_int32_t)0xe0000012 /* 224.0.0.18 */
#define INADDR_PFSYNC_GROUP (u_int32_t)0xe00000f0 /* 224.0.0.240 */
#define INADDR_ALLMDNS_GROUP (u_int32_t)0xe00000fb /* 224.0.0.251 */
@@ -389,7 +417,8 @@
#define IP_RECVDSTADDR 7 /* bool; receive IP dst addr w/dgram */
#define IP_SENDSRCADDR IP_RECVDSTADDR /* cmsg_type to set src addr */
#define IP_RETOPTS 8 /* ip_opts; set/get IP options */
-#define IP_MULTICAST_IF 9 /* u_char; set/get IP multicast i/f */
+#define IP_MULTICAST_IF 9 /* struct in_addr *or* struct ip_mreqn;
+ * set/get IP multicast i/f */
#define IP_MULTICAST_TTL 10 /* u_char; set/get IP multicast ttl */
#define IP_MULTICAST_LOOP 11 /* u_char; set/get IP multicast loopback */
#define IP_ADD_MEMBERSHIP 12 /* ip_mreq; add an IP group membership */
@@ -420,6 +449,11 @@
#define IP_FW_GET 54 /* get entire firewall rule chain */
#define IP_FW_RESETLOG 55 /* reset logging counters */
+#define IP_FW_NAT_CFG 56 /* add/config a nat rule */
+#define IP_FW_NAT_DEL 57 /* delete a nat rule */
+#define IP_FW_NAT_GET_CONFIG 58 /* get configuration of a nat rule */
+#define IP_FW_NAT_GET_LOG 59 /* get log of a nat rule */
+
#define IP_DUMMYNET_CONFIGURE 60 /* add/configure a dummynet pipe */
#define IP_DUMMYNET_DEL 61 /* delete a dummynet pipe from chain */
#define IP_DUMMYNET_FLUSH 62 /* flush dummynet */
@@ -429,12 +463,37 @@
#define IP_MINTTL 66 /* minimum TTL for packet or drop */
#define IP_DONTFRAG 67 /* don't fragment packet */
+/* IPv4 Source Filter Multicast API [RFC3678] */
+#define IP_ADD_SOURCE_MEMBERSHIP 70 /* join a source-specific group */
+#define IP_DROP_SOURCE_MEMBERSHIP 71 /* drop a single source */
+#define IP_BLOCK_SOURCE 72 /* block a source */
+#define IP_UNBLOCK_SOURCE 73 /* unblock a source */
+
+/* The following option is private; do not use it from user applications. */
+#define IP_MSFILTER 74 /* set/get filter list */
+
+/* Protocol Independent Multicast API [RFC3678] */
+#define MCAST_JOIN_GROUP 80 /* join an any-source group */
+#define MCAST_LEAVE_GROUP 81 /* leave all sources for group */
+#define MCAST_JOIN_SOURCE_GROUP 82 /* join a source-specific group */
+#define MCAST_LEAVE_SOURCE_GROUP 83 /* leave a single source */
+#define MCAST_BLOCK_SOURCE 84 /* block a source */
+#define MCAST_UNBLOCK_SOURCE 85 /* unblock a source */
+
/*
* Defaults and limits for options
*/
#define IP_DEFAULT_MULTICAST_TTL 1 /* normally limit m'casts to 1 hop */
#define IP_DEFAULT_MULTICAST_LOOP 1 /* normally hear sends if a member */
-#define IP_MAX_MEMBERSHIPS 20 /* per socket */
+
+/*
+ * The imo_membership vector for each socket is now dynamically allocated at
+ * run-time, bounded by USHRT_MAX, and is reallocated when needed, sized
+ * according to a power-of-two increment.
+ */
+#define IP_MIN_MEMBERSHIPS 31
+#define IP_MAX_MEMBERSHIPS 4095
+#define IP_MAX_SOURCE_FILTER 1024 /* # of filters per socket, per group */
/*
* Argument structure for IP_ADD_MEMBERSHIP and IP_DROP_MEMBERSHIP.
@@ -445,6 +504,82 @@
};
/*
+ * Modified argument structure for IP_MULTICAST_IF, obtained from Linux.
+ * This is used to specify an interface index for multicast sends, as
+ * the IPv4 legacy APIs do not support this (unless IP_SENDIF is available).
+ */
+struct ip_mreqn {
+ struct in_addr imr_multiaddr; /* IP multicast address of group */
+ struct in_addr imr_address; /* local IP address of interface */
+ int imr_ifindex; /* Interface index; cast to uint32_t */
+};
+
+/*
+ * Argument structure for IPv4 Multicast Source Filter APIs. [RFC3678]
+ */
+struct ip_mreq_source {
+ struct in_addr imr_multiaddr; /* IP multicast address of group */
+ struct in_addr imr_sourceaddr; /* IP address of source */
+ struct in_addr imr_interface; /* local IP address of interface */
+};
+
+/*
+ * Argument structures for Protocol-Independent Multicast Source
+ * Filter APIs. [RFC3678]
+ */
+struct group_req {
+ uint32_t gr_interface; /* interface index */
+ struct sockaddr_storage gr_group; /* group address */
+};
+
+struct group_source_req {
+ uint32_t gsr_interface; /* interface index */
+ struct sockaddr_storage gsr_group; /* group address */
+ struct sockaddr_storage gsr_source; /* source address */
+};
+
+#ifndef __MSFILTERREQ_DEFINED
+#define __MSFILTERREQ_DEFINED
+/*
+ * The following structure is private; do not use it from user applications.
+ * It is used to communicate IP_MSFILTER/IPV6_MSFILTER information between
+ * the RFC 3678 libc functions and the kernel.
+ */
+struct __msfilterreq {
+ uint32_t msfr_ifindex; /* interface index */
+ uint32_t msfr_fmode; /* filter mode for group */
+ uint32_t msfr_nsrcs; /* # of sources in msfr_srcs */
+ struct sockaddr_storage msfr_group; /* group address */
+ struct sockaddr_storage *msfr_srcs; /* pointer to the first member
+ * of a contiguous array of
+ * sources to filter in full.
+ */
+};
+#endif
+
+struct sockaddr;
+
+/*
+ * Advanced (Full-state) APIs [RFC3678]
+ * The RFC specifies uint_t for the 6th argument to [sg]etsourcefilter().
+ * We use uint32_t here to be consistent.
+ */
+int setipv4sourcefilter(int, struct in_addr, struct in_addr, uint32_t,
+ uint32_t, struct in_addr *);
+int getipv4sourcefilter(int, struct in_addr, struct in_addr, uint32_t *,
+ uint32_t *, struct in_addr *);
+int setsourcefilter(int, uint32_t, struct sockaddr *, socklen_t,
+ uint32_t, uint32_t, struct sockaddr_storage *);
+int getsourcefilter(int, uint32_t, struct sockaddr *, socklen_t,
+ uint32_t *, uint32_t *, struct sockaddr_storage *);
+
+/*
+ * Filter modes; also used to represent per-socket filter mode internally.
+ */
+#define MCAST_INCLUDE 1 /* fmode: include these source(s) */
+#define MCAST_EXCLUDE 2 /* fmode: exclude these source(s) */
+
+/*
* Argument for IP_PORTRANGE:
* - which range to search when port is unspecified at bind() or connect()
*/
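
The protocol-independent multicast options added above (MCAST_JOIN_GROUP and friends)
take a struct group_req or struct group_source_req through setsockopt(). A minimal
sketch of a source-specific join, assuming placeholder group and source addresses and a
placeholder interface name:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Join group 232.1.1.1 restricted to source 192.0.2.1 using the
 * MCAST_JOIN_SOURCE_GROUP option declared in the in.h hunk above.
 */
int
main(void)
{
	struct group_source_req gsr;
	struct sockaddr_in *grp, *src;
	int s;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	if (s == -1) {
		perror("socket");
		return (1);
	}

	memset(&gsr, 0, sizeof(gsr));
	gsr.gsr_interface = if_nametoindex("em0");	/* placeholder interface */

	grp = (struct sockaddr_in *)&gsr.gsr_group;
	grp->sin_family = AF_INET;
	grp->sin_len = sizeof(*grp);
	inet_pton(AF_INET, "232.1.1.1", &grp->sin_addr);

	src = (struct sockaddr_in *)&gsr.gsr_source;
	src->sin_family = AF_INET;
	src->sin_len = sizeof(*src);
	inet_pton(AF_INET, "192.0.2.1", &src->sin_addr);

	if (setsockopt(s, IPPROTO_IP, MCAST_JOIN_SOURCE_GROUP,
	    &gsr, sizeof(gsr)) == -1) {
		perror("MCAST_JOIN_SOURCE_GROUP");
		close(s);
		return (1);
	}
	close(s);
	return (0);
}
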
Index: tcp_input.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -L sys/netinet/tcp_input.c -L sys/netinet/tcp_input.c -u -r1.5 -r1.6
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -27,21 +27,20 @@
* SUCH DAMAGE.
*
* @(#)tcp_input.c 8.12 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.281.2.5 2006/03/01 21:13:29 andre Exp $
*/
-#include "opt_ipfw.h" /* for ipfw_fwd */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_input.c,v 1.370 2007/10/07 20:44:23 silby Exp $");
+
+#include "opt_ipfw.h" /* for ipfw_fwd */
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_mac.h"
#include "opt_tcpdebug.h"
-#include "opt_tcp_input.h"
-#include "opt_tcp_sack.h"
#include <sys/param.h>
#include <sys/kernel.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h> /* for proc0 declaration */
@@ -60,14 +59,17 @@
#include <net/if.h>
#include <net/route.h>
+#define TCPSTATES /* for logging */
+
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
-#include <netinet/ip_icmp.h> /* for ICMP_BANDLIM */
-#include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
+#include <netinet/ip_icmp.h> /* required for icmp_var.h */
+#include <netinet/icmp_var.h> /* for ICMP_BANDLIM */
#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet6/in6_pcb.h>
@@ -80,47 +82,42 @@
#include <netinet/tcp_var.h>
#include <netinet6/tcp6_var.h>
#include <netinet/tcpip.h>
+#include <netinet/tcp_syncache.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif /* TCPDEBUG */
-#ifdef FAST_IPSEC
+#ifdef IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
-#endif /*FAST_IPSEC*/
-
-#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#include <netinet6/ipsec6.h>
-#include <netkey/key.h>
#endif /*IPSEC*/
#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
static const int tcprexmtthresh = 3;
struct tcpstat tcpstat;
SYSCTL_STRUCT(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RW,
&tcpstat , tcpstat, "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
-static int log_in_vain = 0;
+int tcp_log_in_vain = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW,
- &log_in_vain, 0, "Log all incoming TCP connections");
+ &tcp_log_in_vain, 0, "Log all incoming TCP segments to closed ports");
static int blackhole = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_RW,
- &blackhole, 0, "Do not send RST when dropping refused connections");
+ &blackhole, 0, "Do not send RST on segments to closed ports");
int tcp_delack_enabled = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_RW,
&tcp_delack_enabled, 0,
"Delay ACK to try and piggyback it onto a data packet");
-#ifdef TCP_DROP_SYNFIN
static int drop_synfin = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW,
&drop_synfin, 0, "Drop TCP packets with SYN+FIN set");
-#endif
static int tcp_do_rfc3042 = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_RW,
@@ -134,58 +131,34 @@
static int tcp_insecure_rst = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_RW,
&tcp_insecure_rst, 0,
- "Follow the old (insecure) criteria for accepting RST packets.");
-
-SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
- "TCP Segment Reassembly Queue");
-
-static int tcp_reass_maxseg = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
- &tcp_reass_maxseg, 0,
- "Global maximum number of TCP Segments in Reassembly Queue");
-
-int tcp_reass_qsize = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
- &tcp_reass_qsize, 0,
- "Global number of TCP Segments currently in Reassembly Queue");
-
-static int tcp_reass_maxqlen = 48;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxqlen, CTLFLAG_RW,
- &tcp_reass_maxqlen, 0,
- "Maximum number of TCP Segments per individual Reassembly Queue");
-
-static int tcp_reass_overflows = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
- &tcp_reass_overflows, 0,
- "Global number of TCP Segment Reassembly Queue Overflows");
+ "Follow the old (insecure) criteria for accepting RST packets");
int tcp_do_autorcvbuf = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_RW,
- &tcp_do_autorcvbuf, 0, "Enable automatic receive buffer sizing");
+ &tcp_do_autorcvbuf, 0, "Enable automatic receive buffer sizing");
int tcp_autorcvbuf_inc = 16*1024;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_RW,
- &tcp_autorcvbuf_inc, 0, "Incrementor step size of automatic receive buffer");
+ &tcp_autorcvbuf_inc, 0,
+ "Incrementor step size of automatic receive buffer");
int tcp_autorcvbuf_max = 256*1024;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW,
- &tcp_autorcvbuf_max, 0, "Max size of automatic receive buffer");
+ &tcp_autorcvbuf_max, 0, "Max size of automatic receive buffer");
struct inpcbhead tcb;
#define tcb6 tcb /* for KAME src sync over BSD*'s */
struct inpcbinfo tcbinfo;
-struct mtx *tcbinfo_mtx;
static void tcp_dooptions(struct tcpopt *, u_char *, int, int);
-
+static void tcp_do_segment(struct mbuf *, struct tcphdr *,
+ struct socket *, struct tcpcb *, int, int);
+static void tcp_dropwithreset(struct mbuf *, struct tcphdr *,
+ struct tcpcb *, int, int);
static void tcp_pulloutofband(struct socket *,
struct tcphdr *, struct mbuf *, int);
-static int tcp_reass(struct tcpcb *, struct tcphdr *, int *,
- struct mbuf *);
static void tcp_xmit_timer(struct tcpcb *, int);
static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
-static int tcp_timewait(struct tcptw *, struct tcpopt *,
- struct tcphdr *, struct mbuf *, int);
/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
#ifdef INET6
@@ -208,201 +181,25 @@
* - this is a half-synchronized T/TCP connection.
*/
#define DELAY_ACK(tp) \
- ((!callout_active(tp->tt_delack) && \
+ ((!tcp_timer_active(tp, TT_DELACK) && \
(tp->t_flags & TF_RXWIN0SENT) == 0) && \
(tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
-/* Initialize TCP reassembly queue */
-uma_zone_t tcp_reass_zone;
-void
-tcp_reass_init()
-{
- tcp_reass_maxseg = nmbclusters / 16;
- TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
- &tcp_reass_maxseg);
- tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
- uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
-}
-
-static int
-tcp_reass(tp, th, tlenp, m)
- register struct tcpcb *tp;
- register struct tcphdr *th;
- int *tlenp;
- struct mbuf *m;
-{
- struct tseg_qent *q;
- struct tseg_qent *p = NULL;
- struct tseg_qent *nq;
- struct tseg_qent *te = NULL;
- struct socket *so = tp->t_inpcb->inp_socket;
- int flags;
-
- INP_LOCK_ASSERT(tp->t_inpcb);
-
- /*
- * XXX: tcp_reass() is rather inefficient with its data structures
- * and should be rewritten (see NetBSD for optimizations). While
- * doing that it should move to its own file tcp_reass.c.
- */
-
- /*
- * Call with th==NULL after become established to
- * force pre-ESTABLISHED data up to user socket.
- */
- if (th == NULL)
- goto present;
-
- /*
- * Limit the number of segments in the reassembly queue to prevent
- * holding on to too many segments (and thus running out of mbufs).
- * Make sure to let the missing segment through which caused this
- * queue. Always keep one global queue entry spare to be able to
- * process the missing segment.
- */
- if (th->th_seq != tp->rcv_nxt &&
- (tcp_reass_qsize + 1 >= tcp_reass_maxseg ||
- tp->t_segqlen >= tcp_reass_maxqlen)) {
- tcp_reass_overflows++;
- tcpstat.tcps_rcvmemdrop++;
- m_freem(m);
- *tlenp = 0;
- return (0);
- }
-
- /*
- * Allocate a new queue entry. If we can't, or hit the zone limit
- * just drop the pkt.
- */
- te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
- if (te == NULL) {
- tcpstat.tcps_rcvmemdrop++;
- m_freem(m);
- *tlenp = 0;
- return (0);
- }
- tp->t_segqlen++;
- tcp_reass_qsize++;
-
- /*
- * Find a segment which begins after this one does.
- */
- LIST_FOREACH(q, &tp->t_segq, tqe_q) {
- if (SEQ_GT(q->tqe_th->th_seq, th->th_seq))
- break;
- p = q;
- }
-
- /*
- * If there is a preceding segment, it may provide some of
- * our data already. If so, drop the data from the incoming
- * segment. If it provides all of our data, drop us.
- */
- if (p != NULL) {
- register int i;
- /* conversion to int (in i) handles seq wraparound */
- i = p->tqe_th->th_seq + p->tqe_len - th->th_seq;
- if (i > 0) {
- if (i >= *tlenp) {
- tcpstat.tcps_rcvduppack++;
- tcpstat.tcps_rcvdupbyte += *tlenp;
- m_freem(m);
- uma_zfree(tcp_reass_zone, te);
- tp->t_segqlen--;
- tcp_reass_qsize--;
- /*
- * Try to present any queued data
- * at the left window edge to the user.
- * This is needed after the 3-WHS
- * completes.
- */
- goto present; /* ??? */
- }
- m_adj(m, i);
- *tlenp -= i;
- th->th_seq += i;
- }
- }
- tcpstat.tcps_rcvoopack++;
- tcpstat.tcps_rcvoobyte += *tlenp;
-
- /*
- * While we overlap succeeding segments trim them or,
- * if they are completely covered, dequeue them.
- */
- while (q) {
- register int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
- if (i <= 0)
- break;
- if (i < q->tqe_len) {
- q->tqe_th->th_seq += i;
- q->tqe_len -= i;
- m_adj(q->tqe_m, i);
- break;
- }
-
- nq = LIST_NEXT(q, tqe_q);
- LIST_REMOVE(q, tqe_q);
- m_freem(q->tqe_m);
- uma_zfree(tcp_reass_zone, q);
- tp->t_segqlen--;
- tcp_reass_qsize--;
- q = nq;
- }
-
- /* Insert the new segment queue entry into place. */
- te->tqe_m = m;
- te->tqe_th = th;
- te->tqe_len = *tlenp;
-
- if (p == NULL) {
- LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q);
- } else {
- LIST_INSERT_AFTER(p, te, tqe_q);
- }
-
-present:
- /*
- * Present data to user, advancing rcv_nxt through
- * completed sequence space.
- */
- if (!TCPS_HAVEESTABLISHED(tp->t_state))
- return (0);
- q = LIST_FIRST(&tp->t_segq);
- if (!q || q->tqe_th->th_seq != tp->rcv_nxt)
- return (0);
- SOCKBUF_LOCK(&so->so_rcv);
- do {
- tp->rcv_nxt += q->tqe_len;
- flags = q->tqe_th->th_flags & TH_FIN;
- nq = LIST_NEXT(q, tqe_q);
- LIST_REMOVE(q, tqe_q);
- if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
- m_freem(q->tqe_m);
- else
- sbappendstream_locked(&so->so_rcv, q->tqe_m);
- uma_zfree(tcp_reass_zone, q);
- tp->t_segqlen--;
- tcp_reass_qsize--;
- q = nq;
- } while (q && q->tqe_th->th_seq == tp->rcv_nxt);
- ND6_HINT(tp);
- sorwakeup_locked(so);
- return (flags);
-}
/*
- * TCP input routine, follows pages 65-76 of the
- * protocol specification dated September, 1981 very closely.
+ * TCP input handling is split into multiple parts:
+ * tcp6_input is a thin wrapper around tcp_input for the extended
+ * ip6_protox[] call format in ip6_input
+ * tcp_input handles primary segment validation, inpcb lookup and
+ * SYN processing on listen sockets
+ * tcp_do_segment processes the ACK and text of the segment for
+ * establishing, established and closing connections
*/
#ifdef INET6
int
-tcp6_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp, proto;
+tcp6_input(struct mbuf **mp, int *offp, int proto)
{
- register struct mbuf *m = *mp;
+ struct mbuf *m = *mp;
struct in6_ifaddr *ia6;
IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), IPPROTO_DONE);
@@ -427,43 +224,39 @@
#endif
void
-tcp_input(m, off0)
- register struct mbuf *m;
- int off0;
+tcp_input(struct mbuf *m, int off0)
{
- register struct tcphdr *th;
- register struct ip *ip = NULL;
- register struct ipovly *ipov;
- register struct inpcb *inp = NULL;
+ struct tcphdr *th;
+ struct ip *ip = NULL;
+ struct ipovly *ipov;
+ struct inpcb *inp = NULL;
+ struct tcpcb *tp = NULL;
+ struct socket *so = NULL;
u_char *optp = NULL;
int optlen = 0;
int len, tlen, off;
int drop_hdrlen;
- register struct tcpcb *tp = 0;
- register int thflags;
- struct socket *so = 0;
- int todrop, acked, ourfinisacked, needoutput = 0;
- u_long tiwin;
- struct tcpopt to; /* options in this segment */
- int headlocked = 0;
+ int thflags;
+ int rstreason = 0; /* For badport_bandlim accounting purposes */
#ifdef IPFIREWALL_FORWARD
struct m_tag *fwd_tag;
#endif
- int rstreason; /* For badport_bandlim accounting purposes */
-
- struct ip6_hdr *ip6 = NULL;
#ifdef INET6
+ struct ip6_hdr *ip6 = NULL;
int isipv6;
#else
+ const void *ip6 = NULL;
const int isipv6 = 0;
#endif
+ struct tcpopt to; /* options in this segment */
+ char *s = NULL; /* address and port logging */
#ifdef TCPDEBUG
/*
* The size of tcp_saveipgen must be the size of the max ip header,
* now IPv6.
*/
- u_char tcp_saveipgen[40];
+ u_char tcp_saveipgen[IP6_HDR_LEN];
struct tcphdr tcp_savetcp;
short ostate = 0;
#endif
@@ -471,13 +264,13 @@
#ifdef INET6
isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
#endif
- bzero((char *)&to, sizeof(to));
+ to.to_flags = 0;
tcpstat.tcps_rcvtotal++;
if (isipv6) {
#ifdef INET6
- /* IP6_EXTHDR_CHECK() is already done at tcp6_input() */
+ /* IP6_EXTHDR_CHECK() is already done at tcp6_input(). */
ip6 = mtod(m, struct ip6_hdr *);
tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) {
@@ -499,7 +292,7 @@
goto drop;
}
#else
- th = NULL; /* XXX: avoid compiler warning */
+ th = NULL; /* XXX: Avoid compiler warning. */
#endif
} else {
/*
@@ -511,7 +304,8 @@
off0 = sizeof(struct ip);
}
if (m->m_len < sizeof (struct tcpiphdr)) {
- if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
+ if ((m = m_pullup(m, sizeof (struct tcpiphdr)))
+ == NULL) {
tcpstat.tcps_rcvshort++;
return;
}
@@ -549,10 +343,8 @@
tcpstat.tcps_rcvbadsum++;
goto drop;
}
-#ifdef INET6
/* Re-initialization for later version check */
ip->ip_v = IPVERSION;
-#endif
}
/*
@@ -575,7 +367,7 @@
} else {
if (m->m_len < sizeof(struct ip) + off) {
if ((m = m_pullup(m, sizeof (struct ip) + off))
- == 0) {
+ == NULL) {
tcpstat.tcps_rcvshort++;
return;
}
@@ -589,18 +381,6 @@
}
thflags = th->th_flags;
-#ifdef TCP_DROP_SYNFIN
- /*
- * If the drop_synfin option is enabled, drop all packets with
- * both the SYN and FIN bits set. This prevents e.g. nmap from
- * identifying the TCP/IP stack.
- *
- * This is a violation of the TCP specification.
- */
- if (drop_synfin && (thflags & (TH_SYN|TH_FIN)) == (TH_SYN|TH_FIN))
- goto drop;
-#endif
-
/*
* Convert TCP protocol specific fields to host format.
*/
@@ -610,14 +390,7 @@
th->th_urp = ntohs(th->th_urp);
/*
- * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options,
- * until after ip6_savecontrol() is called and before other functions
- * which don't want those proto headers.
- * Because ip6_savecontrol() is going to parse the mbuf to
- * search for data to be passed up to user-land, it wants mbuf
- * parameters to be unchanged.
- * XXX: the call of ip6_savecontrol() has been obsoleted based on
- * latest version of the advanced API (20020110).
+ * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options.
*/
drop_hdrlen = off0 + off;
@@ -625,11 +398,12 @@
* Locate pcb for segment.
*/
INP_INFO_WLOCK(&tcbinfo);
- headlocked = 1;
findpcb:
- KASSERT(headlocked, ("tcp_input: findpcb: head not locked"));
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
#ifdef IPFIREWALL_FORWARD
- /* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */
+ /*
+ * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
+ */
fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
if (fwd_tag != NULL && isipv6 == 0) { /* IPv6 support is not yet */
@@ -652,251 +426,294 @@
next_hop->sin_port ?
ntohs(next_hop->sin_port) :
th->th_dport,
- 1, m->m_pkthdr.rcvif);
+ INPLOOKUP_WILDCARD,
+ m->m_pkthdr.rcvif);
}
/* Remove the tag from the packet. We don't need it anymore. */
m_tag_delete(m, fwd_tag);
- } else {
+ } else
#endif /* IPFIREWALL_FORWARD */
+ {
if (isipv6) {
#ifdef INET6
inp = in6_pcblookup_hash(&tcbinfo,
&ip6->ip6_src, th->th_sport,
&ip6->ip6_dst, th->th_dport,
- 1, m->m_pkthdr.rcvif);
+ INPLOOKUP_WILDCARD,
+ m->m_pkthdr.rcvif);
#endif
} else
inp = in_pcblookup_hash(&tcbinfo,
ip->ip_src, th->th_sport,
ip->ip_dst, th->th_dport,
- 1, m->m_pkthdr.rcvif);
-#ifdef IPFIREWALL_FORWARD
+ INPLOOKUP_WILDCARD,
+ m->m_pkthdr.rcvif);
}
-#endif /* IPFIREWALL_FORWARD */
-
-#if defined(IPSEC) || defined(FAST_IPSEC)
-#ifdef INET6
- if (isipv6) {
- if (inp != NULL && ipsec6_in_reject(m, inp)) {
-#ifdef IPSEC
- ipsec6stat.in_polvio++;
-#endif
- goto drop;
- }
- } else
-#endif /* INET6 */
- if (inp != NULL && ipsec4_in_reject(m, inp)) {
-#ifdef IPSEC
- ipsecstat.in_polvio++;
-#endif
- goto drop;
- }
-#endif /*IPSEC || FAST_IPSEC*/
/*
- * If the state is CLOSED (i.e., TCB does not exist) then
- * all data in the incoming segment is discarded.
- * If the TCB exists but is in CLOSED state, it is embryonic,
- * but should either do a listen or a connect soon.
+ * If the INPCB does not exist then all data in the incoming
+ * segment is discarded and an appropriate RST is sent back.
*/
if (inp == NULL) {
- if (log_in_vain) {
-#ifdef INET6
- char dbuf[INET6_ADDRSTRLEN+2], sbuf[INET6_ADDRSTRLEN+2];
-#else
- char dbuf[4*sizeof "123"], sbuf[4*sizeof "123"];
-#endif
+ /*
+ * Log communication attempts to ports that are not
+ * in use.
+ */
+ if ((tcp_log_in_vain == 1 && (thflags & TH_SYN)) ||
+ tcp_log_in_vain == 2) {
+ if ((s = tcp_log_addrs(NULL, th, (void *)ip, ip6)))
+ log(LOG_INFO, "%s; %s: Connection attempt "
+ "to closed port\n", s, __func__);
+ }
+ /*
+ * When blackholing do not respond with a RST but
+ * completely ignore the segment and drop it.
+ */
+ if ((blackhole == 1 && (thflags & TH_SYN)) ||
+ blackhole == 2)
+ goto dropunlock;
- if (isipv6) {
-#ifdef INET6
- strcpy(dbuf, "[");
- strcpy(sbuf, "[");
- strcat(dbuf, ip6_sprintf(&ip6->ip6_dst));
- strcat(sbuf, ip6_sprintf(&ip6->ip6_src));
- strcat(dbuf, "]");
- strcat(sbuf, "]");
-#endif
- } else {
- strcpy(dbuf, inet_ntoa(ip->ip_dst));
- strcpy(sbuf, inet_ntoa(ip->ip_src));
- }
- switch (log_in_vain) {
- case 1:
- if ((thflags & TH_SYN) == 0)
- break;
- /* FALLTHROUGH */
- case 2:
- log(LOG_INFO,
- "Connection attempt to TCP %s:%d "
- "from %s:%d flags:0x%02x\n",
- dbuf, ntohs(th->th_dport), sbuf,
- ntohs(th->th_sport), thflags);
- break;
- default:
- break;
- }
- }
- if (blackhole) {
- switch (blackhole) {
- case 1:
- if (thflags & TH_SYN)
- goto drop;
- break;
- case 2:
- goto drop;
- default:
- goto drop;
- }
- }
rstreason = BANDLIM_RST_CLOSEDPORT;
goto dropwithreset;
}
INP_LOCK(inp);
- /* Check the minimum TTL for socket. */
+#ifdef IPSEC
+#ifdef INET6
+ if (isipv6 && ipsec6_in_reject(m, inp)) {
+ ipsec6stat.in_polvio++;
+ goto dropunlock;
+ } else
+#endif /* INET6 */
+ if (ipsec4_in_reject(m, inp) != 0) {
+ ipsec4stat.in_polvio++;
+ goto dropunlock;
+ }
+#endif /* IPSEC */
+
+ /*
+ * Check the minimum TTL for socket.
+ */
if (inp->inp_ip_minttl != 0) {
#ifdef INET6
if (isipv6 && inp->inp_ip_minttl > ip6->ip6_hlim)
- goto drop;
+ goto dropunlock;
else
#endif
if (inp->inp_ip_minttl > ip->ip_ttl)
- goto drop;
+ goto dropunlock;
}
+ /*
+ * A previous connection in TIMEWAIT state is supposed to catch
+ * stray or duplicate segments arriving late. If this segment
+ * was a legitimate new connection attempt the old INPCB gets
+ * removed and we can try again to find a listening socket.
+ */
if (inp->inp_vflag & INP_TIMEWAIT) {
- /*
- * The only option of relevance is TOF_CC, and only if
- * present in a SYN segment. See tcp_timewait().
- */
if (thflags & TH_SYN)
- tcp_dooptions(&to, optp, optlen, 1);
- if (tcp_timewait((struct tcptw *)inp->inp_ppcb,
- &to, th, m, tlen))
- goto findpcb;
+ tcp_dooptions(&to, optp, optlen, TO_SYN);
/*
- * tcp_timewait unlocks inp.
+ * NB: tcp_twcheck unlocks the INP and frees the mbuf.
*/
+ if (tcp_twcheck(inp, &to, th, m, tlen))
+ goto findpcb;
INP_INFO_WUNLOCK(&tcbinfo);
return;
}
+ /*
+ * The TCPCB may no longer exist if the connection is winding
+ * down or it is in the CLOSED state. Either way we drop the
+ * segment and send an appropriate response.
+ */
tp = intotcpcb(inp);
- if (tp == 0) {
- INP_UNLOCK(inp);
+ if (tp == NULL || tp->t_state == TCPS_CLOSED) {
rstreason = BANDLIM_RST_CLOSEDPORT;
goto dropwithreset;
}
- if (tp->t_state == TCPS_CLOSED)
- goto drop;
-
- /* Unscale the window into a 32-bit value. */
- if ((thflags & TH_SYN) == 0)
- tiwin = th->th_win << tp->snd_scale;
- else
- tiwin = th->th_win;
#ifdef MAC
INP_LOCK_ASSERT(inp);
if (mac_check_inpcb_deliver(inp, m))
- goto drop;
+ goto dropunlock;
#endif
so = inp->inp_socket;
+ KASSERT(so != NULL, ("%s: so == NULL", __func__));
#ifdef TCPDEBUG
if (so->so_options & SO_DEBUG) {
ostate = tp->t_state;
- if (isipv6)
+ if (isipv6) {
+#ifdef INET6
bcopy((char *)ip6, (char *)tcp_saveipgen, sizeof(*ip6));
- else
+#endif
+ } else
bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip));
tcp_savetcp = *th;
}
#endif
+ /*
+ * When the socket is accepting connections (the INPCB is in LISTEN
+ * state) we look into the SYN cache if this is a new connection
+ * attempt or the completion of a previous one.
+ */
if (so->so_options & SO_ACCEPTCONN) {
struct in_conninfo inc;
-#ifdef INET6
+ KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
+ "tp not listening", __func__));
+
+ bzero(&inc, sizeof(inc));
inc.inc_isipv6 = isipv6;
-#endif
+#ifdef INET6
if (isipv6) {
inc.inc6_faddr = ip6->ip6_src;
inc.inc6_laddr = ip6->ip6_dst;
- } else {
+ } else
+#endif
+ {
inc.inc_faddr = ip->ip_src;
inc.inc_laddr = ip->ip_dst;
}
inc.inc_fport = th->th_sport;
inc.inc_lport = th->th_dport;
- /*
- * If the state is LISTEN then ignore segment if it contains
- * a RST. If the segment contains an ACK then it is bad and
- * send a RST. If it does not contain a SYN then it is not
- * interesting; drop it.
- *
- * If the state is SYN_RECEIVED (syncache) and seg contains
- * an ACK, but not for our SYN/ACK, send a RST. If the seg
- * contains a RST, check the sequence number to see if it
- * is a valid reset segment.
- */
- if ((thflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) {
- if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
- if (!syncache_expand(&inc, th, &so, m)) {
- /*
- * No syncache entry, or ACK was not
- * for our SYN/ACK. Send a RST.
- */
- tcpstat.tcps_badsyn++;
- rstreason = BANDLIM_RST_OPENPORT;
- goto dropwithreset;
- }
- if (so == NULL) {
- /*
- * Could not complete 3-way handshake,
- * connection is being closed down, and
- * syncache will free mbuf.
- */
- INP_UNLOCK(inp);
- INP_INFO_WUNLOCK(&tcbinfo);
- return;
- }
- /*
- * Socket is created in state SYN_RECEIVED.
- * Continue processing segment.
- */
- INP_UNLOCK(inp);
- inp = sotoinpcb(so);
- INP_LOCK(inp);
- tp = intotcpcb(inp);
- /*
- * This is what would have happened in
- * tcp_output() when the SYN,ACK was sent.
- */
- tp->snd_up = tp->snd_una;
- tp->snd_max = tp->snd_nxt = tp->iss + 1;
- tp->last_ack_sent = tp->rcv_nxt;
+ /*
+ * Check for an existing connection attempt in syncache if
+ * the flag is only ACK. A successful lookup creates a new
+ * socket appended to the listen queue in SYN_RECEIVED state.
+ */
+ if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
+ /*
+ * Parse the TCP options here because
+ * syncookies need access to the reflected
+ * timestamp.
+ */
+ tcp_dooptions(&to, optp, optlen, 0);
+ /*
+ * NB: syncache_expand() doesn't unlock
+ * inp and tcpinfo locks.
+ */
+ if (!syncache_expand(&inc, &to, th, &so, m)) {
/*
- * RFC1323: The window in SYN & SYN/ACK
- * segments is never scaled.
+ * No syncache entry or ACK was not
+ * for our SYN/ACK. Send a RST.
+ * NB: syncache did its own logging
+ * of the failure cause.
*/
- tp->snd_wnd = tiwin; /* unscaled */
- goto after_listen;
- }
- if (thflags & TH_RST) {
- syncache_chkrst(&inc, th);
- goto drop;
- }
- if (thflags & TH_ACK) {
- syncache_badack(&inc);
- tcpstat.tcps_badsyn++;
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
}
- goto drop;
+ if (so == NULL) {
+ /*
+ * We completed the 3-way handshake
+ * but could not allocate a socket
+ * either due to memory shortage,
+ * listen queue length limits or
+ * global socket limits. Send RST
+ * or wait and have the remote end
+ * retransmit the ACK for another
+ * try.
+ */
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Socket allocation failed due to "
+ "limits or memory shortage, %s\n",
+ s, __func__, (tcp_sc_rst_sock_fail ?
+ "sending RST" : "try again"));
+ if (tcp_sc_rst_sock_fail) {
+ rstreason = BANDLIM_UNLIMITED;
+ goto dropwithreset;
+ } else
+ goto dropunlock;
+ }
+ /*
+ * Socket is created in state SYN_RECEIVED.
+ * Unlock the listen socket, lock the newly
+ * created socket and update the tp variable.
+ */
+ INP_UNLOCK(inp); /* listen socket */
+ inp = sotoinpcb(so);
+ INP_LOCK(inp); /* new connection */
+ tp = intotcpcb(inp);
+ KASSERT(tp->t_state == TCPS_SYN_RECEIVED,
+ ("%s: ", __func__));
+ /*
+ * Process the segment and the data it
+ * contains. tcp_do_segment() consumes
+ * the mbuf chain and unlocks the inpcb.
+ */
+ tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen);
+ INP_INFO_UNLOCK_ASSERT(&tcbinfo);
+ return;
}
-
/*
- * Segment's flags are (SYN) or (SYN|FIN).
+ * Segment flag validation for new connection attempts:
+ *
+ * Our (SYN|ACK) response was rejected.
+ * Check with syncache and remove entry to prevent
+ * retransmits.
+ *
+ * NB: syncache_chkrst does its own logging of failure
+ * causes.
+ */
+ if (thflags & TH_RST) {
+ syncache_chkrst(&inc, th);
+ goto dropunlock;
+ }
+ /*
+ * We can't do anything without SYN.
+ */
+ if ((thflags & TH_SYN) == 0) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "SYN is missing, segment ignored\n",
+ s, __func__);
+ tcpstat.tcps_badsyn++;
+ goto dropunlock;
+ }
+ /*
+ * (SYN|ACK) is bogus on a listen socket.
*/
+ if (thflags & TH_ACK) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "SYN|ACK invalid, segment rejected\n",
+ s, __func__);
+ syncache_badack(&inc); /* XXX: Not needed! */
+ tcpstat.tcps_badsyn++;
+ rstreason = BANDLIM_RST_OPENPORT;
+ goto dropwithreset;
+ }
+ /*
+ * If the drop_synfin option is enabled, drop all
+ * segments with both the SYN and FIN bits set.
+ * This prevents e.g. nmap from identifying the
+ * TCP/IP stack.
+ * XXX: Poor reasoning. nmap has other methods
+ * and is constantly refining its stack detection
+ * strategies.
+ * XXX: This is a violation of the TCP specification
+ * and was used by RFC1644.
+ */
+ if ((thflags & TH_FIN) && drop_synfin) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "SYN|FIN segment ignored (based on "
+ "sysctl setting)\n", s, __func__);
+ tcpstat.tcps_badsyn++;
+ goto dropunlock;
+ }
+ /*
+ * Segment's flags are (SYN) or (SYN|FIN).
+ *
+ * TH_PUSH, TH_URG, TH_ECE, TH_CWR are ignored
+ * as they do not affect the state of the TCP FSM.
+ * The data pointed to by TH_URG and th_urp is ignored.
+ */
+ KASSERT((thflags & (TH_RST|TH_ACK)) == 0,
+ ("%s: Listen socket: TH_RST or TH_ACK set", __func__));
+ KASSERT(thflags & (TH_SYN),
+ ("%s: Listen socket: TH_SYN not set", __func__));
#ifdef INET6
/*
* If deprecated address is forbidden,
@@ -933,182 +750,201 @@
if ((ia6 = ip6_getdstifaddr(m)) &&
(ia6->ia6_flags & IN6_IFF_DEPRECATED)) {
- INP_UNLOCK(inp);
- tp = NULL;
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt to deprecated "
+ "IPv6 address rejected\n",
+ s, __func__);
rstreason = BANDLIM_RST_OPENPORT;
goto dropwithreset;
}
}
#endif
/*
- * If it is from this socket, drop it, it must be forged.
- * Don't bother responding if the destination was a broadcast.
- */
- if (th->th_dport == th->th_sport) {
- if (isipv6) {
- if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
- &ip6->ip6_src))
- goto drop;
- } else {
- if (ip->ip_dst.s_addr == ip->ip_src.s_addr)
- goto drop;
- }
+ * Basic sanity checks on incoming SYN requests:
+ * Don't respond if the destination is a link layer
+ * broadcast according to RFC1122 4.2.3.10, p. 104.
+ * If it is from this socket it must be forged.
+ * Don't respond if the source or destination is a
+ * global or subnet broad- or multicast address.
+ * Note that it is quite possible to receive unicast
+ * link-layer packets with a broadcast IP address. Use
+ * in_broadcast() to find them.
+ */
+ if (m->m_flags & (M_BCAST|M_MCAST)) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt from broad- or multicast "
+ "link layer address ignored\n", s, __func__);
+ goto dropunlock;
}
- /*
- * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
- *
- * Note that it is quite possible to receive unicast
- * link-layer packets with a broadcast IP address. Use
- * in_broadcast() to find them.
- */
- if (m->m_flags & (M_BCAST|M_MCAST))
- goto drop;
if (isipv6) {
+#ifdef INET6
+ if (th->th_dport == th->th_sport &&
+ IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt to/from self "
+ "ignored\n", s, __func__);
+ goto dropunlock;
+ }
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
- IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
- goto drop;
+ IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt from/to multicast "
+ "address ignored\n", s, __func__);
+ goto dropunlock;
+ }
+#endif
} else {
+ if (th->th_dport == th->th_sport &&
+ ip->ip_dst.s_addr == ip->ip_src.s_addr) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt from/to self "
+ "ignored\n", s, __func__);
+ goto dropunlock;
+ }
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
- in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
- goto drop;
+ in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
+ if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Listen socket: "
+ "Connection attempt from/to broad- "
+ "or multicast address ignored\n",
+ s, __func__);
+ goto dropunlock;
+ }
}
/*
- * SYN appears to be valid; create compressed TCP state
- * for syncache, or perform t/tcp connection.
+ * SYN appears to be valid. Create compressed TCP state
+ * for syncache.
*/
- if (so->so_qlen <= so->so_qlimit) {
#ifdef TCPDEBUG
- if (so->so_options & SO_DEBUG)
- tcp_trace(TA_INPUT, ostate, tp,
- (void *)tcp_saveipgen, &tcp_savetcp, 0);
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_INPUT, ostate, tp,
+ (void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
- tcp_dooptions(&to, optp, optlen, 1);
- if (!syncache_add(&inc, &to, th, &so, m))
- goto drop;
- if (so == NULL) {
- /*
- * Entry added to syncache, mbuf used to
- * send SYN,ACK packet.
- */
- KASSERT(headlocked, ("headlocked"));
- INP_UNLOCK(inp);
- INP_INFO_WUNLOCK(&tcbinfo);
- return;
- }
- /*
- * Segment passed TAO tests.
- */
- INP_UNLOCK(inp);
- inp = sotoinpcb(so);
- INP_LOCK(inp);
- tp = intotcpcb(inp);
- tp->snd_wnd = tiwin;
- tp->t_starttime = ticks;
- tp->t_state = TCPS_ESTABLISHED;
-
- /*
- * T/TCP logic:
- * If there is a FIN or if there is data, then
- * delay SYN,ACK(SYN) in the hope of piggy-backing
- * it on a response segment. Otherwise must send
- * ACK now in case the other side is slow starting.
- */
- if (thflags & TH_FIN || tlen != 0)
- tp->t_flags |= (TF_DELACK | TF_NEEDSYN);
- else
- tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
- tcpstat.tcps_connects++;
- soisconnected(so);
- goto trimthenstep6;
- }
- goto drop;
+ tcp_dooptions(&to, optp, optlen, TO_SYN);
+ syncache_add(&inc, &to, th, inp, &so, m);
+ /*
+ * Entry added to syncache and mbuf consumed.
+ * Everything already unlocked by syncache_add().
+ */
+ INP_INFO_UNLOCK_ASSERT(&tcbinfo);
+ return;
}
-after_listen:
- KASSERT(headlocked, ("tcp_input: after_listen: head not locked"));
- INP_LOCK_ASSERT(inp);
- /* Syncache takes care of sockets in the listen state. */
- KASSERT(tp->t_state != TCPS_LISTEN, ("tcp_input: TCPS_LISTEN"));
+ /*
+ * Segment belongs to a connection in SYN_SENT, ESTABLISHED or later
+ * state. tcp_do_segment() always consumes the mbuf chain, unlocks
+ * the inpcb, and unlocks pcbinfo.
+ */
+ tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen);
+ INP_INFO_UNLOCK_ASSERT(&tcbinfo);
+ return;
+
+dropwithreset:
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ tcp_dropwithreset(m, th, tp, tlen, rstreason);
+ m = NULL; /* mbuf chain got consumed. */
+dropunlock:
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ if (inp != NULL)
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+drop:
+ INP_INFO_UNLOCK_ASSERT(&tcbinfo);
+ if (s != NULL)
+ free(s, M_TCPLOG);
+ if (m != NULL)
+ m_freem(m);
+ return;
+}
+
+static void
+tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
+ struct tcpcb *tp, int drop_hdrlen, int tlen)
+{
+ int thflags, acked, ourfinisacked, needoutput = 0;
+ int headlocked = 1;
+ int rstreason, todrop, win;
+ u_long tiwin;
+ struct tcpopt to;
+#ifdef TCPDEBUG
/*
- * This is the second part of the MSS DoS prevention code (after
- * minmss on the sending side) and it deals with too many too small
- * tcp packets in a too short timeframe (1 second).
- *
- * For every full second we count the number of received packets
- * and bytes. If we get a lot of packets per second for this connection
- * (tcp_minmssoverload) we take a closer look at it and compute the
- * average packet size for the past second. If that is less than
- * tcp_minmss we get too many packets with very small payload which
- * is not good and burdens our system (and every packet generates
- * a wakeup to the process connected to our socket). We can reasonable
- * expect this to be small packet DoS attack to exhaust our CPU
- * cycles.
- *
- * Care has to be taken for the minimum packet overload value. This
- * value defines the minimum number of packets per second before we
- * start to worry. This must not be too low to avoid killing for
- * example interactive connections with many small packets like
- * telnet or SSH.
- *
- * Setting either tcp_minmssoverload or tcp_minmss to "0" disables
- * this check.
- *
- * Account for packet if payload packet, skip over ACK, etc.
+ * The size of tcp_saveipgen must be the size of the max ip header,
+ * now IPv6.
*/
- if (tcp_minmss && tcp_minmssoverload &&
- tp->t_state == TCPS_ESTABLISHED && tlen > 0) {
- if ((unsigned int)(tp->rcv_second - ticks) < hz) {
- tp->rcv_pps++;
- tp->rcv_byps += tlen + off;
- if (tp->rcv_pps > tcp_minmssoverload) {
- if ((tp->rcv_byps / tp->rcv_pps) < tcp_minmss) {
- printf("too many small tcp packets from "
- "%s:%u, av. %lubyte/packet, "
- "dropping connection\n",
-#ifdef INET6
- isipv6 ?
- ip6_sprintf(&inp->inp_inc.inc6_faddr) :
-#endif
- inet_ntoa(inp->inp_inc.inc_faddr),
- inp->inp_inc.inc_fport,
- tp->rcv_byps / tp->rcv_pps);
- KASSERT(headlocked, ("tcp_input: "
- "after_listen: tcp_drop: head "
- "not locked"));
- tp = tcp_drop(tp, ECONNRESET);
- tcpstat.tcps_minmssdrops++;
- goto drop;
- }
- }
- } else {
- tp->rcv_second = ticks + hz;
- tp->rcv_pps = 1;
- tp->rcv_byps = tlen + off;
- }
- }
+ u_char tcp_saveipgen[IP6_HDR_LEN];
+ struct tcphdr tcp_savetcp;
+ short ostate = 0;
+#endif
+ thflags = th->th_flags;
+
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ INP_LOCK_ASSERT(tp->t_inpcb);
+ KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
+ __func__));
+ KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
+ __func__));
/*
* Segment received on connection.
* Reset idle time and keep-alive timer.
+ * XXX: This should be done after segment
+ * validation to ignore broken/spoofed segs.
*/
tp->t_rcvtime = ticks;
if (TCPS_HAVEESTABLISHED(tp->t_state))
- callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+
+ /*
+ * Unscale the window into a 32-bit value.
+ * For the SYN_SENT state the scale is zero.
+ */
+ tiwin = th->th_win << tp->snd_scale;
+
+ /*
+ * Parse options on any incoming segment.
+ */
+ tcp_dooptions(&to, (u_char *)(th + 1),
+ (th->th_off << 2) - sizeof(struct tcphdr),
+ (thflags & TH_SYN) ? TO_SYN : 0);
+
+ /*
+ * If echoed timestamp is later than the current time,
+ * fall back to non RFC1323 RTT calculation. Normalize
+ * timestamp if syncookies were used when this connection
+ * was established.
+ */
+ if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
+ to.to_tsecr -= tp->ts_offset;
+ if (TSTMP_GT(to.to_tsecr, ticks))
+ to.to_tsecr = 0;
+ }
/*
* Process options only when we get SYN/ACK back. The SYN case
* for incoming connections is handled in tcp_syncache.
+ * According to RFC1323 the window field in a SYN (i.e., a <SYN>
+ * or <SYN,ACK>) segment itself is never scaled.
* XXX this is traditional behavior, may need to be cleaned up.
*/
- tcp_dooptions(&to, optp, optlen, thflags & TH_SYN);
if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
- if (to.to_flags & TOF_SCALE) {
+ if ((to.to_flags & TOF_SCALE) &&
+ (tp->t_flags & TF_REQ_SCALE)) {
tp->t_flags |= TF_RCVD_SCALE;
- tp->requested_s_scale = to.to_requested_s_scale;
+ tp->snd_scale = to.to_wscale;
}
+ /*
+ * Initial send window. It will be updated with
+ * the next incoming segment to the scaled value.
+ */
+ tp->snd_wnd = th->th_win;
if (to.to_flags & TOF_TS) {
tp->t_flags |= TF_RCVD_TSTMP;
tp->ts_recent = to.to_tsval;
@@ -1116,13 +952,9 @@
}
if (to.to_flags & TOF_MSS)
tcp_mss(tp, to.to_mss);
- if (tp->sack_enable) {
- if (!(to.to_flags & TOF_SACK))
- tp->sack_enable = 0;
- else
- tp->t_flags |= TF_SACK_PERMIT;
- }
-
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ (to.to_flags & TOF_SACKPERM) == 0)
+ tp->t_flags &= ~TF_SACK_PERMIT;
}
/*
@@ -1139,16 +971,18 @@
* (the reassembly queue is empty), add the data to
* the socket buffer and note that we need a delayed ack.
* Make sure that the hidden state-flags are also off.
- * Since we check for TCPS_ESTABLISHED above, it can only
+ * Since we check for TCPS_ESTABLISHED first, it can only
* be TH_NEEDSYN.
*/
if (tp->t_state == TCPS_ESTABLISHED &&
+ th->th_seq == tp->rcv_nxt &&
(thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
+ tp->snd_nxt == tp->snd_max &&
+ tiwin && tiwin == tp->snd_wnd &&
((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
+ LIST_EMPTY(&tp->t_segq) &&
((to.to_flags & TOF_TS) == 0 ||
- TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&
- th->th_seq == tp->rcv_nxt && tiwin && tiwin == tp->snd_wnd &&
- tp->snd_nxt == tp->snd_max) {
+ TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {
/*
* If last ACK falls within this segment's sequence numbers,
@@ -1166,20 +1000,24 @@
if (SEQ_GT(th->th_ack, tp->snd_una) &&
SEQ_LEQ(th->th_ack, tp->snd_max) &&
tp->snd_cwnd >= tp->snd_wnd &&
- ((!tcp_do_newreno && !tp->sack_enable &&
+ ((!tcp_do_newreno &&
+ !(tp->t_flags & TF_SACK_PERMIT) &&
tp->t_dupacks < tcprexmtthresh) ||
- ((tcp_do_newreno || tp->sack_enable) &&
- !IN_FASTRECOVERY(tp) && to.to_nsacks == 0 &&
+ ((tcp_do_newreno ||
+ (tp->t_flags & TF_SACK_PERMIT)) &&
+ !IN_FASTRECOVERY(tp) &&
+ (to.to_flags & TOF_SACK) == 0 &&
TAILQ_EMPTY(&tp->snd_holes)))) {
- KASSERT(headlocked, ("headlocked"));
+ KASSERT(headlocked,
+ ("%s: headlocked", __func__));
INP_INFO_WUNLOCK(&tcbinfo);
headlocked = 0;
/*
- * this is a pure ack for outstanding data.
+ * This is a pure ack for outstanding data.
*/
++tcpstat.tcps_predack;
/*
- * "bad retransmit" recovery
+ * "bad retransmit" recovery.
*/
if (tp->t_rxtshift == 1 &&
ticks < tp->t_badrxtwin) {
@@ -1210,7 +1048,7 @@
tcp_xmit_timer(tp,
ticks - to.to_tsecr + 1);
} else if (tp->t_rtttime &&
- SEQ_GT(th->th_ack, tp->t_rtseq)) {
+ SEQ_GT(th->th_ack, tp->t_rtseq)) {
if (!tp->t_rttlow ||
tp->t_rttlow > ticks - tp->t_rtttime)
tp->t_rttlow = ticks - tp->t_rtttime;
@@ -1227,13 +1065,13 @@
tp->snd_recover = th->th_ack - 1;
tp->snd_una = th->th_ack;
/*
- * pull snd_wl2 up to prevent seq wrap relative
+ * Pull snd_wl2 up to prevent seq wrap relative
* to th_ack.
*/
tp->snd_wl2 = th->th_ack;
tp->t_dupacks = 0;
m_freem(m);
- ND6_HINT(tp); /* some progress has been done */
+ ND6_HINT(tp); /* Some progress has been made. */
/*
* If all outstanding data are acked, stop
@@ -1243,41 +1081,37 @@
* wakeup/selwakeup/signal. If data
* are ready to send, let tcp_output
* decide between more output or persist.
-
+ */
#ifdef TCPDEBUG
if (so->so_options & SO_DEBUG)
tcp_trace(TA_INPUT, ostate, tp,
(void *)tcp_saveipgen,
&tcp_savetcp, 0);
#endif
- */
if (tp->snd_una == tp->snd_max)
- callout_stop(tp->tt_rexmt);
- else if (!callout_active(tp->tt_persist))
- callout_reset(tp->tt_rexmt,
- tp->t_rxtcur,
- tcp_timer_rexmt, tp);
-
+ tcp_timer_activate(tp, TT_REXMT, 0);
+ else if (!tcp_timer_active(tp, TT_PERSIST))
+ tcp_timer_activate(tp, TT_REXMT,
+ tp->t_rxtcur);
sowwakeup(so);
if (so->so_snd.sb_cc)
(void) tcp_output(tp);
goto check_delack;
}
} else if (th->th_ack == tp->snd_una &&
- LIST_EMPTY(&tp->t_segq) &&
tlen <= sbspace(&so->so_rcv)) {
int newsize = 0; /* automatic sockbuf scaling */
- KASSERT(headlocked, ("headlocked"));
+ KASSERT(headlocked, ("%s: headlocked", __func__));
INP_INFO_WUNLOCK(&tcbinfo);
headlocked = 0;
/*
- * this is a pure, in-sequence data packet
+ * This is a pure, in-sequence data packet
* with nothing on the reassembly queue and
* we have enough buffer space to take it.
*/
/* Clean receiver SACK report if present */
- if (tp->sack_enable && tp->rcv_numsacks)
+ if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks)
tcp_clean_sackreport(tp);
++tcpstat.tcps_preddat;
tp->rcv_nxt += tlen;
@@ -1293,7 +1127,7 @@
tp->rcv_up = tp->rcv_nxt;
tcpstat.tcps_rcvpack++;
tcpstat.tcps_rcvbyte += tlen;
- ND6_HINT(tp); /* some progress has been done */
+ ND6_HINT(tp); /* Some progress has been made */
#ifdef TCPDEBUG
if (so->so_options & SO_DEBUG)
tcp_trace(TA_INPUT, ostate, tp,
@@ -1323,12 +1157,14 @@
* 2. received bytes per RTT is within seven eighth of the
* current socket buffer size;
* 3. receive buffer size has not hit maximal automatic size;
- * 4. Profit!
*
* This algorithm does one step per RTT at most and only if
* we receive a bulk stream w/o packet losses or reorderings.
* Shrinking the buffer during idle times is not necessary as
* it doesn't consume any memory when idle.
+ *
+ * TODO: Only step up if the application is actually serving
+ * the buffer to better manage the socket buffer resources.
*/
if (tcp_do_autorcvbuf &&
to.to_tsecr &&
@@ -1343,12 +1179,6 @@
min(so->so_rcv.sb_hiwat +
tcp_autorcvbuf_inc,
tcp_autorcvbuf_max);
-#if 0
- log(LOG_DEBUG, "%s: hiwat %i, ref_ts %i, ts %i, "
- "count %i, new %i, max %i\n",
- __func__, so->so_rcv.sb_hiwat, tp->rfbuf_ts,
- to.to_tsecr, tp->rfbuf_cnt, newsize, (int)sb_max);
-#endif
}
/* Start over with next RTT. */
tp->rfbuf_ts = 0;
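For illustration, here is a minimal user-space sketch of the auto-sizing rule described in the comment above: grow by one increment per RTT, and only while the peer keeps the buffer at least 7/8 full. The constants stand in for the tcp_autorcvbuf_inc/tcp_autorcvbuf_max sysctls and are made up, not the kernel's values.

/*
 * Illustrative only; names and constants are assumptions, not kernel code.
 */
#include <stdio.h>

#define AUTORCVBUF_INC	(16 * 1024)	/* assumed per-RTT step */
#define AUTORCVBUF_MAX	(256 * 1024)	/* assumed ceiling */

static unsigned int
autorcvbuf_step(unsigned int hiwat, unsigned int bytes_this_rtt)
{
	unsigned int newsize = hiwat;

	/* One step per RTT, only if the peer filled >= 7/8 of the buffer. */
	if (bytes_this_rtt > (hiwat / 8) * 7 && hiwat < AUTORCVBUF_MAX) {
		newsize = hiwat + AUTORCVBUF_INC;
		if (newsize > AUTORCVBUF_MAX)
			newsize = AUTORCVBUF_MAX;
	}
	return (newsize);
}

int
main(void)
{
	unsigned int hiwat = 64 * 1024;

	/* A bulk stream that fills the buffer every RTT keeps growing... */
	for (int rtt = 0; rtt < 4; rtt++) {
		printf("rtt %d: hiwat %u\n", rtt, hiwat);
		hiwat = autorcvbuf_step(hiwat, hiwat);
	}
	/* ...while a trickle of small segments never triggers a step. */
	printf("trickle: hiwat stays %u\n", autorcvbuf_step(hiwat, 512));
	return (0);
}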
@@ -1373,6 +1203,7 @@
m_adj(m, drop_hdrlen); /* delayed header drop */
sbappendstream_locked(&so->so_rcv, m);
}
+ /* NB: sorwakeup_locked() does an implicit unlock. */
sorwakeup_locked(so);
if (DELAY_ACK(tp)) {
tp->t_flags |= TF_DELACK;
@@ -1390,13 +1221,10 @@
* Receive window is amount of space in rcv queue,
* but not less than advertised window.
*/
- { int win;
-
win = sbspace(&so->so_rcv);
if (win < 0)
win = 0;
tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
- }
/* Reset receive buffer auto scaling when not in bulk receive mode. */
tp->rfbuf_ts = 0;
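A small illustration of the window calculation just above: the advertised window is the free space in the receive buffer, clipped at zero, but never less than the amount already advertised (rcv_adv - rcv_nxt), so a previously offered window is never taken back. Values below are made up.

#include <stdio.h>
#include <stdint.h>

static uint32_t
recv_window(int space, uint32_t rcv_adv, uint32_t rcv_nxt)
{
	int win = space < 0 ? 0 : space;		/* free space, clipped */
	int already = (int)(rcv_adv - rcv_nxt);		/* already advertised */

	return ((uint32_t)(win > already ? win : already));
}

int
main(void)
{
	/* Buffer nearly full, but 8192 bytes were already advertised. */
	printf("%u\n", recv_window(1024, 108192, 100000));	/* 8192 */
	/* Plenty of room: advertise the full free space. */
	printf("%u\n", recv_window(65536, 108192, 100000));	/* 65536 */
	return (0);
}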
@@ -1436,17 +1264,12 @@
rstreason = BANDLIM_UNLIMITED;
goto dropwithreset;
}
- if (thflags & TH_RST) {
- if (thflags & TH_ACK) {
- KASSERT(headlocked, ("tcp_input: after_listen"
- ": tcp_drop.2: head not locked"));
- tp = tcp_drop(tp, ECONNREFUSED);
- }
+ if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST))
+ tp = tcp_drop(tp, ECONNREFUSED);
+ if (thflags & TH_RST)
goto drop;
- }
- if ((thflags & TH_SYN) == 0)
+ if (!(thflags & TH_SYN))
goto drop;
- tp->snd_wnd = th->th_win; /* initial send window */
tp->irs = th->th_seq;
tcp_rcvseqinit(tp);
@@ -1461,7 +1284,6 @@
/* Do window scaling on this connection? */
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
- tp->snd_scale = tp->requested_s_scale;
tp->rcv_scale = tp->request_r_scale;
}
tp->rcv_adv += tp->rcv_wnd;
@@ -1471,8 +1293,8 @@
* ACKNOW will be turned on later.
*/
if (DELAY_ACK(tp) && tlen != 0)
- callout_reset(tp->tt_delack, tcp_delacktime,
- tcp_timer_delack, tp);
+ tcp_timer_activate(tp, TT_DELACK,
+ tcp_delacktime);
else
tp->t_flags |= TF_ACKNOW;
/*
@@ -1488,8 +1310,7 @@
thflags &= ~TH_SYN;
} else {
tp->t_state = TCPS_ESTABLISHED;
- callout_reset(tp->tt_keep, tcp_keepidle,
- tcp_timer_keep, tp);
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
}
} else {
/*
@@ -1502,15 +1323,14 @@
* SYN-SENT* -> SYN-RECEIVED*
* If there was no CC option, clear cached CC value.
*/
- tp->t_flags |= TF_ACKNOW;
- callout_stop(tp->tt_rexmt);
+ tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
+ tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_state = TCPS_SYN_RECEIVED;
}
-trimthenstep6:
- KASSERT(headlocked, ("tcp_input: trimthenstep6: head not "
- "locked"));
- INP_LOCK_ASSERT(inp);
+ KASSERT(headlocked, ("%s: trimthenstep6: head not locked",
+ __func__));
+ INP_LOCK_ASSERT(tp->t_inpcb);
/*
* Advance th->th_seq to correspond to first data byte.
@@ -1548,8 +1368,6 @@
*/
case TCPS_LAST_ACK:
case TCPS_CLOSING:
- case TCPS_TIME_WAIT:
- KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
break; /* continue normal processing */
}
@@ -1616,9 +1434,8 @@
* RFC 1337.
*/
if (thflags & TH_RST) {
- if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
- SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
- (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
+ if (SEQ_GEQ(th->th_seq, tp->last_ack_sent - 1) &&
+ SEQ_LEQ(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
switch (tp->t_state) {
case TCPS_SYN_RECEIVED:
@@ -1626,11 +1443,15 @@
goto close;
case TCPS_ESTABLISHED:
- if (tp->last_ack_sent != th->th_seq &&
- tcp_insecure_rst == 0) {
+ if (tcp_insecure_rst == 0 &&
+ !(SEQ_GEQ(th->th_seq, tp->rcv_nxt - 1) &&
+ SEQ_LEQ(th->th_seq, tp->rcv_nxt + 1)) &&
+ !(SEQ_GEQ(th->th_seq, tp->last_ack_sent - 1) &&
+ SEQ_LEQ(th->th_seq, tp->last_ack_sent + 1))) {
tcpstat.tcps_badrst++;
goto drop;
}
+ /* FALLTHROUGH */
case TCPS_FIN_WAIT_1:
case TCPS_FIN_WAIT_2:
case TCPS_CLOSE_WAIT:
@@ -1638,23 +1459,17 @@
close:
tp->t_state = TCPS_CLOSED;
tcpstat.tcps_drops++;
- KASSERT(headlocked, ("tcp_input: "
- "trimthenstep6: tcp_close: head not "
- "locked"));
+ KASSERT(headlocked, ("%s: trimthenstep6: "
+ "tcp_close: head not locked", __func__));
tp = tcp_close(tp);
break;
case TCPS_CLOSING:
case TCPS_LAST_ACK:
- KASSERT(headlocked, ("trimthenstep6: "
- "tcp_close.2: head not locked"));
+ KASSERT(headlocked, ("%s: trimthenstep6: "
+ "tcp_close.2: head not locked", __func__));
tp = tcp_close(tp);
break;
-
- case TCPS_TIME_WAIT:
- KASSERT(tp->t_state != TCPS_TIME_WAIT,
- ("timewait"));
- break;
}
}
goto drop;
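A sketch of the tightened RST acceptance test introduced above may help: every RST must fall in the coarse window around last_ack_sent, and for ESTABLISHED connections with tcp_insecure_rst left at 0 the sequence number must additionally sit within one byte of rcv_nxt or last_ack_sent. SEQ_GEQ/SEQ_LEQ are the usual wraparound-safe comparisons; the function names below are illustrative, not the kernel's.

#include <stdio.h>
#include <stdint.h>

#define SEQ_GEQ(a, b)	((int32_t)((a) - (b)) >= 0)
#define SEQ_LEQ(a, b)	((int32_t)((a) - (b)) <= 0)

/* Coarse in-window test applied to every RST. */
static int
rst_in_window(uint32_t seq, uint32_t last_ack_sent, uint32_t rcv_wnd)
{
	return (SEQ_GEQ(seq, last_ack_sent - 1) &&
	    SEQ_LEQ(seq, last_ack_sent + rcv_wnd));
}

/* Stricter test used for ESTABLISHED when tcp_insecure_rst is 0. */
static int
rst_exact_enough(uint32_t seq, uint32_t rcv_nxt, uint32_t last_ack_sent)
{
	return ((SEQ_GEQ(seq, rcv_nxt - 1) && SEQ_LEQ(seq, rcv_nxt + 1)) ||
	    (SEQ_GEQ(seq, last_ack_sent - 1) &&
	    SEQ_LEQ(seq, last_ack_sent + 1)));
}

int
main(void)
{
	uint32_t rcv_nxt = 4000000000u;		/* near wraparound on purpose */
	uint32_t last_ack_sent = rcv_nxt;
	uint32_t rcv_wnd = 65535;

	/* Genuine RST at the expected sequence number: 1 1. */
	printf("%d %d\n", rst_in_window(rcv_nxt, last_ack_sent, rcv_wnd),
	    rst_exact_enough(rcv_nxt, rcv_nxt, last_ack_sent));
	/* Blind in-window RST 30000 bytes ahead: 1 0 (dropped as badrst). */
	printf("%d %d\n", rst_in_window(rcv_nxt + 30000, last_ack_sent, rcv_wnd),
	    rst_exact_enough(rcv_nxt + 30000, rcv_nxt, last_ack_sent));
	return (0);
}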
@@ -1755,8 +1570,16 @@
*/
if ((so->so_state & SS_NOFDREF) &&
tp->t_state > TCPS_CLOSE_WAIT && tlen) {
- KASSERT(headlocked, ("trimthenstep6: tcp_close.3: head not "
- "locked"));
+ char *s;
+
+ KASSERT(headlocked, ("%s: trimthenstep6: tcp_close.3: head "
+ "not locked", __func__));
+ if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data after socket "
+ "was closed, sending RST and removing tcpcb\n",
+ s, __func__, tcpstates[tp->t_state], tlen);
+ free(s, M_TCPLOG);
+ }
tp = tcp_close(tp);
tcpstat.tcps_rcvafterclose++;
rstreason = BANDLIM_UNLIMITED;
@@ -1767,27 +1590,12 @@
* If segment ends after window, drop trailing data
* (and PUSH and FIN); if nothing left, just ACK.
*/
- todrop = (th->th_seq+tlen) - (tp->rcv_nxt+tp->rcv_wnd);
+ todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd);
if (todrop > 0) {
tcpstat.tcps_rcvpackafterwin++;
if (todrop >= tlen) {
tcpstat.tcps_rcvbyteafterwin += tlen;
/*
- * If a new connection request is received
- * while in TIME_WAIT, drop the old connection
- * and start over if the sequence numbers
- * are above the previous ones.
- */
- KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
- if (thflags & TH_SYN &&
- tp->t_state == TCPS_TIME_WAIT &&
- SEQ_GT(th->th_seq, tp->rcv_nxt)) {
- KASSERT(headlocked, ("trimthenstep6: "
- "tcp_close.4: head not locked"));
- tp = tcp_close(tp);
- goto findpcb;
- }
- /*
* If window is closed can only take segments at
* window edge, and have to drop data and PUSH from
* incoming segments. Continue processing, but
@@ -1837,8 +1645,8 @@
* error and we send an RST and drop the connection.
*/
if (thflags & TH_SYN) {
- KASSERT(headlocked, ("tcp_input: tcp_drop: trimthenstep6: "
- "head not locked"));
+ KASSERT(headlocked, ("%s: tcp_drop: trimthenstep6: "
+ "head not locked", __func__));
tp = tcp_drop(tp, ECONNRESET);
rstreason = BANDLIM_UNLIMITED;
goto drop;
@@ -1876,8 +1684,8 @@
/* Do window scaling? */
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
- tp->snd_scale = tp->requested_s_scale;
tp->rcv_scale = tp->request_r_scale;
+ tp->snd_wnd = tiwin;
}
/*
* Make transitions:
@@ -1890,8 +1698,7 @@
tp->t_flags &= ~TF_NEEDFIN;
} else {
tp->t_state = TCPS_ESTABLISHED;
- callout_reset(tp->tt_keep, tcp_keepidle,
- tcp_timer_keep, tp);
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
}
/*
* If segment contains data or ACK, will call tcp_reass()
@@ -1917,14 +1724,13 @@
case TCPS_CLOSE_WAIT:
case TCPS_CLOSING:
case TCPS_LAST_ACK:
- case TCPS_TIME_WAIT:
- KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
if (SEQ_GT(th->th_ack, tp->snd_max)) {
tcpstat.tcps_rcvacktoomuch++;
goto dropafterack;
}
- if (tp->sack_enable &&
- (to.to_nsacks > 0 || !TAILQ_EMPTY(&tp->snd_holes)))
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ ((to.to_flags & TOF_SACK) ||
+ !TAILQ_EMPTY(&tp->snd_holes)))
tcp_sack_doack(tp, &to, th->th_ack);
if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
if (tlen == 0 && tiwin == tp->snd_wnd) {
@@ -1953,13 +1759,15 @@
* to keep a constant cwnd packets in the
* network.
*/
- if (!callout_active(tp->tt_rexmt) ||
+ if (!tcp_timer_active(tp, TT_REXMT) ||
th->th_ack != tp->snd_una)
tp->t_dupacks = 0;
else if (++tp->t_dupacks > tcprexmtthresh ||
- ((tcp_do_newreno || tp->sack_enable) &&
- IN_FASTRECOVERY(tp))) {
- if (tp->sack_enable && IN_FASTRECOVERY(tp)) {
+ ((tcp_do_newreno ||
+ (tp->t_flags & TF_SACK_PERMIT)) &&
+ IN_FASTRECOVERY(tp))) {
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ IN_FASTRECOVERY(tp)) {
int awnd;
/*
@@ -1990,7 +1798,7 @@
* check to see if we're in newreno
* recovery.
*/
- if (tp->sack_enable) {
+ if (tp->t_flags & TF_SACK_PERMIT) {
if (IN_FASTRECOVERY(tp)) {
tp->t_dupacks = 0;
break;
@@ -2009,9 +1817,9 @@
tp->snd_ssthresh = win * tp->t_maxseg;
ENTER_FASTRECOVERY(tp);
tp->snd_recover = tp->snd_max;
- callout_stop(tp->tt_rexmt);
+ tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rtttime = 0;
- if (tp->sack_enable) {
+ if (tp->t_flags & TF_SACK_PERMIT) {
tcpstat.tcps_sack_recovery_episode++;
tp->sack_newdata = tp->snd_nxt;
tp->snd_cwnd = tp->t_maxseg;
@@ -2022,7 +1830,8 @@
tp->snd_cwnd = tp->t_maxseg;
(void) tcp_output(tp);
KASSERT(tp->snd_limited <= 2,
- ("tp->snd_limited too big"));
+ ("%s: tp->snd_limited too big",
+ __func__));
tp->snd_cwnd = tp->snd_ssthresh +
tp->t_maxseg *
(tp->t_dupacks - tp->snd_limited);
@@ -2036,7 +1845,8 @@
KASSERT(tp->t_dupacks == 1 ||
tp->t_dupacks == 2,
- ("dupacks not 1 or 2"));
+ ("%s: dupacks not 1 or 2",
+ __func__));
if (tp->t_dupacks == 1)
tp->snd_limited = 0;
tp->snd_cwnd =
@@ -2050,7 +1860,8 @@
tp->snd_limited == 0) ||
(sent == tp->t_maxseg + 1 &&
tp->t_flags & TF_SENTFIN),
- ("sent too much"));
+ ("%s: sent too much",
+ __func__));
tp->snd_limited = 2;
} else if (sent > 0)
++tp->snd_limited;
@@ -2062,16 +1873,17 @@
break;
}
- KASSERT(SEQ_GT(th->th_ack, tp->snd_una), ("th_ack <= snd_una"));
+ KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
+ ("%s: th_ack <= snd_una", __func__));
/*
* If the congestion window was inflated to account
* for the other side's cached packets, retract it.
*/
- if (tcp_do_newreno || tp->sack_enable) {
+ if (tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) {
if (IN_FASTRECOVERY(tp)) {
if (SEQ_LT(th->th_ack, tp->snd_recover)) {
- if (tp->sack_enable)
+ if (tp->t_flags & TF_SACK_PERMIT)
tcp_sack_partialack(tp, th);
else
tcp_newreno_partial_ack(tp, th);
@@ -2118,15 +1930,15 @@
/* Do window scaling? */
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
- tp->snd_scale = tp->requested_s_scale;
tp->rcv_scale = tp->request_r_scale;
+ /* Send window already scaled. */
}
}
process_ACK:
- KASSERT(headlocked, ("tcp_input: process_ACK: head not "
- "locked"));
- INP_LOCK_ASSERT(inp);
+ KASSERT(headlocked, ("%s: process_ACK: head not locked",
+ __func__));
+ INP_LOCK_ASSERT(tp->t_inpcb);
acked = th->th_ack - tp->snd_una;
tcpstat.tcps_rcvackpack++;
@@ -2183,11 +1995,10 @@
* timer, using current (possibly backed-off) value.
*/
if (th->th_ack == tp->snd_max) {
- callout_stop(tp->tt_rexmt);
+ tcp_timer_activate(tp, TT_REXMT, 0);
needoutput = 1;
- } else if (!callout_active(tp->tt_persist))
- callout_reset(tp->tt_rexmt, tp->t_rxtcur,
- tcp_timer_rexmt, tp);
+ } else if (!tcp_timer_active(tp, TT_PERSIST))
+ tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
/*
* If no data (only SYN) was ACK'd,
@@ -2203,10 +2014,10 @@
* Otherwise open linearly: maxseg per window
* (maxseg^2 / cwnd per packet).
*/
- if ((!tcp_do_newreno && !tp->sack_enable) ||
+ if ((!tcp_do_newreno && !(tp->t_flags & TF_SACK_PERMIT)) ||
!IN_FASTRECOVERY(tp)) {
- register u_int cw = tp->snd_cwnd;
- register u_int incr = tp->t_maxseg;
+ u_int cw = tp->snd_cwnd;
+ u_int incr = tp->t_maxseg;
if (cw > tp->snd_ssthresh)
incr = incr * incr / cw;
tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
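A worked example of the window-opening rule above: below ssthresh the window opens by a full maxseg per ACK (exponential growth per RTT), above it by maxseg*maxseg/cwnd per ACK, roughly one maxseg per RTT. Plain user-space arithmetic with made-up values; the cap models TCP_MAXWIN << snd_scale.

#include <stdio.h>

int
main(void)
{
	unsigned int maxseg = 1460;
	unsigned int ssthresh = 8 * maxseg;		/* 11680 */
	unsigned int cwnd = 2 * maxseg;
	unsigned int cap = 65535U << 6;			/* assumed scale of 6 */

	for (int ack = 1; ack <= 16; ack++) {
		const char *phase = cwnd > ssthresh ?
		    "congestion avoidance" : "slow start";
		unsigned int incr = maxseg;

		if (cwnd > ssthresh)
			incr = maxseg * maxseg / cwnd;
		cwnd = (cwnd + incr < cap) ? cwnd + incr : cap;
		printf("ack %2d (%s): cwnd %u\n", ack, phase, cwnd);
	}
	return (0);
}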
@@ -2221,19 +2032,20 @@
tp->snd_wnd -= acked;
ourfinisacked = 0;
}
+ /* NB: sowwakeup_locked() does an implicit unlock. */
sowwakeup_locked(so);
- /* detect una wraparound */
- if ((tcp_do_newreno || tp->sack_enable) &&
+ /* Detect una wraparound. */
+ if ((tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) &&
!IN_FASTRECOVERY(tp) &&
SEQ_GT(tp->snd_una, tp->snd_recover) &&
SEQ_LEQ(th->th_ack, tp->snd_recover))
tp->snd_recover = th->th_ack - 1;
- if ((tcp_do_newreno || tp->sack_enable) &&
+ if ((tcp_do_newreno || (tp->t_flags & TF_SACK_PERMIT)) &&
IN_FASTRECOVERY(tp) &&
SEQ_GEQ(th->th_ack, tp->snd_recover))
EXIT_FASTRECOVERY(tp);
tp->snd_una = th->th_ack;
- if (tp->sack_enable) {
+ if (tp->t_flags & TF_SACK_PERMIT) {
if (SEQ_GT(tp->snd_una, tp->snd_recover))
tp->snd_recover = tp->snd_una;
}
@@ -2255,15 +2067,18 @@
* Starting the timer is contrary to the
* specification, but if we don't get a FIN
* we'll hang forever.
+ *
+ * XXXjl:
+ * we should release the tp also, and use a
+ * compressed state.
*/
- /* XXXjl
- * we should release the tp also, and use a
- * compressed state.
- */
if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ int timeout;
+
soisdisconnected(so);
- callout_reset(tp->tt_2msl, tcp_maxidle,
- tcp_timer_2msl, tp);
+ timeout = (tcp_fast_finwait2_recycle) ?
+ tcp_finwait2_timeout : tcp_maxidle;
+ tcp_timer_activate(tp, TT_2MSL, timeout);
}
tp->t_state = TCPS_FIN_WAIT_2;
}
@@ -2277,10 +2092,11 @@
*/
case TCPS_CLOSING:
if (ourfinisacked) {
- KASSERT(headlocked, ("tcp_input: process_ACK: "
- "head not locked"));
+ KASSERT(headlocked, ("%s: process_ACK: "
+ "head not locked", __func__));
tcp_twstart(tp);
INP_INFO_WUNLOCK(&tcbinfo);
+ headlocked = 0;
m_freem(m);
return;
}
@@ -2294,29 +2110,18 @@
*/
case TCPS_LAST_ACK:
if (ourfinisacked) {
- KASSERT(headlocked, ("tcp_input: process_ACK:"
- " tcp_close: head not locked"));
+ KASSERT(headlocked, ("%s: process_ACK: "
+ "tcp_close: head not locked", __func__));
tp = tcp_close(tp);
goto drop;
}
break;
-
- /*
- * In TIME_WAIT state the only thing that should arrive
- * is a retransmission of the remote FIN. Acknowledge
- * it and restart the finack timer.
- */
- case TCPS_TIME_WAIT:
- KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
- callout_reset(tp->tt_2msl, 2 * tcp_msl,
- tcp_timer_2msl, tp);
- goto dropafterack;
}
}
step6:
- KASSERT(headlocked, ("tcp_input: step6: head not locked"));
- INP_LOCK_ASSERT(inp);
+ KASSERT(headlocked, ("%s: step6: head not locked", __func__));
+ INP_LOCK_ASSERT(tp->t_inpcb);
/*
* Update window information.
@@ -2401,8 +2206,8 @@
tp->rcv_up = tp->rcv_nxt;
}
dodata: /* XXX */
- KASSERT(headlocked, ("tcp_input: dodata: head not locked"));
- INP_LOCK_ASSERT(inp);
+ KASSERT(headlocked, ("%s: dodata: head not locked", __func__));
+ INP_LOCK_ASSERT(tp->t_inpcb);
/*
* Process the segment text, merging it into the TCP sequencing queue,
@@ -2415,7 +2220,6 @@
if ((tlen || (thflags & TH_FIN)) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
tcp_seq save_start = th->th_seq;
- tcp_seq save_end = th->th_seq + tlen;
m_adj(m, drop_hdrlen); /* delayed header drop */
/*
* Insert segment which includes th into TCP reassembly queue
@@ -2446,19 +2250,29 @@
m_freem(m);
else
sbappendstream_locked(&so->so_rcv, m);
+ /* NB: sorwakeup_locked() does an implicit unlock. */
sorwakeup_locked(so);
} else {
+ /*
+ * XXX: Due to the header drop above "th" is
+ * theoretically invalid by now. Fortunately
+ * m_adj() doesn't actually frees any mbufs
+ * when trimming from the head.
+ */
thflags = tcp_reass(tp, th, &tlen, m);
tp->t_flags |= TF_ACKNOW;
}
- if (tlen > 0 && tp->sack_enable)
- tcp_update_sack_list(tp, save_start, save_end);
+ if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))
+ tcp_update_sack_list(tp, save_start, save_start + tlen);
+#if 0
/*
* Note the amount of data that peer has sent into
* our window, in order to estimate the sender's
* buffer size.
+ * XXX: Unused.
*/
len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
+#endif
} else {
m_freem(m);
thflags &= ~TH_FIN;
@@ -2492,7 +2306,7 @@
*/
case TCPS_SYN_RECEIVED:
tp->t_starttime = ticks;
- /*FALLTHROUGH*/
+ /* FALLTHROUGH */
case TCPS_ESTABLISHED:
tp->t_state = TCPS_CLOSE_WAIT;
break;
@@ -2511,20 +2325,11 @@
* standard timers.
*/
case TCPS_FIN_WAIT_2:
- KASSERT(headlocked == 1, ("tcp_input: dodata: "
- "TCP_FIN_WAIT_2: head not locked"));
+ KASSERT(headlocked == 1, ("%s: dodata: "
+ "TCP_FIN_WAIT_2: head not locked", __func__));
tcp_twstart(tp);
INP_INFO_WUNLOCK(&tcbinfo);
return;
-
- /*
- * In TIME_WAIT state restart the 2 MSL time_wait timer.
- */
- case TCPS_TIME_WAIT:
- KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
- callout_reset(tp->tt_2msl, 2 * tcp_msl,
- tcp_timer_2msl, tp);
- break;
}
}
INP_INFO_WUNLOCK(&tcbinfo);
@@ -2542,18 +2347,19 @@
(void) tcp_output(tp);
check_delack:
- KASSERT(headlocked == 0, ("tcp_input: check_delack: head locked"));
- INP_LOCK_ASSERT(inp);
+ KASSERT(headlocked == 0, ("%s: check_delack: head locked",
+ __func__));
+ INP_INFO_UNLOCK_ASSERT(&tcbinfo);
+ INP_LOCK_ASSERT(tp->t_inpcb);
if (tp->t_flags & TF_DELACK) {
tp->t_flags &= ~TF_DELACK;
- callout_reset(tp->tt_delack, tcp_delacktime,
- tcp_timer_delack, tp);
+ tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
}
- INP_UNLOCK(inp);
+ INP_UNLOCK(tp->t_inpcb);
return;
dropafterack:
- KASSERT(headlocked, ("tcp_input: dropafterack: head not locked"));
+ KASSERT(headlocked, ("%s: dropafterack: head not locked", __func__));
/*
* Generate an ACK dropping incoming segment if it occupies
* sequence space, where the ACK reflects our state.
@@ -2580,79 +2386,92 @@
tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
&tcp_savetcp, 0);
#endif
- KASSERT(headlocked, ("headlocked should be 1"));
+ KASSERT(headlocked, ("%s: headlocked should be 1", __func__));
INP_INFO_WUNLOCK(&tcbinfo);
tp->t_flags |= TF_ACKNOW;
(void) tcp_output(tp);
- INP_UNLOCK(inp);
+ INP_UNLOCK(tp->t_inpcb);
m_freem(m);
return;
dropwithreset:
- KASSERT(headlocked, ("tcp_input: dropwithreset: head not locked"));
+ KASSERT(headlocked, ("%s: dropwithreset: head not locked", __func__));
+
+ tcp_dropwithreset(m, th, tp, tlen, rstreason);
+
+ if (tp != NULL)
+ INP_UNLOCK(tp->t_inpcb);
+ if (headlocked)
+ INP_INFO_WUNLOCK(&tcbinfo);
+ return;
+
+drop:
/*
- * Generate a RST, dropping incoming segment.
- * Make ACK acceptable to originator of segment.
- * Don't bother to respond if destination was broadcast/multicast.
+ * Drop space held by incoming segment and return.
*/
- if ((thflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST))
+#ifdef TCPDEBUG
+ if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
+ &tcp_savetcp, 0);
+#endif
+ if (tp != NULL)
+ INP_UNLOCK(tp->t_inpcb);
+ if (headlocked)
+ INP_INFO_WUNLOCK(&tcbinfo);
+ m_freem(m);
+ return;
+}
+
+/*
+ * Issue RST and make ACK acceptable to originator of segment.
+ * The mbuf must still include the original packet header.
+ * tp may be NULL.
+ */
+static void
+tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
+ int tlen, int rstreason)
+{
+ struct ip *ip;
+#ifdef INET6
+ struct ip6_hdr *ip6;
+#endif
+ /* Don't bother if destination was broadcast/multicast. */
+ if ((th->th_flags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST))
goto drop;
- if (isipv6) {
+#ifdef INET6
+ if (mtod(m, struct ip *)->ip_v == 6) {
+ ip6 = mtod(m, struct ip6_hdr *);
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
goto drop;
- } else {
+ /* IPv6 anycast check is done at tcp6_input() */
+ } else
+#endif
+ {
+ ip = mtod(m, struct ip *);
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
goto drop;
}
- /* IPv6 anycast check is done at tcp6_input() */
- /*
- * Perform bandwidth limiting.
- */
+ /* Perform bandwidth limiting. */
if (badport_bandlim(rstreason) < 0)
goto drop;
-#ifdef TCPDEBUG
- if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
- tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
- &tcp_savetcp, 0);
-#endif
-
- if (thflags & TH_ACK)
- /* mtod() below is safe as long as hdr dropping is delayed */
- tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0, th->th_ack,
- TH_RST);
- else {
- if (thflags & TH_SYN)
+ /* tcp_respond consumes the mbuf chain. */
+ if (th->th_flags & TH_ACK) {
+ tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0,
+ th->th_ack, TH_RST);
+ } else {
+ if (th->th_flags & TH_SYN)
tlen++;
- /* mtod() below is safe as long as hdr dropping is delayed */
tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen,
- (tcp_seq)0, TH_RST|TH_ACK);
+ (tcp_seq)0, TH_RST|TH_ACK);
}
-
- if (tp)
- INP_UNLOCK(inp);
- if (headlocked)
- INP_INFO_WUNLOCK(&tcbinfo);
return;
-
drop:
- /*
- * Drop space held by incoming segment and return.
- */
-#ifdef TCPDEBUG
- if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
- tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
- &tcp_savetcp, 0);
-#endif
- if (tp)
- INP_UNLOCK(inp);
- if (headlocked)
- INP_INFO_WUNLOCK(&tcbinfo);
m_freem(m);
return;
}
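The reply selection in the new tcp_dropwithreset() boils down to two cases: a segment that carried an ACK is answered with a bare RST whose sequence number is the peer's ACK, anything else gets RST|ACK acknowledging everything the peer sent (a SYN counts as one sequence number). The sketch below models only that choice; the broadcast/multicast and bandwidth-limiting checks are left out and the names are illustrative.

#include <stdio.h>
#include <stdint.h>

#define TH_SYN 0x02
#define TH_RST 0x04
#define TH_ACK 0x10

struct reply {
	int		flags;
	uint32_t	seq;
	uint32_t	ack;
};

static int
pick_reset(int thflags, uint32_t th_seq, uint32_t th_ack, int tlen,
    struct reply *r)
{
	if (thflags & TH_RST)		/* never answer a RST with a RST */
		return (0);
	if (thflags & TH_ACK) {
		r->flags = TH_RST;	/* seq taken from the peer's ACK */
		r->seq = th_ack;
		r->ack = 0;
	} else {
		if (thflags & TH_SYN)	/* SYN occupies one sequence number */
			tlen++;
		r->flags = TH_RST | TH_ACK;
		r->seq = 0;
		r->ack = th_seq + tlen;
	}
	return (1);
}

int
main(void)
{
	struct reply r;

	/* SYN to a closed port: RST|ACK, ack = seq + 1. */
	if (pick_reset(TH_SYN, 1000, 0, 0, &r))
		printf("flags 0x%x seq %u ack %u\n", r.flags, r.seq, r.ack);
	/* Stray ACK: bare RST, seq = the peer's ack. */
	if (pick_reset(TH_ACK, 1000, 5000, 0, &r))
		printf("flags 0x%x seq %u ack %u\n", r.flags, r.seq, r.ack);
	return (0);
}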
@@ -2661,11 +2480,7 @@
* Parse TCP options and place in tcpopt.
*/
static void
-tcp_dooptions(to, cp, cnt, is_syn)
- struct tcpopt *to;
- u_char *cp;
- int cnt;
- int is_syn;
+tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
{
int opt, optlen;
@@ -2687,7 +2502,7 @@
case TCPOPT_MAXSEG:
if (optlen != TCPOLEN_MAXSEG)
continue;
- if (!is_syn)
+ if (!(flags & TO_SYN))
continue;
to->to_flags |= TOF_MSS;
bcopy((char *)cp + 2,
@@ -2697,10 +2512,10 @@
case TCPOPT_WINDOW:
if (optlen != TCPOLEN_WINDOW)
continue;
- if (! is_syn)
+ if (!(flags & TO_SYN))
continue;
to->to_flags |= TOF_SCALE;
- to->to_requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
+ to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT);
break;
case TCPOPT_TIMESTAMP:
if (optlen != TCPOLEN_TIMESTAMP)
@@ -2712,12 +2527,6 @@
bcopy((char *)cp + 6,
(char *)&to->to_tsecr, sizeof(to->to_tsecr));
to->to_tsecr = ntohl(to->to_tsecr);
- /*
- * If echoed timestamp is later than the current time,
- * fall back to non RFC1323 RTT calculation.
- */
- if ((to->to_tsecr != 0) && TSTMP_GT(to->to_tsecr, ticks))
- to->to_tsecr = 0;
break;
#ifdef TCP_SIGNATURE
/*
@@ -2729,21 +2538,25 @@
case TCPOPT_SIGNATURE:
if (optlen != TCPOLEN_SIGNATURE)
continue;
- to->to_flags |= (TOF_SIGNATURE | TOF_SIGLEN);
+ to->to_flags |= TOF_SIGNATURE;
+ to->to_signature = cp + 2;
break;
#endif
case TCPOPT_SACK_PERMITTED:
- if (!tcp_do_sack ||
- optlen != TCPOLEN_SACK_PERMITTED)
+ if (optlen != TCPOLEN_SACK_PERMITTED)
continue;
- if (is_syn) {
- /* MUST only be set on SYN */
- to->to_flags |= TOF_SACK;
- }
+ if (!(flags & TO_SYN))
+ continue;
+ if (!tcp_do_sack)
+ continue;
+ to->to_flags |= TOF_SACKPERM;
break;
case TCPOPT_SACK:
if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
continue;
+ if (flags & TO_SYN)
+ continue;
+ to->to_flags |= TOF_SACK;
to->to_nsacks = (optlen - 2) / TCPOLEN_SACK;
to->to_sacks = cp + 2;
tcpstat.tcps_sack_rcv_blocks++;
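For readers unfamiliar with the option walk that tcp_dooptions() performs, here is a minimal user-space sketch of the kind/length framing: EOL ends the list, NOP is a single byte, and every other option carries a length byte covering the kind and length themselves. Only a few kinds are decoded and the TO_SYN gating is omitted; purely illustrative.

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>

#define TCPOPT_EOL		0
#define TCPOPT_NOP		1
#define TCPOPT_MAXSEG		2
#define TCPOPT_WINDOW		3
#define TCPOPT_SACK_PERMITTED	4

int
main(void)
{
	/* MSS 1460, NOP, window scale 6, SACK permitted, EOL. */
	unsigned char opts[] = { 2, 4, 0x05, 0xb4, 1, 3, 3, 6, 4, 2, 0 };
	unsigned char *cp = opts;
	int cnt = sizeof(opts);
	int opt, optlen;

	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP)
			optlen = 1;
		else {
			if (cnt < 2)
				break;
			optlen = cp[1];
			if (optlen < 2 || optlen > cnt)
				break;
		}
		switch (opt) {
		case TCPOPT_MAXSEG: {
			uint16_t mss;

			if (optlen != 4)
				continue;
			memcpy(&mss, cp + 2, sizeof(mss));
			printf("MSS %u\n", ntohs(mss));
			break;
		}
		case TCPOPT_WINDOW:
			if (optlen != 3)
				continue;
			printf("window scale %u\n", cp[2]);
			break;
		case TCPOPT_SACK_PERMITTED:
			if (optlen != 2)
				continue;
			printf("SACK permitted\n");
			break;
		}
	}
	return (0);
}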
@@ -2761,11 +2574,8 @@
* sequencing purposes.
*/
static void
-tcp_pulloutofband(so, th, m, off)
- struct socket *so;
- struct tcphdr *th;
- register struct mbuf *m;
- int off; /* delayed to be droped hdrlen */
+tcp_pulloutofband(struct socket *so, struct tcphdr *th, struct mbuf *m,
+ int off)
{
int cnt = off + th->th_urp - 1;
@@ -2784,7 +2594,7 @@
}
cnt -= m->m_len;
m = m->m_next;
- if (m == 0)
+ if (m == NULL)
break;
}
panic("tcp_pulloutofband");
@@ -2795,11 +2605,9 @@
* and update averages and current timeout.
*/
static void
-tcp_xmit_timer(tp, rtt)
- register struct tcpcb *tp;
- int rtt;
+tcp_xmit_timer(struct tcpcb *tp, int rtt)
{
- register int delta;
+ int delta;
INP_LOCK_ASSERT(tp->t_inpcb);
@@ -2894,7 +2702,6 @@
* are present. Store the upper limit of the length of options plus
* data in maxopd.
*
- *
* In case of T/TCP, we call this routine during implicit connection
* setup as well (offer = -1), to initialize maxseg from the cached
* MSS of our peer.
@@ -2903,9 +2710,7 @@
* segment. Outgoing SYN/ACK MSS settings are handled in tcp_mssopt().
*/
void
-tcp_mss(tp, offer)
- struct tcpcb *tp;
- int offer;
+tcp_mss(struct tcpcb *tp, int offer)
{
int rtt, mss;
u_long bufsize;
@@ -2914,6 +2719,7 @@
struct socket *so;
struct hc_metrics_lite metrics;
int origoffer = offer;
+ int mtuflags = 0;
#ifdef INET6
int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
size_t min_protoh = isipv6 ?
@@ -2923,26 +2729,26 @@
const size_t min_protoh = sizeof(struct tcpiphdr);
#endif
- /* initialize */
+ /* Initialize. */
#ifdef INET6
if (isipv6) {
- maxmtu = tcp_maxmtu6(&inp->inp_inc);
+ maxmtu = tcp_maxmtu6(&inp->inp_inc, &mtuflags);
tp->t_maxopd = tp->t_maxseg = tcp_v6mssdflt;
} else
#endif
{
- maxmtu = tcp_maxmtu(&inp->inp_inc);
+ maxmtu = tcp_maxmtu(&inp->inp_inc, &mtuflags);
tp->t_maxopd = tp->t_maxseg = tcp_mssdflt;
}
so = inp->inp_socket;
/*
- * no route to sender, stay with default mss and return
+ * No route to sender, stay with default mss and return.
*/
if (maxmtu == 0)
return;
- /* what have we got? */
+ /* What have we got? */
switch (offer) {
case 0:
/*
@@ -2978,12 +2784,12 @@
}
/*
- * rmx information is now retrieved from tcp_hostcache
+ * rmx information is now retrieved from tcp_hostcache.
*/
tcp_hc_get(&inp->inp_inc, &metrics);
/*
- * if there's a discovered mtu int tcp hostcache, use it
+ * If there's a discovered mtu in the tcp hostcache, use it
* else, use the link mtu.
*/
if (metrics.rmx_mtu)
@@ -3016,7 +2822,7 @@
tp->t_maxopd = mss;
/*
- * origoffer==-1 indicates, that no segments were received yet.
+ * origoffer==-1 indicates that no segments were received yet.
* In this case we just guess.
*/
if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
@@ -3072,7 +2878,7 @@
}
SOCKBUF_UNLOCK(&so->so_rcv);
/*
- * While we're here, check the others too
+ * While we're here, check the others too.
*/
if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) {
tp->t_srtt = rtt;
@@ -3138,14 +2944,17 @@
tp->snd_cwnd = mss * ss_fltsz_local;
else
tp->snd_cwnd = mss * ss_fltsz;
+
+ /* Check the interface for TSO capabilities. */
+ if (mtuflags & CSUM_TSO)
+ tp->t_flags |= TF_TSO;
}
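As a quick sanity check on the arithmetic these routines are doing: the MSS is essentially the usable path MTU minus the fixed protocol overhead (min_protoh above, 40 bytes for IPv4's tcpiphdr, 60 for an IPv6 header plus TCP header). A small worked example; the interface and host-cache lookups are of course omitted.

#include <stdio.h>

int
main(void)
{
	unsigned int mtu = 1500;
	unsigned int min_protoh_v4 = 20 + 20;	/* struct tcpiphdr */
	unsigned int min_protoh_v6 = 40 + 20;	/* ip6_hdr + tcphdr */

	printf("IPv4 MSS for MTU %u: %u\n", mtu, mtu - min_protoh_v4);	/* 1460 */
	printf("IPv6 MSS for MTU %u: %u\n", mtu, mtu - min_protoh_v6);	/* 1440 */
	return (0);
}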
/*
* Determine the MSS option to send on an outgoing SYN.
*/
int
-tcp_mssopt(inc)
- struct in_conninfo *inc;
+tcp_mssopt(struct in_conninfo *inc)
{
int mss = 0;
u_long maxmtu = 0;
@@ -3160,14 +2969,14 @@
#ifdef INET6
if (isipv6) {
mss = tcp_v6mssdflt;
- maxmtu = tcp_maxmtu6(inc);
+ maxmtu = tcp_maxmtu6(inc, NULL);
thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
} else
#endif
{
mss = tcp_mssdflt;
- maxmtu = tcp_maxmtu(inc);
+ maxmtu = tcp_maxmtu(inc, NULL);
thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
min_protoh = sizeof(struct tcpiphdr);
}
@@ -3187,14 +2996,12 @@
* be started again.
*/
static void
-tcp_newreno_partial_ack(tp, th)
- struct tcpcb *tp;
- struct tcphdr *th;
+tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
{
tcp_seq onxt = tp->snd_nxt;
u_long ocwnd = tp->snd_cwnd;
- callout_stop(tp->tt_rexmt);
+ tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rtttime = 0;
tp->snd_nxt = th->th_ack;
/*
@@ -3217,135 +3024,3 @@
tp->snd_cwnd = 0;
tp->snd_cwnd += tp->t_maxseg;
}
-
-/*
- * Returns 1 if the TIME_WAIT state was killed and we should start over,
- * looking for a pcb in the listen state. Returns 0 otherwise.
- */
-static int
-tcp_timewait(tw, to, th, m, tlen)
- struct tcptw *tw;
- struct tcpopt *to;
- struct tcphdr *th;
- struct mbuf *m;
- int tlen;
-{
- int thflags;
- tcp_seq seq;
-#ifdef INET6
- int isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
-#else
- const int isipv6 = 0;
-#endif
-
- /* tcbinfo lock required for tcp_twclose(), tcp_2msl_reset. */
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
- INP_LOCK_ASSERT(tw->tw_inpcb);
-
- thflags = th->th_flags;
-
- /*
- * NOTE: for FIN_WAIT_2 (to be added later),
- * must validate sequence number before accepting RST
- */
-
- /*
- * If the segment contains RST:
- * Drop the segment - see Stevens, vol. 2, p. 964 and
- * RFC 1337.
- */
- if (thflags & TH_RST)
- goto drop;
-
-#if 0
-/* PAWS not needed at the moment */
- /*
- * RFC 1323 PAWS: If we have a timestamp reply on this segment
- * and it's less than ts_recent, drop it.
- */
- if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
- TSTMP_LT(to.to_tsval, tp->ts_recent)) {
- if ((thflags & TH_ACK) == 0)
- goto drop;
- goto ack;
- }
- /*
- * ts_recent is never updated because we never accept new segments.
- */
-#endif
-
- /*
- * If a new connection request is received
- * while in TIME_WAIT, drop the old connection
- * and start over if the sequence numbers
- * are above the previous ones.
- */
- if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) {
- (void) tcp_twclose(tw, 0);
- return (1);
- }
-
- /*
- * Drop the the segment if it does not contain an ACK.
- */
- if ((thflags & TH_ACK) == 0)
- goto drop;
-
- /*
- * Reset the 2MSL timer if this is a duplicate FIN.
- */
- if (thflags & TH_FIN) {
- seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
- if (seq + 1 == tw->rcv_nxt)
- tcp_timer_2msl_reset(tw, 2 * tcp_msl);
- }
-
- /*
- * Acknowledge the segment if it has data or is not a duplicate ACK.
- */
- if (thflags != TH_ACK || tlen != 0 ||
- th->th_seq != tw->rcv_nxt || th->th_ack != tw->snd_nxt)
- tcp_twrespond(tw, TH_ACK);
- goto drop;
-
- /*
- * Generate a RST, dropping incoming segment.
- * Make ACK acceptable to originator of segment.
- * Don't bother to respond if destination was broadcast/multicast.
- */
- if (m->m_flags & (M_BCAST|M_MCAST))
- goto drop;
- if (isipv6) {
- struct ip6_hdr *ip6;
-
- /* IPv6 anycast check is done at tcp6_input() */
- ip6 = mtod(m, struct ip6_hdr *);
- if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
- IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
- goto drop;
- } else {
- struct ip *ip;
-
- ip = mtod(m, struct ip *);
- if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
- IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
- ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
- in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
- goto drop;
- }
- if (thflags & TH_ACK) {
- tcp_respond(NULL,
- mtod(m, void *), th, m, 0, th->th_ack, TH_RST);
- } else {
- seq = th->th_seq + (thflags & TH_SYN ? 1 : 0);
- tcp_respond(NULL,
- mtod(m, void *), th, m, seq, 0, TH_RST|TH_ACK);
- }
- INP_UNLOCK(tw->tw_inpcb);
- return (0);
-
-drop:
- INP_UNLOCK(tw->tw_inpcb);
- m_freem(m);
- return (0);
-}
Index: in_pcb.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/in_pcb.c -L sys/netinet/in_pcb.c -u -r1.1.1.2 -r1.2
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -1,6 +1,8 @@
/*-
* Copyright (c) 1982, 1986, 1991, 1993, 1995
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * Copyright (c) 2007 Robert N. M. Watson
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,27 +29,34 @@
* SUCH DAMAGE.
*
* @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.165.2.3 2006/02/14 22:09:27 rwatson Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/in_pcb.c,v 1.196.4.1 2007/12/22 20:54:46 rwatson Exp $");
+
+#include "opt_ddb.h"
#include "opt_ipsec.h"
#include "opt_inet6.h"
#include "opt_mac.h"
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
#include <vm/uma.h>
#include <net/if.h>
@@ -66,19 +75,13 @@
#include <netinet6/ip6_var.h>
#endif /* INET6 */
-#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#include <netkey/key.h>
-#endif /* IPSEC */
-
-#ifdef FAST_IPSEC
-#if defined(IPSEC) || defined(IPSEC_ESP)
-#error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!"
-#endif
+#ifdef IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/key.h>
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
+
+#include <security/mac/mac_framework.h>
/*
* These configure the range of local port addresses assigned to
@@ -167,19 +170,20 @@
/*
* Allocate a PCB and associate it with the socket.
+ * On success return with the PCB locked.
*/
int
-in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, const char *type)
+in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
{
struct inpcb *inp;
int error;
INP_INFO_WLOCK_ASSERT(pcbinfo);
error = 0;
- inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT | M_ZERO);
+ inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
if (inp == NULL)
return (ENOBUFS);
- inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
+ bzero(inp, inp_zero_size);
inp->inp_pcbinfo = pcbinfo;
inp->inp_socket = so;
#ifdef MAC
@@ -190,31 +194,34 @@
mac_create_inpcb_from_socket(so, inp);
SOCK_UNLOCK(so);
#endif
-#if defined(IPSEC) || defined(FAST_IPSEC)
-#ifdef FAST_IPSEC
+
+#ifdef IPSEC
error = ipsec_init_policy(so, &inp->inp_sp);
-#else
- error = ipsec_init_pcbpolicy(so, &inp->inp_sp);
+ if (error != 0) {
+#ifdef MAC
+ mac_destroy_inpcb(inp);
#endif
- if (error != 0)
goto out;
+}
#endif /*IPSEC*/
-#if defined(INET6)
+#ifdef INET6
if (INP_SOCKAF(so) == AF_INET6) {
inp->inp_vflag |= INP_IPV6PROTO;
if (ip6_v6only)
inp->inp_flags |= IN6P_IPV6_V6ONLY;
}
#endif
- LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
+ LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
pcbinfo->ipi_count++;
so->so_pcb = (caddr_t)inp;
- INP_LOCK_INIT(inp, "inp", type);
#ifdef INET6
if (ip6_auto_flowlabel)
inp->inp_flags |= IN6P_AUTOFLOWLABEL;
#endif
-#if defined(IPSEC) || defined(FAST_IPSEC) || defined(MAC)
+ INP_LOCK(inp);
+ inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
+
+#if defined(IPSEC) || defined(MAC)
out:
if (error != 0)
uma_zfree(pcbinfo->ipi_zone, inp);
@@ -280,7 +287,7 @@
if (nam != NULL && laddr.s_addr != INADDR_ANY)
return (EINVAL);
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
- wild = 1;
+ wild = INPLOOKUP_WILDCARD;
if (nam) {
sin = (struct sockaddr_in *)nam;
if (nam->sa_len != sizeof (*sin))
@@ -322,15 +329,19 @@
laddr = sin->sin_addr;
if (lport) {
struct inpcb *t;
+ struct tcptw *tw;
+
/* GROSS */
if (ntohs(lport) <= ipport_reservedhigh &&
ntohs(lport) >= ipport_reservedlow &&
- suser_cred(cred, SUSER_ALLOWJAIL))
+ priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT,
+ 0))
return (EACCES);
if (jailed(cred))
prison = 1;
- if (so->so_cred->cr_uid != 0 &&
- !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
+ if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
+ priv_check_cred(so->so_cred,
+ PRIV_NETINET_REUSEPORT, 0) != 0) {
t = in_pcblookup_local(inp->inp_pcbinfo,
sin->sin_addr, lport,
prison ? 0 : INPLOOKUP_WILDCARD);
@@ -355,19 +366,26 @@
t = in_pcblookup_local(pcbinfo, sin->sin_addr,
lport, prison ? 0 : wild);
if (t && (t->inp_vflag & INP_TIMEWAIT)) {
- if ((reuseport & intotw(t)->tw_so_options) == 0)
+ /*
+ * XXXRW: If an inpcb has had its timewait
+ * state recycled, we treat the address as
+ * being in use (for now). This is better
+ * than a panic, but not desirable.
+ */
+ tw = intotw(inp);
+ if (tw == NULL ||
+ (reuseport & tw->tw_so_options) == 0)
return (EADDRINUSE);
- } else
- if (t &&
+ } else if (t &&
(reuseport & t->inp_socket->so_options) == 0) {
-#if defined(INET6)
+#ifdef INET6
if (ntohl(sin->sin_addr.s_addr) !=
INADDR_ANY ||
ntohl(t->inp_laddr.s_addr) !=
INADDR_ANY ||
INP_SOCKAF(so) ==
INP_SOCKAF(t->inp_socket))
-#endif /* defined(INET6) */
+#endif
return (EADDRINUSE);
}
}
@@ -385,17 +403,19 @@
if (inp->inp_flags & INP_HIGHPORT) {
first = ipport_hifirstauto; /* sysctl */
last = ipport_hilastauto;
- lastport = &pcbinfo->lasthi;
+ lastport = &pcbinfo->ipi_lasthi;
} else if (inp->inp_flags & INP_LOWPORT) {
- if ((error = suser_cred(cred, SUSER_ALLOWJAIL)) != 0)
+ error = priv_check_cred(cred,
+ PRIV_NETINET_RESERVEDPORT, 0);
+ if (error)
return error;
first = ipport_lowfirstauto; /* 1023 */
last = ipport_lowlastauto; /* 600 */
- lastport = &pcbinfo->lastlow;
+ lastport = &pcbinfo->ipi_lastlow;
} else {
first = ipport_firstauto; /* sysctl */
last = ipport_lastauto;
- lastport = &pcbinfo->lastport;
+ lastport = &pcbinfo->ipi_lastport;
}
/*
* For UDP, use random port allocation as long as the user
@@ -510,10 +530,7 @@
inp->inp_faddr.s_addr = faddr;
inp->inp_fport = fport;
in_pcbrehash(inp);
-#ifdef IPSEC
- if (inp->inp_socket->so_type == SOCK_STREAM)
- ipsec_pcbconn(inp->inp_sp);
-#endif
+
if (anonport)
inp->inp_flags |= INP_ANONPORT;
return (0);
@@ -591,29 +608,23 @@
&in_ifaddrhead)->ia_broadaddr)->sin_addr;
}
if (laddr.s_addr == INADDR_ANY) {
- struct route sro;
-
- bzero(&sro, sizeof(sro));
ia = (struct in_ifaddr *)0;
/*
* If route is known our src addr is taken from the i/f,
* else punt.
+ *
+ * Find out route to destination
*/
- if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) {
- /* Find out route to destination */
- sro.ro_dst.sa_family = AF_INET;
- sro.ro_dst.sa_len = sizeof(struct sockaddr_in);
- ((struct sockaddr_in *)&sro.ro_dst)->sin_addr = faddr;
- rtalloc_ign(&sro, RTF_CLONING);
- }
+ if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0)
+ ia = ip_rtaddr(faddr);
/*
- * If we found a route, use the address
- * corresponding to the outgoing interface.
+ * If we found a route, use the address corresponding to
+ * the outgoing interface.
+ *
+ * Otherwise assume faddr is reachable on a directly connected
+ * network and try to find a corresponding interface to take
+ * the source address from.
*/
- if (sro.ro_rt) {
- ia = ifatoia(sro.ro_rt->rt_ifa);
- RTFREE(sro.ro_rt);
- }
if (ia == 0) {
bzero(&sa, sizeof(sa));
sa.sin_addr = faddr;
@@ -679,44 +690,81 @@
inp->inp_faddr.s_addr = INADDR_ANY;
inp->inp_fport = 0;
in_pcbrehash(inp);
-#ifdef IPSEC
- ipsec_pcbdisconn(inp->inp_sp);
-#endif
- if (inp->inp_socket->so_state & SS_NOFDREF)
- in_pcbdetach(inp);
}
+/*
+ * In the old world order, in_pcbdetach() served two functions: to detach the
+ * pcb from the socket/potentially free the socket, and to free the pcb
+ * itself. In the new world order, the protocol code is responsible for
+ * managing the relationship with the socket, and this code simply frees the
+ * pcb.
+ */
void
in_pcbdetach(struct inpcb *inp)
{
- struct socket *so = inp->inp_socket;
+
+ KASSERT(inp->inp_socket != NULL, ("in_pcbdetach: inp_socket == NULL"));
+ inp->inp_socket->so_pcb = NULL;
+ inp->inp_socket = NULL;
+}
+
+void
+in_pcbfree(struct inpcb *inp)
+{
struct inpcbinfo *ipi = inp->inp_pcbinfo;
+ KASSERT(inp->inp_socket == NULL, ("in_pcbfree: inp_socket != NULL"));
INP_INFO_WLOCK_ASSERT(ipi);
INP_LOCK_ASSERT(inp);
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
ipsec4_delete_pcbpolicy(inp);
#endif /*IPSEC*/
inp->inp_gencnt = ++ipi->ipi_gencnt;
in_pcbremlists(inp);
- if (so) {
- ACCEPT_LOCK();
- SOCK_LOCK(so);
- so->so_pcb = NULL;
- sotryfree(so);
- }
if (inp->inp_options)
(void)m_free(inp->inp_options);
- ip_freemoptions(inp->inp_moptions);
+ if (inp->inp_moptions != NULL)
+ inp_freemoptions(inp->inp_moptions);
inp->inp_vflag = 0;
- INP_LOCK_DESTROY(inp);
+
#ifdef MAC
mac_destroy_inpcb(inp);
#endif
+ INP_UNLOCK(inp);
uma_zfree(ipi->ipi_zone, inp);
}
+/*
+ * TCP needs to maintain its inpcb structure after the TCP connection has
+ * been torn down. However, it must be disconnected from the inpcb hashes as
+ * it must not prevent binding of future connections to the same port/ip
+ * combination by other inpcbs.
+ */
+void
+in_pcbdrop(struct inpcb *inp)
+{
+
+ INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
+ INP_LOCK_ASSERT(inp);
+
+ inp->inp_vflag |= INP_DROPPED;
+ if (inp->inp_lport) {
+ struct inpcbport *phd = inp->inp_phd;
+
+ LIST_REMOVE(inp, inp_hash);
+ LIST_REMOVE(inp, inp_portlist);
+ if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
+ LIST_REMOVE(phd, phd_hash);
+ free(phd, M_PCB);
+ }
+ inp->inp_lport = 0;
+ }
+}
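A toy user-space model of the in_pcbdrop() idea described in the comment above: the pcb leaves the lookup lists, so its port no longer blocks new binds, but the structure itself stays allocated until the in_pcbfree() analogue runs. Field and list names below are made up, not the kernel's.

#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct pcb {
	unsigned short	lport;
	int		dropped;
	LIST_ENTRY(pcb)	hash;
};

static LIST_HEAD(, pcb) pcbhash = LIST_HEAD_INITIALIZER(pcbhash);

static int
port_in_use(unsigned short port)
{
	struct pcb *p;

	LIST_FOREACH(p, &pcbhash, hash)
		if (p->lport == port)
			return (1);
	return (0);
}

static void
pcb_drop(struct pcb *p)
{
	p->dropped = 1;
	if (p->lport) {
		LIST_REMOVE(p, hash);	/* leave the lookup structures */
		p->lport = 0;
	}
}

int
main(void)
{
	struct pcb *p = calloc(1, sizeof(*p));

	if (p == NULL)
		return (1);
	p->lport = 8080;
	LIST_INSERT_HEAD(&pcbhash, p, hash);
	printf("before drop: port in use %d\n", port_in_use(8080));
	pcb_drop(p);		/* connection torn down, pcb retained */
	printf("after drop:  port in use %d, pcb still around %d\n",
	    port_in_use(8080), p->dropped);
	free(p);		/* the in_pcbfree() analogue */
	return (0);
}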
+
+/*
+ * Common routines to return the socket addresses associated with inpcbs.
+ */
struct sockaddr *
in_sockaddr(in_port_t port, struct in_addr *addr_p)
{
@@ -732,60 +780,39 @@
return (struct sockaddr *)sin;
}
-/*
- * The wrapper function will pass down the pcbinfo for this function to lock.
- * The socket must have a valid
- * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
- * except through a kernel programming error, so it is acceptable to panic
- * (or in this case trap) if the PCB is invalid. (Actually, we don't trap
- * because there actually /is/ a programming error somewhere... XXX)
- */
int
-in_setsockaddr(struct socket *so, struct sockaddr **nam,
- struct inpcbinfo *pcbinfo)
+in_getsockaddr(struct socket *so, struct sockaddr **nam)
{
struct inpcb *inp;
struct in_addr addr;
in_port_t port;
- INP_INFO_RLOCK(pcbinfo);
inp = sotoinpcb(so);
- if (!inp) {
- INP_INFO_RUNLOCK(pcbinfo);
- return ECONNRESET;
- }
+ KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL"));
+
INP_LOCK(inp);
port = inp->inp_lport;
addr = inp->inp_laddr;
INP_UNLOCK(inp);
- INP_INFO_RUNLOCK(pcbinfo);
*nam = in_sockaddr(port, &addr);
return 0;
}
-/*
- * The wrapper function will pass down the pcbinfo for this function to lock.
- */
int
-in_setpeeraddr(struct socket *so, struct sockaddr **nam,
- struct inpcbinfo *pcbinfo)
+in_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
struct inpcb *inp;
struct in_addr addr;
in_port_t port;
- INP_INFO_RLOCK(pcbinfo);
inp = sotoinpcb(so);
- if (!inp) {
- INP_INFO_RUNLOCK(pcbinfo);
- return ECONNRESET;
- }
+ KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL"));
+
INP_LOCK(inp);
port = inp->inp_fport;
addr = inp->inp_faddr;
INP_UNLOCK(inp);
- INP_INFO_RUNLOCK(pcbinfo);
*nam = in_sockaddr(port, &addr);
return 0;
@@ -799,7 +826,7 @@
struct inpcbhead *head;
INP_INFO_WLOCK(pcbinfo);
- head = pcbinfo->listhead;
+ head = pcbinfo->ipi_listhead;
for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
INP_LOCK(inp);
ninp = LIST_NEXT(inp, inp_list);
@@ -828,7 +855,7 @@
int i, gap;
INP_INFO_RLOCK(pcbinfo);
- LIST_FOREACH(inp, pcbinfo->listhead, inp_list) {
+ LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
INP_LOCK(inp);
imo = inp->inp_moptions;
if ((inp->inp_vflag & INP_IPV4) &&
@@ -885,7 +912,8 @@
* Look for an unconnected (wildcard foreign addr) PCB that
* matches the local address and port we're looking for.
*/
- head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
#ifdef INET6
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -914,9 +942,8 @@
* First see if this local port is in use by looking on the
* port hash list.
*/
- retrylookup:
- porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
- pcbinfo->porthashmask)];
+ porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
+ pcbinfo->ipi_porthashmask)];
LIST_FOREACH(phd, porthash, phd_hash) {
if (phd->phd_port == lport)
break;
@@ -947,18 +974,6 @@
if ((inp->inp_vflag & INP_IPV6) != 0)
wildcard += INP_LOOKUP_MAPPED_PCB_COST;
#endif
- /*
- * Clean out old time_wait sockets if they
- * are clogging up needed local ports.
- */
- if ((inp->inp_vflag & INP_TIMEWAIT) != 0) {
- if (tcp_twrecycleable((struct tcptw *)inp->inp_ppcb)) {
- INP_LOCK(inp);
- tcp_twclose((struct tcptw *)inp->inp_ppcb, 0);
- match = NULL;
- goto retrylookup;
- }
- }
if (inp->inp_faddr.s_addr != INADDR_ANY)
wildcard++;
if (inp->inp_laddr.s_addr != INADDR_ANY) {
@@ -997,10 +1012,12 @@
u_short fport = fport_arg, lport = lport_arg;
INP_INFO_RLOCK_ASSERT(pcbinfo);
+
/*
* First look for an exact match.
*/
- head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
+ pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
#ifdef INET6
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -1009,20 +1026,21 @@
if (inp->inp_faddr.s_addr == faddr.s_addr &&
inp->inp_laddr.s_addr == laddr.s_addr &&
inp->inp_fport == fport &&
- inp->inp_lport == lport) {
- /*
- * Found.
- */
+ inp->inp_lport == lport)
return (inp);
- }
}
+
+ /*
+ * Then look for a wildcard match, if requested.
+ */
if (wildcard) {
struct inpcb *local_wild = NULL;
-#if defined(INET6)
+#ifdef INET6
struct inpcb *local_wild_mapped = NULL;
-#endif /* defined(INET6) */
+#endif
- head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
#ifdef INET6
if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -1036,26 +1054,22 @@
if (inp->inp_laddr.s_addr == laddr.s_addr)
return (inp);
else if (inp->inp_laddr.s_addr == INADDR_ANY) {
-#if defined(INET6)
+#ifdef INET6
if (INP_CHECK_SOCKAF(inp->inp_socket,
AF_INET6))
local_wild_mapped = inp;
else
-#endif /* defined(INET6) */
- local_wild = inp;
+#endif
+ local_wild = inp;
}
}
}
-#if defined(INET6)
+#ifdef INET6
if (local_wild == NULL)
return (local_wild_mapped);
-#endif /* defined(INET6) */
+#endif
return (local_wild);
}
-
- /*
- * Not found.
- */
return (NULL);
}
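The lookup order implemented above, first an exact four-tuple match and only then a wildcard pass where a listener bound to the specific local address beats one bound to INADDR_ANY, can be modelled in a few lines of user-space C. The types and table below are deliberately simplified and are not the kernel's hash chains.

#include <stdio.h>
#include <stdint.h>

#define ANY 0

struct pcb {
	uint32_t	faddr, laddr;
	uint16_t	fport, lport;
	const char	*name;
};

static const struct pcb *
lookup(const struct pcb *tbl, int n, uint32_t faddr, uint16_t fport,
    uint32_t laddr, uint16_t lport, int wildcard)
{
	const struct pcb *local_wild = NULL;

	for (int i = 0; i < n; i++)		/* exact match pass */
		if (tbl[i].faddr == faddr && tbl[i].fport == fport &&
		    tbl[i].laddr == laddr && tbl[i].lport == lport)
			return (&tbl[i]);
	if (!wildcard)
		return (NULL);
	for (int i = 0; i < n; i++) {		/* wildcard (listener) pass */
		if (tbl[i].faddr != ANY || tbl[i].lport != lport)
			continue;
		if (tbl[i].laddr == laddr)
			return (&tbl[i]);
		if (tbl[i].laddr == ANY)
			local_wild = &tbl[i];
	}
	return (local_wild);
}

int
main(void)
{
	const struct pcb tbl[] = {
		{ 0x0a000001, 0x0a000002, 1234, 80, "established" },
		{ ANY,        0x0a000002, 0,    80, "listener on 10.0.0.2" },
		{ ANY,        ANY,        0,    80, "listener on *" },
	};
	const struct pcb *p;

	p = lookup(tbl, 3, 0x0a000001, 1234, 0x0a000002, 80, 1);
	printf("%s\n", p->name);		/* established */
	p = lookup(tbl, 3, 0x0a000009, 4321, 0x0a000002, 80, 1);
	printf("%s\n", p->name);		/* listener on 10.0.0.2 */
	p = lookup(tbl, 3, 0x0a000009, 4321, 0x0a000003, 80, 1);
	printf("%s\n", p->name);		/* listener on * */
	return (0);
}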
@@ -1072,6 +1086,8 @@
u_int32_t hashkey_faddr;
INP_INFO_WLOCK_ASSERT(pcbinfo);
+ INP_LOCK_ASSERT(inp);
+
#ifdef INET6
if (inp->inp_vflag & INP_IPV6)
hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
@@ -1079,11 +1095,11 @@
#endif /* INET6 */
hashkey_faddr = inp->inp_faddr.s_addr;
- pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
- inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)];
+ pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
+ inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
- pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
- pcbinfo->porthashmask)];
+ pcbporthash = &pcbinfo->ipi_porthashbase[
+ INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
/*
* Go through port list and look for a head for this lport.
@@ -1125,6 +1141,7 @@
INP_INFO_WLOCK_ASSERT(pcbinfo);
INP_LOCK_ASSERT(inp);
+
#ifdef INET6
if (inp->inp_vflag & INP_IPV6)
hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
@@ -1132,8 +1149,8 @@
#endif /* INET6 */
hashkey_faddr = inp->inp_faddr.s_addr;
- head = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
- inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
+ inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
LIST_REMOVE(inp, inp_hash);
LIST_INSERT_HEAD(head, inp, inp_hash);
@@ -1175,7 +1192,9 @@
#ifdef MAC
struct inpcb *inp;
- inp = (struct inpcb *)so->so_pcb;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL"));
+
INP_LOCK(inp);
SOCK_LOCK(so);
mac_inpcb_sosetlabel(so, inp);
@@ -1185,23 +1204,268 @@
}
/*
- * ipport_tick runs once per second, determining if random port
- * allocation should be continued. If more than ipport_randomcps
- * ports have been allocated in the last second, then we return to
- * sequential port allocation. We return to random allocation only
- * once we drop below ipport_randomcps for at least ipport_randomtime
- * seconds.
+ * ipport_tick runs once per second, determining if random port allocation
+ * should be continued. If more than ipport_randomcps ports have been
+ * allocated in the last second, then we return to sequential port
+ * allocation. We return to random allocation only once we drop below
+ * ipport_randomcps for at least ipport_randomtime seconds.
*/
-
void
ipport_tick(void *xtp)
{
- if (ipport_tcpallocs > ipport_tcplastcount + ipport_randomcps) {
- ipport_stoprandom = ipport_randomtime;
- } else {
+
+ if (ipport_tcpallocs <= ipport_tcplastcount + ipport_randomcps) {
if (ipport_stoprandom > 0)
ipport_stoprandom--;
- }
+ } else
+ ipport_stoprandom = ipport_randomtime;
ipport_tcplastcount = ipport_tcpallocs;
callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
}
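
A quick worked reading of the throttle above, with made-up numbers (a sketch,
not part of the committed code): if ipport_randomcps were 10 and
ipport_randomtime 45, any second in which more than 10 ports get allocated
switches the stack back to sequential port allocation, and only 45
consecutive quiet seconds re-enable randomization. The per-second decision
reduces to roughly:

	/* Sketch only; the counter names here are illustrative. */
	if (allocs_this_second > 10)		/* burst seen this second */
		stoprandom = 45;		/* stay sequential for 45 s */
	else if (stoprandom > 0)
		stoprandom--;			/* quiet second, count down */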
+
+#ifdef DDB
+static void
+db_print_indent(int indent)
+{
+ int i;
+
+ for (i = 0; i < indent; i++)
+ db_printf(" ");
+}
+
+static void
+db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent)
+{
+ char faddr_str[48], laddr_str[48];
+
+ db_print_indent(indent);
+ db_printf("%s at %p\n", name, inc);
+
+ indent += 2;
+
+#ifdef INET6
+ if (inc->inc_flags == 1) {
+ /* IPv6. */
+ ip6_sprintf(laddr_str, &inc->inc6_laddr);
+ ip6_sprintf(faddr_str, &inc->inc6_faddr);
+ } else {
+#endif
+ /* IPv4. */
+ inet_ntoa_r(inc->inc_laddr, laddr_str);
+ inet_ntoa_r(inc->inc_faddr, faddr_str);
+#ifdef INET6
+ }
+#endif
+ db_print_indent(indent);
+ db_printf("inc_laddr %s inc_lport %u\n", laddr_str,
+ ntohs(inc->inc_lport));
+ db_print_indent(indent);
+ db_printf("inc_faddr %s inc_fport %u\n", faddr_str,
+ ntohs(inc->inc_fport));
+}
+
+static void
+db_print_inpflags(int inp_flags)
+{
+ int comma;
+
+ comma = 0;
+ if (inp_flags & INP_RECVOPTS) {
+ db_printf("%sINP_RECVOPTS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_RECVRETOPTS) {
+ db_printf("%sINP_RECVRETOPTS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_RECVDSTADDR) {
+ db_printf("%sINP_RECVDSTADDR", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_HDRINCL) {
+ db_printf("%sINP_HDRINCL", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_HIGHPORT) {
+ db_printf("%sINP_HIGHPORT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_LOWPORT) {
+ db_printf("%sINP_LOWPORT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_ANONPORT) {
+ db_printf("%sINP_ANONPORT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_RECVIF) {
+ db_printf("%sINP_RECVIF", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_MTUDISC) {
+ db_printf("%sINP_MTUDISC", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_FAITH) {
+ db_printf("%sINP_FAITH", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_RECVTTL) {
+ db_printf("%sINP_RECVTTL", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & INP_DONTFRAG) {
+ db_printf("%sINP_DONTFRAG", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_IPV6_V6ONLY) {
+ db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_PKTINFO) {
+ db_printf("%sIN6P_PKTINFO", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_HOPLIMIT) {
+ db_printf("%sIN6P_HOPLIMIT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_HOPOPTS) {
+ db_printf("%sIN6P_HOPOPTS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_DSTOPTS) {
+ db_printf("%sIN6P_DSTOPTS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_RTHDR) {
+ db_printf("%sIN6P_RTHDR", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_RTHDRDSTOPTS) {
+ db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_TCLASS) {
+ db_printf("%sIN6P_TCLASS", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_AUTOFLOWLABEL) {
+ db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_RFC2292) {
+ db_printf("%sIN6P_RFC2292", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_flags & IN6P_MTU) {
+ db_printf("IN6P_MTU%s", comma ? ", " : "");
+ comma = 1;
+ }
+}
+
+static void
+db_print_inpvflag(u_char inp_vflag)
+{
+ int comma;
+
+ comma = 0;
+ if (inp_vflag & INP_IPV4) {
+ db_printf("%sINP_IPV4", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_vflag & INP_IPV6) {
+ db_printf("%sINP_IPV6", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_vflag & INP_IPV6PROTO) {
+ db_printf("%sINP_IPV6PROTO", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_vflag & INP_TIMEWAIT) {
+ db_printf("%sINP_TIMEWAIT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_vflag & INP_ONESBCAST) {
+ db_printf("%sINP_ONESBCAST", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_vflag & INP_DROPPED) {
+ db_printf("%sINP_DROPPED", comma ? ", " : "");
+ comma = 1;
+ }
+ if (inp_vflag & INP_SOCKREF) {
+ db_printf("%sINP_SOCKREF", comma ? ", " : "");
+ comma = 1;
+ }
+}
+
+void
+db_print_inpcb(struct inpcb *inp, const char *name, int indent)
+{
+
+ db_print_indent(indent);
+ db_printf("%s at %p\n", name, inp);
+
+ indent += 2;
+
+ db_print_indent(indent);
+ db_printf("inp_flow: 0x%x\n", inp->inp_flow);
+
+ db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent);
+
+ db_print_indent(indent);
+ db_printf("inp_ppcb: %p inp_pcbinfo: %p inp_socket: %p\n",
+ inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket);
+
+ db_print_indent(indent);
+ db_printf("inp_label: %p inp_flags: 0x%x (",
+ inp->inp_label, inp->inp_flags);
+ db_print_inpflags(inp->inp_flags);
+ db_printf(")\n");
+
+ db_print_indent(indent);
+ db_printf("inp_sp: %p inp_vflag: 0x%x (", inp->inp_sp,
+ inp->inp_vflag);
+ db_print_inpvflag(inp->inp_vflag);
+ db_printf(")\n");
+
+ db_print_indent(indent);
+ db_printf("inp_ip_ttl: %d inp_ip_p: %d inp_ip_minttl: %d\n",
+ inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl);
+
+ db_print_indent(indent);
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6) {
+ db_printf("in6p_options: %p in6p_outputopts: %p "
+ "in6p_moptions: %p\n", inp->in6p_options,
+ inp->in6p_outputopts, inp->in6p_moptions);
+ db_printf("in6p_icmp6filt: %p in6p_cksum %d "
+ "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum,
+ inp->in6p_hops);
+ } else
+#endif
+ {
+ db_printf("inp_ip_tos: %d inp_ip_options: %p "
+ "inp_ip_moptions: %p\n", inp->inp_ip_tos,
+ inp->inp_options, inp->inp_moptions);
+ }
+
+ db_print_indent(indent);
+ db_printf("inp_phd: %p inp_gencnt: %ju\n", inp->inp_phd,
+ (uintmax_t)inp->inp_gencnt);
+}
+
+DB_SHOW_COMMAND(inpcb, db_show_inpcb)
+{
+ struct inpcb *inp;
+
+ if (!have_addr) {
+ db_printf("usage: show inpcb <addr>\n");
+ return;
+ }
+ inp = (struct inpcb *)addr;
+
+ db_print_inpcb(inp, "inpcb", 0);
+}
+#endif
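
A usage note on the DDB hook added above (not part of the diff): on a kernel
built with DDB, the command is invoked from the debugger prompt as
"show inpcb <addr>", where <addr> is an inpcb pointer obtained elsewhere.
db_print_inpcb() is left non-static, presumably so protocol-specific printers
can reuse it; a hypothetical reuse would look like:

	/*
	 * Sketch of reuse from another DDB command; the command name
	 * "mypcb" is illustrative, not something this import adds.
	 */
	DB_SHOW_COMMAND(mypcb, db_show_mypcb)
	{
		struct inpcb *inp;

		if (!have_addr) {
			db_printf("usage: show mypcb <addr>\n");
			return;
		}
		inp = (struct inpcb *)addr;
		db_print_inpcb(inp, "inpcb", 0);
	}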
--- /dev/null
+++ sys/netinet/sctp_indata.h
@@ -0,0 +1,124 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_indata.h,v 1.9 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_indata.h,v 1.9 2007/06/22 13:50:56 rrs Exp $");
+
+#ifndef __sctp_indata_h__
+#define __sctp_indata_h__
+
+#if defined(_KERNEL)
+
+struct sctp_queued_to_read *
+sctp_build_readq_entry(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ uint32_t tsn, uint32_t ppid,
+ uint32_t context, uint16_t stream_no,
+ uint16_t stream_seq, uint8_t flags,
+ struct mbuf *dm);
+
+
+#define sctp_build_readq_entry_mac(_ctl, in_it, a, net, tsn, ppid, context, stream_no, stream_seq, flags, dm) do { \
+ if (_ctl) { \
+ atomic_add_int(&((net)->ref_count), 1); \
+ (_ctl)->sinfo_stream = stream_no; \
+ (_ctl)->sinfo_ssn = stream_seq; \
+ (_ctl)->sinfo_flags = (flags << 8); \
+ (_ctl)->sinfo_ppid = ppid; \
+ (_ctl)->sinfo_context = a; \
+ (_ctl)->sinfo_timetolive = 0; \
+ (_ctl)->sinfo_tsn = tsn; \
+ (_ctl)->sinfo_cumtsn = tsn; \
+ (_ctl)->sinfo_assoc_id = sctp_get_associd((in_it)); \
+ (_ctl)->length = 0; \
+ (_ctl)->held_length = 0; \
+ (_ctl)->whoFrom = net; \
+ (_ctl)->data = dm; \
+ (_ctl)->tail_mbuf = NULL; \
+ (_ctl)->aux_data = NULL; \
+ (_ctl)->stcb = (in_it); \
+ (_ctl)->port_from = (in_it)->rport; \
+ (_ctl)->spec_flags = 0; \
+ (_ctl)->do_not_ref_stcb = 0; \
+ (_ctl)->end_added = 0; \
+ (_ctl)->pdapi_aborted = 0; \
+ (_ctl)->some_taken = 0; \
+ } \
+} while (0)
+
+
+
+struct mbuf *
+sctp_build_ctl_nchunk(struct sctp_inpcb *inp,
+ struct sctp_sndrcvinfo *sinfo);
+
+char *
+sctp_build_ctl_cchunk(struct sctp_inpcb *inp,
+ int *control_len,
+ struct sctp_sndrcvinfo *sinfo);
+
+void sctp_set_rwnd(struct sctp_tcb *, struct sctp_association *);
+
+uint32_t
+sctp_calc_rwnd(struct sctp_tcb *stcb, struct sctp_association *asoc);
+
+void
+sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
+ uint32_t rwnd, int nonce_sum_flag, int *abort_now);
+
+void
+sctp_handle_sack(struct mbuf *m, int offset, struct sctp_sack_chunk *, struct sctp_tcb *,
+ struct sctp_nets *, int *, int, uint32_t);
+
+/* draft-ietf-tsvwg-usctp */
+void
+sctp_handle_forward_tsn(struct sctp_tcb *,
+ struct sctp_forward_tsn_chunk *, int *, struct mbuf *, int);
+
+struct sctp_tmit_chunk *
+ sctp_try_advance_peer_ack_point(struct sctp_tcb *, struct sctp_association *);
+
+void sctp_service_queues(struct sctp_tcb *, struct sctp_association *);
+
+void
+sctp_update_acked(struct sctp_tcb *, struct sctp_shutdown_chunk *,
+ struct sctp_nets *, int *);
+
+int
+sctp_process_data(struct mbuf **, int, int *, int, struct sctphdr *,
+ struct sctp_inpcb *, struct sctp_tcb *,
+ struct sctp_nets *, uint32_t *);
+
+void sctp_sack_check(struct sctp_tcb *, int, int, int *);
+
+#endif
+#endif
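
Two entry points in this new header do the same job at different layers:
sctp_build_readq_entry() allocates and fills a read-queue entry, while the
sctp_build_readq_entry_mac() macro only fills in a control block the caller
has already allocated (the macro also bumps the destination net's ref_count).
A hedged call sketch for the function, using only names taken from its
parameter list (the variables themselves are hypothetical):

	struct sctp_queued_to_read *ctl;

	ctl = sctp_build_readq_entry(stcb, net, tsn, ppid, context,
	    stream_no, stream_seq, flags, dm);
	if (ctl == NULL) {
		/* Allocation failed; the caller would drop the chunk. */
	}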
Index: ip_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/ip_var.h -L sys/netinet/ip_var.h -u -r1.2 -r1.3
--- sys/netinet/ip_var.h
+++ sys/netinet/ip_var.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)ip_var.h 8.2 (Berkeley) 1/9/95
- * $FreeBSD: src/sys/netinet/ip_var.h,v 1.95 2005/07/02 23:13:31 thompsa Exp $
+ * $FreeBSD: src/sys/netinet/ip_var.h,v 1.101 2007/06/12 16:24:53 bms Exp $
*/
#ifndef _NETINET_IP_VAR_H_
@@ -61,7 +61,7 @@
struct mbuf *ipq_frags; /* to ip headers of fragments */
struct in_addr ipq_src,ipq_dst;
u_char ipq_nfrags; /* # frags in this packet */
- struct label *ipq_label; /* MAC label */
+ struct label *ipq_label; /* MAC label */
};
#endif /* _KERNEL */
@@ -79,17 +79,39 @@
};
/*
+ * Multicast source list entry.
+ */
+struct in_msource {
+ TAILQ_ENTRY(in_msource) ims_next; /* next source */
+ struct sockaddr_storage ims_addr; /* address of this source */
+};
+
+/*
+ * Multicast filter descriptor; there is one instance per group membership
+ * on a socket, allocated as an expandable vector hung off ip_moptions.
+ * struct in_multi contains separate IPv4-stack-wide state for IGMPv3.
+ */
+struct in_mfilter {
+ uint16_t imf_fmode; /* filter mode for this socket/group */
+ uint16_t imf_nsources; /* # of sources for this socket/group */
+ TAILQ_HEAD(, in_msource) imf_sources; /* source list */
+};
+
+/*
* Structure attached to inpcb.ip_moptions and
* passed to ip_output when IP multicast options are in use.
+ * This structure is lazy-allocated.
*/
struct ip_moptions {
struct ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */
struct in_addr imo_multicast_addr; /* ifindex/addr on MULTICAST_IF */
+ u_long imo_multicast_vif; /* vif num outgoing multicasts */
u_char imo_multicast_ttl; /* TTL for outgoing multicasts */
u_char imo_multicast_loop; /* 1 => hear sends if a member */
u_short imo_num_memberships; /* no. memberships this socket */
- struct in_multi *imo_membership[IP_MAX_MEMBERSHIPS];
- u_long imo_multicast_vif; /* vif num outgoing multicasts */
+ u_short imo_max_memberships; /* max memberships this socket */
+ struct in_multi **imo_membership; /* group memberships */
+ struct in_mfilter *imo_mfilters; /* source filters */
};
struct ipstat {
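
The practical consequence of the ip_moptions change above: imo_membership is
no longer a fixed IP_MAX_MEMBERSHIPS array but a pointer its owner allocates,
with imo_max_memberships tracking the vector size and inp_freemoptions()
(declared below) tearing a socket's set down. The ip_carp.c hunk later in
this mail does exactly this for its private ip_moptions; a minimal sketch of
the same pattern (the softc pointer and M_CONSUMER malloc type are
placeholders, error handling omitted):

	struct ip_moptions *imo = &sc->sc_imo;	/* consumer-owned options */

	imo->imo_membership = (struct in_multi **)malloc(
	    sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS,
	    M_CONSUMER, M_WAITOK);
	imo->imo_mfilters = NULL;		/* no source filters yet */
	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
	imo->imo_num_memberships = 0;
	imo->imo_multicast_vif = -1;		/* no multicast vif */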
@@ -130,10 +152,13 @@
#define IP_FORWARDING 0x1 /* most of ip header exists */
#define IP_RAWOUTPUT 0x2 /* raw ip header exists */
#define IP_SENDONES 0x4 /* send all-ones broadcast */
-#define IP_ROUTETOIF SO_DONTROUTE /* bypass routing tables */
-#define IP_ALLOWBROADCAST SO_BROADCAST /* can send broadcast packets */
+#define IP_SENDTOIF 0x8 /* send on specific ifnet */
+#define IP_ROUTETOIF SO_DONTROUTE /* 0x10 bypass routing tables */
+#define IP_ALLOWBROADCAST SO_BROADCAST /* 0x20 can send broadcast packets */
-/* mbuf flag used by ip_fastfwd */
+/*
+ * mbuf flag used by ip_fastfwd
+ */
#define M_FASTFWD_OURS M_PROTO1 /* changed dst to local */
#ifdef __NO_STRICT_ALIGNMENT
@@ -144,52 +169,50 @@
struct ip;
struct inpcb;
-struct inpcbinfo;
struct route;
struct sockopt;
extern struct ipstat ipstat;
-extern u_short ip_id; /* ip packet ctr, for ids */
-extern int ip_defttl; /* default IP ttl */
-extern int ipforwarding; /* ip forwarding */
-extern int ip_doopts; /* process or ignore IP options */
+extern u_short ip_id; /* ip packet ctr, for ids */
+extern int ip_defttl; /* default IP ttl */
+extern int ipforwarding; /* ip forwarding */
#ifdef IPSTEALTH
-extern int ipstealth; /* stealth forwarding */
+extern int ipstealth; /* stealth forwarding */
#endif
extern u_char ip_protox[];
-extern struct socket *ip_rsvpd; /* reservation protocol daemon */
-extern struct socket *ip_mrouter; /* multicast routing daemon */
+extern struct socket *ip_rsvpd; /* reservation protocol daemon */
+extern struct socket *ip_mrouter; /* multicast routing daemon */
extern int (*legal_vif_num)(int);
extern u_long (*ip_mcast_src)(int);
extern int rsvp_on;
extern struct pr_usrreqs rip_usrreqs;
-int ip_ctloutput(struct socket *, struct sockopt *sopt);
-int ip_ctloutput_pcbinfo(struct socket *, struct sockopt *sopt,
- struct inpcbinfo *pcbinfo);
-void ip_drain(void);
-void ip_fini(void *xtp);
-int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
+void inp_freemoptions(struct ip_moptions *);
+int inp_getmoptions(struct inpcb *, struct sockopt *);
+int inp_setmoptions(struct inpcb *, struct sockopt *);
+
+int ip_ctloutput(struct socket *, struct sockopt *sopt);
+void ip_drain(void);
+void ip_fini(void *xtp);
+int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
u_long if_hwassist_flags, int sw_csum);
-void ip_freemoptions(struct ip_moptions *);
-void ip_init(void);
-extern int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
- struct ip_moptions *);
-int ip_output(struct mbuf *,
+void ip_forward(struct mbuf *m, int srcrt);
+void ip_init(void);
+extern int
+ (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
+ struct ip_moptions *);
+int ip_output(struct mbuf *,
struct mbuf *, struct route *, int, struct ip_moptions *,
struct inpcb *);
-int ipproto_register(u_char);
-int ipproto_unregister(u_char);
+int ipproto_register(u_char);
+int ipproto_unregister(u_char);
struct mbuf *
- ip_reass(struct mbuf *);
+ ip_reass(struct mbuf *);
struct in_ifaddr *
- ip_rtaddr(struct in_addr);
-void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
- struct mbuf *);
-void ip_slowtimo(void);
-struct mbuf *
- ip_srcroute(struct mbuf *);
-void ip_stripoptions(struct mbuf *, struct mbuf *);
+ ip_rtaddr(struct in_addr);
+void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
+ struct mbuf *);
+void ip_slowtimo(void);
u_int16_t ip_randomid(void);
int rip_ctloutput(struct socket *, struct sockopt *);
void rip_ctlinput(int, struct sockaddr *, void *);
Index: ip_carp.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_carp.c -L sys/netinet/ip_carp.c -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_carp.c
+++ sys/netinet/ip_carp.c
@@ -1,5 +1,3 @@
-/* $FreeBSD: src/sys/netinet/ip_carp.c,v 1.27.2.6 2005/12/25 21:59:20 mlaier Exp $ */
-
/*
* Copyright (c) 2002 Michael Shalayeff. All rights reserved.
* Copyright (c) 2003 Ryan McBride. All rights reserved.
@@ -26,6 +24,9 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_carp.c,v 1.52 2007/10/07 20:44:22 silby Exp $");
+
#include "opt_carp.h"
#include "opt_bpf.h"
#include "opt_inet.h"
@@ -41,6 +42,7 @@
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/time.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
@@ -59,6 +61,7 @@
#include <net/iso88025.h>
#include <net/if.h>
#include <net/if_clone.h>
+#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/route.h>
@@ -78,7 +81,6 @@
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
-#include <net/if_dl.h>
#endif
#include <crypto/sha1.h>
@@ -189,9 +191,9 @@
unsigned char *);
static void carp_setroute(struct carp_softc *, int);
static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
-static int carp_clone_create(struct if_clone *, int);
+static int carp_clone_create(struct if_clone *, int, caddr_t);
static void carp_clone_destroy(struct ifnet *);
-static void carpdetach(struct carp_softc *);
+static void carpdetach(struct carp_softc *, int);
static int carp_prepare_ad(struct mbuf *, struct carp_softc *,
struct carp_header *);
static void carp_send_ad_all(void);
@@ -209,6 +211,7 @@
static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
+static void carp_multicast_cleanup(struct carp_softc *);
static int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
static int carp_del_addr(struct carp_softc *, struct sockaddr_in *);
static void carp_carpdev_state_locked(struct carp_if *);
@@ -217,12 +220,15 @@
static void carp_send_na(struct carp_softc *);
static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
+static void carp_multicast6_cleanup(struct carp_softc *);
#endif
static LIST_HEAD(, carp_softc) carpif_list;
static struct mtx carp_mtx;
IFC_SIMPLE_DECLARE(carp, 0);
+static eventhandler_tag if_detach_event_tag;
+
static __inline u_int16_t
carp_cksum(struct mbuf *m, int len)
{
@@ -349,7 +355,7 @@
}
static int
-carp_clone_create(struct if_clone *ifc, int unit)
+carp_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
struct carp_softc *sc;
@@ -372,10 +378,16 @@
#ifdef INET6
sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
#endif
-
- callout_init(&sc->sc_ad_tmo, NET_CALLOUT_MPSAFE);
- callout_init(&sc->sc_md_tmo, NET_CALLOUT_MPSAFE);
- callout_init(&sc->sc_md6_tmo, NET_CALLOUT_MPSAFE);
+ sc->sc_imo.imo_membership = (struct in_multi **)malloc(
+ (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
+ M_WAITOK);
+ sc->sc_imo.imo_mfilters = NULL;
+ sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
+ sc->sc_imo.imo_multicast_vif = -1;
+
+ callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE);
+ callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE);
+ callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE);
ifp->if_softc = sc;
if_initname(ifp, CARP_IFNAME, unit);
@@ -399,63 +411,90 @@
carp_clone_destroy(struct ifnet *ifp)
{
struct carp_softc *sc = ifp->if_softc;
+
+ if (sc->sc_carpdev)
+ CARP_SCLOCK(sc);
+ carpdetach(sc, 1); /* Returns unlocked. */
+
+ mtx_lock(&carp_mtx);
+ LIST_REMOVE(sc, sc_next);
+ mtx_unlock(&carp_mtx);
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free_type(ifp, IFT_ETHER);
+ free(sc->sc_imo.imo_membership, M_CARP);
+ free(sc, M_CARP);
+}
+
+/*
+ * This function can be called on CARP interface destroy path,
+ * and in case of the removal of the underlying interface as
+ * well. We differentiate these two cases. In the latter case
+ * we do not cleanup our multicast memberships, since they
+ * are already freed. Also, in the latter case we do not
+ * release the lock on return, because the function will be
+ * called once more, for another CARP instance on the same
+ * interface.
+ */
+static void
+carpdetach(struct carp_softc *sc, int unlock)
+{
struct carp_if *cif;
- struct ip_moptions *imo = &sc->sc_imo;
-#ifdef INET6
- struct ip6_moptions *im6o = &sc->sc_im6o;
-#endif
-
-/* carpdetach(sc); */
- /*
- * If an interface is destroyed which is suppressing the preemption,
- * decrease the global counter, otherwise the host will never get
- * out of the carp supressing state.
- */
+ callout_stop(&sc->sc_ad_tmo);
+ callout_stop(&sc->sc_md_tmo);
+ callout_stop(&sc->sc_md6_tmo);
+
if (sc->sc_suppress)
carp_suppress_preempt--;
sc->sc_suppress = 0;
- callout_stop(&sc->sc_ad_tmo);
- callout_stop(&sc->sc_md_tmo);
- callout_stop(&sc->sc_md6_tmo);
+ if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
+ carp_suppress_preempt--;
+ sc->sc_sendad_errors = 0;
- if (imo->imo_num_memberships) {
- in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
- imo->imo_multicast_ifp = NULL;
- }
+ carp_set_state(sc, INIT);
+ SC2IFP(sc)->if_flags &= ~IFF_UP;
+ carp_setrun(sc, 0);
+ if (unlock)
+ carp_multicast_cleanup(sc);
#ifdef INET6
- while (!LIST_EMPTY(&im6o->im6o_memberships)) {
- struct in6_multi_mship *imm =
- LIST_FIRST(&im6o->im6o_memberships);
- LIST_REMOVE(imm, i6mm_chain);
- in6_leavegroup(imm);
- }
- im6o->im6o_multicast_ifp = NULL;
+ carp_multicast6_cleanup(sc);
#endif
- /* Remove ourself from parents if_carp queue */
- if (sc->sc_carpdev && (cif = sc->sc_carpdev->if_carp)) {
- CARP_LOCK(cif);
+ if (sc->sc_carpdev != NULL) {
+ cif = (struct carp_if *)sc->sc_carpdev->if_carp;
+ CARP_LOCK_ASSERT(cif);
TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
if (!--cif->vhif_nvrs) {
+ ifpromisc(sc->sc_carpdev, 0);
sc->sc_carpdev->if_carp = NULL;
CARP_LOCK_DESTROY(cif);
- FREE(cif, M_CARP);
- ifpromisc(sc->sc_carpdev, 0);
- sc->sc_carpdev = NULL;
- } else {
+ FREE(cif, M_IFADDR);
+ } else if (unlock)
CARP_UNLOCK(cif);
- }
+ sc->sc_carpdev = NULL;
}
+}
- mtx_lock(&carp_mtx);
- LIST_REMOVE(sc, sc_next);
- mtx_unlock(&carp_mtx);
- bpfdetach(ifp);
- if_detach(ifp);
- if_free_type(ifp, IFT_ETHER);
- free(sc, M_CARP);
+/* Detach an interface from the carp. */
+static void
+carp_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+ struct carp_if *cif = (struct carp_if *)ifp->if_carp;
+ struct carp_softc *sc, *nextsc;
+
+ if (cif == NULL)
+ return;
+
+ /*
+ * XXX: At the end of for() cycle the lock will be destroyed.
+ */
+ CARP_LOCK(cif);
+ for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
+ nextsc = TAILQ_NEXT(sc, sc_list);
+ carpdetach(sc, 0);
+ }
}
/*
@@ -641,7 +680,7 @@
SC2IFP(sc)->if_ipackets++;
SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
- if (SC2IFP(sc)->if_bpf) {
+ if (bpf_peers_present(SC2IFP(sc)->if_bpf)) {
struct ip *ip = mtod(m, struct ip *);
uint32_t af1 = af;
@@ -751,42 +790,6 @@
return;
}
-static void
-carpdetach(struct carp_softc *sc)
-{
- struct ifaddr *ifa;
-
- callout_stop(&sc->sc_ad_tmo);
- callout_stop(&sc->sc_md_tmo);
- callout_stop(&sc->sc_md6_tmo);
-
- while ((ifa = TAILQ_FIRST(&SC2IFP(sc)->if_addrlist)) != NULL)
- if (ifa->ifa_addr->sa_family == AF_INET) {
- struct in_ifaddr *ia = ifatoia(ifa);
-
- carp_del_addr(sc, &ia->ia_addr);
-
- /* ripped screaming from in_control(SIOCDIFADDR) */
- in_ifscrub(SC2IFP(sc), ia);
- TAILQ_REMOVE(&SC2IFP(sc)->if_addrlist, ifa, ifa_link);
- TAILQ_REMOVE(&in_ifaddrhead, ia, ia_link);
- IFAFREE((&ia->ia_ifa));
- }
-}
-
-/* Detach an interface from the carp. */
-void
-carp_ifdetach(struct ifnet *ifp)
-{
- struct carp_softc *sc;
- struct carp_if *cif = (struct carp_if *)ifp->if_carp;
-
- CARP_LOCK(cif);
- TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
- carpdetach(sc);
- CARP_UNLOCK(cif);
-}
-
static int
carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
{
@@ -984,9 +987,14 @@
sizeof(struct in6_addr));
/* set the multicast destination */
- ip6->ip6_dst.s6_addr8[0] = 0xff;
- ip6->ip6_dst.s6_addr8[1] = 0x02;
+ ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
ip6->ip6_dst.s6_addr8[15] = 0x12;
+ if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
+ SC2IFP(sc)->if_oerrors++;
+ m_freem(m);
+ CARP_LOG("%s: in6_setscope failed\n", __func__);
+ return;
+ }
ch_ptr = (struct carp_header *)(&ip6[1]);
bcopy(&ch, ch_ptr, sizeof(ch));
@@ -1049,8 +1057,8 @@
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
-/* arprequest(sc->sc_carpdev, &in, &in, IFP2ENADDR(sc->sc_ifp)); */
- arp_ifinit2(sc->sc_carpdev, ifa, IFP2ENADDR(sc->sc_ifp));
+/* arprequest(sc->sc_carpdev, &in, &in, IF_LLADDR(sc->sc_ifp)); */
+ arp_ifinit2(sc->sc_carpdev, ifa, IF_LLADDR(sc->sc_ifp));
DELAY(1000); /* XXX */
}
@@ -1145,7 +1153,7 @@
if (count == index) {
if (vh->sc_state ==
MASTER) {
- *enaddr = IFP2ENADDR(vh->sc_ifp);
+ *enaddr = IF_LLADDR(vh->sc_ifp);
CARP_UNLOCK(cif);
return (1);
} else {
@@ -1164,7 +1172,7 @@
(SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
ia->ia_ifp == SC2IFP(vh) &&
vh->sc_state == MASTER) {
- *enaddr = IFP2ENADDR(vh->sc_ifp);
+ *enaddr = IF_LLADDR(vh->sc_ifp);
CARP_UNLOCK(cif);
return (1);
}
@@ -1221,14 +1229,14 @@
if (mtag == NULL) {
/* better a bit than nothing */
CARP_UNLOCK(cif);
- return (IFP2ENADDR(sc->sc_ifp));
+ return (IF_LLADDR(sc->sc_ifp));
}
bcopy(&ifp, (caddr_t)(mtag + 1),
sizeof(struct ifnet *));
m_tag_prepend(m, mtag);
CARP_UNLOCK(cif);
- return (IFP2ENADDR(sc->sc_ifp));
+ return (IF_LLADDR(sc->sc_ifp));
}
}
}
@@ -1253,7 +1261,7 @@
if ((SC2IFP(vh)->if_flags & IFF_UP) &&
(SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
vh->sc_state == MASTER &&
- !bcmp(dhost, IFP2ENADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
+ !bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
CARP_UNLOCK(cif);
return (SC2IFP(vh));
}
@@ -1307,7 +1315,11 @@
{
struct timeval tv;
- if (sc->sc_carpdev)
+ if (sc->sc_carpdev == NULL) {
+ SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
+ carp_set_state(sc, INIT);
+ return;
+ } else
CARP_SCLOCK_ASSERT(sc);
if (SC2IFP(sc)->if_flags & IFF_UP &&
@@ -1374,6 +1386,42 @@
}
}
+static void
+carp_multicast_cleanup(struct carp_softc *sc)
+{
+ struct ip_moptions *imo = &sc->sc_imo;
+ u_int16_t n = imo->imo_num_memberships;
+
+ /* Clean up our own multicast memberships */
+ while (n-- > 0) {
+ if (imo->imo_membership[n] != NULL) {
+ in_delmulti(imo->imo_membership[n]);
+ imo->imo_membership[n] = NULL;
+ }
+ }
+ KASSERT(imo->imo_mfilters == NULL,
+ ("%s: imo_mfilters != NULL", __func__));
+ imo->imo_num_memberships = 0;
+ imo->imo_multicast_ifp = NULL;
+}
+
+#ifdef INET6
+static void
+carp_multicast6_cleanup(struct carp_softc *sc)
+{
+ struct ip6_moptions *im6o = &sc->sc_im6o;
+
+ while (!LIST_EMPTY(&im6o->im6o_memberships)) {
+ struct in6_multi_mship *imm =
+ LIST_FIRST(&im6o->im6o_memberships);
+
+ LIST_REMOVE(imm, i6mm_chain);
+ in6_leavegroup(imm);
+ }
+ im6o->im6o_multicast_ifp = NULL;
+}
+#endif
+
static int
carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
{
@@ -1827,7 +1875,8 @@
break;
case SIOCSVH:
- if ((error = suser(curthread)) != 0)
+ error = priv_check(curthread, PRIV_NETINET_CARP);
+ if (error)
break;
if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
break;
@@ -1861,16 +1910,20 @@
cif = (struct carp_if *)sc->sc_carpdev->if_carp;
TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
if (vr != sc &&
- vr->sc_vhid == carpr.carpr_vhid)
- return EEXIST;
+ vr->sc_vhid == carpr.carpr_vhid) {
+ error = EEXIST;
+ break;
+ }
+ if (error == EEXIST)
+ break;
}
sc->sc_vhid = carpr.carpr_vhid;
- IFP2ENADDR(sc->sc_ifp)[0] = 0;
- IFP2ENADDR(sc->sc_ifp)[1] = 0;
- IFP2ENADDR(sc->sc_ifp)[2] = 0x5e;
- IFP2ENADDR(sc->sc_ifp)[3] = 0;
- IFP2ENADDR(sc->sc_ifp)[4] = 1;
- IFP2ENADDR(sc->sc_ifp)[5] = sc->sc_vhid;
+ IF_LLADDR(sc->sc_ifp)[0] = 0;
+ IF_LLADDR(sc->sc_ifp)[1] = 0;
+ IF_LLADDR(sc->sc_ifp)[2] = 0x5e;
+ IF_LLADDR(sc->sc_ifp)[3] = 0;
+ IF_LLADDR(sc->sc_ifp)[4] = 1;
+ IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid;
error--;
}
if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
@@ -1902,7 +1955,8 @@
carpr.carpr_vhid = sc->sc_vhid;
carpr.carpr_advbase = sc->sc_advbase;
carpr.carpr_advskew = sc->sc_advskew;
- if (suser(curthread) == 0)
+ error = priv_check(curthread, PRIV_NETINET_CARP);
+ if (error == 0)
bcopy(sc->sc_key, carpr.carpr_key,
sizeof(carpr.carpr_key));
error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
@@ -2134,28 +2188,28 @@
static int
carp_modevent(module_t mod, int type, void *data)
{
- int error = 0;
-
switch (type) {
case MOD_LOAD:
+ if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
+ carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
+ if (if_detach_event_tag == NULL)
+ return (ENOMEM);
mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
LIST_INIT(&carpif_list);
if_clone_attach(&carp_cloner);
break;
case MOD_UNLOAD:
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
if_clone_detach(&carp_cloner);
- while (!LIST_EMPTY(&carpif_list))
- carp_clone_destroy(SC2IFP(LIST_FIRST(&carpif_list)));
mtx_destroy(&carp_mtx);
break;
default:
- error = EINVAL;
- break;
+ return (EINVAL);
}
- return error;
+ return (0);
}
static moduledata_t carp_mod = {
Index: ip_dummynet.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_dummynet.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_dummynet.c -L sys/netinet/ip_dummynet.c -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_dummynet.c
+++ sys/netinet/ip_dummynet.c
@@ -23,10 +23,11 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.93.2.4 2006/02/23 08:28:15 ume Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.110 2007/10/07 20:44:22 silby Exp $");
+
#define DUMMYNET_DEBUG
#include "opt_inet6.h"
@@ -66,7 +67,9 @@
#include <sys/socketvar.h>
#include <sys/time.h>
#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
#include <net/if.h>
+#include <net/netisr.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
@@ -77,7 +80,6 @@
#include <netinet/ip_var.h>
#include <netinet/if_ether.h> /* for struct arpcom */
-#include <net/bridge.h>
#include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */
#include <netinet6/ip6_var.h>
@@ -91,7 +93,7 @@
static int dn_hash_size = 64 ; /* default hash size */
/* statistics on number of queue searches and search steps */
-static int searches, search_steps ;
+static long searches, search_steps ;
static int pipe_expire = 1 ; /* expire queue if empty */
static int dn_max_ratio = 16 ; /* max queues/buckets ratio */
@@ -99,6 +101,15 @@
static int red_avg_pkt_size = 512; /* RED - default medium packet size */
static int red_max_pkt_size = 1500; /* RED - default max packet size */
+static struct timeval prev_t, t;
+static long tick_last; /* Last tick duration (usec). */
+static long tick_delta; /* Last vs standard tick diff (usec). */
+static long tick_delta_sum; /* Accumulated tick difference (usec).*/
+static long tick_adjustment; /* Tick adjustments done. */
+static long tick_lost; /* Lost(coalesced) ticks number. */
+/* Adjusted vs non-adjusted curr_time difference (ticks). */
+static long tick_diff;
+
/*
* Three heaps contain queues and pipes that the scheduler handles:
*
@@ -134,31 +145,42 @@
extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
#ifdef SYSCTL_NODE
-SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet,
- CTLFLAG_RW, 0, "Dummynet");
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
- CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, curr_time,
- CTLFLAG_RD, &curr_time, 0, "Current tick");
+ CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, curr_time,
+ CTLFLAG_RD, &curr_time, 0, "Current tick");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap,
- CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap");
+ CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap,
- CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, searches,
- CTLFLAG_RD, &searches, 0, "Number of queue searches");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, search_steps,
- CTLFLAG_RD, &search_steps, 0, "Number of queue search steps");
+ CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, searches,
+ CTLFLAG_RD, &searches, 0, "Number of queue searches");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, search_steps,
+ CTLFLAG_RD, &search_steps, 0, "Number of queue search steps");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
- CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty");
+ CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len,
- CTLFLAG_RW, &dn_max_ratio, 0,
- "Max ratio between dynamic queues and buckets");
+ CTLFLAG_RW, &dn_max_ratio, 0,
+ "Max ratio between dynamic queues and buckets");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
- CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table");
+ CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
- CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size");
+ CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
- CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size");
+ CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta,
+ CTLFLAG_RD, &tick_delta, 0, "Last vs standard tick difference (usec).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum,
+ CTLFLAG_RD, &tick_delta_sum, 0, "Accumulated tick difference (usec).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment,
+ CTLFLAG_RD, &tick_adjustment, 0, "Tick adjustments done.");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff,
+ CTLFLAG_RD, &tick_diff, 0,
+ "Adjusted vs non-adjusted curr_time difference (ticks).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost,
+ CTLFLAG_RD, &tick_lost, 0,
+ "Number of ticks coalesced by dummynet taskqueue.");
#endif
#ifdef DUMMYNET_DEBUG
@@ -172,21 +194,17 @@
#define DPRINTF(X)
#endif
+static struct task dn_task;
+static struct taskqueue *dn_tq = NULL;
+static void dummynet_task(void *, int);
+
static struct mtx dummynet_mtx;
-/*
- * NB: Recursion is needed to deal with re-entry via ICMP. That is,
- * a packet may be dispatched via ip_input from dummynet_io and
- * re-enter through ip_output. Yech.
- */
#define DUMMYNET_LOCK_INIT() \
- mtx_init(&dummynet_mtx, "dummynet", NULL, MTX_DEF | MTX_RECURSE)
+ mtx_init(&dummynet_mtx, "dummynet", NULL, MTX_DEF)
#define DUMMYNET_LOCK_DESTROY() mtx_destroy(&dummynet_mtx)
#define DUMMYNET_LOCK() mtx_lock(&dummynet_mtx)
#define DUMMYNET_UNLOCK() mtx_unlock(&dummynet_mtx)
-#define DUMMYNET_LOCK_ASSERT() do { \
- mtx_assert(&dummynet_mtx, MA_OWNED); \
- NET_ASSERT_GIANT(); \
-} while (0)
+#define DUMMYNET_LOCK_ASSERT() mtx_assert(&dummynet_mtx, MA_OWNED)
static int config_pipe(struct dn_pipe *p);
static int ip_dn_ctl(struct sockopt *sopt);
@@ -486,8 +504,8 @@
* and put into delay line (p_queue)
*/
static void
-move_pkt(struct mbuf *pkt, struct dn_flow_queue *q,
- struct dn_pipe *p, int len)
+move_pkt(struct mbuf *pkt, struct dn_flow_queue *q, struct dn_pipe *p,
+ int len)
{
struct dn_pkt_tag *dt = dn_tag_get(pkt);
@@ -695,64 +713,115 @@
}
/*
- * This is called once per tick, or HZ times per second. It is used to
- * increment the current tick counter and schedule expired events.
+ * This is called one tick after the previous run. It is used to
+ * schedule the next run.
*/
static void
dummynet(void * __unused unused)
{
- struct mbuf *head = NULL, *tail = NULL;
- struct dn_pipe *pipe;
- struct dn_heap *heaps[3];
- struct dn_heap *h;
- void *p; /* generic parameter to handler */
- int i;
- heaps[0] = &ready_heap ; /* fixed-rate queues */
- heaps[1] = &wfq_ready_heap ; /* wfq queues */
- heaps[2] = &extract_heap ; /* delay line */
+ taskqueue_enqueue(dn_tq, &dn_task);
+}
- DUMMYNET_LOCK();
- curr_time++ ;
- for (i=0; i < 3 ; i++) {
- h = heaps[i];
- while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) {
- if (h->p[0].key > curr_time)
- printf("dummynet: warning, heap %d is %d ticks late\n",
- i, (int)(curr_time - h->p[0].key));
- p = h->p[0].object ; /* store a copy before heap_extract */
- heap_extract(h, NULL); /* need to extract before processing */
- if (i == 0)
- ready_event(p, &head, &tail);
- else if (i == 1) {
- struct dn_pipe *pipe = p;
- if (pipe->if_name[0] != '\0')
- printf("dummynet: bad ready_event_wfq for pipe %s\n",
- pipe->if_name);
- else
- ready_event_wfq(p, &head, &tail);
- } else
- transmit_event(p, &head, &tail);
- }
- }
- /* Sweep pipes trying to expire idle flow_queues. */
- for (i = 0; i < HASHSIZE; i++)
- SLIST_FOREACH(pipe, &pipehash[i], next)
- if (pipe->idle_heap.elements > 0 &&
- DN_KEY_LT(pipe->idle_heap.p[0].key, pipe->V) ) {
- struct dn_flow_queue *q = pipe->idle_heap.p[0].object;
-
- heap_extract(&(pipe->idle_heap), NULL);
- q->S = q->F + 1; /* Mark timestamp as invalid. */
- pipe->sum -= q->fs->weight;
+/*
+ * The main dummynet processing function.
+ */
+static void
+dummynet_task(void *context, int pending)
+{
+ struct mbuf *head = NULL, *tail = NULL;
+ struct dn_pipe *pipe;
+ struct dn_heap *heaps[3];
+ struct dn_heap *h;
+ void *p; /* generic parameter to handler */
+ int i;
+
+ DUMMYNET_LOCK();
+
+ heaps[0] = &ready_heap; /* fixed-rate queues */
+ heaps[1] = &wfq_ready_heap; /* wfq queues */
+ heaps[2] = &extract_heap; /* delay line */
+
+ /* Update number of lost(coalesced) ticks. */
+ tick_lost += pending - 1;
+
+ getmicrouptime(&t);
+ /* Last tick duration (usec). */
+ tick_last = (t.tv_sec - prev_t.tv_sec) * 1000000 +
+ (t.tv_usec - prev_t.tv_usec);
+ /* Last tick vs standard tick difference (usec). */
+ tick_delta = (tick_last * hz - 1000000) / hz;
+ /* Accumulated tick difference (usec). */
+ tick_delta_sum += tick_delta;
+
+ prev_t = t;
+
+ /*
+ * Adjust curr_time if accumulated tick difference greater than
+ * 'standard' tick. Since curr_time should be monotonically increasing,
+ * we do positive adjustment as required and throttle curr_time in
+ * case of negative adjustment.
+ */
+ curr_time++;
+ if (tick_delta_sum - tick >= 0) {
+ int diff = tick_delta_sum / tick;
+
+ curr_time += diff;
+ tick_diff += diff;
+ tick_delta_sum %= tick;
+ tick_adjustment++;
+ } else if (tick_delta_sum + tick <= 0) {
+ curr_time--;
+ tick_diff--;
+ tick_delta_sum += tick;
+ tick_adjustment++;
+ }
+
+ for (i = 0; i < 3; i++) {
+ h = heaps[i];
+ while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time)) {
+ if (h->p[0].key > curr_time)
+ printf("dummynet: warning, "
+ "heap %d is %d ticks late\n",
+ i, (int)(curr_time - h->p[0].key));
+ /* store a copy before heap_extract */
+ p = h->p[0].object;
+ /* need to extract before processing */
+ heap_extract(h, NULL);
+ if (i == 0)
+ ready_event(p, &head, &tail);
+ else if (i == 1) {
+ struct dn_pipe *pipe = p;
+ if (pipe->if_name[0] != '\0')
+ printf("dummynet: bad ready_event_wfq "
+ "for pipe %s\n", pipe->if_name);
+ else
+ ready_event_wfq(p, &head, &tail);
+ } else
+ transmit_event(p, &head, &tail);
}
+ }
- DUMMYNET_UNLOCK();
+ /* Sweep pipes trying to expire idle flow_queues. */
+ for (i = 0; i < HASHSIZE; i++)
+ SLIST_FOREACH(pipe, &pipehash[i], next)
+ if (pipe->idle_heap.elements > 0 &&
+ DN_KEY_LT(pipe->idle_heap.p[0].key, pipe->V)) {
+ struct dn_flow_queue *q =
+ pipe->idle_heap.p[0].object;
+
+ heap_extract(&(pipe->idle_heap), NULL);
+ /* Mark timestamp as invalid. */
+ q->S = q->F + 1;
+ pipe->sum -= q->fs->weight;
+ }
- if (head != NULL)
- dummynet_send(head);
+ DUMMYNET_UNLOCK();
+
+ if (head != NULL)
+ dummynet_send(head);
- callout_reset(&dn_timeout, 1, dummynet, NULL);
+ callout_reset(&dn_timeout, 1, dummynet, NULL);
}
static void
@@ -774,11 +843,11 @@
ip = mtod(m, struct ip *);
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
- ip_input(m);
+ netisr_dispatch(NETISR_IP, m);
break;
#ifdef INET6
case DN_TO_IP6_IN:
- ip6_input(m);
+ netisr_dispatch(NETISR_IPV6, m);
break;
case DN_TO_IP6_OUT:
@@ -792,32 +861,6 @@
printf("dummynet: if_bridge not loaded\n");
break;
- case DN_TO_BDG_FWD :
- /*
- * The bridge requires/assumes the Ethernet header is
- * contiguous in the first mbuf header. Ensure this
- * is true.
- */
- if (BDG_LOADED) {
- if (m->m_len < ETHER_HDR_LEN &&
- (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
- printf("dummynet/bridge: pullup fail, "
- "dropping pkt\n");
- break;
- }
- m = bdg_forward_ptr(m, pkt->ifp);
- } else {
- /*
- * somebody unloaded the bridge module.
- * Drop pkt
- */
- /* XXX rate limit */
- printf("dummynet: dropping bridged packet "
- "trapped in pipe\n");
- }
- if (m)
- m_freem(m);
- break;
case DN_TO_ETH_DEMUX:
/*
* The Ethernet code assumes the Ethernet header is
@@ -853,9 +896,9 @@
struct dn_flow_queue *q, *prev ;
int i, initial_elements = fs->rq_elements ;
- if (fs->last_expired == time_second)
+ if (fs->last_expired == time_uptime)
return 0 ;
- fs->last_expired = time_second ;
+ fs->last_expired = time_uptime ;
for (i = 0 ; i <= fs->rq_size ; i++) /* last one is overflow */
for (prev=NULL, q = fs->rq[i] ; q != NULL ; )
if (q->head != NULL || q->S != q->F+1) {
@@ -1021,103 +1064,106 @@
static int
red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len)
{
- /*
- * RED algorithm
- *
- * RED calculates the average queue size (avg) using a low-pass filter
- * with an exponential weighted (w_q) moving average:
- * avg <- (1-w_q) * avg + w_q * q_size
- * where q_size is the queue length (measured in bytes or * packets).
- *
- * If q_size == 0, we compute the idle time for the link, and set
- * avg = (1 - w_q)^(idle/s)
- * where s is the time needed for transmitting a medium-sized packet.
- *
- * Now, if avg < min_th the packet is enqueued.
- * If avg > max_th the packet is dropped. Otherwise, the packet is
- * dropped with probability P function of avg.
- *
- */
+ /*
+ * RED algorithm
+ *
+ * RED calculates the average queue size (avg) using a low-pass filter
+ * with an exponential weighted (w_q) moving average:
+ * avg <- (1-w_q) * avg + w_q * q_size
+ * where q_size is the queue length (measured in bytes or * packets).
+ *
+ * If q_size == 0, we compute the idle time for the link, and set
+ * avg = (1 - w_q)^(idle/s)
+ * where s is the time needed for transmitting a medium-sized packet.
+ *
+ * Now, if avg < min_th the packet is enqueued.
+ * If avg > max_th the packet is dropped. Otherwise, the packet is
+ * dropped with probability P function of avg.
+ */
- int64_t p_b = 0;
- /* queue in bytes or packets ? */
- u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ? q->len_bytes : q->len;
+ int64_t p_b = 0;
- DPRINTF(("\ndummynet: %d q: %2u ", (int) curr_time, q_size));
+ /* Queue in bytes or packets? */
+ u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ?
+ q->len_bytes : q->len;
- /* average queue size estimation */
- if (q_size != 0) {
- /*
- * queue is not empty, avg <- avg + (q_size - avg) * w_q
- */
- int diff = SCALE(q_size) - q->avg;
- int64_t v = SCALE_MUL((int64_t) diff, (int64_t) fs->w_q);
+ DPRINTF(("\ndummynet: %d q: %2u ", (int)curr_time, q_size));
- q->avg += (int) v;
- } else {
- /*
- * queue is empty, find for how long the queue has been
- * empty and use a lookup table for computing
- * (1 - * w_q)^(idle_time/s) where s is the time to send a
- * (small) packet.
- * XXX check wraps...
- */
- if (q->avg) {
- u_int t = (curr_time - q->q_time) / fs->lookup_step;
+ /* Average queue size estimation. */
+ if (q_size != 0) {
+ /* Queue is not empty, avg <- avg + (q_size - avg) * w_q */
+ int diff = SCALE(q_size) - q->avg;
+ int64_t v = SCALE_MUL((int64_t)diff, (int64_t)fs->w_q);
- q->avg = (t < fs->lookup_depth) ?
- SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
+ q->avg += (int)v;
+ } else {
+ /*
+ * Queue is empty, find for how long the queue has been
+ * empty and use a lookup table for computing
+ * (1 - * w_q)^(idle_time/s) where s is the time to send a
+ * (small) packet.
+ * XXX check wraps...
+ */
+ if (q->avg) {
+ u_int t = (curr_time - q->q_time) / fs->lookup_step;
+
+ q->avg = (t < fs->lookup_depth) ?
+ SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
+ }
}
- }
- DPRINTF(("dummynet: avg: %u ", SCALE_VAL(q->avg)));
+ DPRINTF(("dummynet: avg: %u ", SCALE_VAL(q->avg)));
- /* should i drop ? */
-
- if (q->avg < fs->min_th) {
- q->count = -1;
- return 0; /* accept packet ; */
- }
- if (q->avg >= fs->max_th) { /* average queue >= max threshold */
- if (fs->flags_fs & DN_IS_GENTLE_RED) {
- /*
- * According to Gentle-RED, if avg is greater than max_th the
- * packet is dropped with a probability
- * p_b = c_3 * avg - c_4
- * where c_3 = (1 - max_p) / max_th, and c_4 = 1 - 2 * max_p
- */
- p_b = SCALE_MUL((int64_t) fs->c_3, (int64_t) q->avg) - fs->c_4;
- } else {
- q->count = -1;
- DPRINTF(("dummynet: - drop"));
- return 1 ;
+ /* Should i drop? */
+ if (q->avg < fs->min_th) {
+ q->count = -1;
+ return (0); /* accept packet */
}
- } else if (q->avg > fs->min_th) {
- /*
- * we compute p_b using the linear dropping function p_b = c_1 *
- * avg - c_2, where c_1 = max_p / (max_th - min_th), and c_2 =
- * max_p * min_th / (max_th - min_th)
- */
- p_b = SCALE_MUL((int64_t) fs->c_1, (int64_t) q->avg) - fs->c_2;
- }
- if (fs->flags_fs & DN_QSIZE_IS_BYTES)
- p_b = (p_b * len) / fs->max_pkt_size;
- if (++q->count == 0)
- q->random = random() & 0xffff;
- else {
- /*
- * q->count counts packets arrived since last drop, so a greater
- * value of q->count means a greater packet drop probability.
- */
- if (SCALE_MUL(p_b, SCALE((int64_t) q->count)) > q->random) {
- q->count = 0;
- DPRINTF(("dummynet: - red drop"));
- /* after a drop we calculate a new random value */
- q->random = random() & 0xffff;
- return 1; /* drop */
+ if (q->avg >= fs->max_th) { /* average queue >= max threshold */
+ if (fs->flags_fs & DN_IS_GENTLE_RED) {
+ /*
+ * According to Gentle-RED, if avg is greater than
+ * max_th the packet is dropped with a probability
+ * p_b = c_3 * avg - c_4
+ * where c_3 = (1 - max_p) / max_th
+ * c_4 = 1 - 2 * max_p
+ */
+ p_b = SCALE_MUL((int64_t)fs->c_3, (int64_t)q->avg) -
+ fs->c_4;
+ } else {
+ q->count = -1;
+ DPRINTF(("dummynet: - drop"));
+ return (1);
+ }
+ } else if (q->avg > fs->min_th) {
+ /*
+ * We compute p_b using the linear dropping function
+ * p_b = c_1 * avg - c_2
+ * where c_1 = max_p / (max_th - min_th)
+ * c_2 = max_p * min_th / (max_th - min_th)
+ */
+ p_b = SCALE_MUL((int64_t)fs->c_1, (int64_t)q->avg) - fs->c_2;
}
- }
- /* end of RED algorithm */
- return 0 ; /* accept */
+
+ if (fs->flags_fs & DN_QSIZE_IS_BYTES)
+ p_b = (p_b * len) / fs->max_pkt_size;
+ if (++q->count == 0)
+ q->random = random() & 0xffff;
+ else {
+ /*
+ * q->count counts packets arrived since last drop, so a greater
+ * value of q->count means a greater packet drop probability.
+ */
+ if (SCALE_MUL(p_b, SCALE((int64_t)q->count)) > q->random) {
+ q->count = 0;
+ DPRINTF(("dummynet: - red drop"));
+ /* After a drop we calculate a new random value. */
+ q->random = random() & 0xffff;
+ return (1); /* drop */
+ }
+ }
+ /* End of RED algorithm. */
+
+ return (0); /* accept */
}
static __inline struct dn_flow_set *
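
For readers who prefer not to untangle the fixed-point SCALE()/SCALE_MUL()
arithmetic above, the same drop decision in plain floating point looks
roughly like this (a sketch only: it omits the idle-time decay of avg, the
byte-mode scaling of p_b by packet length, and the q->count spreading of
drops; random01() is a hypothetical uniform [0,1) source):

	extern double random01(void);	/* hypothetical [0,1) generator */

	static int
	red_should_drop(double *avg, double q_size, double w_q,
	    double min_th, double max_th, double max_p, int gentle)
	{
		double p_b;

		/* avg <- (1 - w_q) * avg + w_q * q_size */
		*avg += w_q * (q_size - *avg);

		if (*avg < min_th)
			return (0);			/* always accept */
		if (*avg >= max_th) {
			if (!gentle)
				return (1);		/* always drop */
			/* Gentle-RED: p_b = c_3 * avg - c_4 */
			p_b = (1.0 - max_p) / max_th * *avg -
			    (1.0 - 2.0 * max_p);
		} else {
			/* p_b = c_1 * avg - c_2 */
			p_b = max_p * (*avg - min_th) / (max_th - min_th);
		}
		return (random01() < p_b);	/* drop with prob. p_b */
	}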
@@ -1153,7 +1199,6 @@
* m the mbuf with the packet
* ifp the 'ifp' parameter from the caller.
* NULL in ip_input, destination interface in ip_output,
- * real_dst in bdg_forward
* rule matching rule, in case of multiple passes
*
*/
@@ -1175,6 +1220,10 @@
if (cmd->opcode == O_LOG)
cmd += F_LEN(cmd);
+ if (cmd->opcode == O_ALTQ)
+ cmd += F_LEN(cmd);
+ if (cmd->opcode == O_TAG)
+ cmd += F_LEN(cmd);
is_pipe = (cmd->opcode == O_PIPE);
DUMMYNET_LOCK();
@@ -1349,36 +1398,37 @@
static void
purge_flow_set(struct dn_flow_set *fs, int all)
{
- struct dn_flow_queue *q, *qn ;
- int i ;
+ struct dn_flow_queue *q, *qn;
+ int i;
- DUMMYNET_LOCK_ASSERT();
+ DUMMYNET_LOCK_ASSERT();
- for (i = 0 ; i <= fs->rq_size ; i++ ) {
- for (q = fs->rq[i] ; q ; q = qn ) {
- struct mbuf *m, *mnext;
-
- mnext = q->head;
- while ((m = mnext) != NULL) {
- mnext = m->m_nextpkt;
- DN_FREE_PKT(m);
- }
- qn = q->next ;
- free(q, M_DUMMYNET);
+ for (i = 0; i <= fs->rq_size; i++) {
+ for (q = fs->rq[i]; q != NULL; q = qn) {
+ struct mbuf *m, *mnext;
+
+ mnext = q->head;
+ while ((m = mnext) != NULL) {
+ mnext = m->m_nextpkt;
+ DN_FREE_PKT(m);
+ }
+ qn = q->next;
+ free(q, M_DUMMYNET);
+ }
+ fs->rq[i] = NULL;
+ }
+
+ fs->rq_elements = 0;
+ if (all) {
+ /* RED - free lookup table. */
+ if (fs->w_q_lookup != NULL)
+ free(fs->w_q_lookup, M_DUMMYNET);
+ if (fs->rq != NULL)
+ free(fs->rq, M_DUMMYNET);
+ /* If this fs is not part of a pipe, free it. */
+ if (fs->pipe == NULL || fs != &(fs->pipe->fs))
+ free(fs, M_DUMMYNET);
}
- fs->rq[i] = NULL ;
- }
- fs->rq_elements = 0 ;
- if (all) {
- /* RED - free lookup table */
- if (fs->w_q_lookup)
- free(fs->w_q_lookup, M_DUMMYNET);
- if (fs->rq)
- free(fs->rq, M_DUMMYNET);
- /* if this fs is not part of a pipe, free it */
- if (fs->pipe && fs != &(fs->pipe->fs) )
- free(fs, M_DUMMYNET);
- }
}
/*
@@ -1496,54 +1546,59 @@
* setup RED parameters
*/
static int
-config_red(struct dn_flow_set *p, struct dn_flow_set * x)
+config_red(struct dn_flow_set *p, struct dn_flow_set *x)
{
- int i;
+ int i;
- x->w_q = p->w_q;
- x->min_th = SCALE(p->min_th);
- x->max_th = SCALE(p->max_th);
- x->max_p = p->max_p;
-
- x->c_1 = p->max_p / (p->max_th - p->min_th);
- x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th));
- if (x->flags_fs & DN_IS_GENTLE_RED) {
- x->c_3 = (SCALE(1) - p->max_p) / p->max_th;
- x->c_4 = (SCALE(1) - 2 * p->max_p);
- }
-
- /* if the lookup table already exist, free and create it again */
- if (x->w_q_lookup) {
- free(x->w_q_lookup, M_DUMMYNET);
- x->w_q_lookup = NULL ;
- }
- if (red_lookup_depth == 0) {
- printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth must be > 0\n");
- free(x, M_DUMMYNET);
- return EINVAL;
- }
- x->lookup_depth = red_lookup_depth;
- x->w_q_lookup = (u_int *) malloc(x->lookup_depth * sizeof(int),
+ x->w_q = p->w_q;
+ x->min_th = SCALE(p->min_th);
+ x->max_th = SCALE(p->max_th);
+ x->max_p = p->max_p;
+
+ x->c_1 = p->max_p / (p->max_th - p->min_th);
+ x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th));
+
+ if (x->flags_fs & DN_IS_GENTLE_RED) {
+ x->c_3 = (SCALE(1) - p->max_p) / p->max_th;
+ x->c_4 = SCALE(1) - 2 * p->max_p;
+ }
+
+ /* If the lookup table already exist, free and create it again. */
+ if (x->w_q_lookup) {
+ free(x->w_q_lookup, M_DUMMYNET);
+ x->w_q_lookup = NULL;
+ }
+ if (red_lookup_depth == 0) {
+ printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth"
+ "must be > 0\n");
+ free(x, M_DUMMYNET);
+ return (EINVAL);
+ }
+ x->lookup_depth = red_lookup_depth;
+ x->w_q_lookup = (u_int *)malloc(x->lookup_depth * sizeof(int),
M_DUMMYNET, M_NOWAIT);
- if (x->w_q_lookup == NULL) {
- printf("dummynet: sorry, cannot allocate red lookup table\n");
- free(x, M_DUMMYNET);
- return ENOSPC;
- }
-
- /* fill the lookup table with (1 - w_q)^x */
- x->lookup_step = p->lookup_step ;
- x->lookup_weight = p->lookup_weight ;
- x->w_q_lookup[0] = SCALE(1) - x->w_q;
- for (i = 1; i < x->lookup_depth; i++)
- x->w_q_lookup[i] = SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight);
- if (red_avg_pkt_size < 1)
- red_avg_pkt_size = 512 ;
- x->avg_pkt_size = red_avg_pkt_size ;
- if (red_max_pkt_size < 1)
- red_max_pkt_size = 1500 ;
- x->max_pkt_size = red_max_pkt_size ;
- return 0 ;
+ if (x->w_q_lookup == NULL) {
+ printf("dummynet: sorry, cannot allocate red lookup table\n");
+ free(x, M_DUMMYNET);
+ return(ENOSPC);
+ }
+
+ /* Fill the lookup table with (1 - w_q)^x */
+ x->lookup_step = p->lookup_step;
+ x->lookup_weight = p->lookup_weight;
+ x->w_q_lookup[0] = SCALE(1) - x->w_q;
+
+ for (i = 1; i < x->lookup_depth; i++)
+ x->w_q_lookup[i] =
+ SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight);
+
+ if (red_avg_pkt_size < 1)
+ red_avg_pkt_size = 512;
+ x->avg_pkt_size = red_avg_pkt_size;
+ if (red_max_pkt_size < 1)
+ red_max_pkt_size = 1500;
+ x->max_pkt_size = red_max_pkt_size;
+ return (0);
}
static int
@@ -1574,137 +1629,146 @@
static void
set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src)
{
- x->flags_fs = src->flags_fs;
- x->qsize = src->qsize;
- x->plr = src->plr;
- x->flow_mask = src->flow_mask;
- if (x->flags_fs & DN_QSIZE_IS_BYTES) {
- if (x->qsize > 1024*1024)
- x->qsize = 1024*1024 ;
- } else {
- if (x->qsize == 0)
- x->qsize = 50 ;
- if (x->qsize > 100)
- x->qsize = 50 ;
- }
- /* configuring RED */
- if ( x->flags_fs & DN_IS_RED )
- config_red(src, x) ; /* XXX should check errors */
+ x->flags_fs = src->flags_fs;
+ x->qsize = src->qsize;
+ x->plr = src->plr;
+ x->flow_mask = src->flow_mask;
+ if (x->flags_fs & DN_QSIZE_IS_BYTES) {
+ if (x->qsize > 1024 * 1024)
+ x->qsize = 1024 * 1024;
+ } else {
+ if (x->qsize == 0)
+ x->qsize = 50;
+ if (x->qsize > 100)
+ x->qsize = 50;
+ }
+ /* Configuring RED. */
+ if (x->flags_fs & DN_IS_RED)
+ config_red(src, x); /* XXX should check errors */
}
/*
- * setup pipe or queue parameters.
+ * Setup pipe or queue parameters.
*/
-
static int
config_pipe(struct dn_pipe *p)
{
- struct dn_flow_set *pfs = &(p->fs);
- struct dn_flow_queue *q;
- int i, error;
-
- /*
- * The config program passes parameters as follows:
- * bw = bits/second (0 means no limits),
- * delay = ms, must be translated into ticks.
- * qsize = slots/bytes
- */
- p->delay = ( p->delay * hz ) / 1000 ;
- /* We need either a pipe number or a flow_set number */
- if (p->pipe_nr == 0 && pfs->fs_nr == 0)
- return EINVAL ;
- if (p->pipe_nr != 0 && pfs->fs_nr != 0)
- return EINVAL ;
- if (p->pipe_nr != 0) { /* this is a pipe */
- struct dn_pipe *pipe;
+ struct dn_flow_set *pfs = &(p->fs);
+ struct dn_flow_queue *q;
+ int i, error;
- DUMMYNET_LOCK();
- pipe = locate_pipe(p->pipe_nr); /* locate pipe */
-
- if (pipe == NULL) { /* new pipe */
- pipe = malloc(sizeof(struct dn_pipe), M_DUMMYNET,
- M_NOWAIT | M_ZERO);
- if (pipe == NULL) {
- DUMMYNET_UNLOCK();
- printf("dummynet: no memory for new pipe\n");
- return (ENOMEM);
- }
- pipe->pipe_nr = p->pipe_nr;
- pipe->fs.pipe = pipe ;
- /* idle_heap is the only one from which we extract from the middle.
- */
- pipe->idle_heap.size = pipe->idle_heap.elements = 0 ;
- pipe->idle_heap.offset=OFFSET_OF(struct dn_flow_queue, heap_pos);
- } else
- /* Flush accumulated credit for all queues */
- for (i = 0; i <= pipe->fs.rq_size; i++)
- for (q = pipe->fs.rq[i]; q; q = q->next)
- q->numbytes = 0;
-
- pipe->bandwidth = p->bandwidth ;
- pipe->numbytes = 0; /* just in case... */
- bcopy(p->if_name, pipe->if_name, sizeof(p->if_name) );
- pipe->ifp = NULL ; /* reset interface ptr */
- pipe->delay = p->delay ;
- set_fs_parms(&(pipe->fs), pfs);
-
-
- if (pipe->fs.rq == NULL) { /* a new pipe */
- error = alloc_hash(&(pipe->fs), pfs);
- if (error) {
+ /*
+ * The config program passes parameters as follows:
+ * bw = bits/second (0 means no limits),
+ * delay = ms, must be translated into ticks.
+ * qsize = slots/bytes
+ */
+ p->delay = (p->delay * hz) / 1000;
+ /* We need either a pipe number or a flow_set number. */
+ if (p->pipe_nr == 0 && pfs->fs_nr == 0)
+ return (EINVAL);
+ if (p->pipe_nr != 0 && pfs->fs_nr != 0)
+ return (EINVAL);
+ if (p->pipe_nr != 0) { /* this is a pipe */
+ struct dn_pipe *pipe;
+
+ DUMMYNET_LOCK();
+ pipe = locate_pipe(p->pipe_nr); /* locate pipe */
+
+ if (pipe == NULL) { /* new pipe */
+ pipe = malloc(sizeof(struct dn_pipe), M_DUMMYNET,
+ M_NOWAIT | M_ZERO);
+ if (pipe == NULL) {
+ DUMMYNET_UNLOCK();
+ printf("dummynet: no memory for new pipe\n");
+ return (ENOMEM);
+ }
+ pipe->pipe_nr = p->pipe_nr;
+ pipe->fs.pipe = pipe;
+ /*
+ * idle_heap is the only one from which
+ * we extract from the middle.
+ */
+ pipe->idle_heap.size = pipe->idle_heap.elements = 0;
+ pipe->idle_heap.offset =
+ offsetof(struct dn_flow_queue, heap_pos);
+ } else
+ /* Flush accumulated credit for all queues. */
+ for (i = 0; i <= pipe->fs.rq_size; i++)
+ for (q = pipe->fs.rq[i]; q; q = q->next)
+ q->numbytes = 0;
+
+ pipe->bandwidth = p->bandwidth;
+ pipe->numbytes = 0; /* just in case... */
+ bcopy(p->if_name, pipe->if_name, sizeof(p->if_name));
+ pipe->ifp = NULL; /* reset interface ptr */
+ pipe->delay = p->delay;
+ set_fs_parms(&(pipe->fs), pfs);
+
+ if (pipe->fs.rq == NULL) { /* a new pipe */
+ error = alloc_hash(&(pipe->fs), pfs);
+ if (error) {
+ DUMMYNET_UNLOCK();
+ free(pipe, M_DUMMYNET);
+ return (error);
+ }
+ SLIST_INSERT_HEAD(&pipehash[HASH(pipe->pipe_nr)],
+ pipe, next);
+ }
DUMMYNET_UNLOCK();
- free(pipe, M_DUMMYNET);
- return (error);
- }
- SLIST_INSERT_HEAD(&pipehash[HASH(pipe->pipe_nr)], pipe, next);
- }
- DUMMYNET_UNLOCK();
- } else { /* config queue */
- struct dn_flow_set *fs;
+ } else { /* config queue */
+ struct dn_flow_set *fs;
- DUMMYNET_LOCK();
- fs = locate_flowset(pfs->fs_nr); /* locate flow_set */
+ DUMMYNET_LOCK();
+ fs = locate_flowset(pfs->fs_nr); /* locate flow_set */
- if (fs == NULL) { /* new */
- if (pfs->parent_nr == 0) { /* need link to a pipe */
- DUMMYNET_UNLOCK();
- return EINVAL ;
- }
- fs = malloc(sizeof(struct dn_flow_set), M_DUMMYNET,
- M_NOWAIT|M_ZERO);
- if (fs == NULL) {
- DUMMYNET_UNLOCK();
- printf("dummynet: no memory for new flow_set\n");
- return (ENOMEM);
- }
- fs->fs_nr = pfs->fs_nr;
- fs->parent_nr = pfs->parent_nr;
- fs->weight = pfs->weight;
- if (fs->weight == 0)
- fs->weight = 1;
- else if (fs->weight > 100)
- fs->weight = 100;
- } else {
- /* Change parent pipe not allowed; must delete and recreate */
- if (pfs->parent_nr != 0 && fs->parent_nr != pfs->parent_nr) {
- DUMMYNET_UNLOCK();
- return EINVAL ;
- }
- }
- set_fs_parms(fs, pfs);
+ if (fs == NULL) { /* new */
+ if (pfs->parent_nr == 0) { /* need link to a pipe */
+ DUMMYNET_UNLOCK();
+ return (EINVAL);
+ }
+ fs = malloc(sizeof(struct dn_flow_set), M_DUMMYNET,
+ M_NOWAIT | M_ZERO);
+ if (fs == NULL) {
+ DUMMYNET_UNLOCK();
+ printf(
+ "dummynet: no memory for new flow_set\n");
+ return (ENOMEM);
+ }
+ fs->fs_nr = pfs->fs_nr;
+ fs->parent_nr = pfs->parent_nr;
+ fs->weight = pfs->weight;
+ if (fs->weight == 0)
+ fs->weight = 1;
+ else if (fs->weight > 100)
+ fs->weight = 100;
+ } else {
+ /*
+ * Change parent pipe not allowed;
+ * must delete and recreate.
+ */
+ if (pfs->parent_nr != 0 &&
+ fs->parent_nr != pfs->parent_nr) {
+ DUMMYNET_UNLOCK();
+ return (EINVAL);
+ }
+ }
+
+ set_fs_parms(fs, pfs);
- if (fs->rq == NULL) { /* a new flow_set */
- error = alloc_hash(fs, pfs);
- if (error) {
+ if (fs->rq == NULL) { /* a new flow_set */
+ error = alloc_hash(fs, pfs);
+ if (error) {
+ DUMMYNET_UNLOCK();
+ free(fs, M_DUMMYNET);
+ return (error);
+ }
+ SLIST_INSERT_HEAD(&flowsethash[HASH(fs->fs_nr)],
+ fs, next);
+ }
DUMMYNET_UNLOCK();
- free(fs, M_DUMMYNET);
- return (error);
- }
- SLIST_INSERT_HEAD(&flowsethash[HASH(fs->fs_nr)], fs, next);
}
- DUMMYNET_UNLOCK();
- }
- return 0 ;
+ return (0);
}
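
The set_fs_parms() copy above clamps qsize to at most 1MB when DN_QSIZE_IS_BYTES is set and to the 1..100 slot range (falling back to 50) otherwise, while config_pipe() converts the user-supplied delay from milliseconds to ticks. A small userland sketch of that arithmetic, assuming hz = 1000 purely for illustration:

#include <stdio.h>

static const int hz = 1000;		/* illustrative; the kernel tick rate varies */

static unsigned long
clamp_qsize(unsigned long qsize, int qsize_is_bytes)
{
	if (qsize_is_bytes) {
		if (qsize > 1024 * 1024)	/* byte-based queues capped at 1MB */
			qsize = 1024 * 1024;
	} else {
		if (qsize == 0 || qsize > 100)	/* slot-based queues fall back to 50 */
			qsize = 50;
	}
	return (qsize);
}

int
main(void)
{
	int delay_ms = 250;

	printf("delay = %d ticks\n", (delay_ms * hz) / 1000);
	printf("qsize = %lu slots\n", clamp_qsize(500, 0));
	printf("qsize = %lu bytes\n", clamp_qsize(4UL * 1024 * 1024, 1));
	return (0);
}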
/*
@@ -1749,7 +1813,7 @@
* drain all queues. Called in case of severe mbuf shortage.
*/
void
-dummynet_drain()
+dummynet_drain(void)
{
struct dn_flow_set *fs;
struct dn_pipe *pipe;
@@ -1786,6 +1850,7 @@
static int
delete_pipe(struct dn_pipe *p)
{
+
if (p->pipe_nr == 0 && p->fs.fs_nr == 0)
return EINVAL ;
if (p->pipe_nr != 0 && p->fs.fs_nr != 0)
@@ -2042,52 +2107,66 @@
static void
ip_dn_init(void)
{
- int i;
+ int i;
- if (bootverbose)
- printf("DUMMYNET with IPv6 initialized (040826)\n");
+ if (bootverbose)
+ printf("DUMMYNET with IPv6 initialized (040826)\n");
- DUMMYNET_LOCK_INIT();
+ DUMMYNET_LOCK_INIT();
- for (i = 0; i < HASHSIZE; i++) {
- SLIST_INIT(&pipehash[i]);
- SLIST_INIT(&flowsethash[i]);
- }
- ready_heap.size = ready_heap.elements = 0 ;
- ready_heap.offset = 0 ;
+ for (i = 0; i < HASHSIZE; i++) {
+ SLIST_INIT(&pipehash[i]);
+ SLIST_INIT(&flowsethash[i]);
+ }
+ ready_heap.size = ready_heap.elements = 0;
+ ready_heap.offset = 0;
+
+ wfq_ready_heap.size = wfq_ready_heap.elements = 0;
+ wfq_ready_heap.offset = 0;
- wfq_ready_heap.size = wfq_ready_heap.elements = 0 ;
- wfq_ready_heap.offset = 0 ;
+ extract_heap.size = extract_heap.elements = 0;
+ extract_heap.offset = 0;
- extract_heap.size = extract_heap.elements = 0 ;
- extract_heap.offset = 0 ;
+ ip_dn_ctl_ptr = ip_dn_ctl;
+ ip_dn_io_ptr = dummynet_io;
+ ip_dn_ruledel_ptr = dn_rule_delete;
- ip_dn_ctl_ptr = ip_dn_ctl;
- ip_dn_io_ptr = dummynet_io;
- ip_dn_ruledel_ptr = dn_rule_delete;
+ TASK_INIT(&dn_task, 0, dummynet_task, NULL);
+ dn_tq = taskqueue_create_fast("dummynet", M_NOWAIT,
+ taskqueue_thread_enqueue, &dn_tq);
+ taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet");
- callout_init(&dn_timeout, NET_CALLOUT_MPSAFE);
- callout_reset(&dn_timeout, 1, dummynet, NULL);
+ callout_init(&dn_timeout, CALLOUT_MPSAFE);
+ callout_reset(&dn_timeout, 1, dummynet, NULL);
+
+ /* Initialize curr_time adjustment mechanics. */
+ getmicrouptime(&prev_t);
}
#ifdef KLD_MODULE
static void
ip_dn_destroy(void)
{
- ip_dn_ctl_ptr = NULL;
- ip_dn_io_ptr = NULL;
- ip_dn_ruledel_ptr = NULL;
+ ip_dn_ctl_ptr = NULL;
+ ip_dn_io_ptr = NULL;
+ ip_dn_ruledel_ptr = NULL;
- callout_stop(&dn_timeout);
- dummynet_flush();
+ DUMMYNET_LOCK();
+ callout_stop(&dn_timeout);
+ DUMMYNET_UNLOCK();
+ taskqueue_drain(dn_tq, &dn_task);
+ taskqueue_free(dn_tq);
- DUMMYNET_LOCK_DESTROY();
+ dummynet_flush();
+
+ DUMMYNET_LOCK_DESTROY();
}
#endif /* KLD_MODULE */
static int
dummynet_modevent(module_t mod, int type, void *data)
{
+
switch (type) {
case MOD_LOAD:
if (DUMMYNET_LOADED) {
Index: tcp_subr.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/tcp_subr.c -L sys/netinet/tcp_subr.c -u -r1.2 -r1.3
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -27,28 +27,29 @@
* SUCH DAMAGE.
*
* @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.228.2.6 2006/03/01 21:13:29 andre Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_subr.c,v 1.300.2.2 2007/12/02 10:32:49 bz Exp $");
+
#include "opt_compat.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_mac.h"
#include "opt_tcpdebug.h"
-#include "opt_tcp_sack.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#ifdef INET6
#include <sys/domain.h>
#endif
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -83,6 +84,7 @@
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
@@ -93,35 +95,28 @@
#include <netinet6/ip6protosw.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#ifdef INET6
-#include <netinet6/ipsec6.h>
-#endif
-#include <netkey/key.h>
-#endif /*IPSEC*/
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/xform.h>
#ifdef INET6
#include <netipsec/ipsec6.h>
#endif
#include <netipsec/key.h>
-#define IPSEC
-#endif /*FAST_IPSEC*/
+#endif /*IPSEC*/
#include <machine/in_cksum.h>
#include <sys/md5.h>
+#include <security/mac/mac_framework.h>
+
int tcp_mssdflt = TCP_MSS;
SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW,
- &tcp_mssdflt , 0, "Default TCP Maximum Segment Size");
+ &tcp_mssdflt, 0, "Default TCP Maximum Segment Size");
#ifdef INET6
int tcp_v6mssdflt = TCP6_MSS;
SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
- CTLFLAG_RW, &tcp_v6mssdflt , 0,
- "Default TCP Maximum Segment Size for IPv6");
+ CTLFLAG_RW, &tcp_v6mssdflt , 0,
+ "Default TCP Maximum Segment Size for IPv6");
#endif
/*
@@ -135,42 +130,30 @@
int tcp_minmss = TCP_MINMSS;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_RW,
&tcp_minmss , 0, "Minmum TCP Maximum Segment Size");
-/*
- * Number of TCP segments per second we accept from remote host
- * before we start to calculate average segment size. If average
- * segment size drops below the minimum TCP MSS we assume a DoS
- * attack and reset+drop the connection. Care has to be taken not to
- * set this value too small to not kill interactive type connections
- * (telnet, SSH) which send many small packets.
- */
-int tcp_minmssoverload = TCP_MINMSSOVERLOAD;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmssoverload, CTLFLAG_RW,
- &tcp_minmssoverload , 0, "Number of TCP Segments per Second allowed to"
- "be under the MINMSS Size");
-
-#if 0
-static int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
-SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt, CTLFLAG_RW,
- &tcp_rttdflt , 0, "Default maximum TCP Round Trip Time");
-#endif
int tcp_do_rfc1323 = 1;
SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW,
- &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions");
+ &tcp_do_rfc1323, 0, "Enable rfc1323 (high performance TCP) extensions");
+
+static int tcp_log_debug = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW,
+ &tcp_log_debug, 0, "Log errors caused by incoming TCP segments");
static int tcp_tcbhashsize = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN,
- &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
+ &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
static int do_tcpdrain = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0,
- "Enable tcp_drain routine for extra help when low on mbufs");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW,
+ &do_tcpdrain, 0,
+ "Enable tcp_drain routine for extra help when low on mbufs");
SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD,
&tcbinfo.ipi_count, 0, "Number of active PCBs");
static int icmp_may_rst = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW, &icmp_may_rst, 0,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW,
+ &icmp_may_rst, 0,
"Certain ICMP unreachable messages may abort connections in SYN_SENT");
static int tcp_isn_reseed_interval = 0;
@@ -213,7 +196,6 @@
uma_zone_t sack_hole_zone;
static struct inpcb *tcp_notify(struct inpcb *, int);
-static void tcp_discardcb(struct tcpcb *);
static void tcp_isn_tick(void *);
/*
@@ -232,24 +214,46 @@
* separate because the tcpcb structure is exported to userland for sysctl
* parsing purposes, which do not know about callouts.
*/
-struct tcpcb_mem {
- struct tcpcb tcb;
- struct callout tcpcb_mem_rexmt, tcpcb_mem_persist, tcpcb_mem_keep;
- struct callout tcpcb_mem_2msl, tcpcb_mem_delack;
+struct tcpcb_mem {
+ struct tcpcb tcb;
+ struct tcp_timer tt;
};
static uma_zone_t tcpcb_zone;
-static uma_zone_t tcptw_zone;
+MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
struct callout isn_callout;
+static struct mtx isn_mtx;
+
+#define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF)
+#define ISN_LOCK() mtx_lock(&isn_mtx)
+#define ISN_UNLOCK() mtx_unlock(&isn_mtx)
/*
- * Tcp initialization
+ * TCP initialization.
*/
+static void
+tcp_zone_change(void *tag)
+{
+
+ uma_zone_set_max(tcbinfo.ipi_zone, maxsockets);
+ uma_zone_set_max(tcpcb_zone, maxsockets);
+ tcp_tw_zone_change();
+}
+
+static int
+tcp_inpcb_init(void *mem, int size, int flags)
+{
+ struct inpcb *inp = mem;
+
+ INP_LOCK_INIT(inp, "inp", "tcpinp");
+ return (0);
+}
+
void
-tcp_init()
+tcp_init(void)
{
- int hashsize = TCBHASHSIZE;
+ int hashsize = TCBHASHSIZE;
tcp_delacktime = TCPTV_DELACK;
tcp_keepinit = TCPTV_KEEP_INIT;
tcp_keepidle = TCPTV_KEEP_IDLE;
@@ -257,23 +261,27 @@
tcp_maxpersistidle = TCPTV_KEEP_IDLE;
tcp_msl = TCPTV_MSL;
tcp_rexmit_min = TCPTV_MIN;
+ if (tcp_rexmit_min < 1)
+ tcp_rexmit_min = 1;
tcp_rexmit_slop = TCPTV_CPU_VAR;
tcp_inflight_rttthresh = TCPTV_INFLIGHT_RTTTHRESH;
+ tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
INP_INFO_LOCK_INIT(&tcbinfo, "tcp");
LIST_INIT(&tcb);
- tcbinfo.listhead = &tcb;
+ tcbinfo.ipi_listhead = &tcb;
TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize);
if (!powerof2(hashsize)) {
printf("WARNING: TCB hash size not a power of 2\n");
hashsize = 512; /* safe default */
}
tcp_tcbhashsize = hashsize;
- tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask);
- tcbinfo.porthashbase = hashinit(hashsize, M_PCB,
- &tcbinfo.porthashmask);
+ tcbinfo.ipi_hashbase = hashinit(hashsize, M_PCB,
+ &tcbinfo.ipi_hashmask);
+ tcbinfo.ipi_porthashbase = hashinit(hashsize, M_PCB,
+ &tcbinfo.ipi_porthashmask);
tcbinfo.ipi_zone = uma_zcreate("inpcb", sizeof(struct inpcb),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ NULL, NULL, tcp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(tcbinfo.ipi_zone, maxsockets);
#ifdef INET6
#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
@@ -291,27 +299,26 @@
tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(tcpcb_zone, maxsockets);
- tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
- uma_zone_set_max(tcptw_zone, maxsockets / 5);
- tcp_timer_init();
+ tcp_tw_init();
syncache_init();
tcp_hc_init();
tcp_reass_init();
+ ISN_LOCK_INIT();
callout_init(&isn_callout, CALLOUT_MPSAFE);
tcp_isn_tick(NULL);
EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL,
SHUTDOWN_PRI_DEFAULT);
sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
+ EVENTHANDLER_PRI_ANY);
}
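
tcp_init() still rejects a net.inet.tcp.tcbhashsize tunable that is not a power of two and falls back to 512. The check is the usual bit trick; a tiny userland illustration (the macro body is written out here and assumed to match sys/param.h):

#include <stdio.h>

#define powerof2(x)	((((x) - 1) & (x)) == 0)

int
main(void)
{
	int hashsize = 600;	/* e.g. a mis-set tunable */

	if (!powerof2(hashsize)) {
		printf("WARNING: TCB hash size not a power of 2\n");
		hashsize = 512;	/* safe default */
	}
	printf("using hashsize %d\n", hashsize);
	return (0);
}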
void
-tcp_fini(xtp)
- void *xtp;
+tcp_fini(void *xtp)
{
- callout_stop(&isn_callout);
+ callout_stop(&isn_callout);
}
/*
@@ -320,10 +327,7 @@
* of the tcpcb each time to conserve mbufs.
*/
void
-tcpip_fillheaders(inp, ip_ptr, tcp_ptr)
- struct inpcb *inp;
- void *ip_ptr;
- void *tcp_ptr;
+tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
{
struct tcphdr *th = (struct tcphdr *)tcp_ptr;
@@ -378,8 +382,7 @@
* use for this function is in keepalives, which use tcp_respond.
*/
struct tcptemp *
-tcpip_maketemplate(inp)
- struct inpcb *inp;
+tcpip_maketemplate(struct inpcb *inp)
{
struct mbuf *m;
struct tcptemp *n;
@@ -409,15 +412,10 @@
* NOTE: If m != NULL, then ti must point to *inside* the mbuf.
*/
void
-tcp_respond(tp, ipgen, th, m, ack, seq, flags)
- struct tcpcb *tp;
- void *ipgen;
- register struct tcphdr *th;
- register struct mbuf *m;
- tcp_seq ack, seq;
- int flags;
+tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
+ tcp_seq ack, tcp_seq seq, int flags)
{
- register int tlen;
+ int tlen;
int win = 0;
struct ip *ip;
struct tcphdr *nth;
@@ -439,7 +437,6 @@
if (tp != NULL) {
inp = tp->t_inpcb;
KASSERT(inp != NULL, ("tcp control block w/o inpcb"));
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(inp);
} else
inp = NULL;
@@ -452,7 +449,7 @@
}
}
if (m == NULL) {
- m = m_gethdr(M_DONTWAIT, MT_HEADER);
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
if (m == NULL)
return;
tlen = 0;
@@ -582,8 +579,7 @@
* come from the zone allocator set up in tcp_init().
*/
struct tcpcb *
-tcp_newtcpcb(inp)
- struct inpcb *inp;
+tcp_newtcpcb(struct inpcb *inp)
{
struct tcpcb_mem *tm;
struct tcpcb *tp;
@@ -595,6 +591,7 @@
if (tm == NULL)
return (NULL);
tp = &tm->tcb;
+ tp->t_timers = &tm->tt;
/* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */
tp->t_maxseg = tp->t_maxopd =
#ifdef INET6
@@ -603,15 +600,16 @@
tcp_mssdflt;
/* Set up our timeouts. */
- callout_init(tp->tt_rexmt = &tm->tcpcb_mem_rexmt, NET_CALLOUT_MPSAFE);
- callout_init(tp->tt_persist = &tm->tcpcb_mem_persist, NET_CALLOUT_MPSAFE);
- callout_init(tp->tt_keep = &tm->tcpcb_mem_keep, NET_CALLOUT_MPSAFE);
- callout_init(tp->tt_2msl = &tm->tcpcb_mem_2msl, NET_CALLOUT_MPSAFE);
- callout_init(tp->tt_delack = &tm->tcpcb_mem_delack, NET_CALLOUT_MPSAFE);
+ callout_init(&tp->t_timers->tt_rexmt, CALLOUT_MPSAFE);
+ callout_init(&tp->t_timers->tt_persist, CALLOUT_MPSAFE);
+ callout_init(&tp->t_timers->tt_keep, CALLOUT_MPSAFE);
+ callout_init(&tp->t_timers->tt_2msl, CALLOUT_MPSAFE);
+ callout_init(&tp->t_timers->tt_delack, CALLOUT_MPSAFE);
if (tcp_do_rfc1323)
tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
- tp->sack_enable = tcp_do_sack;
+ if (tcp_do_sack)
+ tp->t_flags |= TF_SACK_PERMIT;
TAILQ_INIT(&tp->snd_holes);
tp->t_inpcb = inp; /* XXX */
/*
@@ -634,7 +632,7 @@
* which may match an IPv4-mapped IPv6 address.
*/
inp->inp_ip_ttl = ip_defttl;
- inp->inp_ppcb = (caddr_t)tp;
+ inp->inp_ppcb = tp;
return (tp); /* XXX */
}
@@ -644,9 +642,7 @@
* then send a RST to peer.
*/
struct tcpcb *
-tcp_drop(tp, errno)
- register struct tcpcb *tp;
- int errno;
+tcp_drop(struct tcpcb *tp, int errno)
{
struct socket *so = tp->t_inpcb->inp_socket;
@@ -665,9 +661,8 @@
return (tcp_close(tp));
}
-static void
-tcp_discardcb(tp)
- struct tcpcb *tp;
+void
+tcp_discardcb(struct tcpcb *tp)
{
struct tseg_qent *q;
struct inpcb *inp = tp->t_inpcb;
@@ -682,11 +677,11 @@
* Make sure that all of our timers are stopped before we
* delete the PCB.
*/
- callout_stop(tp->tt_rexmt);
- callout_stop(tp->tt_persist);
- callout_stop(tp->tt_keep);
- callout_stop(tp->tt_2msl);
- callout_stop(tp->tt_delack);
+ callout_stop(&tp->t_timers->tt_rexmt);
+ callout_stop(&tp->t_timers->tt_persist);
+ callout_stop(&tp->t_timers->tt_keep);
+ callout_stop(&tp->t_timers->tt_2msl);
+ callout_stop(&tp->t_timers->tt_delack);
/*
* If we got enough samples through the srtt filter,
@@ -708,6 +703,9 @@
* are satisfied. This gives us better new start value
* for the congestion avoidance for new connections.
* ssthresh is only set if packet loss occured on a session.
+ *
+ * XXXRW: 'so' may be NULL here, and/or socket buffer may be
+ * being torn down. Ideally this code would not use 'so'.
*/
ssthresh = tp->snd_ssthresh;
if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) {
@@ -755,43 +753,45 @@
inp->inp_ppcb = NULL;
tp->t_inpcb = NULL;
uma_zfree(tcpcb_zone, tp);
- soisdisconnected(so);
}
/*
- * Close a TCP control block:
- * discard all space held by the tcp
- * discard internet protocol block
- * wake up any sleepers
+ * Attempt to close a TCP control block, marking it as dropped, and freeing
+ * the socket if we hold the only reference.
*/
struct tcpcb *
-tcp_close(tp)
- struct tcpcb *tp;
+tcp_close(struct tcpcb *tp)
{
struct inpcb *inp = tp->t_inpcb;
-#ifdef INET6
- struct socket *so = inp->inp_socket;
-#endif
+ struct socket *so;
INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(inp);
- tcp_discardcb(tp);
-#ifdef INET6
- if (INP_CHECK_SOCKAF(so, AF_INET6))
- in6_pcbdetach(inp);
- else
-#endif
- in_pcbdetach(inp);
+ in_pcbdrop(inp);
tcpstat.tcps_closed++;
- return (NULL);
+ KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
+ so = inp->inp_socket;
+ soisdisconnected(so);
+ if (inp->inp_vflag & INP_SOCKREF) {
+ KASSERT(so->so_state & SS_PROTOREF,
+ ("tcp_close: !SS_PROTOREF"));
+ inp->inp_vflag &= ~INP_SOCKREF;
+ INP_UNLOCK(inp);
+ ACCEPT_LOCK();
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_PROTOREF;
+ sofree(so);
+ return (NULL);
+ }
+ return (tp);
}
void
-tcp_drain()
+tcp_drain(void)
{
- if (do_tcpdrain)
- {
+
+ if (do_tcpdrain) {
struct inpcb *inpb;
struct tcpcb *tcpb;
struct tseg_qent *te;
@@ -805,7 +805,7 @@
* usefull.
*/
INP_INFO_RLOCK(&tcbinfo);
- LIST_FOREACH(inpb, tcbinfo.listhead, inp_list) {
+ LIST_FOREACH(inpb, tcbinfo.ipi_listhead, inp_list) {
if (inpb->inp_vflag & INP_TIMEWAIT)
continue;
INP_LOCK(inpb);
@@ -835,15 +835,20 @@
* reporting soft errors (yet - a kqueue filter may be added).
*/
static struct inpcb *
-tcp_notify(inp, error)
- struct inpcb *inp;
- int error;
+tcp_notify(struct inpcb *inp, int error)
{
- struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
+ struct tcpcb *tp;
INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(inp);
+ if ((inp->inp_vflag & INP_TIMEWAIT) ||
+ (inp->inp_vflag & INP_DROPPED))
+ return (inp);
+
+ tp = intotcpcb(inp);
+ KASSERT(tp != NULL, ("tcp_notify: tp == NULL"));
+
/*
* Ignore some errors if we are hooked up.
* If connection hasn't completed, has retransmitted several times,
@@ -857,8 +862,11 @@
return (inp);
} else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
tp->t_softerror) {
- tcp_drop(tp, error);
- return (struct inpcb *)0;
+ tp = tcp_drop(tp, error);
+ if (tp != NULL)
+ return (inp);
+ else
+ return (NULL);
} else {
tp->t_softerror = error;
return (inp);
@@ -873,7 +881,7 @@
static int
tcp_pcblist(SYSCTL_HANDLER_ARGS)
{
- int error, i, n;
+ int error, i, m, n, pcb_count;
struct inpcb *inp, **inp_list;
inp_gen_t gencnt;
struct xinpgen xig;
@@ -883,9 +891,10 @@
* resource-intensive to repeat twice on every request.
*/
if (req->oldptr == NULL) {
+ m = syncache_pcbcount();
n = tcbinfo.ipi_count;
req->oldidx = 2 * (sizeof xig)
- + (n + n/8) * sizeof(struct xtcpcb);
+ + ((m + n) + n/8) * sizeof(struct xtcpcb);
return (0);
}
@@ -900,26 +909,32 @@
n = tcbinfo.ipi_count;
INP_INFO_RUNLOCK(&tcbinfo);
+ m = syncache_pcbcount();
+
error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
- + n * sizeof(struct xtcpcb));
+ + (n + m) * sizeof(struct xtcpcb));
if (error != 0)
return (error);
xig.xig_len = sizeof xig;
- xig.xig_count = n;
+ xig.xig_count = n + m;
xig.xig_gen = gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return (error);
+ error = syncache_pcblist(req, m, &pcb_count);
+ if (error)
+ return (error);
+
inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
if (inp_list == NULL)
return (ENOMEM);
INP_INFO_RLOCK(&tcbinfo);
- for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp != NULL && i < n;
- inp = LIST_NEXT(inp, inp_list)) {
+ for (inp = LIST_FIRST(tcbinfo.ipi_listhead), i = 0; inp != NULL && i
+ < n; inp = LIST_NEXT(inp, inp_list)) {
INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt) {
/*
@@ -927,10 +942,13 @@
* TCP state changes, is not quite right, but for
* now, better than nothing.
*/
- if (inp->inp_vflag & INP_TIMEWAIT)
- error = cr_cansee(req->td->td_ucred,
- intotw(inp)->tw_cred);
- else
+ if (inp->inp_vflag & INP_TIMEWAIT) {
+ if (intotw(inp) != NULL)
+ error = cr_cansee(req->td->td_ucred,
+ intotw(inp)->tw_cred);
+ else
+ error = EINVAL; /* Skip this inp. */
+ } else
error = cr_canseesocket(req->td->td_ucred,
inp->inp_socket);
if (error == 0)
@@ -944,9 +962,10 @@
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
+ INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt) {
struct xtcpcb xt;
- caddr_t inp_ppcb;
+ void *inp_ppcb;
bzero(&xt, sizeof(xt));
xt.xt_len = sizeof xt;
@@ -967,8 +986,11 @@
xt.xt_socket.xso_protocol = IPPROTO_TCP;
}
xt.xt_inp.inp_gencnt = inp->inp_gencnt;
+ INP_UNLOCK(inp);
error = SYSCTL_OUT(req, &xt, sizeof xt);
- }
+ } else
+ INP_UNLOCK(inp);
+
}
if (!error) {
/*
@@ -981,7 +1003,7 @@
INP_INFO_RLOCK(&tcbinfo);
xig.xig_gen = tcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
- xig.xig_count = tcbinfo.ipi_count;
+ xig.xig_count = tcbinfo.ipi_count + pcb_count;
INP_INFO_RUNLOCK(&tcbinfo);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
@@ -990,7 +1012,7 @@
}
SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD, 0, 0,
- tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
+ tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
static int
tcp_getcred(SYSCTL_HANDLER_ARGS)
@@ -1000,7 +1022,7 @@
struct inpcb *inp;
int error;
- error = suser_cred(req->td->td_ucred, SUSER_ALLOWJAIL);
+ error = priv_check(req->td, PRIV_NETINET_GETCRED);
if (error)
return (error);
error = SYSCTL_IN(req, addrs, sizeof(addrs));
@@ -1044,7 +1066,7 @@
struct inpcb *inp;
int error, mapped = 0;
- error = suser_cred(req->td->td_ucred, SUSER_ALLOWJAIL);
+ error = priv_check(req->td, PRIV_NETINET_GETCRED);
if (error)
return (error);
error = SYSCTL_IN(req, addrs, sizeof(addrs));
@@ -1102,10 +1124,7 @@
void
-tcp_ctlinput(cmd, sa, vip)
- int cmd;
- struct sockaddr *sa;
- void *vip;
+tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
{
struct ip *ip = vip;
struct tcphdr *th;
@@ -1156,7 +1175,9 @@
ip->ip_src, th->th_sport, 0, NULL);
if (inp != NULL) {
INP_LOCK(inp);
- if (inp->inp_socket != NULL) {
+ if (!(inp->inp_vflag & INP_TIMEWAIT) &&
+ !(inp->inp_vflag & INP_DROPPED) &&
+ !(inp->inp_socket == NULL)) {
icmp_tcp_seq = htonl(th->th_seq);
tp = intotcpcb(inp);
if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) &&
@@ -1194,7 +1215,7 @@
* or route MTU. tcp_mtudisc()
* will do right thing by itself.
*/
- if (mtu <= tcp_maxmtu(&inc))
+ if (mtu <= tcp_maxmtu(&inc, NULL))
tcp_hc_updatemtu(&inc, mtu);
}
@@ -1220,10 +1241,7 @@
#ifdef INET6
void
-tcp6_ctlinput(cmd, sa, d)
- int cmd;
- struct sockaddr *sa;
- void *d;
+tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
{
struct tcphdr th;
struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
@@ -1351,15 +1369,14 @@
static MD5_CTX isn_ctx;
tcp_seq
-tcp_new_isn(tp)
- struct tcpcb *tp;
+tcp_new_isn(struct tcpcb *tp)
{
u_int32_t md5_buffer[4];
tcp_seq new_isn;
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(tp->t_inpcb);
+ ISN_LOCK();
/* Seed if this is the first use, reseed if requested. */
if ((isn_last_reseed == 0) || ((tcp_isn_reseed_interval > 0) &&
(((u_int)isn_last_reseed + (u_int)tcp_isn_reseed_interval*hz)
@@ -1392,29 +1409,29 @@
isn_offset += ISN_STATIC_INCREMENT +
(arc4random() & ISN_RANDOM_INCREMENT);
new_isn += isn_offset;
+ ISN_UNLOCK();
return (new_isn);
}
/*
- * Increment the offset to the next ISN_BYTES_PER_SECOND / hz boundary
+ * Increment the offset to the next ISN_BYTES_PER_SECOND / 100 boundary
* to keep time flowing at a relatively constant rate. If the random
* increments have already pushed us past the projected offset, do nothing.
*/
static void
-tcp_isn_tick(xtp)
- void *xtp;
+tcp_isn_tick(void *xtp)
{
u_int32_t projected_offset;
- INP_INFO_WLOCK(&tcbinfo);
+ ISN_LOCK();
projected_offset = isn_offset_old + ISN_BYTES_PER_SECOND / 100;
- if (projected_offset > isn_offset)
+ if (SEQ_GT(projected_offset, isn_offset))
isn_offset = projected_offset;
isn_offset_old = isn_offset;
callout_reset(&isn_callout, hz/100, tcp_isn_tick, NULL);
- INP_INFO_WUNLOCK(&tcbinfo);
+ ISN_UNLOCK();
}
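
The replacement of the plain `>` comparison with SEQ_GT() in tcp_isn_tick() matters once the 32-bit offset wraps: modular sequence comparison keeps the projection monotonic across the wrap. A small userland demonstration (the macro is written out here and assumed to match netinet/tcp_seq.h; the 10485 increment is roughly ISN_BYTES_PER_SECOND / 100 under the traditional 1 MB/s constant):

#include <stdio.h>
#include <stdint.h>

#define SEQ_GT(a, b)	((int32_t)((a) - (b)) > 0)

int
main(void)
{
	uint32_t isn_offset = 0xfffffff0u;		/* about to wrap */
	uint32_t projected = isn_offset + 10485;	/* wraps past 2^32 */

	printf("plain >  : %d\n", projected > isn_offset);	/* 0 after the wrap */
	printf("SEQ_GT() : %d\n", SEQ_GT(projected, isn_offset));	/* 1, as intended */
	return (0);
}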
/*
@@ -1423,20 +1440,26 @@
* is controlled by the icmp_may_rst sysctl.
*/
struct inpcb *
-tcp_drop_syn_sent(inp, errno)
- struct inpcb *inp;
- int errno;
+tcp_drop_syn_sent(struct inpcb *inp, int errno)
{
- struct tcpcb *tp = intotcpcb(inp);
+ struct tcpcb *tp;
INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(inp);
- if (tp != NULL && tp->t_state == TCPS_SYN_SENT) {
- tcp_drop(tp, errno);
+ if ((inp->inp_vflag & INP_TIMEWAIT) ||
+ (inp->inp_vflag & INP_DROPPED))
+ return (inp);
+
+ tp = intotcpcb(inp);
+ if (tp->t_state != TCPS_SYN_SENT)
+ return (inp);
+
+ tp = tcp_drop(tp, errno);
+ if (tp != NULL)
+ return (inp);
+ else
return (NULL);
- }
- return (inp);
}
/*
@@ -1446,11 +1469,9 @@
* This duplicates some code in the tcp_mss() function in tcp_input.c.
*/
struct inpcb *
-tcp_mtudisc(inp, errno)
- struct inpcb *inp;
- int errno;
+tcp_mtudisc(struct inpcb *inp, int errno)
{
- struct tcpcb *tp = intotcpcb(inp);
+ struct tcpcb *tp;
struct socket *so = inp->inp_socket;
u_int maxmtu;
u_int romtu;
@@ -1460,80 +1481,88 @@
#endif /* INET6 */
INP_LOCK_ASSERT(inp);
- if (tp != NULL) {
+ if ((inp->inp_vflag & INP_TIMEWAIT) ||
+ (inp->inp_vflag & INP_DROPPED))
+ return (inp);
+
+ tp = intotcpcb(inp);
+ KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL"));
+
#ifdef INET6
- isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
+ isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
#endif
- maxmtu = tcp_hc_getmtu(&inp->inp_inc); /* IPv4 and IPv6 */
- romtu =
+ maxmtu = tcp_hc_getmtu(&inp->inp_inc); /* IPv4 and IPv6 */
+ romtu =
#ifdef INET6
- isipv6 ? tcp_maxmtu6(&inp->inp_inc) :
+ isipv6 ? tcp_maxmtu6(&inp->inp_inc, NULL) :
#endif /* INET6 */
- tcp_maxmtu(&inp->inp_inc);
- if (!maxmtu)
- maxmtu = romtu;
- else
- maxmtu = min(maxmtu, romtu);
- if (!maxmtu) {
- tp->t_maxopd = tp->t_maxseg =
+ tcp_maxmtu(&inp->inp_inc, NULL);
+ if (!maxmtu)
+ maxmtu = romtu;
+ else
+ maxmtu = min(maxmtu, romtu);
+ if (!maxmtu) {
+ tp->t_maxopd = tp->t_maxseg =
#ifdef INET6
- isipv6 ? tcp_v6mssdflt :
+ isipv6 ? tcp_v6mssdflt :
#endif /* INET6 */
- tcp_mssdflt;
- return (inp);
- }
- mss = maxmtu -
+ tcp_mssdflt;
+ return (inp);
+ }
+ mss = maxmtu -
#ifdef INET6
- (isipv6 ?
- sizeof(struct ip6_hdr) + sizeof(struct tcphdr) :
+ (isipv6 ? sizeof(struct ip6_hdr) + sizeof(struct tcphdr) :
#endif /* INET6 */
- sizeof(struct tcpiphdr)
+ sizeof(struct tcpiphdr)
#ifdef INET6
- )
+ )
#endif /* INET6 */
- ;
+ ;
- /*
- * XXX - The above conditional probably violates the TCP
- * spec. The problem is that, since we don't know the
- * other end's MSS, we are supposed to use a conservative
- * default. But, if we do that, then MTU discovery will
- * never actually take place, because the conservative
- * default is much less than the MTUs typically seen
- * on the Internet today. For the moment, we'll sweep
- * this under the carpet.
- *
- * The conservative default might not actually be a problem
- * if the only case this occurs is when sending an initial
- * SYN with options and data to a host we've never talked
- * to before. Then, they will reply with an MSS value which
- * will get recorded and the new parameters should get
- * recomputed. For Further Study.
- */
- if (tp->t_maxopd <= mss)
- return (inp);
- tp->t_maxopd = mss;
+ /*
+ * XXX - The above conditional probably violates the TCP
+ * spec. The problem is that, since we don't know the
+ * other end's MSS, we are supposed to use a conservative
+ * default. But, if we do that, then MTU discovery will
+ * never actually take place, because the conservative
+ * default is much less than the MTUs typically seen
+ * on the Internet today. For the moment, we'll sweep
+ * this under the carpet.
+ *
+ * The conservative default might not actually be a problem
+ * if the only case this occurs is when sending an initial
+ * SYN with options and data to a host we've never talked
+ * to before. Then, they will reply with an MSS value which
+ * will get recorded and the new parameters should get
+ * recomputed. For Further Study.
+ */
+ if (tp->t_maxopd <= mss)
+ return (inp);
+ tp->t_maxopd = mss;
- if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
- (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
- mss -= TCPOLEN_TSTAMP_APPA;
+ if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
+ (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)
+ mss -= TCPOLEN_TSTAMP_APPA;
#if (MCLBYTES & (MCLBYTES - 1)) == 0
- if (mss > MCLBYTES)
- mss &= ~(MCLBYTES-1);
+ if (mss > MCLBYTES)
+ mss &= ~(MCLBYTES-1);
#else
- if (mss > MCLBYTES)
- mss = mss / MCLBYTES * MCLBYTES;
+ if (mss > MCLBYTES)
+ mss = mss / MCLBYTES * MCLBYTES;
#endif
- if (so->so_snd.sb_hiwat < mss)
- mss = so->so_snd.sb_hiwat;
+ if (so->so_snd.sb_hiwat < mss)
+ mss = so->so_snd.sb_hiwat;
- tp->t_maxseg = mss;
+ tp->t_maxseg = mss;
- tcpstat.tcps_mturesent++;
- tp->t_rtttime = 0;
- tp->snd_nxt = tp->snd_una;
- tcp_output(tp);
- }
+ tcpstat.tcps_mturesent++;
+ tp->t_rtttime = 0;
+ tp->snd_nxt = tp->snd_una;
+ tcp_free_sackholes(tp);
+ tp->snd_recover = tp->snd_max;
+ if (tp->t_flags & TF_SACK_PERMIT)
+ EXIT_FASTRECOVERY(tp);
+ tcp_output(tp);
return (inp);
}
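
Worked through for a typical IPv4 Ethernet path, the re-indented MSS computation above gives: maxmtu = 1500, so mss = 1500 - sizeof(struct tcpiphdr) = 1500 - 40 = 1460; if timestamps were negotiated, TCPOLEN_TSTAMP_APPA (12 bytes) is subtracted, leaving 1448; 1448 is below MCLBYTES (commonly 2048), so the cluster rounding leaves it unchanged and t_maxseg becomes 1448, further capped by the send buffer high-water mark if that is smaller.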
@@ -1544,8 +1573,7 @@
* to get the interface MTU.
*/
u_long
-tcp_maxmtu(inc)
- struct in_conninfo *inc;
+tcp_maxmtu(struct in_conninfo *inc, int *flags)
{
struct route sro;
struct sockaddr_in *dst;
@@ -1568,6 +1596,13 @@
maxmtu = ifp->if_mtu;
else
maxmtu = min(sro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
+
+ /* Report additional interface capabilities. */
+ if (flags != NULL) {
+ if (ifp->if_capenable & IFCAP_TSO4 &&
+ ifp->if_hwassist & CSUM_TSO)
+ *flags |= CSUM_TSO;
+ }
RTFREE(sro.ro_rt);
}
return (maxmtu);
@@ -1575,8 +1610,7 @@
#ifdef INET6
u_long
-tcp_maxmtu6(inc)
- struct in_conninfo *inc;
+tcp_maxmtu6(struct in_conninfo *inc, int *flags)
{
struct route_in6 sro6;
struct ifnet *ifp;
@@ -1598,6 +1632,13 @@
else
maxmtu = min(sro6.ro_rt->rt_rmx.rmx_mtu,
IN6_LINKMTU(sro6.ro_rt->rt_ifp));
+
+ /* Report additional interface capabilities. */
+ if (flags != NULL) {
+ if (ifp->if_capenable & IFCAP_TSO6 &&
+ ifp->if_hwassist & CSUM_TSO)
+ *flags |= CSUM_TSO;
+ }
RTFREE(sro6.ro_rt);
}
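
Both tcp_maxmtu() and tcp_maxmtu6() now take an optional flags pointer and set CSUM_TSO in it when the outgoing interface advertises TSO. A hedged, illustrative fragment of how a caller might consume the new out-parameter (the TF_TSO bookkeeping is an assumption for illustration, not quoted from this diff):

	int mtuflags = 0;
	u_long maxmtu;

	maxmtu = tcp_maxmtu(&inp->inp_inc, &mtuflags);
	if (mtuflags & CSUM_TSO)
		tp->t_flags |= TF_TSO;	/* assumed flag; enable TSO on this connection */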
@@ -1608,8 +1649,7 @@
#ifdef IPSEC
/* compute ESP/AH header size for TCP, including outer IP header. */
size_t
-ipsec_hdrsiz_tcp(tp)
- struct tcpcb *tp;
+ipsec_hdrsiz_tcp(struct tcpcb *tp)
{
struct inpcb *inp;
struct mbuf *m;
@@ -1647,239 +1687,7 @@
m_free(m);
return (hdrsiz);
}
-#endif /*IPSEC*/
-
-/*
- * Move a TCP connection into TIME_WAIT state.
- * tcbinfo is locked.
- * inp is locked, and is unlocked before returning.
- */
-void
-tcp_twstart(tp)
- struct tcpcb *tp;
-{
- struct tcptw *tw;
- struct inpcb *inp;
- int tw_time, acknow;
- struct socket *so;
-
- INP_INFO_WLOCK_ASSERT(&tcbinfo); /* tcp_timer_2msl_reset(). */
- INP_LOCK_ASSERT(tp->t_inpcb);
-
- tw = uma_zalloc(tcptw_zone, M_NOWAIT);
- if (tw == NULL) {
- tw = tcp_timer_2msl_tw(1);
- if (tw == NULL) {
- tcp_close(tp);
- return;
- }
- }
- inp = tp->t_inpcb;
- tw->tw_inpcb = inp;
-
- /*
- * Recover last window size sent.
- */
- tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
-
- /*
- * Set t_recent if timestamps are used on the connection.
- */
- if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
- (TF_REQ_TSTMP|TF_RCVD_TSTMP))
- tw->t_recent = tp->ts_recent;
- else
- tw->t_recent = 0;
-
- tw->snd_nxt = tp->snd_nxt;
- tw->rcv_nxt = tp->rcv_nxt;
- tw->iss = tp->iss;
- tw->irs = tp->irs;
- tw->t_starttime = tp->t_starttime;
- tw->tw_time = 0;
-
-/* XXX
- * If this code will
- * be used for fin-wait-2 state also, then we may need
- * a ts_recent from the last segment.
- */
- tw_time = 2 * tcp_msl;
- acknow = tp->t_flags & TF_ACKNOW;
- tcp_discardcb(tp);
- so = inp->inp_socket;
- ACCEPT_LOCK();
- SOCK_LOCK(so);
- so->so_pcb = NULL;
- tw->tw_cred = crhold(so->so_cred);
- tw->tw_so_options = so->so_options;
- sotryfree(so);
- inp->inp_socket = NULL;
- if (acknow)
- tcp_twrespond(tw, TH_ACK);
- inp->inp_ppcb = (caddr_t)tw;
- inp->inp_vflag |= INP_TIMEWAIT;
- tcp_timer_2msl_reset(tw, tw_time);
- INP_UNLOCK(inp);
-}
-
-/*
- * The appromixate rate of ISN increase of Microsoft TCP stacks;
- * the actual rate is slightly higher due to the addition of
- * random positive increments.
- *
- * Most other new OSes use semi-randomized ISN values, so we
- * do not need to worry about them.
- */
-#define MS_ISN_BYTES_PER_SECOND 250000
-
-/*
- * Determine if the ISN we will generate has advanced beyond the last
- * sequence number used by the previous connection. If so, indicate
- * that it is safe to recycle this tw socket by returning 1.
- *
- * XXXRW: This function should assert the inpcb lock as it does multiple
- * non-atomic reads from the tcptw, but is currently called without it from
- * in_pcb.c:in_pcblookup_local().
- */
-int
-tcp_twrecycleable(struct tcptw *tw)
-{
- tcp_seq new_iss = tw->iss;
- tcp_seq new_irs = tw->irs;
-
- new_iss += (ticks - tw->t_starttime) * (ISN_BYTES_PER_SECOND / hz);
- new_irs += (ticks - tw->t_starttime) * (MS_ISN_BYTES_PER_SECOND / hz);
-
- if (SEQ_GT(new_iss, tw->snd_nxt) && SEQ_GT(new_irs, tw->rcv_nxt))
- return (1);
- else
- return (0);
-}
-
-struct tcptw *
-tcp_twclose(struct tcptw *tw, int reuse)
-{
- struct inpcb *inp;
-
- inp = tw->tw_inpcb;
- INP_INFO_WLOCK_ASSERT(&tcbinfo); /* tcp_timer_2msl_stop(). */
- INP_LOCK_ASSERT(inp);
-
- tw->tw_inpcb = NULL;
- tcp_timer_2msl_stop(tw);
- inp->inp_ppcb = NULL;
-#ifdef INET6
- if (inp->inp_vflag & INP_IPV6PROTO)
- in6_pcbdetach(inp);
- else
-#endif
- in_pcbdetach(inp);
- tcpstat.tcps_closed++;
- crfree(tw->tw_cred);
- tw->tw_cred = NULL;
- if (reuse)
- return (tw);
- uma_zfree(tcptw_zone, tw);
- return (NULL);
-}
-
-int
-tcp_twrespond(struct tcptw *tw, int flags)
-{
- struct inpcb *inp = tw->tw_inpcb;
- struct tcphdr *th;
- struct mbuf *m;
- struct ip *ip = NULL;
- u_int8_t *optp;
- u_int hdrlen, optlen;
- int error;
-#ifdef INET6
- struct ip6_hdr *ip6 = NULL;
- int isipv6 = inp->inp_inc.inc_isipv6;
-#endif
-
- INP_LOCK_ASSERT(inp);
-
- m = m_gethdr(M_DONTWAIT, MT_HEADER);
- if (m == NULL)
- return (ENOBUFS);
- m->m_data += max_linkhdr;
-
-#ifdef MAC
- mac_create_mbuf_from_inpcb(inp, m);
-#endif
-
-#ifdef INET6
- if (isipv6) {
- hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
- ip6 = mtod(m, struct ip6_hdr *);
- th = (struct tcphdr *)(ip6 + 1);
- tcpip_fillheaders(inp, ip6, th);
- } else
-#endif
- {
- hdrlen = sizeof(struct tcpiphdr);
- ip = mtod(m, struct ip *);
- th = (struct tcphdr *)(ip + 1);
- tcpip_fillheaders(inp, ip, th);
- }
- optp = (u_int8_t *)(th + 1);
-
- /*
- * Send a timestamp and echo-reply if both our side and our peer
- * have sent timestamps in our SYN's and this is not a RST.
- */
- if (tw->t_recent && flags == TH_ACK) {
- u_int32_t *lp = (u_int32_t *)optp;
-
- /* Form timestamp option as shown in appendix A of RFC 1323. */
- *lp++ = htonl(TCPOPT_TSTAMP_HDR);
- *lp++ = htonl(ticks);
- *lp = htonl(tw->t_recent);
- optp += TCPOLEN_TSTAMP_APPA;
- }
-
- optlen = optp - (u_int8_t *)(th + 1);
-
- m->m_len = hdrlen + optlen;
- m->m_pkthdr.len = m->m_len;
-
- KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small"));
-
- th->th_seq = htonl(tw->snd_nxt);
- th->th_ack = htonl(tw->rcv_nxt);
- th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
- th->th_flags = flags;
- th->th_win = htons(tw->last_win);
-
-#ifdef INET6
- if (isipv6) {
- th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
- sizeof(struct tcphdr) + optlen);
- ip6->ip6_hlim = in6_selecthlim(inp, NULL);
- error = ip6_output(m, inp->in6p_outputopts, NULL,
- (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
- } else
-#endif
- {
- th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
- m->m_pkthdr.csum_flags = CSUM_TCP;
- m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
- ip->ip_len = m->m_pkthdr.len;
- if (path_mtu_discovery)
- ip->ip_off |= IP_DF;
- error = ip_output(m, inp->inp_options, NULL,
- ((tw->tw_so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
- NULL, inp);
- }
- if (flags & TH_ACK)
- tcpstat.tcps_sndacks++;
- else
- tcpstat.tcps_sndctrl++;
- tcpstat.tcps_sndtotal++;
- return (error);
-}
+#endif /* IPSEC */
/*
* TCP BANDWIDTH DELAY PRODUCT WINDOW LIMITING
@@ -2140,7 +1948,7 @@
/*
* Step 4: Update MD5 hash with shared secret.
*/
- MD5Update(&ctx, _KEYBUF(sav->key_auth), _KEYLEN(sav->key_auth));
+ MD5Update(&ctx, sav->key_auth->key_data, _KEYLEN(sav->key_auth));
MD5Final(buf, &ctx);
key_sa_recordxfer(sav, m);
@@ -2231,11 +2039,21 @@
}
if (inp != NULL) {
INP_LOCK(inp);
- if ((tw = intotw(inp)) &&
- (inp->inp_vflag & INP_TIMEWAIT) != 0) {
- (void) tcp_twclose(tw, 0);
- } else if ((tp = intotcpcb(inp)) &&
- ((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) {
+ if (inp->inp_vflag & INP_TIMEWAIT) {
+ /*
+ * XXXRW: There currently exists a state where an
+ * inpcb is present, but its timewait state has been
+ * discarded. For now, don't allow dropping of this
+ * type of inpcb.
+ */
+ tw = intotw(inp);
+ if (tw != NULL)
+ tcp_twclose(tw, 0);
+ else
+ INP_UNLOCK(inp);
+ } else if (!(inp->inp_vflag & INP_DROPPED) &&
+ !(inp->inp_socket->so_options & SO_ACCEPTCONN)) {
+ tp = intotcpcb(inp);
tp = tcp_drop(tp, ECONNABORTED);
if (tp != NULL)
INP_UNLOCK(inp);
@@ -2250,3 +2068,97 @@
SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop,
CTLTYPE_STRUCT|CTLFLAG_WR|CTLFLAG_SKIP, NULL,
0, sysctl_drop, "", "Drop TCP connection");
+
+/*
+ * Generate a standardized TCP log line for use throughout the
+ * tcp subsystem. Memory allocation is done with M_NOWAIT to
+ * allow use in the interrupt context.
+ *
+ * NB: The caller MUST free(s, M_TCPLOG) the returned string.
+ * NB: The function may return NULL if memory allocation failed.
+ *
+ * Due to header inclusion and ordering limitations the struct ip
+ * and ip6_hdr pointers have to be passed as void pointers.
+ */
+char *
+tcp_log_addrs(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
+ const void *ip6hdr)
+{
+ char *s, *sp;
+ size_t size;
+ struct ip *ip;
+#ifdef INET6
+ const struct ip6_hdr *ip6;
+
+ ip6 = (const struct ip6_hdr *)ip6hdr;
+#endif /* INET6 */
+ ip = (struct ip *)ip4hdr;
+
+ /*
+ * The log line looks like this:
+ * "TCP: [1.2.3.4]:50332 to [1.2.3.4]:80 tcpflags 0x2<SYN>"
+ */
+ size = sizeof("TCP: []:12345 to []:12345 tcpflags 0x2<>") +
+ sizeof(PRINT_TH_FLAGS) + 1 +
+#ifdef INET6
+ 2 * INET6_ADDRSTRLEN;
+#else
+ 2 * INET_ADDRSTRLEN;
+#endif /* INET6 */
+
+ /* Is logging enabled? */
+ if (tcp_log_debug == 0 && tcp_log_in_vain == 0)
+ return (NULL);
+
+ s = malloc(size, M_TCPLOG, M_ZERO|M_NOWAIT);
+ if (s == NULL)
+ return (NULL);
+
+ strcat(s, "TCP: [");
+ sp = s + strlen(s);
+
+ if (inc && inc->inc_isipv6 == 0) {
+ inet_ntoa_r(inc->inc_faddr, sp);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i to [", ntohs(inc->inc_fport));
+ sp = s + strlen(s);
+ inet_ntoa_r(inc->inc_laddr, sp);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i", ntohs(inc->inc_lport));
+#ifdef INET6
+ } else if (inc) {
+ ip6_sprintf(sp, &inc->inc6_faddr);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i to [", ntohs(inc->inc_fport));
+ sp = s + strlen(s);
+ ip6_sprintf(sp, &inc->inc6_laddr);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i", ntohs(inc->inc_lport));
+ } else if (ip6 && th) {
+ ip6_sprintf(sp, &ip6->ip6_src);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i to [", ntohs(th->th_sport));
+ sp = s + strlen(s);
+ ip6_sprintf(sp, &ip6->ip6_dst);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i", ntohs(th->th_dport));
+#endif /* INET6 */
+ } else if (ip && th) {
+ inet_ntoa_r(ip->ip_src, sp);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i to [", ntohs(th->th_sport));
+ sp = s + strlen(s);
+ inet_ntoa_r(ip->ip_dst, sp);
+ sp = s + strlen(s);
+ sprintf(sp, "]:%i", ntohs(th->th_dport));
+ } else {
+ free(s, M_TCPLOG);
+ return (NULL);
+ }
+ sp = s + strlen(s);
+ if (th)
+ sprintf(sp, " tcpflags 0x%b", th->th_flags, PRINT_TH_FLAGS);
+ if (*(s + size - 1) != '\0')
+ panic("%s: string too long", __func__);
+ return (s);
+}
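
Per the NB comments above, callers must tolerate a NULL return and free the string with M_TCPLOG. A hedged fragment of typical use, not a complete compilable unit (the log() call and message text are illustrative):

	char *s;

	if ((s = tcp_log_addrs(&inp->inp_inc, th, NULL, NULL)) != NULL) {
		log(LOG_DEBUG, "%s; %s: dropping segment\n", __func__, s);
		free(s, M_TCPLOG);
	}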
--- /dev/null
+++ sys/netinet/sctputil.c
@@ -0,0 +1,6520 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctputil.c,v 1.37 2005/03/07 23:26:09 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctputil.c,v 1.65.2.4.2.1 2008/02/02 12:44:13 rwatson Exp $");
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#ifdef INET6
+#include <netinet6/sctp6_var.h>
+#endif
+#include <netinet/sctp_header.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_uio.h>
+#include <netinet/sctp_timer.h>
+#include <netinet/sctp_crc32.h>
+#include <netinet/sctp_indata.h>/* for sctp_deliver_data() */
+#include <netinet/sctp_auth.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_cc_functions.h>
+
+#define NUMBER_OF_MTU_SIZES 18
+
+
+#ifndef KTR_SCTP
+#define KTR_SCTP KTR_SUBSYS
+#endif
+
+void
+sctp_sblog(struct sockbuf *sb,
+ struct sctp_tcb *stcb, int from, int incr)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.sb.stcb = stcb;
+ sctp_clog.x.sb.so_sbcc = sb->sb_cc;
+ if (stcb)
+ sctp_clog.x.sb.stcb_sbcc = stcb->asoc.sb_cc;
+ else
+ sctp_clog.x.sb.stcb_sbcc = 0;
+ sctp_clog.x.sb.incr = incr;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_SB,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+void
+sctp_log_closing(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int16_t loc)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.close.inp = (void *)inp;
+ sctp_clog.x.close.sctp_flags = inp->sctp_flags;
+ if (stcb) {
+ sctp_clog.x.close.stcb = (void *)stcb;
+ sctp_clog.x.close.state = (uint16_t) stcb->asoc.state;
+ } else {
+ sctp_clog.x.close.stcb = 0;
+ sctp_clog.x.close.state = 0;
+ }
+ sctp_clog.x.close.loc = loc;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_CLOSE,
+ 0,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+
+void
+rto_logging(struct sctp_nets *net, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.rto.net = (void *)net;
+ sctp_clog.x.rto.rtt = net->prev_rtt;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_RTT,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_strm_del_alt(struct sctp_tcb *stcb, uint32_t tsn, uint16_t sseq, uint16_t stream, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.strlog.stcb = stcb;
+ sctp_clog.x.strlog.n_tsn = tsn;
+ sctp_clog.x.strlog.n_sseq = sseq;
+ sctp_clog.x.strlog.e_tsn = 0;
+ sctp_clog.x.strlog.e_sseq = 0;
+ sctp_clog.x.strlog.strm = stream;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_STRM,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_nagle_event(struct sctp_tcb *stcb, int action)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.nagle.stcb = (void *)stcb;
+ sctp_clog.x.nagle.total_flight = stcb->asoc.total_flight;
+ sctp_clog.x.nagle.total_in_queue = stcb->asoc.total_output_queue_size;
+ sctp_clog.x.nagle.count_in_queue = stcb->asoc.chunks_on_out_queue;
+ sctp_clog.x.nagle.count_in_flight = stcb->asoc.total_flight_count;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_NAGLE,
+ action,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+
+void
+sctp_log_sack(uint32_t old_cumack, uint32_t cumack, uint32_t tsn, uint16_t gaps, uint16_t dups, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.sack.cumack = cumack;
+ sctp_clog.x.sack.oldcumack = old_cumack;
+ sctp_clog.x.sack.tsn = tsn;
+ sctp_clog.x.sack.numGaps = gaps;
+ sctp_clog.x.sack.numDups = dups;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_SACK,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
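
Each of these helpers fills a type-specific member of what is evidently a union inside struct sctp_cwnd_log and then emits the four overlapping 32-bit words x.misc.log1..log4 through SCTP_CTR6, so every event lands in ktr(4) as a fixed-size record. A minimal userland illustration of the overlay idea (the layout below is invented for the example, not the real sctp_cwnd_log):

#include <stdio.h>
#include <stdint.h>

struct example_log {
	union {
		struct { uint32_t log1, log2, log3, log4; } misc;
		struct { uint32_t cumack, oldcumack, tsn; uint16_t gaps, dups; } sack;
	} x;
};

int
main(void)
{
	struct example_log clog;

	clog.x.sack.cumack = 0x1000;
	clog.x.sack.oldcumack = 0x0800;
	clog.x.sack.tsn = 0x1234;
	clog.x.sack.gaps = 2;
	clog.x.sack.dups = 0;
	/* The tracer only ever sees the four overlapping words. */
	printf("%x-%x-%x-%x\n", clog.x.misc.log1, clog.x.misc.log2,
	    clog.x.misc.log3, clog.x.misc.log4);
	return (0);
}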
+
+void
+sctp_log_map(uint32_t map, uint32_t cum, uint32_t high, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.map.base = map;
+ sctp_clog.x.map.cum = cum;
+ sctp_clog.x.map.high = high;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_MAP,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+void
+sctp_log_fr(uint32_t biggest_tsn, uint32_t biggest_new_tsn, uint32_t tsn,
+ int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.fr.largest_tsn = biggest_tsn;
+ sctp_clog.x.fr.largest_new_tsn = biggest_new_tsn;
+ sctp_clog.x.fr.tsn = tsn;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_FR,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+
+void
+sctp_log_mb(struct mbuf *m, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.mb.mp = m;
+ sctp_clog.x.mb.mbuf_flags = (uint8_t) (SCTP_BUF_GET_FLAGS(m));
+ sctp_clog.x.mb.size = (uint16_t) (SCTP_BUF_LEN(m));
+ sctp_clog.x.mb.data = SCTP_BUF_AT(m, 0);
+ if (SCTP_BUF_IS_EXTENDED(m)) {
+ sctp_clog.x.mb.ext = SCTP_BUF_EXTEND_BASE(m);
+ sctp_clog.x.mb.refcnt = (uint8_t) (SCTP_BUF_EXTEND_REFCNT(m));
+ } else {
+ sctp_clog.x.mb.ext = 0;
+ sctp_clog.x.mb.refcnt = 0;
+ }
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_MBUF,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+
+void
+sctp_log_strm_del(struct sctp_queued_to_read *control, struct sctp_queued_to_read *poschk,
+ int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ if (control == NULL) {
+ SCTP_PRINTF("Gak log of NULL?\n");
+ return;
+ }
+ sctp_clog.x.strlog.stcb = control->stcb;
+ sctp_clog.x.strlog.n_tsn = control->sinfo_tsn;
+ sctp_clog.x.strlog.n_sseq = control->sinfo_ssn;
+ sctp_clog.x.strlog.strm = control->sinfo_stream;
+ if (poschk != NULL) {
+ sctp_clog.x.strlog.e_tsn = poschk->sinfo_tsn;
+ sctp_clog.x.strlog.e_sseq = poschk->sinfo_ssn;
+ } else {
+ sctp_clog.x.strlog.e_tsn = 0;
+ sctp_clog.x.strlog.e_sseq = 0;
+ }
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_STRM,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net, int augment, uint8_t from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.cwnd.net = net;
+ if (stcb->asoc.send_queue_cnt > 255)
+ sctp_clog.x.cwnd.cnt_in_send = 255;
+ else
+ sctp_clog.x.cwnd.cnt_in_send = stcb->asoc.send_queue_cnt;
+ if (stcb->asoc.stream_queue_cnt > 255)
+ sctp_clog.x.cwnd.cnt_in_str = 255;
+ else
+ sctp_clog.x.cwnd.cnt_in_str = stcb->asoc.stream_queue_cnt;
+
+ if (net) {
+ sctp_clog.x.cwnd.cwnd_new_value = net->cwnd;
+ sctp_clog.x.cwnd.inflight = net->flight_size;
+ sctp_clog.x.cwnd.pseudo_cumack = net->pseudo_cumack;
+ sctp_clog.x.cwnd.meets_pseudo_cumack = net->new_pseudo_cumack;
+ sctp_clog.x.cwnd.need_new_pseudo_cumack = net->find_pseudo_cumack;
+ }
+ if (SCTP_CWNDLOG_PRESEND == from) {
+ sctp_clog.x.cwnd.meets_pseudo_cumack = stcb->asoc.peers_rwnd;
+ }
+ sctp_clog.x.cwnd.cwnd_augment = augment;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_CWND,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_lock(struct sctp_inpcb *inp, struct sctp_tcb *stcb, uint8_t from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ if (inp) {
+ sctp_clog.x.lock.sock = (void *)inp->sctp_socket;
+
+ } else {
+ sctp_clog.x.lock.sock = (void *)NULL;
+ }
+ sctp_clog.x.lock.inp = (void *)inp;
+ if (stcb) {
+ sctp_clog.x.lock.tcb_lock = mtx_owned(&stcb->tcb_mtx);
+ } else {
+ sctp_clog.x.lock.tcb_lock = SCTP_LOCK_UNKNOWN;
+ }
+ if (inp) {
+ sctp_clog.x.lock.inp_lock = mtx_owned(&inp->inp_mtx);
+ sctp_clog.x.lock.create_lock = mtx_owned(&inp->inp_create_mtx);
+ } else {
+ sctp_clog.x.lock.inp_lock = SCTP_LOCK_UNKNOWN;
+ sctp_clog.x.lock.create_lock = SCTP_LOCK_UNKNOWN;
+ }
+ sctp_clog.x.lock.info_lock = rw_wowned(&sctppcbinfo.ipi_ep_mtx);
+ if (inp->sctp_socket) {
+ sctp_clog.x.lock.sock_lock = mtx_owned(&(inp->sctp_socket->so_rcv.sb_mtx));
+ sctp_clog.x.lock.sockrcvbuf_lock = mtx_owned(&(inp->sctp_socket->so_rcv.sb_mtx));
+ sctp_clog.x.lock.socksndbuf_lock = mtx_owned(&(inp->sctp_socket->so_snd.sb_mtx));
+ } else {
+ sctp_clog.x.lock.sock_lock = SCTP_LOCK_UNKNOWN;
+ sctp_clog.x.lock.sockrcvbuf_lock = SCTP_LOCK_UNKNOWN;
+ sctp_clog.x.lock.socksndbuf_lock = SCTP_LOCK_UNKNOWN;
+ }
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_LOCK_EVENT,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_maxburst(struct sctp_tcb *stcb, struct sctp_nets *net, int error, int burst, uint8_t from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.cwnd.net = net;
+ sctp_clog.x.cwnd.cwnd_new_value = error;
+ sctp_clog.x.cwnd.inflight = net->flight_size;
+ sctp_clog.x.cwnd.cwnd_augment = burst;
+ if (stcb->asoc.send_queue_cnt > 255)
+ sctp_clog.x.cwnd.cnt_in_send = 255;
+ else
+ sctp_clog.x.cwnd.cnt_in_send = stcb->asoc.send_queue_cnt;
+ if (stcb->asoc.stream_queue_cnt > 255)
+ sctp_clog.x.cwnd.cnt_in_str = 255;
+ else
+ sctp_clog.x.cwnd.cnt_in_str = stcb->asoc.stream_queue_cnt;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_MAXBURST,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_rwnd(uint8_t from, uint32_t peers_rwnd, uint32_t snd_size, uint32_t overhead)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.rwnd.rwnd = peers_rwnd;
+ sctp_clog.x.rwnd.send_size = snd_size;
+ sctp_clog.x.rwnd.overhead = overhead;
+ sctp_clog.x.rwnd.new_rwnd = 0;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_RWND,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+void
+sctp_log_rwnd_set(uint8_t from, uint32_t peers_rwnd, uint32_t flight_size, uint32_t overhead, uint32_t a_rwndval)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.rwnd.rwnd = peers_rwnd;
+ sctp_clog.x.rwnd.send_size = flight_size;
+ sctp_clog.x.rwnd.overhead = overhead;
+ sctp_clog.x.rwnd.new_rwnd = a_rwndval;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_RWND,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+}
+
+void
+sctp_log_mbcnt(uint8_t from, uint32_t total_oq, uint32_t book, uint32_t total_mbcnt_q, uint32_t mbcnt)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.mbcnt.total_queue_size = total_oq;
+ sctp_clog.x.mbcnt.size_change = book;
+ sctp_clog.x.mbcnt.total_queue_mb_size = total_mbcnt_q;
+ sctp_clog.x.mbcnt.mbcnt_change = mbcnt;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_MBCNT,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_misc_ints(uint8_t from, uint32_t a, uint32_t b, uint32_t c, uint32_t d)
+{
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_MISC_EVENT,
+ from,
+ a, b, c, d);
+}
+
+void
+sctp_wakeup_log(struct sctp_tcb *stcb, uint32_t cumtsn, uint32_t wake_cnt, int from)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.wake.stcb = (void *)stcb;
+ sctp_clog.x.wake.wake_cnt = wake_cnt;
+ sctp_clog.x.wake.flight = stcb->asoc.total_flight_count;
+ sctp_clog.x.wake.send_q = stcb->asoc.send_queue_cnt;
+ sctp_clog.x.wake.sent_q = stcb->asoc.sent_queue_cnt;
+
+ if (stcb->asoc.stream_queue_cnt < 0xff)
+ sctp_clog.x.wake.stream_qcnt = (uint8_t) stcb->asoc.stream_queue_cnt;
+ else
+ sctp_clog.x.wake.stream_qcnt = 0xff;
+
+ if (stcb->asoc.chunks_on_out_queue < 0xff)
+ sctp_clog.x.wake.chunks_on_oque = (uint8_t) stcb->asoc.chunks_on_out_queue;
+ else
+ sctp_clog.x.wake.chunks_on_oque = 0xff;
+
+ sctp_clog.x.wake.sctpflags = 0;
+ /* set in the defered mode stuff */
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE)
+ sctp_clog.x.wake.sctpflags |= 1;
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_WAKEOUTPUT)
+ sctp_clog.x.wake.sctpflags |= 2;
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_WAKEINPUT)
+ sctp_clog.x.wake.sctpflags |= 4;
+ /* what about the sb */
+ if (stcb->sctp_socket) {
+ struct socket *so = stcb->sctp_socket;
+
+ sctp_clog.x.wake.sbflags = (uint8_t) ((so->so_snd.sb_flags & 0x00ff));
+ } else {
+ sctp_clog.x.wake.sbflags = 0xff;
+ }
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_WAKE,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+void
+sctp_log_block(uint8_t from, struct socket *so, struct sctp_association *asoc, int sendlen)
+{
+ struct sctp_cwnd_log sctp_clog;
+
+ sctp_clog.x.blk.onsb = asoc->total_output_queue_size;
+ sctp_clog.x.blk.send_sent_qcnt = (uint16_t) (asoc->send_queue_cnt + asoc->sent_queue_cnt);
+ sctp_clog.x.blk.peer_rwnd = asoc->peers_rwnd;
+ sctp_clog.x.blk.stream_qcnt = (uint16_t) asoc->stream_queue_cnt;
+ sctp_clog.x.blk.chunks_on_oque = (uint16_t) asoc->chunks_on_out_queue;
+ sctp_clog.x.blk.flight_size = (uint16_t) (asoc->total_flight / 1024);
+ sctp_clog.x.blk.sndlen = sendlen;
+ SCTP_CTR6(KTR_SCTP, "SCTP:%d[%d]:%x-%x-%x-%x",
+ SCTP_LOG_EVENT_BLOCK,
+ from,
+ sctp_clog.x.misc.log1,
+ sctp_clog.x.misc.log2,
+ sctp_clog.x.misc.log3,
+ sctp_clog.x.misc.log4);
+
+}
+
+int
+sctp_fill_stat_log(void *optval, size_t *optsize)
+{
+ /* May need to fix this if ktrdump does not work */
+ return (0);
+}
+
+#ifdef SCTP_AUDITING_ENABLED
+uint8_t sctp_audit_data[SCTP_AUDIT_SIZE][2];
+static int sctp_audit_indx = 0;
+
+static
+void
+sctp_print_audit_report(void)
+{
+ int i;
+ int cnt;
+
+ cnt = 0;
+ for (i = sctp_audit_indx; i < SCTP_AUDIT_SIZE; i++) {
+ if ((sctp_audit_data[i][0] == 0xe0) &&
+ (sctp_audit_data[i][1] == 0x01)) {
+ cnt = 0;
+ SCTP_PRINTF("\n");
+ } else if (sctp_audit_data[i][0] == 0xf0) {
+ cnt = 0;
+ SCTP_PRINTF("\n");
+ } else if ((sctp_audit_data[i][0] == 0xc0) &&
+ (sctp_audit_data[i][1] == 0x01)) {
+ SCTP_PRINTF("\n");
+ cnt = 0;
+ }
+ SCTP_PRINTF("%2.2x%2.2x ", (uint32_t) sctp_audit_data[i][0],
+ (uint32_t) sctp_audit_data[i][1]);
+ cnt++;
+ if ((cnt % 14) == 0)
+ SCTP_PRINTF("\n");
+ }
+ for (i = 0; i < sctp_audit_indx; i++) {
+ if ((sctp_audit_data[i][0] == 0xe0) &&
+ (sctp_audit_data[i][1] == 0x01)) {
+ cnt = 0;
+ SCTP_PRINTF("\n");
+ } else if (sctp_audit_data[i][0] == 0xf0) {
+ cnt = 0;
+ SCTP_PRINTF("\n");
+ } else if ((sctp_audit_data[i][0] == 0xc0) &&
+ (sctp_audit_data[i][1] == 0x01)) {
+ SCTP_PRINTF("\n");
+ cnt = 0;
+ }
+ SCTP_PRINTF("%2.2x%2.2x ", (uint32_t) sctp_audit_data[i][0],
+ (uint32_t) sctp_audit_data[i][1]);
+ cnt++;
+ if ((cnt % 14) == 0)
+ SCTP_PRINTF("\n");
+ }
+ SCTP_PRINTF("\n");
+}
+
+void
+sctp_auditing(int from, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ int resend_cnt, tot_out, rep, tot_book_cnt;
+ struct sctp_nets *lnet;
+ struct sctp_tmit_chunk *chk;
+
+ sctp_audit_data[sctp_audit_indx][0] = 0xAA;
+ sctp_audit_data[sctp_audit_indx][1] = 0x000000ff & from;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ if (inp == NULL) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0x01;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ return;
+ }
+ if (stcb == NULL) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0x02;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ return;
+ }
+ sctp_audit_data[sctp_audit_indx][0] = 0xA1;
+ sctp_audit_data[sctp_audit_indx][1] =
+ (0x000000ff & stcb->asoc.sent_queue_retran_cnt);
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ rep = 0;
+ tot_book_cnt = 0;
+ resend_cnt = tot_out = 0;
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ resend_cnt++;
+ } else if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ tot_out += chk->book_size;
+ tot_book_cnt++;
+ }
+ }
+ if (resend_cnt != stcb->asoc.sent_queue_retran_cnt) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0xA1;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ SCTP_PRINTF("resend_cnt:%d asoc-tot:%d\n",
+ resend_cnt, stcb->asoc.sent_queue_retran_cnt);
+ rep = 1;
+ stcb->asoc.sent_queue_retran_cnt = resend_cnt;
+ sctp_audit_data[sctp_audit_indx][0] = 0xA2;
+ sctp_audit_data[sctp_audit_indx][1] =
+ (0x000000ff & stcb->asoc.sent_queue_retran_cnt);
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ }
+ if (tot_out != stcb->asoc.total_flight) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0xA2;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ rep = 1;
+ SCTP_PRINTF("tot_flt:%d asoc_tot:%d\n", tot_out,
+ (int)stcb->asoc.total_flight);
+ stcb->asoc.total_flight = tot_out;
+ }
+ if (tot_book_cnt != stcb->asoc.total_flight_count) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0xA5;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ rep = 1;
+ SCTP_PRINTF("tot_flt_book:%d\n", tot_book);
+
+ stcb->asoc.total_flight_count = tot_book_cnt;
+ }
+ tot_out = 0;
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ tot_out += lnet->flight_size;
+ }
+ if (tot_out != stcb->asoc.total_flight) {
+ sctp_audit_data[sctp_audit_indx][0] = 0xAF;
+ sctp_audit_data[sctp_audit_indx][1] = 0xA3;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+ rep = 1;
+ SCTP_PRINTF("real flight:%d net total was %d\n",
+ stcb->asoc.total_flight, tot_out);
+ /* now corrective action */
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+
+ tot_out = 0;
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if ((chk->whoTo == lnet) &&
+ (chk->sent < SCTP_DATAGRAM_RESEND)) {
+ tot_out += chk->book_size;
+ }
+ }
+ if (lnet->flight_size != tot_out) {
+ SCTP_PRINTF("net:%x flight was %d corrected to %d\n",
+ (uint32_t) lnet, lnet->flight_size,
+ tot_out);
+ lnet->flight_size = tot_out;
+ }
+ }
+ }
+ if (rep) {
+ sctp_print_audit_report();
+ }
+}
+
+void
+sctp_audit_log(uint8_t ev, uint8_t fd)
+{
+
+ sctp_audit_data[sctp_audit_indx][0] = ev;
+ sctp_audit_data[sctp_audit_indx][1] = fd;
+ sctp_audit_indx++;
+ if (sctp_audit_indx >= SCTP_AUDIT_SIZE) {
+ sctp_audit_indx = 0;
+ }
+}
+
+#endif
+
+/*
+ * a list of sizes based on typical MTUs, used only if the next hop
+ * size is not returned.
+ */
+static int sctp_mtu_sizes[] = {
+ 68,
+ 296,
+ 508,
+ 512,
+ 544,
+ 576,
+ 1006,
+ 1492,
+ 1500,
+ 1536,
+ 2002,
+ 2048,
+ 4352,
+ 4464,
+ 8166,
+ 17914,
+ 32000,
+ 65535
+};
+
+void
+sctp_stop_timers_for_shutdown(struct sctp_tcb *stcb)
+{
+ struct sctp_association *asoc;
+ struct sctp_nets *net;
+
+ asoc = &stcb->asoc;
+
+ (void)SCTP_OS_TIMER_STOP(&asoc->hb_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer);
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ (void)SCTP_OS_TIMER_STOP(&net->fr_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer);
+ }
+}
+
+int
+find_next_best_mtu(int totsz)
+{
+ int i, perfer;
+
+ /*
+ * if we are in here we must find the next best fit based on the
+ * size of the datagram that failed to be sent.
+ */
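+ /* e.g. a failed 1400-byte datagram falls below the 1492 entry, so 1006 is returned */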
+ perfer = 0;
+ for (i = 0; i < NUMBER_OF_MTU_SIZES; i++) {
+ if (totsz < sctp_mtu_sizes[i]) {
+ perfer = i - 1;
+ if (perfer < 0)
+ perfer = 0;
+ break;
+ }
+ }
+ return (sctp_mtu_sizes[perfer]);
+}
+
+void
+sctp_fill_random_store(struct sctp_pcb *m)
+{
+ /*
+ * Here we use MD5/SHA-1 to hash our good random numbers together with
+ * our counter. The result becomes our good random numbers and we then
+ * set up to hand these out. Note that we do no locking to protect
+ * this; that is ok, since if competing callers get in here we will just
+ * get more gobbledygook in the random store, which is what we want.
+ * There is a danger that two callers will use the same random numbers,
+ * but that's ok too since that is random as well :->
+ */
+ m->store_at = 0;
+ (void)sctp_hmac(SCTP_HMAC, (uint8_t *) m->random_numbers,
+ sizeof(m->random_numbers), (uint8_t *) & m->random_counter,
+ sizeof(m->random_counter), (uint8_t *) m->random_store);
+ m->random_counter++;
+}
+
+uint32_t
+sctp_select_initial_TSN(struct sctp_pcb *inp)
+{
+ /*
+ * A true implementation should use a random selection process to get
+ * the initial stream sequence number, using RFC 1750 as a good
+ * guideline.
+ */
+ uint32_t x, *xp;
+ uint8_t *p;
+ int store_at, new_store;
+
+ if (inp->initial_sequence_debug != 0) {
+ uint32_t ret;
+
+ ret = inp->initial_sequence_debug;
+ inp->initial_sequence_debug++;
+ return (ret);
+ }
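+ /* reserve sizeof(uint32_t) bytes of the random store with a lock-free compare-and-swap; retry on contention */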
+retry:
+ store_at = inp->store_at;
+ new_store = store_at + sizeof(uint32_t);
+ if (new_store >= (SCTP_SIGNATURE_SIZE - 3)) {
+ new_store = 0;
+ }
+ if (!atomic_cmpset_int(&inp->store_at, store_at, new_store)) {
+ goto retry;
+ }
+ if (new_store == 0) {
+ /* Refill the random store */
+ sctp_fill_random_store(inp);
+ }
+ p = &inp->random_store[store_at];
+ xp = (uint32_t *) p;
+ x = *xp;
+ return (x);
+}
+
+uint32_t
+sctp_select_a_tag(struct sctp_inpcb *inp, int save_in_twait)
+{
+ u_long x, not_done;
+ struct timeval now;
+
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ not_done = 1;
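+ /* keep drawing candidate tags until we get a non-zero value that sctp_is_vtag_good() accepts */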
+ while (not_done) {
+ x = sctp_select_initial_TSN(&inp->sctp_ep);
+ if (x == 0) {
+ /* we never use 0 */
+ continue;
+ }
+ if (sctp_is_vtag_good(inp, x, &now, save_in_twait)) {
+ not_done = 0;
+ }
+ }
+ return (x);
+}
+
+int
+sctp_init_asoc(struct sctp_inpcb *m, struct sctp_tcb *stcb,
+ int for_a_init, uint32_t override_tag, uint32_t vrf_id)
+{
+ struct sctp_association *asoc;
+
+ /*
+ * Anything set to zero is taken care of by the allocation routine's
+ * bzero
+ */
+
+ /*
+ * Up front, select what scoping to apply on the addresses I tell my
+ * peer. Not sure what to do with these right now; we will need to come
+ * up with a way to set them. We may need to pass them through from the
+ * caller in the sctp_aloc_assoc() function.
+ */
+ int i;
+
+ asoc = &stcb->asoc;
+ /* init all variables to a known value. */
+ SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_INUSE);
+ asoc->max_burst = m->sctp_ep.max_burst;
+ asoc->heart_beat_delay = TICKS_TO_MSEC(m->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
+ asoc->cookie_life = m->sctp_ep.def_cookie_life;
+ asoc->sctp_cmt_on_off = (uint8_t) sctp_cmt_on_off;
+ /* JRS 5/21/07 - Init CMT PF variables */
+ asoc->sctp_cmt_pf = (uint8_t) sctp_cmt_pf;
+ asoc->sctp_frag_point = m->sctp_frag_point;
+#ifdef INET
+ asoc->default_tos = m->ip_inp.inp.inp_ip_tos;
+#else
+ asoc->default_tos = 0;
+#endif
+
+#ifdef INET6
+ asoc->default_flowlabel = ((struct in6pcb *)m)->in6p_flowinfo;
+#else
+ asoc->default_flowlabel = 0;
+#endif
+ asoc->sb_send_resv = 0;
+ if (override_tag) {
+ struct timeval now;
+
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ if (sctp_is_in_timewait(override_tag)) {
+ /*
+ * It must be in the time-wait hash; we put it there
+ * when we allocate one. If not, the peer is playing
+ * games.
+ */
+ asoc->my_vtag = override_tag;
+ } else {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOMEM);
+ panic("Huh is_in_timewait fails");
+ return (ENOMEM);
+ }
+
+ } else {
+ asoc->my_vtag = sctp_select_a_tag(m, 1);
+ }
+ /* Get the nonce tags */
+ asoc->my_vtag_nonce = sctp_select_a_tag(m, 0);
+ asoc->peer_vtag_nonce = sctp_select_a_tag(m, 0);
+ asoc->vrf_id = vrf_id;
+
+ if (sctp_is_feature_on(m, SCTP_PCB_FLAGS_DONOT_HEARTBEAT))
+ asoc->hb_is_disabled = 1;
+ else
+ asoc->hb_is_disabled = 0;
+
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ asoc->tsn_in_at = 0;
+ asoc->tsn_out_at = 0;
+ asoc->tsn_in_wrapped = 0;
+ asoc->tsn_out_wrapped = 0;
+ asoc->cumack_log_at = 0;
+ asoc->cumack_log_atsnt = 0;
+#endif
+#ifdef SCTP_FS_SPEC_LOG
+ asoc->fs_index = 0;
+#endif
+ asoc->refcnt = 0;
+ asoc->assoc_up_sent = 0;
+ asoc->assoc_id = asoc->my_vtag;
+ asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number = asoc->sending_seq =
+ sctp_select_initial_TSN(&m->sctp_ep);
+ /* we are optimistic here */
+ asoc->peer_supports_pktdrop = 1;
+
+ asoc->sent_queue_retran_cnt = 0;
+
+ /* for CMT */
+ asoc->last_net_data_came_from = NULL;
+
+ /* This will need to be adjusted */
+ asoc->last_cwr_tsn = asoc->init_seq_number - 1;
+ asoc->last_acked_seq = asoc->init_seq_number - 1;
+ asoc->advanced_peer_ack_point = asoc->last_acked_seq;
+ asoc->asconf_seq_in = asoc->last_acked_seq;
+
+ /* here we are different, we hold the next one we expect */
+ asoc->str_reset_seq_in = asoc->last_acked_seq + 1;
+
+ asoc->initial_init_rto_max = m->sctp_ep.initial_init_rto_max;
+ asoc->initial_rto = m->sctp_ep.initial_rto;
+
+ asoc->max_init_times = m->sctp_ep.max_init_times;
+ asoc->max_send_times = m->sctp_ep.max_send_times;
+ asoc->def_net_failure = m->sctp_ep.def_net_failure;
+ asoc->free_chunk_cnt = 0;
+
+ asoc->iam_blocking = 0;
+ /* ECN Nonce initialization */
+ asoc->context = m->sctp_context;
+ asoc->def_send = m->def_send;
+ asoc->ecn_nonce_allowed = 0;
+ asoc->receiver_nonce_sum = 1;
+ asoc->nonce_sum_expect_base = 1;
+ asoc->nonce_sum_check = 1;
+ asoc->nonce_resync_tsn = 0;
+ asoc->nonce_wait_for_ecne = 0;
+ asoc->nonce_wait_tsn = 0;
+ asoc->delayed_ack = TICKS_TO_MSEC(m->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]);
+ asoc->sack_freq = m->sctp_ep.sctp_sack_freq;
+ asoc->pr_sctp_cnt = 0;
+ asoc->total_output_queue_size = 0;
+
+ if (m->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ struct in6pcb *inp6;
+
+ /* It's a V6 socket */
+ inp6 = (struct in6pcb *)m;
+ asoc->ipv6_addr_legal = 1;
+ /* Now look at the binding flag to see if V4 will be legal */
+ if (SCTP_IPV6_V6ONLY(inp6) == 0) {
+ asoc->ipv4_addr_legal = 1;
+ } else {
+ /* V4 addresses are NOT legal on the association */
+ asoc->ipv4_addr_legal = 0;
+ }
+ } else {
+ /* It's a V4 socket, not V6 */
+ asoc->ipv4_addr_legal = 1;
+ asoc->ipv6_addr_legal = 0;
+ }
+
+ asoc->my_rwnd = max(SCTP_SB_LIMIT_RCV(m->sctp_socket), SCTP_MINIMAL_RWND);
+ asoc->peers_rwnd = SCTP_SB_LIMIT_RCV(m->sctp_socket);
+
+ asoc->smallest_mtu = m->sctp_frag_point;
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("smallest_mtu init'd with asoc to :%d\n",
+ asoc->smallest_mtu);
+#endif
+ asoc->minrto = m->sctp_ep.sctp_minrto;
+ asoc->maxrto = m->sctp_ep.sctp_maxrto;
+
+ asoc->locked_on_sending = NULL;
+ asoc->stream_locked_on = 0;
+ asoc->ecn_echo_cnt_onq = 0;
+ asoc->stream_locked = 0;
+
+ asoc->send_sack = 1;
+
+ LIST_INIT(&asoc->sctp_restricted_addrs);
+
+ TAILQ_INIT(&asoc->nets);
+ TAILQ_INIT(&asoc->pending_reply_queue);
+ TAILQ_INIT(&asoc->asconf_ack_sent);
+ /* Setup to fill the hb random cache at first HB */
+ asoc->hb_random_idx = 4;
+
+ asoc->sctp_autoclose_ticks = m->sctp_ep.auto_close_time;
+
+ /*
+ * JRS - Pick the default congestion control module based on the
+ * sysctl.
+ */
+ switch (m->sctp_ep.sctp_default_cc_module) {
+ /* JRS - Standard TCP congestion control */
+ case SCTP_CC_RFC2581:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_RFC2581;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer;
+ break;
+ }
+ /* JRS - High Speed TCP congestion control (Floyd) */
+ case SCTP_CC_HSTCP:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_HSTCP;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_hs_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_hs_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer;
+ break;
+ }
+ /* JRS - HTCP congestion control */
+ case SCTP_CC_HTCP:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_HTCP;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_htcp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_htcp_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_htcp_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_htcp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_htcp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_htcp_cwnd_update_after_fr_timer;
+ break;
+ }
+ /* JRS - By default, use RFC2581 */
+ default:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_RFC2581;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer;
+ break;
+ }
+ }
+
+ /*
+ * Now the stream parameters, here we allocate space for all streams
+ * that we request by default.
+ */
+ asoc->streamoutcnt = asoc->pre_open_streams =
+ m->sctp_ep.pre_open_stream_count;
+ SCTP_MALLOC(asoc->strmout, struct sctp_stream_out *,
+ asoc->streamoutcnt * sizeof(struct sctp_stream_out),
+ SCTP_M_STRMO);
+ if (asoc->strmout == NULL) {
+ /* big trouble no memory */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOMEM);
+ return (ENOMEM);
+ }
+ for (i = 0; i < asoc->streamoutcnt; i++) {
+ /*
+ * inbound side must be set to 0xffff. Also NOTE: when we get
+ * the INIT-ACK back (for the INIT sender) we MUST reduce the
+ * count (streamoutcnt), but first check whether we sent to any
+ * of the upper streams that were dropped (if some were). Those
+ * that were dropped must be reported to the upper layer as
+ * failed to send.
+ */
+ asoc->strmout[i].next_sequence_sent = 0x0;
+ TAILQ_INIT(&asoc->strmout[i].outqueue);
+ asoc->strmout[i].stream_no = i;
+ asoc->strmout[i].last_msg_incomplete = 0;
+ asoc->strmout[i].next_spoke.tqe_next = 0;
+ asoc->strmout[i].next_spoke.tqe_prev = 0;
+ }
+ /* Now the mapping array */
+ asoc->mapping_array_size = SCTP_INITIAL_MAPPING_ARRAY;
+ SCTP_MALLOC(asoc->mapping_array, uint8_t *, asoc->mapping_array_size,
+ SCTP_M_MAP);
+ if (asoc->mapping_array == NULL) {
+ SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOMEM);
+ return (ENOMEM);
+ }
+ memset(asoc->mapping_array, 0, asoc->mapping_array_size);
+ /* Now the init of the other outqueues */
+ TAILQ_INIT(&asoc->free_chunks);
+ TAILQ_INIT(&asoc->out_wheel);
+ TAILQ_INIT(&asoc->control_send_queue);
+ TAILQ_INIT(&asoc->send_queue);
+ TAILQ_INIT(&asoc->sent_queue);
+ TAILQ_INIT(&asoc->reasmqueue);
+ TAILQ_INIT(&asoc->resetHead);
+ asoc->max_inbound_streams = m->sctp_ep.max_open_streams_intome;
+ TAILQ_INIT(&asoc->asconf_queue);
+ /* authentication fields */
+ asoc->authinfo.random = NULL;
+ asoc->authinfo.assoc_key = NULL;
+ asoc->authinfo.assoc_keyid = 0;
+ asoc->authinfo.recv_key = NULL;
+ asoc->authinfo.recv_keyid = 0;
+ LIST_INIT(&asoc->shared_keys);
+ asoc->marked_retrans = 0;
+ asoc->timoinit = 0;
+ asoc->timodata = 0;
+ asoc->timosack = 0;
+ asoc->timoshutdown = 0;
+ asoc->timoheartbeat = 0;
+ asoc->timocookie = 0;
+ asoc->timoshutdownack = 0;
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->start_time);
+ asoc->discontinuity_time = asoc->start_time;
+ /*
+ * sa_ignore MEMLEAK {memory is put in the assoc mapping array and
+ * freed later when the association is freed.
+ */
+ return (0);
+}
+
+int
+sctp_expand_mapping_array(struct sctp_association *asoc, uint32_t needed)
+{
+ /* mapping array needs to grow */
+ uint8_t *new_array;
+ uint32_t new_size;
+
+ new_size = asoc->mapping_array_size + ((needed + 7) / 8 + SCTP_MAPPING_ARRAY_INCR);
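+ /* round the requested number of map slots (bits) up to bytes and grow by at least SCTP_MAPPING_ARRAY_INCR */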
+ SCTP_MALLOC(new_array, uint8_t *, new_size, SCTP_M_MAP);
+ if (new_array == NULL) {
+ /* can't get more, forget it */
+ SCTP_PRINTF("No memory for expansion of SCTP mapping array %d\n",
+ new_size);
+ return (-1);
+ }
+ memset(new_array, 0, new_size);
+ memcpy(new_array, asoc->mapping_array, asoc->mapping_array_size);
+ SCTP_FREE(asoc->mapping_array, SCTP_M_MAP);
+ asoc->mapping_array = new_array;
+ asoc->mapping_array_size = new_size;
+ return (0);
+}
+
+#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
+static void
+sctp_iterator_work(struct sctp_iterator *it)
+{
+ int iteration_count = 0;
+ int inp_skip = 0;
+
+ SCTP_ITERATOR_LOCK();
+ if (it->inp) {
+ SCTP_INP_DECR_REF(it->inp);
+ }
+ if (it->inp == NULL) {
+ /* iterator is complete */
+done_with_iterator:
+ SCTP_ITERATOR_UNLOCK();
+ if (it->function_atend != NULL) {
+ (*it->function_atend) (it->pointer, it->val);
+ }
+ SCTP_FREE(it, SCTP_M_ITER);
+ return;
+ }
+select_a_new_ep:
+ SCTP_INP_WLOCK(it->inp);
+ while (((it->pcb_flags) &&
+ ((it->inp->sctp_flags & it->pcb_flags) != it->pcb_flags)) ||
+ ((it->pcb_features) &&
+ ((it->inp->sctp_features & it->pcb_features) != it->pcb_features))) {
+ /* endpoint flags or features don't match, so keep looking */
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ SCTP_INP_WUNLOCK(it->inp);
+ goto done_with_iterator;
+ }
+ SCTP_INP_WUNLOCK(it->inp);
+ it->inp = LIST_NEXT(it->inp, sctp_list);
+ if (it->inp == NULL) {
+ goto done_with_iterator;
+ }
+ SCTP_INP_WLOCK(it->inp);
+ }
+
+ SCTP_INP_WUNLOCK(it->inp);
+ SCTP_INP_RLOCK(it->inp);
+
+ /* now go through each assoc which is in the desired state */
+ if (it->done_current_ep == 0) {
+ if (it->function_inp != NULL)
+ inp_skip = (*it->function_inp) (it->inp, it->pointer, it->val);
+ it->done_current_ep = 1;
+ }
+ if (it->stcb == NULL) {
+ /* run the per instance function */
+ it->stcb = LIST_FIRST(&it->inp->sctp_asoc_list);
+ }
+ if ((inp_skip) || it->stcb == NULL) {
+ if (it->function_inp_end != NULL) {
+ inp_skip = (*it->function_inp_end) (it->inp,
+ it->pointer,
+ it->val);
+ }
+ SCTP_INP_RUNLOCK(it->inp);
+ goto no_stcb;
+ }
+ while (it->stcb) {
+ SCTP_TCB_LOCK(it->stcb);
+ if (it->asoc_state && ((it->stcb->asoc.state & it->asoc_state) != it->asoc_state)) {
+ /* not in the right state... keep looking */
+ SCTP_TCB_UNLOCK(it->stcb);
+ goto next_assoc;
+ }
+ /* see if we have limited out the iterator loop */
+ iteration_count++;
+ if (iteration_count > SCTP_ITERATOR_MAX_AT_ONCE) {
+ /* Pause to let others grab the lock */
+ atomic_add_int(&it->stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(it->stcb);
+
+ SCTP_INP_INCR_REF(it->inp);
+ SCTP_INP_RUNLOCK(it->inp);
+ SCTP_ITERATOR_UNLOCK();
+ SCTP_ITERATOR_LOCK();
+ SCTP_INP_RLOCK(it->inp);
+
+ SCTP_INP_DECR_REF(it->inp);
+ SCTP_TCB_LOCK(it->stcb);
+ atomic_add_int(&it->stcb->asoc.refcnt, -1);
+ iteration_count = 0;
+ }
+ /* run function on this one */
+ (*it->function_assoc) (it->inp, it->stcb, it->pointer, it->val);
+
+ /*
+ * we lie here, it really needs to have its own type but
+ * first I must verify that this won't affect things :-0
+ */
+ if (it->no_chunk_output == 0)
+ sctp_chunk_output(it->inp, it->stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+
+ SCTP_TCB_UNLOCK(it->stcb);
+next_assoc:
+ it->stcb = LIST_NEXT(it->stcb, sctp_tcblist);
+ if (it->stcb == NULL) {
+ /* Run last function */
+ if (it->function_inp_end != NULL) {
+ inp_skip = (*it->function_inp_end) (it->inp,
+ it->pointer,
+ it->val);
+ }
+ }
+ }
+ SCTP_INP_RUNLOCK(it->inp);
+no_stcb:
+ /* done with all assocs on this endpoint, move on to next endpoint */
+ it->done_current_ep = 0;
+ SCTP_INP_WLOCK(it->inp);
+ SCTP_INP_WUNLOCK(it->inp);
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ it->inp = NULL;
+ } else {
+ SCTP_INP_INFO_RLOCK();
+ it->inp = LIST_NEXT(it->inp, sctp_list);
+ SCTP_INP_INFO_RUNLOCK();
+ }
+ if (it->inp == NULL) {
+ goto done_with_iterator;
+ }
+ goto select_a_new_ep;
+}
+
+void
+sctp_iterator_worker(void)
+{
+ struct sctp_iterator *it = NULL;
+
+ /* This function is called with the WQ lock in place */
+
+ sctppcbinfo.iterator_running = 1;
+again:
+ it = TAILQ_FIRST(&sctppcbinfo.iteratorhead);
+ while (it) {
+ /* now lets work on this one */
+ TAILQ_REMOVE(&sctppcbinfo.iteratorhead, it, sctp_nxt_itr);
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ sctp_iterator_work(it);
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ /* sa_ignore FREED_MEMORY */
+ it = TAILQ_FIRST(&sctppcbinfo.iteratorhead);
+ }
+ if (TAILQ_FIRST(&sctppcbinfo.iteratorhead)) {
+ goto again;
+ }
+ sctppcbinfo.iterator_running = 0;
+ return;
+}
+
+#endif
+
+
+static void
+sctp_handle_addr_wq(void)
+{
+ /* deal with the ADDR wq from the rtsock calls */
+ struct sctp_laddr *wi;
+ struct sctp_asconf_iterator *asc;
+
+ SCTP_MALLOC(asc, struct sctp_asconf_iterator *,
+ sizeof(struct sctp_asconf_iterator), SCTP_M_ASC_IT);
+ if (asc == NULL) {
+ /* Try later, no memory */
+ sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
+ (struct sctp_inpcb *)NULL,
+ (struct sctp_tcb *)NULL,
+ (struct sctp_nets *)NULL);
+ return;
+ }
+ LIST_INIT(&asc->list_of_work);
+ asc->cnt = 0;
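+ /* under the iterator work-queue lock, move every queued address work item onto our private list */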
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ wi = LIST_FIRST(&sctppcbinfo.addr_wq);
+ while (wi != NULL) {
+ LIST_REMOVE(wi, sctp_nxt_addr);
+ LIST_INSERT_HEAD(&asc->list_of_work, wi, sctp_nxt_addr);
+ asc->cnt++;
+ wi = LIST_FIRST(&sctppcbinfo.addr_wq);
+ }
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ if (asc->cnt == 0) {
+ SCTP_FREE(asc, SCTP_M_ASC_IT);
+ } else {
+ (void)sctp_initiate_iterator(sctp_asconf_iterator_ep,
+ sctp_asconf_iterator_stcb,
+ NULL, /* No ep end for boundall */
+ SCTP_PCB_FLAGS_BOUNDALL,
+ SCTP_PCB_ANY_FEATURES,
+ SCTP_ASOC_ANY_STATE,
+ (void *)asc, 0,
+ sctp_asconf_iterator_end, NULL, 0);
+ }
+}
+
+int retcode = 0;
+int cur_oerr = 0;
+
+void
+sctp_timeout_handler(void *t)
+{
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
+ struct sctp_timer *tmr;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ int did_output;
+ struct sctp_iterator *it = NULL;
+
+ tmr = (struct sctp_timer *)t;
+ inp = (struct sctp_inpcb *)tmr->ep;
+ stcb = (struct sctp_tcb *)tmr->tcb;
+ net = (struct sctp_nets *)tmr->net;
+ did_output = 1;
+
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xF0, (uint8_t) tmr->type);
+ sctp_auditing(3, inp, stcb, net);
+#endif
+
+ /* sanity checks... */
+ if (tmr->self != (void *)tmr) {
+ /*
+ * SCTP_PRINTF("Stale SCTP timer fired (%p), ignoring...\n",
+ * tmr);
+ */
+ return;
+ }
+ tmr->stopped_from = 0xa001;
+ if (!SCTP_IS_TIMER_TYPE_VALID(tmr->type)) {
+ /*
+ * SCTP_PRINTF("SCTP timer fired with invalid type: 0x%x\n",
+ * tmr->type);
+ */
+ return;
+ }
+ tmr->stopped_from = 0xa002;
+ if ((tmr->type != SCTP_TIMER_TYPE_ADDR_WQ) && (inp == NULL)) {
+ return;
+ }
+ /* if this is an iterator timeout, get the struct and clear inp */
+ tmr->stopped_from = 0xa003;
+ if (tmr->type == SCTP_TIMER_TYPE_ITERATOR) {
+ it = (struct sctp_iterator *)inp;
+ inp = NULL;
+ }
+ if (inp) {
+ SCTP_INP_INCR_REF(inp);
+ if ((inp->sctp_socket == 0) &&
+ ((tmr->type != SCTP_TIMER_TYPE_INPKILL) &&
+ (tmr->type != SCTP_TIMER_TYPE_SHUTDOWN) &&
+ (tmr->type != SCTP_TIMER_TYPE_SHUTDOWNACK) &&
+ (tmr->type != SCTP_TIMER_TYPE_SHUTDOWNGUARD) &&
+ (tmr->type != SCTP_TIMER_TYPE_ASOCKILL))
+ ) {
+ SCTP_INP_DECR_REF(inp);
+ return;
+ }
+ }
+ tmr->stopped_from = 0xa004;
+ if (stcb) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state == 0) {
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ if (inp) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ return;
+ }
+ }
+ tmr->stopped_from = 0xa005;
+ SCTPDBG(SCTP_DEBUG_TIMER1, "Timer type %d goes off\n", tmr->type);
+ if (!SCTP_OS_TIMER_ACTIVE(&tmr->timer)) {
+ if (inp) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ if (stcb) {
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ }
+ return;
+ }
+ tmr->stopped_from = 0xa006;
+
+ if (stcb) {
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ if ((tmr->type != SCTP_TIMER_TYPE_ASOCKILL) &&
+ ((stcb->asoc.state == 0) ||
+ (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED))) {
+ SCTP_TCB_UNLOCK(stcb);
+ if (inp) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ return;
+ }
+ }
+ /* record in stopped_from which timeout occurred */
+ tmr->stopped_from = tmr->type;
+
+ /* mark as being serviced now */
+ if (SCTP_OS_TIMER_PENDING(&tmr->timer)) {
+ /*
+ * Callout has been rescheduled.
+ */
+ goto get_out;
+ }
+ if (!SCTP_OS_TIMER_ACTIVE(&tmr->timer)) {
+ /*
+ * Not active, so no action.
+ */
+ goto get_out;
+ }
+ SCTP_OS_TIMER_DEACTIVATE(&tmr->timer);
+
+ /* call the handler for the appropriate timer type */
+ switch (tmr->type) {
+ case SCTP_TIMER_TYPE_ZERO_COPY:
+ if (inp == NULL) {
+ break;
+ }
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
+ SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
+ }
+ break;
+ case SCTP_TIMER_TYPE_ZCOPY_SENDQ:
+ if (inp == NULL) {
+ break;
+ }
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
+ SCTP_ZERO_COPY_SENDQ_EVENT(inp, inp->sctp_socket);
+ }
+ break;
+ case SCTP_TIMER_TYPE_ADDR_WQ:
+ sctp_handle_addr_wq();
+ break;
+ case SCTP_TIMER_TYPE_ITERATOR:
+ SCTP_STAT_INCR(sctps_timoiterator);
+ sctp_iterator_timer(it);
+ break;
+ case SCTP_TIMER_TYPE_SEND:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timodata);
+ stcb->asoc.timodata++;
+ stcb->asoc.num_send_timers_up--;
+ if (stcb->asoc.num_send_timers_up < 0) {
+ stcb->asoc.num_send_timers_up = 0;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ cur_oerr = stcb->asoc.overall_error_count;
+ retcode = sctp_t3rxt_timer(inp, stcb, net);
+ if (retcode) {
+ /* no need to unlock on tcb its gone */
+
+ goto out_decr;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+ if ((stcb->asoc.num_send_timers_up == 0) &&
+ (stcb->asoc.sent_queue_cnt > 0)
+ ) {
+ struct sctp_tmit_chunk *chk;
+
+ /*
+ * safeguard: if there are chunks on the sent queue
+ * somewhere but no timers running, something is
+ * wrong... so we start a timer on the first chunk
+ * on the sent queue, on whatever net it was sent to.
+ */
+ chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb,
+ chk->whoTo);
+ }
+ break;
+ case SCTP_TIMER_TYPE_INIT:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoinit);
+ stcb->asoc.timoinit++;
+ if (sctp_t1init_timer(inp, stcb, net)) {
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+ }
+ /* We do output but not here */
+ did_output = 0;
+ break;
+ case SCTP_TIMER_TYPE_RECV:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ } {
+ int abort_flag;
+
+ SCTP_STAT_INCR(sctps_timosack);
+ stcb->asoc.timosack++;
+ if (stcb->asoc.cumulative_tsn != stcb->asoc.highest_tsn_inside_map)
+ sctp_sack_check(stcb, 0, 0, &abort_flag);
+ sctp_send_sack(stcb);
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SACK_TMR, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWN:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ if (sctp_shutdown_timer(inp, stcb, net)) {
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+ }
+ SCTP_STAT_INCR(sctps_timoshutdown);
+ stcb->asoc.timoshutdown++;
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SHUT_TMR, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_HEARTBEAT:
+ {
+ struct sctp_nets *lnet;
+ int cnt_of_unconf = 0;
+
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoheartbeat);
+ stcb->asoc.timoheartbeat++;
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ if ((lnet->dest_state & SCTP_ADDR_UNCONFIRMED) &&
+ (lnet->dest_state & SCTP_ADDR_REACHABLE)) {
+ cnt_of_unconf++;
+ }
+ }
+ if (cnt_of_unconf == 0) {
+ if (sctp_heartbeat_timer(inp, stcb, lnet,
+ cnt_of_unconf)) {
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+ }
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, lnet);
+#endif
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep, stcb, lnet);
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_HB_TMR, SCTP_SO_NOT_LOCKED);
+ }
+ break;
+ case SCTP_TIMER_TYPE_COOKIE:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ if (sctp_cookie_timer(inp, stcb, net)) {
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+ }
+ SCTP_STAT_INCR(sctps_timocookie);
+ stcb->asoc.timocookie++;
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ /*
+ * We consider T3 and Cookie timer pretty much the same with
+ * respect to where from in chunk_output.
+ */
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_NEWCOOKIE:
+ {
+ struct timeval tv;
+ int i, secret;
+
+ if (inp == NULL) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timosecret);
+ (void)SCTP_GETTIME_TIMEVAL(&tv);
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_ep.time_of_secret_change = tv.tv_sec;
+ inp->sctp_ep.last_secret_number =
+ inp->sctp_ep.current_secret_number;
+ inp->sctp_ep.current_secret_number++;
+ if (inp->sctp_ep.current_secret_number >=
+ SCTP_HOW_MANY_SECRETS) {
+ inp->sctp_ep.current_secret_number = 0;
+ }
+ secret = (int)inp->sctp_ep.current_secret_number;
+ for (i = 0; i < SCTP_NUMBER_OF_SECRETS; i++) {
+ inp->sctp_ep.secret_key[secret][i] =
+ sctp_select_initial_TSN(&inp->sctp_ep);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ sctp_timer_start(SCTP_TIMER_TYPE_NEWCOOKIE, inp, stcb, net);
+ }
+ did_output = 0;
+ break;
+ case SCTP_TIMER_TYPE_PATHMTURAISE:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timopathmtu);
+ sctp_pathmtu_timer(inp, stcb, net);
+ did_output = 0;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNACK:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ if (sctp_shutdownack_timer(inp, stcb, net)) {
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+ }
+ SCTP_STAT_INCR(sctps_timoshutdownack);
+ stcb->asoc.timoshutdownack++;
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SHUT_ACK_TMR, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNGUARD:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoshutdownguard);
+ sctp_abort_an_association(inp, stcb,
+ SCTP_SHUTDOWN_GUARD_EXPIRES, NULL, SCTP_SO_NOT_LOCKED);
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+
+ case SCTP_TIMER_TYPE_STRRESET:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ if (sctp_strreset_timer(inp, stcb, net)) {
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+ }
+ SCTP_STAT_INCR(sctps_timostrmrst);
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_TMR, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_EARLYFR:
+ /* Need to do FR of things for net */
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoearlyfr);
+ sctp_early_fr_timer(inp, stcb, net);
+ break;
+ case SCTP_TIMER_TYPE_ASCONF:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ if (sctp_asconf_timer(inp, stcb, net)) {
+ /* no need to unlock on tcb its gone */
+ goto out_decr;
+ }
+ SCTP_STAT_INCR(sctps_timoasconf);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(4, inp, stcb, net);
+#endif
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_ASCONF_TMR, SCTP_SO_NOT_LOCKED);
+ break;
+ case SCTP_TIMER_TYPE_PRIM_DELETED:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ sctp_delete_prim_timer(inp, stcb, net);
+ SCTP_STAT_INCR(sctps_timodelprim);
+ break;
+
+ case SCTP_TIMER_TYPE_AUTOCLOSE:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoautoclose);
+ sctp_autoclose_timer(inp, stcb, net);
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_AUTOCLOSE_TMR, SCTP_SO_NOT_LOCKED);
+ did_output = 0;
+ break;
+ case SCTP_TIMER_TYPE_ASOCKILL:
+ if ((stcb == NULL) || (inp == NULL)) {
+ break;
+ }
+ SCTP_STAT_INCR(sctps_timoassockill);
+ /* Can we free it yet? */
+ SCTP_INP_DECR_REF(inp);
+ sctp_timer_stop(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_1);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_2);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ /*
+ * free asoc always unlocks (or destroys) the lock, so prevent a
+ * duplicate unlock or an unlock of a freed mtx :-0
+ */
+ stcb = NULL;
+ goto out_no_decr;
+ case SCTP_TIMER_TYPE_INPKILL:
+ SCTP_STAT_INCR(sctps_timoinpkill);
+ if (inp == NULL) {
+ break;
+ }
+ /*
+ * special case, take away our increment since WE are the
+ * killer
+ */
+ SCTP_INP_DECR_REF(inp);
+ sctp_timer_stop(SCTP_TIMER_TYPE_INPKILL, inp, NULL, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_3);
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_DIRECTLY_NOCMPSET);
+ goto out_no_decr;
+ default:
+ SCTPDBG(SCTP_DEBUG_TIMER1, "sctp_timeout_handler:unknown timer %d\n",
+ tmr->type);
+ break;
+ };
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xF1, (uint8_t) tmr->type);
+ if (inp)
+ sctp_auditing(5, inp, stcb, net);
+#endif
+ if ((did_output) && stcb) {
+ /*
+ * Now we need to clean up the control chunk chain if an
+ * ECNE is on it. It must be marked as UNSENT again so the next
+ * call will continue to send it until such time that we get
+ * a CWR, to remove it. It is, however, less likely that we
+ * will find an ECN echo on the chain though.
+ */
+ sctp_fix_ecn_echo(&stcb->asoc);
+ }
+get_out:
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+out_decr:
+ if (inp) {
+ SCTP_INP_DECR_REF(inp);
+ }
+out_no_decr:
+ SCTPDBG(SCTP_DEBUG_TIMER1, "Timer now complete (type %d)\n",
+ tmr->type);
+ if (inp) {
+ }
+}
+
+void
+sctp_timer_start(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ int to_ticks;
+ struct sctp_timer *tmr;
+
+ if ((t_type != SCTP_TIMER_TYPE_ADDR_WQ) && (inp == NULL))
+ return;
+
+ to_ticks = 0;
+
+ tmr = NULL;
+ if (stcb) {
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ }
+ switch (t_type) {
+ case SCTP_TIMER_TYPE_ZERO_COPY:
+ tmr = &inp->sctp_ep.zero_copy_timer;
+ to_ticks = SCTP_ZERO_COPY_TICK_DELAY;
+ break;
+ case SCTP_TIMER_TYPE_ZCOPY_SENDQ:
+ tmr = &inp->sctp_ep.zero_copy_sendq_timer;
+ to_ticks = SCTP_ZERO_COPY_SENDQ_TICK_DELAY;
+ break;
+ case SCTP_TIMER_TYPE_ADDR_WQ:
+ /* Only 1 tick away :-) */
+ tmr = &sctppcbinfo.addr_wq_timer;
+ to_ticks = SCTP_ADDRESS_TICK_DELAY;
+ break;
+ case SCTP_TIMER_TYPE_ITERATOR:
+ {
+ struct sctp_iterator *it;
+
+ it = (struct sctp_iterator *)inp;
+ tmr = &it->tmr;
+ to_ticks = SCTP_ITERATOR_TICKS;
+ }
+ break;
+ case SCTP_TIMER_TYPE_SEND:
+ /* Here we use the RTO timer */
+ {
+ int rto_val;
+
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ if (net->RTO == 0) {
+ rto_val = stcb->asoc.initial_rto;
+ } else {
+ rto_val = net->RTO;
+ }
+ to_ticks = MSEC_TO_TICKS(rto_val);
+ }
+ break;
+ case SCTP_TIMER_TYPE_INIT:
+ /*
+ * Here we use the INIT timer default, usually about 1
+ * minute.
+ */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ break;
+ case SCTP_TIMER_TYPE_RECV:
+ /*
+ * Here we use the delayed-ACK timer value from the inp,
+ * usually about 200 ms.
+ */
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.dack_timer;
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.delayed_ack);
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWN:
+ /* Here we use the RTO of the destination. */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_HEARTBEAT:
+ /*
+ * the net is used here so that we can add in the RTO, even
+ * though we use a different timer. We also add the HB timer
+ * value PLUS a random jitter.
+ */
+ if ((inp == NULL) || (stcb == NULL)) {
+ return;
+ } else {
+ uint32_t rndval;
+ uint8_t this_random;
+ int cnt_of_unconf = 0;
+ struct sctp_nets *lnet;
+
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ if ((lnet->dest_state & SCTP_ADDR_UNCONFIRMED) &&
+ (lnet->dest_state & SCTP_ADDR_REACHABLE)) {
+ cnt_of_unconf++;
+ }
+ }
+ if (cnt_of_unconf) {
+ net = lnet = NULL;
+ (void)sctp_heartbeat_timer(inp, stcb, lnet, cnt_of_unconf);
+ }
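+ /* refill the jitter cache from a fresh 32-bit random value once all of its bytes have been consumed */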
+ if (stcb->asoc.hb_random_idx > 3) {
+ rndval = sctp_select_initial_TSN(&inp->sctp_ep);
+ memcpy(stcb->asoc.hb_random_values, &rndval,
+ sizeof(stcb->asoc.hb_random_values));
+ stcb->asoc.hb_random_idx = 0;
+ }
+ this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx];
+ stcb->asoc.hb_random_idx++;
+ stcb->asoc.hb_ect_randombit = 0;
+ /*
+ * this_random will be 0 - 255 ms; the RTO is in ms.
+ */
+ if ((stcb->asoc.hb_is_disabled) &&
+ (cnt_of_unconf == 0)) {
+ return;
+ }
+ if (net) {
+ int delay;
+
+ delay = stcb->asoc.heart_beat_delay;
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ if ((lnet->dest_state & SCTP_ADDR_UNCONFIRMED) &&
+ ((lnet->dest_state & SCTP_ADDR_OUT_OF_SCOPE) == 0) &&
+ (lnet->dest_state & SCTP_ADDR_REACHABLE)) {
+ delay = 0;
+ }
+ }
+ if (net->RTO == 0) {
+ /* Never been checked */
+ to_ticks = this_random + stcb->asoc.initial_rto + delay;
+ } else {
+ /* set rto_val to the ms */
+ to_ticks = delay + net->RTO + this_random;
+ }
+ } else {
+ if (cnt_of_unconf) {
+ to_ticks = this_random + stcb->asoc.initial_rto;
+ } else {
+ to_ticks = stcb->asoc.heart_beat_delay + this_random + stcb->asoc.initial_rto;
+ }
+ }
+ /*
+ * Now we must convert the to_ticks that are now in
+ * ms to ticks.
+ */
+ to_ticks = MSEC_TO_TICKS(to_ticks);
+ tmr = &stcb->asoc.hb_timer;
+ }
+ break;
+ case SCTP_TIMER_TYPE_COOKIE:
+ /*
+ * Here we can use the RTO timer from the network since one
+ * RTT was complete. If a retransmission happened then we will be
+ * using the initial RTO value.
+ */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_NEWCOOKIE:
+ /*
+ * nothing needed but the endpoint here; usually about 60
+ * minutes.
+ */
+ if (inp == NULL) {
+ return;
+ }
+ tmr = &inp->sctp_ep.signature_change;
+ to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_SIGNATURE];
+ break;
+ case SCTP_TIMER_TYPE_ASOCKILL:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.strreset_timer;
+ to_ticks = MSEC_TO_TICKS(SCTP_ASOC_KILL_TIMEOUT);
+ break;
+ case SCTP_TIMER_TYPE_INPKILL:
+ /*
+ * The inp is set up to die. We re-use the signature_change
+ * timer since that has stopped and we are in the GONE
+ * state.
+ */
+ if (inp == NULL) {
+ return;
+ }
+ tmr = &inp->sctp_ep.signature_change;
+ to_ticks = MSEC_TO_TICKS(SCTP_INP_KILL_TIMEOUT);
+ break;
+ case SCTP_TIMER_TYPE_PATHMTURAISE:
+ /*
+ * Here we use the value found in the EP for PMTU, usually
+ * about 10 minutes.
+ */
+ if ((stcb == NULL) || (inp == NULL)) {
+ return;
+ }
+ if (net == NULL) {
+ return;
+ }
+ to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_PMTU];
+ tmr = &net->pmtu_timer;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNACK:
+ /* Here we use the RTO of the destination */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNGUARD:
+ /*
+ * Here we use the endpoint's shutdown guard timer, usually
+ * about 3 minutes.
+ */
+ if ((inp == NULL) || (stcb == NULL)) {
+ return;
+ }
+ to_ticks = inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN];
+ tmr = &stcb->asoc.shut_guard_timer;
+ break;
+ case SCTP_TIMER_TYPE_STRRESET:
+ /*
+ * Here the timer comes from the stcb but its value is from
+ * the net's RTO.
+ */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ tmr = &stcb->asoc.strreset_timer;
+ break;
+
+ case SCTP_TIMER_TYPE_EARLYFR:
+ {
+ unsigned int msec;
+
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->flight_size > net->cwnd) {
+ /* no need to start */
+ return;
+ }
+ SCTP_STAT_INCR(sctps_earlyfrstart);
+ if (net->lastsa == 0) {
+ /* Hmm no rtt estimate yet? */
+ msec = stcb->asoc.initial_rto >> 2;
+ } else {
+ msec = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ }
+ if (msec < sctp_early_fr_msec) {
+ msec = sctp_early_fr_msec;
+ if (msec < SCTP_MINFR_MSEC_FLOOR) {
+ msec = SCTP_MINFR_MSEC_FLOOR;
+ }
+ }
+ to_ticks = MSEC_TO_TICKS(msec);
+ tmr = &net->fr_timer;
+ }
+ break;
+ case SCTP_TIMER_TYPE_ASCONF:
+ /*
+ * Here the timer comes from the stcb but its value is from
+ * the net's RTO.
+ */
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ if (net->RTO == 0) {
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ } else {
+ to_ticks = MSEC_TO_TICKS(net->RTO);
+ }
+ tmr = &stcb->asoc.asconf_timer;
+ break;
+ case SCTP_TIMER_TYPE_PRIM_DELETED:
+ if ((stcb == NULL) || (net != NULL)) {
+ return;
+ }
+ to_ticks = MSEC_TO_TICKS(stcb->asoc.initial_rto);
+ tmr = &stcb->asoc.delete_prim_timer;
+ break;
+ case SCTP_TIMER_TYPE_AUTOCLOSE:
+ if (stcb == NULL) {
+ return;
+ }
+ if (stcb->asoc.sctp_autoclose_ticks == 0) {
+ /*
+ * Really an error since stcb is NOT set to
+ * autoclose
+ */
+ return;
+ }
+ to_ticks = stcb->asoc.sctp_autoclose_ticks;
+ tmr = &stcb->asoc.autoclose_timer;
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_TIMER1, "%s: Unknown timer type %d\n",
+ __FUNCTION__, t_type);
+ return;
+ break;
+ };
+ if ((to_ticks <= 0) || (tmr == NULL)) {
+ SCTPDBG(SCTP_DEBUG_TIMER1, "%s: %d:software error to_ticks:%d tmr:%p not set ??\n",
+ __FUNCTION__, t_type, to_ticks, tmr);
+ return;
+ }
+ if (SCTP_OS_TIMER_PENDING(&tmr->timer)) {
+ /*
+ * we do NOT allow you to have it already running; if it is,
+ * we leave the current one up unchanged
+ */
+ return;
+ }
+ /* At this point we can proceed */
+ if (t_type == SCTP_TIMER_TYPE_SEND) {
+ stcb->asoc.num_send_timers_up++;
+ }
+ tmr->stopped_from = 0;
+ tmr->type = t_type;
+ tmr->ep = (void *)inp;
+ tmr->tcb = (void *)stcb;
+ tmr->net = (void *)net;
+ tmr->self = (void *)tmr;
+ tmr->ticks = sctp_get_tick_count();
+ (void)SCTP_OS_TIMER_START(&tmr->timer, to_ticks, sctp_timeout_handler, tmr);
+ return;
+}
+
+void
+sctp_timer_stop(int t_type, struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, uint32_t from)
+{
+ struct sctp_timer *tmr;
+
+ if ((t_type != SCTP_TIMER_TYPE_ADDR_WQ) &&
+ (inp == NULL))
+ return;
+
+ tmr = NULL;
+ if (stcb) {
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ }
+ switch (t_type) {
+ case SCTP_TIMER_TYPE_ZERO_COPY:
+ tmr = &inp->sctp_ep.zero_copy_timer;
+ break;
+ case SCTP_TIMER_TYPE_ZCOPY_SENDQ:
+ tmr = &inp->sctp_ep.zero_copy_sendq_timer;
+ break;
+ case SCTP_TIMER_TYPE_ADDR_WQ:
+ tmr = &sctppcbinfo.addr_wq_timer;
+ break;
+ case SCTP_TIMER_TYPE_EARLYFR:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->fr_timer;
+ SCTP_STAT_INCR(sctps_earlyfrstop);
+ break;
+ case SCTP_TIMER_TYPE_ITERATOR:
+ {
+ struct sctp_iterator *it;
+
+ it = (struct sctp_iterator *)inp;
+ tmr = &it->tmr;
+ }
+ break;
+ case SCTP_TIMER_TYPE_SEND:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_INIT:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_RECV:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.dack_timer;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWN:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_HEARTBEAT:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.hb_timer;
+ break;
+ case SCTP_TIMER_TYPE_COOKIE:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_NEWCOOKIE:
+ /* nothing needed but the endpoint here */
+ tmr = &inp->sctp_ep.signature_change;
+ /*
+ * We re-use the newcookie timer for the INP kill timer. We
+ * must ensure that we do not kill it by accident.
+ */
+ break;
+ case SCTP_TIMER_TYPE_ASOCKILL:
+ /*
+ * Stop the asoc kill timer.
+ */
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.strreset_timer;
+ break;
+
+ case SCTP_TIMER_TYPE_INPKILL:
+ /*
+ * The inp is set up to die. We re-use the signature_change
+ * timer since that has stopped and we are in the GONE
+ * state.
+ */
+ tmr = &inp->sctp_ep.signature_change;
+ break;
+ case SCTP_TIMER_TYPE_PATHMTURAISE:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->pmtu_timer;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNACK:
+ if ((stcb == NULL) || (net == NULL)) {
+ return;
+ }
+ tmr = &net->rxt_timer;
+ break;
+ case SCTP_TIMER_TYPE_SHUTDOWNGUARD:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.shut_guard_timer;
+ break;
+ case SCTP_TIMER_TYPE_STRRESET:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.strreset_timer;
+ break;
+ case SCTP_TIMER_TYPE_ASCONF:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.asconf_timer;
+ break;
+ case SCTP_TIMER_TYPE_PRIM_DELETED:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.delete_prim_timer;
+ break;
+ case SCTP_TIMER_TYPE_AUTOCLOSE:
+ if (stcb == NULL) {
+ return;
+ }
+ tmr = &stcb->asoc.autoclose_timer;
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_TIMER1, "%s: Unknown timer type %d\n",
+ __FUNCTION__, t_type);
+ break;
+ };
+ if (tmr == NULL) {
+ return;
+ }
+ if ((tmr->type != t_type) && tmr->type) {
+ /*
+ * Ok, we have a timer that is under joint use; the cookie timer,
+ * for example, shares with the SEND timer. We therefore are NOT
+ * running the timer that the caller wants stopped, so just
+ * return.
+ */
+ return;
+ }
+ if ((t_type == SCTP_TIMER_TYPE_SEND) && (stcb != NULL)) {
+ stcb->asoc.num_send_timers_up--;
+ if (stcb->asoc.num_send_timers_up < 0) {
+ stcb->asoc.num_send_timers_up = 0;
+ }
+ }
+ tmr->self = NULL;
+ tmr->stopped_from = from;
+ (void)SCTP_OS_TIMER_STOP(&tmr->timer);
+ return;
+}
+
+#ifdef SCTP_USE_ADLER32
+static uint32_t
+update_adler32(uint32_t adler, uint8_t * buf, int32_t len)
+{
+ uint32_t s1 = adler & 0xffff;
+ uint32_t s2 = (adler >> 16) & 0xffff;
+ int n;
+
+ for (n = 0; n < len; n++, buf++) {
+ /* s1 = (s1 + buf[n]) % BASE */
+ /* first we add */
+ s1 = (s1 + *buf);
+ /*
+ * now if we need to, we do a mod by subtracting. It seems a
+ * bit faster since I really will only ever do one subtract
+ * at the MOST, since buf[n] is a max of 255.
+ */
+ if (s1 >= SCTP_ADLER32_BASE) {
+ s1 -= SCTP_ADLER32_BASE;
+ }
+ /* s2 = (s2 + s1) % BASE */
+ /* first we add */
+ s2 = (s2 + s1);
+ /*
+ * again, it is more efficient (it seems) to subtract since
+ * the most s2 will ever be is (BASE-1 + BASE-1) in the
+ * worst case. This would then be (2 * BASE) - 2, which still
+ * only needs one subtract. On Intel it is much better to do
+ * it this way and avoid the divide. Have not -pg'd on
+ * sparc.
+ */
+ if (s2 >= SCTP_ADLER32_BASE) {
+ s2 -= SCTP_ADLER32_BASE;
+ }
+ }
+ /* Return the adler32 of the bytes buf[0..len-1] */
+ return ((s2 << 16) + s1);
+}
+
+#endif
+
+
+uint32_t
+sctp_calculate_len(struct mbuf *m)
+{
+ uint32_t tlen = 0;
+ struct mbuf *at;
+
+ at = m;
+ while (at) {
+ tlen += SCTP_BUF_LEN(at);
+ at = SCTP_BUF_NEXT(at);
+ }
+ return (tlen);
+}
+
+#if defined(SCTP_WITH_NO_CSUM)
+
+uint32_t
+sctp_calculate_sum(struct mbuf *m, int32_t * pktlen, uint32_t offset)
+{
+ /*
+ * given an mbuf chain with a packet header offset by 'offset'
+ * pointing at an sctphdr (with csum set to 0), go through the chain
+ * of SCTP_BUF_NEXT()'s and calculate the SCTP checksum. This also
+ * has a side bonus as it will calculate the total length of the
+ * mbuf chain. Note: if offset is greater than the total mbuf
+ * length, checksum=1, pktlen=0 is returned (i.e. no real error code)
+ */
+ if (pktlen == NULL)
+ return (0);
+ *pktlen = sctp_calculate_len(m);
+ return (0);
+}
+
+#elif defined(SCTP_USE_INCHKSUM)
+
+#include <machine/in_cksum.h>
+
+uint32_t
+sctp_calculate_sum(struct mbuf *m, int32_t * pktlen, uint32_t offset)
+{
+ /*
+ * given an mbuf chain with a packet header offset by 'offset'
+ * pointing at an sctphdr (with csum set to 0), go through the chain
+ * of SCTP_BUF_NEXT()'s and calculate the SCTP checksum. This also
+ * has a side bonus as it will calculate the total length of the
+ * mbuf chain. Note: if offset is greater than the total mbuf
+ * length, checksum=1, pktlen=0 is returned (i.e. no real error code)
+ */
+ int32_t tlen = 0;
+ struct mbuf *at;
+ uint32_t the_sum, retsum;
+
+ at = m;
+ while (at) {
+ tlen += SCTP_BUF_LEN(at);
+ at = SCTP_BUF_NEXT(at);
+ }
+ the_sum = (uint32_t) (in_cksum_skip(m, tlen, offset));
+ if (pktlen != NULL)
+ *pktlen = (tlen - offset);
+ retsum = htons(the_sum);
+ return (the_sum);
+}
+
+#else
+
+uint32_t
+sctp_calculate_sum(struct mbuf *m, int32_t * pktlen, uint32_t offset)
+{
+ /*
+ * given an mbuf chain with a packet header offset by 'offset'
+ * pointing at an sctphdr (with csum set to 0), go through the chain
+ * of SCTP_BUF_NEXT()'s and calculate the SCTP checksum. This also
+ * has a side bonus as it will calculate the total length of the
+ * mbuf chain. Note: if offset is greater than the total mbuf
+ * length, checksum=1, pktlen=0 is returned (i.e. no real error code)
+ */
+ int32_t tlen = 0;
+
+#ifdef SCTP_USE_ADLER32
+ uint32_t base = 1L;
+
+#else
+ uint32_t base = 0xffffffff;
+
+#endif
+ struct mbuf *at;
+
+ at = m;
+ /* find the correct mbuf and offset into mbuf */
+ while ((at != NULL) && (offset > (uint32_t) SCTP_BUF_LEN(at))) {
+ offset -= SCTP_BUF_LEN(at); /* update remaining offset
+ * left */
+ at = SCTP_BUF_NEXT(at);
+ }
+ while (at != NULL) {
+ if ((SCTP_BUF_LEN(at) - offset) > 0) {
+#ifdef SCTP_USE_ADLER32
+ base = update_adler32(base,
+ (unsigned char *)(SCTP_BUF_AT(at, offset)),
+ (unsigned int)(SCTP_BUF_LEN(at) - offset));
+#else
+ if ((SCTP_BUF_LEN(at) - offset) < 4) {
+ /* Use old method if less than 4 bytes */
+ base = old_update_crc32(base,
+ (unsigned char *)(SCTP_BUF_AT(at, offset)),
+ (unsigned int)(SCTP_BUF_LEN(at) - offset));
+ } else {
+ base = update_crc32(base,
+ (unsigned char *)(SCTP_BUF_AT(at, offset)),
+ (unsigned int)(SCTP_BUF_LEN(at) - offset));
+ }
+#endif
+ tlen += SCTP_BUF_LEN(at) - offset;
+ /* we only offset once into the first mbuf */
+ }
+ if (offset) {
+ if (offset < (uint32_t) SCTP_BUF_LEN(at))
+ offset = 0;
+ else
+ offset -= SCTP_BUF_LEN(at);
+ }
+ at = SCTP_BUF_NEXT(at);
+ }
+ if (pktlen != NULL) {
+ *pktlen = tlen;
+ }
+#ifdef SCTP_USE_ADLER32
+ /* Adler32 */
+ base = htonl(base);
+#else
+ /* CRC-32c */
+ base = sctp_csum_finalize(base);
+#endif
+ return (base);
+}
+
+
+#endif
+
+void
+sctp_mtu_size_reset(struct sctp_inpcb *inp,
+ struct sctp_association *asoc, uint32_t mtu)
+{
+ /*
+ * Reset the P-MTU size on this association; this involves changing
+ * the asoc MTU and going through ANY chunk+overhead larger than mtu to
+ * allow the DF flag to be cleared.
+ */
+ struct sctp_tmit_chunk *chk;
+ unsigned int eff_mtu, ovh;
+
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("sctp_mtu_size_reset(%p, asoc:%p mtu:%d\n",
+ inp, asoc, mtu);
+#endif
+ asoc->smallest_mtu = mtu;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MIN_OVERHEAD;
+ } else {
+ ovh = SCTP_MIN_V4_OVERHEAD;
+ }
+ eff_mtu = mtu - ovh;
+ TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) {
+
+ if (chk->send_size > eff_mtu) {
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
+ }
+ }
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ if (chk->send_size > eff_mtu) {
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
+ }
+ }
+}
+
+
+/*
+ * given an association and the starting time of the current RTT period,
+ * return the RTO in msecs; net should point to the current network
+ */
+uint32_t
+sctp_calculate_rto(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_nets *net,
+ struct timeval *told,
+ int safe)
+{
+ /*-
+ * given an association and the starting time of the current RTT
+ * period (in value1/value2) return RTO in number of msecs.
+ */
+ int calc_time = 0;
+ int o_calctime;
+ uint32_t new_rto = 0;
+ int first_measure = 0;
+ struct timeval now, then, *old;
+
+ /* Copy it out for sparc64 */
+ if (safe == sctp_align_unsafe_makecopy) {
+ old = &then;
+ memcpy(&then, told, sizeof(struct timeval));
+ } else if (safe == sctp_align_safe_nocopy) {
+ old = told;
+ } else {
+ /* error */
+ SCTP_PRINTF("Huh, bad rto calc call\n");
+ return (0);
+ }
+ /************************/
+ /* 1. calculate new RTT */
+ /************************/
+ /* get the current time */
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ /* compute the RTT value */
+ if ((u_long)now.tv_sec > (u_long)old->tv_sec) {
+ calc_time = ((u_long)now.tv_sec - (u_long)old->tv_sec) * 1000;
+ if ((u_long)now.tv_usec > (u_long)old->tv_usec) {
+ calc_time += (((u_long)now.tv_usec -
+ (u_long)old->tv_usec) / 1000);
+ } else if ((u_long)now.tv_usec < (u_long)old->tv_usec) {
+ /* Borrow 1,000ms from current calculation */
+ calc_time -= 1000;
+ /* Add in the slop over */
+ calc_time += ((int)now.tv_usec / 1000);
+ /* Add in the pre-second ms's */
+ calc_time += (((int)1000000 - (int)old->tv_usec) / 1000);
+ }
+ } else if ((u_long)now.tv_sec == (u_long)old->tv_sec) {
+ if ((u_long)now.tv_usec > (u_long)old->tv_usec) {
+ calc_time = ((u_long)now.tv_usec -
+ (u_long)old->tv_usec) / 1000;
+ } else if ((u_long)now.tv_usec < (u_long)old->tv_usec) {
+ /* impossible .. garbage in nothing out */
+ goto calc_rto;
+ } else if ((u_long)now.tv_usec == (u_long)old->tv_usec) {
+ /*
+ * We have to have 1 usec :-D this must be the
+ * loopback.
+ */
+ calc_time = 1;
+ } else {
+ /* impossible .. garbage in nothing out */
+ goto calc_rto;
+ }
+ } else {
+ /* Clock wrapped? */
+ goto calc_rto;
+ }
+ /***************************/
+ /* 2. update RTTVAR & SRTT */
+ /***************************/
+ o_calctime = calc_time;
+ /* this is Van Jacobson's integer version */
+ if (net->RTO_measured) {
+ calc_time -= (net->lastsa >> SCTP_RTT_SHIFT); /* take away 1/8th when
+ * shift=3 */
+ if (sctp_logging_level & SCTP_RTTVAR_LOGGING_ENABLE) {
+ rto_logging(net, SCTP_LOG_RTTVAR);
+ }
+ net->prev_rtt = o_calctime;
+ net->lastsa += calc_time; /* add 7/8th into sa when
+ * shift=3 */
+ if (calc_time < 0) {
+ calc_time = -calc_time;
+ }
+ calc_time -= (net->lastsv >> SCTP_RTT_VAR_SHIFT); /* take away 1/4 when
+ * VAR shift=2 */
+ net->lastsv += calc_time;
+ if (net->lastsv == 0) {
+ net->lastsv = SCTP_CLOCK_GRANULARITY;
+ }
+ } else {
+ /* First RTO measurement */
+ net->RTO_measured = 1;
+ net->lastsa = calc_time << SCTP_RTT_SHIFT; /* Multiply by 8 when
+ * shift=3 */
+ net->lastsv = calc_time;
+ if (net->lastsv == 0) {
+ net->lastsv = SCTP_CLOCK_GRANULARITY;
+ }
+ first_measure = 1;
+ net->prev_rtt = o_calctime;
+ if (sctp_logging_level & SCTP_RTTVAR_LOGGING_ENABLE) {
+ rto_logging(net, SCTP_LOG_INITIAL_RTT);
+ }
+ }
+calc_rto:
+ new_rto = (net->lastsa >> SCTP_RTT_SHIFT) + net->lastsv;
+ if ((new_rto > SCTP_SAT_NETWORK_MIN) &&
+ (stcb->asoc.sat_network_lockout == 0)) {
+ stcb->asoc.sat_network = 1;
+ } else if ((!first_measure) && stcb->asoc.sat_network) {
+ stcb->asoc.sat_network = 0;
+ stcb->asoc.sat_network_lockout = 1;
+ }
+ /* bound it, per C6/C7 in Section 5.3.1 */
+ if (new_rto < stcb->asoc.minrto) {
+ new_rto = stcb->asoc.minrto;
+ }
+ if (new_rto > stcb->asoc.maxrto) {
+ new_rto = stcb->asoc.maxrto;
+ }
+ /* we are now returning the RTO */
+ return (new_rto);
+}
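+
+/*
+ * For reference, with SCTP_RTT_SHIFT = 3 and SCTP_RTT_VAR_SHIFT = 2 the
+ * scaled-integer update above is (approximately) the classic Van Jacobson
+ * smoothing that SCTP's RTO calculation is based on: 'lastsa' holds SRTT
+ * scaled by 8 and 'lastsv' holds roughly 4 * RTTVAR, so the updates amount
+ * to
+ *
+ *     SRTT   += (R' - SRTT) / 8
+ *     RTTVAR += (|R' - SRTT| - RTTVAR) / 4
+ *     RTO     = SRTT + 4 * RTTVAR
+ *
+ * with the result clamped to [asoc.minrto, asoc.maxrto] (rules C6/C7).
+ */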
+
+/*
+ * Return a pointer to a contiguous piece of data from the given mbuf chain
+ * starting at 'off' for 'len' bytes. If the desired piece spans more than
+ * one mbuf, a copy is made into 'in_ptr'; the caller must ensure that the
+ * buffer size is >= 'len'. Returns NULL if there aren't 'len' bytes in the chain.
+ */
+caddr_t
+sctp_m_getptr(struct mbuf *m, int off, int len, uint8_t * in_ptr)
+{
+ uint32_t count;
+ uint8_t *ptr;
+
+ ptr = in_ptr;
+ if ((off < 0) || (len <= 0))
+ return (NULL);
+
+ /* find the desired start location */
+ while ((m != NULL) && (off > 0)) {
+ if (off < SCTP_BUF_LEN(m))
+ break;
+ off -= SCTP_BUF_LEN(m);
+ m = SCTP_BUF_NEXT(m);
+ }
+ if (m == NULL)
+ return (NULL);
+
+ /* is the current mbuf large enough (eg. contiguous)? */
+ if ((SCTP_BUF_LEN(m) - off) >= len) {
+ return (mtod(m, caddr_t)+off);
+ } else {
+ /* else, it spans more than one mbuf, so save a temp copy... */
+ while ((m != NULL) && (len > 0)) {
+ count = min(SCTP_BUF_LEN(m) - off, len);
+ bcopy(mtod(m, caddr_t)+off, ptr, count);
+ len -= count;
+ ptr += count;
+ off = 0;
+ m = SCTP_BUF_NEXT(m);
+ }
+ if ((m == NULL) && (len > 0))
+ return (NULL);
+ else
+ return ((caddr_t)in_ptr);
+ }
+}
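+
+/*
+ * Usage sketch: a caller that wants a chunk header which may straddle
+ * mbufs supplies scratch storage and checks for NULL, as the OOTB
+ * handling later in this file does:
+ *
+ *     struct sctp_chunkhdr *ch, chunk_buf;
+ *
+ *     ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ *         sizeof(*ch), (uint8_t *)&chunk_buf);
+ *     if (ch == NULL)
+ *         return;        NULL means fewer than 'len' bytes remain
+ *
+ * When the data is contiguous the returned pointer aliases the mbuf
+ * contents; otherwise it points into 'chunk_buf'.
+ */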
+
+
+
+struct sctp_paramhdr *
+sctp_get_next_param(struct mbuf *m,
+ int offset,
+ struct sctp_paramhdr *pull,
+ int pull_limit)
+{
+ /* This just provides a typed signature to Peter's Pull routine */
+ return ((struct sctp_paramhdr *)sctp_m_getptr(m, offset, pull_limit,
+ (uint8_t *) pull));
+}
+
+
+int
+sctp_add_pad_tombuf(struct mbuf *m, int padlen)
+{
+ /*
+ * add padlen bytes of 0 filled padding to the end of the mbuf. If
+ * padlen is > 3 this routine will fail.
+ */
+ uint8_t *dp;
+ int i;
+
+ if (padlen > 3) {
+ SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
+ return (ENOBUFS);
+ }
+ if (padlen <= M_TRAILINGSPACE(m)) {
+ /*
+ * The easy way. We hope the majority of the time we hit
+ * here :)
+ */
+ dp = (uint8_t *) (mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ SCTP_BUF_LEN(m) += padlen;
+ } else {
+ /* Hard way we must grow the mbuf */
+ struct mbuf *tmp;
+
+ tmp = sctp_get_mbuf_for_msg(padlen, 0, M_DONTWAIT, 1, MT_DATA);
+ if (tmp == NULL) {
+ /* Out of space GAK! we are in big trouble. */
+ SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ return (ENOSPC);
+ }
+ /* setup and insert in middle */
+ SCTP_BUF_LEN(tmp) = padlen;
+ SCTP_BUF_NEXT(tmp) = NULL;
+ SCTP_BUF_NEXT(m) = tmp;
+ dp = mtod(tmp, uint8_t *);
+ }
+ /* zero out the pad */
+ for (i = 0; i < padlen; i++) {
+ *dp = 0;
+ dp++;
+ }
+ return (0);
+}
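+
+/*
+ * Note: SCTP chunks are padded out to 4-byte boundaries, so a caller
+ * would typically derive the pad length as, e.g.,
+ *
+ *     padlen = SCTP_SIZE32(len) - len;
+ *
+ * which is always in the range 0..3 and therefore satisfies the
+ * (padlen > 3) check above. (Illustrative only; 'len' is whatever chunk
+ * length the caller is finishing off.)
+ */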
+
+int
+sctp_pad_lastmbuf(struct mbuf *m, int padval, struct mbuf *last_mbuf)
+{
+ /* find the last mbuf in chain and pad it */
+ struct mbuf *m_at;
+
+ m_at = m;
+ if (last_mbuf) {
+ return (sctp_add_pad_tombuf(last_mbuf, padval));
+ } else {
+ while (m_at) {
+ if (SCTP_BUF_NEXT(m_at) == NULL) {
+ return (sctp_add_pad_tombuf(m_at, padval));
+ }
+ m_at = SCTP_BUF_NEXT(m_at);
+ }
+ }
+ SCTP_LTRACE_ERR_RET_PKT(m, NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EFAULT);
+ return (EFAULT);
+}
+
+int sctp_asoc_change_wake = 0;
+
+static void
+sctp_notify_assoc_change(uint32_t event, struct sctp_tcb *stcb,
+ uint32_t error, void *data, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m_notify;
+ struct sctp_assoc_change *sac;
+ struct sctp_queued_to_read *control;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ /*
+ * First, if we are going down, dump everything we can to the
+ * socket rcv queue.
+ */
+
+ if ((stcb == NULL) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)
+ ) {
+ /* If the socket is gone we are out of here */
+ return;
+ }
+ /*
+ * For TCP model AND UDP connected sockets we will send an error up
+ * when an ABORT comes in.
+ */
+ if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
+ ((event == SCTP_COMM_LOST) || (event == SCTP_CANT_STR_ASSOC))) {
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNREFUSED);
+ stcb->sctp_socket->so_error = ECONNREFUSED;
+ } else {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET);
+ stcb->sctp_socket->so_error = ECONNRESET;
+ }
+ /* Wake ANY sleepers */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+ }
+#endif
+ sorwakeup(stcb->sctp_socket);
+ sowwakeup(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+ sctp_asoc_change_wake++;
+ }
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVASSOCEVNT)) {
+ /* event not enabled */
+ return;
+ }
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_assoc_change), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+
+ sac = mtod(m_notify, struct sctp_assoc_change *);
+ sac->sac_type = SCTP_ASSOC_CHANGE;
+ sac->sac_flags = 0;
+ sac->sac_length = sizeof(struct sctp_assoc_change);
+ sac->sac_state = event;
+ sac->sac_error = error;
+ /* XXX verify these stream counts */
+ sac->sac_outbound_streams = stcb->asoc.streamoutcnt;
+ sac->sac_inbound_streams = stcb->asoc.streamincnt;
+ sac->sac_assoc_id = sctp_get_associd(stcb);
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_assoc_change);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ control->spec_flags = M_NOTIFICATION;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, so_locked);
+ if (event == SCTP_COMM_LOST) {
+ /* Wake up any sleeper */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+ }
+#endif
+ sctp_sowwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+ }
+}
+
+static void
+sctp_notify_peer_addr_change(struct sctp_tcb *stcb, uint32_t state,
+ struct sockaddr *sa, uint32_t error)
+{
+ struct mbuf *m_notify;
+ struct sctp_paddr_change *spc;
+ struct sctp_queued_to_read *control;
+
+ if ((stcb == NULL) || (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVPADDREVNT)))
+ /* event not enabled */
+ return;
+
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_paddr_change), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ spc = mtod(m_notify, struct sctp_paddr_change *);
+ spc->spc_type = SCTP_PEER_ADDR_CHANGE;
+ spc->spc_flags = 0;
+ spc->spc_length = sizeof(struct sctp_paddr_change);
+ if (sa->sa_family == AF_INET) {
+ memcpy(&spc->spc_aaddr, sa, sizeof(struct sockaddr_in));
+ } else {
+ struct sockaddr_in6 *sin6;
+
+ memcpy(&spc->spc_aaddr, sa, sizeof(struct sockaddr_in6));
+
+ sin6 = (struct sockaddr_in6 *)&spc->spc_aaddr;
+ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) {
+ if (sin6->sin6_scope_id == 0) {
+ /* recover scope_id for user */
+ (void)sa6_recoverscope(sin6);
+ } else {
+ /* clear embedded scope_id for user */
+ in6_clearscope(&sin6->sin6_addr);
+ }
+ }
+ }
+ spc->spc_state = state;
+ spc->spc_error = error;
+ spc->spc_assoc_id = sctp_get_associd(stcb);
+
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_paddr_change);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->length = SCTP_BUF_LEN(m_notify);
+ control->spec_flags = M_NOTIFICATION;
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+}
+
+
+static void
+sctp_notify_send_failed(struct sctp_tcb *stcb, uint32_t error,
+ struct sctp_tmit_chunk *chk, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m_notify;
+ struct sctp_send_failed *ssf;
+ struct sctp_queued_to_read *control;
+ int length;
+
+ if ((stcb == NULL) || (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)))
+ /* event not enabled */
+ return;
+
+ length = sizeof(struct sctp_send_failed) + chk->send_size;
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_send_failed), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ ssf = mtod(m_notify, struct sctp_send_failed *);
+ ssf->ssf_type = SCTP_SEND_FAILED;
+ if (error == SCTP_NOTIFY_DATAGRAM_UNSENT)
+ ssf->ssf_flags = SCTP_DATA_UNSENT;
+ else
+ ssf->ssf_flags = SCTP_DATA_SENT;
+ ssf->ssf_length = length;
+ ssf->ssf_error = error;
+ /* not exactly what the user sent in, but should be close :) */
+ bzero(&ssf->ssf_info, sizeof(ssf->ssf_info));
+ ssf->ssf_info.sinfo_stream = chk->rec.data.stream_number;
+ ssf->ssf_info.sinfo_ssn = chk->rec.data.stream_seq;
+ ssf->ssf_info.sinfo_flags = chk->rec.data.rcv_flags;
+ ssf->ssf_info.sinfo_ppid = chk->rec.data.payloadtype;
+ ssf->ssf_info.sinfo_context = chk->rec.data.context;
+ ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb);
+ ssf->ssf_assoc_id = sctp_get_associd(stcb);
+ SCTP_BUF_NEXT(m_notify) = chk->data;
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed);
+
+ /* Steal off the mbuf */
+ chk->data = NULL;
+ /*
+ * For this case, we check the actual socket buffer, since the assoc
+ * is going away we don't want to overfill the socket buffer for a
+ * non-reader
+ */
+ if (sctp_sbspace_failedmsgs(&stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) {
+ sctp_m_freem(m_notify);
+ return;
+ }
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, so_locked);
+}
+
+
+static void
+sctp_notify_send_failed2(struct sctp_tcb *stcb, uint32_t error,
+ struct sctp_stream_queue_pending *sp, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m_notify;
+ struct sctp_send_failed *ssf;
+ struct sctp_queued_to_read *control;
+ int length;
+
+ if ((stcb == NULL) || (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSENDFAILEVNT)))
+ /* event not enabled */
+ return;
+
+ length = sizeof(struct sctp_send_failed) + sp->length;
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_send_failed), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ ssf = mtod(m_notify, struct sctp_send_failed *);
+ ssf->ssf_type = SCTP_SEND_FAILED;
+ if (error == SCTP_NOTIFY_DATAGRAM_UNSENT)
+ ssf->ssf_flags = SCTP_DATA_UNSENT;
+ else
+ ssf->ssf_flags = SCTP_DATA_SENT;
+ ssf->ssf_length = length;
+ ssf->ssf_error = error;
+ /* not exactly what the user sent in, but should be close :) */
+ bzero(&ssf->ssf_info, sizeof(ssf->ssf_info));
+ ssf->ssf_info.sinfo_stream = sp->stream;
+ ssf->ssf_info.sinfo_ssn = sp->strseq;
+ ssf->ssf_info.sinfo_flags = sp->sinfo_flags;
+ ssf->ssf_info.sinfo_ppid = sp->ppid;
+ ssf->ssf_info.sinfo_context = sp->context;
+ ssf->ssf_info.sinfo_assoc_id = sctp_get_associd(stcb);
+ ssf->ssf_assoc_id = sctp_get_associd(stcb);
+ SCTP_BUF_NEXT(m_notify) = sp->data;
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_send_failed);
+
+ /* Steal off the mbuf */
+ sp->data = NULL;
+ /*
+ * For this case, we check the actual socket buffer, since the assoc
+ * is going away we don't want to overfill the socket buffer for a
+ * non-reader
+ */
+ if (sctp_sbspace_failedmsgs(&stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) {
+ sctp_m_freem(m_notify);
+ return;
+ }
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, so_locked);
+}
+
+
+
+static void
+sctp_notify_adaptation_layer(struct sctp_tcb *stcb,
+ uint32_t error)
+{
+ struct mbuf *m_notify;
+ struct sctp_adaptation_event *sai;
+ struct sctp_queued_to_read *control;
+
+ if ((stcb == NULL) || (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_ADAPTATIONEVNT)))
+ /* event not enabled */
+ return;
+
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_adaption_event), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ sai = mtod(m_notify, struct sctp_adaptation_event *);
+ sai->sai_type = SCTP_ADAPTATION_INDICATION;
+ sai->sai_flags = 0;
+ sai->sai_length = sizeof(struct sctp_adaptation_event);
+ sai->sai_adaptation_ind = stcb->asoc.peers_adaptation;
+ sai->sai_assoc_id = sctp_get_associd(stcb);
+
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_adaptation_event);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->length = SCTP_BUF_LEN(m_notify);
+ control->spec_flags = M_NOTIFICATION;
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+}
+
+/* This always must be called with the read-queue LOCKED in the INP */
+void
+sctp_notify_partial_delivery_indication(struct sctp_tcb *stcb, uint32_t error,
+ int nolock, uint32_t val)
+{
+ struct mbuf *m_notify;
+ struct sctp_pdapi_event *pdapi;
+ struct sctp_queued_to_read *control;
+ struct sockbuf *sb;
+
+ if ((stcb == NULL) || (stcb->sctp_socket == NULL) ||
+ sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_PDAPIEVNT))
+ /* event not enabled */
+ return;
+
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_pdapi_event), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ pdapi = mtod(m_notify, struct sctp_pdapi_event *);
+ pdapi->pdapi_type = SCTP_PARTIAL_DELIVERY_EVENT;
+ pdapi->pdapi_flags = 0;
+ pdapi->pdapi_length = sizeof(struct sctp_pdapi_event);
+ pdapi->pdapi_indication = error;
+ pdapi->pdapi_stream = (val >> 16);
+ pdapi->pdapi_seq = (val & 0x0000ffff);
+ pdapi->pdapi_assoc_id = sctp_get_associd(stcb);
+
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_pdapi_event);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ control->held_length = 0;
+ control->length = 0;
+ if (nolock == 0) {
+ SCTP_INP_READ_LOCK(stcb->sctp_ep);
+ }
+ sb = &stcb->sctp_socket->so_rcv;
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBALLOC, SCTP_BUF_LEN(m_notify));
+ }
+ sctp_sballoc(stcb, sb, m_notify);
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ atomic_add_int(&control->length, SCTP_BUF_LEN(m_notify));
+ control->end_added = 1;
+ if (stcb->asoc.control_pdapi)
+ TAILQ_INSERT_AFTER(&stcb->sctp_ep->read_queue, stcb->asoc.control_pdapi, control, next);
+ else {
+ /* we really should not see this case */
+ TAILQ_INSERT_TAIL(&stcb->sctp_ep->read_queue, control, next);
+ }
+ if (nolock == 0) {
+ SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
+ }
+ if (stcb->sctp_ep && stcb->sctp_socket) {
+ /* This should always be the case */
+ sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
+ }
+}
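+
+/*
+ * The 'val' argument above packs the stream id into the upper 16 bits
+ * and the stream sequence number into the lower 16 bits, so a caller
+ * would typically build it as, e.g.,
+ *
+ *     val = (stream_no << 16) | (seq & 0x0000ffff);
+ *
+ * (illustrative; 'stream_no' and 'seq' are whatever the caller is
+ * reporting), matching the pdapi_stream/pdapi_seq split done above.
+ */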
+
+static void
+sctp_notify_shutdown_event(struct sctp_tcb *stcb)
+{
+ struct mbuf *m_notify;
+ struct sctp_shutdown_event *sse;
+ struct sctp_queued_to_read *control;
+
+ /*
+ * For TCP model AND UDP connected sockets we will send an error up
+ * when a SHUTDOWN completes
+ */
+ if (stcb == NULL) {
+ return;
+ }
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /* mark socket closed for read/write and wakeup! */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+#endif
+ socantsendmore(stcb->sctp_socket);
+ socantrcvmore(stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT))
+ /* event not enabled */
+ return;
+
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_event), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ sse = mtod(m_notify, struct sctp_shutdown_event *);
+ sse->sse_type = SCTP_SHUTDOWN_EVENT;
+ sse->sse_flags = 0;
+ sse->sse_length = sizeof(struct sctp_shutdown_event);
+ sse->sse_assoc_id = sctp_get_associd(stcb);
+
+ SCTP_BUF_LEN(m_notify) = sizeof(struct sctp_shutdown_event);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+}
+
+static void
+sctp_notify_stream_reset(struct sctp_tcb *stcb,
+ int number_entries, uint16_t * list, int flag)
+{
+ struct mbuf *m_notify;
+ struct sctp_queued_to_read *control;
+ struct sctp_stream_reset_event *strreset;
+ int len;
+
+ if (stcb == NULL) {
+ return;
+ }
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_STREAM_RESETEVNT))
+ /* event not enabled */
+ return;
+
+ m_notify = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+ SCTP_BUF_LEN(m_notify) = 0;
+ len = sizeof(struct sctp_stream_reset_event) + (number_entries * sizeof(uint16_t));
+ if (len > M_TRAILINGSPACE(m_notify)) {
+ /* never enough room */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ strreset = mtod(m_notify, struct sctp_stream_reset_event *);
+ strreset->strreset_type = SCTP_STREAM_RESET_EVENT;
+ if (number_entries == 0) {
+ strreset->strreset_flags = flag | SCTP_STRRESET_ALL_STREAMS;
+ } else {
+ strreset->strreset_flags = flag | SCTP_STRRESET_STREAM_LIST;
+ }
+ strreset->strreset_length = len;
+ strreset->strreset_assoc_id = sctp_get_associd(stcb);
+ if (number_entries) {
+ int i;
+
+ for (i = 0; i < number_entries; i++) {
+ strreset->strreset_list[i] = ntohs(list[i]);
+ }
+ }
+ SCTP_BUF_LEN(m_notify) = len;
+ SCTP_BUF_NEXT(m_notify) = NULL;
+ if (sctp_sbspace(&stcb->asoc, &stcb->sctp_socket->so_rcv) < SCTP_BUF_LEN(m_notify)) {
+ /* no space */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0,
+ m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb,
+ control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+}
+
+
+void
+sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb,
+ uint32_t error, void *data, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ if (stcb == NULL) {
+ /* unlikely but */
+ return;
+ }
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)
+ ) {
+ /* No notifications up when we are in a no socket state */
+ return;
+ }
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /* Can't send up to a closed socket any notifications */
+ return;
+ }
+ if (stcb && ((stcb->asoc.state & SCTP_STATE_COOKIE_WAIT) ||
+ (stcb->asoc.state & SCTP_STATE_COOKIE_ECHOED))) {
+ if ((notification == SCTP_NOTIFY_INTERFACE_DOWN) ||
+ (notification == SCTP_NOTIFY_INTERFACE_UP) ||
+ (notification == SCTP_NOTIFY_INTERFACE_CONFIRMED)) {
+ /* Don't report these in front states */
+ return;
+ }
+ }
+ switch (notification) {
+ case SCTP_NOTIFY_ASSOC_UP:
+ if (stcb->asoc.assoc_up_sent == 0) {
+ sctp_notify_assoc_change(SCTP_COMM_UP, stcb, error, NULL, so_locked);
+ stcb->asoc.assoc_up_sent = 1;
+ }
+ if (stcb->asoc.adaptation_needed && (stcb->asoc.adaptation_sent == 0)) {
+ sctp_notify_adaptation_layer(stcb, error);
+ }
+ break;
+ case SCTP_NOTIFY_ASSOC_DOWN:
+ sctp_notify_assoc_change(SCTP_SHUTDOWN_COMP, stcb, error, NULL, so_locked);
+ break;
+ case SCTP_NOTIFY_INTERFACE_DOWN:
+ {
+ struct sctp_nets *net;
+
+ net = (struct sctp_nets *)data;
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_UNREACHABLE,
+ (struct sockaddr *)&net->ro._l_addr, error);
+ break;
+ }
+ case SCTP_NOTIFY_INTERFACE_UP:
+ {
+ struct sctp_nets *net;
+
+ net = (struct sctp_nets *)data;
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_AVAILABLE,
+ (struct sockaddr *)&net->ro._l_addr, error);
+ break;
+ }
+ case SCTP_NOTIFY_INTERFACE_CONFIRMED:
+ {
+ struct sctp_nets *net;
+
+ net = (struct sctp_nets *)data;
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_CONFIRMED,
+ (struct sockaddr *)&net->ro._l_addr, error);
+ break;
+ }
+ case SCTP_NOTIFY_SPECIAL_SP_FAIL:
+ sctp_notify_send_failed2(stcb, error,
+ (struct sctp_stream_queue_pending *)data, so_locked);
+ break;
+ case SCTP_NOTIFY_DG_FAIL:
+ sctp_notify_send_failed(stcb, error,
+ (struct sctp_tmit_chunk *)data, so_locked);
+ break;
+ case SCTP_NOTIFY_PARTIAL_DELVIERY_INDICATION:
+ {
+ uint32_t val;
+
+ val = *((uint32_t *) data);
+
+ sctp_notify_partial_delivery_indication(stcb, error, 0, val);
+ }
+ break;
+ case SCTP_NOTIFY_STRDATA_ERR:
+ break;
+ case SCTP_NOTIFY_ASSOC_ABORTED:
+ if ((stcb) && (((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_WAIT) ||
+ ((stcb->asoc.state & SCTP_STATE_MASK) == SCTP_STATE_COOKIE_ECHOED))) {
+ sctp_notify_assoc_change(SCTP_CANT_STR_ASSOC, stcb, error, NULL, so_locked);
+ } else {
+ sctp_notify_assoc_change(SCTP_COMM_LOST, stcb, error, NULL, so_locked);
+ }
+ break;
+ case SCTP_NOTIFY_PEER_OPENED_STREAM:
+ break;
+ case SCTP_NOTIFY_STREAM_OPENED_OK:
+ break;
+ case SCTP_NOTIFY_ASSOC_RESTART:
+ sctp_notify_assoc_change(SCTP_RESTART, stcb, error, data, so_locked);
+ break;
+ case SCTP_NOTIFY_HB_RESP:
+ break;
+ case SCTP_NOTIFY_STR_RESET_SEND:
+ sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), SCTP_STRRESET_OUTBOUND_STR);
+ break;
+ case SCTP_NOTIFY_STR_RESET_RECV:
+ sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), SCTP_STRRESET_INBOUND_STR);
+ break;
+ case SCTP_NOTIFY_STR_RESET_FAILED_OUT:
+ sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), (SCTP_STRRESET_OUTBOUND_STR | SCTP_STRRESET_FAILED));
+ break;
+
+ case SCTP_NOTIFY_STR_RESET_FAILED_IN:
+ sctp_notify_stream_reset(stcb, error, ((uint16_t *) data), (SCTP_STRRESET_INBOUND_STR | SCTP_STRRESET_FAILED));
+ break;
+
+ case SCTP_NOTIFY_ASCONF_ADD_IP:
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_ADDED, data,
+ error);
+ break;
+ case SCTP_NOTIFY_ASCONF_DELETE_IP:
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_REMOVED, data,
+ error);
+ break;
+ case SCTP_NOTIFY_ASCONF_SET_PRIMARY:
+ sctp_notify_peer_addr_change(stcb, SCTP_ADDR_MADE_PRIM, data,
+ error);
+ break;
+ case SCTP_NOTIFY_ASCONF_SUCCESS:
+ break;
+ case SCTP_NOTIFY_ASCONF_FAILED:
+ break;
+ case SCTP_NOTIFY_PEER_SHUTDOWN:
+ sctp_notify_shutdown_event(stcb);
+ break;
+ case SCTP_NOTIFY_AUTH_NEW_KEY:
+ sctp_notify_authentication(stcb, SCTP_AUTH_NEWKEY, error,
+ (uint16_t) (uintptr_t) data);
+ break;
+#if 0
+ case SCTP_NOTIFY_AUTH_KEY_CONFLICT:
+ sctp_notify_authentication(stcb, SCTP_AUTH_KEY_CONFLICT,
+ error, (uint16_t) (uintptr_t) data);
+ break;
+#endif /* not yet? remove? */
+
+
+ default:
+ SCTPDBG(SCTP_DEBUG_UTIL1, "%s: unknown notification %xh (%u)\n",
+ __FUNCTION__, notification, notification);
+ break;
+ } /* end switch */
+}
+
+void
+sctp_report_all_outbound(struct sctp_tcb *stcb, int holds_lock, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_out *outs;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_stream_queue_pending *sp;
+ int i;
+
+ asoc = &stcb->asoc;
+
+ if (stcb == NULL) {
+ return;
+ }
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
+ return;
+ }
+ /* now go through all the gunk, freeing chunks */
+ if (holds_lock == 0) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ }
+ /* sent queue SHOULD be empty */
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ chk = TAILQ_FIRST(&asoc->sent_queue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
+ asoc->sent_queue_cnt--;
+ if (chk->data) {
+ /*
+ * trim off the sctp chunk header (it should
+ * be there)
+ */
+ if (chk->send_size >= sizeof(struct sctp_data_chunk)) {
+ m_adj(chk->data, sizeof(struct sctp_data_chunk));
+ sctp_mbuf_crush(chk->data);
+ chk->send_size -= sizeof(struct sctp_data_chunk);
+ }
+ }
+ sctp_free_bufspace(stcb, asoc, chk, 1);
+ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb,
+ SCTP_NOTIFY_DATAGRAM_SENT, chk, so_locked);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->sent_queue);
+ }
+ }
+ /* pending send queue SHOULD be empty */
+ if (!TAILQ_EMPTY(&asoc->send_queue)) {
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
+ asoc->send_queue_cnt--;
+ if (chk->data) {
+ /*
+ * trim off the sctp chunk header (it should
+ * be there)
+ */
+ if (chk->send_size >= sizeof(struct sctp_data_chunk)) {
+ m_adj(chk->data, sizeof(struct sctp_data_chunk));
+ sctp_mbuf_crush(chk->data);
+ chk->send_size -= sizeof(struct sctp_data_chunk);
+ }
+ }
+ sctp_free_bufspace(stcb, asoc, chk, 1);
+ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, SCTP_NOTIFY_DATAGRAM_UNSENT, chk, so_locked);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ }
+ }
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ /* For each stream */
+ outs = &stcb->asoc.strmout[i];
+ /* clean up any sends there */
+ stcb->asoc.locked_on_sending = NULL;
+ sp = TAILQ_FIRST(&outs->outqueue);
+ while (sp) {
+ stcb->asoc.stream_queue_cnt--;
+ TAILQ_REMOVE(&outs->outqueue, sp, next);
+ sctp_free_spbufspace(stcb, asoc, sp);
+ sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL, stcb,
+ SCTP_NOTIFY_DATAGRAM_UNSENT, (void *)sp, so_locked);
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ }
+ if (sp->net)
+ sctp_free_remote_addr(sp->net);
+ sp->net = NULL;
+ /* Free the chunk */
+ sctp_free_a_strmoq(stcb, sp);
+ /* sa_ignore FREED_MEMORY */
+ sp = TAILQ_FIRST(&outs->outqueue);
+ }
+ }
+
+ if (holds_lock == 0) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ }
+}
+
+void
+sctp_abort_notification(struct sctp_tcb *stcb, int error, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+
+ if (stcb == NULL) {
+ return;
+ }
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)) {
+ return;
+ }
+ /* Tell them we lost the asoc */
+ sctp_report_all_outbound(stcb, 1, so_locked);
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
+ ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_CONNECTED))) {
+ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_WAS_ABORTED;
+ }
+ sctp_ulp_notify(SCTP_NOTIFY_ASSOC_ABORTED, stcb, error, NULL, so_locked);
+}
+
+void
+sctp_abort_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct mbuf *m, int iphlen, struct sctphdr *sh, struct mbuf *op_err,
+ uint32_t vrf_id)
+{
+ uint32_t vtag;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+ vtag = 0;
+ if (stcb != NULL) {
+ /* We have a TCB to abort, send notification too */
+ vtag = stcb->asoc.peer_vtag;
+ sctp_abort_notification(stcb, 0, SCTP_SO_NOT_LOCKED);
+ /* get the assoc vrf id and table id */
+ vrf_id = stcb->asoc.vrf_id;
+ stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
+ }
+ sctp_send_abort(m, iphlen, sh, vtag, op_err, vrf_id);
+ if (stcb != NULL) {
+ /* Ok, now let's free it */
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_4);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ } else {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ if (LIST_FIRST(&inp->sctp_asoc_list) == NULL) {
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_DIRECTLY_NOCMPSET);
+ }
+ }
+ }
+}
+
+#ifdef SCTP_ASOCLOG_OF_TSNS
+void
+sctp_print_out_track_log(struct sctp_tcb *stcb)
+{
+#ifdef NOSIY_PRINTS
+ int i;
+
+ SCTP_PRINTF("Last ep reason:%x\n", stcb->sctp_ep->last_abort_code);
+ SCTP_PRINTF("IN bound TSN log-aaa\n");
+ if ((stcb->asoc.tsn_in_at == 0) && (stcb->asoc.tsn_in_wrapped == 0)) {
+ SCTP_PRINTF("None rcvd\n");
+ goto none_in;
+ }
+ if (stcb->asoc.tsn_in_wrapped) {
+ for (i = stcb->asoc.tsn_in_at; i < SCTP_TSN_LOG_SIZE; i++) {
+ SCTP_PRINTF("TSN:%x strm:%d seq:%d flags:%x sz:%d\n",
+ stcb->asoc.in_tsnlog[i].tsn,
+ stcb->asoc.in_tsnlog[i].strm,
+ stcb->asoc.in_tsnlog[i].seq,
+ stcb->asoc.in_tsnlog[i].flgs,
+ stcb->asoc.in_tsnlog[i].sz);
+ }
+ }
+ if (stcb->asoc.tsn_in_at) {
+ for (i = 0; i < stcb->asoc.tsn_in_at; i++) {
+ SCTP_PRINTF("TSN:%x strm:%d seq:%d flags:%x sz:%d\n",
+ stcb->asoc.in_tsnlog[i].tsn,
+ stcb->asoc.in_tsnlog[i].strm,
+ stcb->asoc.in_tsnlog[i].seq,
+ stcb->asoc.in_tsnlog[i].flgs,
+ stcb->asoc.in_tsnlog[i].sz);
+ }
+ }
+none_in:
+ SCTP_PRINTF("OUT bound TSN log-aaa\n");
+ if ((stcb->asoc.tsn_out_at == 0) &&
+ (stcb->asoc.tsn_out_wrapped == 0)) {
+ SCTP_PRINTF("None sent\n");
+ }
+ if (stcb->asoc.tsn_out_wrapped) {
+ for (i = stcb->asoc.tsn_out_at; i < SCTP_TSN_LOG_SIZE; i++) {
+ SCTP_PRINTF("TSN:%x strm:%d seq:%d flags:%x sz:%d\n",
+ stcb->asoc.out_tsnlog[i].tsn,
+ stcb->asoc.out_tsnlog[i].strm,
+ stcb->asoc.out_tsnlog[i].seq,
+ stcb->asoc.out_tsnlog[i].flgs,
+ stcb->asoc.out_tsnlog[i].sz);
+ }
+ }
+ if (stcb->asoc.tsn_out_at) {
+ for (i = 0; i < stcb->asoc.tsn_out_at; i++) {
+ SCTP_PRINTF("TSN:%x strm:%d seq:%d flags:%x sz:%d\n",
+ stcb->asoc.out_tsnlog[i].tsn,
+ stcb->asoc.out_tsnlog[i].strm,
+ stcb->asoc.out_tsnlog[i].seq,
+ stcb->asoc.out_tsnlog[i].flgs,
+ stcb->asoc.out_tsnlog[i].sz);
+ }
+ }
+#endif
+}
+
+#endif
+
+void
+sctp_abort_an_association(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ int error, struct mbuf *op_err,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ uint32_t vtag;
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+#endif
+ if (stcb == NULL) {
+ /* Got to have a TCB */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ if (LIST_FIRST(&inp->sctp_asoc_list) == NULL) {
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_DIRECTLY_NOCMPSET);
+ }
+ }
+ return;
+ } else {
+ stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
+ }
+ vtag = stcb->asoc.peer_vtag;
+ /* notify the ulp */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0)
+ sctp_abort_notification(stcb, error, so_locked);
+ /* notify the peer */
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("aborting an association");
+#endif
+ sctp_send_abort_tcb(stcb, op_err, so_locked);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ /* now free the asoc */
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ sctp_print_out_track_log(stcb);
+#endif
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ }
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTPUTIL + SCTP_LOC_5);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+}
+
+void
+sctp_handle_ootb(struct mbuf *m, int iphlen, int offset, struct sctphdr *sh,
+ struct sctp_inpcb *inp, struct mbuf *op_err, uint32_t vrf_id)
+{
+ struct sctp_chunkhdr *ch, chunk_buf;
+ unsigned int chk_length;
+
+ SCTP_STAT_INCR_COUNTER32(sctps_outoftheblue);
+ /* Generate a TO address for future reference */
+ if (inp && (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
+ if (LIST_FIRST(&inp->sctp_asoc_list) == NULL) {
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_DIRECTLY_NOCMPSET);
+ }
+ }
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ sizeof(*ch), (uint8_t *) & chunk_buf);
+ while (ch != NULL) {
+ chk_length = ntohs(ch->chunk_length);
+ if (chk_length < sizeof(*ch)) {
+ /* break to abort land */
+ break;
+ }
+ switch (ch->chunk_type) {
+ case SCTP_COOKIE_ECHO:
+ /* We hit here only if the assoc is being freed */
+ return;
+ case SCTP_PACKET_DROPPED:
+ /* we don't respond to pkt-dropped */
+ return;
+ case SCTP_ABORT_ASSOCIATION:
+ /* we don't respond with an ABORT to an ABORT */
+ return;
+ case SCTP_SHUTDOWN_COMPLETE:
+ /*
+ * we ignore it since we are not waiting for it and
+ * peer is gone
+ */
+ return;
+ case SCTP_SHUTDOWN_ACK:
+ sctp_send_shutdown_complete2(m, iphlen, sh, vrf_id);
+ return;
+ default:
+ break;
+ }
+ offset += SCTP_SIZE32(chk_length);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ sizeof(*ch), (uint8_t *) & chunk_buf);
+ }
+ sctp_send_abort(m, iphlen, sh, 0, op_err, vrf_id);
+}
+
+/*
+ * Check the inbound datagram to make sure there is not an ABORT chunk
+ * inside it; if there is, return 1, else return 0.
+ */
+int
+sctp_is_there_an_abort_here(struct mbuf *m, int iphlen, uint32_t * vtagfill)
+{
+ struct sctp_chunkhdr *ch;
+ struct sctp_init_chunk *init_chk, chunk_buf;
+ int offset;
+ unsigned int chk_length;
+
+ offset = iphlen + sizeof(struct sctphdr);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset, sizeof(*ch),
+ (uint8_t *) & chunk_buf);
+ while (ch != NULL) {
+ chk_length = ntohs(ch->chunk_length);
+ if (chk_length < sizeof(*ch)) {
+ /* packet is probably corrupt */
+ break;
+ }
+ /* we seem to be ok, is it an abort? */
+ if (ch->chunk_type == SCTP_ABORT_ASSOCIATION) {
+ /* yep, tell them */
+ return (1);
+ }
+ if (ch->chunk_type == SCTP_INITIATION) {
+ /* need to update the Vtag */
+ init_chk = (struct sctp_init_chunk *)sctp_m_getptr(m,
+ offset, sizeof(*init_chk), (uint8_t *) & chunk_buf);
+ if (init_chk != NULL) {
+ *vtagfill = ntohl(init_chk->init.initiate_tag);
+ }
+ }
+ /* Nope, move to the next chunk */
+ offset += SCTP_SIZE32(chk_length);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ sizeof(*ch), (uint8_t *) & chunk_buf);
+ }
+ return (0);
+}
+
+/*
+ * Currently (2/02), ifa_addr embeds the scope_id in the address and does
+ * not have sin6_scope_id set (i.e. it's 0), so this function compares
+ * link-local scopes.
+ */
+uint32_t
+sctp_is_same_scope(struct sockaddr_in6 *addr1, struct sockaddr_in6 *addr2)
+{
+ struct sockaddr_in6 a, b;
+
+ /* save copies */
+ a = *addr1;
+ b = *addr2;
+
+ if (a.sin6_scope_id == 0)
+ if (sa6_recoverscope(&a)) {
+ /* can't get scope, so can't match */
+ return (0);
+ }
+ if (b.sin6_scope_id == 0)
+ if (sa6_recoverscope(&b)) {
+ /* can't get scope, so can't match */
+ return (0);
+ }
+ if (a.sin6_scope_id != b.sin6_scope_id)
+ return (0);
+
+ return (1);
+}
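+
+/*
+ * Example of why this matters: two peers may both use the same link-local
+ * address (say fe80::1) on different links; only the recovered scope/zone
+ * id distinguishes them, so addresses whose scope ids differ (or cannot
+ * be recovered) are treated as not matching.
+ */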
+
+/*
+ * returns a sockaddr_in6 with embedded scope recovered and removed
+ */
+struct sockaddr_in6 *
+sctp_recover_scope(struct sockaddr_in6 *addr, struct sockaddr_in6 *store)
+{
+ /* check and strip embedded scope junk */
+ if (addr->sin6_family == AF_INET6) {
+ if (IN6_IS_SCOPE_LINKLOCAL(&addr->sin6_addr)) {
+ if (addr->sin6_scope_id == 0) {
+ *store = *addr;
+ if (!sa6_recoverscope(store)) {
+ /* use the recovered scope */
+ addr = store;
+ }
+ } else {
+ /* else, return the original "to" addr */
+ in6_clearscope(&addr->sin6_addr);
+ }
+ }
+ }
+ return (addr);
+}
+
+/*
+ * Are the two addresses the same? Currently this is a "scopeless" check
+ * (scope ids are not compared). Returns 1 if same, 0 if not.
+ */
+int
+sctp_cmpaddr(struct sockaddr *sa1, struct sockaddr *sa2)
+{
+
+ /* must be valid */
+ if (sa1 == NULL || sa2 == NULL)
+ return (0);
+
+ /* must be the same family */
+ if (sa1->sa_family != sa2->sa_family)
+ return (0);
+
+ if (sa1->sa_family == AF_INET6) {
+ /* IPv6 addresses */
+ struct sockaddr_in6 *sin6_1, *sin6_2;
+
+ sin6_1 = (struct sockaddr_in6 *)sa1;
+ sin6_2 = (struct sockaddr_in6 *)sa2;
+ return (SCTP6_ARE_ADDR_EQUAL(&sin6_1->sin6_addr,
+ &sin6_2->sin6_addr));
+ } else if (sa1->sa_family == AF_INET) {
+ /* IPv4 addresses */
+ struct sockaddr_in *sin_1, *sin_2;
+
+ sin_1 = (struct sockaddr_in *)sa1;
+ sin_2 = (struct sockaddr_in *)sa2;
+ return (sin_1->sin_addr.s_addr == sin_2->sin_addr.s_addr);
+ } else {
+ /* we don't do these... */
+ return (0);
+ }
+}
+
+void
+sctp_print_address(struct sockaddr *sa)
+{
+ char ip6buf[INET6_ADDRSTRLEN];
+
+ ip6buf[0] = 0;
+ if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ SCTP_PRINTF("IPv6 address: %s:port:%d scope:%u\n",
+ ip6_sprintf(ip6buf, &sin6->sin6_addr),
+ ntohs(sin6->sin6_port),
+ sin6->sin6_scope_id);
+ } else if (sa->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+ unsigned char *p;
+
+ sin = (struct sockaddr_in *)sa;
+ p = (unsigned char *)&sin->sin_addr;
+ SCTP_PRINTF("IPv4 address: %u.%u.%u.%u:%d\n",
+ p[0], p[1], p[2], p[3], ntohs(sin->sin_port));
+ } else {
+ SCTP_PRINTF("?\n");
+ }
+}
+
+void
+sctp_print_address_pkt(struct ip *iph, struct sctphdr *sh)
+{
+ if (iph->ip_v == IPVERSION) {
+ struct sockaddr_in lsa, fsa;
+
+ bzero(&lsa, sizeof(lsa));
+ lsa.sin_len = sizeof(lsa);
+ lsa.sin_family = AF_INET;
+ lsa.sin_addr = iph->ip_src;
+ lsa.sin_port = sh->src_port;
+ bzero(&fsa, sizeof(fsa));
+ fsa.sin_len = sizeof(fsa);
+ fsa.sin_family = AF_INET;
+ fsa.sin_addr = iph->ip_dst;
+ fsa.sin_port = sh->dest_port;
+ SCTP_PRINTF("src: ");
+ sctp_print_address((struct sockaddr *)&lsa);
+ SCTP_PRINTF("dest: ");
+ sctp_print_address((struct sockaddr *)&fsa);
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 lsa6, fsa6;
+
+ ip6 = (struct ip6_hdr *)iph;
+ bzero(&lsa6, sizeof(lsa6));
+ lsa6.sin6_len = sizeof(lsa6);
+ lsa6.sin6_family = AF_INET6;
+ lsa6.sin6_addr = ip6->ip6_src;
+ lsa6.sin6_port = sh->src_port;
+ bzero(&fsa6, sizeof(fsa6));
+ fsa6.sin6_len = sizeof(fsa6);
+ fsa6.sin6_family = AF_INET6;
+ fsa6.sin6_addr = ip6->ip6_dst;
+ fsa6.sin6_port = sh->dest_port;
+ SCTP_PRINTF("src: ");
+ sctp_print_address((struct sockaddr *)&lsa6);
+ SCTP_PRINTF("dest: ");
+ sctp_print_address((struct sockaddr *)&fsa6);
+ }
+}
+
+void
+sctp_pull_off_control_to_new_inp(struct sctp_inpcb *old_inp,
+ struct sctp_inpcb *new_inp,
+ struct sctp_tcb *stcb,
+ int waitflags)
+{
+ /*
+ * go through our old INP and pull off any control structures that
+ * belong to stcb and move them to the new inp.
+ */
+ struct socket *old_so, *new_so;
+ struct sctp_queued_to_read *control, *nctl;
+ struct sctp_readhead tmp_queue;
+ struct mbuf *m;
+ int error = 0;
+
+ old_so = old_inp->sctp_socket;
+ new_so = new_inp->sctp_socket;
+ TAILQ_INIT(&tmp_queue);
+ error = sblock(&old_so->so_rcv, waitflags);
+ if (error) {
+ /*
+ * Gak, can't get sblock, we have a problem. Data will be
+ * left stranded, and we don't dare look at it since the
+ * other thread may be reading something. Oh well, it's a
+ * screwed up app that does a peeloff OR an accept while
+ * reading from the main socket... actually it's only the
+ * peeloff() case, since I think read will fail on a
+ * listening socket.
+ */
+ return;
+ }
+ /* lock the socket buffers */
+ SCTP_INP_READ_LOCK(old_inp);
+ control = TAILQ_FIRST(&old_inp->read_queue);
+ /* Pull off everything for our target stcb */
+ while (control) {
+ nctl = TAILQ_NEXT(control, next);
+ if (control->stcb == stcb) {
+ /* remove it we want it */
+ TAILQ_REMOVE(&old_inp->read_queue, control, next);
+ TAILQ_INSERT_TAIL(&tmp_queue, control, next);
+ m = control->data;
+ while (m) {
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&old_so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, SCTP_BUF_LEN(m));
+ }
+ sctp_sbfree(control, stcb, &old_so->so_rcv, m);
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&old_so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ }
+ control = nctl;
+ }
+ SCTP_INP_READ_UNLOCK(old_inp);
+ /* Remove the sb-lock on the old socket */
+
+ sbunlock(&old_so->so_rcv);
+ /* Now we move them over to the new socket buffer */
+ control = TAILQ_FIRST(&tmp_queue);
+ SCTP_INP_READ_LOCK(new_inp);
+ while (control) {
+ nctl = TAILQ_NEXT(control, next);
+ TAILQ_INSERT_TAIL(&new_inp->read_queue, control, next);
+ m = control->data;
+ while (m) {
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&new_so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBALLOC, SCTP_BUF_LEN(m));
+ }
+ sctp_sballoc(stcb, &new_so->so_rcv, m);
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&new_so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ control = nctl;
+ }
+ SCTP_INP_READ_UNLOCK(new_inp);
+}
+
+
+void
+sctp_add_to_readq(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_queued_to_read *control,
+ struct sockbuf *sb,
+ int end,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ /*
+ * Here we must place the control on the end of the socket read
+ * queue AND increment sb_cc so that select will work properly on
+ * read.
+ */
+ struct mbuf *m, *prev = NULL;
+
+ if (inp == NULL) {
+ /* Gak, TSNH!! */
+#ifdef INVARIANTS
+ panic("Gak, inp NULL on add_to_readq");
+#endif
+ return;
+ }
+ SCTP_INP_READ_LOCK(inp);
+ if (!(control->spec_flags & M_NOTIFICATION)) {
+ atomic_add_int(&inp->total_recvs, 1);
+ if (!control->do_not_ref_stcb) {
+ atomic_add_int(&stcb->total_recvs, 1);
+ }
+ }
+ m = control->data;
+ control->held_length = 0;
+ control->length = 0;
+ while (m) {
+ if (SCTP_BUF_LEN(m) == 0) {
+ /* Skip mbufs with NO length */
+ if (prev == NULL) {
+ /* First one */
+ control->data = sctp_m_free(m);
+ m = control->data;
+ } else {
+ SCTP_BUF_NEXT(prev) = sctp_m_free(m);
+ m = SCTP_BUF_NEXT(prev);
+ }
+ if (m == NULL) {
+ control->tail_mbuf = prev;
+ }
+ continue;
+ }
+ prev = m;
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBALLOC, SCTP_BUF_LEN(m));
+ }
+ sctp_sballoc(stcb, sb, m);
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ atomic_add_int(&control->length, SCTP_BUF_LEN(m));
+ m = SCTP_BUF_NEXT(m);
+ }
+ if (prev != NULL) {
+ control->tail_mbuf = prev;
+ } else {
+ /* Everything got collapsed out?? */
+ SCTP_INP_READ_UNLOCK(inp);
+ return;
+ }
+ if (end) {
+ control->end_added = 1;
+ }
+ TAILQ_INSERT_TAIL(&inp->read_queue, control, next);
+ SCTP_INP_READ_UNLOCK(inp);
+ if (inp && inp->sctp_socket) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
+ SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
+ } else {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(inp);
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return;
+ }
+ }
+#endif
+ sctp_sorwakeup(inp, inp->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+ }
+ }
+}
+
+
+int
+sctp_append_to_readq(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_queued_to_read *control,
+ struct mbuf *m,
+ int end,
+ int ctls_cumack,
+ struct sockbuf *sb)
+{
+ /*
+ * A partial delivery API event is underway. OR we are appending on
+ * the reassembly queue.
+ *
+ * If PDAPI this means we need to add m to the end of the data.
+ * Increase the length in the control AND increment the sb_cc.
+ * Otherwise sb is NULL and all we need to do is put it at the end
+ * of the mbuf chain.
+ */
+ int len = 0;
+ struct mbuf *mm, *tail = NULL, *prev = NULL;
+
+ if (inp) {
+ SCTP_INP_READ_LOCK(inp);
+ }
+ if (control == NULL) {
+get_out:
+ if (inp) {
+ SCTP_INP_READ_UNLOCK(inp);
+ }
+ return (-1);
+ }
+ if (control->end_added) {
+ /* huh this one is complete? */
+ goto get_out;
+ }
+ mm = m;
+ if (mm == NULL) {
+ goto get_out;
+ }
+ while (mm) {
+ if (SCTP_BUF_LEN(mm) == 0) {
+ /* Skip mbufs with NO length */
+ if (prev == NULL) {
+ /* First one */
+ m = sctp_m_free(mm);
+ mm = m;
+ } else {
+ SCTP_BUF_NEXT(prev) = sctp_m_free(mm);
+ mm = SCTP_BUF_NEXT(prev);
+ }
+ continue;
+ }
+ prev = mm;
+ len += SCTP_BUF_LEN(mm);
+ if (sb) {
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBALLOC, SCTP_BUF_LEN(mm));
+ }
+ sctp_sballoc(stcb, sb, mm);
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(sb, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ }
+ mm = SCTP_BUF_NEXT(mm);
+ }
+ if (prev) {
+ tail = prev;
+ } else {
+ /* Really there should always be a prev */
+ if (m == NULL) {
+ /* Huh nothing left? */
+#ifdef INVARIANTS
+ panic("Nothing left to add?");
+#else
+ goto get_out;
+#endif
+ }
+ tail = m;
+ }
+ if (control->tail_mbuf) {
+ /* append */
+ SCTP_BUF_NEXT(control->tail_mbuf) = m;
+ control->tail_mbuf = tail;
+ } else {
+ /* nothing there */
+#ifdef INVARIANTS
+ if (control->data != NULL) {
+ panic("This should NOT happen");
+ }
+#endif
+ control->data = m;
+ control->tail_mbuf = tail;
+ }
+ atomic_add_int(&control->length, len);
+ if (end) {
+ /* message is complete */
+ if (stcb && (control == stcb->asoc.control_pdapi)) {
+ stcb->asoc.control_pdapi = NULL;
+ }
+ control->held_length = 0;
+ control->end_added = 1;
+ }
+ if (stcb == NULL) {
+ control->do_not_ref_stcb = 1;
+ }
+ /*
+ * When we are appending in partial delivery, the cum-ack is used
+ * for the actual pd-api highest tsn on this mbuf. The true cum-ack
+ * is populated in the outbound sinfo structure from the true cumack
+ * if the association exists...
+ */
+ control->sinfo_tsn = control->sinfo_cumtsn = ctls_cumack;
+ if (inp) {
+ SCTP_INP_READ_UNLOCK(inp);
+ }
+ if (inp && inp->sctp_socket) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ZERO_COPY_ACTIVE)) {
+ SCTP_ZERO_COPY_EVENT(inp, inp->sctp_socket);
+ } else {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (0);
+ }
+#endif
+ sctp_sorwakeup(inp, inp->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+#endif
+ }
+ }
+ return (0);
+}
+
+
+
+/*************HOLD THIS COMMENT FOR PATCH FILE OF
+ *************ALTERNATE ROUTING CODE
+ */
+
+/*************HOLD THIS COMMENT FOR END OF PATCH FILE OF
+ *************ALTERNATE ROUTING CODE
+ */
+
+struct mbuf *
+sctp_generate_invmanparam(int err)
+{
+ /* Return an mbuf with an invalid mandatory parameter */
+ struct mbuf *m;
+
+ m = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA);
+ if (m) {
+ struct sctp_paramhdr *ph;
+
+ SCTP_BUF_LEN(m) = sizeof(struct sctp_paramhdr);
+ ph = mtod(m, struct sctp_paramhdr *);
+ ph->param_length = htons(sizeof(struct sctp_paramhdr));
+ ph->param_type = htons(err);
+ }
+ return (m);
+}
+
+#ifdef SCTP_MBCNT_LOGGING
+void
+sctp_free_bufspace(struct sctp_tcb *stcb, struct sctp_association *asoc,
+ struct sctp_tmit_chunk *tp1, int chk_cnt)
+{
+ if (tp1->data == NULL) {
+ return;
+ }
+ asoc->chunks_on_out_queue -= chk_cnt;
+ if (sctp_logging_level & SCTP_MBCNT_LOGGING_ENABLE) {
+ sctp_log_mbcnt(SCTP_LOG_MBCNT_DECREASE,
+ asoc->total_output_queue_size,
+ tp1->book_size,
+ 0,
+ tp1->mbcnt);
+ }
+ if (asoc->total_output_queue_size >= tp1->book_size) {
+ atomic_add_int(&asoc->total_output_queue_size, -tp1->book_size);
+ } else {
+ asoc->total_output_queue_size = 0;
+ }
+
+ if (stcb->sctp_socket && (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) ||
+ ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE)))) {
+ if (stcb->sctp_socket->so_snd.sb_cc >= tp1->book_size) {
+ stcb->sctp_socket->so_snd.sb_cc -= tp1->book_size;
+ } else {
+ stcb->sctp_socket->so_snd.sb_cc = 0;
+
+ }
+ }
+}
+
+#endif
+
+int
+sctp_release_pr_sctp_chunk(struct sctp_tcb *stcb, struct sctp_tmit_chunk *tp1,
+ int reason, struct sctpchunk_listhead *queue, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ int ret_sz = 0;
+ int notdone;
+ uint8_t foundeom = 0;
+
+ do {
+ ret_sz += tp1->book_size;
+ tp1->sent = SCTP_FORWARD_TSN_SKIP;
+ if (tp1->data) {
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ sctp_free_bufspace(stcb, &stcb->asoc, tp1, 1);
+ sctp_ulp_notify(SCTP_NOTIFY_DG_FAIL, stcb, reason, tp1, SCTP_SO_NOT_LOCKED);
+ sctp_m_freem(tp1->data);
+ tp1->data = NULL;
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(stcb->sctp_ep);
+ if (!so_locked) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ /*
+ * assoc was freed while we were
+ * unlocked
+ */
+ SCTP_SOCKET_UNLOCK(so, 1);
+ return (ret_sz);
+ }
+ }
+#endif
+ sctp_sowwakeup(stcb->sctp_ep, stcb->sctp_socket);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ if (!so_locked) {
+ SCTP_SOCKET_UNLOCK(so, 1);
+ }
+#endif
+ }
+ if (PR_SCTP_BUF_ENABLED(tp1->flags)) {
+ stcb->asoc.sent_queue_cnt_removeable--;
+ }
+ if (queue == &stcb->asoc.send_queue) {
+ TAILQ_REMOVE(&stcb->asoc.send_queue, tp1, sctp_next);
+ /* on to the sent queue */
+ TAILQ_INSERT_TAIL(&stcb->asoc.sent_queue, tp1,
+ sctp_next);
+ stcb->asoc.sent_queue_cnt++;
+ }
+ if ((tp1->rec.data.rcv_flags & SCTP_DATA_NOT_FRAG) ==
+ SCTP_DATA_NOT_FRAG) {
+ /* not frag'ed, we are done */
+ notdone = 0;
+ foundeom = 1;
+ } else if (tp1->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) {
+ /* end of frag, we are done */
+ notdone = 0;
+ foundeom = 1;
+ } else {
+ /*
+ * It's a begin or middle piece; we must mark all of
+ * it
+ */
+ notdone = 1;
+ tp1 = TAILQ_NEXT(tp1, sctp_next);
+ }
+ } while (tp1 && notdone);
+ if ((foundeom == 0) && (queue == &stcb->asoc.sent_queue)) {
+ /*
+ * The multi-part message was scattered across the send and
+ * sent queue.
+ */
+ tp1 = TAILQ_FIRST(&stcb->asoc.send_queue);
+ /*
+		 * recurse through the send_queue too, starting at the
+ * beginning.
+ */
+ if (tp1) {
+ ret_sz += sctp_release_pr_sctp_chunk(stcb, tp1, reason,
+ &stcb->asoc.send_queue, so_locked);
+ } else {
+ SCTP_PRINTF("hmm, nothing on the send queue and no EOM?\n");
+ }
+ }
+ return (ret_sz);
+}
+
+/*
+ * Checks whether the given address, addr, is one currently known to the
+ * kernel.  Note: cannot distinguish the same address on multiple interfaces,
+ * and does not handle multiple addresses with different zone/scope ids.
+ * Note: ifa_ifwithaddr() compares the entire sockaddr struct.
+ */
+struct sctp_ifa *
+sctp_find_ifa_in_ep(struct sctp_inpcb *inp, struct sockaddr *addr,
+ int holds_lock)
+{
+ struct sctp_laddr *laddr;
+
+ if (holds_lock == 0) {
+ SCTP_INP_RLOCK(inp);
+ }
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL)
+ continue;
+ if (addr->sa_family != laddr->ifa->address.sa.sa_family)
+ continue;
+ if (addr->sa_family == AF_INET) {
+ if (((struct sockaddr_in *)addr)->sin_addr.s_addr ==
+ laddr->ifa->address.sin.sin_addr.s_addr) {
+ /* found him. */
+ if (holds_lock == 0) {
+ SCTP_INP_RUNLOCK(inp);
+ }
+ return (laddr->ifa);
+ break;
+ }
+ } else if (addr->sa_family == AF_INET6) {
+ if (SCTP6_ARE_ADDR_EQUAL(&((struct sockaddr_in6 *)addr)->sin6_addr,
+ &laddr->ifa->address.sin6.sin6_addr)) {
+ /* found him. */
+ if (holds_lock == 0) {
+ SCTP_INP_RUNLOCK(inp);
+ }
+ return (laddr->ifa);
+ break;
+ }
+ }
+ }
+ if (holds_lock == 0) {
+ SCTP_INP_RUNLOCK(inp);
+ }
+ return (NULL);
+}
+
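+/*
+ * Hash a sockaddr for the address hash table lookups below: fold the IPv4
+ * address with its upper 16 bits, or sum the four 32-bit words of an IPv6
+ * address and fold the result.  Unknown address families hash to 0.
+ */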
+uint32_t
+sctp_get_ifa_hash_val(struct sockaddr *addr)
+{
+ if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)addr;
+ return (sin->sin_addr.s_addr ^ (sin->sin_addr.s_addr >> 16));
+ } else if (addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+ uint32_t hash_of_addr;
+
+ sin6 = (struct sockaddr_in6 *)addr;
+ hash_of_addr = (sin6->sin6_addr.s6_addr32[0] +
+ sin6->sin6_addr.s6_addr32[1] +
+ sin6->sin6_addr.s6_addr32[2] +
+ sin6->sin6_addr.s6_addr32[3]);
+ hash_of_addr = (hash_of_addr ^ (hash_of_addr >> 16));
+ return (hash_of_addr);
+ }
+ return (0);
+}
+
+struct sctp_ifa *
+sctp_find_ifa_by_addr(struct sockaddr *addr, uint32_t vrf_id, int holds_lock)
+{
+ struct sctp_ifa *sctp_ifap;
+ struct sctp_vrf *vrf;
+ struct sctp_ifalist *hash_head;
+ uint32_t hash_of_addr;
+
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RLOCK();
+
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (NULL);
+ }
+ hash_of_addr = sctp_get_ifa_hash_val(addr);
+
+ hash_head = &vrf->vrf_addr_hash[(hash_of_addr & vrf->vrf_addr_hashmark)];
+ if (hash_head == NULL) {
+ SCTP_PRINTF("hash_of_addr:%x mask:%x table:%x - ",
+ hash_of_addr, (uint32_t) vrf->vrf_addr_hashmark,
+ (uint32_t) (hash_of_addr & vrf->vrf_addr_hashmark));
+ sctp_print_address(addr);
+ SCTP_PRINTF("No such bucket for address\n");
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RUNLOCK();
+
+ return (NULL);
+ }
+ LIST_FOREACH(sctp_ifap, hash_head, next_bucket) {
+ if (sctp_ifap == NULL) {
+ panic("Huh LIST_FOREACH corrupt");
+ }
+ if (addr->sa_family != sctp_ifap->address.sa.sa_family)
+ continue;
+ if (addr->sa_family == AF_INET) {
+ if (((struct sockaddr_in *)addr)->sin_addr.s_addr ==
+ sctp_ifap->address.sin.sin_addr.s_addr) {
+ /* found him. */
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (sctp_ifap);
+ break;
+ }
+ } else if (addr->sa_family == AF_INET6) {
+ if (SCTP6_ARE_ADDR_EQUAL(&((struct sockaddr_in6 *)addr)->sin6_addr,
+ &sctp_ifap->address.sin6.sin6_addr)) {
+ /* found him. */
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (sctp_ifap);
+ break;
+ }
+ }
+ }
+ if (holds_lock == 0)
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (NULL);
+}
+
+static void
+sctp_user_rcvd(struct sctp_tcb *stcb, uint32_t * freed_so_far, int hold_rlock,
+ uint32_t rwnd_req)
+{
+ /* User pulled some data, do we need a rwnd update? */
+ int r_unlocked = 0;
+ uint32_t dif, rwnd;
+ struct socket *so = NULL;
+
+ if (stcb == NULL)
+ return;
+
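+	/*
+	 * Hold a reference on the association so it cannot be freed while
+	 * we decide whether a window-update SACK is warranted.
+	 */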
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+
+ if (stcb->asoc.state & (SCTP_STATE_ABOUT_TO_BE_FREED |
+ SCTP_STATE_SHUTDOWN_RECEIVED |
+ SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+		/* Pre-check: if we are freeing or shutting down, no update needed */
+ goto no_lock;
+ }
+ SCTP_INP_INCR_REF(stcb->sctp_ep);
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
+ goto out;
+ }
+ so = stcb->sctp_socket;
+ if (so == NULL) {
+ goto out;
+ }
+ atomic_add_int(&stcb->freed_by_sorcv_sincelast, *freed_so_far);
+	/* Have we freed enough to warrant a look? */
+	*freed_so_far = 0;
+	/* Yes, it's worth a look and the lock overhead */
+
+ /* Figure out what the rwnd would be */
+ rwnd = sctp_calc_rwnd(stcb, &stcb->asoc);
+ if (rwnd >= stcb->asoc.my_last_reported_rwnd) {
+ dif = rwnd - stcb->asoc.my_last_reported_rwnd;
+ } else {
+ dif = 0;
+ }
+ if (dif >= rwnd_req) {
+ if (hold_rlock) {
+ SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
+ r_unlocked = 1;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ /*
+			 * One last check before we possibly let the caller
+			 * in.  There is a race where the free has not yet
+			 * reached the gate; in that case we skip the update.
+ */
+ goto out;
+ }
+ SCTP_TCB_LOCK(stcb);
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ /* No reports here */
+ SCTP_TCB_UNLOCK(stcb);
+ goto out;
+ }
+ SCTP_STAT_INCR(sctps_wu_sacks_sent);
+ sctp_send_sack(stcb);
+ sctp_chunk_output(stcb->sctp_ep, stcb,
+ SCTP_OUTPUT_FROM_USR_RCVD, SCTP_SO_LOCKED);
+ /* make sure no timer is running */
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTPUTIL + SCTP_LOC_6);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* Update how much we have pending */
+ stcb->freed_by_sorcv_sincelast = dif;
+ }
+out:
+ if (so && r_unlocked && hold_rlock) {
+ SCTP_INP_READ_LOCK(stcb->sctp_ep);
+ }
+ SCTP_INP_DECR_REF(stcb->sctp_ep);
+no_lock:
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ return;
+}
+
+int
+sctp_sorecvmsg(struct socket *so,
+ struct uio *uio,
+ struct mbuf **mp,
+ struct sockaddr *from,
+ int fromlen,
+ int *msg_flags,
+ struct sctp_sndrcvinfo *sinfo,
+ int filling_sinfo)
+{
+ /*
+	 * MSG flags we will look at:
+	 * MSG_DONTWAIT - non-blocking IO.
+	 * MSG_PEEK - look, don't touch (only valid without an mbuf copy,
+	 *            i.e. mp == NULL, so uio is the copy method to userland).
+	 * MSG_WAITALL - ??
+	 * On the way out we may set any combination of:
+	 * MSG_NOTIFICATION, MSG_EOR.
+ */
+ struct sctp_inpcb *inp = NULL;
+ int my_len = 0;
+ int cp_len = 0, error = 0;
+ struct sctp_queued_to_read *control = NULL, *ctl = NULL, *nxt = NULL;
+ struct mbuf *m = NULL, *embuf = NULL;
+ struct sctp_tcb *stcb = NULL;
+ int wakeup_read_socket = 0;
+ int freecnt_applied = 0;
+ int out_flags = 0, in_flags = 0;
+ int block_allowed = 1;
+ uint32_t freed_so_far = 0;
+ uint32_t copied_so_far = 0;
+ int in_eeor_mode = 0;
+ int no_rcv_needed = 0;
+ uint32_t rwnd_req = 0;
+ int hold_sblock = 0;
+ int hold_rlock = 0;
+ int slen = 0;
+ uint32_t held_length = 0;
+ int sockbuf_lock = 0;
+
+ if (uio == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ return (EINVAL);
+ }
+ if (msg_flags) {
+ in_flags = *msg_flags;
+ if (in_flags & MSG_PEEK)
+ SCTP_STAT_INCR(sctps_read_peeks);
+ } else {
+ in_flags = 0;
+ }
+ slen = uio->uio_resid;
+
+ /* Pull in and set up our int flags */
+ if (in_flags & MSG_OOB) {
+		/* Out-of-band data is NOT supported */
+ return (EOPNOTSUPP);
+ }
+ if ((in_flags & MSG_PEEK) && (mp != NULL)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ return (EINVAL);
+ }
+ if ((in_flags & (MSG_DONTWAIT
+ | MSG_NBIO
+ )) ||
+ SCTP_SO_IS_NBIO(so)) {
+ block_allowed = 0;
+ }
+ /* setup the endpoint */
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EFAULT);
+ return (EFAULT);
+ }
+ rwnd_req = (SCTP_SB_LIMIT_RCV(so) >> SCTP_RWND_HIWAT_SHIFT);
+ /* Must be at least a MTU's worth */
+ if (rwnd_req < SCTP_MIN_RWND)
+ rwnd_req = SCTP_MIN_RWND;
+ in_eeor_mode = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR);
+ if (sctp_logging_level & SCTP_RECV_RWND_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_SORECV_ENTER,
+ rwnd_req, in_eeor_mode, so->so_rcv.sb_cc, uio->uio_resid);
+ }
+ if (sctp_logging_level & SCTP_RECV_RWND_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_SORECV_ENTERPL,
+ rwnd_req, block_allowed, so->so_rcv.sb_cc, uio->uio_resid);
+ }
+ error = sblock(&so->so_rcv, (block_allowed ? SBL_WAIT : 0));
+ sockbuf_lock = 1;
+ if (error) {
+ goto release_unlocked;
+ }
+restart:
+
+
+restart_nosblocks:
+ if (hold_sblock == 0) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ hold_sblock = 1;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
+ goto out;
+ }
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ if (so->so_error) {
+ error = so->so_error;
+ if ((in_flags & MSG_PEEK) == 0)
+ so->so_error = 0;
+ goto out;
+ } else {
+ if (so->so_rcv.sb_cc == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOTCONN);
+ /* indicate EOF */
+ error = 0;
+ goto out;
+ }
+ }
+ }
+ if ((so->so_rcv.sb_cc <= held_length) && block_allowed) {
+ /* we need to wait for data */
+ if ((so->so_rcv.sb_cc == 0) &&
+ ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0) {
+ /*
+				 * For the active open side, clear flags for
+				 * re-use; a passive open is blocked by
+				 * connect.
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_WAS_ABORTED) {
+ /*
+ * You were aborted, passive side
+ * always hits here
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET);
+ error = ECONNRESET;
+ /*
+ * You get this once if you are
+ * active open side
+ */
+ if (!(inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /*
+ * Remove flag if on the
+ * active open side
+ */
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAS_ABORTED;
+ }
+ }
+ so->so_state &= ~(SS_ISCONNECTING |
+ SS_ISDISCONNECTING |
+ SS_ISCONFIRMING |
+ SS_ISCONNECTED);
+ if (error == 0) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_WAS_CONNECTED) == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOTCONN);
+ error = ENOTCONN;
+ } else {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAS_CONNECTED;
+ }
+ }
+ goto out;
+ }
+ }
+ error = sbwait(&so->so_rcv);
+ if (error) {
+ goto out;
+ }
+ held_length = 0;
+ goto restart_nosblocks;
+ } else if (so->so_rcv.sb_cc == 0) {
+ if (so->so_error) {
+ error = so->so_error;
+ if ((in_flags & MSG_PEEK) == 0)
+ so->so_error = 0;
+ } else {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0) {
+ /*
+					 * For the active open side, clear
+					 * flags for re-use; a passive open
+					 * is blocked by connect.
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_WAS_ABORTED) {
+ /*
+ * You were aborted, passive
+ * side always hits here
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ECONNRESET);
+ error = ECONNRESET;
+ /*
+ * You get this once if you
+ * are active open side
+ */
+ if (!(inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /*
+ * Remove flag if on
+ * the active open
+ * side
+ */
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAS_ABORTED;
+ }
+ }
+ so->so_state &= ~(SS_ISCONNECTING |
+ SS_ISDISCONNECTING |
+ SS_ISCONFIRMING |
+ SS_ISCONNECTED);
+ if (error == 0) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_WAS_CONNECTED) == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOTCONN);
+ error = ENOTCONN;
+ } else {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAS_CONNECTED;
+ }
+ }
+ goto out;
+ }
+ }
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EWOULDBLOCK);
+ error = EWOULDBLOCK;
+ }
+ goto out;
+ }
+ if (hold_sblock == 1) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ /* we possibly have data we can read */
+ /* sa_ignore FREED_MEMORY */
+ control = TAILQ_FIRST(&inp->read_queue);
+ if (control == NULL) {
+ /*
+		 * This could happen because the appender did the
+		 * increment but has not yet done the tailq insert onto
+		 * the read_queue.
+ */
+ if (hold_rlock == 0) {
+ SCTP_INP_READ_LOCK(inp);
+ hold_rlock = 1;
+ }
+ control = TAILQ_FIRST(&inp->read_queue);
+ if ((control == NULL) && (so->so_rcv.sb_cc != 0)) {
+#ifdef INVARIANTS
+ panic("Huh, its non zero and nothing on control?");
+#endif
+ so->so_rcv.sb_cc = 0;
+ }
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ goto restart;
+ }
+ if ((control->length == 0) &&
+ (control->do_not_ref_stcb)) {
+ /*
+		 * Clean-up code for freeing an assoc that left behind a
+		 * pdapi... maybe a peer in EEOR mode that just closed after
+		 * sending and never indicated an EOR.
+ */
+ if (hold_rlock == 0) {
+ hold_rlock = 1;
+ SCTP_INP_READ_LOCK(inp);
+ }
+ control->held_length = 0;
+ if (control->data) {
+ /* Hmm there is data here .. fix */
+ struct mbuf *m_tmp;
+ int cnt = 0;
+
+ m_tmp = control->data;
+ while (m_tmp) {
+ cnt += SCTP_BUF_LEN(m_tmp);
+ if (SCTP_BUF_NEXT(m_tmp) == NULL) {
+ control->tail_mbuf = m_tmp;
+ control->end_added = 1;
+ }
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+ control->length = cnt;
+ } else {
+ /* remove it */
+ TAILQ_REMOVE(&inp->read_queue, control, next);
+			/* Add back any hidden data */
+ sctp_free_remote_addr(control->whoFrom);
+ sctp_free_a_readq(stcb, control);
+ }
+ if (hold_rlock) {
+ hold_rlock = 0;
+ SCTP_INP_READ_UNLOCK(inp);
+ }
+ goto restart;
+ }
+ if (control->length == 0) {
+ if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE)) &&
+ (filling_sinfo)) {
+			/* find a more suitable one than this */
+ ctl = TAILQ_NEXT(control, next);
+ while (ctl) {
+ if ((ctl->stcb != control->stcb) && (ctl->length) &&
+ (ctl->some_taken ||
+ (ctl->spec_flags & M_NOTIFICATION) ||
+ ((ctl->do_not_ref_stcb == 0) &&
+ (ctl->stcb->asoc.strmin[ctl->sinfo_stream].delivery_started == 0)))
+ ) {
+ /*-
+					 * If we have a different TCB next, and there is data
+					 * present, and either we have already taken some (pdapi)
+					 * OR we can ref the tcb and no delivery has started on
+					 * this stream, we take it.  Note we allow a notification
+					 * on a different assoc to be delivered.
+ */
+ control = ctl;
+ goto found_one;
+ } else if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS)) &&
+ (ctl->length) &&
+ ((ctl->some_taken) ||
+ ((ctl->do_not_ref_stcb == 0) &&
+ ((ctl->spec_flags & M_NOTIFICATION) == 0) &&
+ (ctl->stcb->asoc.strmin[ctl->sinfo_stream].delivery_started == 0)))
+ ) {
+ /*-
+					 * If we have the same tcb, there is data present, and we
+					 * have the stream interleave feature on, then if we have
+					 * taken some (pdapi) or we can refer to that tcb AND we
+					 * have not started a delivery for this stream, we can take
+					 * it.  Note we do NOT allow a notification on the same
+					 * assoc to be delivered.
+ */
+ control = ctl;
+ goto found_one;
+ }
+ ctl = TAILQ_NEXT(ctl, next);
+ }
+ }
+ /*
+		 * If we reach here, no suitable replacement is available
+		 * <or> fragment interleave is NOT on.  So stuff the sb_cc
+		 * into our held count, and it's time to sleep again.
+ */
+ held_length = so->so_rcv.sb_cc;
+ control->held_length = so->so_rcv.sb_cc;
+ goto restart;
+ }
+ /* Clear the held length since there is something to read */
+ control->held_length = 0;
+ if (hold_rlock) {
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ }
+found_one:
+ /*
+	 * If we reach here, control has some data for us to read off.
+ * Note that stcb COULD be NULL.
+ */
+ control->some_taken++;
+ if (hold_sblock) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ stcb = control->stcb;
+ if (stcb) {
+ if ((control->do_not_ref_stcb == 0) &&
+ (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED)) {
+ if (freecnt_applied == 0)
+ stcb = NULL;
+ } else if (control->do_not_ref_stcb == 0) {
+ /* you can't free it on me please */
+ /*
+ * The lock on the socket buffer protects us so the
+ * free code will stop. But since we used the
+ * socketbuf lock and the sender uses the tcb_lock
+ * to increment, we need to use the atomic add to
+ * the refcnt
+ */
+ if (freecnt_applied) {
+#ifdef INVARIANTS
+ panic("refcnt already incremented");
+#else
+ printf("refcnt already incremented?\n");
+#endif
+ } else {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ freecnt_applied = 1;
+ }
+ /*
+ * Setup to remember how much we have not yet told
+ * the peer our rwnd has opened up. Note we grab the
+ * value from the tcb from last time. Note too that
+ * sack sending clears this when a sack is sent,
+ * which is fine. Once we hit the rwnd_req, we then
+ * will go to the sctp_user_rcvd() that will not
+ * lock until it KNOWs it MUST send a WUP-SACK.
+ */
+ freed_so_far = stcb->freed_by_sorcv_sincelast;
+ stcb->freed_by_sorcv_sincelast = 0;
+ }
+ }
+ if (stcb &&
+ ((control->spec_flags & M_NOTIFICATION) == 0) &&
+ control->do_not_ref_stcb == 0) {
+ stcb->asoc.strmin[control->sinfo_stream].delivery_started = 1;
+ }
+	/* First let's copy off the sinfo and sockaddr info */
+ if ((sinfo) && filling_sinfo) {
+ memcpy(sinfo, control, sizeof(struct sctp_nonpad_sndrcvinfo));
+ nxt = TAILQ_NEXT(control, next);
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO)) {
+ struct sctp_extrcvinfo *s_extra;
+
+ s_extra = (struct sctp_extrcvinfo *)sinfo;
+ if ((nxt) &&
+ (nxt->length)) {
+ s_extra->sreinfo_next_flags = SCTP_NEXT_MSG_AVAIL;
+ if (nxt->sinfo_flags & SCTP_UNORDERED) {
+ s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_IS_UNORDERED;
+ }
+ if (nxt->spec_flags & M_NOTIFICATION) {
+ s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_IS_NOTIFICATION;
+ }
+ s_extra->sreinfo_next_aid = nxt->sinfo_assoc_id;
+ s_extra->sreinfo_next_length = nxt->length;
+ s_extra->sreinfo_next_ppid = nxt->sinfo_ppid;
+ s_extra->sreinfo_next_stream = nxt->sinfo_stream;
+ if (nxt->tail_mbuf != NULL) {
+ if (nxt->end_added) {
+ s_extra->sreinfo_next_flags |= SCTP_NEXT_MSG_ISCOMPLETE;
+ }
+ }
+ } else {
+ /*
+				 * we explicitly zero this, since the memcpy
+				 * picked up other fields beyond the older
+				 * sinfo_ ones that are on the control structure
+				 * :-D
+ */
+ nxt = NULL;
+ s_extra->sreinfo_next_flags = SCTP_NO_NEXT_MSG;
+ s_extra->sreinfo_next_aid = 0;
+ s_extra->sreinfo_next_length = 0;
+ s_extra->sreinfo_next_ppid = 0;
+ s_extra->sreinfo_next_stream = 0;
+ }
+ }
+ /*
+ * update off the real current cum-ack, if we have an stcb.
+ */
+ if ((control->do_not_ref_stcb == 0) && stcb)
+ sinfo->sinfo_cumtsn = stcb->asoc.cumulative_tsn;
+ /*
+ * mask off the high bits, we keep the actual chunk bits in
+ * there.
+ */
+ sinfo->sinfo_flags &= 0x00ff;
+ if ((control->sinfo_flags >> 8) & SCTP_DATA_UNORDERED) {
+ sinfo->sinfo_flags |= SCTP_UNORDERED;
+ }
+ }
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ {
+ int index, newindex;
+ struct sctp_pcbtsn_rlog *entry;
+
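+		/* Reserve the next read-log slot lock-free with a compare-and-swap loop. */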
+ do {
+ index = inp->readlog_index;
+ newindex = index + 1;
+ if (newindex >= SCTP_READ_LOG_SIZE) {
+ newindex = 0;
+ }
+ } while (atomic_cmpset_int(&inp->readlog_index, index, newindex) == 0);
+ entry = &inp->readlog[index];
+ entry->vtag = control->sinfo_assoc_id;
+ entry->strm = control->sinfo_stream;
+ entry->seq = control->sinfo_ssn;
+ entry->sz = control->length;
+ entry->flgs = control->sinfo_flags;
+ }
+#endif
+ if (fromlen && from) {
+ struct sockaddr *to;
+
+#ifdef INET
+ cp_len = min((size_t)fromlen, (size_t)control->whoFrom->ro._l_addr.sin.sin_len);
+ memcpy(from, &control->whoFrom->ro._l_addr, cp_len);
+ ((struct sockaddr_in *)from)->sin_port = control->port_from;
+#else
+ /* No AF_INET use AF_INET6 */
+ cp_len = min((size_t)fromlen, (size_t)control->whoFrom->ro._l_addr.sin6.sin6_len);
+ memcpy(from, &control->whoFrom->ro._l_addr, cp_len);
+ ((struct sockaddr_in6 *)from)->sin6_port = control->port_from;
+#endif
+
+ to = from;
+#if defined(INET) && defined(INET6)
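+		/*
+		 * If the endpoint needs v4-mapped addresses, present the
+		 * AF_INET source as a v4-mapped IPv6 address instead.
+		 */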
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4) &&
+ (to->sa_family == AF_INET) &&
+ ((size_t)fromlen >= sizeof(struct sockaddr_in6))) {
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 sin6;
+
+ sin = (struct sockaddr_in *)to;
+ bzero(&sin6, sizeof(sin6));
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_addr.s6_addr16[2] = 0xffff;
+ bcopy(&sin->sin_addr,
+ &sin6.sin6_addr.s6_addr16[3],
+ sizeof(sin6.sin6_addr.s6_addr16[3]));
+ sin6.sin6_port = sin->sin_port;
+ memcpy(from, (caddr_t)&sin6, sizeof(sin6));
+ }
+#endif
+#if defined(INET6)
+ {
+ struct sockaddr_in6 lsa6, *to6;
+
+ to6 = (struct sockaddr_in6 *)to;
+ sctp_recover_scope_mac(to6, (&lsa6));
+ }
+#endif
+ }
+ /* now copy out what data we can */
+ if (mp == NULL) {
+ /* copy out each mbuf in the chain up to length */
+get_more_data:
+ m = control->data;
+ while (m) {
+ /* Move out all we can */
+ cp_len = (int)uio->uio_resid;
+ my_len = (int)SCTP_BUF_LEN(m);
+ if (cp_len > my_len) {
+ /* not enough in this buf */
+ cp_len = my_len;
+ }
+ if (hold_rlock) {
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ }
+ if (cp_len > 0)
+ error = uiomove(mtod(m, char *), cp_len, uio);
+ /* re-read */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ goto release;
+ }
+ if ((control->do_not_ref_stcb == 0) && stcb &&
+ stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ no_rcv_needed = 1;
+ }
+ if (error) {
+ /* error we are out of here */
+ goto release;
+ }
+ if ((SCTP_BUF_NEXT(m) == NULL) &&
+ (cp_len >= SCTP_BUF_LEN(m)) &&
+ ((control->end_added == 0) ||
+ (control->end_added &&
+ (TAILQ_NEXT(control, next) == NULL)))
+ ) {
+ SCTP_INP_READ_LOCK(inp);
+ hold_rlock = 1;
+ }
+ if (cp_len == SCTP_BUF_LEN(m)) {
+ if ((SCTP_BUF_NEXT(m) == NULL) &&
+ (control->end_added)) {
+ out_flags |= MSG_EOR;
+ if ((control->do_not_ref_stcb == 0) && ((control->spec_flags & M_NOTIFICATION) == 0))
+ control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0;
+ }
+ if (control->spec_flags & M_NOTIFICATION) {
+ out_flags |= MSG_NOTIFICATION;
+ }
+ /* we ate up the mbuf */
+ if (in_flags & MSG_PEEK) {
+ /* just looking */
+ m = SCTP_BUF_NEXT(m);
+ copied_so_far += cp_len;
+ } else {
+ /* dispose of the mbuf */
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv,
+ control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, SCTP_BUF_LEN(m));
+ }
+ sctp_sbfree(control, stcb, &so->so_rcv, m);
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv,
+ control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ embuf = m;
+ copied_so_far += cp_len;
+ freed_so_far += cp_len;
+ freed_so_far += MSIZE;
+ atomic_subtract_int(&control->length, cp_len);
+ control->data = sctp_m_free(m);
+ m = control->data;
+ /*
+					 * been through it all; we must hold the
+					 * sb lock, so it is ok to null the tail
+ */
+ if (control->data == NULL) {
+#ifdef INVARIANTS
+ if ((control->end_added == 0) ||
+ (TAILQ_NEXT(control, next) == NULL)) {
+ /*
+ * If the end is not
+ * added, OR the
+ * next is NOT null
+ * we MUST have the
+ * lock.
+ */
+ if (mtx_owned(&inp->inp_rdata_mtx) == 0) {
+ panic("Hmm we don't own the lock?");
+ }
+ }
+#endif
+ control->tail_mbuf = NULL;
+#ifdef INVARIANTS
+ if ((control->end_added) && ((out_flags & MSG_EOR) == 0)) {
+ panic("end_added, nothing left and no MSG_EOR");
+ }
+#endif
+ }
+ }
+ } else {
+ /* Do we need to trim the mbuf? */
+ if (control->spec_flags & M_NOTIFICATION) {
+ out_flags |= MSG_NOTIFICATION;
+ }
+ if ((in_flags & MSG_PEEK) == 0) {
+ SCTP_BUF_RESV_UF(m, cp_len);
+ SCTP_BUF_LEN(m) -= cp_len;
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv, control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, cp_len);
+ }
+ atomic_subtract_int(&so->so_rcv.sb_cc, cp_len);
+ if ((control->do_not_ref_stcb == 0) &&
+ stcb) {
+ atomic_subtract_int(&stcb->asoc.sb_cc, cp_len);
+ }
+ copied_so_far += cp_len;
+ embuf = m;
+ freed_so_far += cp_len;
+ freed_so_far += MSIZE;
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv, control->do_not_ref_stcb ? NULL : stcb,
+ SCTP_LOG_SBRESULT, 0);
+ }
+ atomic_subtract_int(&control->length, cp_len);
+ } else {
+ copied_so_far += cp_len;
+ }
+ }
+ if ((out_flags & MSG_EOR) || (uio->uio_resid == 0)) {
+ break;
+ }
+ if (((stcb) && (in_flags & MSG_PEEK) == 0) &&
+ (control->do_not_ref_stcb == 0) &&
+ (freed_so_far >= rwnd_req)) {
+ sctp_user_rcvd(stcb, &freed_so_far, hold_rlock, rwnd_req);
+ }
+ } /* end while(m) */
+ /*
+ * At this point we have looked at it all and we either have
+		 * a MSG_EOR, or have read all the user wants... <OR>
+ * control->length == 0.
+ */
+ if ((out_flags & MSG_EOR) && ((in_flags & MSG_PEEK) == 0)) {
+ /* we are done with this control */
+ if (control->length == 0) {
+ if (control->data) {
+#ifdef INVARIANTS
+ panic("control->data not null at read eor?");
+#else
+					SCTP_PRINTF("Strange, data left in the control buffer .. invariants would panic?\n");
+ sctp_m_freem(control->data);
+ control->data = NULL;
+#endif
+ }
+ done_with_control:
+ if (TAILQ_NEXT(control, next) == NULL) {
+ /*
+					 * If we don't have a next we need a
+					 * lock; if there is a next, the
+					 * interrupt is filling ahead of us
+					 * and we don't need a lock to
+					 * remove this one (which is the
+					 * head of the queue).
+ */
+ if (hold_rlock == 0) {
+ SCTP_INP_READ_LOCK(inp);
+ hold_rlock = 1;
+ }
+ }
+ TAILQ_REMOVE(&inp->read_queue, control, next);
+				/* Add back any hidden data */
+ if (control->held_length) {
+ held_length = 0;
+ control->held_length = 0;
+ wakeup_read_socket = 1;
+ }
+ if (control->aux_data) {
+ sctp_m_free(control->aux_data);
+ control->aux_data = NULL;
+ }
+ no_rcv_needed = control->do_not_ref_stcb;
+ sctp_free_remote_addr(control->whoFrom);
+ control->data = NULL;
+ sctp_free_a_readq(stcb, control);
+ control = NULL;
+ if ((freed_so_far >= rwnd_req) &&
+ (no_rcv_needed == 0))
+ sctp_user_rcvd(stcb, &freed_so_far, hold_rlock, rwnd_req);
+
+ } else {
+ /*
+ * The user did not read all of this
+ * message, turn off the returned MSG_EOR
+ * since we are leaving more behind on the
+ * control to read.
+ */
+#ifdef INVARIANTS
+ if (control->end_added &&
+ (control->data == NULL) &&
+ (control->tail_mbuf == NULL)) {
+ panic("Gak, control->length is corrupt?");
+ }
+#endif
+ no_rcv_needed = control->do_not_ref_stcb;
+ out_flags &= ~MSG_EOR;
+ }
+ }
+ if (out_flags & MSG_EOR) {
+ goto release;
+ }
+ if ((uio->uio_resid == 0) ||
+ ((in_eeor_mode) && (copied_so_far >= max(so->so_rcv.sb_lowat, 1)))
+ ) {
+ goto release;
+ }
+ /*
+		 * If we hit here, the receiver wants more and this message is
+		 * NOT done (pd-api).  So, two questions: can we block?  If not,
+		 * we are done.  Did the user NOT set MSG_WAITALL?
+ */
+ if (block_allowed == 0) {
+ goto release;
+ }
+ /*
+		 * We need to wait for more data.  A few things: - We don't
+		 * sbunlock() so nobody else starts reading. - We must be
+		 * sure to account for the case where what is added is NOT
+		 * for our control when we wake up.
+ */
+
+ /*
+ * Do we need to tell the transport a rwnd update might be
+ * needed before we go to sleep?
+ */
+ if (((stcb) && (in_flags & MSG_PEEK) == 0) &&
+ ((freed_so_far >= rwnd_req) &&
+ (control->do_not_ref_stcb == 0) &&
+ (no_rcv_needed == 0))) {
+ sctp_user_rcvd(stcb, &freed_so_far, hold_rlock, rwnd_req);
+ }
+wait_some_more:
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ goto release;
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)
+ goto release;
+
+ if (hold_rlock == 1) {
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ }
+ if (hold_sblock == 0) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ hold_sblock = 1;
+ }
+ if ((copied_so_far) && (control->length == 0) &&
+ (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE))
+ ) {
+ goto release;
+ }
+ if (so->so_rcv.sb_cc <= control->held_length) {
+ error = sbwait(&so->so_rcv);
+ if (error) {
+ goto release;
+ }
+ control->held_length = 0;
+ }
+ if (hold_sblock) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ if (control->length == 0) {
+ /* still nothing here */
+ if (control->end_added == 1) {
+			/* he aborted, or is done, i.e. did a shutdown */
+ out_flags |= MSG_EOR;
+ if (control->pdapi_aborted) {
+ if ((control->do_not_ref_stcb == 0) && ((control->spec_flags & M_NOTIFICATION) == 0))
+ control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0;
+
+ out_flags |= MSG_TRUNC;
+ } else {
+ if ((control->do_not_ref_stcb == 0) && ((control->spec_flags & M_NOTIFICATION) == 0))
+ control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0;
+ }
+ goto done_with_control;
+ }
+ if (so->so_rcv.sb_cc > held_length) {
+ control->held_length = so->so_rcv.sb_cc;
+ held_length = 0;
+ }
+ goto wait_some_more;
+ } else if (control->data == NULL) {
+ /*
+ * we must re-sync since data is probably being
+ * added
+ */
+ SCTP_INP_READ_LOCK(inp);
+ if ((control->length > 0) && (control->data == NULL)) {
+ /*
+			 * big trouble... we have the lock and it's
+			 * corrupt?
+ */
+#ifdef INVARIANTS
+ panic("Impossible data==NULL length !=0");
+#endif
+ out_flags |= MSG_EOR;
+ out_flags |= MSG_TRUNC;
+ control->length = 0;
+ SCTP_INP_READ_UNLOCK(inp);
+ goto done_with_control;
+ }
+ SCTP_INP_READ_UNLOCK(inp);
+ /* We will fall around to get more data */
+ }
+ goto get_more_data;
+ } else {
+ /*-
+ * Give caller back the mbuf chain,
+ * store in uio_resid the length
+ */
+ wakeup_read_socket = 0;
+ if ((control->end_added == 0) ||
+ (TAILQ_NEXT(control, next) == NULL)) {
+ /* Need to get rlock */
+ if (hold_rlock == 0) {
+ SCTP_INP_READ_LOCK(inp);
+ hold_rlock = 1;
+ }
+ }
+ if (control->end_added) {
+ out_flags |= MSG_EOR;
+ if ((control->do_not_ref_stcb == 0) && ((control->spec_flags & M_NOTIFICATION) == 0))
+ control->stcb->asoc.strmin[control->sinfo_stream].delivery_started = 0;
+ }
+ if (control->spec_flags & M_NOTIFICATION) {
+ out_flags |= MSG_NOTIFICATION;
+ }
+ uio->uio_resid = control->length;
+ *mp = control->data;
+ m = control->data;
+ while (m) {
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv,
+ control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBFREE, SCTP_BUF_LEN(m));
+ }
+ sctp_sbfree(control, stcb, &so->so_rcv, m);
+ freed_so_far += SCTP_BUF_LEN(m);
+ freed_so_far += MSIZE;
+ if (sctp_logging_level & SCTP_SB_LOGGING_ENABLE) {
+ sctp_sblog(&so->so_rcv,
+ control->do_not_ref_stcb ? NULL : stcb, SCTP_LOG_SBRESULT, 0);
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ control->data = control->tail_mbuf = NULL;
+ control->length = 0;
+ if (out_flags & MSG_EOR) {
+ /* Done with this control */
+ goto done_with_control;
+ }
+ }
+release:
+ if (hold_rlock == 1) {
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ }
+ if (hold_sblock == 1) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ sbunlock(&so->so_rcv);
+ sockbuf_lock = 0;
+
+release_unlocked:
+ if (hold_sblock) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ if ((stcb) && (in_flags & MSG_PEEK) == 0) {
+ if ((freed_so_far >= rwnd_req) &&
+ (control && (control->do_not_ref_stcb == 0)) &&
+ (no_rcv_needed == 0))
+ sctp_user_rcvd(stcb, &freed_so_far, hold_rlock, rwnd_req);
+ }
+ if (msg_flags)
+ *msg_flags = out_flags;
+out:
+ if (((out_flags & MSG_EOR) == 0) &&
+ ((in_flags & MSG_PEEK) == 0) &&
+ (sinfo) &&
+ (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO))) {
+ struct sctp_extrcvinfo *s_extra;
+
+ s_extra = (struct sctp_extrcvinfo *)sinfo;
+ s_extra->sreinfo_next_flags = SCTP_NO_NEXT_MSG;
+ }
+ if (hold_rlock == 1) {
+ SCTP_INP_READ_UNLOCK(inp);
+ hold_rlock = 0;
+ }
+ if (hold_sblock) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ hold_sblock = 0;
+ }
+ if (sockbuf_lock) {
+ sbunlock(&so->so_rcv);
+ }
+ if (freecnt_applied) {
+ /*
+ * The lock on the socket buffer protects us so the free
+ * code will stop. But since we used the socketbuf lock and
+ * the sender uses the tcb_lock to increment, we need to use
+ * the atomic add to the refcnt.
+ */
+ if (stcb == NULL) {
+ panic("stcb for refcnt has gone NULL?");
+ }
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ freecnt_applied = 0;
+ /* Save the value back for next time */
+ stcb->freed_by_sorcv_sincelast = freed_so_far;
+ }
+ if (sctp_logging_level & SCTP_RECV_RWND_LOGGING_ENABLE) {
+ if (stcb) {
+ sctp_misc_ints(SCTP_SORECV_DONE,
+ freed_so_far,
+ ((uio) ? (slen - uio->uio_resid) : slen),
+ stcb->asoc.my_rwnd,
+ so->so_rcv.sb_cc);
+ } else {
+ sctp_misc_ints(SCTP_SORECV_DONE,
+ freed_so_far,
+ ((uio) ? (slen - uio->uio_resid) : slen),
+ 0,
+ so->so_rcv.sb_cc);
+ }
+ }
+ if (wakeup_read_socket) {
+ sctp_sorwakeup(inp, so);
+ }
+ return (error);
+}
+
+
+#ifdef SCTP_MBUF_LOGGING
+struct mbuf *
+sctp_m_free(struct mbuf *m)
+{
+ if (sctp_logging_level & SCTP_MBUF_LOGGING_ENABLE) {
+ if (SCTP_BUF_IS_EXTENDED(m)) {
+ sctp_log_mb(m, SCTP_MBUF_IFREE);
+ }
+ }
+ return (m_free(m));
+}
+
+void
+sctp_m_freem(struct mbuf *mb)
+{
+ while (mb != NULL)
+ mb = sctp_m_free(mb);
+}
+
+#endif
+
+int
+sctp_dynamic_set_primary(struct sockaddr *sa, uint32_t vrf_id)
+{
+ /*
+	 * Given a local address, request a peer-set-primary for all
+	 * associations that hold the address.
+ */
+ struct sctp_ifa *ifa;
+ struct sctp_laddr *wi;
+
+ ifa = sctp_find_ifa_by_addr(sa, vrf_id, 0);
+ if (ifa == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, EADDRNOTAVAIL);
+ return (EADDRNOTAVAIL);
+ }
+ /*
+ * Now that we have the ifa we must awaken the iterator with this
+ * message.
+ */
+ wi = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_laddr, struct sctp_laddr);
+ if (wi == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTPUTIL, ENOMEM);
+ return (ENOMEM);
+ }
+	/* Now incr the count and init the wi structure */
+ SCTP_INCR_LADDR_COUNT();
+ bzero(wi, sizeof(*wi));
+ (void)SCTP_GETTIME_TIMEVAL(&wi->start_time);
+ wi->ifa = ifa;
+ wi->action = SCTP_SET_PRIM_ADDR;
+ atomic_add_int(&ifa->refcount, 1);
+
+ /* Now add it to the work queue */
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ /*
+ * Should this really be a tailq? As it is we will process the
+ * newest first :-0
+ */
+ LIST_INSERT_HEAD(&sctppcbinfo.addr_wq, wi, sctp_nxt_addr);
+ sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
+ (struct sctp_inpcb *)NULL,
+ (struct sctp_tcb *)NULL,
+ (struct sctp_nets *)NULL);
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ return (0);
+}
+
+
+
+
+int
+sctp_soreceive(struct socket *so,
+ struct sockaddr **psa,
+ struct uio *uio,
+ struct mbuf **mp0,
+ struct mbuf **controlp,
+ int *flagsp)
+{
+ int error, fromlen;
+ uint8_t sockbuf[256];
+ struct sockaddr *from;
+ struct sctp_extrcvinfo sinfo;
+ int filling_sinfo = 1;
+ struct sctp_inpcb *inp;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ /* pickup the assoc we are reading from */
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ return (EINVAL);
+ }
+ if ((sctp_is_feature_off(inp,
+ SCTP_PCB_FLAGS_RECVDATAIOEVNT)) ||
+ (controlp == NULL)) {
+ /* user does not want the sndrcv ctl */
+ filling_sinfo = 0;
+ }
+ if (psa) {
+ from = (struct sockaddr *)sockbuf;
+ fromlen = sizeof(sockbuf);
+ from->sa_len = 0;
+ } else {
+ from = NULL;
+ fromlen = 0;
+ }
+
+ error = sctp_sorecvmsg(so, uio, mp0, from, fromlen, flagsp,
+ (struct sctp_sndrcvinfo *)&sinfo, filling_sinfo);
+ if ((controlp) && (filling_sinfo)) {
+ /* copy back the sinfo in a CMSG format */
+ if (filling_sinfo)
+ *controlp = sctp_build_ctl_nchunk(inp,
+ (struct sctp_sndrcvinfo *)&sinfo);
+ else
+ *controlp = NULL;
+ }
+ if (psa) {
+ /* copy back the address info */
+ if (from && from->sa_len) {
+ *psa = sodupsockaddr(from, M_NOWAIT);
+ } else {
+ *psa = NULL;
+ }
+ }
+ return (error);
+}
+
+
+int
+sctp_l_soreceive(struct socket *so,
+ struct sockaddr **name,
+ struct uio *uio,
+ char **controlp,
+ int *controllen,
+ int *flag)
+{
+ int error, fromlen;
+ uint8_t sockbuf[256];
+ struct sockaddr *from;
+ struct sctp_extrcvinfo sinfo;
+ int filling_sinfo = 1;
+ struct sctp_inpcb *inp;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ /* pickup the assoc we are reading from */
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ return (EINVAL);
+ }
+ if ((sctp_is_feature_off(inp,
+ SCTP_PCB_FLAGS_RECVDATAIOEVNT)) ||
+ (controlp == NULL)) {
+ /* user does not want the sndrcv ctl */
+ filling_sinfo = 0;
+ }
+ if (name) {
+ from = (struct sockaddr *)sockbuf;
+ fromlen = sizeof(sockbuf);
+ from->sa_len = 0;
+ } else {
+ from = NULL;
+ fromlen = 0;
+ }
+
+ error = sctp_sorecvmsg(so, uio,
+ (struct mbuf **)NULL,
+ from, fromlen, flag,
+ (struct sctp_sndrcvinfo *)&sinfo,
+ filling_sinfo);
+ if ((controlp) && (filling_sinfo)) {
+ /*
+		 * copy back the sinfo in CMSG format; note that the caller
+		 * has responsibility for freeing the memory.
+ */
+ if (filling_sinfo)
+ *controlp = sctp_build_ctl_cchunk(inp,
+ controllen,
+ (struct sctp_sndrcvinfo *)&sinfo);
+ }
+ if (name) {
+ /* copy back the address info */
+ if (from && from->sa_len) {
+ *name = sodupsockaddr(from, M_WAIT);
+ } else {
+ *name = NULL;
+ }
+ }
+ return (error);
+}
+
+
+
+
+
+
+
+int
+sctp_connectx_helper_add(struct sctp_tcb *stcb, struct sockaddr *addr,
+ int totaddr, int *error)
+{
+ int added = 0;
+ int i;
+ struct sctp_inpcb *inp;
+ struct sockaddr *sa;
+ size_t incr = 0;
+
+ sa = addr;
+ inp = stcb->sctp_ep;
+ *error = 0;
+ for (i = 0; i < totaddr; i++) {
+ if (sa->sa_family == AF_INET) {
+ incr = sizeof(struct sockaddr_in);
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
+				/* assoc is gone, no unlock needed */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
+ *error = ENOBUFS;
+ goto out_now;
+ }
+ added++;
+ } else if (sa->sa_family == AF_INET6) {
+ incr = sizeof(struct sockaddr_in6);
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) {
+				/* assoc is gone, no unlock needed */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTPUTIL, ENOBUFS);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_8);
+ *error = ENOBUFS;
+ goto out_now;
+ }
+ added++;
+ }
+ sa = (struct sockaddr *)((caddr_t)sa + incr);
+ }
+out_now:
+ return (added);
+}
+
+struct sctp_tcb *
+sctp_connectx_helper_find(struct sctp_inpcb *inp, struct sockaddr *addr,
+ int *totaddr, int *num_v4, int *num_v6, int *error,
+ int limit, int *bad_addr)
+{
+ struct sockaddr *sa;
+ struct sctp_tcb *stcb = NULL;
+ size_t incr, at, i;
+
+ at = incr = 0;
+ sa = addr;
+ *error = *num_v6 = *num_v4 = 0;
+ /* account and validate addresses */
+ for (i = 0; i < (size_t)*totaddr; i++) {
+ if (sa->sa_family == AF_INET) {
+ (*num_v4) += 1;
+ incr = sizeof(struct sockaddr_in);
+ if (sa->sa_len != incr) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ *bad_addr = 1;
+ return (NULL);
+ }
+ } else if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ /* Must be non-mapped for connectx */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ *bad_addr = 1;
+ return (NULL);
+ }
+ (*num_v6) += 1;
+ incr = sizeof(struct sockaddr_in6);
+ if (sa->sa_len != incr) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ *bad_addr = 1;
+ return (NULL);
+ }
+ } else {
+ *totaddr = i;
+ /* we are done */
+ break;
+ }
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp, sa, NULL, NULL, NULL);
+ if (stcb != NULL) {
+			/* Already have or am bringing up an association */
+ return (stcb);
+ } else {
+ SCTP_INP_DECR_REF(inp);
+ }
+ if ((at + incr) > (size_t)limit) {
+ *totaddr = i;
+ break;
+ }
+ sa = (struct sockaddr *)((caddr_t)sa + incr);
+ }
+ return ((struct sctp_tcb *)NULL);
+}
+
+/*
+ * sctp_bindx(ADD) for one address.
+ * assumes all arguments are valid/checked by caller.
+ */
+void
+sctp_bindx_add_address(struct socket *so, struct sctp_inpcb *inp,
+ struct sockaddr *sa, sctp_assoc_t assoc_id,
+ uint32_t vrf_id, int *error, void *p)
+{
+ struct sockaddr *addr_touse;
+ struct sockaddr_in sin;
+
+ /* see if we're bound all already! */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ addr_touse = sa;
+#if defined(INET6)
+ if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ if (sa->sa_len != sizeof(struct sockaddr_in6)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ /* can only bind v6 on PF_INET6 sockets */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ sin6 = (struct sockaddr_in6 *)addr_touse;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+				/* can't bind v4-mapped addrs on a v6-only socket */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ in6_sin6_2_sin(&sin, sin6);
+ addr_touse = (struct sockaddr *)&sin;
+ }
+ }
+#endif
+ if (sa->sa_family == AF_INET) {
+ if (sa->sa_len != sizeof(struct sockaddr_in)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+			/* can't bind v4 addrs on a v6-only socket */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
+ if (p == NULL) {
+ /* Can't get proc for Net/Open BSD */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ *error = sctp_inpcb_bind(so, addr_touse, NULL, p);
+ return;
+ }
+ /*
+ * No locks required here since bind and mgmt_ep_sa all do their own
+ * locking. If we do something for the FIX: below we may need to
+ * lock in that case.
+ */
+ if (assoc_id == 0) {
+ /* add the address */
+ struct sctp_inpcb *lep;
+ struct sockaddr_in *lsin = (struct sockaddr_in *)addr_touse;
+
+ /* validate the incoming port */
+ if ((lsin->sin_port != 0) &&
+ (lsin->sin_port != inp->sctp_lport)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ } else {
+ /* user specified 0 port, set it to existing port */
+ lsin->sin_port = inp->sctp_lport;
+ }
+
+ lep = sctp_pcb_findep(addr_touse, 1, 0, vrf_id);
+ if (lep != NULL) {
+ /*
+ * We must decrement the refcount since we have the
+ * ep already and are binding. No remove going on
+ * here.
+ */
+ SCTP_INP_DECR_REF(inp);
+ }
+ if (lep == inp) {
+ /* already bound to it.. ok */
+ return;
+ } else if (lep == NULL) {
+ ((struct sockaddr_in *)addr_touse)->sin_port = 0;
+ *error = sctp_addr_mgmt_ep_sa(inp, addr_touse,
+ SCTP_ADD_IP_ADDRESS,
+ vrf_id, NULL);
+ } else {
+ *error = EADDRINUSE;
+ }
+ if (*error)
+ return;
+ } else {
+ /*
+ * FIX: decide whether we allow assoc based bindx
+ */
+ }
+}
+
+/*
+ * sctp_bindx(DELETE) for one address.
+ * assumes all arguments are valid/checked by caller.
+ */
+void
+sctp_bindx_delete_address(struct socket *so, struct sctp_inpcb *inp,
+ struct sockaddr *sa, sctp_assoc_t assoc_id,
+ uint32_t vrf_id, int *error)
+{
+ struct sockaddr *addr_touse;
+ struct sockaddr_in sin;
+
+ /* see if we're bound all already! */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ addr_touse = sa;
+#if defined(INET6)
+ if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ if (sa->sa_len != sizeof(struct sockaddr_in6)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ /* can only bind v6 on PF_INET6 sockets */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ sin6 = (struct sockaddr_in6 *)addr_touse;
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+				/* can't bind v4-mapped addrs on a v6-only socket */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ in6_sin6_2_sin(&sin, sin6);
+ addr_touse = (struct sockaddr *)&sin;
+ }
+ }
+#endif
+ if (sa->sa_family == AF_INET) {
+ if (sa->sa_len != sizeof(struct sockaddr_in)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+			/* can't bind v4 addrs on a v6-only socket */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTPUTIL, EINVAL);
+ *error = EINVAL;
+ return;
+ }
+ }
+ /*
+ * No lock required mgmt_ep_sa does its own locking. If the FIX:
+ * below is ever changed we may need to lock before calling
+ * association level binding.
+ */
+ if (assoc_id == 0) {
+ /* delete the address */
+ *error = sctp_addr_mgmt_ep_sa(inp, addr_touse,
+ SCTP_DEL_IP_ADDRESS,
+ vrf_id, NULL);
+ } else {
+ /*
+ * FIX: decide whether we allow assoc based bindx
+ */
+ }
+}
+
+/*
+ * returns the valid local address count for an assoc, taking into account
+ * all scoping rules
+ */
+int
+sctp_local_addr_count(struct sctp_tcb *stcb)
+{
+ int loopback_scope, ipv4_local_scope, local_scope, site_scope;
+ int ipv4_addr_legal, ipv6_addr_legal;
+ struct sctp_vrf *vrf;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+ int count = 0;
+
+ /* Turn on all the appropriate scopes */
+ loopback_scope = stcb->asoc.loopback_scope;
+ ipv4_local_scope = stcb->asoc.ipv4_local_scope;
+ local_scope = stcb->asoc.local_scope;
+ site_scope = stcb->asoc.site_scope;
+ ipv4_addr_legal = ipv6_addr_legal = 0;
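+	/*
+	 * A PF_INET6 endpoint may also use IPv4 addresses unless it is
+	 * v6-only; otherwise only IPv4 addresses are legal.
+	 */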
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ipv6_addr_legal = 1;
+ if (SCTP_IPV6_V6ONLY(stcb->sctp_ep) == 0) {
+ ipv4_addr_legal = 1;
+ }
+ } else {
+ ipv4_addr_legal = 1;
+ }
+
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(stcb->asoc.vrf_id);
+ if (vrf == NULL) {
+ /* no vrf, no addresses */
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (0);
+ }
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /*
+ * bound all case: go through all ifns on the vrf
+ */
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if ((loopback_scope == 0) &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ continue;
+ }
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if (sctp_is_addr_restricted(stcb, sctp_ifa))
+ continue;
+
+ if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
+ (ipv4_addr_legal)) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0) {
+ /* skip unspecified addrs */
+ continue;
+ }
+ if ((ipv4_local_scope == 0) &&
+ (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
+ continue;
+ }
+ /* count this one */
+ count++;
+ } else if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
+ (ipv6_addr_legal)) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ continue;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (local_scope == 0)
+ continue;
+ if (sin6->sin6_scope_id == 0) {
+ if (sa6_recoverscope(sin6) != 0)
+ /*
+ * bad link
+ * local
+ * address
+ */
+ continue;
+ }
+ }
+ if ((site_scope == 0) &&
+ (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
+ continue;
+ }
+ /* count this one */
+ count++;
+ }
+ }
+ }
+ } else {
+ /*
+ * subset bound case
+ */
+ struct sctp_laddr *laddr;
+
+ LIST_FOREACH(laddr, &stcb->sctp_ep->sctp_addr_list,
+ sctp_nxt_addr) {
+ if (sctp_is_addr_restricted(stcb, laddr->ifa)) {
+ continue;
+ }
+ /* count this one */
+ count++;
+ }
+ }
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (count);
+}
+
+#if defined(SCTP_LOCAL_TRACE_BUF)
+
+void
+sctp_log_trace(uint32_t subsys, const char *str SCTP_UNUSED, uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e, uint32_t f)
+{
+ uint32_t saveindex, newindex;
+
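+	/*
+	 * Claim a slot in the global trace ring with a compare-and-swap
+	 * loop; the index wraps at SCTP_MAX_LOGGING_SIZE.
+	 */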
+ do {
+ saveindex = sctp_log.index;
+ if (saveindex >= SCTP_MAX_LOGGING_SIZE) {
+ newindex = 1;
+ } else {
+ newindex = saveindex + 1;
+ }
+ } while (atomic_cmpset_int(&sctp_log.index, saveindex, newindex) == 0);
+ if (saveindex >= SCTP_MAX_LOGGING_SIZE) {
+ saveindex = 0;
+ }
+ sctp_log.entry[saveindex].timestamp = SCTP_GET_CYCLECOUNT;
+ sctp_log.entry[saveindex].subsys = subsys;
+ sctp_log.entry[saveindex].params[0] = a;
+ sctp_log.entry[saveindex].params[1] = b;
+ sctp_log.entry[saveindex].params[2] = c;
+ sctp_log.entry[saveindex].params[3] = d;
+ sctp_log.entry[saveindex].params[4] = e;
+ sctp_log.entry[saveindex].params[5] = f;
+}
+
+#endif
Index: ip_icmp.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_icmp.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_icmp.c -L sys/netinet/ip_icmp.c -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_icmp.c
+++ sys/netinet/ip_icmp.c
@@ -27,15 +27,16 @@
* SUCH DAMAGE.
*
* @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
- * $FreeBSD: src/sys/netinet/ip_icmp.c,v 1.101.2.2 2006/02/16 17:50:57 andre Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_icmp.c,v 1.118 2007/10/07 20:44:23 silby Exp $");
+
#include "opt_ipsec.h"
#include "opt_mac.h"
#include <sys/param.h>
#include <sys/systm.h>
-#include <sys/mac.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
@@ -54,24 +55,21 @@
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#include <netinet/icmp_var.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#include <netkey/key.h>
-#endif
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/key.h>
-#define IPSEC
#endif
#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
/*
* ICMP routines: error generation, receive packet processing, and
* routines to turnaround packets back to the originator, and
@@ -92,19 +90,19 @@
static int drop_redirect = 0;
SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW,
- &drop_redirect, 0, "");
+ &drop_redirect, 0, "Ignore ICMP redirects");
static int log_redirect = 0;
SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW,
- &log_redirect, 0, "");
+ &log_redirect, 0, "Log ICMP redirects to the console");
static int icmplim = 200;
SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
- &icmplim, 0, "");
+ &icmplim, 0, "Maximum number of ICMP responses per second");
static int icmplim_output = 1;
SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW,
- &icmplim_output, 0, "");
+ &icmplim_output, 0, "Enable rate limiting of ICMP responses");
static char reply_src[IFNAMSIZ];
SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_RW,
@@ -143,11 +141,7 @@
* in response to bad packet ip.
*/
void
-icmp_error(n, type, code, dest, mtu)
- struct mbuf *n;
- int type, code;
- n_long dest;
- int mtu;
+icmp_error(struct mbuf *n, int type, int code, n_long dest, int mtu)
{
register struct ip *oip = mtod(n, struct ip *), *nip;
register unsigned oiphlen = oip->ip_hl << 2;
@@ -291,9 +285,7 @@
* Process a received ICMP message.
*/
void
-icmp_input(m, off)
- struct mbuf *m;
- int off;
+icmp_input(struct mbuf *m, int off)
{
struct icmp *icp;
struct in_ifaddr *ia;
@@ -620,8 +612,7 @@
* Reflect the ip packet back to the source
*/
static void
-icmp_reflect(m)
- struct mbuf *m;
+icmp_reflect(struct mbuf *m)
{
struct ip *ip = mtod(m, struct ip *);
struct ifaddr *ifa;
@@ -725,7 +716,7 @@
*/
cp = (u_char *) (ip + 1);
if ((opts = ip_srcroute(m)) == 0 &&
- (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
+ (opts = m_gethdr(M_DONTWAIT, MT_DATA))) {
opts->m_len = sizeof(struct in_addr);
mtod(opts, struct in_addr *)->s_addr = 0;
}
@@ -800,9 +791,7 @@
* after supplying a checksum.
*/
static void
-icmp_send(m, opts)
- register struct mbuf *m;
- struct mbuf *opts;
+icmp_send(struct mbuf *m, struct mbuf *opts)
{
register struct ip *ip = mtod(m, struct ip *);
register int hlen;
@@ -829,7 +818,7 @@
}
n_time
-iptime()
+iptime(void)
{
struct timeval atv;
u_long t;
@@ -845,9 +834,7 @@
* is returned; otherwise, a smaller value is returned.
*/
int
-ip_next_mtu(mtu, dir)
- int mtu;
- int dir;
+ip_next_mtu(int mtu, int dir)
{
static int mtutab[] = {
65535, 32000, 17914, 8166, 4352, 2002, 1492, 1280, 1006, 508,
@@ -903,7 +890,8 @@
{ "icmp ping response" },
{ "icmp tstamp response" },
{ "closed port RST response" },
- { "open port RST response" }
+ { "open port RST response" },
+ { "icmp6 unreach response" }
};
/*
--- /dev/null
+++ sys/netinet/sctp_output.h
@@ -0,0 +1,215 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_output.h,v 1.14 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_output.h,v 1.13 2007/10/01 03:22:28 rrs Exp $");
+
+#ifndef __sctp_output_h__
+#define __sctp_output_h__
+
+#include <netinet/sctp_header.h>
+
+#if defined(_KERNEL)
+
+
+struct mbuf *
+sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp,
+ struct sctp_scoping *scope,
+ struct mbuf *m_at,
+ int cnt_inits_to);
+
+
+int sctp_is_addr_restricted(struct sctp_tcb *, struct sctp_ifa *);
+
+
+int
+sctp_is_address_in_scope(struct sctp_ifa *ifa,
+ int ipv4_addr_legal,
+ int ipv6_addr_legal,
+ int loopback_scope,
+ int ipv4_local_scope,
+ int local_scope,
+ int site_scope,
+ int do_update);
+int
+ sctp_is_addr_in_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa);
+
+struct sctp_ifa *
+sctp_source_address_selection(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ sctp_route_t * ro, struct sctp_nets *net,
+ int non_asoc_addr_ok, uint32_t vrf_id);
+
+int
+ sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t * ro);
+int
+ sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t * ro);
+
+void
+sctp_send_initiate(struct sctp_inpcb *, struct sctp_tcb *, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+void
+sctp_send_initiate_ack(struct sctp_inpcb *, struct sctp_tcb *,
+ struct mbuf *, int, int, struct sctphdr *, struct sctp_init_chunk *,
+ uint32_t, int);
+
+struct mbuf *
+sctp_arethere_unrecognized_parameters(struct mbuf *, int, int *,
+ struct sctp_chunkhdr *);
+void sctp_queue_op_err(struct sctp_tcb *, struct mbuf *);
+
+int
+sctp_send_cookie_echo(struct mbuf *, int, struct sctp_tcb *,
+ struct sctp_nets *);
+
+void sctp_send_cookie_ack(struct sctp_tcb *);
+
+void
+sctp_send_heartbeat_ack(struct sctp_tcb *, struct mbuf *, int, int,
+ struct sctp_nets *);
+
+
+void sctp_send_shutdown(struct sctp_tcb *, struct sctp_nets *);
+
+void sctp_send_shutdown_ack(struct sctp_tcb *, struct sctp_nets *);
+
+void sctp_send_shutdown_complete(struct sctp_tcb *, struct sctp_nets *);
+
+void
+sctp_send_shutdown_complete2(struct mbuf *, int, struct sctphdr *,
+ uint32_t);
+
+void sctp_send_asconf(struct sctp_tcb *, struct sctp_nets *, int addr_locked);
+
+void sctp_send_asconf_ack(struct sctp_tcb *);
+
+int sctp_get_frag_point(struct sctp_tcb *, struct sctp_association *);
+
+void sctp_toss_old_cookies(struct sctp_tcb *, struct sctp_association *);
+
+void sctp_toss_old_asconf(struct sctp_tcb *);
+
+void sctp_fix_ecn_echo(struct sctp_association *);
+
+int
+sctp_output(struct sctp_inpcb *, struct mbuf *, struct sockaddr *,
+ struct mbuf *, struct thread *, int);
+
+void
+sctp_insert_on_wheel(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_stream_out *strq, int holdslock);
+
+void
+sctp_chunk_output(struct sctp_inpcb *, struct sctp_tcb *, int, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+void
+sctp_send_abort_tcb(struct sctp_tcb *, struct mbuf *, int
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
+void send_forward_tsn(struct sctp_tcb *, struct sctp_association *);
+
+void sctp_send_sack(struct sctp_tcb *);
+
+int sctp_send_hb(struct sctp_tcb *, int, struct sctp_nets *);
+
+void sctp_send_ecn_echo(struct sctp_tcb *, struct sctp_nets *, uint32_t);
+
+
+void
+sctp_send_packet_dropped(struct sctp_tcb *, struct sctp_nets *, struct mbuf *,
+ int, int);
+
+
+
+void sctp_send_cwr(struct sctp_tcb *, struct sctp_nets *, uint32_t);
+
+
+void
+sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk,
+ int number_entries, uint16_t * list,
+ uint32_t seq, uint32_t resp_seq, uint32_t last_sent);
+
+void
+sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk,
+ int number_entries, uint16_t * list,
+ uint32_t seq);
+
+void
+sctp_add_stream_reset_tsn(struct sctp_tmit_chunk *chk,
+ uint32_t seq);
+
+void
+sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk,
+ uint32_t resp_seq, uint32_t result);
+
+void
+sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk,
+ uint32_t resp_seq, uint32_t result,
+ uint32_t send_una, uint32_t recv_next);
+
+int
+sctp_send_str_reset_req(struct sctp_tcb *stcb,
+ int number_entries, uint16_t * list,
+ uint8_t send_out_req, uint32_t resp_seq,
+ uint8_t send_in_req,
+ uint8_t send_tsn_req);
+
+
+void
+sctp_send_abort(struct mbuf *, int, struct sctphdr *, uint32_t,
+ struct mbuf *, uint32_t);
+
+void sctp_send_operr_to(struct mbuf *, int, struct mbuf *, uint32_t, uint32_t);
+
+int
+sctp_sosend(struct socket *so,
+ struct sockaddr *addr,
+ struct uio *uio,
+ struct mbuf *top,
+ struct mbuf *control,
+ int flags,
+ struct thread *p
+);
+
+#endif
+#endif
--- /dev/null
+++ sys/netinet/ip_ipsec.h
@@ -0,0 +1,42 @@
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/netinet/ip_ipsec.h,v 1.2 2007/08/05 16:16:15 bz Exp $
+ */
+
+#ifndef _NETINET_IP_IPSEC_H_
+#define _NETINET_IP_IPSEC_H_
+
+int ip_ipsec_filtertunnel(struct mbuf *);
+int ip_ipsec_fwd(struct mbuf *);
+int ip_ipsec_input(struct mbuf *);
+int ip_ipsec_mtu(struct mbuf *);
+int ip_ipsec_output(struct mbuf **, struct inpcb *, int *, int *,
+ struct route **, struct route *, struct sockaddr_in **,
+ struct in_ifaddr **, struct ifnet **);
+#endif
--- /dev/null
+++ sys/netinet/sctp_auth.c
@@ -0,0 +1,2477 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_auth.c,v 1.18.4.1 2008/01/31 17:21:50 rrs Exp $");
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_indata.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_auth.h>
+
+#ifdef SCTP_DEBUG
+#define SCTP_AUTH_DEBUG (sctp_debug_on & SCTP_DEBUG_AUTH1)
+#define SCTP_AUTH_DEBUG2 (sctp_debug_on & SCTP_DEBUG_AUTH2)
+#endif /* SCTP_DEBUG */
+
+
+void
+sctp_clear_chunklist(sctp_auth_chklist_t * chklist)
+{
+ bzero(chklist, sizeof(*chklist));
+ /* chklist->num_chunks = 0; */
+}
+
+sctp_auth_chklist_t *
+sctp_alloc_chunklist(void)
+{
+ sctp_auth_chklist_t *chklist;
+
+ SCTP_MALLOC(chklist, sctp_auth_chklist_t *, sizeof(*chklist),
+ SCTP_M_AUTH_CL);
+ if (chklist == NULL) {
+ SCTPDBG(SCTP_DEBUG_AUTH1, "sctp_alloc_chunklist: failed to get memory!\n");
+ } else {
+ sctp_clear_chunklist(chklist);
+ }
+ return (chklist);
+}
+
+void
+sctp_free_chunklist(sctp_auth_chklist_t * list)
+{
+ if (list != NULL)
+ SCTP_FREE(list, SCTP_M_AUTH_CL);
+}
+
+sctp_auth_chklist_t *
+sctp_copy_chunklist(sctp_auth_chklist_t * list)
+{
+ sctp_auth_chklist_t *new_list;
+
+ if (list == NULL)
+ return (NULL);
+
+ /* get a new list */
+ new_list = sctp_alloc_chunklist();
+ if (new_list == NULL)
+ return (NULL);
+ /* copy it */
+ bcopy(list, new_list, sizeof(*new_list));
+
+ return (new_list);
+}
+
+
+/*
+ * add a chunk to the required chunks list
+ */
+int
+sctp_auth_add_chunk(uint8_t chunk, sctp_auth_chklist_t * list)
+{
+ if (list == NULL)
+ return (-1);
+
+ /* is chunk restricted? */
+ if ((chunk == SCTP_INITIATION) ||
+ (chunk == SCTP_INITIATION_ACK) ||
+ (chunk == SCTP_SHUTDOWN_COMPLETE) ||
+ (chunk == SCTP_AUTHENTICATION)) {
+ return (-1);
+ }
+ if (list->chunks[chunk] == 0) {
+ list->chunks[chunk] = 1;
+ list->num_chunks++;
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: added chunk %u (0x%02x) to Auth list\n",
+ chunk, chunk);
+ }
+ return (0);
+}
+
+/*
+ * delete a chunk from the required chunks list
+ */
+int
+sctp_auth_delete_chunk(uint8_t chunk, sctp_auth_chklist_t * list)
+{
+ if (list == NULL)
+ return (-1);
+
+ /* is chunk restricted? */
+ if ((chunk == SCTP_ASCONF) ||
+ (chunk == SCTP_ASCONF_ACK)) {
+ return (-1);
+ }
+ if (list->chunks[chunk] == 1) {
+ list->chunks[chunk] = 0;
+ list->num_chunks--;
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: deleted chunk %u (0x%02x) from Auth list\n",
+ chunk, chunk);
+ }
+ return (0);
+}
+
+size_t
+sctp_auth_get_chklist_size(const sctp_auth_chklist_t * list)
+{
+ if (list == NULL)
+ return (0);
+ else
+ return (list->num_chunks);
+}
+
+/*
+ * set the default list of chunks requiring AUTH
+ */
+void
+sctp_auth_set_default_chunks(sctp_auth_chklist_t * list)
+{
+ (void)sctp_auth_add_chunk(SCTP_ASCONF, list);
+ (void)sctp_auth_add_chunk(SCTP_ASCONF_ACK, list);
+}
+
+/*
+ * return the current number and list of required chunks; the caller must
+ * guarantee that ptr has space for up to 256 bytes
+ */
+int
+sctp_serialize_auth_chunks(const sctp_auth_chklist_t * list, uint8_t * ptr)
+{
+ int i, count = 0;
+
+ if (list == NULL)
+ return (0);
+
+ for (i = 0; i < 256; i++) {
+ if (list->chunks[i] != 0) {
+ *ptr++ = i;
+ count++;
+ }
+ }
+ return (count);
+}
+
+int
+sctp_pack_auth_chunks(const sctp_auth_chklist_t * list, uint8_t * ptr)
+{
+ int i, size = 0;
+
+ if (list == NULL)
+ return (0);
+
+ if (list->num_chunks <= 32) {
+ /* just list them, one byte each */
+ for (i = 0; i < 256; i++) {
+ if (list->chunks[i] != 0) {
+ *ptr++ = i;
+ size++;
+ }
+ }
+ } else {
+ int index, offset;
+
+ /* pack into a 32 byte bitfield */
+ for (i = 0; i < 256; i++) {
+ if (list->chunks[i] != 0) {
+ index = i / 8;
+ offset = i % 8;
+ ptr[index] |= (1 << offset);
+ }
+ }
+ size = 32;
+ }
+ return (size);
+}
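The one-byte-per-type encoding above is only used while 32 or fewer chunk types are listed; beyond that the list collapses into a fixed 32-byte bitfield. A minimal, self-contained sketch of that bitfield layout (illustrative only, not part of this change; the function name and the 256-entry presence table are invented for the example):

#include <stdint.h>
#include <string.h>

/* Pack a 256-entry chunk-presence table into the 32-byte bitfield form:
 * bit (i % 8) of byte (i / 8) is set when chunk type i requires AUTH. */
static size_t
example_pack_chunk_bitmap(const uint8_t chunks[256], uint8_t out[32])
{
        int i;

        memset(out, 0, 32);
        for (i = 0; i < 256; i++) {
                if (chunks[i] != 0)
                        out[i / 8] |= (uint8_t)(1 << (i % 8));
        }
        return (32);
}

sctp_unpack_auth_chunks() below reverses the same layout when a peer lists more than 32 chunk types.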
+
+int
+sctp_unpack_auth_chunks(const uint8_t * ptr, uint8_t num_chunks,
+ sctp_auth_chklist_t * list)
+{
+ int i;
+ int size;
+
+ if (list == NULL)
+ return (0);
+
+ if (num_chunks <= 32) {
+ /* just pull them, one byte each */
+ for (i = 0; i < num_chunks; i++) {
+ (void)sctp_auth_add_chunk(*ptr++, list);
+ }
+ size = num_chunks;
+ } else {
+ int index, offset;
+
+ /* unpack from a 32 byte bitfield */
+ for (index = 0; index < 32; index++) {
+ for (offset = 0; offset < 8; offset++) {
+ if (ptr[index] & (1 << offset)) {
+ (void)sctp_auth_add_chunk((index * 8) + offset, list);
+ }
+ }
+ }
+ size = 32;
+ }
+ return (size);
+}
+
+
+/*
+ * allocate structure space for a key of length keylen
+ */
+sctp_key_t *
+sctp_alloc_key(uint32_t keylen)
+{
+ sctp_key_t *new_key;
+
+ SCTP_MALLOC(new_key, sctp_key_t *, sizeof(*new_key) + keylen,
+ SCTP_M_AUTH_KY);
+ if (new_key == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ new_key->keylen = keylen;
+ return (new_key);
+}
+
+void
+sctp_free_key(sctp_key_t * key)
+{
+ if (key != NULL)
+ SCTP_FREE(key, SCTP_M_AUTH_KY);
+}
+
+void
+sctp_print_key(sctp_key_t * key, const char *str)
+{
+ uint32_t i;
+
+ if (key == NULL) {
+ printf("%s: [Null key]\n", str);
+ return;
+ }
+ printf("%s: len %u, ", str, key->keylen);
+ if (key->keylen) {
+ for (i = 0; i < key->keylen; i++)
+ printf("%02x", key->key[i]);
+ printf("\n");
+ } else {
+ printf("[Null key]\n");
+ }
+}
+
+void
+sctp_show_key(sctp_key_t * key, const char *str)
+{
+ uint32_t i;
+
+ if (key == NULL) {
+ printf("%s: [Null key]\n", str);
+ return;
+ }
+ printf("%s: len %u, ", str, key->keylen);
+ if (key->keylen) {
+ for (i = 0; i < key->keylen; i++)
+ printf("%02x", key->key[i]);
+ printf("\n");
+ } else {
+ printf("[Null key]\n");
+ }
+}
+
+static uint32_t
+sctp_get_keylen(sctp_key_t * key)
+{
+ if (key != NULL)
+ return (key->keylen);
+ else
+ return (0);
+}
+
+/*
+ * generate a new random key of length 'keylen'
+ */
+sctp_key_t *
+sctp_generate_random_key(uint32_t keylen)
+{
+ sctp_key_t *new_key;
+
+ /* validate keylen */
+ if (keylen > SCTP_AUTH_RANDOM_SIZE_MAX)
+ keylen = SCTP_AUTH_RANDOM_SIZE_MAX;
+
+ new_key = sctp_alloc_key(keylen);
+ if (new_key == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ SCTP_READ_RANDOM(new_key->key, keylen);
+ new_key->keylen = keylen;
+ return (new_key);
+}
+
+sctp_key_t *
+sctp_set_key(uint8_t * key, uint32_t keylen)
+{
+ sctp_key_t *new_key;
+
+ new_key = sctp_alloc_key(keylen);
+ if (new_key == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ bcopy(key, new_key->key, keylen);
+ return (new_key);
+}
+
+/*
+ * given two keys of variable size, compute which key is "larger"/"smaller".
+ * returns: 1 if key1 > key2, -1 if key1 < key2, 0 if key1 == key2
+ */
+static int
+sctp_compare_key(sctp_key_t * key1, sctp_key_t * key2)
+{
+ uint32_t maxlen;
+ uint32_t i;
+ uint32_t key1len, key2len;
+ uint8_t *key_1, *key_2;
+ uint8_t temp[SCTP_AUTH_RANDOM_SIZE_MAX];
+
+ /* sanity/length check */
+ key1len = sctp_get_keylen(key1);
+ key2len = sctp_get_keylen(key2);
+ if ((key1len == 0) && (key2len == 0))
+ return (0);
+ else if (key1len == 0)
+ return (-1);
+ else if (key2len == 0)
+ return (1);
+
+ if (key1len != key2len) {
+ if (key1len >= key2len)
+ maxlen = key1len;
+ else
+ maxlen = key2len;
+ bzero(temp, maxlen);
+ if (key1len < maxlen) {
+ /* prepend zeroes to key1 */
+ bcopy(key1->key, temp + (maxlen - key1len), key1len);
+ key_1 = temp;
+ key_2 = key2->key;
+ } else {
+ /* prepend zeroes to key2 */
+ bcopy(key2->key, temp + (maxlen - key2len), key2len);
+ key_1 = key1->key;
+ key_2 = temp;
+ }
+ } else {
+ maxlen = key1len;
+ key_1 = key1->key;
+ key_2 = key2->key;
+ }
+
+ for (i = 0; i < maxlen; i++) {
+ if (*key_1 > *key_2)
+ return (1);
+ else if (*key_1 < *key_2)
+ return (-1);
+ key_1++;
+ key_2++;
+ }
+
+ /* keys are equal value, so check lengths */
+ if (key1len == key2len)
+ return (0);
+ else if (key1len < key2len)
+ return (-1);
+ else
+ return (1);
+}
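As a concrete illustration of the comparison above: a one-byte key {0x02} compared against a two-byte key {0x00, 0x01} is first left-padded to {0x00, 0x02}, so the byte-wise scan sees 0x02 > 0x01 and returns 1; the shorter key is the numerically larger one, and only when the padded values are equal does the key length break the tie.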
+
+/*
+ * generate the concatenated keying material based on the two keys and the
+ * shared key (if available). draft-ietf-tsvwg-auth specifies the exact
+ * order for concatenation
+ */
+sctp_key_t *
+sctp_compute_hashkey(sctp_key_t * key1, sctp_key_t * key2, sctp_key_t * shared)
+{
+ uint32_t keylen;
+ sctp_key_t *new_key;
+ uint8_t *key_ptr;
+
+ keylen = sctp_get_keylen(key1) + sctp_get_keylen(key2) +
+ sctp_get_keylen(shared);
+
+ if (keylen > 0) {
+ /* get space for the new key */
+ new_key = sctp_alloc_key(keylen);
+ if (new_key == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ new_key->keylen = keylen;
+ key_ptr = new_key->key;
+ } else {
+ /* all keys empty/null?! */
+ return (NULL);
+ }
+
+ /* concatenate the keys */
+ if (sctp_compare_key(key1, key2) <= 0) {
+#ifdef SCTP_AUTH_DRAFT_04
+ /* key is key1 + shared + key2 */
+ if (sctp_get_keylen(key1)) {
+ bcopy(key1->key, key_ptr, key1->keylen);
+ key_ptr += key1->keylen;
+ }
+ if (sctp_get_keylen(shared)) {
+ bcopy(shared->key, key_ptr, shared->keylen);
+ key_ptr += shared->keylen;
+ }
+ if (sctp_get_keylen(key2)) {
+ bcopy(key2->key, key_ptr, key2->keylen);
+ key_ptr += key2->keylen;
+ }
+#else
+ /* key is shared + key1 + key2 */
+ if (sctp_get_keylen(shared)) {
+ bcopy(shared->key, key_ptr, shared->keylen);
+ key_ptr += shared->keylen;
+ }
+ if (sctp_get_keylen(key1)) {
+ bcopy(key1->key, key_ptr, key1->keylen);
+ key_ptr += key1->keylen;
+ }
+ if (sctp_get_keylen(key2)) {
+ bcopy(key2->key, key_ptr, key2->keylen);
+ key_ptr += key2->keylen;
+ }
+#endif
+ } else {
+#ifdef SCTP_AUTH_DRAFT_04
+ /* key is key2 + shared + key1 */
+ if (sctp_get_keylen(key2)) {
+ bcopy(key2->key, key_ptr, key2->keylen);
+ key_ptr += key2->keylen;
+ }
+ if (sctp_get_keylen(shared)) {
+ bcopy(shared->key, key_ptr, shared->keylen);
+ key_ptr += shared->keylen;
+ }
+ if (sctp_get_keylen(key1)) {
+ bcopy(key1->key, key_ptr, key1->keylen);
+ key_ptr += key1->keylen;
+ }
+#else
+ /* key is shared + key2 + key1 */
+ if (sctp_get_keylen(shared)) {
+ bcopy(shared->key, key_ptr, shared->keylen);
+ key_ptr += shared->keylen;
+ }
+ if (sctp_get_keylen(key2)) {
+ bcopy(key2->key, key_ptr, key2->keylen);
+ key_ptr += key2->keylen;
+ }
+ if (sctp_get_keylen(key1)) {
+ bcopy(key1->key, key_ptr, key1->keylen);
+ key_ptr += key1->keylen;
+ }
+#endif
+ }
+ return (new_key);
+}
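Outside the draft-04 compatibility branch, the ordering above is simply the shared key first, then the numerically smaller random, then the larger one. A flat-buffer sketch of that default ordering (illustrative only; it assumes equal-length randoms so that memcmp() agrees with the numeric compare done by sctp_compare_key(), and the function name is invented):

#include <stdint.h>
#include <string.h>

static size_t
example_compute_hashkey(const uint8_t *r1, const uint8_t *r2, size_t rlen,
    const uint8_t *shared, size_t slen, uint8_t *out)
{
        size_t off = 0;

        /* shared key always leads */
        memcpy(out + off, shared, slen);
        off += slen;
        /* then the smaller random, then the larger one */
        if (memcmp(r1, r2, rlen) <= 0) {
                memcpy(out + off, r1, rlen);
                off += rlen;
                memcpy(out + off, r2, rlen);
        } else {
                memcpy(out + off, r2, rlen);
                off += rlen;
                memcpy(out + off, r1, rlen);
        }
        return (off + rlen);
}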
+
+
+sctp_sharedkey_t *
+sctp_alloc_sharedkey(void)
+{
+ sctp_sharedkey_t *new_key;
+
+ SCTP_MALLOC(new_key, sctp_sharedkey_t *, sizeof(*new_key),
+ SCTP_M_AUTH_KY);
+ if (new_key == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ new_key->keyid = 0;
+ new_key->key = NULL;
+ return (new_key);
+}
+
+void
+sctp_free_sharedkey(sctp_sharedkey_t * skey)
+{
+ if (skey != NULL) {
+ if (skey->key != NULL)
+ sctp_free_key(skey->key);
+ SCTP_FREE(skey, SCTP_M_AUTH_KY);
+ }
+}
+
+sctp_sharedkey_t *
+sctp_find_sharedkey(struct sctp_keyhead *shared_keys, uint16_t key_id)
+{
+ sctp_sharedkey_t *skey;
+
+ LIST_FOREACH(skey, shared_keys, next) {
+ if (skey->keyid == key_id)
+ return (skey);
+ }
+ return (NULL);
+}
+
+void
+sctp_insert_sharedkey(struct sctp_keyhead *shared_keys,
+ sctp_sharedkey_t * new_skey)
+{
+ sctp_sharedkey_t *skey;
+
+ if ((shared_keys == NULL) || (new_skey == NULL))
+ return;
+
+ /* insert into an empty list? */
+ if (SCTP_LIST_EMPTY(shared_keys)) {
+ LIST_INSERT_HEAD(shared_keys, new_skey, next);
+ return;
+ }
+ /* insert into the existing list, ordered by key id */
+ LIST_FOREACH(skey, shared_keys, next) {
+ if (new_skey->keyid < skey->keyid) {
+ /* insert it before here */
+ LIST_INSERT_BEFORE(skey, new_skey, next);
+ return;
+ } else if (new_skey->keyid == skey->keyid) {
+ /* replace the existing key */
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "replacing shared key id %u\n",
+ new_skey->keyid);
+ LIST_INSERT_BEFORE(skey, new_skey, next);
+ LIST_REMOVE(skey, next);
+ sctp_free_sharedkey(skey);
+ return;
+ }
+ if (LIST_NEXT(skey, next) == NULL) {
+ /* belongs at the end of the list */
+ LIST_INSERT_AFTER(skey, new_skey, next);
+ return;
+ }
+ }
+}
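A hedged usage sketch tying the shared-key helpers together (not part of this change; the wrapper name is invented, the key bytes and list head are caller-supplied placeholders, and error handling is minimal):

static int
example_install_sharedkey(struct sctp_keyhead *keys, uint16_t keyid,
    uint8_t *keydata, uint32_t keylen)
{
        sctp_sharedkey_t *skey;

        skey = sctp_alloc_sharedkey();
        if (skey == NULL)
                return (-1);
        skey->keyid = keyid;
        skey->key = sctp_set_key(keydata, keylen);
        if (skey->key == NULL) {
                sctp_free_sharedkey(skey);
                return (-1);
        }
        /* keeps the list ordered by key id and replaces a duplicate id */
        sctp_insert_sharedkey(keys, skey);
        return (0);
}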
+
+static sctp_sharedkey_t *
+sctp_copy_sharedkey(const sctp_sharedkey_t * skey)
+{
+ sctp_sharedkey_t *new_skey;
+
+ if (skey == NULL)
+ return (NULL);
+ new_skey = sctp_alloc_sharedkey();
+ if (new_skey == NULL)
+ return (NULL);
+ if (skey->key != NULL)
+ new_skey->key = sctp_set_key(skey->key->key, skey->key->keylen);
+ else
+ new_skey->key = NULL;
+ new_skey->keyid = skey->keyid;
+ return (new_skey);
+}
+
+int
+sctp_copy_skeylist(const struct sctp_keyhead *src, struct sctp_keyhead *dest)
+{
+ sctp_sharedkey_t *skey, *new_skey;
+ int count = 0;
+
+ if ((src == NULL) || (dest == NULL))
+ return (0);
+ LIST_FOREACH(skey, src, next) {
+ new_skey = sctp_copy_sharedkey(skey);
+ if (new_skey != NULL) {
+ sctp_insert_sharedkey(dest, new_skey);
+ count++;
+ }
+ }
+ return (count);
+}
+
+
+sctp_hmaclist_t *
+sctp_alloc_hmaclist(uint8_t num_hmacs)
+{
+ sctp_hmaclist_t *new_list;
+ int alloc_size;
+
+ alloc_size = sizeof(*new_list) + num_hmacs * sizeof(new_list->hmac[0]);
+ SCTP_MALLOC(new_list, sctp_hmaclist_t *, alloc_size,
+ SCTP_M_AUTH_HL);
+ if (new_list == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ new_list->max_algo = num_hmacs;
+ new_list->num_algo = 0;
+ return (new_list);
+}
+
+void
+sctp_free_hmaclist(sctp_hmaclist_t * list)
+{
+ if (list != NULL) {
+ SCTP_FREE(list, SCTP_M_AUTH_HL);
+ list = NULL;
+ }
+}
+
+int
+sctp_auth_add_hmacid(sctp_hmaclist_t * list, uint16_t hmac_id)
+{
+ int i;
+
+ if (list == NULL)
+ return (-1);
+ if (list->num_algo == list->max_algo) {
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: HMAC id list full, ignoring add %u\n", hmac_id);
+ return (-1);
+ }
+ if ((hmac_id != SCTP_AUTH_HMAC_ID_SHA1) &&
+#ifdef HAVE_SHA224
+ (hmac_id != SCTP_AUTH_HMAC_ID_SHA224) &&
+#endif
+#ifdef HAVE_SHA2
+ (hmac_id != SCTP_AUTH_HMAC_ID_SHA256) &&
+ (hmac_id != SCTP_AUTH_HMAC_ID_SHA384) &&
+ (hmac_id != SCTP_AUTH_HMAC_ID_SHA512) &&
+#endif
+ (hmac_id != SCTP_AUTH_HMAC_ID_MD5)) {
+ return (-1);
+ }
+ /* Now is it already in the list */
+ for (i = 0; i < list->num_algo; i++) {
+ if (list->hmac[i] == hmac_id) {
+ /* already in list */
+ return (-1);
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: add HMAC id %u to list\n", hmac_id);
+ list->hmac[list->num_algo++] = hmac_id;
+ return (0);
+}
+
+sctp_hmaclist_t *
+sctp_copy_hmaclist(sctp_hmaclist_t * list)
+{
+ sctp_hmaclist_t *new_list;
+ int i;
+
+ if (list == NULL)
+ return (NULL);
+ /* get a new list */
+ new_list = sctp_alloc_hmaclist(list->max_algo);
+ if (new_list == NULL)
+ return (NULL);
+ /* copy it */
+ new_list->max_algo = list->max_algo;
+ new_list->num_algo = list->num_algo;
+ for (i = 0; i < list->num_algo; i++)
+ new_list->hmac[i] = list->hmac[i];
+ return (new_list);
+}
+
+sctp_hmaclist_t *
+sctp_default_supported_hmaclist(void)
+{
+ sctp_hmaclist_t *new_list;
+
+ new_list = sctp_alloc_hmaclist(2);
+ if (new_list == NULL)
+ return (NULL);
+ (void)sctp_auth_add_hmacid(new_list, SCTP_AUTH_HMAC_ID_SHA1);
+ (void)sctp_auth_add_hmacid(new_list, SCTP_AUTH_HMAC_ID_SHA256);
+ return (new_list);
+}
+
+/*
+ * HMAC algos are listed in priority/preference order; find the best HMAC id
+ * to use for the peer based on local support
+ */
+uint16_t
+sctp_negotiate_hmacid(sctp_hmaclist_t * peer, sctp_hmaclist_t * local)
+{
+ int i, j;
+
+ if ((local == NULL) || (peer == NULL))
+ return (SCTP_AUTH_HMAC_ID_RSVD);
+
+ for (i = 0; i < peer->num_algo; i++) {
+ for (j = 0; j < local->num_algo; j++) {
+ if (peer->hmac[i] == local->hmac[j]) {
+#ifndef SCTP_AUTH_DRAFT_04
+ /* "skip" MD5 as it's been deprecated */
+ if (peer->hmac[i] == SCTP_AUTH_HMAC_ID_MD5)
+ continue;
+#endif
+
+ /* found the "best" one */
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: negotiated peer HMAC id %u\n",
+ peer->hmac[i]);
+ return (peer->hmac[i]);
+ }
+ }
+ }
+ /* didn't find one! */
+ return (SCTP_AUTH_HMAC_ID_RSVD);
+}
+
+/*
+ * serialize the HMAC algo list and return the space used; the caller must
+ * guarantee that ptr has appropriate space
+ */
+int
+sctp_serialize_hmaclist(sctp_hmaclist_t * list, uint8_t * ptr)
+{
+ int i;
+ uint16_t hmac_id;
+
+ if (list == NULL)
+ return (0);
+
+ for (i = 0; i < list->num_algo; i++) {
+ hmac_id = htons(list->hmac[i]);
+ bcopy(&hmac_id, ptr, sizeof(hmac_id));
+ ptr += sizeof(hmac_id);
+ }
+ return (list->num_algo * sizeof(hmac_id));
+}
+
+int
+sctp_verify_hmac_param(struct sctp_auth_hmac_algo *hmacs, uint32_t num_hmacs)
+{
+ uint32_t i;
+ uint16_t hmac_id;
+ uint32_t sha1_supported = 0;
+
+ for (i = 0; i < num_hmacs; i++) {
+ hmac_id = ntohs(hmacs->hmac_ids[i]);
+ if (hmac_id == SCTP_AUTH_HMAC_ID_SHA1)
+ sha1_supported = 1;
+ }
+ /* all HMAC id's are supported */
+ if (sha1_supported == 0)
+ return (-1);
+ else
+ return (0);
+}
+
+sctp_authinfo_t *
+sctp_alloc_authinfo(void)
+{
+ sctp_authinfo_t *new_authinfo;
+
+ SCTP_MALLOC(new_authinfo, sctp_authinfo_t *, sizeof(*new_authinfo),
+ SCTP_M_AUTH_IF);
+
+ if (new_authinfo == NULL) {
+ /* out of memory */
+ return (NULL);
+ }
+ bzero(new_authinfo, sizeof(*new_authinfo));
+ return (new_authinfo);
+}
+
+void
+sctp_free_authinfo(sctp_authinfo_t * authinfo)
+{
+ if (authinfo == NULL)
+ return;
+
+ if (authinfo->random != NULL)
+ sctp_free_key(authinfo->random);
+ if (authinfo->peer_random != NULL)
+ sctp_free_key(authinfo->peer_random);
+ if (authinfo->assoc_key != NULL)
+ sctp_free_key(authinfo->assoc_key);
+ if (authinfo->recv_key != NULL)
+ sctp_free_key(authinfo->recv_key);
+
+ /* We are NOT dynamically allocating authinfo's right now... */
+ /* SCTP_FREE(authinfo, SCTP_M_AUTH_??); */
+}
+
+
+uint32_t
+sctp_get_auth_chunk_len(uint16_t hmac_algo)
+{
+ int size;
+
+ size = sizeof(struct sctp_auth_chunk) + sctp_get_hmac_digest_len(hmac_algo);
+ return (SCTP_SIZE32(size));
+}
+
+uint32_t
+sctp_get_hmac_digest_len(uint16_t hmac_algo)
+{
+ switch (hmac_algo) {
+ case SCTP_AUTH_HMAC_ID_SHA1:
+ return (SCTP_AUTH_DIGEST_LEN_SHA1);
+ case SCTP_AUTH_HMAC_ID_MD5:
+ return (SCTP_AUTH_DIGEST_LEN_MD5);
+#ifdef HAVE_SHA224
+ case SCTP_AUTH_HMAC_ID_SHA224:
+ return (SCTP_AUTH_DIGEST_LEN_SHA224);
+#endif
+#ifdef HAVE_SHA2
+ case SCTP_AUTH_HMAC_ID_SHA256:
+ return (SCTP_AUTH_DIGEST_LEN_SHA256);
+ case SCTP_AUTH_HMAC_ID_SHA384:
+ return (SCTP_AUTH_DIGEST_LEN_SHA384);
+ case SCTP_AUTH_HMAC_ID_SHA512:
+ return (SCTP_AUTH_DIGEST_LEN_SHA512);
+#endif
+ default:
+ /* unknown HMAC algorithm: can't do anything */
+ return (0);
+ } /* end switch */
+}
+
+static inline int
+sctp_get_hmac_block_len(uint16_t hmac_algo)
+{
+ switch (hmac_algo) {
+ case SCTP_AUTH_HMAC_ID_SHA1:
+ case SCTP_AUTH_HMAC_ID_MD5:
+#ifdef HAVE_SHA224
+ case SCTP_AUTH_HMAC_ID_SHA224:
+#endif
+ return (64);
+#ifdef HAVE_SHA2
+ case SCTP_AUTH_HMAC_ID_SHA256:
+ return (64);
+ case SCTP_AUTH_HMAC_ID_SHA384:
+ case SCTP_AUTH_HMAC_ID_SHA512:
+ return (128);
+#endif
+ case SCTP_AUTH_HMAC_ID_RSVD:
+ default:
+ /* unknown HMAC algorithm: can't do anything */
+ return (0);
+ } /* end switch */
+}
+
+static void
+sctp_hmac_init(uint16_t hmac_algo, sctp_hash_context_t * ctx)
+{
+ switch (hmac_algo) {
+ case SCTP_AUTH_HMAC_ID_SHA1:
+ SHA1_Init(&ctx->sha1);
+ break;
+ case SCTP_AUTH_HMAC_ID_MD5:
+ MD5_Init(&ctx->md5);
+ break;
+#ifdef HAVE_SHA224
+ case SCTP_AUTH_HMAC_ID_SHA224:
+ break;
+#endif
+#ifdef HAVE_SHA2
+ case SCTP_AUTH_HMAC_ID_SHA256:
+ SHA256_Init(&ctx->sha256);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA384:
+ SHA384_Init(&ctx->sha384);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA512:
+ SHA512_Init(&ctx->sha512);
+ break;
+#endif
+ case SCTP_AUTH_HMAC_ID_RSVD:
+ default:
+ /* unknown HMAC algorithm: can't do anything */
+ return;
+ } /* end switch */
+}
+
+static void
+sctp_hmac_update(uint16_t hmac_algo, sctp_hash_context_t * ctx,
+ uint8_t * text, uint32_t textlen)
+{
+ switch (hmac_algo) {
+ case SCTP_AUTH_HMAC_ID_SHA1:
+ SHA1_Update(&ctx->sha1, text, textlen);
+ break;
+ case SCTP_AUTH_HMAC_ID_MD5:
+ MD5_Update(&ctx->md5, text, textlen);
+ break;
+#ifdef HAVE_SHA224
+ case SCTP_AUTH_HMAC_ID_SHA224:
+ break;
+#endif
+#ifdef HAVE_SHA2
+ case SCTP_AUTH_HMAC_ID_SHA256:
+ SHA256_Update(&ctx->sha256, text, textlen);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA384:
+ SHA384_Update(&ctx->sha384, text, textlen);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA512:
+ SHA512_Update(&ctx->sha512, text, textlen);
+ break;
+#endif
+ case SCTP_AUTH_HMAC_ID_RSVD:
+ default:
+ /* unknown HMAC algorithm: can't do anything */
+ return;
+ } /* end switch */
+}
+
+static void
+sctp_hmac_final(uint16_t hmac_algo, sctp_hash_context_t * ctx,
+ uint8_t * digest)
+{
+ switch (hmac_algo) {
+ case SCTP_AUTH_HMAC_ID_SHA1:
+ SHA1_Final(digest, &ctx->sha1);
+ break;
+ case SCTP_AUTH_HMAC_ID_MD5:
+ MD5_Final(digest, &ctx->md5);
+ break;
+#ifdef HAVE_SHA224
+ case SCTP_AUTH_HMAC_ID_SHA224:
+ break;
+#endif
+#ifdef HAVE_SHA2
+ case SCTP_AUTH_HMAC_ID_SHA256:
+ SHA256_Final(digest, &ctx->sha256);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA384:
+ /* SHA384 is truncated SHA512 */
+ SHA384_Final(digest, &ctx->sha384);
+ break;
+ case SCTP_AUTH_HMAC_ID_SHA512:
+ SHA512_Final(digest, &ctx->sha512);
+ break;
+#endif
+ case SCTP_AUTH_HMAC_ID_RSVD:
+ default:
+ /* unknown HMAC algorithm: can't do anything */
+ return;
+ } /* end switch */
+}
+
+/*
+ * Keyed-Hashing for Message Authentication: FIPS 198 (RFC 2104)
+ *
+ * Compute the HMAC digest using the desired hash key, text, and HMAC
+ * algorithm. Resulting digest is placed in 'digest' and digest length
+ * is returned if the HMAC was performed.
+ *
+ * WARNING: it is up to the caller to supply sufficient space to hold the
+ * resultant digest.
+ */
+uint32_t
+sctp_hmac(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ uint8_t * text, uint32_t textlen, uint8_t * digest)
+{
+ uint32_t digestlen;
+ uint32_t blocklen;
+ sctp_hash_context_t ctx;
+ uint8_t ipad[128], opad[128]; /* keyed hash inner/outer pads */
+ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX];
+ uint32_t i;
+
+ /* sanity check the material and length */
+ if ((key == NULL) || (keylen == 0) || (text == NULL) ||
+ (textlen == 0) || (digest == NULL)) {
+ /* can't do HMAC with empty key or text or digest store */
+ return (0);
+ }
+ /* validate the hmac algo and get the digest length */
+ digestlen = sctp_get_hmac_digest_len(hmac_algo);
+ if (digestlen == 0)
+ return (0);
+
+ /* hash the key if it is longer than the hash block size */
+ blocklen = sctp_get_hmac_block_len(hmac_algo);
+ if (keylen > blocklen) {
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, key, keylen);
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+ /* set the hashed key as the key */
+ keylen = digestlen;
+ key = temp;
+ }
+ /* initialize the inner/outer pads with the key and "append" zeroes */
+ bzero(ipad, blocklen);
+ bzero(opad, blocklen);
+ bcopy(key, ipad, keylen);
+ bcopy(key, opad, keylen);
+
+ /* XOR the key with ipad and opad values */
+ for (i = 0; i < blocklen; i++) {
+ ipad[i] ^= 0x36;
+ opad[i] ^= 0x5c;
+ }
+
+ /* perform inner hash */
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, ipad, blocklen);
+ sctp_hmac_update(hmac_algo, &ctx, text, textlen);
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+
+ /* perform outer hash */
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, opad, blocklen);
+ sctp_hmac_update(hmac_algo, &ctx, temp, digestlen);
+ sctp_hmac_final(hmac_algo, &ctx, digest);
+
+ return (digestlen);
+}
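For a flat (non-mbuf) buffer the routine above is called directly; a minimal sketch using the RFC 2202 test-case-1 key as placeholder input (illustrative only; the wrapper name is invented, and the expected 20-byte digest matches the SCTP_HMAC_TEST vector near the end of this file):

static void
example_hmac_sha1(void)
{
        uint8_t key[20];
        char text[] = "Hi There";
        uint8_t digest[SCTP_AUTH_DIGEST_LEN_MAX];
        uint32_t dlen;

        memset(key, 0x0b, sizeof(key));         /* RFC 2202 test case 1 key */
        dlen = sctp_hmac(SCTP_AUTH_HMAC_ID_SHA1, key, sizeof(key),
            (uint8_t *)text, strlen(text), digest);
        /* dlen is SCTP_AUTH_DIGEST_LEN_SHA1 (20) on success, 0 on failure */
}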
+
+/* mbuf version */
+uint32_t
+sctp_hmac_m(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ struct mbuf *m, uint32_t m_offset, uint8_t * digest, uint32_t trailer)
+{
+ uint32_t digestlen;
+ uint32_t blocklen;
+ sctp_hash_context_t ctx;
+ uint8_t ipad[128], opad[128]; /* keyed hash inner/outer pads */
+ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX];
+ uint32_t i;
+ struct mbuf *m_tmp;
+
+ /* sanity check the material and length */
+ if ((key == NULL) || (keylen == 0) || (m == NULL) || (digest == NULL)) {
+ /* can't do HMAC with empty key or text or digest store */
+ return (0);
+ }
+ /* validate the hmac algo and get the digest length */
+ digestlen = sctp_get_hmac_digest_len(hmac_algo);
+ if (digestlen == 0)
+ return (0);
+
+ /* hash the key if it is longer than the hash block size */
+ blocklen = sctp_get_hmac_block_len(hmac_algo);
+ if (keylen > blocklen) {
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, key, keylen);
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+ /* set the hashed key as the key */
+ keylen = digestlen;
+ key = temp;
+ }
+ /* initialize the inner/outer pads with the key and "append" zeroes */
+ bzero(ipad, blocklen);
+ bzero(opad, blocklen);
+ bcopy(key, ipad, keylen);
+ bcopy(key, opad, keylen);
+
+ /* XOR the key with ipad and opad values */
+ for (i = 0; i < blocklen; i++) {
+ ipad[i] ^= 0x36;
+ opad[i] ^= 0x5c;
+ }
+
+ /* perform inner hash */
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, ipad, blocklen);
+ /* find the correct starting mbuf and offset (get start of text) */
+ m_tmp = m;
+ while ((m_tmp != NULL) && (m_offset >= (uint32_t) SCTP_BUF_LEN(m_tmp))) {
+ m_offset -= SCTP_BUF_LEN(m_tmp);
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+ /* now use the rest of the mbuf chain for the text */
+ while (m_tmp != NULL) {
+ if ((SCTP_BUF_NEXT(m_tmp) == NULL) && trailer) {
+ sctp_hmac_update(hmac_algo, &ctx, mtod(m_tmp, uint8_t *) + m_offset,
+ SCTP_BUF_LEN(m_tmp) - (trailer + m_offset));
+ } else {
+ sctp_hmac_update(hmac_algo, &ctx, mtod(m_tmp, uint8_t *) + m_offset,
+ SCTP_BUF_LEN(m_tmp) - m_offset);
+ }
+
+ /* clear the offset since it's only for the first mbuf */
+ m_offset = 0;
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+
+ /* perform outer hash */
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, opad, blocklen);
+ sctp_hmac_update(hmac_algo, &ctx, temp, digestlen);
+ sctp_hmac_final(hmac_algo, &ctx, digest);
+
+ return (digestlen);
+}
+
+/*
+ * verify the HMAC digest using the desired hash key, text, and HMAC
+ * algorithm. Returns -1 on error, 0 on success.
+ */
+int
+sctp_verify_hmac(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ uint8_t * text, uint32_t textlen,
+ uint8_t * digest, uint32_t digestlen)
+{
+ uint32_t len;
+ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX];
+
+ /* sanity check the material and length */
+ if ((key == NULL) || (keylen == 0) ||
+ (text == NULL) || (textlen == 0) || (digest == NULL)) {
+ /* can't do HMAC with empty key or text or digest */
+ return (-1);
+ }
+ len = sctp_get_hmac_digest_len(hmac_algo);
+ if ((len == 0) || (digestlen != len))
+ return (-1);
+
+ /* compute the expected hash */
+ if (sctp_hmac(hmac_algo, key, keylen, text, textlen, temp) != len)
+ return (-1);
+
+ if (memcmp(digest, temp, digestlen) != 0)
+ return (-1);
+ else
+ return (0);
+}
+
+
+/*
+ * computes the requested HMAC using a key struct (which may be modified if
+ * the keylen exceeds the HMAC block len).
+ */
+uint32_t
+sctp_compute_hmac(uint16_t hmac_algo, sctp_key_t * key, uint8_t * text,
+ uint32_t textlen, uint8_t * digest)
+{
+ uint32_t digestlen;
+ uint32_t blocklen;
+ sctp_hash_context_t ctx;
+ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX];
+
+ /* sanity check */
+ if ((key == NULL) || (text == NULL) || (textlen == 0) ||
+ (digest == NULL)) {
+ /* can't do HMAC with empty key or text or digest store */
+ return (0);
+ }
+ /* validate the hmac algo and get the digest length */
+ digestlen = sctp_get_hmac_digest_len(hmac_algo);
+ if (digestlen == 0)
+ return (0);
+
+ /* hash the key if it is longer than the hash block size */
+ blocklen = sctp_get_hmac_block_len(hmac_algo);
+ if (key->keylen > blocklen) {
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, key->key, key->keylen);
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+ /* save the hashed key as the new key */
+ key->keylen = digestlen;
+ bcopy(temp, key->key, key->keylen);
+ }
+ return (sctp_hmac(hmac_algo, key->key, key->keylen, text, textlen,
+ digest));
+}
+
+/* mbuf version */
+uint32_t
+sctp_compute_hmac_m(uint16_t hmac_algo, sctp_key_t * key, struct mbuf *m,
+ uint32_t m_offset, uint8_t * digest)
+{
+ uint32_t digestlen;
+ uint32_t blocklen;
+ sctp_hash_context_t ctx;
+ uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX];
+
+ /* sanity check */
+ if ((key == NULL) || (m == NULL) || (digest == NULL)) {
+ /* can't do HMAC with empty key or text or digest store */
+ return (0);
+ }
+ /* validate the hmac algo and get the digest length */
+ digestlen = sctp_get_hmac_digest_len(hmac_algo);
+ if (digestlen == 0)
+ return (0);
+
+ /* hash the key if it is longer than the hash block size */
+ blocklen = sctp_get_hmac_block_len(hmac_algo);
+ if (key->keylen > blocklen) {
+ sctp_hmac_init(hmac_algo, &ctx);
+ sctp_hmac_update(hmac_algo, &ctx, key->key, key->keylen);
+ sctp_hmac_final(hmac_algo, &ctx, temp);
+ /* save the hashed key as the new key */
+ key->keylen = digestlen;
+ bcopy(temp, key->key, key->keylen);
+ }
+ return (sctp_hmac_m(hmac_algo, key->key, key->keylen, m, m_offset, digest, 0));
+}
+
+int
+sctp_auth_is_supported_hmac(sctp_hmaclist_t * list, uint16_t id)
+{
+ int i;
+
+ if ((list == NULL) || (id == SCTP_AUTH_HMAC_ID_RSVD))
+ return (0);
+
+ for (i = 0; i < list->num_algo; i++)
+ if (list->hmac[i] == id)
+ return (1);
+
+ /* not in the list */
+ return (0);
+}
+
+
+/*
+ * clear any cached key(s) if they match the given key id on an association;
+ * the cached key(s) will be recomputed and re-cached at next use. ASSUMES
+ * TCB_LOCK is already held
+ */
+void
+sctp_clear_cachedkeys(struct sctp_tcb *stcb, uint16_t keyid)
+{
+ if (stcb == NULL)
+ return;
+
+ if (keyid == stcb->asoc.authinfo.assoc_keyid) {
+ sctp_free_key(stcb->asoc.authinfo.assoc_key);
+ stcb->asoc.authinfo.assoc_key = NULL;
+ }
+ if (keyid == stcb->asoc.authinfo.recv_keyid) {
+ sctp_free_key(stcb->asoc.authinfo.recv_key);
+ stcb->asoc.authinfo.recv_key = NULL;
+ }
+}
+
+/*
+ * clear any cached key(s) if they match the given key id for all assocs on
+ * an endpoint. ASSUMES INP_WLOCK is already held
+ */
+void
+sctp_clear_cachedkeys_ep(struct sctp_inpcb *inp, uint16_t keyid)
+{
+ struct sctp_tcb *stcb;
+
+ if (inp == NULL)
+ return;
+
+ /* clear the cached keys on all assocs on this instance */
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ sctp_clear_cachedkeys(stcb, keyid);
+ SCTP_TCB_UNLOCK(stcb);
+ }
+}
+
+/*
+ * delete a shared key from an association. ASSUMES TCB_LOCK is already held
+ */
+int
+sctp_delete_sharedkey(struct sctp_tcb *stcb, uint16_t keyid)
+{
+ sctp_sharedkey_t *skey;
+
+ if (stcb == NULL)
+ return (-1);
+
+ /* is the keyid the assoc active sending key */
+ if (keyid == stcb->asoc.authinfo.assoc_keyid)
+ return (-1);
+
+ /* does the key exist? */
+ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, keyid);
+ if (skey == NULL)
+ return (-1);
+
+ /* remove it */
+ LIST_REMOVE(skey, next);
+ sctp_free_sharedkey(skey); /* frees skey->key as well */
+
+ /* clear any cached keys */
+ sctp_clear_cachedkeys(stcb, keyid);
+ return (0);
+}
+
+/*
+ * deletes a shared key from the endpoint. ASSUMES INP_WLOCK is already held
+ */
+int
+sctp_delete_sharedkey_ep(struct sctp_inpcb *inp, uint16_t keyid)
+{
+ sctp_sharedkey_t *skey;
+ struct sctp_tcb *stcb;
+
+ if (inp == NULL)
+ return (-1);
+
+ /* is the keyid the active sending key on the endpoint or any assoc */
+ if (keyid == inp->sctp_ep.default_keyid)
+ return (-1);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ if (keyid == stcb->asoc.authinfo.assoc_keyid) {
+ SCTP_TCB_UNLOCK(stcb);
+ return (-1);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+
+ /* does the key exist? */
+ skey = sctp_find_sharedkey(&inp->sctp_ep.shared_keys, keyid);
+ if (skey == NULL)
+ return (-1);
+
+ /* remove it */
+ LIST_REMOVE(skey, next);
+ sctp_free_sharedkey(skey); /* frees skey->key as well */
+
+ /* clear any cached keys */
+ sctp_clear_cachedkeys_ep(inp, keyid);
+ return (0);
+}
+
+/*
+ * set the active key on an association. ASSUMES TCB_LOCK is already held
+ */
+int
+sctp_auth_setactivekey(struct sctp_tcb *stcb, uint16_t keyid)
+{
+ sctp_sharedkey_t *skey = NULL;
+ sctp_key_t *key = NULL;
+ int using_ep_key = 0;
+
+ /* find the key on the assoc */
+ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, keyid);
+ if (skey == NULL) {
+ /* if not on the assoc, find the key on the endpoint */
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RLOCK(stcb->sctp_ep);
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ skey = sctp_find_sharedkey(&stcb->sctp_ep->sctp_ep.shared_keys,
+ keyid);
+ using_ep_key = 1;
+ }
+ if (skey == NULL) {
+ /* that key doesn't exist */
+ if (using_ep_key) {
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ }
+ return (-1);
+ }
+ /* get the shared key text */
+ key = skey->key;
+
+ /* free any existing cached key */
+ if (stcb->asoc.authinfo.assoc_key != NULL)
+ sctp_free_key(stcb->asoc.authinfo.assoc_key);
+ /* compute a new assoc key and cache it */
+ stcb->asoc.authinfo.assoc_key =
+ sctp_compute_hashkey(stcb->asoc.authinfo.random,
+ stcb->asoc.authinfo.peer_random, key);
+ stcb->asoc.authinfo.assoc_keyid = keyid;
+#ifdef SCTP_DEBUG
+ if (SCTP_AUTH_DEBUG)
+ sctp_print_key(stcb->asoc.authinfo.assoc_key, "Assoc Key");
+#endif
+
+ if (using_ep_key) {
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ }
+ return (0);
+}
+
+/*
+ * set the active key on an endpoint. ASSUMES INP_WLOCK is already held
+ */
+int
+sctp_auth_setactivekey_ep(struct sctp_inpcb *inp, uint16_t keyid)
+{
+ sctp_sharedkey_t *skey;
+
+ /* find the key */
+ skey = sctp_find_sharedkey(&inp->sctp_ep.shared_keys, keyid);
+ if (skey == NULL) {
+ /* that key doesn't exist */
+ return (-1);
+ }
+ inp->sctp_ep.default_keyid = keyid;
+ return (0);
+}
+
+/*
+ * get local authentication parameters from cookie (from INIT-ACK)
+ */
+void
+sctp_auth_get_cookie_params(struct sctp_tcb *stcb, struct mbuf *m,
+ uint32_t offset, uint32_t length)
+{
+ struct sctp_paramhdr *phdr, tmp_param;
+ uint16_t plen, ptype;
+ uint8_t random_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_random *p_random = NULL;
+ uint16_t random_len = 0;
+ uint8_t hmacs_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_hmac_algo *hmacs = NULL;
+ uint16_t hmacs_len = 0;
+ uint8_t chunks_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_chunk_list *chunks = NULL;
+ uint16_t num_chunks = 0;
+ sctp_key_t *new_key;
+ uint32_t keylen;
+
+ /* convert to upper bound */
+ length += offset;
+
+ phdr = (struct sctp_paramhdr *)sctp_m_getptr(m, offset,
+ sizeof(struct sctp_paramhdr), (uint8_t *) & tmp_param);
+ while (phdr != NULL) {
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+
+ if ((plen == 0) || (offset + plen > length))
+ break;
+
+ if (ptype == SCTP_RANDOM) {
+ if (plen > sizeof(random_store))
+ break;
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)random_store, min(plen, sizeof(random_store)));
+ if (phdr == NULL)
+ return;
+ /* save the random and length for the key */
+ p_random = (struct sctp_auth_random *)phdr;
+ random_len = plen - sizeof(*p_random);
+ } else if (ptype == SCTP_HMAC_LIST) {
+ int num_hmacs;
+ int i;
+
+ if (plen > sizeof(hmacs_store))
+ break;
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)hmacs_store, min(plen, sizeof(hmacs_store)));
+ if (phdr == NULL)
+ return;
+ /* save the hmacs list and num for the key */
+ hmacs = (struct sctp_auth_hmac_algo *)phdr;
+ hmacs_len = plen - sizeof(*hmacs);
+ num_hmacs = hmacs_len / sizeof(hmacs->hmac_ids[0]);
+ if (stcb->asoc.local_hmacs != NULL)
+ sctp_free_hmaclist(stcb->asoc.local_hmacs);
+ stcb->asoc.local_hmacs = sctp_alloc_hmaclist(num_hmacs);
+ if (stcb->asoc.local_hmacs != NULL) {
+ for (i = 0; i < num_hmacs; i++) {
+ (void)sctp_auth_add_hmacid(stcb->asoc.local_hmacs,
+ ntohs(hmacs->hmac_ids[i]));
+ }
+ }
+ } else if (ptype == SCTP_CHUNK_LIST) {
+ int i;
+
+ if (plen > sizeof(chunks_store))
+ break;
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)chunks_store, min(plen, sizeof(chunks_store)));
+ if (phdr == NULL)
+ return;
+ chunks = (struct sctp_auth_chunk_list *)phdr;
+ num_chunks = plen - sizeof(*chunks);
+ /* save chunks list and num for the key */
+ if (stcb->asoc.local_auth_chunks != NULL)
+ sctp_clear_chunklist(stcb->asoc.local_auth_chunks);
+ else
+ stcb->asoc.local_auth_chunks = sctp_alloc_chunklist();
+ for (i = 0; i < num_chunks; i++) {
+ (void)sctp_auth_add_chunk(chunks->chunk_types[i],
+ stcb->asoc.local_auth_chunks);
+ }
+ }
+ /* get next parameter */
+ offset += SCTP_SIZE32(plen);
+ if (offset + sizeof(struct sctp_paramhdr) > length)
+ break;
+ phdr = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, sizeof(struct sctp_paramhdr),
+ (uint8_t *) & tmp_param);
+ }
+ /* concatenate the full random key */
+#ifdef SCTP_AUTH_DRAFT_04
+ keylen = random_len;
+ new_key = sctp_alloc_key(keylen);
+ if (new_key != NULL) {
+ /* copy in the RANDOM */
+ if (p_random != NULL)
+ bcopy(p_random->random_data, new_key->key, random_len);
+ }
+#else
+ keylen = sizeof(*p_random) + random_len + sizeof(*chunks) + num_chunks +
+ sizeof(*hmacs) + hmacs_len;
+ new_key = sctp_alloc_key(keylen);
+ if (new_key != NULL) {
+ /* copy in the RANDOM */
+ if (p_random != NULL) {
+ keylen = sizeof(*p_random) + random_len;
+ bcopy(p_random, new_key->key, keylen);
+ }
+ /* append in the AUTH chunks */
+ if (chunks != NULL) {
+ bcopy(chunks, new_key->key + keylen,
+ sizeof(*chunks) + num_chunks);
+ keylen += sizeof(*chunks) + num_chunks;
+ }
+ /* append in the HMACs */
+ if (hmacs != NULL) {
+ bcopy(hmacs, new_key->key + keylen,
+ sizeof(*hmacs) + hmacs_len);
+ }
+ }
+#endif
+ if (stcb->asoc.authinfo.random != NULL)
+ sctp_free_key(stcb->asoc.authinfo.random);
+ stcb->asoc.authinfo.random = new_key;
+ stcb->asoc.authinfo.random_len = random_len;
+#ifdef SCTP_AUTH_DRAFT_04
+ /* don't include the chunks and hmacs for draft -04 */
+ stcb->asoc.authinfo.random->keylen = random_len;
+#endif
+ sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.assoc_keyid);
+ sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.recv_keyid);
+
+ /* negotiate what HMAC to use for the peer */
+ stcb->asoc.peer_hmac_id = sctp_negotiate_hmacid(stcb->asoc.peer_hmacs,
+ stcb->asoc.local_hmacs);
+ /* copy defaults from the endpoint */
+ /* FIX ME: put in cookie? */
+ stcb->asoc.authinfo.assoc_keyid = stcb->sctp_ep->sctp_ep.default_keyid;
+}
+
+/*
+ * compute and fill in the HMAC digest for a packet
+ */
+void
+sctp_fill_hmac_digest_m(struct mbuf *m, uint32_t auth_offset,
+ struct sctp_auth_chunk *auth, struct sctp_tcb *stcb)
+{
+ uint32_t digestlen;
+ sctp_sharedkey_t *skey;
+ sctp_key_t *key;
+
+ if ((stcb == NULL) || (auth == NULL))
+ return;
+
+ /* zero the digest + chunk padding */
+ digestlen = sctp_get_hmac_digest_len(stcb->asoc.peer_hmac_id);
+ bzero(auth->hmac, SCTP_SIZE32(digestlen));
+ /* is an assoc key cached? */
+ if (stcb->asoc.authinfo.assoc_key == NULL) {
+ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys,
+ stcb->asoc.authinfo.assoc_keyid);
+ if (skey == NULL) {
+ /* not in the assoc list, so check the endpoint list */
+ skey = sctp_find_sharedkey(&stcb->sctp_ep->sctp_ep.shared_keys,
+ stcb->asoc.authinfo.assoc_keyid);
+ }
+ /* the only way skey is NULL is if null key id 0 is used */
+ if (skey != NULL)
+ key = skey->key;
+ else
+ key = NULL;
+ /* compute a new assoc key and cache it */
+ stcb->asoc.authinfo.assoc_key =
+ sctp_compute_hashkey(stcb->asoc.authinfo.random,
+ stcb->asoc.authinfo.peer_random, key);
+ SCTPDBG(SCTP_DEBUG_AUTH1, "caching key id %u\n",
+ stcb->asoc.authinfo.assoc_keyid);
+#ifdef SCTP_DEBUG
+ if (SCTP_AUTH_DEBUG)
+ sctp_print_key(stcb->asoc.authinfo.assoc_key,
+ "Assoc Key");
+#endif
+ }
+ /* set in the active key id */
+ auth->shared_key_id = htons(stcb->asoc.authinfo.assoc_keyid);
+
+ /* compute and fill in the digest */
+ (void)sctp_compute_hmac_m(stcb->asoc.peer_hmac_id,
+ stcb->asoc.authinfo.assoc_key,
+ m, auth_offset, auth->hmac);
+}
+
+
+static void
+sctp_bzero_m(struct mbuf *m, uint32_t m_offset, uint32_t size)
+{
+ struct mbuf *m_tmp;
+ uint8_t *data;
+
+ /* sanity check */
+ if (m == NULL)
+ return;
+
+ /* find the correct starting mbuf and offset (get start position) */
+ m_tmp = m;
+ while ((m_tmp != NULL) && (m_offset >= (uint32_t) SCTP_BUF_LEN(m_tmp))) {
+ m_offset -= SCTP_BUF_LEN(m_tmp);
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+ /* now use the rest of the mbuf chain */
+ while ((m_tmp != NULL) && (size > 0)) {
+ data = mtod(m_tmp, uint8_t *) + m_offset;
+ if (size > (uint32_t) SCTP_BUF_LEN(m_tmp)) {
+ bzero(data, SCTP_BUF_LEN(m_tmp));
+ size -= SCTP_BUF_LEN(m_tmp);
+ } else {
+ bzero(data, size);
+ size = 0;
+ }
+ /* clear the offset since it's only for the first mbuf */
+ m_offset = 0;
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+}
+
+/*
+ * process the incoming Authentication chunk. return codes: -1 on any
+ * authentication error, 0 on successful authentication verification
+ */
+int
+sctp_handle_auth(struct sctp_tcb *stcb, struct sctp_auth_chunk *auth,
+ struct mbuf *m, uint32_t offset)
+{
+ uint16_t chunklen;
+ uint16_t shared_key_id;
+ uint16_t hmac_id;
+ sctp_sharedkey_t *skey;
+ uint32_t digestlen;
+ uint8_t digest[SCTP_AUTH_DIGEST_LEN_MAX];
+ uint8_t computed_digest[SCTP_AUTH_DIGEST_LEN_MAX];
+
+ /* auth is checked for NULL by caller */
+ chunklen = ntohs(auth->ch.chunk_length);
+ if (chunklen < sizeof(*auth)) {
+ SCTP_STAT_INCR(sctps_recvauthfailed);
+ return (-1);
+ }
+ SCTP_STAT_INCR(sctps_recvauth);
+
+ /* get the auth params */
+ shared_key_id = ntohs(auth->shared_key_id);
+ hmac_id = ntohs(auth->hmac_id);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP AUTH Chunk: shared key %u, HMAC id %u\n",
+ shared_key_id, hmac_id);
+
+ /* is the indicated HMAC supported? */
+ if (!sctp_auth_is_supported_hmac(stcb->asoc.local_hmacs, hmac_id)) {
+ struct mbuf *m_err;
+ struct sctp_auth_invalid_hmac *err;
+
+ SCTP_STAT_INCR(sctps_recvivalhmacid);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP Auth: unsupported HMAC id %u\n",
+ hmac_id);
+ /*
+ * report this in an Error Chunk: Unsupported HMAC
+ * Identifier
+ */
+ m_err = sctp_get_mbuf_for_msg(sizeof(*err), 0, M_DONTWAIT,
+ 1, MT_HEADER);
+ if (m_err != NULL) {
+ /* pre-reserve some space */
+ SCTP_BUF_RESV_UF(m_err, sizeof(struct sctp_chunkhdr));
+ /* fill in the error */
+ err = mtod(m_err, struct sctp_auth_invalid_hmac *);
+ bzero(err, sizeof(*err));
+ err->ph.param_type = htons(SCTP_CAUSE_UNSUPPORTED_HMACID);
+ err->ph.param_length = htons(sizeof(*err));
+ err->hmac_id = ntohs(hmac_id);
+ SCTP_BUF_LEN(m_err) = sizeof(*err);
+ /* queue it */
+ sctp_queue_op_err(stcb, m_err);
+ }
+ return (-1);
+ }
+ /* get the indicated shared key, if available */
+ if ((stcb->asoc.authinfo.recv_key == NULL) ||
+ (stcb->asoc.authinfo.recv_keyid != shared_key_id)) {
+ /* find the shared key on the assoc first */
+ skey = sctp_find_sharedkey(&stcb->asoc.shared_keys, shared_key_id);
+ if (skey == NULL) {
+ /* if not on the assoc, find it on the endpoint */
+ skey = sctp_find_sharedkey(&stcb->sctp_ep->sctp_ep.shared_keys,
+ shared_key_id);
+ }
+ /* if the shared key isn't found, discard the chunk */
+ if (skey == NULL) {
+ SCTP_STAT_INCR(sctps_recvivalkeyid);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP Auth: unknown key id %u\n",
+ shared_key_id);
+ return (-1);
+ }
+ /* generate a notification if this is a new key id */
+ if (stcb->asoc.authinfo.recv_keyid != shared_key_id)
+ /*
+ * sctp_ulp_notify(SCTP_NOTIFY_AUTH_NEW_KEY, stcb,
+ * shared_key_id, (void
+ * *)stcb->asoc.authinfo.recv_keyid);
+ */
+ sctp_notify_authentication(stcb, SCTP_AUTH_NEWKEY,
+ shared_key_id, stcb->asoc.authinfo.recv_keyid);
+ /* compute a new recv assoc key and cache it */
+ if (stcb->asoc.authinfo.recv_key != NULL)
+ sctp_free_key(stcb->asoc.authinfo.recv_key);
+ stcb->asoc.authinfo.recv_key =
+ sctp_compute_hashkey(stcb->asoc.authinfo.random,
+ stcb->asoc.authinfo.peer_random, skey->key);
+ stcb->asoc.authinfo.recv_keyid = shared_key_id;
+#ifdef SCTP_DEBUG
+ if (SCTP_AUTH_DEBUG)
+ sctp_print_key(stcb->asoc.authinfo.recv_key, "Recv Key");
+#endif
+ }
+ /* validate the digest length */
+ digestlen = sctp_get_hmac_digest_len(hmac_id);
+ if (chunklen < (sizeof(*auth) + digestlen)) {
+ /* invalid digest length */
+ SCTP_STAT_INCR(sctps_recvauthfailed);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP Auth: chunk too short for HMAC\n");
+ return (-1);
+ }
+ /* save a copy of the digest, zero the pseudo header, and validate */
+ bcopy(auth->hmac, digest, digestlen);
+ sctp_bzero_m(m, offset + sizeof(*auth), SCTP_SIZE32(digestlen));
+ (void)sctp_compute_hmac_m(hmac_id, stcb->asoc.authinfo.recv_key,
+ m, offset, computed_digest);
+
+ /* compare the computed digest with the one in the AUTH chunk */
+ if (memcmp(digest, computed_digest, digestlen) != 0) {
+ SCTP_STAT_INCR(sctps_recvauthfailed);
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP Auth: HMAC digest check failed\n");
+ return (-1);
+ }
+ return (0);
+}
+
+/*
+ * Generate NOTIFICATION
+ */
+void
+sctp_notify_authentication(struct sctp_tcb *stcb, uint32_t indication,
+ uint16_t keyid, uint16_t alt_keyid)
+{
+ struct mbuf *m_notify;
+ struct sctp_authkey_event *auth;
+ struct sctp_queued_to_read *control;
+
+ if ((stcb == NULL) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET)
+ ) {
+ /* If the socket is gone we are out of here */
+ return;
+ }
+ if (sctp_is_feature_off(stcb->sctp_ep, SCTP_PCB_FLAGS_AUTHEVNT))
+ /* event not enabled */
+ return;
+
+ m_notify = sctp_get_mbuf_for_msg(sizeof(struct sctp_authkey_event),
+ 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_notify == NULL)
+ /* no space left */
+ return;
+
+ SCTP_BUF_LEN(m_notify) = 0;
+ auth = mtod(m_notify, struct sctp_authkey_event *);
+ auth->auth_type = SCTP_AUTHENTICATION_EVENT;
+ auth->auth_flags = 0;
+ auth->auth_length = sizeof(*auth);
+ auth->auth_keynumber = keyid;
+ auth->auth_altkeynumber = alt_keyid;
+ auth->auth_indication = indication;
+ auth->auth_assoc_id = sctp_get_associd(stcb);
+
+ SCTP_BUF_LEN(m_notify) = sizeof(*auth);
+ SCTP_BUF_NEXT(m_notify) = NULL;
+
+ /* append to socket */
+ control = sctp_build_readq_entry(stcb, stcb->asoc.primary_destination,
+ 0, 0, 0, 0, 0, 0, m_notify);
+ if (control == NULL) {
+ /* no memory */
+ sctp_m_freem(m_notify);
+ return;
+ }
+ control->spec_flags = M_NOTIFICATION;
+ control->length = SCTP_BUF_LEN(m_notify);
+ /* not that we need this */
+ control->tail_mbuf = m_notify;
+ sctp_add_to_readq(stcb->sctp_ep, stcb, control,
+ &stcb->sctp_socket->so_rcv, 1, SCTP_SO_NOT_LOCKED);
+}
+
+
+/*
+ * validates the AUTHentication related parameters in an INIT/INIT-ACK
+ * Note: currently only used for INIT as INIT-ACK is handled inline
+ * with sctp_load_addresses_from_init()
+ */
+int
+sctp_validate_init_auth_params(struct mbuf *m, int offset, int limit)
+{
+ struct sctp_paramhdr *phdr, parm_buf;
+ uint16_t ptype, plen;
+ int peer_supports_asconf = 0;
+ int peer_supports_auth = 0;
+ int got_random = 0, got_hmacs = 0, got_chklist = 0;
+ uint8_t saw_asconf = 0;
+ uint8_t saw_asconf_ack = 0;
+
+ /* go through each of the params. */
+ phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf));
+ while (phdr) {
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+
+ if (offset + plen > limit) {
+ break;
+ }
+ if (plen < sizeof(struct sctp_paramhdr)) {
+ break;
+ }
+ if (ptype == SCTP_SUPPORTED_CHUNK_EXT) {
+ /* A supported extension chunk */
+ struct sctp_supported_chunk_types_param *pr_supported;
+ uint8_t local_store[SCTP_PARAM_BUFFER_SIZE];
+ int num_ent, i;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&local_store, min(plen, sizeof(local_store)));
+ if (phdr == NULL) {
+ return (-1);
+ }
+ pr_supported = (struct sctp_supported_chunk_types_param *)phdr;
+ num_ent = plen - sizeof(struct sctp_paramhdr);
+ for (i = 0; i < num_ent; i++) {
+ switch (pr_supported->chunk_types[i]) {
+ case SCTP_ASCONF:
+ case SCTP_ASCONF_ACK:
+ peer_supports_asconf = 1;
+ break;
+ case SCTP_AUTHENTICATION:
+ peer_supports_auth = 1;
+ break;
+ default:
+ /* one we don't care about */
+ break;
+ }
+ }
+ } else if (ptype == SCTP_RANDOM) {
+ got_random = 1;
+ /* enforce the random length */
+ if (plen != (sizeof(struct sctp_auth_random) +
+ SCTP_AUTH_RANDOM_SIZE_REQUIRED)) {
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: invalid RANDOM len\n");
+ return (-1);
+ }
+ } else if (ptype == SCTP_HMAC_LIST) {
+ uint8_t store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_hmac_algo *hmacs;
+ int num_hmacs;
+
+ if (plen > sizeof(store))
+ break;
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)store, min(plen, sizeof(store)));
+ if (phdr == NULL)
+ return (-1);
+ hmacs = (struct sctp_auth_hmac_algo *)phdr;
+ num_hmacs = (plen - sizeof(*hmacs)) /
+ sizeof(hmacs->hmac_ids[0]);
+ /* validate the hmac list */
+ if (sctp_verify_hmac_param(hmacs, num_hmacs)) {
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: invalid HMAC param\n");
+ return (-1);
+ }
+ got_hmacs = 1;
+ } else if (ptype == SCTP_CHUNK_LIST) {
+ int i, num_chunks;
+ uint8_t chunks_store[SCTP_SMALL_CHUNK_STORE];
+
+ /* did the peer send a non-empty chunk list? */
+ struct sctp_auth_chunk_list *chunks = NULL;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)chunks_store,
+ min(plen, sizeof(chunks_store)));
+ if (phdr == NULL)
+ return (-1);
+
+ /*-
+ * Flip through the list and mark that the
+ * peer supports asconf/asconf_ack.
+ */
+ chunks = (struct sctp_auth_chunk_list *)phdr;
+ num_chunks = plen - sizeof(*chunks);
+ for (i = 0; i < num_chunks; i++) {
+ /* record asconf/asconf-ack if listed */
+ if (chunks->chunk_types[i] == SCTP_ASCONF)
+ saw_asconf = 1;
+ if (chunks->chunk_types[i] == SCTP_ASCONF_ACK)
+ saw_asconf_ack = 1;
+
+ }
+ if (num_chunks)
+ got_chklist = 1;
+ }
+ offset += SCTP_SIZE32(plen);
+ if (offset >= limit) {
+ break;
+ }
+ phdr = sctp_get_next_param(m, offset, &parm_buf,
+ sizeof(parm_buf));
+ }
+ /* validate authentication required parameters */
+ if (got_random && got_hmacs) {
+ peer_supports_auth = 1;
+ } else {
+ peer_supports_auth = 0;
+ }
+ if (!peer_supports_auth && got_chklist) {
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: peer sent chunk list w/o AUTH\n");
+ return (-1);
+ }
+ if (!sctp_asconf_auth_nochk && peer_supports_asconf &&
+ !peer_supports_auth) {
+ SCTPDBG(SCTP_DEBUG_AUTH1,
+ "SCTP: peer supports ASCONF but not AUTH\n");
+ return (-1);
+ } else if ((peer_supports_asconf) && (peer_supports_auth) &&
+ ((saw_asconf == 0) || (saw_asconf_ack == 0))) {
+ return (-2);
+ }
+ return (0);
+}
+
+void
+sctp_initialize_auth_params(struct sctp_inpcb *inp, struct sctp_tcb *stcb)
+{
+ uint16_t chunks_len = 0;
+ uint16_t hmacs_len = 0;
+ uint16_t random_len = SCTP_AUTH_RANDOM_SIZE_DEFAULT;
+ sctp_key_t *new_key;
+ uint16_t keylen;
+
+ /* initialize hmac list from endpoint */
+ stcb->asoc.local_hmacs = sctp_copy_hmaclist(inp->sctp_ep.local_hmacs);
+ if (stcb->asoc.local_hmacs != NULL) {
+ hmacs_len = stcb->asoc.local_hmacs->num_algo *
+ sizeof(stcb->asoc.local_hmacs->hmac[0]);
+ }
+ /* initialize auth chunks list from endpoint */
+ stcb->asoc.local_auth_chunks =
+ sctp_copy_chunklist(inp->sctp_ep.local_auth_chunks);
+ if (stcb->asoc.local_auth_chunks != NULL) {
+ int i;
+
+ for (i = 0; i < 256; i++) {
+ if (stcb->asoc.local_auth_chunks->chunks[i])
+ chunks_len++;
+ }
+ }
+ /* copy defaults from the endpoint */
+ stcb->asoc.authinfo.assoc_keyid = inp->sctp_ep.default_keyid;
+
+ /* now set the concatenated key (random + chunks + hmacs) */
+#ifdef SCTP_AUTH_DRAFT_04
+ /* don't include the chunks and hmacs for draft -04 */
+ keylen = random_len;
+ new_key = sctp_generate_random_key(keylen);
+#else
+ /* key includes parameter headers */
+ keylen = (3 * sizeof(struct sctp_paramhdr)) + random_len + chunks_len +
+ hmacs_len;
+ new_key = sctp_alloc_key(keylen);
+ if (new_key != NULL) {
+ struct sctp_paramhdr *ph;
+ int plen;
+
+ /* generate and copy in the RANDOM */
+ ph = (struct sctp_paramhdr *)new_key->key;
+ ph->param_type = htons(SCTP_RANDOM);
+ plen = sizeof(*ph) + random_len;
+ ph->param_length = htons(plen);
+ SCTP_READ_RANDOM(new_key->key + sizeof(*ph), random_len);
+ keylen = plen;
+
+ /* append in the AUTH chunks */
+ /* NOTE: currently we always have chunks to list */
+ ph = (struct sctp_paramhdr *)(new_key->key + keylen);
+ ph->param_type = htons(SCTP_CHUNK_LIST);
+ plen = sizeof(*ph) + chunks_len;
+ ph->param_length = htons(plen);
+ keylen += sizeof(*ph);
+ if (stcb->asoc.local_auth_chunks) {
+ int i;
+
+ for (i = 0; i < 256; i++) {
+ if (stcb->asoc.local_auth_chunks->chunks[i])
+ new_key->key[keylen++] = i;
+ }
+ }
+ /* append in the HMACs */
+ ph = (struct sctp_paramhdr *)(new_key->key + keylen);
+ ph->param_type = htons(SCTP_HMAC_LIST);
+ plen = sizeof(*ph) + hmacs_len;
+ ph->param_length = htons(plen);
+ keylen += sizeof(*ph);
+ (void)sctp_serialize_hmaclist(stcb->asoc.local_hmacs,
+ new_key->key + keylen);
+ }
+#endif
+ if (stcb->asoc.authinfo.random != NULL)
+ sctp_free_key(stcb->asoc.authinfo.random);
+ stcb->asoc.authinfo.random = new_key;
+ stcb->asoc.authinfo.random_len = random_len;
+}
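
The endpoint key built above is three back-to-back TLVs (RANDOM, CHUNK_LIST, HMAC_LIST), each prefixed by a struct sctp_paramhdr whose length covers the header plus its data. The following is a minimal userland sketch of that layout logic only; the paramhdr stand-in, the P_* constants, and build_assoc_key() are illustrative assumptions, not part of this commit.

#include <arpa/inet.h>	/* htons */
#include <stdint.h>
#include <string.h>

/* stand-ins for the kernel definitions; values are illustrative only */
struct paramhdr { uint16_t type, length; };
#define P_RANDOM	0x8002
#define P_CHUNK_LIST	0x8003
#define P_HMAC_LIST	0x8004

/* append one TLV (header + payload) at "key + off" and return the new offset */
static size_t
append_tlv(uint8_t *key, size_t off, uint16_t type,
    const uint8_t *data, uint16_t datalen)
{
	struct paramhdr ph;

	ph.type = htons(type);
	ph.length = htons((uint16_t)(sizeof(ph) + datalen));
	memcpy(key + off, &ph, sizeof(ph));
	memcpy(key + off + sizeof(ph), data, datalen);
	return (off + sizeof(ph) + datalen);
}

/* concatenate random + chunk list + hmac list, mirroring the layout above */
size_t
build_assoc_key(uint8_t *key, const uint8_t *rnd, uint16_t rnd_len,
    const uint8_t *chunks, uint16_t chunks_len,
    const uint8_t *hmacs, uint16_t hmacs_len)
{
	size_t off = 0;

	off = append_tlv(key, off, P_RANDOM, rnd, rnd_len);
	off = append_tlv(key, off, P_CHUNK_LIST, chunks, chunks_len);
	off = append_tlv(key, off, P_HMAC_LIST, hmacs, hmacs_len);
	return (off);	/* total key length */
}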
+
+
+#ifdef SCTP_HMAC_TEST
+/*
+ * HMAC and key concatenation tests
+ */
+static void
+sctp_print_digest(uint8_t * digest, uint32_t digestlen, const char *str)
+{
+ uint32_t i;
+
+ printf("\n%s: 0x", str);
+ if (digest == NULL)
+ return;
+
+ for (i = 0; i < digestlen; i++)
+ printf("%02x", digest[i]);
+}
+
+static int
+sctp_test_hmac(const char *str, uint16_t hmac_id, uint8_t * key,
+ uint32_t keylen, uint8_t * text, uint32_t textlen,
+ uint8_t * digest, uint32_t digestlen)
+{
+ uint8_t computed_digest[SCTP_AUTH_DIGEST_LEN_MAX];
+
+ printf("\n%s:", str);
+ sctp_hmac(hmac_id, key, keylen, text, textlen, computed_digest);
+ sctp_print_digest(digest, digestlen, "Expected digest");
+ sctp_print_digest(computed_digest, digestlen, "Computed digest");
+ if (memcmp(digest, computed_digest, digestlen) != 0) {
+ printf("\nFAILED");
+ return (-1);
+ } else {
+ printf("\nPASSED");
+ return (0);
+ }
+}
+
+
+/*
+ * RFC 2202: HMAC-SHA1 test cases
+ */
+void
+sctp_test_hmac_sha1(void)
+{
+ uint8_t *digest;
+ uint8_t key[128];
+ uint32_t keylen;
+ uint8_t text[128];
+ uint32_t textlen;
+ uint32_t digestlen = 20;
+ int failed = 0;
+
+ /*
+ * test_case = 1 key =
+ * 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b key_len = 20
+ * data = "Hi There" data_len = 8 digest =
+ * 0xb617318655057264e28bc0b6fb378c8ef146be00
+ */
+ keylen = 20;
+ memset(key, 0x0b, keylen);
+ textlen = 8;
+ strcpy(text, "Hi There");
+ digest = "\xb6\x17\x31\x86\x55\x05\x72\x64\xe2\x8b\xc0\xb6\xfb\x37\x8c\x8e\xf1\x46\xbe\x00";
+ if (sctp_test_hmac("SHA1 test case 1", SCTP_AUTH_HMAC_ID_SHA1, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 2 key = "Jefe" key_len = 4 data =
+ * "what do ya want for nothing?" data_len = 28 digest =
+ * 0xeffcdf6ae5eb2fa2d27416d5f184df9c259a7c79
+ */
+ keylen = 4;
+ strcpy(key, "Jefe");
+ textlen = 28;
+ strcpy(text, "what do ya want for nothing?");
+ digest = "\xef\xfc\xdf\x6a\xe5\xeb\x2f\xa2\xd2\x74\x16\xd5\xf1\x84\xdf\x9c\x25\x9a\x7c\x79";
+ if (sctp_test_hmac("SHA1 test case 2", SCTP_AUTH_HMAC_ID_SHA1, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 3 key =
+ * 0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa key_len = 20
+ * data = 0xdd repeated 50 times data_len = 50 digest
+ * = 0x125d7342b9ac11cd91a39af48aa17b4f63f175d3
+ */
+ keylen = 20;
+ memset(key, 0xaa, keylen);
+ textlen = 50;
+ memset(text, 0xdd, textlen);
+ digest = "\x12\x5d\x73\x42\xb9\xac\x11\xcd\x91\xa3\x9a\xf4\x8a\xa1\x7b\x4f\x63\xf1\x75\xd3";
+ if (sctp_test_hmac("SHA1 test case 3", SCTP_AUTH_HMAC_ID_SHA1, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 4 key =
+ * 0x0102030405060708090a0b0c0d0e0f10111213141516171819 key_len = 25
+ * data = 0xcd repeated 50 times data_len = 50 digest
+ * = 0x4c9007f4026250c6bc8414f9bf50c86c2d7235da
+ */
+ keylen = 25;
+ memcpy(key, "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19", keylen);
+ textlen = 50;
+ memset(text, 0xcd, textlen);
+ digest = "\x4c\x90\x07\xf4\x02\x62\x50\xc6\xbc\x84\x14\xf9\xbf\x50\xc8\x6c\x2d\x72\x35\xda";
+ if (sctp_test_hmac("SHA1 test case 4", SCTP_AUTH_HMAC_ID_SHA1, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 5 key =
+ * 0x0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c key_len = 20
+ * data = "Test With Truncation" data_len = 20 digest
+ * = 0x4c1a03424b55e07fe7f27be1d58bb9324a9a5a04 digest-96 =
+ * 0x4c1a03424b55e07fe7f27be1
+ */
+ keylen = 20;
+ memset(key, 0x0c, keylen);
+ textlen = 20;
+ strcpy(text, "Test With Truncation");
+ digest = "\x4c\x1a\x03\x42\x4b\x55\xe0\x7f\xe7\xf2\x7b\xe1\xd5\x8b\xb9\x32\x4a\x9a\x5a\x04";
+ if (sctp_test_hmac("SHA1 test case 5", SCTP_AUTH_HMAC_ID_SHA1, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 6 key = 0xaa repeated 80 times key_len
+ * = 80 data = "Test Using Larger Than Block-Size Key -
+ * Hash Key First" data_len = 54 digest =
+ * 0xaa4ae5e15272d00e95705637ce8a3b55ed402112
+ */
+ keylen = 80;
+ memset(key, 0xaa, keylen);
+ textlen = 54;
+ strcpy(text, "Test Using Larger Than Block-Size Key - Hash Key First");
+ digest = "\xaa\x4a\xe5\xe1\x52\x72\xd0\x0e\x95\x70\x56\x37\xce\x8a\x3b\x55\xed\x40\x21\x12";
+ if (sctp_test_hmac("SHA1 test case 6", SCTP_AUTH_HMAC_ID_SHA1, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 7 key = 0xaa repeated 80 times key_len
+ * = 80 data = "Test Using Larger Than Block-Size Key and
+ * Larger Than One Block-Size Data" data_len = 73 digest =
+ * 0xe8e99d0f45237d786d6bbaa7965c7808bbff1a91
+ */
+ keylen = 80;
+ memset(key, 0xaa, keylen);
+ textlen = 73;
+ strcpy(text, "Test Using Larger Than Block-Size Key and Larger Than One Block-Size Data");
+ digest = "\xe8\xe9\x9d\x0f\x45\x23\x7d\x78\x6d\x6b\xba\xa7\x96\x5c\x78\x08\xbb\xff\x1a\x91";
+ if (sctp_test_hmac("SHA1 test case 7", SCTP_AUTH_HMAC_ID_SHA1, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /* done with all tests */
+ if (failed)
+ printf("\nSHA1 test results: %d cases failed", failed);
+ else
+ printf("\nSHA1 test results: all test cases passed");
+}
+
+/*
+ * RFC 2202: HMAC-MD5 test cases
+ */
+void
+sctp_test_hmac_md5(void)
+{
+ uint8_t *digest;
+ uint8_t key[128];
+ uint32_t keylen;
+ uint8_t text[128];
+ uint32_t textlen;
+ uint32_t digestlen = 16;
+ int failed = 0;
+
+ /*
+ * test_case = 1 key = 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b
+ * key_len = 16 data = "Hi There" data_len = 8 digest =
+ * 0x9294727a3638bb1c13f48ef8158bfc9d
+ */
+ keylen = 16;
+ memset(key, 0x0b, keylen);
+ textlen = 8;
+ strcpy(text, "Hi There");
+ digest = "\x92\x94\x72\x7a\x36\x38\xbb\x1c\x13\xf4\x8e\xf8\x15\x8b\xfc\x9d";
+ if (sctp_test_hmac("MD5 test case 1", SCTP_AUTH_HMAC_ID_MD5, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 2 key = "Jefe" key_len = 4 data =
+ * "what do ya want for nothing?" data_len = 28 digest =
+ * 0x750c783e6ab0b503eaa86e310a5db738
+ */
+ keylen = 4;
+ strcpy(key, "Jefe");
+ textlen = 28;
+ strcpy(text, "what do ya want for nothing?");
+ digest = "\x75\x0c\x78\x3e\x6a\xb0\xb5\x03\xea\xa8\x6e\x31\x0a\x5d\xb7\x38";
+ if (sctp_test_hmac("MD5 test case 2", SCTP_AUTH_HMAC_ID_MD5, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 3 key = 0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ * key_len = 16 data = 0xdd repeated 50 times data_len = 50
+ * digest = 0x56be34521d144c88dbb8c733f0e8b3f6
+ */
+ keylen = 16;
+ memset(key, 0xaa, keylen);
+ textlen = 50;
+ memset(text, 0xdd, textlen);
+ digest = "\x56\xbe\x34\x52\x1d\x14\x4c\x88\xdb\xb8\xc7\x33\xf0\xe8\xb3\xf6";
+ if (sctp_test_hmac("MD5 test case 3", SCTP_AUTH_HMAC_ID_MD5, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 4 key =
+ * 0x0102030405060708090a0b0c0d0e0f10111213141516171819 key_len = 25
+ * data = 0xcd repeated 50 times data_len = 50 digest
+ * = 0x697eaf0aca3a3aea3a75164746ffaa79
+ */
+ keylen = 25;
+ memcpy(key, "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19", keylen);
+ textlen = 50;
+ memset(text, 0xcd, textlen);
+ digest = "\x69\x7e\xaf\x0a\xca\x3a\x3a\xea\x3a\x75\x16\x47\x46\xff\xaa\x79";
+ if (sctp_test_hmac("MD5 test case 4", SCTP_AUTH_HMAC_ID_MD5, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 5 key = 0x0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c0c
+ * key_len = 16 data = "Test With Truncation" data_len = 20
+ * digest = 0x56461ef2342edc00f9bab995690efd4c digest-96
+ * 0x56461ef2342edc00f9bab995
+ */
+ keylen = 16;
+ memset(key, 0x0c, keylen);
+ textlen = 20;
+ strcpy(text, "Test With Truncation");
+ digest = "\x56\x46\x1e\xf2\x34\x2e\xdc\x00\xf9\xba\xb9\x95\x69\x0e\xfd\x4c";
+ if (sctp_test_hmac("MD5 test case 5", SCTP_AUTH_HMAC_ID_MD5, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 6 key = 0xaa repeated 80 times key_len
+ * = 80 data = "Test Using Larger Than Block-Size Key -
+ * Hash Key First" data_len = 54 digest =
+ * 0x6b1ab7fe4bd7bf8f0b62e6ce61b9d0cd
+ */
+ keylen = 80;
+ memset(key, 0xaa, keylen);
+ textlen = 54;
+ strcpy(text, "Test Using Larger Than Block-Size Key - Hash Key First");
+ digest = "\x6b\x1a\xb7\xfe\x4b\xd7\xbf\x8f\x0b\x62\xe6\xce\x61\xb9\xd0\xcd";
+ if (sctp_test_hmac("MD5 test case 6", SCTP_AUTH_HMAC_ID_MD5, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /*
+ * test_case = 7 key = 0xaa repeated 80 times key_len
+ * = 80 data = "Test Using Larger Than Block-Size Key and
+ * Larger Than One Block-Size Data" data_len = 73 digest =
+ * 0x6f630fad67cda0ee1fb1f562db3aa53e
+ */
+ keylen = 80;
+ memset(key, 0xaa, keylen);
+ textlen = 73;
+ strcpy(text, "Test Using Larger Than Block-Size Key and Larger Than One Block-Size Data");
+ digest = "\x6f\x63\x0f\xad\x67\xcd\xa0\xee\x1f\xb1\xf5\x62\xdb\x3a\xa5\x3e";
+ if (sctp_test_hmac("MD5 test case 7", SCTP_AUTH_HMAC_ID_MD5, key, keylen,
+ text, textlen, digest, digestlen) < 0)
+ failed++;
+
+ /* done with all tests */
+ if (failed)
+ printf("\nMD5 test results: %d cases failed", failed);
+ else
+ printf("\nMD5 test results: all test cases passed");
+}
+
+/*
+ * test assoc key concatenation
+ */
+static int
+sctp_test_key_concatenation(sctp_key_t * key1, sctp_key_t * key2,
+ sctp_key_t * expected_key)
+{
+ sctp_key_t *key;
+ int ret_val;
+
+ sctp_show_key(key1, "\nkey1");
+ sctp_show_key(key2, "\nkey2");
+ key = sctp_compute_hashkey(key1, key2, NULL);
+ sctp_show_key(expected_key, "\nExpected");
+ sctp_show_key(key, "\nComputed");
+ if (memcmp(key, expected_key, expected_key->keylen) != 0) {
+ printf("\nFAILED");
+ ret_val = -1;
+ } else {
+ printf("\nPASSED");
+ ret_val = 0;
+ }
+ sctp_free_key(key1);
+ sctp_free_key(key2);
+ sctp_free_key(expected_key);
+ sctp_free_key(key);
+ return (ret_val);
+}
+
+
+void
+sctp_test_authkey(void)
+{
+ sctp_key_t *key1, *key2, *expected_key;
+ int failed = 0;
+
+ /* test case 1 */
+ key1 = sctp_set_key("\x01\x01\x01\x01", 4);
+ key2 = sctp_set_key("\x01\x02\x03\x04", 4);
+ expected_key = sctp_set_key("\x01\x01\x01\x01\x01\x02\x03\x04", 8);
+ if (sctp_test_key_concatenation(key1, key2, expected_key) < 0)
+ failed++;
+
+ /* test case 2 */
+ key1 = sctp_set_key("\x00\x00\x00\x01", 4);
+ key2 = sctp_set_key("\x02", 1);
+ expected_key = sctp_set_key("\x00\x00\x00\x01\x02", 5);
+ if (sctp_test_key_concatenation(key1, key2, expected_key) < 0)
+ failed++;
+
+ /* test case 3 */
+ key1 = sctp_set_key("\x01", 1);
+ key2 = sctp_set_key("\x00\x00\x00\x02", 4);
+ expected_key = sctp_set_key("\x01\x00\x00\x00\x02", 5);
+ if (sctp_test_key_concatenation(key1, key2, expected_key) < 0)
+ failed++;
+
+ /* test case 4 */
+ key1 = sctp_set_key("\x00\x00\x00\x01", 4);
+ key2 = sctp_set_key("\x01", 1);
+ expected_key = sctp_set_key("\x01\x00\x00\x00\x01", 5);
+ if (sctp_test_key_concatenation(key1, key2, expected_key) < 0)
+ failed++;
+
+ /* test case 5 */
+ key1 = sctp_set_key("\x01", 1);
+ key2 = sctp_set_key("\x00\x00\x00\x01", 4);
+ expected_key = sctp_set_key("\x01\x00\x00\x00\x01", 5);
+ if (sctp_test_key_concatenation(key1, key2, expected_key) < 0)
+ failed++;
+
+ /* test case 6 */
+ key1 = sctp_set_key("\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07", 11);
+ key2 = sctp_set_key("\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x08", 11);
+ expected_key = sctp_set_key("\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x08", 22);
+ if (sctp_test_key_concatenation(key1, key2, expected_key) < 0)
+ failed++;
+
+ /* test case 7 */
+ key1 = sctp_set_key("\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x08", 11);
+ key2 = sctp_set_key("\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07", 11);
+ expected_key = sctp_set_key("\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x08", 22);
+ if (sctp_test_key_concatenation(key1, key2, expected_key) < 0)
+ failed++;
+
+ /* done with all tests */
+ if (failed)
+ printf("\nKey concatenation test results: %d cases failed", failed);
+ else
+ printf("\nKey concatenation test results: all test cases passed");
+}
+
+
+#if defined(STANDALONE_HMAC_TEST)
+int
+main(void)
+{
+ sctp_test_hmac_sha1();
+ sctp_test_hmac_md5();
+ sctp_test_authkey();
+}
+
+#endif /* STANDALONE_HMAC_TEST */
+
+#endif /* SCTP_HMAC_TEST */
--- /dev/null
+++ sys/netinet/sctp_lock_bsd.h
@@ -0,0 +1,403 @@
+#ifndef __sctp_lock_bsd_h__
+#define __sctp_lock_bsd_h__
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * General locking concepts: The goal of our locking is to provide
+ * consistency while minimizing overhead. We attempt to use non-recursive
+ * locks, which are supposed to be quite inexpensive. To make that work,
+ * most functions are not aware of locking. Once we have a TCB we lock it
+ * and unlock it when we are through. This means that the TCB lock is
+ * kind-of a "global" lock when working on an association. Caution must be
+ * used when asserting a TCB_LOCK, since if we recurse we deadlock.
+ *
+ * Most other locks (INP and INFO) attempt to localize the locking, i.e. we
+ * try to contain the lock and unlock within the function that needs to
+ * lock it. This sometimes means we do extra locks and unlocks and lose a
+ * bit of efficiency, but if the performance statements about non-recursive
+ * locks are true this should not be a problem. One issue that arises with
+ * this lock-only-when-needed approach is implicit association setup: if,
+ * at the time I look up an association, the TCB comes back NULL, some
+ * other processor could have created it by the time I call in to create
+ * the association. This is what the CREATE lock on the endpoint is for.
+ * Places where we will be implicitly creating the association OR just
+ * creating an association (the connect call) will assert the CREATE_INP
+ * lock. This assures us that during all the lookups of INP and INFO, if
+ * another creator is also locking/looking up, we can gate the two to
+ * synchronize. So the CREATE_INP lock is another one we must use with
+ * extreme caution when locking to make sure we don't hit a re-entrancy
+ * issue.
+ *
+ * For non-FreeBSD 5.x systems we provide a bunch of EMPTY lock macros so
+ * we can blatantly put locks everywhere; they reduce to nothing on
+ * NetBSD/OpenBSD and FreeBSD 4.x.
+ *
+ */
+
+/*
+ * When working with the global SCTP lists we lock and unlock the INP_INFO
+ * lock. So when we go to look up an association we will want to do a
+ * SCTP_INP_INFO_RLOCK(), and when we want to add a new association to
+ * the sctppcbinfo lists we will do a SCTP_INP_INFO_WLOCK().
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_lock_bsd.h,v 1.10 2007/09/18 15:16:38 rrs Exp $");
+
+
+extern struct sctp_foo_stuff sctp_logoff[];
+extern int sctp_logoff_stuff;
+
+#define SCTP_IPI_COUNT_INIT()
+
+#define SCTP_STATLOG_INIT_LOCK()
+#define SCTP_STATLOG_LOCK()
+#define SCTP_STATLOG_UNLOCK()
+#define SCTP_STATLOG_DESTROY()
+
+#define SCTP_INP_INFO_LOCK_DESTROY() do { \
+	if (rw_wowned(&sctppcbinfo.ipi_ep_mtx)) { \
+		rw_wunlock(&sctppcbinfo.ipi_ep_mtx); \
+	} \
+	rw_destroy(&sctppcbinfo.ipi_ep_mtx); \
+ } while (0)
+
+#define SCTP_INP_INFO_LOCK_INIT() \
+ rw_init(&sctppcbinfo.ipi_ep_mtx, "sctp-info");
+
+
+#define SCTP_INP_INFO_RLOCK() do { \
+ rw_rlock(&sctppcbinfo.ipi_ep_mtx); \
+} while (0)
+
+
+#define SCTP_INP_INFO_WLOCK() do { \
+ rw_wlock(&sctppcbinfo.ipi_ep_mtx); \
+} while (0)
+
+
+#define SCTP_INP_INFO_RUNLOCK() rw_runlock(&sctppcbinfo.ipi_ep_mtx)
+#define SCTP_INP_INFO_WUNLOCK() rw_wunlock(&sctppcbinfo.ipi_ep_mtx)
+
+
+#define SCTP_IPI_ADDR_INIT() \
+ rw_init(&sctppcbinfo.ipi_addr_mtx, "sctp-addr")
+
+#define SCTP_IPI_ADDR_DESTROY() do { \
+	if (rw_wowned(&sctppcbinfo.ipi_addr_mtx)) { \
+		rw_wunlock(&sctppcbinfo.ipi_addr_mtx); \
+	} \
+	rw_destroy(&sctppcbinfo.ipi_addr_mtx); \
+ } while (0)
+
+
+
+#define SCTP_IPI_ADDR_RLOCK() do { \
+ rw_rlock(&sctppcbinfo.ipi_addr_mtx); \
+} while (0)
+
+#define SCTP_IPI_ADDR_WLOCK() do { \
+ rw_wlock(&sctppcbinfo.ipi_addr_mtx); \
+} while (0)
+
+
+#define SCTP_IPI_ADDR_RUNLOCK() rw_runlock(&sctppcbinfo.ipi_addr_mtx)
+#define SCTP_IPI_ADDR_WUNLOCK() rw_wunlock(&sctppcbinfo.ipi_addr_mtx)
+
+
+#define SCTP_IPI_ITERATOR_WQ_INIT() \
+ mtx_init(&sctppcbinfo.ipi_iterator_wq_mtx, "sctp-it-wq", "sctp_it_wq", MTX_DEF)
+
+#define SCTP_IPI_ITERATOR_WQ_DESTROY() \
+ mtx_destroy(&sctppcbinfo.ipi_iterator_wq_mtx)
+
+#define SCTP_IPI_ITERATOR_WQ_LOCK() do { \
+ mtx_lock(&sctppcbinfo.ipi_iterator_wq_mtx); \
+} while (0)
+
+#define SCTP_IPI_ITERATOR_WQ_UNLOCK() mtx_unlock(&sctppcbinfo.ipi_iterator_wq_mtx)
+
+
+#define SCTP_IP_PKTLOG_INIT() \
+ mtx_init(&sctppcbinfo.ipi_pktlog_mtx, "sctp-pktlog", "packetlog", MTX_DEF)
+
+
+#define SCTP_IP_PKTLOG_LOCK() do { \
+ mtx_lock(&sctppcbinfo.ipi_pktlog_mtx); \
+} while (0)
+
+#define SCTP_IP_PKTLOG_UNLOCK() mtx_unlock(&sctppcbinfo.ipi_pktlog_mtx)
+
+#define SCTP_IP_PKTLOG_DESTROY() \
+ mtx_destroy(&sctppcbinfo.ipi_pktlog_mtx)
+
+
+
+
+
+/*
+ * The INP locks are what we use for locking an SCTP endpoint; for example,
+ * if we want to change something at the endpoint level, such as random_store
+ * or cookie secrets, we lock at the INP level.
+ */
+
+#define SCTP_INP_READ_INIT(_inp) \
+ mtx_init(&(_inp)->inp_rdata_mtx, "sctp-read", "inpr", MTX_DEF | MTX_DUPOK)
+
+#define SCTP_INP_READ_DESTROY(_inp) \
+ mtx_destroy(&(_inp)->inp_rdata_mtx)
+
+#define SCTP_INP_READ_LOCK(_inp) do { \
+ mtx_lock(&(_inp)->inp_rdata_mtx); \
+} while (0)
+
+
+#define SCTP_INP_READ_UNLOCK(_inp) mtx_unlock(&(_inp)->inp_rdata_mtx)
+
+
+#define SCTP_INP_LOCK_INIT(_inp) \
+ mtx_init(&(_inp)->inp_mtx, "sctp-inp", "inp", MTX_DEF | MTX_DUPOK)
+#define SCTP_ASOC_CREATE_LOCK_INIT(_inp) \
+ mtx_init(&(_inp)->inp_create_mtx, "sctp-create", "inp_create", \
+ MTX_DEF | MTX_DUPOK)
+
+#define SCTP_INP_LOCK_DESTROY(_inp) \
+ mtx_destroy(&(_inp)->inp_mtx)
+
+#define SCTP_ASOC_CREATE_LOCK_DESTROY(_inp) \
+ mtx_destroy(&(_inp)->inp_create_mtx)
+
+
+#ifdef SCTP_LOCK_LOGGING
+#define SCTP_INP_RLOCK(_inp) do { \
+ if(sctp_logging_level & SCTP_LOCK_LOGGING_ENABLE) sctp_log_lock(_inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_INP);\
+ mtx_lock(&(_inp)->inp_mtx); \
+} while (0)
+
+#define SCTP_INP_WLOCK(_inp) do { \
+ if(sctp_logging_level & SCTP_LOCK_LOGGING_ENABLE) sctp_log_lock(_inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_INP);\
+ mtx_lock(&(_inp)->inp_mtx); \
+} while (0)
+
+#else
+
+#define SCTP_INP_RLOCK(_inp) do { \
+ mtx_lock(&(_inp)->inp_mtx); \
+} while (0)
+
+#define SCTP_INP_WLOCK(_inp) do { \
+ mtx_lock(&(_inp)->inp_mtx); \
+} while (0)
+
+#endif
+
+
+#define SCTP_TCB_SEND_LOCK_INIT(_tcb) \
+ mtx_init(&(_tcb)->tcb_send_mtx, "sctp-send-tcb", "tcbs", MTX_DEF | MTX_DUPOK)
+
+#define SCTP_TCB_SEND_LOCK_DESTROY(_tcb) mtx_destroy(&(_tcb)->tcb_send_mtx)
+
+#define SCTP_TCB_SEND_LOCK(_tcb) do { \
+ mtx_lock(&(_tcb)->tcb_send_mtx); \
+} while (0)
+
+#define SCTP_TCB_SEND_UNLOCK(_tcb) mtx_unlock(&(_tcb)->tcb_send_mtx)
+
+#define SCTP_INP_INCR_REF(_inp) atomic_add_int(&((_inp)->refcount), 1)
+#define SCTP_INP_DECR_REF(_inp) atomic_add_int(&((_inp)->refcount), -1)
+
+
+#ifdef SCTP_LOCK_LOGGING
+#define SCTP_ASOC_CREATE_LOCK(_inp) \
+ do { \
+ if(sctp_logging_level & SCTP_LOCK_LOGGING_ENABLE) sctp_log_lock(_inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_CREATE); \
+ mtx_lock(&(_inp)->inp_create_mtx); \
+ } while (0)
+#else
+
+#define SCTP_ASOC_CREATE_LOCK(_inp) \
+ do { \
+ mtx_lock(&(_inp)->inp_create_mtx); \
+ } while (0)
+#endif
+
+#define SCTP_INP_RUNLOCK(_inp) mtx_unlock(&(_inp)->inp_mtx)
+#define SCTP_INP_WUNLOCK(_inp) mtx_unlock(&(_inp)->inp_mtx)
+#define SCTP_ASOC_CREATE_UNLOCK(_inp) mtx_unlock(&(_inp)->inp_create_mtx)
+
+/*
+ * For the majority of things (once we have found the association) we will
+ * lock the actual association mutex. This will protect all the association
+ * level queues and streams and such. We will need to lock the socket layer
+ * when we stuff data up into the receiving sb_mb. I.e. we will need to do an
+ * extra SOCKBUF_LOCK(&so->so_rcv) even though the association is locked.
+ */
+
+#define SCTP_TCB_LOCK_INIT(_tcb) \
+ mtx_init(&(_tcb)->tcb_mtx, "sctp-tcb", "tcb", MTX_DEF | MTX_DUPOK)
+
+#define SCTP_TCB_LOCK_DESTROY(_tcb) mtx_destroy(&(_tcb)->tcb_mtx)
+
+#ifdef SCTP_LOCK_LOGGING
+#define SCTP_TCB_LOCK(_tcb) do { \
+ if(sctp_logging_level & SCTP_LOCK_LOGGING_ENABLE) sctp_log_lock(_tcb->sctp_ep, _tcb, SCTP_LOG_LOCK_TCB); \
+ mtx_lock(&(_tcb)->tcb_mtx); \
+} while (0)
+
+#else
+#define SCTP_TCB_LOCK(_tcb) do { \
+ mtx_lock(&(_tcb)->tcb_mtx); \
+} while (0)
+
+#endif
+
+
+#define SCTP_TCB_TRYLOCK(_tcb) mtx_trylock(&(_tcb)->tcb_mtx)
+
+#define SCTP_TCB_UNLOCK(_tcb) mtx_unlock(&(_tcb)->tcb_mtx)
+
+#define SCTP_TCB_UNLOCK_IFOWNED(_tcb) do { \
+ if (mtx_owned(&(_tcb)->tcb_mtx)) \
+ mtx_unlock(&(_tcb)->tcb_mtx); \
+ } while (0)
+
+
+
+#ifdef INVARIANTS
+#define SCTP_TCB_LOCK_ASSERT(_tcb) do { \
+ if (mtx_owned(&(_tcb)->tcb_mtx) == 0) \
+ panic("Don't own TCB lock"); \
+ } while (0)
+#else
+#define SCTP_TCB_LOCK_ASSERT(_tcb)
+#endif
+
+#define SCTP_ITERATOR_LOCK_INIT() \
+ mtx_init(&sctppcbinfo.it_mtx, "sctp-it", "iterator", MTX_DEF)
+
+#ifdef INVARIANTS
+#define SCTP_ITERATOR_LOCK() \
+ do { \
+ if (mtx_owned(&sctppcbinfo.it_mtx)) \
+ panic("Iterator Lock"); \
+ mtx_lock(&sctppcbinfo.it_mtx); \
+ } while (0)
+#else
+#define SCTP_ITERATOR_LOCK() \
+ do { \
+ mtx_lock(&sctppcbinfo.it_mtx); \
+ } while (0)
+
+#endif
+
+#define SCTP_ITERATOR_UNLOCK() mtx_unlock(&sctppcbinfo.it_mtx)
+#define SCTP_ITERATOR_LOCK_DESTROY() mtx_destroy(&sctppcbinfo.it_mtx)
+
+
+#define SCTP_INCR_EP_COUNT() \
+ do { \
+ atomic_add_int(&sctppcbinfo.ipi_count_ep, 1); \
+ } while (0)
+
+#define SCTP_DECR_EP_COUNT() \
+ do { \
+ atomic_subtract_int(&sctppcbinfo.ipi_count_ep, 1); \
+ } while (0)
+
+#define SCTP_INCR_ASOC_COUNT() \
+ do { \
+ atomic_add_int(&sctppcbinfo.ipi_count_asoc, 1); \
+ } while (0)
+
+#define SCTP_DECR_ASOC_COUNT() \
+ do { \
+ atomic_subtract_int(&sctppcbinfo.ipi_count_asoc, 1); \
+ } while (0)
+
+#define SCTP_INCR_LADDR_COUNT() \
+ do { \
+ atomic_add_int(&sctppcbinfo.ipi_count_laddr, 1); \
+ } while (0)
+
+#define SCTP_DECR_LADDR_COUNT() \
+ do { \
+ atomic_subtract_int(&sctppcbinfo.ipi_count_laddr, 1); \
+ } while (0)
+
+#define SCTP_INCR_RADDR_COUNT() \
+ do { \
+ atomic_add_int(&sctppcbinfo.ipi_count_raddr, 1); \
+ } while (0)
+
+#define SCTP_DECR_RADDR_COUNT() \
+ do { \
+ atomic_subtract_int(&sctppcbinfo.ipi_count_raddr,1); \
+ } while (0)
+
+#define SCTP_INCR_CHK_COUNT() \
+ do { \
+ atomic_add_int(&sctppcbinfo.ipi_count_chunk, 1); \
+ } while (0)
+
+#define SCTP_DECR_CHK_COUNT() \
+ do { \
+ if(sctppcbinfo.ipi_count_chunk == 0) \
+ panic("chunk count to 0?"); \
+ atomic_subtract_int(&sctppcbinfo.ipi_count_chunk, 1); \
+ } while (0)
+
+#define SCTP_INCR_READQ_COUNT() \
+ do { \
+ atomic_add_int(&sctppcbinfo.ipi_count_readq,1); \
+ } while (0)
+
+#define SCTP_DECR_READQ_COUNT() \
+ do { \
+ atomic_subtract_int(&sctppcbinfo.ipi_count_readq, 1); \
+ } while (0)
+
+#define SCTP_INCR_STRMOQ_COUNT() \
+ do { \
+ atomic_add_int(&sctppcbinfo.ipi_count_strmoq, 1); \
+ } while (0)
+
+#define SCTP_DECR_STRMOQ_COUNT() \
+ do { \
+ atomic_subtract_int(&sctppcbinfo.ipi_count_strmoq, 1); \
+ } while (0)
+
+
+#if defined(SCTP_SO_LOCK_TESTING)
+#define SCTP_INP_SO(sctpinp) (sctpinp)->ip_inp.inp.inp_socket
+#define SCTP_SOCKET_LOCK(so, refcnt)
+#define SCTP_SOCKET_UNLOCK(so, refcnt)
+#endif
+
+#endif
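
As a rough illustration of the discipline described in the comment at the top of this header (lookups under the INP_INFO lock, a per-association TCB lock held while working on the association, and the CREATE lock gating implicit association setup), here is a small userland analogy built on pthreads. The table, lookup_or_create(), and the helper functions are hypothetical stand-ins, not SCTP code; the comments only name the macros they loosely correspond to.

#include <pthread.h>
#include <stddef.h>

struct assoc {
	int id;
	int in_use;
	pthread_mutex_t tcb_lock;	/* plays the role of tcb_mtx */
};

static struct assoc table[16];	/* stand-in for the sctppcbinfo lists */
static pthread_rwlock_t info_lock = PTHREAD_RWLOCK_INITIALIZER;	/* ~ipi_ep_mtx */
static pthread_mutex_t create_lock = PTHREAD_MUTEX_INITIALIZER;	/* ~inp_create_mtx */

static struct assoc *
table_find(int id)
{
	for (size_t i = 0; i < 16; i++)
		if (table[i].in_use && table[i].id == id)
			return (&table[i]);
	return (NULL);
}

static struct assoc *
table_insert(int id)
{
	for (size_t i = 0; i < 16; i++)
		if (!table[i].in_use) {
			table[i].id = id;
			table[i].in_use = 1;
			pthread_mutex_init(&table[i].tcb_lock, NULL);
			return (&table[i]);
		}
	return (NULL);
}

/* look up an association; create it under the CREATE lock if it is missing */
struct assoc *
lookup_or_create(int id)
{
	struct assoc *a;

	pthread_rwlock_rdlock(&info_lock);	/* SCTP_INP_INFO_RLOCK() */
	a = table_find(id);
	if (a != NULL)
		pthread_mutex_lock(&a->tcb_lock);	/* SCTP_TCB_LOCK() */
	pthread_rwlock_unlock(&info_lock);	/* SCTP_INP_INFO_RUNLOCK() */
	if (a != NULL)
		return (a);

	pthread_mutex_lock(&create_lock);	/* SCTP_ASOC_CREATE_LOCK() */
	pthread_rwlock_wrlock(&info_lock);	/* SCTP_INP_INFO_WLOCK() */
	a = table_find(id);	/* re-check: another creator may have won the race */
	if (a == NULL)
		a = table_insert(id);
	if (a != NULL)
		pthread_mutex_lock(&a->tcb_lock);
	pthread_rwlock_unlock(&info_lock);
	pthread_mutex_unlock(&create_lock);	/* SCTP_ASOC_CREATE_UNLOCK() */
	return (a);	/* caller drops a->tcb_lock, as with SCTP_TCB_UNLOCK() */
}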
--- /dev/null
+++ sys/netinet/sctp_output.c
@@ -0,0 +1,12458 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_output.c,v 1.46 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_output.c,v 1.58.2.3 2007/12/09 20:23:47 rrs Exp $");
+
+#include <netinet/sctp_os.h>
+#include <sys/proc.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_uio.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_auth.h>
+#include <netinet/sctp_timer.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_indata.h>
+#include <netinet/sctp_bsd_addr.h>
+#include <netinet/sctp_input.h>
+
+
+
+#define SCTP_MAX_GAPS_INARRAY 4
+struct sack_track {
+	uint8_t right_edge;	/* mergeable on the right edge */
+	uint8_t left_edge;	/* mergeable on the left edge */
+ uint8_t num_entries;
+ uint8_t spare;
+ struct sctp_gap_ack_block gaps[SCTP_MAX_GAPS_INARRAY];
+};
+
+struct sack_track sack_array[256] = {
+ {0, 0, 0, 0, /* 0x00 */
+ {{0, 0},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x01 */
+ {{0, 0},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x02 */
+ {{1, 1},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x03 */
+ {{0, 1},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x04 */
+ {{2, 2},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x05 */
+ {{0, 0},
+ {2, 2},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x06 */
+ {{1, 2},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x07 */
+ {{0, 2},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x08 */
+ {{3, 3},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x09 */
+ {{0, 0},
+ {3, 3},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x0a */
+ {{1, 1},
+ {3, 3},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x0b */
+ {{0, 1},
+ {3, 3},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x0c */
+ {{2, 3},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x0d */
+ {{0, 0},
+ {2, 3},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x0e */
+ {{1, 3},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x0f */
+ {{0, 3},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x10 */
+ {{4, 4},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x11 */
+ {{0, 0},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x12 */
+ {{1, 1},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x13 */
+ {{0, 1},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x14 */
+ {{2, 2},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x15 */
+ {{0, 0},
+ {2, 2},
+ {4, 4},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x16 */
+ {{1, 2},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x17 */
+ {{0, 2},
+ {4, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x18 */
+ {{3, 4},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x19 */
+ {{0, 0},
+ {3, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x1a */
+ {{1, 1},
+ {3, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x1b */
+ {{0, 1},
+ {3, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x1c */
+ {{2, 4},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x1d */
+ {{0, 0},
+ {2, 4},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x1e */
+ {{1, 4},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x1f */
+ {{0, 4},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x20 */
+ {{5, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x21 */
+ {{0, 0},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x22 */
+ {{1, 1},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x23 */
+ {{0, 1},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x24 */
+ {{2, 2},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x25 */
+ {{0, 0},
+ {2, 2},
+ {5, 5},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x26 */
+ {{1, 2},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x27 */
+ {{0, 2},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x28 */
+ {{3, 3},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x29 */
+ {{0, 0},
+ {3, 3},
+ {5, 5},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x2a */
+ {{1, 1},
+ {3, 3},
+ {5, 5},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x2b */
+ {{0, 1},
+ {3, 3},
+ {5, 5},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x2c */
+ {{2, 3},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x2d */
+ {{0, 0},
+ {2, 3},
+ {5, 5},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x2e */
+ {{1, 3},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x2f */
+ {{0, 3},
+ {5, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x30 */
+ {{4, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x31 */
+ {{0, 0},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x32 */
+ {{1, 1},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x33 */
+ {{0, 1},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x34 */
+ {{2, 2},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x35 */
+ {{0, 0},
+ {2, 2},
+ {4, 5},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x36 */
+ {{1, 2},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x37 */
+ {{0, 2},
+ {4, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x38 */
+ {{3, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x39 */
+ {{0, 0},
+ {3, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x3a */
+ {{1, 1},
+ {3, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x3b */
+ {{0, 1},
+ {3, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x3c */
+ {{2, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x3d */
+ {{0, 0},
+ {2, 5},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x3e */
+ {{1, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x3f */
+ {{0, 5},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x40 */
+ {{6, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x41 */
+ {{0, 0},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x42 */
+ {{1, 1},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x43 */
+ {{0, 1},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x44 */
+ {{2, 2},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x45 */
+ {{0, 0},
+ {2, 2},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x46 */
+ {{1, 2},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x47 */
+ {{0, 2},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x48 */
+ {{3, 3},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x49 */
+ {{0, 0},
+ {3, 3},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x4a */
+ {{1, 1},
+ {3, 3},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x4b */
+ {{0, 1},
+ {3, 3},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x4c */
+ {{2, 3},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x4d */
+ {{0, 0},
+ {2, 3},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x4e */
+ {{1, 3},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x4f */
+ {{0, 3},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x50 */
+ {{4, 4},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x51 */
+ {{0, 0},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x52 */
+ {{1, 1},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x53 */
+ {{0, 1},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x54 */
+ {{2, 2},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 4, 0, /* 0x55 */
+ {{0, 0},
+ {2, 2},
+ {4, 4},
+ {6, 6}
+ }
+ },
+ {0, 0, 3, 0, /* 0x56 */
+ {{1, 2},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x57 */
+ {{0, 2},
+ {4, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x58 */
+ {{3, 4},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x59 */
+ {{0, 0},
+ {3, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x5a */
+ {{1, 1},
+ {3, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x5b */
+ {{0, 1},
+ {3, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x5c */
+ {{2, 4},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x5d */
+ {{0, 0},
+ {2, 4},
+ {6, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x5e */
+ {{1, 4},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x5f */
+ {{0, 4},
+ {6, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x60 */
+ {{5, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x61 */
+ {{0, 0},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x62 */
+ {{1, 1},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x63 */
+ {{0, 1},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x64 */
+ {{2, 2},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x65 */
+ {{0, 0},
+ {2, 2},
+ {5, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x66 */
+ {{1, 2},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x67 */
+ {{0, 2},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x68 */
+ {{3, 3},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x69 */
+ {{0, 0},
+ {3, 3},
+ {5, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 3, 0, /* 0x6a */
+ {{1, 1},
+ {3, 3},
+ {5, 6},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x6b */
+ {{0, 1},
+ {3, 3},
+ {5, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x6c */
+ {{2, 3},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x6d */
+ {{0, 0},
+ {2, 3},
+ {5, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x6e */
+ {{1, 3},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x6f */
+ {{0, 3},
+ {5, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x70 */
+ {{4, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x71 */
+ {{0, 0},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x72 */
+ {{1, 1},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x73 */
+ {{0, 1},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x74 */
+ {{2, 2},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 3, 0, /* 0x75 */
+ {{0, 0},
+ {2, 2},
+ {4, 6},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x76 */
+ {{1, 2},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x77 */
+ {{0, 2},
+ {4, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x78 */
+ {{3, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x79 */
+ {{0, 0},
+ {3, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 2, 0, /* 0x7a */
+ {{1, 1},
+ {3, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x7b */
+ {{0, 1},
+ {3, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x7c */
+ {{2, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 2, 0, /* 0x7d */
+ {{0, 0},
+ {2, 6},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 0, 1, 0, /* 0x7e */
+ {{1, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 0, 1, 0, /* 0x7f */
+ {{0, 6},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0x80 */
+ {{7, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0x81 */
+ {{0, 0},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x82 */
+ {{1, 1},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0x83 */
+ {{0, 1},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x84 */
+ {{2, 2},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x85 */
+ {{0, 0},
+ {2, 2},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x86 */
+ {{1, 2},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0x87 */
+ {{0, 2},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x88 */
+ {{3, 3},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x89 */
+ {{0, 0},
+ {3, 3},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0x8a */
+ {{1, 1},
+ {3, 3},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x8b */
+ {{0, 1},
+ {3, 3},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x8c */
+ {{2, 3},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x8d */
+ {{0, 0},
+ {2, 3},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x8e */
+ {{1, 3},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0x8f */
+ {{0, 3},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x90 */
+ {{4, 4},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x91 */
+ {{0, 0},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0x92 */
+ {{1, 1},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x93 */
+ {{0, 1},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0x94 */
+ {{2, 2},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0x95 */
+ {{0, 0},
+ {2, 2},
+ {4, 4},
+ {7, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0x96 */
+ {{1, 2},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x97 */
+ {{0, 2},
+ {4, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x98 */
+ {{3, 4},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x99 */
+ {{0, 0},
+ {3, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0x9a */
+ {{1, 1},
+ {3, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x9b */
+ {{0, 1},
+ {3, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x9c */
+ {{2, 4},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0x9d */
+ {{0, 0},
+ {2, 4},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0x9e */
+ {{1, 4},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0x9f */
+ {{0, 4},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xa0 */
+ {{5, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xa1 */
+ {{0, 0},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xa2 */
+ {{1, 1},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xa3 */
+ {{0, 1},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xa4 */
+ {{2, 2},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0xa5 */
+ {{0, 0},
+ {2, 2},
+ {5, 5},
+ {7, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0xa6 */
+ {{1, 2},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xa7 */
+ {{0, 2},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xa8 */
+ {{3, 3},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0xa9 */
+ {{0, 0},
+ {3, 3},
+ {5, 5},
+ {7, 7}
+ }
+ },
+ {0, 1, 4, 0, /* 0xaa */
+ {{1, 1},
+ {3, 3},
+ {5, 5},
+ {7, 7}
+ }
+ },
+ {1, 1, 4, 0, /* 0xab */
+ {{0, 1},
+ {3, 3},
+ {5, 5},
+ {7, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0xac */
+ {{2, 3},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0xad */
+ {{0, 0},
+ {2, 3},
+ {5, 5},
+ {7, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0xae */
+ {{1, 3},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xaf */
+ {{0, 3},
+ {5, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xb0 */
+ {{4, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xb1 */
+ {{0, 0},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xb2 */
+ {{1, 1},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xb3 */
+ {{0, 1},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xb4 */
+ {{2, 2},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0xb5 */
+ {{0, 0},
+ {2, 2},
+ {4, 5},
+ {7, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0xb6 */
+ {{1, 2},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xb7 */
+ {{0, 2},
+ {4, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xb8 */
+ {{3, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xb9 */
+ {{0, 0},
+ {3, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xba */
+ {{1, 1},
+ {3, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xbb */
+ {{0, 1},
+ {3, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xbc */
+ {{2, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xbd */
+ {{0, 0},
+ {2, 5},
+ {7, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xbe */
+ {{1, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xbf */
+ {{0, 5},
+ {7, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xc0 */
+ {{6, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xc1 */
+ {{0, 0},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xc2 */
+ {{1, 1},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xc3 */
+ {{0, 1},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xc4 */
+ {{2, 2},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xc5 */
+ {{0, 0},
+ {2, 2},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xc6 */
+ {{1, 2},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xc7 */
+ {{0, 2},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xc8 */
+ {{3, 3},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xc9 */
+ {{0, 0},
+ {3, 3},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xca */
+ {{1, 1},
+ {3, 3},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xcb */
+ {{0, 1},
+ {3, 3},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xcc */
+ {{2, 3},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xcd */
+ {{0, 0},
+ {2, 3},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xce */
+ {{1, 3},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xcf */
+ {{0, 3},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xd0 */
+ {{4, 4},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xd1 */
+ {{0, 0},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xd2 */
+ {{1, 1},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xd3 */
+ {{0, 1},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xd4 */
+ {{2, 2},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 4, 0, /* 0xd5 */
+ {{0, 0},
+ {2, 2},
+ {4, 4},
+ {6, 7}
+ }
+ },
+ {0, 1, 3, 0, /* 0xd6 */
+ {{1, 2},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xd7 */
+ {{0, 2},
+ {4, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xd8 */
+ {{3, 4},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xd9 */
+ {{0, 0},
+ {3, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xda */
+ {{1, 1},
+ {3, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xdb */
+ {{0, 1},
+ {3, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xdc */
+ {{2, 4},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xdd */
+ {{0, 0},
+ {2, 4},
+ {6, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xde */
+ {{1, 4},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xdf */
+ {{0, 4},
+ {6, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xe0 */
+ {{5, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xe1 */
+ {{0, 0},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xe2 */
+ {{1, 1},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xe3 */
+ {{0, 1},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xe4 */
+ {{2, 2},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xe5 */
+ {{0, 0},
+ {2, 2},
+ {5, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xe6 */
+ {{1, 2},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xe7 */
+ {{0, 2},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xe8 */
+ {{3, 3},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xe9 */
+ {{0, 0},
+ {3, 3},
+ {5, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 3, 0, /* 0xea */
+ {{1, 1},
+ {3, 3},
+ {5, 7},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xeb */
+ {{0, 1},
+ {3, 3},
+ {5, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xec */
+ {{2, 3},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xed */
+ {{0, 0},
+ {2, 3},
+ {5, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xee */
+ {{1, 3},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xef */
+ {{0, 3},
+ {5, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xf0 */
+ {{4, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xf1 */
+ {{0, 0},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xf2 */
+ {{1, 1},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xf3 */
+ {{0, 1},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xf4 */
+ {{2, 2},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 3, 0, /* 0xf5 */
+ {{0, 0},
+ {2, 2},
+ {4, 7},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xf6 */
+ {{1, 2},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xf7 */
+ {{0, 2},
+ {4, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xf8 */
+ {{3, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xf9 */
+ {{0, 0},
+ {3, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 2, 0, /* 0xfa */
+ {{1, 1},
+ {3, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xfb */
+ {{0, 1},
+ {3, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xfc */
+ {{2, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 2, 0, /* 0xfd */
+ {{0, 0},
+ {2, 7},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {0, 1, 1, 0, /* 0xfe */
+ {{1, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ },
+ {1, 1, 1, 0, /* 0xff */
+ {{0, 7},
+ {0, 0},
+ {0, 0},
+ {0, 0}
+ }
+ }
+};
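
sack_array above is a 256-entry lookup table indexed by one byte of the association's received-TSN bitmap: each entry pre-computes the runs of set bits in that byte as start/end bit offsets, with right_edge set when bit 0 belongs to a run and left_edge set when bit 7 does, so runs can be merged across neighbouring bytes. A minimal sketch of reading one entry follows; it assumes the definitions above and the start/end field names of struct sctp_gap_ack_block, and the byte value used is just an example.

#include <stdint.h>
#include <stdio.h>

/* assumes struct sack_track and sack_array[] as defined above */
static void
show_gaps(uint8_t byte)
{
	struct sack_track *p = &sack_array[byte];
	int i;

	printf("byte 0x%02x: %d run(s)%s%s\n", byte, p->num_entries,
	    p->right_edge ? ", bit 0 in a run" : "",
	    p->left_edge ? ", bit 7 in a run" : "");
	for (i = 0; i < p->num_entries; i++)
		printf("  bits %d..%d set\n",
		    p->gaps[i].start, p->gaps[i].end);
}

/* e.g. show_gaps(0x2a) reports the runs 1..1, 3..3 and 5..5 */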
+
+
+int
+sctp_is_address_in_scope(struct sctp_ifa *ifa,
+ int ipv4_addr_legal,
+ int ipv6_addr_legal,
+ int loopback_scope,
+ int ipv4_local_scope,
+ int local_scope,
+ int site_scope,
+ int do_update)
+{
+ if ((loopback_scope == 0) &&
+ (ifa->ifn_p) && SCTP_IFN_IS_IFT_LOOP(ifa->ifn_p)) {
+ /*
+		 * skip loopback if not in scope
+ */
+ return (0);
+ }
+ if ((ifa->address.sa.sa_family == AF_INET) && ipv4_addr_legal) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&ifa->address.sin;
+ if (sin->sin_addr.s_addr == 0) {
+			/* not in scope, unspecified */
+ return (0);
+ }
+ if ((ipv4_local_scope == 0) &&
+ (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
+ /* private address not in scope */
+ return (0);
+ }
+ } else if ((ifa->address.sa.sa_family == AF_INET6) && ipv6_addr_legal) {
+ struct sockaddr_in6 *sin6;
+
+ /*
+ * Must update the flags, bummer, which means any IFA locks
+ * must now be applied HERE <->
+ */
+ if (do_update) {
+ sctp_gather_internal_ifa_flags(ifa);
+ }
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ return (0);
+ }
+ /* ok to use deprecated addresses? */
+ sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+			/* skip unspecified addresses */
+ return (0);
+ }
+ if ( /* (local_scope == 0) && */
+ (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))) {
+ return (0);
+ }
+ if ((site_scope == 0) &&
+ (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
+ return (0);
+ }
+ } else {
+ return (0);
+ }
+ return (1);
+}
+
+static struct mbuf *
+sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa)
+{
+ struct sctp_paramhdr *parmh;
+ struct mbuf *mret;
+ int len;
+
+ if (ifa->address.sa.sa_family == AF_INET) {
+ len = sizeof(struct sctp_ipv4addr_param);
+ } else if (ifa->address.sa.sa_family == AF_INET6) {
+ len = sizeof(struct sctp_ipv6addr_param);
+ } else {
+ /* unknown type */
+ return (m);
+ }
+ if (M_TRAILINGSPACE(m) >= len) {
+		/* easy case: we just drop it on the end */
+ parmh = (struct sctp_paramhdr *)(SCTP_BUF_AT(m, SCTP_BUF_LEN(m)));
+ mret = m;
+ } else {
+ /* Need more space */
+ mret = m;
+ while (SCTP_BUF_NEXT(mret) != NULL) {
+ mret = SCTP_BUF_NEXT(mret);
+ }
+ SCTP_BUF_NEXT(mret) = sctp_get_mbuf_for_msg(len, 0, M_DONTWAIT, 1, MT_DATA);
+ if (SCTP_BUF_NEXT(mret) == NULL) {
+ /* We are hosed, can't add more addresses */
+ return (m);
+ }
+ mret = SCTP_BUF_NEXT(mret);
+ parmh = mtod(mret, struct sctp_paramhdr *);
+ }
+ /* now add the parameter */
+ if (ifa->address.sa.sa_family == AF_INET) {
+ struct sctp_ipv4addr_param *ipv4p;
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&ifa->address.sin;
+ ipv4p = (struct sctp_ipv4addr_param *)parmh;
+ parmh->param_type = htons(SCTP_IPV4_ADDRESS);
+ parmh->param_length = htons(len);
+ ipv4p->addr = sin->sin_addr.s_addr;
+ SCTP_BUF_LEN(mret) += len;
+ } else if (ifa->address.sa.sa_family == AF_INET6) {
+ struct sctp_ipv6addr_param *ipv6p;
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&ifa->address.sin6;
+ ipv6p = (struct sctp_ipv6addr_param *)parmh;
+ parmh->param_type = htons(SCTP_IPV6_ADDRESS);
+ parmh->param_length = htons(len);
+ memcpy(ipv6p->addr, &sin6->sin6_addr,
+ sizeof(ipv6p->addr));
+ /* clear embedded scope in the address */
+ in6_clearscope((struct in6_addr *)ipv6p->addr);
+ SCTP_BUF_LEN(mret) += len;
+ } else {
+ return (m);
+ }
+ return (mret);
+}
+
+
+struct mbuf *
+sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp, struct sctp_scoping *scope,
+ struct mbuf *m_at, int cnt_inits_to)
+{
+ struct sctp_vrf *vrf = NULL;
+ int cnt, limit_out = 0, total_count;
+ uint32_t vrf_id;
+
+ vrf_id = inp->def_vrf_id;
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (m_at);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ struct sctp_ifa *sctp_ifap;
+ struct sctp_ifn *sctp_ifnp;
+
+ cnt = cnt_inits_to;
+ if (vrf->total_ifa_count > SCTP_COUNT_LIMIT) {
+ limit_out = 1;
+ cnt = SCTP_ADDRESS_LIMIT;
+ goto skip_count;
+ }
+ LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) {
+ if ((scope->loopback_scope == 0) &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifnp)) {
+ /*
+ * Skip loopback devices if loopback_scope
+ * not set
+ */
+ continue;
+ }
+ LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
+ if (sctp_is_address_in_scope(sctp_ifap,
+ scope->ipv4_addr_legal,
+ scope->ipv6_addr_legal,
+ scope->loopback_scope,
+ scope->ipv4_local_scope,
+ scope->local_scope,
+ scope->site_scope, 1) == 0) {
+ continue;
+ }
+ cnt++;
+ if (cnt > SCTP_ADDRESS_LIMIT) {
+ break;
+ }
+ }
+ if (cnt > SCTP_ADDRESS_LIMIT) {
+ break;
+ }
+ }
+skip_count:
+ if (cnt > 1) {
+ total_count = 0;
+ LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) {
+ cnt = 0;
+ if ((scope->loopback_scope == 0) &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifnp)) {
+ /*
+ * Skip loopback devices if
+ * loopback_scope not set
+ */
+ continue;
+ }
+ LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) {
+ if (sctp_is_address_in_scope(sctp_ifap,
+ scope->ipv4_addr_legal,
+ scope->ipv6_addr_legal,
+ scope->loopback_scope,
+ scope->ipv4_local_scope,
+ scope->local_scope,
+ scope->site_scope, 0) == 0) {
+ continue;
+ }
+ m_at = sctp_add_addr_to_mbuf(m_at, sctp_ifap);
+ if (limit_out) {
+ cnt++;
+ total_count++;
+ if (cnt >= 2) {
+ /*
+ * two from each
+ * address
+ */
+ break;
+ }
+ if (total_count > SCTP_ADDRESS_LIMIT) {
+ /* No more addresses */
+ break;
+ }
+ }
+ }
+ }
+ }
+ } else {
+ struct sctp_laddr *laddr;
+
+ cnt = cnt_inits_to;
+ /* First, how many ? */
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ continue;
+ }
+ if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED)
+ /*
+				 * Address being deleted by the system, don't
+ * list.
+ */
+ continue;
+ if (laddr->action == SCTP_DEL_IP_ADDRESS) {
+ /*
+				 * Address being deleted on this ep, don't
+ * list.
+ */
+ continue;
+ }
+ if (sctp_is_address_in_scope(laddr->ifa,
+ scope->ipv4_addr_legal,
+ scope->ipv6_addr_legal,
+ scope->loopback_scope,
+ scope->ipv4_local_scope,
+ scope->local_scope,
+ scope->site_scope, 1) == 0) {
+ continue;
+ }
+ cnt++;
+ }
+ if (cnt > SCTP_ADDRESS_LIMIT) {
+ limit_out = 1;
+ }
+ /*
+ * To get through a NAT we only list addresses if we have
+ * more than one. That way if you just bind a single address
+ * we let the source of the init dictate our address.
+ */
+ if (cnt > 1) {
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ cnt = 0;
+ if (laddr->ifa == NULL) {
+ continue;
+ }
+ if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED)
+ continue;
+
+ if (sctp_is_address_in_scope(laddr->ifa,
+ scope->ipv4_addr_legal,
+ scope->ipv6_addr_legal,
+ scope->loopback_scope,
+ scope->ipv4_local_scope,
+ scope->local_scope,
+ scope->site_scope, 0) == 0) {
+ continue;
+ }
+ m_at = sctp_add_addr_to_mbuf(m_at, laddr->ifa);
+ cnt++;
+ if (cnt >= SCTP_ADDRESS_LIMIT) {
+ break;
+ }
+ }
+ }
+ }
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (m_at);
+}
+
+static struct sctp_ifa *
+sctp_is_ifa_addr_preferred(struct sctp_ifa *ifa,
+ uint8_t dest_is_loop,
+ uint8_t dest_is_priv,
+ sa_family_t fam)
+{
+ uint8_t dest_is_global = 0;
+
+ /* dest_is_priv is true if destination is a private address */
+	/* dest_is_loop is true if destination is a loopback address */
+
+ /*
+	 * Here we determine if it's a preferred address. A preferred address
+	 * means it is the same scope or a higher scope than the destination.
+	 * L = loopback, P = private, G = global
+	 * -----------------------------------------
+	 *  src  | dest | result
+	 * -----------------------------------------
+	 *   L   |  L   | yes
+	 *   P   |  L   | yes-v4 no-v6
+	 *   G   |  L   | yes-v4 no-v6
+	 *   L   |  P   | no
+	 *   P   |  P   | yes
+	 *   G   |  P   | no
+	 *   L   |  G   | no
+	 *   P   |  G   | no
+	 *   G   |  G   | yes
+	 * -----------------------------------------
+ */
+
+ if (ifa->address.sa.sa_family != fam) {
+ /* forget mis-matched family */
+ return (NULL);
+ }
+ if ((dest_is_priv == 0) && (dest_is_loop == 0)) {
+ dest_is_global = 1;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Is destination preferred:");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &ifa->address.sa);
+ /* Ok the address may be ok */
+ if (fam == AF_INET6) {
+ /* ok to use deprecated addresses? */
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:1\n");
+ return (NULL);
+ }
+ if (ifa->src_is_priv) {
+ if (dest_is_loop) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:2\n");
+ return (NULL);
+ }
+ }
+ if (ifa->src_is_glob) {
+ if (dest_is_loop) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:3\n");
+ return (NULL);
+ }
+ }
+ }
+ /*
+ * Now that we know what is what, implement our table. This could in
+ * theory be done slicker (it used to be), but this is
+ * straightforward and easier to validate :-)
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "src_loop:%d src_priv:%d src_glob:%d\n",
+ ifa->src_is_loop, ifa->src_is_priv, ifa->src_is_glob);
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "dest_loop:%d dest_priv:%d dest_glob:%d\n",
+ dest_is_loop, dest_is_priv, dest_is_global);
+
+ if ((ifa->src_is_loop) && (dest_is_priv)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:4\n");
+ return (NULL);
+ }
+ if ((ifa->src_is_glob) && (dest_is_priv)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:5\n");
+ return (NULL);
+ }
+ if ((ifa->src_is_loop) && (dest_is_global)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:6\n");
+ return (NULL);
+ }
+ if ((ifa->src_is_priv) && (dest_is_global)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:7\n");
+ return (NULL);
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "YES\n");
+ /* it's a preferred address */
+ return (ifa);
+}
+
+static struct sctp_ifa *
+sctp_is_ifa_addr_acceptable(struct sctp_ifa *ifa,
+ uint8_t dest_is_loop,
+ uint8_t dest_is_priv,
+ sa_family_t fam)
+{
+ uint8_t dest_is_global = 0;
+
+
+ /*
+ * Here we determine if it's an acceptable address. An acceptable
+ * address means it is the same scope or higher scope, but we can
+ * allow for NAT, which means it's ok to have a global dest and a
+ * private src.
+ *
+ * L = loopback, P = private, G = global
+ * -----------------------------------------
+ *  src  |  dest | result
+ * -----------------------------------------
+ *   L   |   L   | yes
+ * -----------------------------------------
+ *   P   |   L   | yes-v4 no-v6
+ * -----------------------------------------
+ *   G   |   L   | yes
+ * -----------------------------------------
+ *   L   |   P   | no
+ * -----------------------------------------
+ *   P   |   P   | yes
+ * -----------------------------------------
+ *   G   |   P   | yes - May not work
+ * -----------------------------------------
+ *   L   |   G   | no
+ * -----------------------------------------
+ *   P   |   G   | yes - May not work
+ * -----------------------------------------
+ *   G   |   G   | yes
+ * -----------------------------------------
+ */
+
+ if (ifa->address.sa.sa_family != fam) {
+ /* forget non matching family */
+ return (NULL);
+ }
+ /* Ok the address may be ok */
+ if ((dest_is_loop == 0) && (dest_is_priv == 0)) {
+ dest_is_global = 1;
+ }
+ if (fam == AF_INET6) {
+ /* ok to use deprecated addresses? */
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ return (NULL);
+ }
+ if (ifa->src_is_priv) {
+ /* Special case, linklocal to loop */
+ if (dest_is_loop)
+ return (NULL);
+ }
+ }
+ /*
+ * Now that we know what is what, implement our table. This could in
+ * theory be done slicker (it used to be), but this is
+ * straightforward and easier to validate :-)
+ */
+ if ((ifa->src_is_loop == 1) && (dest_is_priv)) {
+ return (NULL);
+ }
+ if ((ifa->src_is_loop == 1) && (dest_is_global)) {
+ return (NULL);
+ }
+ /* it's an acceptable address */
+ return (ifa);
+}
+
+int
+sctp_is_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa)
+{
+ struct sctp_laddr *laddr;
+
+ if (stcb == NULL) {
+ /* There are no restrictions, no TCB :-) */
+ return (0);
+ }
+ LIST_FOREACH(laddr, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
+ __FUNCTION__);
+ continue;
+ }
+ if (laddr->ifa == ifa) {
+ /* Yes it is on the list */
+ return (1);
+ }
+ }
+ return (0);
+}
+
+
+int
+sctp_is_addr_in_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa)
+{
+ struct sctp_laddr *laddr;
+
+ if (ifa == NULL)
+ return (0);
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
+ __FUNCTION__);
+ continue;
+ }
+ if ((laddr->ifa == ifa) && laddr->action == 0)
+ /* same pointer */
+ return (1);
+ }
+ return (0);
+}
+
+
+
+static struct sctp_ifa *
+sctp_choose_boundspecific_inp(struct sctp_inpcb *inp,
+ sctp_route_t * ro,
+ uint32_t vrf_id,
+ int non_asoc_addr_ok,
+ uint8_t dest_is_priv,
+ uint8_t dest_is_loop,
+ sa_family_t fam)
+{
+ struct sctp_laddr *laddr, *starting_point;
+ void *ifn;
+ int resettotop = 0;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa, *sifa;
+ struct sctp_vrf *vrf;
+ uint32_t ifn_index;
+
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL)
+ return (NULL);
+
+ ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
+ ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro);
+ sctp_ifn = sctp_find_ifn(ifn, ifn_index);
+ /*
+ * First question: is the ifn we will emit on in our list? If so, we
+ * want such an address. Note that we look for a preferred
+ * address first.
+ */
+ if (sctp_ifn) {
+ /* is a preferred one on the interface we route out? */
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
+ (non_asoc_addr_ok == 0))
+ continue;
+ sifa = sctp_is_ifa_addr_preferred(sctp_ifa,
+ dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if (sctp_is_addr_in_ep(inp, sifa)) {
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ }
+ }
+ /*
+ * Ok, now we need to find one on the list of bound addresses. We
+ * can't get one on the emitting interface, so let's first find a
+ * preferred one; failing that, an acceptable one; otherwise we
+ * return NULL.
+ */
+ starting_point = inp->next_addr_touse;
+once_again:
+ if (inp->next_addr_touse == NULL) {
+ inp->next_addr_touse = LIST_FIRST(&inp->sctp_addr_list);
+ resettotop = 1;
+ }
+ for (laddr = inp->next_addr_touse; laddr;
+ laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
+ if (laddr->ifa == NULL) {
+ /* address has been removed */
+ continue;
+ }
+ if (laddr->action == SCTP_DEL_IP_ADDRESS) {
+ /* address is being deleted */
+ continue;
+ }
+ sifa = sctp_is_ifa_addr_preferred(laddr->ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ if (resettotop == 0) {
+ inp->next_addr_touse = NULL;
+ goto once_again;
+ }
+ inp->next_addr_touse = starting_point;
+ resettotop = 0;
+once_again_too:
+ if (inp->next_addr_touse == NULL) {
+ inp->next_addr_touse = LIST_FIRST(&inp->sctp_addr_list);
+ resettotop = 1;
+ }
+ /* ok, what about an acceptable address in the inp */
+ for (laddr = inp->next_addr_touse; laddr;
+ laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
+ if (laddr->ifa == NULL) {
+ /* address has been removed */
+ continue;
+ }
+ if (laddr->action == SCTP_DEL_IP_ADDRESS) {
+ /* address is being deleted */
+ continue;
+ }
+ sifa = sctp_is_ifa_addr_acceptable(laddr->ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ if (resettotop == 0) {
+ inp->next_addr_touse = NULL;
+ goto once_again_too;
+ }
+ /*
+ * No bound address can be a source for the destination; we are in
+ * trouble.
+ */
+ return (NULL);
+}
+
+
+
+static struct sctp_ifa *
+sctp_choose_boundspecific_stcb(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ sctp_route_t * ro,
+ uint32_t vrf_id,
+ uint8_t dest_is_priv,
+ uint8_t dest_is_loop,
+ int non_asoc_addr_ok,
+ sa_family_t fam)
+{
+ struct sctp_laddr *laddr, *starting_point;
+ void *ifn;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa, *sifa;
+ uint8_t start_at_beginning = 0;
+ struct sctp_vrf *vrf;
+ uint32_t ifn_index;
+
+ /*
+ * First question: is the ifn we will emit on in our list? If so, we
+ * want that one.
+ */
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL)
+ return (NULL);
+
+ ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
+ ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro);
+ sctp_ifn = sctp_find_ifn(ifn, ifn_index);
+
+ /*
+ * first question, is the ifn we will emit on in our list? If so,
+ * we want that one. First we look for a preferred. Second, we go
+ * for an acceptable.
+ */
+ if (sctp_ifn) {
+ /* first try for a preferred address on the ep */
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0))
+ continue;
+ if (sctp_is_addr_in_ep(inp, sctp_ifa)) {
+ sifa = sctp_is_ifa_addr_preferred(sctp_ifa, dest_is_loop, dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if ((non_asoc_addr_ok == 0) &&
+ (sctp_is_addr_restricted(stcb, sifa))) {
+ /* on the no-no list */
+ continue;
+ }
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ }
+ /* next try for an acceptable address on the ep */
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0))
+ continue;
+ if (sctp_is_addr_in_ep(inp, sctp_ifa)) {
+ sifa = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop, dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if ((non_asoc_addr_ok == 0) &&
+ (sctp_is_addr_restricted(stcb, sifa))) {
+ /* on the no-no list */
+ continue;
+ }
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ }
+
+ }
+ /*
+ * If we can't find one like that, then we must look at all bound
+ * addresses, picking a preferred one first and an acceptable one second.
+ */
+ starting_point = stcb->asoc.last_used_address;
+sctp_from_the_top:
+ if (stcb->asoc.last_used_address == NULL) {
+ start_at_beginning = 1;
+ stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list);
+ }
+ /* search beginning with the last used address */
+ for (laddr = stcb->asoc.last_used_address; laddr;
+ laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
+ if (laddr->ifa == NULL) {
+ /* address has been removed */
+ continue;
+ }
+ if (laddr->action == SCTP_DEL_IP_ADDRESS) {
+ /* address is being deleted */
+ continue;
+ }
+ sifa = sctp_is_ifa_addr_preferred(laddr->ifa, dest_is_loop, dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if ((non_asoc_addr_ok == 0) &&
+ (sctp_is_addr_restricted(stcb, sifa))) {
+ /* on the no-no list */
+ continue;
+ }
+ stcb->asoc.last_used_address = laddr;
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ if (start_at_beginning == 0) {
+ stcb->asoc.last_used_address = NULL;
+ goto sctp_from_the_top;
+ }
+ /* now try for any higher scope than the destination */
+ stcb->asoc.last_used_address = starting_point;
+ start_at_beginning = 0;
+sctp_from_the_top2:
+ if (stcb->asoc.last_used_address == NULL) {
+ start_at_beginning = 1;
+ stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list);
+ }
+ /* search beginning with the last used address */
+ for (laddr = stcb->asoc.last_used_address; laddr;
+ laddr = LIST_NEXT(laddr, sctp_nxt_addr)) {
+ if (laddr->ifa == NULL) {
+ /* address has been removed */
+ continue;
+ }
+ if (laddr->action == SCTP_DEL_IP_ADDRESS) {
+ /* address is being deleted */
+ continue;
+ }
+ sifa = sctp_is_ifa_addr_acceptable(laddr->ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if ((non_asoc_addr_ok == 0) &&
+ (sctp_is_addr_restricted(stcb, sifa))) {
+ /* on the no-no list */
+ continue;
+ }
+ stcb->asoc.last_used_address = laddr;
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ if (start_at_beginning == 0) {
+ stcb->asoc.last_used_address = NULL;
+ goto sctp_from_the_top2;
+ }
+ return (NULL);
+}
+
+static struct sctp_ifa *
+sctp_select_nth_preferred_addr_from_ifn_boundall(struct sctp_ifn *ifn,
+ struct sctp_tcb *stcb,
+ int non_asoc_addr_ok,
+ uint8_t dest_is_loop,
+ uint8_t dest_is_priv,
+ int addr_wanted,
+ sa_family_t fam,
+ sctp_route_t * ro
+)
+{
+ struct sctp_ifa *ifa, *sifa;
+ int num_eligible_addr = 0;
+
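+ /*
+ * Walk the interface's address list and return the addr_wanted'th
+ * (counting from 0) address that passes the preference, mobility,
+ * and restriction checks below.
+ */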
+ LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) {
+ if ((ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
+ (non_asoc_addr_ok == 0))
+ continue;
+ sifa = sctp_is_ifa_addr_preferred(ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ /*
+ * Check if the IPv6 address matches the next-hop. In the
+ * mobile case, an old IPv6 address may not be deleted from the
+ * interface, so the interface carries both the previous and the new
+ * addresses. We should use the one corresponding to the
+ * next-hop. (by micchie)
+ */
+ if (stcb && fam == AF_INET6 &&
+ sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE)) {
+ if (sctp_v6src_match_nexthop(&sifa->address.sin6, ro)
+ == 0) {
+ continue;
+ }
+ }
+ /* Avoid topologically incorrect IPv4 address */
+ if (stcb && fam == AF_INET &&
+ sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE)) {
+ if (sctp_v4src_match_nexthop(sifa, ro) == 0) {
+ continue;
+ }
+ }
+ if (stcb) {
+ if ((non_asoc_addr_ok == 0) &&
+ sctp_is_addr_restricted(stcb, sifa)) {
+ /*
+ * It is restricted for some reason..
+ * probably not yet added.
+ */
+ continue;
+ }
+ }
+ if (num_eligible_addr >= addr_wanted) {
+ return (sifa);
+ }
+ num_eligible_addr++;
+ }
+ return (NULL);
+}
+
+
+static int
+sctp_count_num_preferred_boundall(struct sctp_ifn *ifn,
+ struct sctp_tcb *stcb,
+ int non_asoc_addr_ok,
+ uint8_t dest_is_loop,
+ uint8_t dest_is_priv,
+ sa_family_t fam)
+{
+ struct sctp_ifa *ifa, *sifa;
+ int num_eligible_addr = 0;
+
+ LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) {
+ if ((ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
+ (non_asoc_addr_ok == 0)) {
+ continue;
+ }
+ sifa = sctp_is_ifa_addr_preferred(ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL) {
+ continue;
+ }
+ if (stcb) {
+ if ((non_asoc_addr_ok == 0) &&
+ sctp_is_addr_restricted(stcb, sifa)) {
+ /*
+ * It is restricted for some reason..
+ * probably not yet added.
+ */
+ continue;
+ }
+ }
+ num_eligible_addr++;
+ }
+ return (num_eligible_addr);
+}
+
+static struct sctp_ifa *
+sctp_choose_boundall(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ sctp_route_t * ro,
+ uint32_t vrf_id,
+ uint8_t dest_is_priv,
+ uint8_t dest_is_loop,
+ int non_asoc_addr_ok,
+ sa_family_t fam)
+{
+ int cur_addr_num = 0, num_preferred = 0;
+ void *ifn;
+ struct sctp_ifn *sctp_ifn, *looked_at = NULL, *emit_ifn;
+ struct sctp_ifa *sctp_ifa, *sifa;
+ uint32_t ifn_index;
+ struct sctp_vrf *vrf;
+
+ /*-
+ * For boundall we can use any address in the association.
+ * If non_asoc_addr_ok is set we can use any address (at least in
+ * theory). So we look for preferred addresses first. If we find one,
+ * we use it. Otherwise we next try to get an address on the
+ * interface, which we should be able to do (unless non_asoc_addr_ok
+ * is false and we are routed out that way). In these cases where we
+ * can't use the address of the interface we go through all the
+ * ifn's looking for an address we can use and fill that in. Punting
+ * means we send back address 0, which will probably cause problems
+ * actually since then IP will fill in the address of the route ifn,
+ * which means we probably already rejected it.. i.e. here comes an
+ * abort :-<.
+ */
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL)
+ return (NULL);
+
+ ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
+ ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro);
+ emit_ifn = looked_at = sctp_ifn = sctp_find_ifn(ifn, ifn_index);
+ if (sctp_ifn == NULL) {
+ /* ?? We don't have this guy ?? */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "No ifn emit interface?\n");
+ goto bound_all_plan_b;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifn_index:%d name:%s is emit interface\n",
+ ifn_index, sctp_ifn->ifn_name);
+
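+ /*
+ * Round-robin over the preferred addresses on the emit interface:
+ * the net remembers the index of the next address to try in
+ * indx_of_eligible_next_to_use.
+ */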
+ if (net) {
+ cur_addr_num = net->indx_of_eligible_next_to_use;
+ }
+ num_preferred = sctp_count_num_preferred_boundall(sctp_ifn,
+ stcb,
+ non_asoc_addr_ok,
+ dest_is_loop,
+ dest_is_priv, fam);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Found %d preferred source addresses for intf:%s\n",
+ num_preferred, sctp_ifn->ifn_name);
+ if (num_preferred == 0) {
+ /*
+ * no eligible addresses, we must use some other interface
+ * address if we can find one.
+ */
+ goto bound_all_plan_b;
+ }
+ /*
+ * Ok we have num_eligible_addr set with how many we can use, this
+ * may vary from call to call due to addresses being deprecated
+ * etc..
+ */
+ if (cur_addr_num >= num_preferred) {
+ cur_addr_num = 0;
+ }
+ /*
+ * select the nth address from the list (where cur_addr_num is the
+ * nth) and 0 is the first one, 1 is the second one etc...
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "cur_addr_num:%d\n", cur_addr_num);
+
+ sctp_ifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, stcb, non_asoc_addr_ok, dest_is_loop,
+ dest_is_priv, cur_addr_num, fam, ro);
+
+ /* if sctp_ifa is NULL, something changed; fall to plan B. */
+ if (sctp_ifa) {
+ atomic_add_int(&sctp_ifa->refcount, 1);
+ if (net) {
+ /* save off where the next one we will want */
+ net->indx_of_eligible_next_to_use = cur_addr_num + 1;
+ }
+ return (sctp_ifa);
+ }
+ /*
+ * plan_b: Look at all interfaces and find a preferred address. If
+ * none is preferred, fall through to plan_c.
+ */
+bound_all_plan_b:
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan B\n");
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Examine interface %s\n",
+ sctp_ifn->ifn_name);
+ if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ /* wrong base scope */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "skip\n");
+ continue;
+ }
+ if ((sctp_ifn == looked_at) && looked_at) {
+ /* already looked at this guy */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "already seen\n");
+ continue;
+ }
+ num_preferred = sctp_count_num_preferred_boundall(sctp_ifn, stcb, non_asoc_addr_ok,
+ dest_is_loop, dest_is_priv, fam);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2,
+ "Found ifn:%p %d preferred source addresses\n",
+ ifn, num_preferred);
+ if (num_preferred == 0) {
+ /* None on this interface. */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "No prefered -- skipping to next\n");
+ continue;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2,
+ "num preferred:%d on interface:%p cur_addr_num:%d\n",
+ num_preferred, sctp_ifn, cur_addr_num);
+
+ /*
+ * Ok we have num_eligible_addr set with how many we can
+ * use, this may vary from call to call due to addresses
+ * being deprecated etc..
+ */
+ if (cur_addr_num >= num_preferred) {
+ cur_addr_num = 0;
+ }
+ sifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, stcb, non_asoc_addr_ok, dest_is_loop,
+ dest_is_priv, cur_addr_num, fam, ro);
+ if (sifa == NULL)
+ continue;
+ if (net) {
+ net->indx_of_eligible_next_to_use = cur_addr_num + 1;
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "we selected %d\n",
+ cur_addr_num);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Source:");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Dest:");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &net->ro._l_addr.sa);
+ }
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+
+ }
+
+ /* plan_c: do we have an acceptable address on the emit interface */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan C: find acceptable on interface\n");
+ if (emit_ifn == NULL) {
+ goto plan_d;
+ }
+ LIST_FOREACH(sctp_ifa, &emit_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
+ (non_asoc_addr_ok == 0))
+ continue;
+ sifa = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if (stcb) {
+ if ((non_asoc_addr_ok == 0) &&
+ sctp_is_addr_restricted(stcb, sifa)) {
+ /*
+ * It is restricted for some reason..
+ * probably not yet added.
+ */
+ continue;
+ }
+ }
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+plan_d:
+ /*
+ * plan_d: We are in trouble. No preferred address on the emit
+ * interface. And not even a preferred address on all interfaces. Go
+ * out and see if we can find an acceptable address somewhere
+ * amongst all interfaces.
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan D\n");
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ /* wrong base scope */
+ continue;
+ }
+ if ((sctp_ifn == looked_at) && looked_at)
+ /* already looked at this guy */
+ continue;
+
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) &&
+ (non_asoc_addr_ok == 0))
+ continue;
+ sifa = sctp_is_ifa_addr_acceptable(sctp_ifa,
+ dest_is_loop,
+ dest_is_priv, fam);
+ if (sifa == NULL)
+ continue;
+ if (stcb) {
+ if ((non_asoc_addr_ok == 0) &&
+ sctp_is_addr_restricted(stcb, sifa)) {
+ /*
+ * It is restricted for some
+ * reason.. probably not yet added.
+ */
+ continue;
+ }
+ }
+ atomic_add_int(&sifa->refcount, 1);
+ return (sifa);
+ }
+ }
+ /*
+ * Ok, we can find NO address to source from: every candidate is
+ * either on our restricted list (and non_asoc_addr_ok is NOT set)
+ * or otherwise unusable. We can't source to this destination :-(
+ */
+ return (NULL);
+}
+
+
+
+/* tcb may be NULL */
+struct sctp_ifa *
+sctp_source_address_selection(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ sctp_route_t * ro,
+ struct sctp_nets *net,
+ int non_asoc_addr_ok, uint32_t vrf_id)
+{
+ struct sockaddr_in *to = (struct sockaddr_in *)&ro->ro_dst;
+ struct sockaddr_in6 *to6 = (struct sockaddr_in6 *)&ro->ro_dst;
+ struct sctp_ifa *answer;
+ uint8_t dest_is_priv, dest_is_loop;
+ sa_family_t fam;
+
+ /*-
+ * Rules:
+ * - Find the route if needed, cache it if I can.
+ * - Look at the interface address in the route; is it in the bound
+ * list? If so we have the best source.
+ * - If not, we must rotate amongst the addresses.
+ *
+ * Caveats and issues
+ *
+ * Do we need to pay attention to scope? We can have a private address
+ * or a global address we are sourcing or sending to. So if we draw
+ * it out:
+ * zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+ * For V4
+ *------------------------------------------
+ * source * dest * result
+ * -----------------------------------------
+ * <a> Private * Global * NAT
+ * -----------------------------------------
+ * <b> Private * Private * No problem
+ * -----------------------------------------
+ * <c> Global * Private * Huh, How will this work?
+ * -----------------------------------------
+ * <d> Global * Global * No Problem
+ *------------------------------------------
+ * zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+ * For V6
+ *------------------------------------------
+ * source * dest * result
+ * -----------------------------------------
+ * <a> Linklocal * Global *
+ * -----------------------------------------
+ * <b> Linklocal * Linklocal * No problem
+ * -----------------------------------------
+ * <c> Global * Linklocal * Huh, How will this work?
+ * -----------------------------------------
+ * <d> Global * Global * No Problem
+ *------------------------------------------
+ * zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+ *
+ * And then we add to that what happens if there are multiple addresses
+ * assigned to an interface. Remember the ifa on an ifn is a linked
+ * list of addresses. So one interface can have more than one IP
+ * address. What happens if we have both a private and a global
+ * address? Do we then use the context of the destination to sort out
+ * which one is best? And what about NATs: sending P->G may get you a
+ * NAT translation, or should you prefer the G that is on the
+ * interface?
+ *
+ * Decisions:
+ *
+ * - count the number of addresses on the interface.
+ * - if it is one, no problem except case <c>.
+ * For <a> we will assume a NAT out there.
+ * - if there is more than one, then we need to worry about scope, P
+ * or G. We should prefer G -> G and P -> P if possible; then, as a
+ * secondary fallback, mixed types, with G -> P being a last-ditch
+ * one.
+ * - The above all works for bound-all, but for bound-specific we need
+ * to use the same concept while only considering the bound
+ * addresses. If the bound set is NOT assigned to the interface then
+ * we must use rotation amongst the bound addresses.
+ */
+ if (ro->ro_rt == NULL) {
+ /*
+ * Need a route to cache.
+ */
+ SCTP_RTALLOC(ro, vrf_id);
+ }
+ if (ro->ro_rt == NULL) {
+ return (NULL);
+ }
+ fam = to->sin_family;
+ dest_is_priv = dest_is_loop = 0;
+ /* Setup our scopes for the destination */
+ if (fam == AF_INET) {
+ /* Scope based on outbound address */
+ if ((IN4_ISPRIVATE_ADDRESS(&to->sin_addr))) {
+ dest_is_priv = 1;
+ } else if (IN4_ISLOOPBACK_ADDRESS(&to->sin_addr)) {
+ dest_is_loop = 1;
+ if (net != NULL) {
+ /* mark it as local */
+ net->addr_is_local = 1;
+ }
+ }
+ } else if (fam == AF_INET6) {
+ /* Scope based on outbound address */
+ if (IN6_IS_ADDR_LOOPBACK(&to6->sin6_addr)) {
+ /*
+ * If the route goes to the loopback address OR the
+ * address is a loopback address, we are loopback
+ * scope. But we don't use dest_is_priv (link local
+ * addresses).
+ */
+ dest_is_loop = 1;
+ if (net != NULL) {
+ /* mark it as local */
+ net->addr_is_local = 1;
+ }
+ } else if (IN6_IS_ADDR_LINKLOCAL(&to6->sin6_addr)) {
+ dest_is_priv = 1;
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Select source addr for:");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)to);
+ SCTP_IPI_ADDR_RLOCK();
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /*
+ * Bound all case
+ */
+ answer = sctp_choose_boundall(inp, stcb, net, ro, vrf_id,
+ dest_is_priv, dest_is_loop,
+ non_asoc_addr_ok, fam);
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (answer);
+ }
+ /*
+ * Subset bound case
+ */
+ if (stcb) {
+ answer = sctp_choose_boundspecific_stcb(inp, stcb, net, ro,
+ vrf_id, dest_is_priv,
+ dest_is_loop,
+ non_asoc_addr_ok, fam);
+ } else {
+ answer = sctp_choose_boundspecific_inp(inp, ro, vrf_id,
+ non_asoc_addr_ok,
+ dest_is_priv,
+ dest_is_loop, fam);
+ }
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (answer);
+}
+
+static int
+sctp_find_cmsg(int c_type, void *data, struct mbuf *control, int cpsize)
+{
+ struct cmsghdr cmh;
+ int tlen, at;
+
+ tlen = SCTP_BUF_LEN(control);
+ at = 0;
+ /*
+ * Independent of how many mbufs, find the c_type inside the control
+ * structure and copy out the data.
+ */
+ while (at < tlen) {
+ if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) {
+ /* not enough room for one more; we are done. */
+ return (0);
+ }
+ m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh);
+ if (((int)cmh.cmsg_len + at) > tlen) {
+ /*
+ * this is real messed up since there is not enough
+ * data here to cover the cmsg header. We are done.
+ */
+ return (0);
+ }
+ if ((cmh.cmsg_level == IPPROTO_SCTP) &&
+ (c_type == cmh.cmsg_type)) {
+ /* found the one we want, copy it out */
+ at += CMSG_ALIGN(sizeof(struct cmsghdr));
+ if ((int)(cmh.cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) < cpsize) {
+ /*
+ * space of cmsg_len after header not big
+ * enough
+ */
+ return (0);
+ }
+ m_copydata(control, at, cpsize, data);
+ return (1);
+ } else {
+ at += CMSG_ALIGN(cmh.cmsg_len);
+ if (cmh.cmsg_len == 0) {
+ break;
+ }
+ }
+ }
+ /* not found */
+ return (0);
+}
+
+static struct mbuf *
+sctp_add_cookie(struct sctp_inpcb *inp, struct mbuf *init, int init_offset,
+ struct mbuf *initack, int initack_offset, struct sctp_state_cookie *stc_in, uint8_t ** signature)
+{
+ struct mbuf *copy_init, *copy_initack, *m_at, *sig, *mret;
+ struct sctp_state_cookie *stc;
+ struct sctp_paramhdr *ph;
+ uint8_t *foo;
+ int sig_offset;
+ uint16_t cookie_sz;
+
+ mret = NULL;
+ mret = sctp_get_mbuf_for_msg((sizeof(struct sctp_state_cookie) +
+ sizeof(struct sctp_paramhdr)), 0,
+ M_DONTWAIT, 1, MT_DATA);
+ if (mret == NULL) {
+ return (NULL);
+ }
+ copy_init = SCTP_M_COPYM(init, init_offset, M_COPYALL, M_DONTWAIT);
+ if (copy_init == NULL) {
+ sctp_m_freem(mret);
+ return (NULL);
+ }
+ copy_initack = SCTP_M_COPYM(initack, initack_offset, M_COPYALL,
+ M_DONTWAIT);
+ if (copy_initack == NULL) {
+ sctp_m_freem(mret);
+ sctp_m_freem(copy_init);
+ return (NULL);
+ }
+ /* easy side we just drop it on the end */
+ ph = mtod(mret, struct sctp_paramhdr *);
+ SCTP_BUF_LEN(mret) = sizeof(struct sctp_state_cookie) +
+ sizeof(struct sctp_paramhdr);
+ stc = (struct sctp_state_cookie *)((caddr_t)ph +
+ sizeof(struct sctp_paramhdr));
+ ph->param_type = htons(SCTP_STATE_COOKIE);
+ ph->param_length = 0; /* fill in at the end */
+ /* Fill in the stc cookie data */
+ memcpy(stc, stc_in, sizeof(struct sctp_state_cookie));
+
+ /* tack the INIT and then the INIT-ACK onto the chain */
+ cookie_sz = 0;
+ m_at = mret;
+ for (m_at = mret; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
+ cookie_sz += SCTP_BUF_LEN(m_at);
+ if (SCTP_BUF_NEXT(m_at) == NULL) {
+ SCTP_BUF_NEXT(m_at) = copy_init;
+ break;
+ }
+ }
+
+ for (m_at = copy_init; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
+ cookie_sz += SCTP_BUF_LEN(m_at);
+ if (SCTP_BUF_NEXT(m_at) == NULL) {
+ SCTP_BUF_NEXT(m_at) = copy_initack;
+ break;
+ }
+ }
+
+ for (m_at = copy_initack; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
+ cookie_sz += SCTP_BUF_LEN(m_at);
+ if (SCTP_BUF_NEXT(m_at) == NULL) {
+ break;
+ }
+ }
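+ /*
+ * Append a zeroed signature area at the end of the cookie chain;
+ * *signature lets the caller fill in the HMAC over the cookie later.
+ */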
+ sig = sctp_get_mbuf_for_msg(SCTP_SECRET_SIZE, 0, M_DONTWAIT, 1, MT_DATA);
+ if (sig == NULL) {
+ /* no space, so free the entire chain */
+ sctp_m_freem(mret);
+ return (NULL);
+ }
+ SCTP_BUF_LEN(sig) = 0;
+ SCTP_BUF_NEXT(m_at) = sig;
+ sig_offset = 0;
+ foo = (uint8_t *) (mtod(sig, caddr_t)+sig_offset);
+ memset(foo, 0, SCTP_SIGNATURE_SIZE);
+ *signature = foo;
+ SCTP_BUF_LEN(sig) += SCTP_SIGNATURE_SIZE;
+ cookie_sz += SCTP_SIGNATURE_SIZE;
+ ph->param_length = htons(cookie_sz);
+ return (mret);
+}
+
+
+static uint8_t
+sctp_get_ect(struct sctp_tcb *stcb,
+ struct sctp_tmit_chunk *chk)
+{
+ uint8_t this_random;
+
+ /* Huh? */
+ if (sctp_ecn_enable == 0)
+ return (0);
+
+ if (sctp_ecn_nonce == 0)
+ /* no nonce, always return ECT0 */
+ return (SCTP_ECT0_BIT);
+
+ if (stcb->asoc.peer_supports_ecn_nonce == 0) {
+ /* Peer does NOT support it, so we send ECT0 only */
+ return (SCTP_ECT0_BIT);
+ }
+ if (chk == NULL)
+ return (SCTP_ECT0_BIT);
+
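+ /*
+ * ECN nonce: pick ECT0/ECT1 pseudo-randomly, consuming one bit per
+ * chunk from a cached block of random bytes and refreshing the block
+ * once all bits have been used.
+ */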
+ if ((stcb->asoc.hb_random_idx > 3) ||
+ ((stcb->asoc.hb_random_idx == 3) &&
+ (stcb->asoc.hb_ect_randombit > 7))) {
+ uint32_t rndval;
+
+warp_drive_sa:
+ rndval = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
+ memcpy(stcb->asoc.hb_random_values, &rndval,
+ sizeof(stcb->asoc.hb_random_values));
+ this_random = stcb->asoc.hb_random_values[0];
+ stcb->asoc.hb_random_idx = 0;
+ stcb->asoc.hb_ect_randombit = 0;
+ } else {
+ if (stcb->asoc.hb_ect_randombit > 7) {
+ stcb->asoc.hb_ect_randombit = 0;
+ stcb->asoc.hb_random_idx++;
+ if (stcb->asoc.hb_random_idx > 3) {
+ goto warp_drive_sa;
+ }
+ }
+ this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx];
+ }
+ if ((this_random >> stcb->asoc.hb_ect_randombit) & 0x01) {
+ if (chk != NULL)
+ /* ECN Nonce stuff */
+ chk->rec.data.ect_nonce = SCTP_ECT1_BIT;
+ stcb->asoc.hb_ect_randombit++;
+ return (SCTP_ECT1_BIT);
+ } else {
+ stcb->asoc.hb_ect_randombit++;
+ return (SCTP_ECT0_BIT);
+ }
+}
+
+static int
+sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, /* may be NULL */
+ struct sctp_nets *net,
+ struct sockaddr *to,
+ struct mbuf *m,
+ uint32_t auth_offset,
+ struct sctp_auth_chunk *auth,
+ int nofragment_flag,
+ int ecn_ok,
+ struct sctp_tmit_chunk *chk,
+ int out_of_asoc_ok,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+/* nofragment_flag to tell if IP_DF should be set (IPv4 only) */
+{
+ /*
+ * Given an mbuf chain (via SCTP_BUF_NEXT()) that holds a packet header
+ * WITH an SCTPHDR but no IP header, endpoint inp and sa structure:
+ * - fill in the HMAC digest of any AUTH chunk in the packet.
+ * - calculate and fill in the SCTP checksum.
+ * - prepend an IP address header.
+ * - if boundall use INADDR_ANY.
+ * - if boundspecific do source address selection.
+ * - set the fragmentation option for IPv4.
+ * - on return from IP output, check/adjust the MTU size of the output
+ * interface and the smallest_mtu size as well.
+ */
+ /* Will need ifdefs around this */
+ struct mbuf *o_pak;
+ struct mbuf *newm;
+ struct sctphdr *sctphdr;
+ int packet_length;
+ uint32_t csum;
+ int ret;
+ uint32_t vrf_id;
+ sctp_route_t *ro = NULL;
+
+ if ((net) && (net->dest_state & SCTP_ADDR_OUT_OF_SCOPE)) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ sctp_m_freem(m);
+ return (EFAULT);
+ }
+ if (stcb) {
+ vrf_id = stcb->asoc.vrf_id;
+ } else {
+ vrf_id = inp->def_vrf_id;
+ }
+
+ /* fill in the HMAC digest for any AUTH chunk in the packet */
+ if ((auth != NULL) && (stcb != NULL)) {
+ sctp_fill_hmac_digest_m(m, auth_offset, auth, stcb);
+ }
+ /* Calculate the csum and fill in the length of the packet */
+ sctphdr = mtod(m, struct sctphdr *);
+ if (sctp_no_csum_on_loopback &&
+ (stcb) &&
+ (to->sa_family == AF_INET) &&
+ (stcb->asoc.loopback_scope)) {
+ sctphdr->checksum = 0;
+ /*
+ * This can probably now be taken out since my audit shows
+ * no more bad pktlen's coming in. But we will wait a while
+ * yet.
+ */
+ packet_length = sctp_calculate_len(m);
+ } else {
+ sctphdr->checksum = 0;
+ csum = sctp_calculate_sum(m, &packet_length, 0);
+ sctphdr->checksum = csum;
+ }
+
+ if (to->sa_family == AF_INET) {
+ struct ip *ip = NULL;
+ sctp_route_t iproute;
+ uint8_t tos_value;
+
+ newm = sctp_get_mbuf_for_msg(sizeof(struct ip), 1, M_DONTWAIT, 1, MT_DATA);
+ if (newm == NULL) {
+ sctp_m_freem(m);
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ SCTP_ALIGN_TO_END(newm, sizeof(struct ip));
+ SCTP_BUF_LEN(newm) = sizeof(struct ip);
+ packet_length += sizeof(struct ip);
+ SCTP_BUF_NEXT(newm) = m;
+ m = newm;
+ ip = mtod(m, struct ip *);
+ ip->ip_v = IPVERSION;
+ ip->ip_hl = (sizeof(struct ip) >> 2);
+ if (net) {
+ tos_value = net->tos_flowlabel & 0x000000ff;
+ } else {
+ tos_value = inp->ip_inp.inp.inp_ip_tos;
+ }
+ if (nofragment_flag) {
+#if defined(WITH_CONVERT_IP_OFF) || defined(__FreeBSD__) || defined(__APPLE__)
+ ip->ip_off = IP_DF;
+#else
+ ip->ip_off = htons(IP_DF);
+#endif
+ } else
+ ip->ip_off = 0;
+
+ /* FreeBSD has a function for ip_id's */
+ ip->ip_id = ip_newid();
+
+ ip->ip_ttl = inp->ip_inp.inp.inp_ip_ttl;
+ ip->ip_len = packet_length;
+ if (stcb) {
+ if ((stcb->asoc.ecn_allowed) && ecn_ok) {
+ /* Enable ECN */
+ ip->ip_tos = ((u_char)(tos_value & 0xfc) | sctp_get_ect(stcb, chk));
+ } else {
+ /* No ECN */
+ ip->ip_tos = (u_char)(tos_value & 0xfc);
+ }
+ } else {
+ /* no association at all */
+ ip->ip_tos = (tos_value & 0xfc);
+ }
+ ip->ip_p = IPPROTO_SCTP;
+ ip->ip_sum = 0;
+ if (net == NULL) {
+ ro = &iproute;
+ memset(&iproute, 0, sizeof(iproute));
+ memcpy(&ro->ro_dst, to, to->sa_len);
+ } else {
+ ro = (sctp_route_t *) & net->ro;
+ }
+ /* Now the address selection part */
+ ip->ip_dst.s_addr = ((struct sockaddr_in *)to)->sin_addr.s_addr;
+
+ /* call the routine to select the src address */
+ if (net) {
+ if (net->ro._s_addr && (net->ro._s_addr->localifa_flags & (SCTP_BEING_DELETED | SCTP_ADDR_IFA_UNUSEABLE))) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ if (ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = NULL;
+ }
+ }
+ if (net->src_addr_selected == 0) {
+ if (out_of_asoc_ok) {
+ /* do not cache */
+ goto temp_v4_src;
+ }
+ /* Cache the source address */
+ net->ro._s_addr = sctp_source_address_selection(inp, stcb,
+ ro, net, out_of_asoc_ok,
+ vrf_id);
+ net->src_addr_selected = 1;
+ }
+ if (net->ro._s_addr == NULL) {
+ /* No route to host */
+ net->src_addr_selected = 0;
+ goto no_route;
+ }
+ ip->ip_src = net->ro._s_addr->address.sin.sin_addr;
+ } else {
+ struct sctp_ifa *_lsrc;
+
+ temp_v4_src:
+ _lsrc = sctp_source_address_selection(inp, stcb, ro,
+ net,
+ out_of_asoc_ok,
+ vrf_id);
+ if (_lsrc == NULL) {
+ goto no_route;
+ }
+ ip->ip_src = _lsrc->address.sin.sin_addr;
+ sctp_free_ifa(_lsrc);
+ }
+
+ /*
+ * If source address selection fails and we find no route
+ * then the ip_output should fail as well with a
+ * NO_ROUTE_TO_HOST type error. We probably should catch
+ * that somewhere and abort the association right away
+ * (assuming this is an INIT being sent).
+ */
+ if ((ro->ro_rt == NULL)) {
+ /*
+ * src addr selection failed to find a route (or
+ * valid source addr), so we can't get there from
+ * here (yet)!
+ */
+ no_route:
+ SCTPDBG(SCTP_DEBUG_OUTPUT1,
+ "%s: dropped packet - no valid source addr\n",
+ __FUNCTION__);
+ if (net) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1,
+ "Destination was ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT1,
+ &net->ro._l_addr.sa);
+ if (net->dest_state & SCTP_ADDR_CONFIRMED) {
+ if ((net->dest_state & SCTP_ADDR_REACHABLE) && stcb) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "no route takes interface %p down\n", net);
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
+ stcb,
+ SCTP_FAILED_THRESHOLD,
+ (void *)net,
+ so_locked);
+ net->dest_state &= ~SCTP_ADDR_REACHABLE;
+ net->dest_state |= SCTP_ADDR_NOT_REACHABLE;
+ /*
+ * JRS 5/14/07 - If a
+ * destination is
+ * unreachable, the PF bit
+ * is turned off. This
+ * allows an unambiguous use
+ * of the PF bit for
+ * destinations that are
+ * reachable but potentially
+ * failed. If the
+ * destination is set to the
+ * unreachable state, also
+ * set the destination to
+ * the PF state.
+ */
+ /*
+ * Add debug message here if
+ * destination is not in PF
+ * state.
+ */
+ /*
+ * Stop any running T3
+ * timers here?
+ */
+ if (sctp_cmt_on_off && sctp_cmt_pf) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Destination %p moved from PF to unreachable.\n",
+ net);
+ }
+ }
+ }
+ if (stcb) {
+ if (net == stcb->asoc.primary_destination) {
+ /* need a new primary */
+ struct sctp_nets *alt;
+
+ alt = sctp_find_alternate_net(stcb, net, 0);
+ if (alt != net) {
+ if (sctp_set_primary_addr(stcb,
+ (struct sockaddr *)NULL,
+ alt) == 0) {
+ net->dest_state |= SCTP_ADDR_WAS_PRIMARY;
+ if (net->ro._s_addr) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ }
+ net->src_addr_selected = 0;
+ }
+ }
+ }
+ }
+ }
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH);
+ sctp_m_freem(m);
+ return (EHOSTUNREACH);
+ }
+ if (ro != &iproute) {
+ memcpy(&iproute, ro, sizeof(*ro));
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Calling ipv4 output routine from low level src addr:%x\n",
+ (uint32_t) (ntohl(ip->ip_src.s_addr)));
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Destination is %x\n",
+ (uint32_t) (ntohl(ip->ip_dst.s_addr)));
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "RTP route is %p through\n",
+ ro->ro_rt);
+
+ if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
+ /* failed to prepend data, give up */
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ sctp_m_freem(m);
+ return (ENOMEM);
+ }
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(m, packet_length);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, m, packet_length);
+
+ /* send it out. table id is taken from stcb */
+ SCTP_IP_OUTPUT(ret, o_pak, ro, stcb, vrf_id);
+
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+ if (ret)
+ SCTP_STAT_INCR(sctps_senderrors);
+
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "IP output returns %d\n", ret);
+ if (net == NULL) {
+ /* free temporary routes */
+ if (ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = NULL;
+ }
+ } else {
+ /* PMTU check versus smallest asoc MTU goes here */
+ if ((ro->ro_rt != NULL) &&
+ (net->ro._s_addr)) {
+ uint32_t mtu;
+
+ mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt);
+ if (mtu &&
+ (stcb->asoc.smallest_mtu > mtu)) {
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("sctp_mtu_size_reset called after ip_output mtu-change:%d\n",
+ mtu);
+#endif
+ sctp_mtu_size_reset(inp, &stcb->asoc, mtu);
+ net->mtu = mtu;
+ }
+ } else if (ro->ro_rt == NULL) {
+ /* route was freed */
+ if (net->ro._s_addr &&
+ net->src_addr_selected) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ }
+ net->src_addr_selected = 0;
+ }
+ }
+ return (ret);
+ }
+#ifdef INET6
+ else if (to->sa_family == AF_INET6) {
+ uint32_t flowlabel;
+ struct ip6_hdr *ip6h;
+ struct route_in6 ip6route;
+ struct ifnet *ifp;
+ u_char flowTop;
+ uint16_t flowBottom;
+ u_char tosBottom, tosTop;
+ struct sockaddr_in6 *sin6, tmp, *lsa6, lsa6_tmp;
+ int prev_scope = 0;
+ struct sockaddr_in6 lsa6_storage;
+ int error;
+ u_short prev_port = 0;
+
+ if (net != NULL) {
+ flowlabel = net->tos_flowlabel;
+ } else {
+ flowlabel = ((struct in6pcb *)inp)->in6p_flowinfo;
+ }
+
+ newm = sctp_get_mbuf_for_msg(sizeof(struct ip6_hdr), 1, M_DONTWAIT, 1, MT_DATA);
+ if (newm == NULL) {
+ sctp_m_freem(m);
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ SCTP_ALIGN_TO_END(newm, sizeof(struct ip6_hdr));
+ SCTP_BUF_LEN(newm) = sizeof(struct ip6_hdr);
+ packet_length += sizeof(struct ip6_hdr);
+ SCTP_BUF_NEXT(newm) = m;
+ m = newm;
+
+ ip6h = mtod(m, struct ip6_hdr *);
+ /*
+ * We assume here that inp_flow is in host byte order within
+ * the TCB!
+ */
+ flowBottom = flowlabel & 0x0000ffff;
+ flowTop = ((flowlabel & 0x000f0000) >> 16);
+ tosTop = (((flowlabel & 0xf0) >> 4) | IPV6_VERSION);
+ /* protect *sin6 from overwrite */
+ sin6 = (struct sockaddr_in6 *)to;
+ tmp = *sin6;
+ sin6 = &tmp;
+
+ /* KAME hack: embed scopeid */
+ if (sa6_embedscope(sin6, ip6_use_defzone) != 0) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ if (net == NULL) {
+ memset(&ip6route, 0, sizeof(ip6route));
+ ro = (sctp_route_t *) & ip6route;
+ memcpy(&ro->ro_dst, sin6, sin6->sin6_len);
+ } else {
+ ro = (sctp_route_t *) & net->ro;
+ }
+ if (stcb != NULL) {
+ if ((stcb->asoc.ecn_allowed) && ecn_ok) {
+ /* Enable ECN */
+ tosBottom = (((((struct in6pcb *)inp)->in6p_flowinfo & 0x0c) | sctp_get_ect(stcb, chk)) << 4);
+ } else {
+ /* No ECN */
+ tosBottom = ((((struct in6pcb *)inp)->in6p_flowinfo & 0x0c) << 4);
+ }
+ } else {
+ /* we could get no asoc if it is a O-O-T-B packet */
+ tosBottom = ((((struct in6pcb *)inp)->in6p_flowinfo & 0x0c) << 4);
+ }
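+ /*
+ * Assemble the IPv6 version, traffic class (including the ECN bits)
+ * and flow label into the 32-bit ip6_flow word.
+ */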
+ ip6h->ip6_flow = htonl(((tosTop << 24) | ((tosBottom | flowTop) << 16) | flowBottom));
+ ip6h->ip6_nxt = IPPROTO_SCTP;
+ ip6h->ip6_plen = (packet_length - sizeof(struct ip6_hdr));
+ ip6h->ip6_dst = sin6->sin6_addr;
+
+ /*
+ * Add SRC address selection here: we can only reuse to a
+ * limited degree the kame src-addr-sel, since we can try
+ * their selection but it may not be bound.
+ */
+ bzero(&lsa6_tmp, sizeof(lsa6_tmp));
+ lsa6_tmp.sin6_family = AF_INET6;
+ lsa6_tmp.sin6_len = sizeof(lsa6_tmp);
+ lsa6 = &lsa6_tmp;
+ if (net) {
+ if (net->ro._s_addr && (net->ro._s_addr->localifa_flags & (SCTP_BEING_DELETED | SCTP_ADDR_IFA_UNUSEABLE))) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ if (ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = NULL;
+ }
+ }
+ if (net->src_addr_selected == 0) {
+ if (out_of_asoc_ok) {
+ /* do not cache */
+ goto temp_v6_src;
+ }
+ /* Cache the source address */
+ net->ro._s_addr = sctp_source_address_selection(inp,
+ stcb,
+ ro,
+ net,
+ out_of_asoc_ok,
+ vrf_id);
+ net->src_addr_selected = 1;
+ }
+ if (net->ro._s_addr == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "V6:No route to host\n");
+ net->src_addr_selected = 0;
+ goto no_route;
+ }
+ lsa6->sin6_addr = net->ro._s_addr->address.sin6.sin6_addr;
+ } else {
+ struct sctp_ifa *_lsrc;
+
+ temp_v6_src:
+ _lsrc = sctp_source_address_selection(inp, stcb, ro,
+ net,
+ out_of_asoc_ok,
+ vrf_id);
+ if (_lsrc == NULL) {
+ goto no_route;
+ }
+ lsa6->sin6_addr = _lsrc->address.sin6.sin6_addr;
+ sctp_free_ifa(_lsrc);
+ }
+ lsa6->sin6_port = inp->sctp_lport;
+
+ if ((ro->ro_rt == NULL)) {
+ /*
+ * src addr selection failed to find a route (or
+ * valid source addr), so we can't get there from
+ * here!
+ */
+ goto no_route;
+ }
+ /*
+ * XXX: sa6 may not have a valid sin6_scope_id in the
+ * non-SCOPEDROUTING case.
+ */
+ bzero(&lsa6_storage, sizeof(lsa6_storage));
+ lsa6_storage.sin6_family = AF_INET6;
+ lsa6_storage.sin6_len = sizeof(lsa6_storage);
+ if ((error = sa6_recoverscope(&lsa6_storage)) != 0) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "recover scope fails error %d\n", error);
+ sctp_m_freem(m);
+ return (error);
+ }
+ /* XXX */
+ lsa6_storage.sin6_addr = lsa6->sin6_addr;
+ lsa6_storage.sin6_port = inp->sctp_lport;
+ lsa6 = &lsa6_storage;
+ ip6h->ip6_src = lsa6->sin6_addr;
+
+ /*
+ * We set the hop limit now since there is a good chance
+ * that our ro pointer is now filled
+ */
+ ip6h->ip6_hlim = SCTP_GET_HLIM(inp, ro);
+ ifp = SCTP_GET_IFN_VOID_FROM_ROUTE(ro);
+
+#ifdef SCTP_DEBUG
+ /* Copy to be sure something bad is not happening */
+ sin6->sin6_addr = ip6h->ip6_dst;
+ lsa6->sin6_addr = ip6h->ip6_src;
+#endif
+
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Calling ipv6 output routine from low level\n");
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "src: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, (struct sockaddr *)lsa6);
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "dst: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, (struct sockaddr *)sin6);
+ if (net) {
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ /* preserve the port and scope for link local send */
+ prev_scope = sin6->sin6_scope_id;
+ prev_port = sin6->sin6_port;
+ }
+ if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
+ /* failed to prepend data, give up */
+ sctp_m_freem(m);
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(m, packet_length);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, m, packet_length);
+
+ /* send it out. table id is taken from stcb */
+ SCTP_IP6_OUTPUT(ret, o_pak, (struct route_in6 *)ro, &ifp,
+ stcb, vrf_id);
+
+ if (net) {
+ /* for link local this must be done */
+ sin6->sin6_scope_id = prev_scope;
+ sin6->sin6_port = prev_port;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "return from send is %d\n", ret);
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+ if (ret) {
+ SCTP_STAT_INCR(sctps_senderrors);
+ }
+ if (net == NULL) {
+ /* Now if we had a temp route free it */
+ if (ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ }
+ } else {
+ /* PMTU check versus smallest asoc MTU goes here */
+ if (ro->ro_rt == NULL) {
+ /* Route was freed */
+ if (net->ro._s_addr &&
+ net->src_addr_selected) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ }
+ net->src_addr_selected = 0;
+ }
+ if ((ro->ro_rt != NULL) &&
+ (net->ro._s_addr)) {
+ uint32_t mtu;
+
+ mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt);
+ if (mtu &&
+ (stcb->asoc.smallest_mtu > mtu)) {
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("sctp_mtu_size_reset called after ip6_output mtu-change:%d\n",
+ mtu);
+#endif
+ sctp_mtu_size_reset(inp, &stcb->asoc, mtu);
+ net->mtu = mtu;
+ }
+ } else if (ifp) {
+ if (ND_IFINFO(ifp)->linkmtu &&
+ (stcb->asoc.smallest_mtu > ND_IFINFO(ifp)->linkmtu)) {
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("sctp_mtu_size_reset called via ifp ND_IFINFO() linkmtu:%d\n",
+ ND_IFINFO(ifp)->linkmtu);
+#endif
+ sctp_mtu_size_reset(inp,
+ &stcb->asoc,
+ ND_IFINFO(ifp)->linkmtu);
+ }
+ }
+ }
+ return (ret);
+ }
+#endif
+ else {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Unknown protocol (TSNH) type %d\n",
+ ((struct sockaddr *)to)->sa_family);
+ sctp_m_freem(m);
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ return (EFAULT);
+ }
+}
+
+
+void
+sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m, *m_at, *mp_last;
+ struct sctp_nets *net;
+ struct sctp_init_msg *initm;
+ struct sctp_supported_addr_param *sup_addr;
+ struct sctp_ecn_supported_param *ecn;
+ struct sctp_prsctp_supported_param *prsctp;
+ struct sctp_ecn_nonce_supported_param *ecn_nonce;
+ struct sctp_supported_chunk_types_param *pr_supported;
+ int cnt_inits_to = 0;
+ int padval, ret;
+ int num_ext;
+ int p_len;
+
+ /* INIT's always go to the primary (and usually ONLY address) */
+ mp_last = NULL;
+ net = stcb->asoc.primary_destination;
+ if (net == NULL) {
+ net = TAILQ_FIRST(&stcb->asoc.nets);
+ if (net == NULL) {
+ /* TSNH */
+ return;
+ }
+ /* we confirm any address we send an INIT to */
+ net->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
+ (void)sctp_set_primary_addr(stcb, NULL, net);
+ } else {
+ /* we confirm any address we send an INIT to */
+ net->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT\n");
+ if (((struct sockaddr *)&(net->ro._l_addr))->sa_family == AF_INET6) {
+ /*
+ * special hook, if we are sending to link local it will not
+ * show up in our private address count.
+ */
+ struct sockaddr_in6 *sin6l;
+
+ sin6l = &net->ro._l_addr.sin6;
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6l->sin6_addr))
+ cnt_inits_to = 1;
+ }
+ if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ /* This case should not happen */
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - failed timer?\n");
+ return;
+ }
+ /* start the INIT timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_INIT, inp, stcb, net);
+
+ m = sctp_get_mbuf_for_msg(MCLBYTES, 1, M_DONTWAIT, 1, MT_DATA);
+ if (m == NULL) {
+ /* No memory, INIT timer will re-attempt. */
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - mbuf?\n");
+ return;
+ }
+ SCTP_BUF_LEN(m) = sizeof(struct sctp_init_msg);
+ /*
+ * assume peer supports asconf in order to be able to queue local
+ * address changes while an INIT is in flight and before the assoc
+ * is established.
+ */
+ stcb->asoc.peer_supports_asconf = 1;
+ /* Now lets put the SCTP header in place */
+ initm = mtod(m, struct sctp_init_msg *);
+ initm->sh.src_port = inp->sctp_lport;
+ initm->sh.dest_port = stcb->rport;
+ initm->sh.v_tag = 0;
+ initm->sh.checksum = 0; /* calculate later */
+ /* now the chunk header */
+ initm->msg.ch.chunk_type = SCTP_INITIATION;
+ initm->msg.ch.chunk_flags = 0;
+ /* fill in later from mbuf we build */
+ initm->msg.ch.chunk_length = 0;
+ /* place in my tag */
+ initm->msg.init.initiate_tag = htonl(stcb->asoc.my_vtag);
+ /* set up some of the credits. */
+ initm->msg.init.a_rwnd = htonl(max(SCTP_SB_LIMIT_RCV(inp->sctp_socket),
+ SCTP_MINIMAL_RWND));
+
+ initm->msg.init.num_outbound_streams = htons(stcb->asoc.pre_open_streams);
+ initm->msg.init.num_inbound_streams = htons(stcb->asoc.max_inbound_streams);
+ initm->msg.init.initial_tsn = htonl(stcb->asoc.init_seq_number);
+ /* now the address restriction */
+ sup_addr = (struct sctp_supported_addr_param *)((caddr_t)initm +
+ sizeof(*initm));
+ sup_addr->ph.param_type = htons(SCTP_SUPPORTED_ADDRTYPE);
+ /* we support 2 types IPv6/IPv4 */
+ sup_addr->ph.param_length = htons(sizeof(*sup_addr) +
+ sizeof(uint16_t));
+ sup_addr->addr_type[0] = htons(SCTP_IPV4_ADDRESS);
+ sup_addr->addr_type[1] = htons(SCTP_IPV6_ADDRESS);
+ SCTP_BUF_LEN(m) += sizeof(*sup_addr) + sizeof(uint16_t);
+
+ if (inp->sctp_ep.adaptation_layer_indicator) {
+ struct sctp_adaptation_layer_indication *ali;
+
+ ali = (struct sctp_adaptation_layer_indication *)(
+ (caddr_t)sup_addr + sizeof(*sup_addr) + sizeof(uint16_t));
+ ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
+ ali->ph.param_length = htons(sizeof(*ali));
+ ali->indication = ntohl(inp->sctp_ep.adaptation_layer_indicator);
+ SCTP_BUF_LEN(m) += sizeof(*ali);
+ ecn = (struct sctp_ecn_supported_param *)((caddr_t)ali +
+ sizeof(*ali));
+ } else {
+ ecn = (struct sctp_ecn_supported_param *)((caddr_t)sup_addr +
+ sizeof(*sup_addr) + sizeof(uint16_t));
+ }
+
+ /* now any cookie time extensions */
+ if (stcb->asoc.cookie_preserve_req) {
+ struct sctp_cookie_perserve_param *cookie_preserve;
+
+ cookie_preserve = (struct sctp_cookie_perserve_param *)(ecn);
+ cookie_preserve->ph.param_type = htons(SCTP_COOKIE_PRESERVE);
+ cookie_preserve->ph.param_length = htons(
+ sizeof(*cookie_preserve));
+ cookie_preserve->time = htonl(stcb->asoc.cookie_preserve_req);
+ SCTP_BUF_LEN(m) += sizeof(*cookie_preserve);
+ ecn = (struct sctp_ecn_supported_param *)(
+ (caddr_t)cookie_preserve + sizeof(*cookie_preserve));
+ stcb->asoc.cookie_preserve_req = 0;
+ }
+ /* ECN parameter */
+ if (sctp_ecn_enable == 1) {
+ ecn->ph.param_type = htons(SCTP_ECN_CAPABLE);
+ ecn->ph.param_length = htons(sizeof(*ecn));
+ SCTP_BUF_LEN(m) += sizeof(*ecn);
+ prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn +
+ sizeof(*ecn));
+ } else {
+ prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn);
+ }
+ /* And now tell the peer we do pr-sctp */
+ prsctp->ph.param_type = htons(SCTP_PRSCTP_SUPPORTED);
+ prsctp->ph.param_length = htons(sizeof(*prsctp));
+ SCTP_BUF_LEN(m) += sizeof(*prsctp);
+
+ /* And now tell the peer we do all the extensions */
+ pr_supported = (struct sctp_supported_chunk_types_param *)
+ ((caddr_t)prsctp + sizeof(*prsctp));
+ pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
+ num_ext = 0;
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
+ pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
+ pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
+ pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
+ if (!sctp_auth_disable)
+ pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION;
+ p_len = sizeof(*pr_supported) + num_ext;
+ pr_supported->ph.param_length = htons(p_len);
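+ /* zero out any padding required */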
+ bzero((caddr_t)pr_supported + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+
+ /* ECN nonce: And now tell the peer we support ECN nonce */
+ if (sctp_ecn_nonce) {
+ ecn_nonce = (struct sctp_ecn_nonce_supported_param *)
+ ((caddr_t)pr_supported + SCTP_SIZE32(p_len));
+ ecn_nonce->ph.param_type = htons(SCTP_ECN_NONCE_SUPPORTED);
+ ecn_nonce->ph.param_length = htons(sizeof(*ecn_nonce));
+ SCTP_BUF_LEN(m) += sizeof(*ecn_nonce);
+ }
+ /* add authentication parameters */
+ if (!sctp_auth_disable) {
+ struct sctp_auth_random *randp;
+ struct sctp_auth_hmac_algo *hmacs;
+ struct sctp_auth_chunk_list *chunks;
+
+ /* attach RANDOM parameter, if available */
+ if (stcb->asoc.authinfo.random != NULL) {
+ randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ p_len = sizeof(*randp) + stcb->asoc.authinfo.random_len;
+#ifdef SCTP_AUTH_DRAFT_04
+ randp->ph.param_type = htons(SCTP_RANDOM);
+ randp->ph.param_length = htons(p_len);
+ bcopy(stcb->asoc.authinfo.random->key,
+ randp->random_data,
+ stcb->asoc.authinfo.random_len);
+#else
+ /* random key already contains the header */
+ bcopy(stcb->asoc.authinfo.random->key, randp, p_len);
+#endif
+ /* zero out any padding required */
+ bzero((caddr_t)randp + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ }
+ /* add HMAC_ALGO parameter */
+ hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ p_len = sctp_serialize_hmaclist(stcb->asoc.local_hmacs,
+ (uint8_t *) hmacs->hmac_ids);
+ if (p_len > 0) {
+ p_len += sizeof(*hmacs);
+ hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
+ hmacs->ph.param_length = htons(p_len);
+ /* zero out any padding required */
+ bzero((caddr_t)hmacs + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ }
+ /* add CHUNKS parameter */
+ chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ p_len = sctp_serialize_auth_chunks(stcb->asoc.local_auth_chunks,
+ chunks->chunk_types);
+ if (p_len > 0) {
+ p_len += sizeof(*chunks);
+ chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
+ chunks->ph.param_length = htons(p_len);
+ /* zero out any padding required */
+ bzero((caddr_t)chunks + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ }
+ }
+ m_at = m;
+ /* now the addresses */
+ {
+ struct sctp_scoping scp;
+
+ /*
+ * To optimize this we could put the scoping stuff into a
+ * structure and remove the individual uint8's from the
+ * assoc structure. Then we could just pass in the address
+ * within the stcb.. but for now this is a quick hack to get
+ * the address stuff teased apart.
+ */
+ scp.ipv4_addr_legal = stcb->asoc.ipv4_addr_legal;
+ scp.ipv6_addr_legal = stcb->asoc.ipv6_addr_legal;
+ scp.loopback_scope = stcb->asoc.loopback_scope;
+ scp.ipv4_local_scope = stcb->asoc.ipv4_local_scope;
+ scp.local_scope = stcb->asoc.local_scope;
+ scp.site_scope = stcb->asoc.site_scope;
+
+ m_at = sctp_add_addresses_to_i_ia(inp, &scp, m_at, cnt_inits_to);
+ }
+
+ /* calculate the size and update pkt header and chunk header */
+ p_len = 0;
+ for (m_at = m; m_at; m_at = SCTP_BUF_NEXT(m_at)) {
+ if (SCTP_BUF_NEXT(m_at) == NULL)
+ mp_last = m_at;
+ p_len += SCTP_BUF_LEN(m_at);
+ }
+ initm->msg.ch.chunk_length = htons((p_len - sizeof(struct sctphdr)));
+ /*
+ * We pass 0 here to NOT set IP_DF if it's IPv4; we ignore the return
+ * here since the timer will drive a retransmission.
+ */
+
+ /* I don't expect this to execute but we will be safe here */
+ padval = p_len % 4;
+ if ((padval) && (mp_last)) {
+ /*
+ * The compiler worries that mp_last may not be set even
+ * though I think it is impossible :-> however we add
+ * mp_last here just in case.
+ */
+ ret = sctp_add_pad_tombuf(mp_last, (4 - padval));
+ if (ret) {
+ /* Houston we have a problem, no space */
+ sctp_m_freem(m);
+ return;
+ }
+ p_len += padval;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - calls lowlevel_output\n");
+ ret = sctp_lowlevel_chunk_output(inp, stcb, net,
+ (struct sockaddr *)&net->ro._l_addr,
+ m, 0, NULL, 0, 0, NULL, 0, so_locked);
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "lowlevel_output - %d\n", ret);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ sctp_timer_start(SCTP_TIMER_TYPE_INIT, inp, stcb, net);
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+}
+
+struct mbuf *
+sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt,
+ int param_offset, int *abort_processing, struct sctp_chunkhdr *cp)
+{
+ /*
+ * Given an mbuf containing an INIT or INIT-ACK with the param_offset
+ * being equal to the beginning of the params, i.e. (iphlen +
+ * sizeof(struct sctp_init_msg)), parse through the parameters to the
+ * end of the mbuf verifying that all parameters are known.
+ *
+ * For unknown parameters build and return a mbuf with
+ * UNRECOGNIZED_PARAMETER errors. If the flags indicate to stop
+ * processing this chunk stop, and set *abort_processing to 1.
+ *
+ * By having param_offset be pre-set to where parameters begin it is
+ * hoped that this routine may be reused in the future by new
+ * features.
+ */
+ struct sctp_paramhdr *phdr, params;
+
+ struct mbuf *mat, *op_err;
+ char tempbuf[SCTP_PARAM_BUFFER_SIZE];
+ int at, limit, pad_needed;
+ uint16_t ptype, plen, padded_size;
+ int err_at;
+
+ *abort_processing = 0;
+ mat = in_initpkt;
+ err_at = 0;
+ limit = ntohs(cp->chunk_length) - sizeof(struct sctp_init_chunk);
+ at = param_offset;
+ op_err = NULL;
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Check for unrecognized param's\n");
+ phdr = sctp_get_next_param(mat, at, &params, sizeof(params));
+ while ((phdr != NULL) && ((size_t)limit >= sizeof(struct sctp_paramhdr))) {
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+ if ((plen > limit) || (plen < sizeof(struct sctp_paramhdr))) {
+ /* wacked parameter */
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error %d\n", plen);
+ goto invalid_size;
+ }
+ limit -= SCTP_SIZE32(plen);
+ /*-
+ * All parameters for all chunks that we know/understand are
+ * listed here. We process them in other places and make
+ * appropriate stop actions per the upper bits. However this
+ * is the generic routine processors can call to get back
+ * an operr to either incorporate (init-ack) or send.
+ */
+ padded_size = SCTP_SIZE32(plen);
+ switch (ptype) {
+ /* Param's with variable size */
+ case SCTP_HEARTBEAT_INFO:
+ case SCTP_STATE_COOKIE:
+ case SCTP_UNRECOG_PARAM:
+ case SCTP_ERROR_CAUSE_IND:
+ /* ok skip fwd */
+ at += padded_size;
+ break;
+ /* Param's with variable size within a range */
+ case SCTP_CHUNK_LIST:
+ case SCTP_SUPPORTED_CHUNK_EXT:
+ if (padded_size > (sizeof(struct sctp_supported_chunk_types_param) + (sizeof(uint8_t) * SCTP_MAX_SUPPORTED_EXT))) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error chklist %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_SUPPORTED_ADDRTYPE:
+ if (padded_size > SCTP_MAX_ADDR_PARAMS_SIZE) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error supaddrtype %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_RANDOM:
+ if (padded_size > (sizeof(struct sctp_auth_random) + SCTP_RANDOM_MAX_SIZE)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error random %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_SET_PRIM_ADDR:
+ case SCTP_DEL_IP_ADDRESS:
+ case SCTP_ADD_IP_ADDRESS:
+ if ((padded_size != sizeof(struct sctp_asconf_addrv4_param)) &&
+ (padded_size != sizeof(struct sctp_asconf_addr_param))) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error setprim %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ /* Param's with a fixed size */
+ case SCTP_IPV4_ADDRESS:
+ if (padded_size != sizeof(struct sctp_ipv4addr_param)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ipv4 addr %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_IPV6_ADDRESS:
+ if (padded_size != sizeof(struct sctp_ipv6addr_param)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ipv6 addr %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_COOKIE_PRESERVE:
+ if (padded_size != sizeof(struct sctp_cookie_perserve_param)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error cookie-preserve %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_ECN_NONCE_SUPPORTED:
+ case SCTP_PRSCTP_SUPPORTED:
+ if (padded_size != sizeof(struct sctp_paramhdr)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ecnnonce/prsctp %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_ECN_CAPABLE:
+ if (padded_size != sizeof(struct sctp_ecn_supported_param)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ecn %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_ULP_ADAPTATION:
+ if (padded_size != sizeof(struct sctp_adaptation_layer_indication)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error adapatation %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_SUCCESS_REPORT:
+ if (padded_size != sizeof(struct sctp_asconf_paramhdr)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error success %d\n", plen);
+ goto invalid_size;
+ }
+ at += padded_size;
+ break;
+ case SCTP_HOSTNAME_ADDRESS:
+ {
+ /* We can NOT handle HOST NAME addresses!! */
+ int l_len;
+
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Can't handle hostname addresses.. abort processing\n");
+ *abort_processing = 1;
+ if (op_err == NULL) {
+ /* Ok need to try to get a mbuf */
+ l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len += plen;
+ l_len += sizeof(struct sctp_paramhdr);
+ op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ SCTP_BUF_LEN(op_err) = 0;
+ /*
+ * pre-reserve space for ip
+ * and sctp header and
+ * chunk hdr
+ */
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ }
+ }
+ if (op_err) {
+ /* If we have space */
+ struct sctp_paramhdr s;
+
+ if (err_at % 4) {
+ uint32_t cpthis = 0;
+
+ pad_needed = 4 - (err_at % 4);
+ m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis);
+ err_at += pad_needed;
+ }
+ s.param_type = htons(SCTP_CAUSE_UNRESOLVABLE_ADDR);
+ s.param_length = htons(sizeof(s) + plen);
+ m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s);
+ err_at += sizeof(s);
+ phdr = sctp_get_next_param(mat, at, (struct sctp_paramhdr *)tempbuf, min(sizeof(tempbuf), plen));
+ if (phdr == NULL) {
+ sctp_m_freem(op_err);
+ /*
+ * we are out of memory but
+ * we still need to have a
+ * look at what to do (the
+ * system is in trouble
+ * though).
+ */
+ return (NULL);
+ }
+ m_copyback(op_err, err_at, plen, (caddr_t)phdr);
+ err_at += plen;
+ }
+ return (op_err);
+ break;
+ }
+ default:
+ /*
+ * we do not recognize the parameter; figure out what
+ * to do.
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Hit default param %x\n", ptype);
+ if ((ptype & 0x4000) == 0x4000) {
+ /* Report bit is set?? */
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "report op err\n");
+ if (op_err == NULL) {
+ int l_len;
+
+ /* Ok need to try to get an mbuf */
+ l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len += plen;
+ l_len += sizeof(struct sctp_paramhdr);
+ op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ SCTP_BUF_LEN(op_err) = 0;
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ }
+ }
+ if (op_err) {
+ /* If we have space */
+ struct sctp_paramhdr s;
+
+ if (err_at % 4) {
+ uint32_t cpthis = 0;
+
+ pad_needed = 4 - (err_at % 4);
+ m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis);
+ err_at += pad_needed;
+ }
+ s.param_type = htons(SCTP_UNRECOG_PARAM);
+ s.param_length = htons(sizeof(s) + plen);
+ m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s);
+ err_at += sizeof(s);
+ if (plen > sizeof(tempbuf)) {
+ plen = sizeof(tempbuf);
+ }
+ phdr = sctp_get_next_param(mat, at, (struct sctp_paramhdr *)tempbuf, min(sizeof(tempbuf), plen));
+ if (phdr == NULL) {
+ sctp_m_freem(op_err);
+ /*
+ * we are out of memory but
+ * we still need to have a
+ * look at what to do (the
+ * system is in trouble
+ * though).
+ */
+ op_err = NULL;
+ goto more_processing;
+ }
+ m_copyback(op_err, err_at, plen, (caddr_t)phdr);
+ err_at += plen;
+ }
+ }
+ more_processing:
+ if ((ptype & 0x8000) == 0x0000) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "stop proc\n");
+ return (op_err);
+ } else {
+ /* skip this chunk and continue processing */
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "move on\n");
+ at += SCTP_SIZE32(plen);
+ }
+ break;
+
+ }
+ phdr = sctp_get_next_param(mat, at, &params, sizeof(params));
+ }
+ return (op_err);
+invalid_size:
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "abort flag set\n");
+ *abort_processing = 1;
+ if ((op_err == NULL) && phdr) {
+ int l_len;
+
+ l_len = sizeof(struct ip6_hdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ l_len += (2 * sizeof(struct sctp_paramhdr));
+ op_err = sctp_get_mbuf_for_msg(l_len, 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ SCTP_BUF_LEN(op_err) = 0;
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr));
+ SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr));
+ }
+ }
+ if ((op_err) && phdr) {
+ struct sctp_paramhdr s;
+
+ if (err_at % 4) {
+ uint32_t cpthis = 0;
+
+ pad_needed = 4 - (err_at % 4);
+ m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis);
+ err_at += pad_needed;
+ }
+ s.param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ s.param_length = htons(sizeof(s) + sizeof(struct sctp_paramhdr));
+ m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s);
+ err_at += sizeof(s);
+ /* Only copy back the p-hdr that caused the issue */
+ m_copyback(op_err, err_at, sizeof(struct sctp_paramhdr), (caddr_t)phdr);
+ }
+ return (op_err);
+}
+
+static int
+sctp_are_there_new_addresses(struct sctp_association *asoc,
+ struct mbuf *in_initpkt, int iphlen, int offset)
+{
+ /*
+ * Given an INIT packet, look through the packet to verify that there
+ * are NO new addresses. As we go through the parameters, add reports
+ * of any un-understood parameters that require an error. Also we
+ * must return (1) to drop the packet if we see an un-understood
+ * parameter that tells us to drop the chunk.
+ */
+ struct sockaddr_in sin4, *sa4;
+ struct sockaddr_in6 sin6, *sa6;
+ struct sockaddr *sa_touse;
+ struct sockaddr *sa;
+ struct sctp_paramhdr *phdr, params;
+ struct ip *iph;
+ struct mbuf *mat;
+ uint16_t ptype, plen;
+ int err_at;
+ uint8_t fnd;
+ struct sctp_nets *net;
+
+ memset(&sin4, 0, sizeof(sin4));
+ memset(&sin6, 0, sizeof(sin6));
+ sin4.sin_family = AF_INET;
+ sin4.sin_len = sizeof(sin4);
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(sin6);
+
+ sa_touse = NULL;
+ /* First what about the src address of the pkt ? */
+ iph = mtod(in_initpkt, struct ip *);
+ if (iph->ip_v == IPVERSION) {
+ /* source addr is IPv4 */
+ sin4.sin_addr = iph->ip_src;
+ sa_touse = (struct sockaddr *)&sin4;
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ /* source addr is IPv6 */
+ struct ip6_hdr *ip6h;
+
+ ip6h = mtod(in_initpkt, struct ip6_hdr *);
+ sin6.sin6_addr = ip6h->ip6_src;
+ sa_touse = (struct sockaddr *)&sin6;
+ } else {
+ return (1);
+ }
+
+ fnd = 0;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ sa = (struct sockaddr *)&net->ro._l_addr;
+ if (sa->sa_family == sa_touse->sa_family) {
+ if (sa->sa_family == AF_INET) {
+ sa4 = (struct sockaddr_in *)sa;
+ if (sa4->sin_addr.s_addr ==
+ sin4.sin_addr.s_addr) {
+ fnd = 1;
+ break;
+ }
+ } else if (sa->sa_family == AF_INET6) {
+ sa6 = (struct sockaddr_in6 *)sa;
+ if (SCTP6_ARE_ADDR_EQUAL(&sa6->sin6_addr,
+ &sin6.sin6_addr)) {
+ fnd = 1;
+ break;
+ }
+ }
+ }
+ }
+ if (fnd == 0) {
+ /* New address added! No need to look further. */
+ return (1);
+ }
+ /* Ok so far, let's munge through the rest of the packet */
+ mat = in_initpkt;
+ err_at = 0;
+ sa_touse = NULL;
+ offset += sizeof(struct sctp_init_chunk);
+ phdr = sctp_get_next_param(mat, offset, &params, sizeof(params));
+ while (phdr) {
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+ if (ptype == SCTP_IPV4_ADDRESS) {
+ struct sctp_ipv4addr_param *p4, p4_buf;
+
+ phdr = sctp_get_next_param(mat, offset,
+ (struct sctp_paramhdr *)&p4_buf, sizeof(p4_buf));
+ if (plen != sizeof(struct sctp_ipv4addr_param) ||
+ phdr == NULL) {
+ return (1);
+ }
+ p4 = (struct sctp_ipv4addr_param *)phdr;
+ sin4.sin_addr.s_addr = p4->addr;
+ sa_touse = (struct sockaddr *)&sin4;
+ } else if (ptype == SCTP_IPV6_ADDRESS) {
+ struct sctp_ipv6addr_param *p6, p6_buf;
+
+ phdr = sctp_get_next_param(mat, offset,
+ (struct sctp_paramhdr *)&p6_buf, sizeof(p6_buf));
+ if (plen != sizeof(struct sctp_ipv6addr_param) ||
+ phdr == NULL) {
+ return (1);
+ }
+ p6 = (struct sctp_ipv6addr_param *)phdr;
+ memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
+ sizeof(p6->addr));
+ sa_touse = (struct sockaddr *)&sin6;
+ }
+ if (sa_touse) {
+ /* ok, sa_touse points to one to check */
+ fnd = 0;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ sa = (struct sockaddr *)&net->ro._l_addr;
+ if (sa->sa_family != sa_touse->sa_family) {
+ continue;
+ }
+ if (sa->sa_family == AF_INET) {
+ sa4 = (struct sockaddr_in *)sa;
+ if (sa4->sin_addr.s_addr ==
+ sin4.sin_addr.s_addr) {
+ fnd = 1;
+ break;
+ }
+ } else if (sa->sa_family == AF_INET6) {
+ sa6 = (struct sockaddr_in6 *)sa;
+ if (SCTP6_ARE_ADDR_EQUAL(
+ &sa6->sin6_addr, &sin6.sin6_addr)) {
+ fnd = 1;
+ break;
+ }
+ }
+ }
+ if (!fnd) {
+ /* New addr added! no need to look further */
+ return (1);
+ }
+ }
+ offset += SCTP_SIZE32(plen);
+ phdr = sctp_get_next_param(mat, offset, &params, sizeof(params));
+ }
+ return (0);
+}
+
+/*
+ * Given an MBUF chain that was sent to us containing an INIT, build an
+ * INIT-ACK with COOKIE and send it back. We assume that the in_initpkt has done
+ * a pullup to include the IPv6/v4 header, SCTP header and initial part of the
+ * INIT message (i.e. the struct sctp_init_msg).
+ */
+void
+sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct mbuf *init_pkt, int iphlen, int offset, struct sctphdr *sh,
+ struct sctp_init_chunk *init_chk, uint32_t vrf_id, int hold_inp_lock)
+{
+ struct sctp_association *asoc;
+ struct mbuf *m, *m_at, *m_tmp, *m_cookie, *op_err, *mp_last;
+ struct sctp_init_msg *initackm_out;
+ struct sctp_ecn_supported_param *ecn;
+ struct sctp_prsctp_supported_param *prsctp;
+ struct sctp_ecn_nonce_supported_param *ecn_nonce;
+ struct sctp_supported_chunk_types_param *pr_supported;
+ struct sockaddr_storage store;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+ sctp_route_t *ro;
+ struct ip *iph;
+ struct ip6_hdr *ip6;
+ struct sockaddr *to;
+ struct sctp_state_cookie stc;
+ struct sctp_nets *net = NULL;
+ uint8_t *signature = NULL;
+ int cnt_inits_to = 0;
+ uint16_t his_limit, i_want;
+ int abort_flag, padval;
+ int num_ext;
+ int p_len;
+ struct socket *so;
+
+ if (stcb)
+ asoc = &stcb->asoc;
+ else
+ asoc = NULL;
+ mp_last = NULL;
+ if ((asoc != NULL) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
+ (sctp_are_there_new_addresses(asoc, init_pkt, iphlen, offset))) {
+ /* new addresses, out of here in non-cookie-wait states */
+ /*
+ * Send an ABORT; we don't add the new-address error clause,
+ * though we even set the T bit and copy in the 0 tag. This
+ * looks no different than if no listener was present.
+ */
+ sctp_send_abort(init_pkt, iphlen, sh, 0, NULL, vrf_id);
+ return;
+ }
+ abort_flag = 0;
+ op_err = sctp_arethere_unrecognized_parameters(init_pkt,
+ (offset + sizeof(struct sctp_init_chunk)),
+ &abort_flag, (struct sctp_chunkhdr *)init_chk);
+ if (abort_flag) {
+ sctp_send_abort(init_pkt, iphlen, sh,
+ init_chk->init.initiate_tag, op_err, vrf_id);
+ return;
+ }
+ m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (m == NULL) {
+ /* No memory, INIT timer will re-attempt. */
+ if (op_err)
+ sctp_m_freem(op_err);
+ return;
+ }
+ SCTP_BUF_LEN(m) = sizeof(struct sctp_init_msg);
+
+ /* the time I built cookie */
+ (void)SCTP_GETTIME_TIMEVAL(&stc.time_entered);
+
+ /* populate any tie tags */
+ if (asoc != NULL) {
+ /* unlock before tag selections */
+ stc.tie_tag_my_vtag = asoc->my_vtag_nonce;
+ stc.tie_tag_peer_vtag = asoc->peer_vtag_nonce;
+ stc.cookie_life = asoc->cookie_life;
+ net = asoc->primary_destination;
+ } else {
+ stc.tie_tag_my_vtag = 0;
+ stc.tie_tag_peer_vtag = 0;
+ /* life I will award this cookie */
+ stc.cookie_life = inp->sctp_ep.def_cookie_life;
+ }
+
+ /* copy in the ports for later check */
+ stc.myport = sh->dest_port;
+ stc.peerport = sh->src_port;
+
+ /*
+ * If we wanted to honor cookie life extensions, we would add to
+ * stc.cookie_life. For now we should NOT honor any extension.
+ */
+ stc.site_scope = stc.local_scope = stc.loopback_scope = 0;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ struct inpcb *in_inp;
+
+ /* It's a V6 socket */
+ in_inp = (struct inpcb *)inp;
+ stc.ipv6_addr_legal = 1;
+ /* Now look at the binding flag to see if V4 will be legal */
+ if (SCTP_IPV6_V6ONLY(in_inp) == 0) {
+ stc.ipv4_addr_legal = 1;
+ } else {
+ /* V4 addresses are NOT legal on the association */
+ stc.ipv4_addr_legal = 0;
+ }
+ } else {
+ /* It's a V4 socket, not V6 */
+ stc.ipv4_addr_legal = 1;
+ stc.ipv6_addr_legal = 0;
+ }
+
+#ifdef SCTP_DONT_DO_PRIVADDR_SCOPE
+ stc.ipv4_scope = 1;
+#else
+ stc.ipv4_scope = 0;
+#endif
+ /* now for scope setup */
+ memset((caddr_t)&store, 0, sizeof(store));
+ sin = (struct sockaddr_in *)&store;
+ sin6 = (struct sockaddr_in6 *)&store;
+ if (net == NULL) {
+ to = (struct sockaddr *)&store;
+ iph = mtod(init_pkt, struct ip *);
+ if (iph->ip_v == IPVERSION) {
+ struct sctp_ifa *addr;
+ sctp_route_t iproute;
+
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(struct sockaddr_in);
+ sin->sin_port = sh->src_port;
+ sin->sin_addr = iph->ip_src;
+ /* lookup address */
+ stc.address[0] = sin->sin_addr.s_addr;
+ stc.address[1] = 0;
+ stc.address[2] = 0;
+ stc.address[3] = 0;
+ stc.addr_type = SCTP_IPV4_ADDRESS;
+ /* local from address */
+ memset(&iproute, 0, sizeof(iproute));
+ ro = &iproute;
+ memcpy(&ro->ro_dst, sin, sizeof(*sin));
+ addr = sctp_source_address_selection(inp, NULL,
+ ro, NULL, 0,
+ vrf_id);
+ if (addr == NULL)
+ return;
+
+ if (ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = NULL;
+ }
+ stc.laddress[0] = addr->address.sin.sin_addr.s_addr;
+ stc.laddress[1] = 0;
+ stc.laddress[2] = 0;
+ stc.laddress[3] = 0;
+ stc.laddr_type = SCTP_IPV4_ADDRESS;
+ /* scope_id is only for v6 */
+ stc.scope_id = 0;
+#ifndef SCTP_DONT_DO_PRIVADDR_SCOPE
+ if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+ stc.ipv4_scope = 1;
+ }
+#else
+ stc.ipv4_scope = 1;
+#endif /* SCTP_DONT_DO_PRIVADDR_SCOPE */
+ /* Must use the address in this case */
+ if (sctp_is_address_on_local_host((struct sockaddr *)sin, vrf_id)) {
+ stc.loopback_scope = 1;
+ stc.ipv4_scope = 1;
+ stc.site_scope = 1;
+ stc.local_scope = 0;
+ }
+ sctp_free_ifa(addr);
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ struct sctp_ifa *addr;
+ struct route_in6 iproute6;
+
+ ip6 = mtod(init_pkt, struct ip6_hdr *);
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ sin6->sin6_port = sh->src_port;
+ sin6->sin6_addr = ip6->ip6_src;
+ /* lookup address */
+ memcpy(&stc.address, &sin6->sin6_addr,
+ sizeof(struct in6_addr));
+ sin6->sin6_scope_id = 0;
+ stc.addr_type = SCTP_IPV6_ADDRESS;
+ stc.scope_id = 0;
+ if (sctp_is_address_on_local_host((struct sockaddr *)sin6, vrf_id)) {
+ stc.loopback_scope = 1;
+ stc.local_scope = 0;
+ stc.site_scope = 1;
+ stc.ipv4_scope = 1;
+ } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ /*
+ * If the new destination is a LINK_LOCAL we
+ * must have common both site and local
+ * scope. Don't set local scope though since
+ * we must depend on the source to be added
+ * implicitly. We cannot assure just because
+ * we share one link that all links are
+ * common.
+ */
+ stc.local_scope = 0;
+ stc.site_scope = 1;
+ stc.ipv4_scope = 1;
+ /*
+ * we start counting for the private address
+ * stuff at 1. since the link local we
+ * source from won't show up in our scoped
+ * count.
+ */
+ cnt_inits_to = 1;
+ /* pull out the scope_id from incoming pkt */
+ /* FIX ME: does this have scope from rcvif? */
+ (void)sa6_recoverscope(sin6);
+
+ sa6_embedscope(sin6, ip6_use_defzone);
+ stc.scope_id = sin6->sin6_scope_id;
+ } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
+ /*
+ * If the new destination is SITE_LOCAL then
+ * we must have site scope in common.
+ */
+ stc.site_scope = 1;
+ }
+ /* local from address */
+ memset(&iproute6, 0, sizeof(iproute6));
+ ro = (sctp_route_t *) & iproute6;
+ memcpy(&ro->ro_dst, sin6, sizeof(*sin6));
+ addr = sctp_source_address_selection(inp, NULL,
+ ro, NULL, 0, vrf_id);
+ if (addr == NULL)
+ return;
+
+ if (ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = NULL;
+ }
+ memcpy(&stc.laddress, &addr->address.sin6.sin6_addr, sizeof(struct in6_addr));
+ stc.laddr_type = SCTP_IPV6_ADDRESS;
+ sctp_free_ifa(addr);
+ }
+ } else {
+ /* set the scope per the existing tcb */
+ struct sctp_nets *lnet;
+
+ stc.loopback_scope = asoc->loopback_scope;
+ stc.ipv4_scope = asoc->ipv4_local_scope;
+ stc.site_scope = asoc->site_scope;
+ stc.local_scope = asoc->local_scope;
+ TAILQ_FOREACH(lnet, &asoc->nets, sctp_next) {
+ if (lnet->ro._l_addr.sin6.sin6_family == AF_INET6) {
+ if (IN6_IS_ADDR_LINKLOCAL(&lnet->ro._l_addr.sin6.sin6_addr)) {
+ /*
+ * if we have a LL address, start
+ * counting at 1.
+ */
+ cnt_inits_to = 1;
+ }
+ }
+ }
+
+ /* use the net pointer */
+ to = (struct sockaddr *)&net->ro._l_addr;
+ if (to->sa_family == AF_INET) {
+ sin = (struct sockaddr_in *)to;
+ stc.address[0] = sin->sin_addr.s_addr;
+ stc.address[1] = 0;
+ stc.address[2] = 0;
+ stc.address[3] = 0;
+ stc.addr_type = SCTP_IPV4_ADDRESS;
+ if (net->src_addr_selected == 0) {
+ /*
+ * strange case here, the INIT should have
+ * done the selection.
+ */
+ net->ro._s_addr = sctp_source_address_selection(inp,
+ stcb, (sctp_route_t *) & net->ro,
+ net, 0, vrf_id);
+ if (net->ro._s_addr == NULL)
+ return;
+
+ net->src_addr_selected = 1;
+
+ }
+ stc.laddress[0] = net->ro._s_addr->address.sin.sin_addr.s_addr;
+ stc.laddress[1] = 0;
+ stc.laddress[2] = 0;
+ stc.laddress[3] = 0;
+ stc.laddr_type = SCTP_IPV4_ADDRESS;
+ } else if (to->sa_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)to;
+ memcpy(&stc.address, &sin6->sin6_addr,
+ sizeof(struct in6_addr));
+ stc.addr_type = SCTP_IPV6_ADDRESS;
+ if (net->src_addr_selected == 0) {
+ /*
+ * strange case here, the INIT should have
+ * done the selection.
+ */
+ net->ro._s_addr = sctp_source_address_selection(inp,
+ stcb, (sctp_route_t *) & net->ro,
+ net, 0, vrf_id);
+ if (net->ro._s_addr == NULL)
+ return;
+
+ net->src_addr_selected = 1;
+ }
+ memcpy(&stc.laddress, &net->ro._s_addr->address.sin6.sin6_addr,
+ sizeof(struct in6_addr));
+ stc.laddr_type = SCTP_IPV6_ADDRESS;
+ }
+ }
+ /* Now lets put the SCTP header in place */
+ initackm_out = mtod(m, struct sctp_init_msg *);
+ initackm_out->sh.src_port = inp->sctp_lport;
+ initackm_out->sh.dest_port = sh->src_port;
+ initackm_out->sh.v_tag = init_chk->init.initiate_tag;
+ /* Save it off for quick ref */
+ stc.peers_vtag = init_chk->init.initiate_tag;
+ initackm_out->sh.checksum = 0; /* calculate later */
+ /* who are we */
+ memcpy(stc.identification, SCTP_VERSION_STRING,
+ min(strlen(SCTP_VERSION_STRING), sizeof(stc.identification)));
+ /* now the chunk header */
+ initackm_out->msg.ch.chunk_type = SCTP_INITIATION_ACK;
+ initackm_out->msg.ch.chunk_flags = 0;
+ /* fill in later from mbuf we build */
+ initackm_out->msg.ch.chunk_length = 0;
+ /* place in my tag */
+ if ((asoc != NULL) &&
+ ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_INUSE) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED))) {
+ /* re-use the v-tags and init-seq here */
+ initackm_out->msg.init.initiate_tag = htonl(asoc->my_vtag);
+ initackm_out->msg.init.initial_tsn = htonl(asoc->init_seq_number);
+ } else {
+ uint32_t vtag, itsn;
+
+ if (hold_inp_lock) {
+ SCTP_INP_INCR_REF(inp);
+ SCTP_INP_RUNLOCK(inp);
+ }
+ if (asoc) {
+ atomic_add_int(&asoc->refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ vtag = sctp_select_a_tag(inp, 1);
+ initackm_out->msg.init.initiate_tag = htonl(vtag);
+ /* get a TSN to use too */
+ itsn = sctp_select_initial_TSN(&inp->sctp_ep);
+ initackm_out->msg.init.initial_tsn = htonl(itsn);
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&asoc->refcnt, -1);
+ } else {
+ vtag = sctp_select_a_tag(inp, 1);
+ initackm_out->msg.init.initiate_tag = htonl(vtag);
+ /* get a TSN to use too */
+ initackm_out->msg.init.initial_tsn = htonl(sctp_select_initial_TSN(&inp->sctp_ep));
+ }
+ if (hold_inp_lock) {
+ SCTP_INP_RLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+ /* save away my tag too */
+ stc.my_vtag = initackm_out->msg.init.initiate_tag;
+
+ /* set up some of the credits. */
+ so = inp->sctp_socket;
+ if (so == NULL) {
+ /* memory problem */
+ sctp_m_freem(m);
+ return;
+ } else {
+ initackm_out->msg.init.a_rwnd = htonl(max(SCTP_SB_LIMIT_RCV(so), SCTP_MINIMAL_RWND));
+ }
+ /* set what I want */
+ his_limit = ntohs(init_chk->init.num_inbound_streams);
+ /* choose what I want */
+ if (asoc != NULL) {
+ if (asoc->streamoutcnt > inp->sctp_ep.pre_open_stream_count) {
+ i_want = asoc->streamoutcnt;
+ } else {
+ i_want = inp->sctp_ep.pre_open_stream_count;
+ }
+ } else {
+ i_want = inp->sctp_ep.pre_open_stream_count;
+ }
+ if (his_limit < i_want) {
+ /* I Want more :< */
+ initackm_out->msg.init.num_outbound_streams = init_chk->init.num_inbound_streams;
+ } else {
+ /* I can have what I want :> */
+ initackm_out->msg.init.num_outbound_streams = htons(i_want);
+ }
+ /* tell him his limit. */
+ initackm_out->msg.init.num_inbound_streams =
+ htons(inp->sctp_ep.max_open_streams_intome);
+ /* setup the ECN pointer */
+
+ if (inp->sctp_ep.adaptation_layer_indicator) {
+ struct sctp_adaptation_layer_indication *ali;
+
+ ali = (struct sctp_adaptation_layer_indication *)(
+ (caddr_t)initackm_out + sizeof(*initackm_out));
+ ali->ph.param_type = htons(SCTP_ULP_ADAPTATION);
+ ali->ph.param_length = htons(sizeof(*ali));
+ ali->indication = ntohl(inp->sctp_ep.adaptation_layer_indicator);
+ SCTP_BUF_LEN(m) += sizeof(*ali);
+ ecn = (struct sctp_ecn_supported_param *)((caddr_t)ali +
+ sizeof(*ali));
+ } else {
+ ecn = (struct sctp_ecn_supported_param *)(
+ (caddr_t)initackm_out + sizeof(*initackm_out));
+ }
+
+ /* ECN parameter */
+ if (sctp_ecn_enable == 1) {
+ ecn->ph.param_type = htons(SCTP_ECN_CAPABLE);
+ ecn->ph.param_length = htons(sizeof(*ecn));
+ SCTP_BUF_LEN(m) += sizeof(*ecn);
+
+ prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn +
+ sizeof(*ecn));
+ } else {
+ prsctp = (struct sctp_prsctp_supported_param *)((caddr_t)ecn);
+ }
+ /* And now tell the peer we do pr-sctp */
+ prsctp->ph.param_type = htons(SCTP_PRSCTP_SUPPORTED);
+ prsctp->ph.param_length = htons(sizeof(*prsctp));
+ SCTP_BUF_LEN(m) += sizeof(*prsctp);
+
+ /* And now tell the peer we do all the extensions */
+ pr_supported = (struct sctp_supported_chunk_types_param *)
+ ((caddr_t)prsctp + sizeof(*prsctp));
+
+ pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT);
+ num_ext = 0;
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF;
+ pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK;
+ pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN;
+ pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED;
+ pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET;
+ if (!sctp_auth_disable)
+ pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION;
+ p_len = sizeof(*pr_supported) + num_ext;
+ pr_supported->ph.param_length = htons(p_len);
+ bzero((caddr_t)pr_supported + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+
+ /* ECN nonce: And now tell the peer we support ECN nonce */
+ if (sctp_ecn_nonce) {
+ ecn_nonce = (struct sctp_ecn_nonce_supported_param *)
+ ((caddr_t)pr_supported + SCTP_SIZE32(p_len));
+ ecn_nonce->ph.param_type = htons(SCTP_ECN_NONCE_SUPPORTED);
+ ecn_nonce->ph.param_length = htons(sizeof(*ecn_nonce));
+ SCTP_BUF_LEN(m) += sizeof(*ecn_nonce);
+ }
+ /* add authentication parameters */
+ if (!sctp_auth_disable) {
+ struct sctp_auth_random *randp;
+ struct sctp_auth_hmac_algo *hmacs;
+ struct sctp_auth_chunk_list *chunks;
+ uint16_t random_len;
+
+ /* generate and add RANDOM parameter */
+ random_len = SCTP_AUTH_RANDOM_SIZE_DEFAULT;
+ randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ randp->ph.param_type = htons(SCTP_RANDOM);
+ p_len = sizeof(*randp) + random_len;
+ randp->ph.param_length = htons(p_len);
+ SCTP_READ_RANDOM(randp->random_data, random_len);
+ /* zero out any padding required */
+ bzero((caddr_t)randp + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+
+ /* add HMAC_ALGO parameter */
+ hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ p_len = sctp_serialize_hmaclist(inp->sctp_ep.local_hmacs,
+ (uint8_t *) hmacs->hmac_ids);
+ if (p_len > 0) {
+ p_len += sizeof(*hmacs);
+ hmacs->ph.param_type = htons(SCTP_HMAC_LIST);
+ hmacs->ph.param_length = htons(p_len);
+ /* zero out any padding required */
+ bzero((caddr_t)hmacs + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ }
+ /* add CHUNKS parameter */
+ chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+SCTP_BUF_LEN(m));
+ p_len = sctp_serialize_auth_chunks(inp->sctp_ep.local_auth_chunks,
+ chunks->chunk_types);
+ if (p_len > 0) {
+ p_len += sizeof(*chunks);
+ chunks->ph.param_type = htons(SCTP_CHUNK_LIST);
+ chunks->ph.param_length = htons(p_len);
+ /* zero out any padding required */
+ bzero((caddr_t)chunks + p_len, SCTP_SIZE32(p_len) - p_len);
+ SCTP_BUF_LEN(m) += SCTP_SIZE32(p_len);
+ }
+ }
+ m_at = m;
+ /* now the addresses */
+ {
+ struct sctp_scoping scp;
+
+ /*
+ * To optimize this we could put the scoping stuff into a
+ * structure and remove the individual uint8's from the stc
+ * structure. Then we could just stuff the address in from
+ * the stc, but for now this is a quick hack to get the
+ * address stuff teased apart.
+ */
+ scp.ipv4_addr_legal = stc.ipv4_addr_legal;
+ scp.ipv6_addr_legal = stc.ipv6_addr_legal;
+ scp.loopback_scope = stc.loopback_scope;
+ scp.ipv4_local_scope = stc.ipv4_scope;
+ scp.local_scope = stc.local_scope;
+ scp.site_scope = stc.site_scope;
+ m_at = sctp_add_addresses_to_i_ia(inp, &scp, m_at, cnt_inits_to);
+ }
+
+ /* tack on the operational error if present */
+ if (op_err) {
+ struct mbuf *ol;
+ int llen;
+
+ llen = 0;
+ ol = op_err;
+ while (ol) {
+ llen += SCTP_BUF_LEN(ol);
+ ol = SCTP_BUF_NEXT(ol);
+ }
+ if (llen % 4) {
+ /* must add a pad to the param */
+ uint32_t cpthis = 0;
+ int padlen;
+
+ padlen = 4 - (llen % 4);
+ m_copyback(op_err, llen, padlen, (caddr_t)&cpthis);
+ }
+ while (SCTP_BUF_NEXT(m_at) != NULL) {
+ m_at = SCTP_BUF_NEXT(m_at);
+ }
+ SCTP_BUF_NEXT(m_at) = op_err;
+ while (SCTP_BUF_NEXT(m_at) != NULL) {
+ m_at = SCTP_BUF_NEXT(m_at);
+ }
+ }
+ /* pre-calculate the size and update pkt header and chunk header */
+ p_len = 0;
+ for (m_tmp = m; m_tmp; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
+ p_len += SCTP_BUF_LEN(m_tmp);
+ if (SCTP_BUF_NEXT(m_tmp) == NULL) {
+ /* m_tmp should now point to last one */
+ break;
+ }
+ }
+
+ /* Now we must build a cookie */
+ m_cookie = sctp_add_cookie(inp, init_pkt, offset, m,
+ sizeof(struct sctphdr), &stc, &signature);
+ if (m_cookie == NULL) {
+ /* memory problem */
+ sctp_m_freem(m);
+ return;
+ }
+ /* Now append the cookie to the end and update the space/size */
+ SCTP_BUF_NEXT(m_tmp) = m_cookie;
+
+ for (m_tmp = m_cookie; m_tmp; m_tmp = SCTP_BUF_NEXT(m_tmp)) {
+ p_len += SCTP_BUF_LEN(m_tmp);
+ if (SCTP_BUF_NEXT(m_tmp) == NULL) {
+ /* m_tmp should now point to last one */
+ mp_last = m_tmp;
+ break;
+ }
+ }
+ /*
+ * Place in the size, but we don't include the last pad (if any) in
+ * the INIT-ACK.
+ */
+ initackm_out->msg.ch.chunk_length = htons((p_len - sizeof(struct sctphdr)));
+
+ /*
+ * Time to sign the cookie, we don't sign over the cookie signature
+ * though thus we set trailer.
+ */
+ (void)sctp_hmac_m(SCTP_HMAC,
+ (uint8_t *) inp->sctp_ep.secret_key[(int)(inp->sctp_ep.current_secret_number)],
+ SCTP_SECRET_SIZE, m_cookie, sizeof(struct sctp_paramhdr),
+ (uint8_t *) signature, SCTP_SIGNATURE_SIZE);
+ /*
+ * We pass 0 here to NOT set IP_DF if it is IPv4; we ignore the return
+ * here since the timer will drive a retransmission.
+ */
+ padval = p_len % 4;
+ if ((padval) && (mp_last)) {
+ /* see my previous comments on mp_last */
+ int ret;
+
+ ret = sctp_add_pad_tombuf(mp_last, (4 - padval));
+ if (ret) {
+ /* Houston we have a problem, no space */
+ sctp_m_freem(m);
+ return;
+ }
+ p_len += padval;
+ }
+ (void)sctp_lowlevel_chunk_output(inp, NULL, NULL, to, m, 0, NULL, 0, 0,
+ NULL, 0, SCTP_SO_NOT_LOCKED);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+}
+
+
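+/*
+ * Insert the stream queue strq onto the association's output "wheel",
+ * keeping the wheel sorted by ascending stream number.  If the stream
+ * is already linked on the wheel this is a no-op.  The TCB send lock
+ * is taken unless the caller indicates it already holds it.
+ */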
+void
+sctp_insert_on_wheel(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_stream_out *strq, int holds_lock)
+{
+ struct sctp_stream_out *stre, *strn;
+
+ if (holds_lock == 0) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ }
+ if ((strq->next_spoke.tqe_next) ||
+ (strq->next_spoke.tqe_prev)) {
+ /* already on wheel */
+ goto outof_here;
+ }
+ stre = TAILQ_FIRST(&asoc->out_wheel);
+ if (stre == NULL) {
+ /* only one on wheel */
+ TAILQ_INSERT_HEAD(&asoc->out_wheel, strq, next_spoke);
+ goto outof_here;
+ }
+ for (; stre; stre = strn) {
+ strn = TAILQ_NEXT(stre, next_spoke);
+ if (stre->stream_no > strq->stream_no) {
+ TAILQ_INSERT_BEFORE(stre, strq, next_spoke);
+ goto outof_here;
+ } else if (stre->stream_no == strq->stream_no) {
+ /* huh, should not happen */
+ goto outof_here;
+ } else if (strn == NULL) {
+ /* next one is null */
+ TAILQ_INSERT_AFTER(&asoc->out_wheel, stre, strq,
+ next_spoke);
+ }
+ }
+outof_here:
+ if (holds_lock == 0) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ }
+}
+
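+/*
+ * Take the stream off the output wheel, but only if nothing new has
+ * been queued on it in the meantime.
+ */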
+static void
+sctp_remove_from_wheel(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_stream_out *strq)
+{
+ /* take off and then setup so we know it is not on the wheel */
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (TAILQ_FIRST(&strq->outqueue)) {
+ /* more was added */
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ return;
+ }
+ TAILQ_REMOVE(&asoc->out_wheel, strq, next_spoke);
+ strq->next_spoke.tqe_next = NULL;
+ strq->next_spoke.tqe_prev = NULL;
+ SCTP_TCB_SEND_UNLOCK(stcb);
+}
+
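+/*
+ * When the peer supports PR-SCTP, walk the sent and send queues and
+ * release already-queued PR-SCTP chunks (buffer-space limited ones on
+ * the sent queue, TTL limited ones on the send queue) whose priority
+ * is equal to or lower than the new message's, until at least dataout
+ * bytes of space have been freed.
+ */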
+static void
+sctp_prune_prsctp(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *srcv,
+ int dataout)
+{
+ int freed_spc = 0;
+ struct sctp_tmit_chunk *chk, *nchk;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if ((asoc->peer_supports_prsctp) &&
+ (asoc->sent_queue_cnt_removeable > 0)) {
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ /*
+ * Look for chunks marked with the PR_SCTP flag AND
+ * the buffer space flag. If the one being sent is
+ * equal or greater priority then purge the old one
+ * and free some space.
+ */
+ if (PR_SCTP_BUF_ENABLED(chk->flags)) {
+ /*
+ * This one is PR-SCTP AND buffer space
+ * limited type
+ */
+ if (chk->rec.data.timetodrop.tv_sec >= (long)srcv->sinfo_timetolive) {
+ /*
+ * Lower numbers equate to higher
+ * priority so if the one we are
+ * looking at has a larger or equal
+ * priority we want to drop the data
+ * and NOT retransmit it.
+ */
+ if (chk->data) {
+ /*
+ * We release the book_size
+ * if the mbuf is here
+ */
+ int ret_spc;
+ int cause;
+
+ if (chk->sent > SCTP_DATAGRAM_UNSENT)
+ cause = SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT;
+ else
+ cause = SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_UNSENT;
+ ret_spc = sctp_release_pr_sctp_chunk(stcb, chk,
+ cause,
+ &asoc->sent_queue, SCTP_SO_LOCKED);
+ freed_spc += ret_spc;
+ if (freed_spc >= dataout) {
+ return;
+ }
+ } /* if chunk was present */
+ } /* if of sufficient priority */
+ } /* if chunk has enabled */
+ } /* tailqforeach */
+
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ while (chk) {
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ /* Here we must move to the sent queue and mark */
+ if (PR_SCTP_TTL_ENABLED(chk->flags)) {
+ if (chk->rec.data.timetodrop.tv_sec >= (long)srcv->sinfo_timetolive) {
+ if (chk->data) {
+ /*
+ * We release the book_size
+ * if the mbuf is here
+ */
+ int ret_spc;
+
+ ret_spc = sctp_release_pr_sctp_chunk(stcb, chk,
+ SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_UNSENT,
+ &asoc->send_queue, SCTP_SO_LOCKED);
+
+ freed_spc += ret_spc;
+ if (freed_spc >= dataout) {
+ return;
+ }
+ } /* end if chk->data */
+ } /* end if right class */
+ } /* end if chk pr-sctp */
+ chk = nchk;
+ } /* end while (chk) */
+ } /* if enabled in asoc */
+}
+
+int
+sctp_get_frag_point(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ int siz, ovh;
+
+ /*
+ * For endpoints that have both v6 and v4 addresses we must reserve
+ * room for the ipv6 header, for those that are only dealing with V4
+ * we use a larger frag point.
+ */
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MED_OVERHEAD;
+ } else {
+ ovh = SCTP_MED_V4_OVERHEAD;
+ }
+
+ if (stcb->asoc.sctp_frag_point > asoc->smallest_mtu)
+ siz = asoc->smallest_mtu - ovh;
+ else
+ siz = (stcb->asoc.sctp_frag_point - ovh);
+ /*
+ * if (siz > (MCLBYTES-sizeof(struct sctp_data_chunk))) {
+ */
+ /* A data chunk MUST fit in a cluster */
+ /* siz = (MCLBYTES - sizeof(struct sctp_data_chunk)); */
+ /* } */
+
+ /* adjust for an AUTH chunk if DATA requires auth */
+ if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks))
+ siz -= sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+
+ if (siz % 4) {
+ /* make it an even word boundary please */
+ siz -= (siz % 4);
+ }
+ return (siz);
+}
+
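+/*
+ * Record the PR-SCTP policy from the send info on the pending stream
+ * queue entry and prime sp->ts accordingly: a priority for buffer-drop,
+ * an absolute expiry time for TTL, or a retransmission limit for RTX.
+ */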
+static void
+sctp_set_prsctp_policy(struct sctp_tcb *stcb,
+ struct sctp_stream_queue_pending *sp)
+{
+ sp->pr_sctp_on = 0;
+ if (stcb->asoc.peer_supports_prsctp) {
+ /*
+ * We assume that the user wants PR_SCTP_TTL if the user
+ * provides a positive lifetime but does not specify any
+ * PR_SCTP policy. This is a BAD assumption and causes
+ * problems at least with the U-Vancouver MPI folks. I will
+ * change this to be no policy means NO PR-SCTP.
+ */
+ if (PR_SCTP_ENABLED(sp->sinfo_flags)) {
+ sp->act_flags |= PR_SCTP_POLICY(sp->sinfo_flags);
+ sp->pr_sctp_on = 1;
+ } else {
+ return;
+ }
+ switch (PR_SCTP_POLICY(sp->sinfo_flags)) {
+ case CHUNK_FLAGS_PR_SCTP_BUF:
+ /*
+ * Time to live is a priority stored in tv_sec when
+ * doing the buffer drop thing.
+ */
+ sp->ts.tv_sec = sp->timetolive;
+ sp->ts.tv_usec = 0;
+ break;
+ case CHUNK_FLAGS_PR_SCTP_TTL:
+ {
+ struct timeval tv;
+
+ (void)SCTP_GETTIME_TIMEVAL(&sp->ts);
+ tv.tv_sec = sp->timetolive / 1000;
+ tv.tv_usec = (sp->timetolive * 1000) % 1000000;
+ timevaladd(&sp->ts, &tv);
+ }
+ break;
+ case CHUNK_FLAGS_PR_SCTP_RTX:
+ /*
+ * Time to live is the number of retransmissions
+ * stored in tv_sec.
+ */
+ sp->ts.tv_sec = sp->timetolive;
+ sp->ts.tv_usec = 0;
+ break;
+ default:
+ SCTPDBG(SCTP_DEBUG_USRREQ1,
+ "Unknown PR_SCTP policy %u.\n",
+ PR_SCTP_POLICY(sp->sinfo_flags));
+ break;
+ }
+ }
+}
+
+static int
+sctp_msg_append(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ struct mbuf *m,
+ struct sctp_sndrcvinfo *srcv, int hold_stcb_lock)
+{
+ int error = 0, holds_lock;
+ struct mbuf *at;
+ struct sctp_stream_queue_pending *sp = NULL;
+ struct sctp_stream_out *strm;
+
+ /*
+ * Given an mbuf chain, put it into the association send queue and
+ * place it on the wheel
+ */
+ holds_lock = hold_stcb_lock;
+ if (srcv->sinfo_stream >= stcb->asoc.streamoutcnt) {
+ /* Invalid stream number */
+ SCTP_LTRACE_ERR_RET_PKT(m, NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_now;
+ }
+ if ((stcb->asoc.stream_locked) &&
+ (stcb->asoc.stream_locked_on != srcv->sinfo_stream)) {
+ SCTP_LTRACE_ERR_RET_PKT(m, NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_now;
+ }
+ strm = &stcb->asoc.strmout[srcv->sinfo_stream];
+ /* Now can we send this? */
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (stcb->asoc.state & SCTP_STATE_SHUTDOWN_PENDING)) {
+ /* got data while shutting down */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
+ error = ECONNRESET;
+ goto out_now;
+ }
+ sctp_alloc_a_strmoq(stcb, sp);
+ if (sp == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ error = ENOMEM;
+ goto out_now;
+ }
+ sp->sinfo_flags = srcv->sinfo_flags;
+ sp->timetolive = srcv->sinfo_timetolive;
+ sp->ppid = srcv->sinfo_ppid;
+ sp->context = srcv->sinfo_context;
+ sp->strseq = 0;
+ if (sp->sinfo_flags & SCTP_ADDR_OVER) {
+ sp->net = net;
+ sp->addr_over = 1;
+ } else {
+ sp->net = stcb->asoc.primary_destination;
+ sp->addr_over = 0;
+ }
+ atomic_add_int(&sp->net->ref_count, 1);
+ (void)SCTP_GETTIME_TIMEVAL(&sp->ts);
+ sp->stream = srcv->sinfo_stream;
+ sp->msg_is_complete = 1;
+ sp->sender_all_done = 1;
+ sp->some_taken = 0;
+ sp->data = m;
+ sp->tail_mbuf = NULL;
+ sp->length = 0;
+ at = m;
+ sctp_set_prsctp_policy(stcb, sp);
+ /*
+ * We could in theory (for sendall) stuff the length in, but we would
+ * still have to hunt through the chain since we need to set up the
+ * tail_mbuf.
+ */
+ while (at) {
+ if (SCTP_BUF_NEXT(at) == NULL)
+ sp->tail_mbuf = at;
+ sp->length += SCTP_BUF_LEN(at);
+ at = SCTP_BUF_NEXT(at);
+ }
+ SCTP_TCB_SEND_LOCK(stcb);
+ sctp_snd_sb_alloc(stcb, sp->length);
+ atomic_add_int(&stcb->asoc.stream_queue_cnt, 1);
+ TAILQ_INSERT_TAIL(&strm->outqueue, sp, next);
+ if ((srcv->sinfo_flags & SCTP_UNORDERED) == 0) {
+ sp->strseq = strm->next_sequence_sent;
+ strm->next_sequence_sent++;
+ }
+ if ((strm->next_spoke.tqe_next == NULL) &&
+ (strm->next_spoke.tqe_prev == NULL)) {
+ /* Not on wheel, insert */
+ sctp_insert_on_wheel(stcb, &stcb->asoc, strm, 1);
+ }
+ m = NULL;
+ SCTP_TCB_SEND_UNLOCK(stcb);
+out_now:
+ if (m) {
+ sctp_m_freem(m);
+ }
+ return (error);
+}
+
+
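+/*
+ * Append the data in clonechain onto the end of outchain, keeping
+ * *endofchain pointing at the last mbuf.  Small messages (when not
+ * copying by reference) are copied directly into trailing space of the
+ * existing chain; otherwise the clonechain is either taken over as-is
+ * (can_take_mbuf) or duplicated with SCTP_M_COPYM.
+ */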
+static struct mbuf *
+sctp_copy_mbufchain(struct mbuf *clonechain,
+ struct mbuf *outchain,
+ struct mbuf **endofchain,
+ int can_take_mbuf,
+ int sizeofcpy,
+ uint8_t copy_by_ref)
+{
+ struct mbuf *m;
+ struct mbuf *appendchain;
+ caddr_t cp;
+ int len;
+
+ if (endofchain == NULL) {
+ /* error */
+error_out:
+ if (outchain)
+ sctp_m_freem(outchain);
+ return (NULL);
+ }
+ if (can_take_mbuf) {
+ appendchain = clonechain;
+ } else {
+ if (!copy_by_ref &&
+ (sizeofcpy <= (int)((((sctp_mbuf_threshold_count - 1) * MLEN) + MHLEN)))
+ ) {
+ /* It's not in a cluster */
+ if (*endofchain == NULL) {
+ /* lets get a mbuf cluster */
+ if (outchain == NULL) {
+ /* This is the general case */
+ new_mbuf:
+ outchain = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_HEADER);
+ if (outchain == NULL) {
+ goto error_out;
+ }
+ SCTP_BUF_LEN(outchain) = 0;
+ *endofchain = outchain;
+ /* get the prepend space */
+ SCTP_BUF_RESV_UF(outchain, (SCTP_FIRST_MBUF_RESV + 4));
+ } else {
+ /*
+ * We really should not get a NULL
+ * in endofchain
+ */
+ /* find end */
+ m = outchain;
+ while (m) {
+ if (SCTP_BUF_NEXT(m) == NULL) {
+ *endofchain = m;
+ break;
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ /* sanity */
+ if (*endofchain == NULL) {
+ /*
+ * huh, TSNH XXX maybe we
+ * should panic
+ */
+ sctp_m_freem(outchain);
+ goto new_mbuf;
+ }
+ }
+ /* get the new end of length */
+ len = M_TRAILINGSPACE(*endofchain);
+ } else {
+ /* how much is left at the end? */
+ len = M_TRAILINGSPACE(*endofchain);
+ }
+ /* Find the end of the data, for appending */
+ cp = (mtod((*endofchain), caddr_t)+SCTP_BUF_LEN((*endofchain)));
+
+ /* Now lets copy it out */
+ if (len >= sizeofcpy) {
+ /* It all fits, copy it in */
+ m_copydata(clonechain, 0, sizeofcpy, cp);
+ SCTP_BUF_LEN((*endofchain)) += sizeofcpy;
+ } else {
+ /* fill up the end of the chain */
+ if (len > 0) {
+ m_copydata(clonechain, 0, len, cp);
+ SCTP_BUF_LEN((*endofchain)) += len;
+ /* now we need another one */
+ sizeofcpy -= len;
+ }
+ m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m == NULL) {
+ /* We failed */
+ goto error_out;
+ }
+ SCTP_BUF_NEXT((*endofchain)) = m;
+ *endofchain = m;
+ cp = mtod((*endofchain), caddr_t);
+ m_copydata(clonechain, len, sizeofcpy, cp);
+ SCTP_BUF_LEN((*endofchain)) += sizeofcpy;
+ }
+ return (outchain);
+ } else {
+ /* copy the old fashion way */
+ appendchain = SCTP_M_COPYM(clonechain, 0, M_COPYALL, M_DONTWAIT);
+ }
+ }
+ if (appendchain == NULL) {
+ /* error */
+ if (outchain)
+ sctp_m_freem(outchain);
+ return (NULL);
+ }
+ if (outchain) {
+ /* tack on to the end */
+ if (*endofchain != NULL) {
+ SCTP_BUF_NEXT(((*endofchain))) = appendchain;
+ } else {
+ m = outchain;
+ while (m) {
+ if (SCTP_BUF_NEXT(m) == NULL) {
+ SCTP_BUF_NEXT(m) = appendchain;
+ break;
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ }
+ /*
+ * save off the end and update the end-chain position
+ */
+ m = appendchain;
+ while (m) {
+ if (SCTP_BUF_NEXT(m) == NULL) {
+ *endofchain = m;
+ break;
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ return (outchain);
+ } else {
+ /* save off the end and update the end-chain position */
+ m = appendchain;
+ while (m) {
+ if (SCTP_BUF_NEXT(m) == NULL) {
+ *endofchain = m;
+ break;
+ }
+ m = SCTP_BUF_NEXT(m);
+ }
+ return (appendchain);
+ }
+}
+
+int
+sctp_med_chunk_output(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int *num_out,
+ int *reason_code,
+ int control_only, int *cwnd_full, int from_where,
+ struct timeval *now, int *now_filled, int frag_point, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+);
+
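+/*
+ * Iterator callback used for SCTP_SENDALL: for each association on the
+ * endpoint, take a private copy of the user's message and either abort
+ * the association with it as the cause (SCTP_ABORT), or queue it for
+ * sending and, on SCTP_EOF, start the shutdown sequence.  Output is
+ * then kicked unless Nagle-style bundling says to wait.
+ */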
+static void
+sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
+ uint32_t val)
+{
+ struct sctp_copy_all *ca;
+ struct mbuf *m;
+ int ret = 0;
+ int added_control = 0;
+ int un_sent, do_chunk_output = 1;
+ struct sctp_association *asoc;
+
+ ca = (struct sctp_copy_all *)ptr;
+ if (ca->m == NULL) {
+ return;
+ }
+ if (ca->inp != inp) {
+ /* TSNH */
+ return;
+ }
+ if ((ca->m) && ca->sndlen) {
+ m = SCTP_M_COPYM(ca->m, 0, M_COPYALL, M_DONTWAIT);
+ if (m == NULL) {
+ /* can't copy so we are done */
+ ca->cnt_failed++;
+ return;
+ }
+ } else {
+ m = NULL;
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (ca->sndrcv.sinfo_flags & SCTP_ABORT) {
+ /* Abort this assoc with m as the user defined reason */
+ if (m) {
+ struct sctp_paramhdr *ph;
+
+ SCTP_BUF_PREPEND(m, sizeof(struct sctp_paramhdr), M_DONTWAIT);
+ if (m) {
+ ph = mtod(m, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(ca->sndlen);
+ }
+ /*
+ * We add one here to keep the assoc from
+ * disappearing on us.
+ */
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ sctp_abort_an_association(inp, stcb,
+ SCTP_RESPONSE_TO_USER_REQ,
+ m, SCTP_SO_NOT_LOCKED);
+ /*
+ * sctp_abort_an_association calls sctp_free_asoc();
+ * free_asoc will NOT free it since we incremented
+ * the refcnt. We do this to prevent it being freed
+ * and things getting tricky, since we could end up
+ * (from free_asoc) calling inpcb_free, which would
+ * make a recursive lock call on the iterator lock.
+ * But as a consequence of that, the stcb comes back
+ * to us un-locked; since free_asoc returns with
+ * either no TCB or the TCB unlocked, we must relock
+ * to unlock in the iterator timer :-0
+ */
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ goto no_chunk_output;
+ }
+ } else {
+ if (m) {
+ ret = sctp_msg_append(stcb, stcb->asoc.primary_destination, m,
+ &ca->sndrcv, 1);
+ }
+ asoc = &stcb->asoc;
+ if (ca->sndrcv.sinfo_flags & SCTP_EOF) {
+ /* shutdown this assoc */
+ int cnt;
+
+ cnt = sctp_is_there_unsent_data(stcb);
+
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (cnt == 0)) {
+ if (asoc->locked_on_sending) {
+ goto abort_anyway;
+ }
+ /*
+ * there is nothing queued to send, so I'm
+ * done...
+ */
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ /*
+ * only send SHUTDOWN the first time
+ * through
+ */
+ sctp_send_shutdown(stcb, stcb->asoc.primary_destination);
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ added_control = 1;
+ do_chunk_output = 0;
+ }
+ } else {
+ /*
+ * we still got (or just got) data to send,
+ * so set SHUTDOWN_PENDING
+ */
+ /*
+ * XXX sockets draft says that SCTP_EOF
+ * should be sent with no data. currently,
+ * we will allow user data to be sent first
+ * and move to SHUTDOWN-PENDING
+ */
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if (asoc->locked_on_sending) {
+ /*
+ * Locked to send out the
+ * data
+ */
+ struct sctp_stream_queue_pending *sp;
+
+ sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
+ if (sp) {
+ if ((sp->length == 0) && (sp->msg_is_complete == 0))
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ }
+ }
+ asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ abort_anyway:
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_RESPONSE_TO_USER_REQ,
+ NULL, SCTP_SO_NOT_LOCKED);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ goto no_chunk_output;
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ }
+ }
+
+ }
+ }
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ ((stcb->asoc.chunks_on_out_queue - stcb->asoc.total_flight_count) * sizeof(struct sctp_data_chunk)));
+
+ if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
+ (stcb->asoc.total_flight > 0) &&
+ (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))
+ ) {
+ do_chunk_output = 0;
+ }
+ if (do_chunk_output)
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_NOT_LOCKED);
+ else if (added_control) {
+ int num_out = 0, reason = 0, cwnd_full = 0, now_filled = 0;
+ struct timeval now;
+ int frag_point;
+
+ frag_point = sctp_get_frag_point(stcb, &stcb->asoc);
+ (void)sctp_med_chunk_output(inp, stcb, &stcb->asoc, &num_out,
+ &reason, 1, &cwnd_full, 1, &now, &now_filled, frag_point, SCTP_SO_NOT_LOCKED);
+ }
+no_chunk_output:
+ if (ret) {
+ ca->cnt_failed++;
+ } else {
+ ca->cnt_sent++;
+ }
+}
+
+static void
+sctp_sendall_completes(void *ptr, uint32_t val)
+{
+ struct sctp_copy_all *ca;
+
+ ca = (struct sctp_copy_all *)ptr;
+ /*
+ * Do a notify here? Kacheong suggests that the notify be done at
+ * the send time.. so you would push up a notification if any send
+ * failed. Don't know if this is feasible since the only failures we
+ * have are "memory" related, and if you cannot get an mbuf to send
+ * the data you surely can't get an mbuf to send up to notify the
+ * user you can't send the data :->
+ */
+
+ /* now free everything */
+ sctp_m_freem(ca->m);
+ SCTP_FREE(ca, SCTP_M_COPYAL);
+}
+
+
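+/*
+ * Push a payload of len bytes to the end of an mbuf cluster by
+ * reserving the unused leading space, rounded down to a long boundary.
+ */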
+#define MC_ALIGN(m, len) do { \
+ SCTP_BUF_RESV_UF(m, ((MCLBYTES - (len)) & ~(sizeof(long) - 1))); \
+} while (0)
+
+
+
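+/*
+ * Copy len bytes from the user's uio into a fresh mbuf chain (first
+ * mbuf cluster-sized) and return it, or NULL if the copy fails.
+ */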
+static struct mbuf *
+sctp_copy_out_all(struct uio *uio, int len)
+{
+ struct mbuf *ret, *at;
+ int left, willcpy, cancpy, error;
+
+ ret = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_WAIT, 1, MT_DATA);
+ if (ret == NULL) {
+ /* TSNH */
+ return (NULL);
+ }
+ left = len;
+ SCTP_BUF_LEN(ret) = 0;
+ /* save space for the data chunk header */
+ cancpy = M_TRAILINGSPACE(ret);
+ willcpy = min(cancpy, left);
+ at = ret;
+ while (left > 0) {
+ /* Align data to the end */
+ error = uiomove(mtod(at, caddr_t), willcpy, uio);
+ if (error) {
+ err_out_now:
+ sctp_m_freem(at);
+ return (NULL);
+ }
+ SCTP_BUF_LEN(at) = willcpy;
+ SCTP_BUF_NEXT_PKT(at) = SCTP_BUF_NEXT(at) = 0;
+ left -= willcpy;
+ if (left > 0) {
+ SCTP_BUF_NEXT(at) = sctp_get_mbuf_for_msg(left, 0, M_WAIT, 1, MT_DATA);
+ if (SCTP_BUF_NEXT(at) == NULL) {
+ goto err_out_now;
+ }
+ at = SCTP_BUF_NEXT(at);
+ SCTP_BUF_LEN(at) = 0;
+ cancpy = M_TRAILINGSPACE(at);
+ willcpy = min(cancpy, left);
+ }
+ }
+ return (ret);
+}
+
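+/*
+ * Implement the SCTP_SENDALL flag: stash a private copy of the user
+ * data in a sctp_copy_all context and start an asoc iterator that runs
+ * sctp_sendall_iterator over every association on this endpoint, with
+ * sctp_sendall_completes as the completion routine.
+ */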
+static int
+sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m,
+ struct sctp_sndrcvinfo *srcv)
+{
+ int ret;
+ struct sctp_copy_all *ca;
+
+ SCTP_MALLOC(ca, struct sctp_copy_all *, sizeof(struct sctp_copy_all),
+ SCTP_M_COPYAL);
+ if (ca == NULL) {
+ sctp_m_freem(m);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ memset(ca, 0, sizeof(struct sctp_copy_all));
+
+ ca->inp = inp;
+ memcpy(&ca->sndrcv, srcv, sizeof(struct sctp_nonpad_sndrcvinfo));
+ /*
+ * take off the sendall flag, it would be bad if we failed to do
+ * this :-0
+ */
+ ca->sndrcv.sinfo_flags &= ~SCTP_SENDALL;
+ /* get length and mbuf chain */
+ if (uio) {
+ ca->sndlen = uio->uio_resid;
+ ca->m = sctp_copy_out_all(uio, ca->sndlen);
+ if (ca->m == NULL) {
+ SCTP_FREE(ca, SCTP_M_COPYAL);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ } else {
+ /* Gather the length of the send */
+ struct mbuf *mat;
+
+ mat = m;
+ ca->sndlen = 0;
+ while (m) {
+ ca->sndlen += SCTP_BUF_LEN(m);
+ m = SCTP_BUF_NEXT(m);
+ }
+ ca->m = mat;
+ }
+ ret = sctp_initiate_iterator(NULL, sctp_sendall_iterator, NULL,
+ SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES,
+ SCTP_ASOC_ANY_STATE,
+ (void *)ca, 0,
+ sctp_sendall_completes, inp, 1);
+ if (ret) {
+ SCTP_PRINTF("Failed to initiate iterator for sendall\n");
+ SCTP_FREE(ca, SCTP_M_COPYAL);
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ return (EFAULT);
+ }
+ return (0);
+}
+
+
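+/*
+ * Drop any COOKIE-ECHO chunks still queued on the control send queue
+ * and release their data.
+ */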
+void
+sctp_toss_old_cookies(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk, *nchk;
+
+ chk = TAILQ_FIRST(&asoc->control_send_queue);
+ while (chk) {
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
+ TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ asoc->ctrl_queue_cnt--;
+ sctp_free_a_chunk(stcb, chk);
+ }
+ chk = nchk;
+ }
+}
+
+void
+sctp_toss_old_asconf(struct sctp_tcb *stcb)
+{
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk, *chk_tmp;
+
+ asoc = &stcb->asoc;
+ for (chk = TAILQ_FIRST(&asoc->control_send_queue); chk != NULL;
+ chk = chk_tmp) {
+ /* get next chk */
+ chk_tmp = TAILQ_NEXT(chk, sctp_next);
+ /* find SCTP_ASCONF chunk in queue (only one ever in queue) */
+ if (chk->rec.chunk_id.id == SCTP_ASCONF) {
+ TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ asoc->ctrl_queue_cnt--;
+ sctp_free_a_chunk(stcb, chk);
+ }
+ }
+}
+
+
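+/*
+ * After the chunks in data_list have been handed to the lower layer,
+ * move them from the send queue to the sent queue in TSN order, mark
+ * them SENT, record the send time, bump flight size and charge the
+ * peer's rwnd (flagging a window probe when the rwnd is exhausted).
+ */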
+static void
+sctp_clean_up_datalist(struct sctp_tcb *stcb,
+
+ struct sctp_association *asoc,
+ struct sctp_tmit_chunk **data_list,
+ int bundle_at,
+ struct sctp_nets *net)
+{
+ int i;
+ struct sctp_tmit_chunk *tp1;
+
+ for (i = 0; i < bundle_at; i++) {
+ /* off of the send queue */
+ if (i) {
+ /*
+ * Any chunk NOT 0 you zap the time; chunk 0 gets
+ * zapped or set based on whether an RTO measurement is
+ * needed.
+ */
+ data_list[i]->do_rtt = 0;
+ }
+ /* record time */
+ data_list[i]->sent_rcv_time = net->last_sent_time;
+ data_list[i]->rec.data.fast_retran_tsn = data_list[i]->rec.data.TSN_seq;
+ TAILQ_REMOVE(&asoc->send_queue,
+ data_list[i],
+ sctp_next);
+ /* on to the sent queue */
+ tp1 = TAILQ_LAST(&asoc->sent_queue, sctpchunk_listhead);
+ if ((tp1) && (compare_with_wrap(tp1->rec.data.TSN_seq,
+ data_list[i]->rec.data.TSN_seq, MAX_TSN))) {
+ struct sctp_tmit_chunk *tpp;
+
+ /* need to move back */
+ back_up_more:
+ tpp = TAILQ_PREV(tp1, sctpchunk_listhead, sctp_next);
+ if (tpp == NULL) {
+ TAILQ_INSERT_BEFORE(tp1, data_list[i], sctp_next);
+ goto all_done;
+ }
+ tp1 = tpp;
+ if (compare_with_wrap(tp1->rec.data.TSN_seq,
+ data_list[i]->rec.data.TSN_seq, MAX_TSN)) {
+ goto back_up_more;
+ }
+ TAILQ_INSERT_AFTER(&asoc->sent_queue, tp1, data_list[i], sctp_next);
+ } else {
+ TAILQ_INSERT_TAIL(&asoc->sent_queue,
+ data_list[i],
+ sctp_next);
+ }
+all_done:
+ /* This does not lower until the cum-ack passes it */
+ asoc->sent_queue_cnt++;
+ asoc->send_queue_cnt--;
+ if ((asoc->peers_rwnd <= 0) &&
+ (asoc->total_flight == 0) &&
+ (bundle_at == 1)) {
+ /* Mark the chunk as being a window probe */
+ SCTP_STAT_INCR(sctps_windowprobed);
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xC2, 3);
+#endif
+ data_list[i]->sent = SCTP_DATAGRAM_SENT;
+ data_list[i]->snd_count = 1;
+ data_list[i]->rec.data.chunk_was_revoked = 0;
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_UP,
+ data_list[i]->whoTo->flight_size,
+ data_list[i]->book_size,
+ (uintptr_t) data_list[i]->whoTo,
+ data_list[i]->rec.data.TSN_seq);
+ }
+ sctp_flight_size_increase(data_list[i]);
+ sctp_total_flight_increase(stcb, data_list[i]);
+ if (sctp_logging_level & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd(SCTP_DECREASE_PEER_RWND,
+ asoc->peers_rwnd, data_list[i]->send_size, sctp_peer_chunk_oh);
+ }
+ asoc->peers_rwnd = sctp_sbspace_sub(asoc->peers_rwnd,
+ (uint32_t) (data_list[i]->send_size + sctp_peer_chunk_oh));
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ }
+}
+
+static void
+sctp_clean_up_ctl(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
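+ /*
+ * Purge stray control chunks (SACK, HEARTBEAT, SHUTDOWN,
+ * COOKIE-ACK, etc.) left on the control queue; a STREAM-RESET
+ * chunk is only kept if it is the association's pending
+ * str_reset.
+ */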
+ struct sctp_tmit_chunk *chk, *nchk;
+
+ for (chk = TAILQ_FIRST(&asoc->control_send_queue);
+ chk; chk = nchk) {
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) ||
+ (chk->rec.chunk_id.id == SCTP_HEARTBEAT_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_SHUTDOWN) ||
+ (chk->rec.chunk_id.id == SCTP_SHUTDOWN_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_OPERATION_ERROR) ||
+ (chk->rec.chunk_id.id == SCTP_PACKET_DROPPED) ||
+ (chk->rec.chunk_id.id == SCTP_COOKIE_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_ECN_CWR) ||
+ (chk->rec.chunk_id.id == SCTP_ASCONF_ACK)) {
+ /* Stray chunks must be cleaned up */
+ clean_up_anyway:
+ TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ asoc->ctrl_queue_cnt--;
+ sctp_free_a_chunk(stcb, chk);
+ } else if (chk->rec.chunk_id.id == SCTP_STREAM_RESET) {
+ /* special handling, we must look into the param */
+ if (chk != asoc->str_reset) {
+ goto clean_up_anyway;
+ }
+ }
+ }
+}
+
+
+static int
+sctp_can_we_split_this(struct sctp_tcb *stcb,
+ uint32_t length,
+ uint32_t goal_mtu, uint32_t frag_point, int eeor_on)
+{
+ /*
+ * Make a decision on if I should split a msg into multiple parts.
+ * This is only asked of incomplete messages.
+ */
+ if (eeor_on) {
+ /*
+ * If we are doing EEOR we always need to send it if it is
+ * the entire message, since it might be all the sender is
+ * putting in the hopper.
+ */
+ if (goal_mtu >= length) {
+ /*-
+ * If we have data outstanding,
+ * we get another chance when the sack
+ * arrives to transmit - wait for more data
+ */
+ if (stcb->asoc.total_flight == 0) {
+ /*
+ * Nothing is in flight, so we may take
+ * the entire message now.
+ */
+ return (length);
+ }
+ return (0);
+
+ } else {
+ /* You can fill the rest */
+ return (goal_mtu);
+ }
+ }
+ /*-
+ * For those strange folk that make the send buffer
+ * smaller than our fragmentation point, we can't
+ * get a full msg in so we have to allow splitting.
+ */
+ if (SCTP_SB_LIMIT_SND(stcb->sctp_socket) < frag_point) {
+ return (length);
+ }
+ if ((length <= goal_mtu) ||
+ ((length - goal_mtu) < sctp_min_residual)) {
+ /* Sub-optimal residual, don't split in non-eeor mode. */
+ return (0);
+ }
+ /*
+ * If we reach here, length is larger than the goal_mtu. Do we
+ * wish to split it for the sake of packing chunks together?
+ */
+ if (goal_mtu >= min(sctp_min_split_point, frag_point)) {
+ /* It's ok to split it */
+ return (min(goal_mtu, frag_point));
+ }
+ /* Nope, can't split */
+ return (0);
+
+}
+
+static uint32_t
+sctp_move_to_outqueue(struct sctp_tcb *stcb, struct sctp_nets *net,
+ struct sctp_stream_out *strq,
+ uint32_t goal_mtu,
+ uint32_t frag_point,
+ int *locked,
+ int *giveup,
+ int eeor_mode,
+ int *bail)
+{
+ /* Move from the stream to the send_queue keeping track of the total */
+ struct sctp_association *asoc;
+ struct sctp_stream_queue_pending *sp;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_data_chunk *dchkh;
+ uint32_t to_move, length;
+ uint8_t rcv_flags = 0;
+ uint8_t some_taken;
+ uint8_t send_lock_up = 0;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ asoc = &stcb->asoc;
+one_more_time:
+ /* sa_ignore FREED_MEMORY */
+ sp = TAILQ_FIRST(&strq->outqueue);
+ if (sp == NULL) {
+ *locked = 0;
+ if (send_lock_up == 0) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ }
+ sp = TAILQ_FIRST(&strq->outqueue);
+ if (sp) {
+ goto one_more_time;
+ }
+ if (strq->last_msg_incomplete) {
+ SCTP_PRINTF("Huh? Stream:%d lm_in_c=%d but queue is NULL\n",
+ strq->stream_no,
+ strq->last_msg_incomplete);
+ strq->last_msg_incomplete = 0;
+ }
+ to_move = 0;
+ if (send_lock_up) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ send_lock_up = 0;
+ }
+ goto out_of;
+ }
+ if ((sp->msg_is_complete) && (sp->length == 0)) {
+ if (sp->sender_all_done) {
+ /*
+ * We are doing deferred cleanup. Last time through,
+ * when we took all the data, sender_all_done was
+ * not set.
+ */
+ if (sp->put_last_out == 0) {
+ SCTP_PRINTF("Gak, put out entire msg with NO end!-1\n");
+ SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d send_lock:%d\n",
+ sp->sender_all_done,
+ sp->length,
+ sp->msg_is_complete,
+ sp->put_last_out,
+ send_lock_up);
+ }
+ if ((TAILQ_NEXT(sp, next) == NULL) && (send_lock_up == 0)) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ }
+ atomic_subtract_int(&asoc->stream_queue_cnt, 1);
+ TAILQ_REMOVE(&strq->outqueue, sp, next);
+ sctp_free_remote_addr(sp->net);
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ }
+ sctp_free_a_strmoq(stcb, sp);
+
+ /* we can't be locked to it */
+ *locked = 0;
+ stcb->asoc.locked_on_sending = NULL;
+ if (send_lock_up) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ send_lock_up = 0;
+ }
+ /* back to get the next msg */
+ goto one_more_time;
+ } else {
+ /*
+ * sender just finished this but still holds a
+ * reference
+ */
+ *locked = 1;
+ *giveup = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ } else {
+ /* is there some to get */
+ if (sp->length == 0) {
+ /* no */
+ *locked = 1;
+ *giveup = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ }
+ some_taken = sp->some_taken;
+ if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
+ sp->msg_is_complete = 1;
+ }
+re_look:
+ length = sp->length;
+ if (sp->msg_is_complete) {
+ /* The message is complete */
+ to_move = min(length, frag_point);
+ if (to_move == length) {
+ /* All of it fits in the MTU */
+ if (sp->some_taken) {
+ rcv_flags |= SCTP_DATA_LAST_FRAG;
+ sp->put_last_out = 1;
+ } else {
+ rcv_flags |= SCTP_DATA_NOT_FRAG;
+ sp->put_last_out = 1;
+ }
+ } else {
+ /* Not all of it fits, we fragment */
+ if (sp->some_taken == 0) {
+ rcv_flags |= SCTP_DATA_FIRST_FRAG;
+ }
+ sp->some_taken = 1;
+ }
+ } else {
+ to_move = sctp_can_we_split_this(stcb, length, goal_mtu,
+ frag_point, eeor_mode);
+ if (to_move) {
+ /*-
+ * We use a snapshot of length in case it
+ * is expanding during the compare.
+ */
+ uint32_t llen;
+
+ llen = length;
+ if (to_move >= llen) {
+ to_move = llen;
+ if (send_lock_up == 0) {
+ /*-
+ * We are taking all of an incomplete msg
+ * thus we need a send lock.
+ */
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ if (sp->msg_is_complete) {
+ /*
+ * the sender finished the
+ * msg
+ */
+ goto re_look;
+ }
+ }
+ }
+ if (sp->some_taken == 0) {
+ rcv_flags |= SCTP_DATA_FIRST_FRAG;
+ sp->some_taken = 1;
+ }
+ } else {
+ /* Nothing to take. */
+ if (sp->some_taken) {
+ *locked = 1;
+ }
+ *giveup = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ }
+
+ /* If we reach here, we can copy out a chunk */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* No chunk memory */
+ *giveup = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ /*
+ * Setup for unordered if needed by looking at the user sent info
+ * flags.
+ */
+ if (sp->sinfo_flags & SCTP_UNORDERED) {
+ rcv_flags |= SCTP_DATA_UNORDERED;
+ }
+ /* clear out the chunk before setting up */
+ memset(chk, 0, sizeof(*chk));
+ chk->rec.data.rcv_flags = rcv_flags;
+
+ if (to_move >= length) {
+ /* we think we can steal the whole thing */
+ if ((sp->sender_all_done == 0) && (send_lock_up == 0)) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ }
+ if (to_move < sp->length) {
+ /* bail, it changed */
+ goto dont_do_it;
+ }
+ chk->data = sp->data;
+ chk->last_mbuf = sp->tail_mbuf;
+ /* register the stealing */
+ sp->data = sp->tail_mbuf = NULL;
+ } else {
+ struct mbuf *m;
+
+dont_do_it:
+ chk->data = SCTP_M_COPYM(sp->data, 0, to_move, M_DONTWAIT);
+ chk->last_mbuf = NULL;
+ if (chk->data == NULL) {
+ sp->some_taken = some_taken;
+ sctp_free_a_chunk(stcb, chk);
+ *bail = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ /* Pull off the data */
+ m_adj(sp->data, to_move);
+ /* Now lets work our way down and compact it */
+ m = sp->data;
+ while (m && (SCTP_BUF_LEN(m) == 0)) {
+ sp->data = SCTP_BUF_NEXT(m);
+ SCTP_BUF_NEXT(m) = NULL;
+ if (sp->tail_mbuf == m) {
+ /*-
+ * Freeing tail? TSNH since
+ * we supposedly were taking less
+ * than the sp->length.
+ */
+#ifdef INVARIANTS
+ panic("Huh, freing tail? - TSNH");
+#else
+ SCTP_PRINTF("Huh, freeing tail? - TSNH\n");
+ sp->tail_mbuf = sp->data = NULL;
+ sp->length = 0;
+#endif
+
+ }
+ sctp_m_free(m);
+ m = sp->data;
+ }
+ }
+ if (SCTP_BUF_IS_EXTENDED(chk->data)) {
+ chk->copy_by_ref = 1;
+ } else {
+ chk->copy_by_ref = 0;
+ }
+ /*
+ * Get last_mbuf and counts of mbuf usage. This is ugly but
+ * hopefully it's only one mbuf.
+ */
+ if (chk->last_mbuf == NULL) {
+ chk->last_mbuf = chk->data;
+ while (SCTP_BUF_NEXT(chk->last_mbuf) != NULL) {
+ chk->last_mbuf = SCTP_BUF_NEXT(chk->last_mbuf);
+ }
+ }
+ if (to_move > length) {
+ /*- This should not happen either,
+ * since we always lower to_move to the size
+ * of sp->length if it's larger.
+ */
+#ifdef INVARIANTS
+ panic("Huh, how can to_move be larger?");
+#else
+ SCTP_PRINTF("Huh, how can to_move be larger?\n");
+ sp->length = 0;
+#endif
+ } else {
+ atomic_subtract_int(&sp->length, to_move);
+ }
+ if (M_LEADINGSPACE(chk->data) < (int)sizeof(struct sctp_data_chunk)) {
+ /* Not enough room for a chunk header, get some */
+ struct mbuf *m;
+
+ m = sctp_get_mbuf_for_msg(1, 0, M_DONTWAIT, 0, MT_DATA);
+ if (m == NULL) {
+ /*
+ * we're in trouble here. _PREPEND below will free
+ * all the data if there is no leading space, so we
+ * must put the data back and restore.
+ */
+ if (send_lock_up == 0) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ }
+ if (chk->data == NULL) {
+ /* unsteal the data */
+ sp->data = chk->data;
+ sp->tail_mbuf = chk->last_mbuf;
+ } else {
+ struct mbuf *m_tmp;
+
+ /* reassemble the data */
+ m_tmp = sp->data;
+ sp->data = chk->data;
+ SCTP_BUF_NEXT(chk->last_mbuf) = m_tmp;
+ }
+ sp->some_taken = some_taken;
+ atomic_add_int(&sp->length, to_move);
+ chk->data = NULL;
+ *bail = 1;
+ sctp_free_a_chunk(stcb, chk);
+ to_move = 0;
+ goto out_of;
+ } else {
+ SCTP_BUF_LEN(m) = 0;
+ SCTP_BUF_NEXT(m) = chk->data;
+ chk->data = m;
+ M_ALIGN(chk->data, 4);
+ }
+ }
+ SCTP_BUF_PREPEND(chk->data, sizeof(struct sctp_data_chunk), M_DONTWAIT);
+ if (chk->data == NULL) {
+ /* HELP, TSNH since we assured it would not fail above? */
+#ifdef INVARIANTS
+ panic("prepend fails HELP?");
+#else
+ SCTP_PRINTF("prepend fails HELP?\n");
+ sctp_free_a_chunk(stcb, chk);
+#endif
+ *bail = 1;
+ to_move = 0;
+ goto out_of;
+ }
+ sctp_snd_sb_alloc(stcb, sizeof(struct sctp_data_chunk));
+ chk->book_size = chk->send_size = (to_move +
+ sizeof(struct sctp_data_chunk));
+ chk->book_size_scale = 0;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->pad_inplace = 0;
+ chk->no_fr_allowed = 0;
+ chk->rec.data.stream_seq = sp->strseq;
+ chk->rec.data.stream_number = sp->stream;
+ chk->rec.data.payloadtype = sp->ppid;
+ chk->rec.data.context = sp->context;
+ chk->rec.data.doing_fast_retransmit = 0;
+ chk->rec.data.ect_nonce = 0; /* ECN Nonce */
+
+ chk->rec.data.timetodrop = sp->ts;
+ chk->flags = sp->act_flags;
+ chk->addr_over = sp->addr_over;
+
+ chk->whoTo = net;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+
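+ /* Assign this chunk the next outgoing TSN. */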
+ chk->rec.data.TSN_seq = atomic_fetchadd_int(&asoc->sending_seq, 1);
+ if (sctp_logging_level & SCTP_LOG_AT_SEND_2_OUTQ) {
+ sctp_misc_ints(SCTP_STRMOUT_LOG_SEND,
+ (uintptr_t) stcb, sp->length,
+ (uint32_t) ((chk->rec.data.stream_number << 16) | chk->rec.data.stream_seq),
+ chk->rec.data.TSN_seq);
+ }
+ dchkh = mtod(chk->data, struct sctp_data_chunk *);
+ /*
+ * Put the rest of the things in place now. Size was done earlier in
+ * previous loop prior to padding.
+ */
+
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (asoc->tsn_out_at >= SCTP_TSN_LOG_SIZE) {
+ asoc->tsn_out_at = 0;
+ asoc->tsn_out_wrapped = 1;
+ }
+ asoc->out_tsnlog[asoc->tsn_out_at].tsn = chk->rec.data.TSN_seq;
+ asoc->out_tsnlog[asoc->tsn_out_at].strm = chk->rec.data.stream_number;
+ asoc->out_tsnlog[asoc->tsn_out_at].seq = chk->rec.data.stream_seq;
+ asoc->out_tsnlog[asoc->tsn_out_at].sz = chk->send_size;
+ asoc->out_tsnlog[asoc->tsn_out_at].flgs = chk->rec.data.rcv_flags;
+ asoc->out_tsnlog[asoc->tsn_out_at].stcb = (void *)stcb;
+ asoc->out_tsnlog[asoc->tsn_out_at].in_pos = asoc->tsn_out_at;
+ asoc->out_tsnlog[asoc->tsn_out_at].in_out = 2;
+ asoc->tsn_out_at++;
+#endif
+
+ dchkh->ch.chunk_type = SCTP_DATA;
+ dchkh->ch.chunk_flags = chk->rec.data.rcv_flags;
+ dchkh->dp.tsn = htonl(chk->rec.data.TSN_seq);
+ dchkh->dp.stream_id = htons(strq->stream_no);
+ dchkh->dp.stream_sequence = htons(chk->rec.data.stream_seq);
+ dchkh->dp.protocol_id = chk->rec.data.payloadtype;
+ dchkh->ch.chunk_length = htons(chk->send_size);
+ /* Now advance the chk->send_size by the actual pad needed. */
+ if (chk->send_size < SCTP_SIZE32(chk->book_size)) {
+ /* need a pad */
+ struct mbuf *lm;
+ int pads;
+
+ pads = SCTP_SIZE32(chk->book_size) - chk->send_size;
+ if (sctp_pad_lastmbuf(chk->data, pads, chk->last_mbuf) == 0) {
+ chk->pad_inplace = 1;
+ }
+ if ((lm = SCTP_BUF_NEXT(chk->last_mbuf)) != NULL) {
+ /* pad added an mbuf */
+ chk->last_mbuf = lm;
+ }
+ chk->send_size += pads;
+ }
+ /* We only re-set the policy if it is on */
+ if (sp->pr_sctp_on) {
+ sctp_set_prsctp_policy(stcb, sp);
+ asoc->pr_sctp_cnt++;
+ chk->pr_sctp_on = 1;
+ } else {
+ chk->pr_sctp_on = 0;
+ }
+ if (sp->msg_is_complete && (sp->length == 0) && (sp->sender_all_done)) {
+ /* All done pull and kill the message */
+ atomic_subtract_int(&asoc->stream_queue_cnt, 1);
+ if (sp->put_last_out == 0) {
+ SCTP_PRINTF("Gak, put out entire msg with NO end!-2\n");
+ SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d send_lock:%d\n",
+ sp->sender_all_done,
+ sp->length,
+ sp->msg_is_complete,
+ sp->put_last_out,
+ send_lock_up);
+ }
+ if ((send_lock_up == 0) && (TAILQ_NEXT(sp, next) == NULL)) {
+ SCTP_TCB_SEND_LOCK(stcb);
+ send_lock_up = 1;
+ }
+ TAILQ_REMOVE(&strq->outqueue, sp, next);
+ sctp_free_remote_addr(sp->net);
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ }
+ sctp_free_a_strmoq(stcb, sp);
+
+ /* we can't be locked to it */
+ *locked = 0;
+ stcb->asoc.locked_on_sending = NULL;
+ } else {
+ /* more to go, we are locked */
+ *locked = 1;
+ }
+ asoc->chunks_on_out_queue++;
+ TAILQ_INSERT_TAIL(&asoc->send_queue, chk, sctp_next);
+ asoc->send_queue_cnt++;
+out_of:
+ if (send_lock_up) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ send_lock_up = 0;
+ }
+ return (to_move);
+}
+
+
+static struct sctp_stream_out *
+sctp_select_a_stream(struct sctp_tcb *stcb, struct sctp_association *asoc)
+{
+ struct sctp_stream_out *strq;
+
+ /* Find the next stream to use */
+ if (asoc->last_out_stream == NULL) {
+ strq = asoc->last_out_stream = TAILQ_FIRST(&asoc->out_wheel);
+ if (asoc->last_out_stream == NULL) {
+ /* huh nothing on the wheel, TSNH */
+ return (NULL);
+ }
+ goto done_it;
+ }
+ strq = TAILQ_NEXT(asoc->last_out_stream, next_spoke);
+done_it:
+ if (strq == NULL) {
+ strq = asoc->last_out_stream = TAILQ_FIRST(&asoc->out_wheel);
+ }
+ /* Save off the last stream */
+ asoc->last_out_stream = strq;
+ return (strq);
+
+}
+
+
+static void
+sctp_fill_outqueue(struct sctp_tcb *stcb,
+ struct sctp_nets *net, int frag_point, int eeor_mode, int *quit_now)
+{
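+ /*
+ * Pull data from the per-stream out-queues onto the association
+ * send queue for this net, up to roughly goal_mtu bytes.
+ */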
+ struct sctp_association *asoc;
+ struct sctp_stream_out *strq, *strqn, *strqt;
+ int goal_mtu, moved_how_much, total_moved = 0, bail = 0;
+ int locked, giveup;
+ struct sctp_stream_queue_pending *sp;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ asoc = &stcb->asoc;
+#ifdef INET6
+ if (net->ro._l_addr.sin6.sin6_family == AF_INET6) {
+ goal_mtu = net->mtu - SCTP_MIN_OVERHEAD;
+ } else {
+ /* ?? not sure what else to do */
+ goal_mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
+ }
+#else
+ goal_mtu = net->mtu - SCTP_MIN_OVERHEAD;
+#endif
+ /* Need an allowance for the data chunk header too */
+ goal_mtu -= sizeof(struct sctp_data_chunk);
+
+ /* must make even word boundary */
+ goal_mtu &= 0xfffffffc;
+ if (asoc->locked_on_sending) {
+ /* We are stuck on one stream until the message completes. */
+ strqn = strq = asoc->locked_on_sending;
+ locked = 1;
+ } else {
+ strqn = strq = sctp_select_a_stream(stcb, asoc);
+ locked = 0;
+ }
+
+ while ((goal_mtu > 0) && strq) {
+ sp = TAILQ_FIRST(&strq->outqueue);
+ /*
+ * If CMT is off, we must validate that the stream in
+ * question has its first item pointed towards the network
+ * destination requested by the caller. Note that if we
+ * turn out to be locked to a stream (assigning TSNs), then
+ * we must stop, since we cannot look for another stream
+ * with data to send to that destination. In CMT's case, by
+ * skipping this check, we will send one data packet towards
+ * the requested net.
+ */
+ if (sp == NULL) {
+ break;
+ }
+ if ((sp->net != net) && (sctp_cmt_on_off == 0)) {
+ /* none for this network */
+ if (locked) {
+ break;
+ } else {
+ strq = sctp_select_a_stream(stcb, asoc);
+ if (strq == NULL)
+ /* none left */
+ break;
+ if (strqn == strq) {
+ /* I have circled */
+ break;
+ }
+ continue;
+ }
+ }
+ giveup = 0;
+ bail = 0;
+ moved_how_much = sctp_move_to_outqueue(stcb, net, strq, goal_mtu, frag_point, &locked,
+ &giveup, eeor_mode, &bail);
+ asoc->last_out_stream = strq;
+ if (locked) {
+ asoc->locked_on_sending = strq;
+ if ((moved_how_much == 0) || (giveup) || bail)
+ /* no more to move for now */
+ break;
+ } else {
+ asoc->locked_on_sending = NULL;
+ strqt = sctp_select_a_stream(stcb, asoc);
+ if (TAILQ_FIRST(&strq->outqueue) == NULL) {
+ if (strq == strqn) {
+ /* Must move start to next one */
+ strqn = TAILQ_NEXT(asoc->last_out_stream, next_spoke);
+ if (strqn == NULL) {
+ strqn = TAILQ_FIRST(&asoc->out_wheel);
+ if (strqn == NULL) {
+ break;
+ }
+ }
+ }
+ sctp_remove_from_wheel(stcb, asoc, strq);
+ }
+ if ((giveup) || bail) {
+ break;
+ }
+ strq = strqt;
+ if (strq == NULL) {
+ break;
+ }
+ }
+ total_moved += moved_how_much;
+ goal_mtu -= (moved_how_much + sizeof(struct sctp_data_chunk));
+ goal_mtu &= 0xfffffffc;
+ }
+ if (bail)
+ *quit_now = 1;
+
+ if (total_moved == 0) {
+ if ((sctp_cmt_on_off == 0) &&
+ (net == stcb->asoc.primary_destination)) {
+ /* ran dry for primary network net */
+ SCTP_STAT_INCR(sctps_primary_randry);
+ } else if (sctp_cmt_on_off) {
+ /* ran dry with CMT on */
+ SCTP_STAT_INCR(sctps_cmt_randry);
+ }
+ }
+}
+
+void
+sctp_fix_ecn_echo(struct sctp_association *asoc)
+{
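+ /* Mark any queued ECN-ECHO chunks as unsent so they go out again. */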
+ struct sctp_tmit_chunk *chk;
+
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id == SCTP_ECN_ECHO) {
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ }
+ }
+}
+
+static void
+sctp_move_to_an_alt(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_nets *net)
+{
+ struct sctp_tmit_chunk *chk;
+ struct sctp_nets *a_net;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ /*
+ * JRS 5/14/07 - If CMT PF is turned on, find an alternate
+ * destination using the PF algorithm for finding alternate
+ * destinations.
+ */
+ if (sctp_cmt_on_off && sctp_cmt_pf) {
+ a_net = sctp_find_alternate_net(stcb, net, 2);
+ } else {
+ a_net = sctp_find_alternate_net(stcb, net, 0);
+ }
+ if ((a_net != net) &&
+ ((a_net->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE)) {
+ /*
+ * We only proceed if a valid alternate is found that is not
+ * this one and is reachable. Here we must move all chunks
+ * queued in the send queue off of the destination address
+ * to our alternate.
+ */
+ TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) {
+ if (chk->whoTo == net) {
+ /* Move the chunk to our alternate */
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = a_net;
+ atomic_add_int(&a_net->ref_count, 1);
+ }
+ }
+ }
+}
+
+int
+sctp_med_chunk_output(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int *num_out,
+ int *reason_code,
+ int control_only, int *cwnd_full, int from_where,
+ struct timeval *now, int *now_filled, int frag_point, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ /*
+ * Ok, this is the generic chunk service queue. We must do the
+ * following: - Service the stream queue that is next, moving any
+ * message (note I must get a complete message, i.e. FIRST/MIDDLE
+ * and LAST, to the out queue in one pass) and assigning TSNs. -
+ * Check to see if the cwnd/rwnd allows any output; if so we go
+ * ahead and formulate and send the low level chunks, making sure
+ * to combine any control in the control chunk queue also.
+ */
+ struct sctp_nets *net;
+ struct mbuf *outchain, *endoutchain;
+ struct sctp_tmit_chunk *chk, *nchk;
+ struct sctphdr *shdr;
+
+ /* temp arrays for unlinking */
+ struct sctp_tmit_chunk *data_list[SCTP_MAX_DATA_BUNDLING];
+ int no_fragmentflg, error;
+ int one_chunk, hbflag, skip_data_for_this_net;
+ int asconf, cookie, no_out_cnt;
+ int bundle_at, ctl_cnt, no_data_chunks, cwnd_full_ind, eeor_mode;
+ unsigned int mtu, r_mtu, omtu, mx_mtu, to_out;
+ struct sctp_nets *start_at, *old_startat = NULL, *send_start_at;
+ int tsns_sent = 0;
+ uint32_t auth_offset = 0;
+ struct sctp_auth_chunk *auth = NULL;
+
+ /*
+ * JRS 5/14/07 - Add flag for whether a heartbeat is sent to the
+ * destination.
+ */
+ int pf_hbflag = 0;
+ int quit_now = 0;
+
+ *num_out = 0;
+ cwnd_full_ind = 0;
+
+ if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) ||
+ (asoc->state & SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) {
+ eeor_mode = 1;
+ } else {
+ eeor_mode = 0;
+ }
+ ctl_cnt = no_out_cnt = asconf = cookie = 0;
+ /*
+ * First lets prime the pump. For each destination, if there is room
+ * in the flight size, attempt to pull an MTU's worth out of the
+ * stream queues into the general send_queue
+ */
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xC2, 2);
+#endif
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ hbflag = 0;
+ if ((control_only) || (asoc->stream_reset_outstanding))
+ no_data_chunks = 1;
+ else
+ no_data_chunks = 0;
+
+ /* Nothing possible to send? */
+ if (TAILQ_EMPTY(&asoc->control_send_queue) &&
+ TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->out_wheel)) {
+ *reason_code = 9;
+ return (0);
+ }
+ if (asoc->peers_rwnd == 0) {
+ /* No room in peers rwnd */
+ *cwnd_full = 1;
+ *reason_code = 1;
+ if (asoc->total_flight > 0) {
+ /* we are allowed one chunk in flight */
+ no_data_chunks = 1;
+ }
+ }
+ if ((no_data_chunks == 0) && (!TAILQ_EMPTY(&asoc->out_wheel))) {
+ if (sctp_cmt_on_off) {
+ /*
+ * for CMT we start at the next one past the one we
+ * last added data to.
+ */
+ if (TAILQ_FIRST(&asoc->send_queue) != NULL) {
+ goto skip_the_fill_from_streams;
+ }
+ if (asoc->last_net_data_came_from) {
+ net = TAILQ_NEXT(asoc->last_net_data_came_from, sctp_next);
+ if (net == NULL) {
+ net = TAILQ_FIRST(&asoc->nets);
+ }
+ } else {
+ /* back to start */
+ net = TAILQ_FIRST(&asoc->nets);
+ }
+
+ /*
+ * JRI-TODO: CMT-MPI. Simply set the first
+ * destination (net) to be optimized for the next
+ * message to be pulled out of the outwheel. 1. peek
+ * at outwheel 2. If large message, set net =
+ * highest_cwnd 3. If small message, set net =
+ * lowest rtt
+ */
+ } else {
+ net = asoc->primary_destination;
+ if (net == NULL) {
+ /* TSNH */
+ net = TAILQ_FIRST(&asoc->nets);
+ }
+ }
+ start_at = net;
+
+one_more_time:
+ for (; net != NULL; net = TAILQ_NEXT(net, sctp_next)) {
+ net->window_probe = 0;
+ if (old_startat && (old_startat == net)) {
+ break;
+ }
+ /*
+ * JRI: if dest is unreachable or unconfirmed, do
+ * not send data to it
+ */
+ if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) || (net->dest_state & SCTP_ADDR_UNCONFIRMED)) {
+ continue;
+ }
+ /*
+ * JRI: if dest is in PF state, do not send data to
+ * it
+ */
+ if (sctp_cmt_on_off && sctp_cmt_pf && (net->dest_state & SCTP_ADDR_PF)) {
+ continue;
+ }
+ if ((sctp_cmt_on_off == 0) && (net->ref_count < 2)) {
+ /* nothing can be in queue for this guy */
+ continue;
+ }
+ if (net->flight_size >= net->cwnd) {
+ /* skip this network, no room */
+ cwnd_full_ind++;
+ continue;
+ }
+ /*
+ * JRI: the for loop we are in takes in each net;
+ * if it's got space in cwnd and has data sent to
+ * it (when CMT is off) then it calls
+ * sctp_fill_outqueue for the net. This gets data on
+ * the send queue for that network.
+ *
+ * In sctp_fill_outqueue TSNs are assigned and data is
+ * copied out of the stream buffers. Note mostly
+ * copy by reference (we hope).
+ */
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FILL_OUTQ_CALLED);
+ }
+ sctp_fill_outqueue(stcb, net, frag_point, eeor_mode, &quit_now);
+ if (quit_now) {
+ /* memory alloc failure */
+ no_data_chunks = 1;
+ goto skip_the_fill_from_streams;
+ }
+ }
+ if (start_at != TAILQ_FIRST(&asoc->nets)) {
+ /* got to pick up the beginning stuff. */
+ old_startat = start_at;
+ start_at = net = TAILQ_FIRST(&asoc->nets);
+ if (old_startat)
+ goto one_more_time;
+ }
+ }
+skip_the_fill_from_streams:
+ *cwnd_full = cwnd_full_ind;
+
+ /* now service each destination and send out what we can for it */
+ /* Nothing to send? */
+ if ((TAILQ_FIRST(&asoc->control_send_queue) == NULL) &&
+ (TAILQ_FIRST(&asoc->send_queue) == NULL)) {
+ *reason_code = 8;
+ return (0);
+ }
+ if (no_data_chunks) {
+ chk = TAILQ_FIRST(&asoc->control_send_queue);
+ } else {
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ }
+ if (chk) {
+ send_start_at = chk->whoTo;
+ } else {
+ send_start_at = TAILQ_FIRST(&asoc->nets);
+ }
+ old_startat = NULL;
+again_one_more_time:
+ for (net = send_start_at; net != NULL; net = TAILQ_NEXT(net, sctp_next)) {
+ /* how much can we send? */
+ /* SCTPDBG("Examine for sending net:%x\n", (uint32_t)net); */
+ if (old_startat && (old_startat == net)) {
+ /* through list completely. */
+ break;
+ }
+ tsns_sent = 0;
+ if (net->ref_count < 2) {
+ /*
+ * Ref-count of 1 so we cannot have data or control
+ * queued to this address. Skip it.
+ */
+ continue;
+ }
+ ctl_cnt = bundle_at = 0;
+ endoutchain = outchain = NULL;
+ no_fragmentflg = 1;
+ one_chunk = 0;
+ if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ skip_data_for_this_net = 1;
+ } else {
+ skip_data_for_this_net = 0;
+ }
+ if ((net->ro.ro_rt) && (net->ro.ro_rt->rt_ifp)) {
+ /*
+ * if we have a route and an ifp check to see if we
+ * have room to send to this guy
+ */
+ struct ifnet *ifp;
+
+ ifp = net->ro.ro_rt->rt_ifp;
+ if ((ifp->if_snd.ifq_len + 2) >= ifp->if_snd.ifq_maxlen) {
+ SCTP_STAT_INCR(sctps_ifnomemqueued);
+ if (sctp_logging_level & SCTP_LOG_MAXBURST_ENABLE) {
+ sctp_log_maxburst(stcb, net, ifp->if_snd.ifq_len, ifp->if_snd.ifq_maxlen, SCTP_MAX_IFP_APPLIED);
+ }
+ continue;
+ }
+ }
+ if (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET) {
+ mtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr));
+ } else {
+ mtu = net->mtu - (sizeof(struct ip6_hdr) + sizeof(struct sctphdr));
+ }
+ mx_mtu = mtu;
+ to_out = 0;
+ if (mtu > asoc->peers_rwnd) {
+ if (asoc->total_flight > 0) {
+ /* We have a packet in flight somewhere */
+ r_mtu = asoc->peers_rwnd;
+ } else {
+ /* We are always allowed to send one MTU out */
+ one_chunk = 1;
+ r_mtu = mtu;
+ }
+ } else {
+ r_mtu = mtu;
+ }
+ /************************/
+ /* Control transmission */
+ /************************/
+ /* Now first lets go through the control queue */
+ for (chk = TAILQ_FIRST(&asoc->control_send_queue);
+ chk; chk = nchk) {
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if (chk->whoTo != net) {
+ /*
+ * No, not sent to the network we are
+ * looking at
+ */
+ continue;
+ }
+ if (chk->data == NULL) {
+ continue;
+ }
+ if (chk->sent != SCTP_DATAGRAM_UNSENT) {
+ /*
+ * It must be unsent. Cookies and ASCONFs
+ * hang around but their timers will force
+ * them out when marked for resend.
+ */
+ continue;
+ }
+ /*
+ * if no AUTH is yet included and this chunk
+ * requires it, make sure to account for it. We
+ * don't apply the size until the AUTH chunk is
+ * actually added below in case there is no room for
+ * this chunk. NOTE: we overload the use of "omtu"
+ * here
+ */
+ if ((auth == NULL) &&
+ sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
+ stcb->asoc.peer_auth_chunks)) {
+ omtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+ } else
+ omtu = 0;
+ /* Here we do NOT factor the r_mtu */
+ if ((chk->send_size < (int)(mtu - omtu)) ||
+ (chk->flags & CHUNK_FLAGS_FRAGMENT_OK)) {
+ /*
+ * We probably should glom the mbuf chain
+ * from the chk->data for control but the
+ * problem is it becomes yet one more level
+ * of tracking to do if for some reason
+ * output fails. Then I have got to
+ * reconstruct the merged control chain.. el
+ * yucko.. for now we take the easy way and
+ * do the copy
+ */
+ /*
+ * Add an AUTH chunk, if chunk requires it
+ * save the offset into the chain for AUTH
+ */
+ if ((auth == NULL) &&
+ (sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
+ stcb->asoc.peer_auth_chunks))) {
+ outchain = sctp_add_auth_chunk(outchain,
+ &endoutchain,
+ &auth,
+ &auth_offset,
+ stcb,
+ chk->rec.chunk_id.id);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ }
+ outchain = sctp_copy_mbufchain(chk->data, outchain, &endoutchain,
+ (int)chk->rec.chunk_id.can_take_data,
+ chk->send_size, chk->copy_by_ref);
+ if (outchain == NULL) {
+ *reason_code = 8;
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ /* update our MTU size */
+ if (mtu > (chk->send_size + omtu))
+ mtu -= (chk->send_size + omtu);
+ else
+ mtu = 0;
+ to_out += (chk->send_size + omtu);
+ /* Do clear IP_DF ? */
+ if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
+ no_fragmentflg = 0;
+ }
+ if (chk->rec.chunk_id.can_take_data)
+ chk->data = NULL;
+ /* Mark things to be removed, if needed */
+ if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) ||
+ (chk->rec.chunk_id.id == SCTP_HEARTBEAT_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_SHUTDOWN) ||
+ (chk->rec.chunk_id.id == SCTP_SHUTDOWN_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_OPERATION_ERROR) ||
+ (chk->rec.chunk_id.id == SCTP_COOKIE_ACK) ||
+ (chk->rec.chunk_id.id == SCTP_ECN_CWR) ||
+ (chk->rec.chunk_id.id == SCTP_PACKET_DROPPED) ||
+ (chk->rec.chunk_id.id == SCTP_ASCONF_ACK)) {
+
+ if (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) {
+ hbflag = 1;
+ /*
+ * JRS 5/14/07 - Set the
+ * flag to say a heartbeat
+ * is being sent.
+ */
+ pf_hbflag = 1;
+ }
+ /* remove these chunks at the end */
+ if (chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) {
+ /* turn off the timer */
+ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
+ inp, stcb, net, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_1);
+ }
+ }
+ ctl_cnt++;
+ } else {
+ /*
+ * Other chunks, since they have
+ * timers running (i.e. COOKIE or
+ * ASCONF) we just "trust" that it
+ * gets sent or retransmitted.
+ */
+ ctl_cnt++;
+ if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
+ cookie = 1;
+ no_out_cnt = 1;
+ } else if (chk->rec.chunk_id.id == SCTP_ASCONF) {
+ /*
+ * set hb flag since we can
+ * use these for RTO
+ */
+ hbflag = 1;
+ asconf = 1;
+ /*
+ * should sysctl this: don't
+ * bundle data with ASCONF
+ * since it requires AUTH
+ */
+ no_data_chunks = 1;
+ }
+ chk->sent = SCTP_DATAGRAM_SENT;
+ chk->snd_count++;
+ }
+ if (mtu == 0) {
+ /*
+ * Ok we are out of room but we can
+ * output without affecting the
+ * flight size since this little guy
+ * is a control only packet.
+ */
+ if (asconf) {
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, net);
+ /*
+ * do NOT clear the asconf
+ * flag as it is used to do
+ * appropriate source
+ * address selection.
+ */
+ }
+ if (cookie) {
+ sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, net);
+ cookie = 0;
+ }
+ SCTP_BUF_PREPEND(outchain, sizeof(struct sctphdr), M_DONTWAIT);
+ if (outchain == NULL) {
+ /* no memory */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
+ error = ENOBUFS;
+ goto error_out_again;
+ }
+ shdr = mtod(outchain, struct sctphdr *);
+ shdr->src_port = inp->sctp_lport;
+ shdr->dest_port = stcb->rport;
+ shdr->v_tag = htonl(stcb->asoc.peer_vtag);
+ shdr->checksum = 0;
+ auth_offset += sizeof(struct sctphdr);
+ if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
+ (struct sockaddr *)&net->ro._l_addr,
+ outchain, auth_offset, auth,
+ no_fragmentflg, 0, NULL, asconf, so_locked))) {
+ if (error == ENOBUFS) {
+ asoc->ifp_had_enobuf = 1;
+ SCTP_STAT_INCR(sctps_lowlevelerr);
+ }
+ if (from_where == 0) {
+ SCTP_STAT_INCR(sctps_lowlevelerrusr);
+ }
+ error_out_again:
+ /* error, could not output */
+ if (hbflag) {
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+ *now_filled = 1;
+ *now = net->last_sent_time;
+ } else {
+ net->last_sent_time = *now;
+ }
+ hbflag = 0;
+ }
+ if (error == EHOSTUNREACH) {
+ /*
+ * Destination went
+ * unreachable
+ * during this send
+ */
+ sctp_move_to_an_alt(stcb, asoc, net);
+ }
+ *reason_code = 7;
+ continue;
+ } else
+ asoc->ifp_had_enobuf = 0;
+ /* Only HB or ASCONF advances time */
+ if (hbflag) {
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+ *now_filled = 1;
+ *now = net->last_sent_time;
+ } else {
+ net->last_sent_time = *now;
+ }
+ hbflag = 0;
+ }
+ /*
+ * increase the number we sent, if a
+ * cookie is sent we don't tell them
+ * any was sent out.
+ */
+ outchain = endoutchain = NULL;
+ auth = NULL;
+ auth_offset = 0;
+ if (!no_out_cnt)
+ *num_out += ctl_cnt;
+ /* recalc a clean slate and setup */
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ mtu = (net->mtu - SCTP_MIN_OVERHEAD);
+ } else {
+ mtu = (net->mtu - SCTP_MIN_V4_OVERHEAD);
+ }
+ to_out = 0;
+ no_fragmentflg = 1;
+ }
+ }
+ }
+ /*********************/
+ /* Data transmission */
+ /*********************/
+ /*
+ * if AUTH for DATA is required and no AUTH has been added
+ * yet, account for this in the mtu now... if no data can be
+ * bundled, this adjustment won't matter anyways since the
+ * packet will be going out...
+ */
+ if ((auth == NULL) &&
+ sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.peer_auth_chunks)) {
+ mtu -= sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+ }
+ /* now lets add any data within the MTU constraints */
+ if (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET) {
+ if (net->mtu > (sizeof(struct ip) + sizeof(struct sctphdr)))
+ omtu = net->mtu - (sizeof(struct ip) + sizeof(struct sctphdr));
+ else
+ omtu = 0;
+ } else {
+ if (net->mtu > (sizeof(struct ip6_hdr) + sizeof(struct sctphdr)))
+ omtu = net->mtu - (sizeof(struct ip6_hdr) + sizeof(struct sctphdr));
+ else
+ omtu = 0;
+ }
+ if ((((asoc->state & SCTP_STATE_OPEN) == SCTP_STATE_OPEN) && (skip_data_for_this_net == 0)) ||
+ (cookie)) {
+ for (chk = TAILQ_FIRST(&asoc->send_queue); chk; chk = nchk) {
+ if (no_data_chunks) {
+ /* let only control go out */
+ *reason_code = 1;
+ break;
+ }
+ if (net->flight_size >= net->cwnd) {
+ /* skip this net, no room for data */
+ *reason_code = 2;
+ break;
+ }
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if (chk->whoTo != net) {
+ /* No, not sent to this net */
+ continue;
+ }
+ if ((chk->send_size > omtu) && ((chk->flags & CHUNK_FLAGS_FRAGMENT_OK) == 0)) {
+ /*-
+ * strange, we have a chunk that is
+ * too big for its destination and
+ * yet no fragment ok flag.
+ * Something went wrong when the
+ * PMTU changed...we did not mark
+ * this chunk for some reason?? I
+ * will fix it here by letting IP
+ * fragment it for now and printing
+ * a warning. This really should not
+ * happen ...
+ */
+ SCTP_PRINTF("Warning chunk of %d bytes > mtu:%d and yet PMTU disc missed\n",
+ chk->send_size, mtu);
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
+ }
+ if (((chk->send_size <= mtu) && (chk->send_size <= r_mtu)) ||
+ ((chk->flags & CHUNK_FLAGS_FRAGMENT_OK) && (chk->send_size <= asoc->peers_rwnd))) {
+ /* ok we will add this one */
+
+ /*
+ * Add an AUTH chunk, if chunk
+ * requires it, save the offset into
+ * the chain for AUTH
+ */
+ if ((auth == NULL) &&
+ (sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.peer_auth_chunks))) {
+
+ outchain = sctp_add_auth_chunk(outchain,
+ &endoutchain,
+ &auth,
+ &auth_offset,
+ stcb,
+ SCTP_DATA);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ }
+ outchain = sctp_copy_mbufchain(chk->data, outchain, &endoutchain, 0,
+ chk->send_size, chk->copy_by_ref);
+ if (outchain == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "No memory?\n");
+ if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ }
+ *reason_code = 3;
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ /* update our MTU size */
+ /* Do clear IP_DF ? */
+ if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
+ no_fragmentflg = 0;
+ }
+ /* unsigned subtraction of mtu */
+ if (mtu > chk->send_size)
+ mtu -= chk->send_size;
+ else
+ mtu = 0;
+ /* unsigned subtraction of r_mtu */
+ if (r_mtu > chk->send_size)
+ r_mtu -= chk->send_size;
+ else
+ r_mtu = 0;
+
+ to_out += chk->send_size;
+ if ((to_out > mx_mtu) && no_fragmentflg) {
+#ifdef INVARIANTS
+ panic("Exceeding mtu of %d out size is %d", mx_mtu, to_out);
+#else
+ SCTP_PRINTF("Exceeding mtu of %d out size is %d\n",
+ mx_mtu, to_out);
+#endif
+ }
+ chk->window_probe = 0;
+ data_list[bundle_at++] = chk;
+ if (bundle_at >= SCTP_MAX_DATA_BUNDLING) {
+ mtu = 0;
+ break;
+ }
+ if (chk->sent == SCTP_DATAGRAM_UNSENT) {
+ if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0) {
+ SCTP_STAT_INCR_COUNTER64(sctps_outorderchunks);
+ } else {
+ SCTP_STAT_INCR_COUNTER64(sctps_outunorderchunks);
+ }
+ if (((chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) == SCTP_DATA_LAST_FRAG) &&
+ ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == 0))
+ /*
+ * Count the number
+ * of user msgs that
+ * were fragmented;
+ * we do this by
+ * counting only
+ * when we see a
+ * LAST fragment.
+ */
+ SCTP_STAT_INCR_COUNTER64(sctps_fragusrmsgs);
+ }
+ if ((mtu == 0) || (r_mtu == 0) || (one_chunk)) {
+ if (one_chunk) {
+ data_list[0]->window_probe = 1;
+ net->window_probe = 1;
+ }
+ break;
+ }
+ } else {
+ /*
+ * Must be sent in order of the
+ * TSN's (on a network)
+ */
+ break;
+ }
+ } /* for (chunk gather loop for this net) */
+ } /* if asoc.state OPEN */
+ /* Is there something to send for this destination? */
+ if (outchain) {
+ /* We may need to start a control timer or two */
+ if (asconf) {
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp,
+ stcb, net);
+ /*
+ * do NOT clear the asconf flag as it is
+ * used to do appropriate source address
+ * selection.
+ */
+ }
+ if (cookie) {
+ sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, net);
+ cookie = 0;
+ }
+ /* must start a send timer if data is being sent */
+ if (bundle_at && (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer))) {
+ /*
+ * no timer running on this destination
+ * restart it.
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ } else if (sctp_cmt_on_off && sctp_cmt_pf && pf_hbflag && ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)
+ && (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer))) {
+ /*
+ * JRS 5/14/07 - If a HB has been sent to a
+ * PF destination and no T3 timer is
+ * currently running, start the T3 timer to
+ * track the HBs that were sent.
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ }
+ /* Now send it, if there is anything to send :> */
+ SCTP_BUF_PREPEND(outchain, sizeof(struct sctphdr), M_DONTWAIT);
+ if (outchain == NULL) {
+ /* out of mbufs */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
+ error = ENOBUFS;
+ goto errored_send;
+ }
+ shdr = mtod(outchain, struct sctphdr *);
+ shdr->src_port = inp->sctp_lport;
+ shdr->dest_port = stcb->rport;
+ shdr->v_tag = htonl(stcb->asoc.peer_vtag);
+ shdr->checksum = 0;
+ auth_offset += sizeof(struct sctphdr);
+ if ((error = sctp_lowlevel_chunk_output(inp,
+ stcb,
+ net,
+ (struct sockaddr *)&net->ro._l_addr,
+ outchain,
+ auth_offset,
+ auth,
+ no_fragmentflg,
+ bundle_at,
+ data_list[0],
+ asconf, so_locked))) {
+ /* error, we could not output */
+ if (error == ENOBUFS) {
+ SCTP_STAT_INCR(sctps_lowlevelerr);
+ asoc->ifp_had_enobuf = 1;
+ }
+ if (from_where == 0) {
+ SCTP_STAT_INCR(sctps_lowlevelerrusr);
+ }
+ errored_send:
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error);
+ if (hbflag) {
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+ *now_filled = 1;
+ *now = net->last_sent_time;
+ } else {
+ net->last_sent_time = *now;
+ }
+ hbflag = 0;
+ }
+ if (error == EHOSTUNREACH) {
+ /*
+ * Destination went unreachable
+ * during this send
+ */
+ sctp_move_to_an_alt(stcb, asoc, net);
+ }
+ *reason_code = 6;
+ /*-
+ * I add this line to be paranoid. As far as
+ * I can tell the continue takes us back to
+ * the top of the for, but just to make sure
+ * I will reset these again here.
+ */
+ ctl_cnt = bundle_at = 0;
+ continue; /* This takes us back to the
+ * for() for the nets. */
+ } else {
+ asoc->ifp_had_enobuf = 0;
+ }
+ outchain = endoutchain = NULL;
+ auth = NULL;
+ auth_offset = 0;
+ if (bundle_at || hbflag) {
+ /* For data/asconf and hb set time */
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time);
+ *now_filled = 1;
+ *now = net->last_sent_time;
+ } else {
+ net->last_sent_time = *now;
+ }
+ }
+ if (!no_out_cnt) {
+ *num_out += (ctl_cnt + bundle_at);
+ }
+ if (bundle_at) {
+ /* setup for a RTO measurement */
+ tsns_sent = data_list[0]->rec.data.TSN_seq;
+ /* fill time if not already filled */
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->time_last_sent);
+ *now_filled = 1;
+ *now = asoc->time_last_sent;
+ } else {
+ asoc->time_last_sent = *now;
+ }
+ data_list[0]->do_rtt = 1;
+ SCTP_STAT_INCR_BY(sctps_senddata, bundle_at);
+ sctp_clean_up_datalist(stcb, asoc, data_list, bundle_at, net);
+ if (sctp_early_fr) {
+ if (net->flight_size < net->cwnd) {
+ /* start or restart it */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, inp, stcb, net,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_2);
+ }
+ SCTP_STAT_INCR(sctps_earlyfrstrout);
+ sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, inp, stcb, net);
+ } else {
+ /* stop it if its running */
+ if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
+ SCTP_STAT_INCR(sctps_earlyfrstpout);
+ sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, inp, stcb, net,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_3);
+ }
+ }
+ }
+ }
+ if (one_chunk) {
+ break;
+ }
+ }
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, tsns_sent, SCTP_CWND_LOG_FROM_SEND);
+ }
+ }
+ if (old_startat == NULL) {
+ old_startat = send_start_at;
+ send_start_at = TAILQ_FIRST(&asoc->nets);
+ if (old_startat)
+ goto again_one_more_time;
+ }
+ /*
+ * At the end there should be no NON timed chunks hanging on this
+ * queue.
+ */
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, *num_out, SCTP_CWND_LOG_FROM_SEND);
+ }
+ if ((*num_out == 0) && (*reason_code == 0)) {
+ *reason_code = 4;
+ } else {
+ *reason_code = 5;
+ }
+ sctp_clean_up_ctl(stcb, asoc);
+ return (0);
+}
+
+void
+sctp_queue_op_err(struct sctp_tcb *stcb, struct mbuf *op_err)
+{
+ /*-
+ * Prepend an OPERATION-ERROR chunk header and put it on the end of
+ * the control chunk queue.
+ */
+ struct sctp_chunkhdr *hdr;
+ struct sctp_tmit_chunk *chk;
+ struct mbuf *mat;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(op_err);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ SCTP_BUF_PREPEND(op_err, sizeof(struct sctp_chunkhdr), M_DONTWAIT);
+ if (op_err == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ chk->send_size = 0;
+ mat = op_err;
+ while (mat != NULL) {
+ chk->send_size += SCTP_BUF_LEN(mat);
+ mat = SCTP_BUF_NEXT(mat);
+ }
+ chk->rec.chunk_id.id = SCTP_OPERATION_ERROR;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->data = op_err;
+ chk->whoTo = chk->asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ hdr = mtod(op_err, struct sctp_chunkhdr *);
+ hdr->chunk_type = SCTP_OPERATION_ERROR;
+ hdr->chunk_flags = 0;
+ hdr->chunk_length = htons(chk->send_size);
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue,
+ chk,
+ sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+}
+
+int
+sctp_send_cookie_echo(struct mbuf *m,
+ int offset,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ /*-
+ * pull out the cookie and put it at the front of the control chunk
+ * queue.
+ */
+ int at;
+ struct mbuf *cookie;
+ struct sctp_paramhdr parm, *phdr;
+ struct sctp_chunkhdr *hdr;
+ struct sctp_tmit_chunk *chk;
+ uint16_t ptype, plen;
+
+ /* First find the cookie in the param area */
+ cookie = NULL;
+ at = offset + sizeof(struct sctp_init_chunk);
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ do {
+ phdr = sctp_get_next_param(m, at, &parm, sizeof(parm));
+ if (phdr == NULL) {
+ return (-3);
+ }
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+ if (ptype == SCTP_STATE_COOKIE) {
+ int pad;
+
+ /* found the cookie */
+ if ((pad = (plen % 4))) {
+ plen += 4 - pad;
+ }
+ cookie = SCTP_M_COPYM(m, at, plen, M_DONTWAIT);
+ if (cookie == NULL) {
+ /* No memory */
+ return (-2);
+ }
+ break;
+ }
+ at += SCTP_SIZE32(plen);
+ } while (phdr);
+ if (cookie == NULL) {
+ /* Did not find the cookie */
+ return (-3);
+ }
+ /* ok, we got the cookie, let's change it into a cookie echo chunk */
+
+ /* first the change from param to cookie */
+ hdr = mtod(cookie, struct sctp_chunkhdr *);
+ hdr->chunk_type = SCTP_COOKIE_ECHO;
+ hdr->chunk_flags = 0;
+ /* get the chunk stuff now and place it in the FRONT of the queue */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(cookie);
+ return (-5);
+ }
+ chk->copy_by_ref = 0;
+ chk->send_size = plen;
+ chk->rec.chunk_id.id = SCTP_COOKIE_ECHO;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = CHUNK_FLAGS_FRAGMENT_OK;
+ chk->asoc = &stcb->asoc;
+ chk->data = cookie;
+ chk->whoTo = chk->asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ TAILQ_INSERT_HEAD(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ return (0);
+}
+
+void
+sctp_send_heartbeat_ack(struct sctp_tcb *stcb,
+ struct mbuf *m,
+ int offset,
+ int chk_length,
+ struct sctp_nets *net)
+{
+ /*
+ * take a HB request and make it into a HB ack and send it.
+ */
+ struct mbuf *outchain;
+ struct sctp_chunkhdr *chdr;
+ struct sctp_tmit_chunk *chk;
+
+
+ if (net == NULL)
+ /* must have a net pointer */
+ return;
+
+ outchain = SCTP_M_COPYM(m, offset, chk_length, M_DONTWAIT);
+ if (outchain == NULL) {
+ /* gak out of memory */
+ return;
+ }
+ chdr = mtod(outchain, struct sctp_chunkhdr *);
+ chdr->chunk_type = SCTP_HEARTBEAT_ACK;
+ chdr->chunk_flags = 0;
+ if (chk_length % 4) {
+ /* need pad */
+ uint32_t cpthis = 0;
+ int padlen;
+
+ padlen = 4 - (chk_length % 4);
+ m_copyback(outchain, chk_length, padlen, (caddr_t)&cpthis);
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(outchain);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->send_size = chk_length;
+ chk->rec.chunk_id.id = SCTP_HEARTBEAT_ACK;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->data = outchain;
+ chk->whoTo = net;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+}
+
+void
+sctp_send_cookie_ack(struct sctp_tcb *stcb)
+{
+ /* formulate and queue a cookie-ack back to sender */
+ struct mbuf *cookie_ack;
+ struct sctp_chunkhdr *hdr;
+ struct sctp_tmit_chunk *chk;
+
+ cookie_ack = NULL;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+
+ cookie_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (cookie_ack == NULL) {
+ /* no mbuf's */
+ return;
+ }
+ SCTP_BUF_RESV_UF(cookie_ack, SCTP_MIN_OVERHEAD);
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(cookie_ack);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->send_size = sizeof(struct sctp_chunkhdr);
+ chk->rec.chunk_id.id = SCTP_COOKIE_ACK;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->data = cookie_ack;
+ if (chk->asoc->last_control_chunk_from != NULL) {
+ chk->whoTo = chk->asoc->last_control_chunk_from;
+ } else {
+ chk->whoTo = chk->asoc->primary_destination;
+ }
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ hdr = mtod(cookie_ack, struct sctp_chunkhdr *);
+ hdr->chunk_type = SCTP_COOKIE_ACK;
+ hdr->chunk_flags = 0;
+ hdr->chunk_length = htons(chk->send_size);
+ SCTP_BUF_LEN(cookie_ack) = chk->send_size;
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ return;
+}
+
+
+void
+sctp_send_shutdown_ack(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /* formulate and queue a SHUTDOWN-ACK back to the sender */
+ struct mbuf *m_shutdown_ack;
+ struct sctp_shutdown_ack_chunk *ack_cp;
+ struct sctp_tmit_chunk *chk;
+
+ m_shutdown_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_ack_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_shutdown_ack == NULL) {
+ /* no mbuf's */
+ return;
+ }
+ SCTP_BUF_RESV_UF(m_shutdown_ack, SCTP_MIN_OVERHEAD);
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(m_shutdown_ack);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->send_size = sizeof(struct sctp_chunkhdr);
+ chk->rec.chunk_id.id = SCTP_SHUTDOWN_ACK;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->data = m_shutdown_ack;
+ chk->whoTo = net;
+ atomic_add_int(&net->ref_count, 1);
+
+ ack_cp = mtod(m_shutdown_ack, struct sctp_shutdown_ack_chunk *);
+ ack_cp->ch.chunk_type = SCTP_SHUTDOWN_ACK;
+ ack_cp->ch.chunk_flags = 0;
+ ack_cp->ch.chunk_length = htons(chk->send_size);
+ SCTP_BUF_LEN(m_shutdown_ack) = chk->send_size;
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ return;
+}
+
+void
+sctp_send_shutdown(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ /* formulate and queue a SHUTDOWN to the sender */
+ struct mbuf *m_shutdown;
+ struct sctp_shutdown_chunk *shutdown_cp;
+ struct sctp_tmit_chunk *chk;
+
+ m_shutdown = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_shutdown == NULL) {
+ /* no mbuf's */
+ return;
+ }
+ SCTP_BUF_RESV_UF(m_shutdown, SCTP_MIN_OVERHEAD);
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(m_shutdown);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->send_size = sizeof(struct sctp_shutdown_chunk);
+ chk->rec.chunk_id.id = SCTP_SHUTDOWN;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->data = m_shutdown;
+ chk->whoTo = net;
+ atomic_add_int(&net->ref_count, 1);
+
+ shutdown_cp = mtod(m_shutdown, struct sctp_shutdown_chunk *);
+ shutdown_cp->ch.chunk_type = SCTP_SHUTDOWN;
+ shutdown_cp->ch.chunk_flags = 0;
+ shutdown_cp->ch.chunk_length = htons(chk->send_size);
+ shutdown_cp->cumulative_tsn_ack = htonl(stcb->asoc.cumulative_tsn);
+ SCTP_BUF_LEN(m_shutdown) = chk->send_size;
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ return;
+}
+
+void
+sctp_send_asconf(struct sctp_tcb *stcb, struct sctp_nets *net, int addr_locked)
+{
+ /*
+ * formulate and queue an ASCONF to the peer. ASCONF parameters
+ * should be queued on the assoc queue.
+ */
+ struct sctp_tmit_chunk *chk;
+ struct mbuf *m_asconf;
+ struct sctp_asconf_chunk *acp;
+ int len;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ /* compose an ASCONF chunk, maximum length is PMTU */
+ m_asconf = sctp_compose_asconf(stcb, &len, addr_locked);
+ if (m_asconf == NULL) {
+ return;
+ }
+ acp = mtod(m_asconf, struct sctp_asconf_chunk *);
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ sctp_m_freem(m_asconf);
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->data = m_asconf;
+ chk->send_size = len;
+ chk->rec.chunk_id.id = SCTP_ASCONF;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags = 0;
+ chk->asoc = &stcb->asoc;
+ chk->whoTo = chk->asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ return;
+}
+
+void
+sctp_send_asconf_ack(struct sctp_tcb *stcb)
+{
+ /*
+ * formulate and queue an asconf-ack back to the sender. The asconf-ack
+ * must be stored in the tcb.
+ */
+ struct sctp_tmit_chunk *chk;
+ struct sctp_asconf_ack *ack, *latest_ack;
+ struct mbuf *m_ack, *m;
+ struct sctp_nets *net = NULL;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ /* Get the latest ASCONF-ACK */
+ latest_ack = TAILQ_LAST(&stcb->asoc.asconf_ack_sent, sctp_asconf_ackhead);
+ if (latest_ack == NULL) {
+ return;
+ }
+ if (latest_ack->last_sent_to != NULL &&
+ latest_ack->last_sent_to == stcb->asoc.last_control_chunk_from) {
+ /* we're doing a retransmission */
+ net = sctp_find_alternate_net(stcb, stcb->asoc.last_control_chunk_from, 0);
+ if (net == NULL) {
+ /* no alternate */
+ if (stcb->asoc.last_control_chunk_from == NULL)
+ net = stcb->asoc.primary_destination;
+ else
+ net = stcb->asoc.last_control_chunk_from;
+ }
+ } else {
+ /* normal case */
+ if (stcb->asoc.last_control_chunk_from == NULL)
+ net = stcb->asoc.primary_destination;
+ else
+ net = stcb->asoc.last_control_chunk_from;
+ }
+ latest_ack->last_sent_to = net;
+
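+ /*
+ * Queue a copy of every cached ASCONF-ACK that still has data
+ * for transmission to the selected net.
+ */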
+ TAILQ_FOREACH(ack, &stcb->asoc.asconf_ack_sent, next) {
+ if (ack->data == NULL) {
+ continue;
+ }
+ /* copy the asconf_ack */
+ m_ack = SCTP_M_COPYM(ack->data, 0, M_COPYALL, M_DONTWAIT);
+ if (m_ack == NULL) {
+ /* couldn't copy it */
+ return;
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ /* no memory */
+ if (m_ack)
+ sctp_m_freem(m_ack);
+ return;
+ }
+ chk->copy_by_ref = 0;
+
+ chk->whoTo = net;
+ chk->data = m_ack;
+ chk->send_size = 0;
+ /* Get size */
+ m = m_ack;
+ chk->send_size = ack->len;
+ chk->rec.chunk_id.id = SCTP_ASCONF_ACK;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK; /* XXX */
+ chk->asoc = &stcb->asoc;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+
+ TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next);
+ chk->asoc->ctrl_queue_cnt++;
+ }
+ return;
+}
+
+
+static int
+sctp_chunk_retransmission(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int *cnt_out, struct timeval *now, int *now_filled, int *fr_done, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ /*-
+ * send out one MTU of retransmission. If fast_retransmit is
+ * happening we ignore the cwnd. Otherwise we obey the cwnd and
+ * rwnd. For a Cookie or Asconf in the control chunk queue we
+ * retransmit them by themselves.
+ *
+ * For data chunks we will pick out the lowest TSN's in the sent_queue
+ * marked for resend and bundle them all together (up to a MTU of
+ * destination). The address to send to should have been
+ * selected/changed where the retransmission was marked (i.e. in FR
+ * or t3-timeout routines).
+ */
+ struct sctp_tmit_chunk *data_list[SCTP_MAX_DATA_BUNDLING];
+ struct sctp_tmit_chunk *chk, *fwd;
+ struct mbuf *m, *endofchain;
+ struct sctphdr *shdr;
+ int asconf;
+ struct sctp_nets *net = NULL;
+ uint32_t tsns_sent = 0;
+ int no_fragmentflg, bundle_at, cnt_thru;
+ unsigned int mtu;
+ int error, i, one_chunk, fwd_tsn, ctl_cnt, tmr_started;
+ struct sctp_auth_chunk *auth = NULL;
+ uint32_t auth_offset = 0;
+ uint32_t dmtu = 0;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ tmr_started = ctl_cnt = bundle_at = error = 0;
+ no_fragmentflg = 1;
+ asconf = 0;
+ fwd_tsn = 0;
+ *cnt_out = 0;
+ fwd = NULL;
+ endofchain = m = NULL;
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xC3, 1);
+#endif
+ if ((TAILQ_EMPTY(&asoc->sent_queue)) &&
+ (TAILQ_EMPTY(&asoc->control_send_queue))) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "SCTP hits empty queue with cnt set to %d?\n",
+ asoc->sent_queue_retran_cnt);
+ asoc->sent_queue_cnt = 0;
+ asoc->sent_queue_cnt_removeable = 0;
+ /* send back 0/0 so we enter normal transmission */
+ *cnt_out = 0;
+ return (0);
+ }
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if ((chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) ||
+ (chk->rec.chunk_id.id == SCTP_ASCONF) ||
+ (chk->rec.chunk_id.id == SCTP_STREAM_RESET) ||
+ (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN)) {
+ if (chk->rec.chunk_id.id == SCTP_STREAM_RESET) {
+ if (chk != asoc->str_reset) {
+ /*
+ * not eligible for retran if it's
+ * not ours
+ */
+ continue;
+ }
+ }
+ ctl_cnt++;
+ if (chk->rec.chunk_id.id == SCTP_ASCONF) {
+ no_fragmentflg = 1;
+ asconf = 1;
+ }
+ if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) {
+ fwd_tsn = 1;
+ fwd = chk;
+ }
+ /*
+ * Add an AUTH chunk if the chunk requires it, and save
+ * the offset into the chain for AUTH
+ */
+ if ((auth == NULL) &&
+ (sctp_auth_is_required_chunk(chk->rec.chunk_id.id,
+ stcb->asoc.peer_auth_chunks))) {
+ m = sctp_add_auth_chunk(m, &endofchain,
+ &auth, &auth_offset,
+ stcb,
+ chk->rec.chunk_id.id);
+ }
+ m = sctp_copy_mbufchain(chk->data, m, &endofchain, 0, chk->send_size, chk->copy_by_ref);
+ break;
+ }
+ }
+ one_chunk = 0;
+ cnt_thru = 0;
+ /* do we have control chunks to retransmit? */
+ if (m != NULL) {
+ /* Start a timer no matter if we succeed or fail */
+ if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
+ sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, chk->whoTo);
+ } else if (chk->rec.chunk_id.id == SCTP_ASCONF)
+ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, chk->whoTo);
+
+ SCTP_BUF_PREPEND(m, sizeof(struct sctphdr), M_DONTWAIT);
+ if (m == NULL) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
+ return (ENOBUFS);
+ }
+ shdr = mtod(m, struct sctphdr *);
+ shdr->src_port = inp->sctp_lport;
+ shdr->dest_port = stcb->rport;
+ shdr->v_tag = htonl(stcb->asoc.peer_vtag);
+ shdr->checksum = 0;
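+ /*
+ * Account for the prepended SCTP header so auth_offset still
+ * points at the AUTH chunk within the mbuf chain.
+ */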
+ auth_offset += sizeof(struct sctphdr);
+ chk->snd_count++; /* update our count */
+
+ if ((error = sctp_lowlevel_chunk_output(inp, stcb, chk->whoTo,
+ (struct sockaddr *)&chk->whoTo->ro._l_addr, m, auth_offset,
+ auth, no_fragmentflg, 0, NULL, asconf, so_locked))) {
+ SCTP_STAT_INCR(sctps_lowlevelerr);
+ return (error);
+ }
+ m = endofchain = NULL;
+ auth = NULL;
+ auth_offset = 0;
+ /*
+ * We don't want to mark the net->sent time here since
+ * we use this for HB and retransmissions cannot measure RTT
+ */
+ /* (void)SCTP_GETTIME_TIMEVAL(&chk->whoTo->last_sent_time); */
+ *cnt_out += 1;
+ chk->sent = SCTP_DATAGRAM_SENT;
+ sctp_ucount_decr(asoc->sent_queue_retran_cnt);
+ if (fwd_tsn == 0) {
+ return (0);
+ } else {
+ /* Clean up the fwd-tsn list */
+ sctp_clean_up_ctl(stcb, asoc);
+ return (0);
+ }
+ }
+ /*
+ * Ok, it is just data retransmission we need to do, or that plus
+ * a fwd-tsn with it.
+ */
+ if (TAILQ_EMPTY(&asoc->sent_queue)) {
+ return (SCTP_RETRAN_DONE);
+ }
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT)) {
+ /* not yet open, resend the cookie and that is it */
+ return (1);
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(20, inp, stcb, NULL);
+#endif
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ /* No, not sent to this net or not ready for rtx */
+ continue;
+ }
+ if ((sctp_max_retran_chunk) && (chk->snd_count >= sctp_max_retran_chunk)) {
+ /* Gak, we have exceeded max unlucky retran, abort! */
+ SCTP_PRINTF("Gak, chk->snd_count:%d >= max:%d - send abort\n",
+ chk->snd_count,
+ sctp_max_retran_chunk);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ sctp_abort_an_association(stcb->sctp_ep, stcb, 0, NULL, so_locked);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return (SCTP_RETRAN_EXIT);
+ }
+ /* pick up the net */
+ net = chk->whoTo;
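+ /*
+ * Figure the per-destination payload budget: path MTU minus the
+ * IP (v6 or v4) plus SCTP header overhead.
+ */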
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ mtu = (net->mtu - SCTP_MIN_OVERHEAD);
+ } else {
+ mtu = net->mtu - SCTP_MIN_V4_OVERHEAD;
+ }
+
+ if ((asoc->peers_rwnd < mtu) && (asoc->total_flight > 0)) {
+ /* No room in peers rwnd */
+ uint32_t tsn;
+
+ tsn = asoc->last_acked_seq + 1;
+ if (tsn == chk->rec.data.TSN_seq) {
+ /*
+ * we make a special exception for this
+ * case. The peer has no rwnd but is missing
+ * the lowest chunk.. which is probably what
+ * is holding up the rwnd.
+ */
+ goto one_chunk_around;
+ }
+ return (1);
+ }
+one_chunk_around:
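+ /*
+ * If the peer's rwnd cannot hold a full MTU, restrict ourselves to
+ * a single chunk; with a zero rwnd and nothing in flight this
+ * becomes a window probe.
+ */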
+ if (asoc->peers_rwnd < mtu) {
+ one_chunk = 1;
+ if ((asoc->peers_rwnd == 0) &&
+ (asoc->total_flight == 0)) {
+ chk->window_probe = 1;
+ chk->whoTo->window_probe = 1;
+ }
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xC3, 2);
+#endif
+ bundle_at = 0;
+ m = NULL;
+ net->fast_retran_ip = 0;
+ if (chk->rec.data.doing_fast_retransmit == 0) {
+ /*
+ * if no FR is in progress, skip destinations that have
+ * flight_size > cwnd.
+ */
+ if (net->flight_size >= net->cwnd) {
+ continue;
+ }
+ } else {
+ /*
+ * Mark the destination net to have FR recovery
+ * limits put on it.
+ */
+ *fr_done = 1;
+ net->fast_retran_ip = 1;
+ }
+
+ /*
+ * if no AUTH is yet included and this chunk requires it,
+ * make sure to account for it. We don't apply the size
+ * until the AUTH chunk is actually added below in case
+ * there is no room for this chunk.
+ */
+ if ((auth == NULL) &&
+ sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.peer_auth_chunks)) {
+ dmtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+ } else
+ dmtu = 0;
+
+ if ((chk->send_size <= (mtu - dmtu)) ||
+ (chk->flags & CHUNK_FLAGS_FRAGMENT_OK)) {
+ /* ok we will add this one */
+ if ((auth == NULL) &&
+ (sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.peer_auth_chunks))) {
+ m = sctp_add_auth_chunk(m, &endofchain,
+ &auth, &auth_offset,
+ stcb, SCTP_DATA);
+ }
+ m = sctp_copy_mbufchain(chk->data, m, &endofchain, 0, chk->send_size, chk->copy_by_ref);
+ if (m == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ /* Do clear IP_DF ? */
+ if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) {
+ no_fragmentflg = 0;
+ }
+ /* update our MTU size */
+ if (mtu > (chk->send_size + dmtu))
+ mtu -= (chk->send_size + dmtu);
+ else
+ mtu = 0;
+ data_list[bundle_at++] = chk;
+ if (one_chunk && (asoc->total_flight <= 0)) {
+ SCTP_STAT_INCR(sctps_windowprobed);
+ }
+ }
+ if (one_chunk == 0) {
+ /*
+ * now are there any more chunks forward from chk to
+ * pick up?
+ */
+ fwd = TAILQ_NEXT(chk, sctp_next);
+ while (fwd) {
+ if (fwd->sent != SCTP_DATAGRAM_RESEND) {
+ /* Nope, not for retran */
+ fwd = TAILQ_NEXT(fwd, sctp_next);
+ continue;
+ }
+ if (fwd->whoTo != net) {
+ /* Nope, not the net in question */
+ fwd = TAILQ_NEXT(fwd, sctp_next);
+ continue;
+ }
+ if ((auth == NULL) &&
+ sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.peer_auth_chunks)) {
+ dmtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
+ } else
+ dmtu = 0;
+ if (fwd->send_size <= (mtu - dmtu)) {
+ if ((auth == NULL) &&
+ (sctp_auth_is_required_chunk(SCTP_DATA,
+ stcb->asoc.peer_auth_chunks))) {
+ m = sctp_add_auth_chunk(m,
+ &endofchain,
+ &auth, &auth_offset,
+ stcb,
+ SCTP_DATA);
+ }
+ m = sctp_copy_mbufchain(fwd->data, m, &endofchain, 0, fwd->send_size, fwd->copy_by_ref);
+ if (m == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ /* Do clear IP_DF ? */
+ if (fwd->flags & CHUNK_FLAGS_FRAGMENT_OK) {
+ no_fragmentflg = 0;
+ }
+ /* update our MTU size */
+ if (mtu > (fwd->send_size + dmtu))
+ mtu -= (fwd->send_size + dmtu);
+ else
+ mtu = 0;
+ data_list[bundle_at++] = fwd;
+ if (bundle_at >= SCTP_MAX_DATA_BUNDLING) {
+ break;
+ }
+ fwd = TAILQ_NEXT(fwd, sctp_next);
+ } else {
+ /* can't fit so we are done */
+ break;
+ }
+ }
+ }
+ /* Is there something to send for this destination? */
+ if (m) {
+ /*
+ * No matter if we fail or succeed we should start a
+ * timer. A failure is like a lost IP packet :-)
+ */
+ if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ /*
+ * no timer running on this destination;
+ * restart it.
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ tmr_started = 1;
+ }
+ SCTP_BUF_PREPEND(m, sizeof(struct sctphdr), M_DONTWAIT);
+ if (m == NULL) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOBUFS);
+ return (ENOBUFS);
+ }
+ shdr = mtod(m, struct sctphdr *);
+ shdr->src_port = inp->sctp_lport;
+ shdr->dest_port = stcb->rport;
+ shdr->v_tag = htonl(stcb->asoc.peer_vtag);
+ shdr->checksum = 0;
+ auth_offset += sizeof(struct sctphdr);
+ /* Now let's send it, if there is anything to send :> */
+ if ((error = sctp_lowlevel_chunk_output(inp, stcb, net,
+ (struct sockaddr *)&net->ro._l_addr, m, auth_offset,
+ auth, no_fragmentflg, 0, NULL, asconf, so_locked))) {
+ /* error, we could not output */
+ SCTP_STAT_INCR(sctps_lowlevelerr);
+ return (error);
+ }
+ m = endofchain = NULL;
+ auth = NULL;
+ auth_offset = 0;
+ /* For HB's */
+ /*
+ * We don't want to mark the net->sent time here
+ * since we use this for HB and retransmissions cannot
+ * measure RTT
+ */
+ /* (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time); */
+
+ /* For auto-close */
+ cnt_thru++;
+ if (*now_filled == 0) {
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->time_last_sent);
+ *now = asoc->time_last_sent;
+ *now_filled = 1;
+ } else {
+ asoc->time_last_sent = *now;
+ }
+ *cnt_out += bundle_at;
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_audit_log(0xC4, bundle_at);
+#endif
+ if (bundle_at) {
+ tsns_sent = data_list[0]->rec.data.TSN_seq;
+ }
+ for (i = 0; i < bundle_at; i++) {
+ SCTP_STAT_INCR(sctps_sendretransdata);
+ data_list[i]->sent = SCTP_DATAGRAM_SENT;
+ /*
+ * When we have revoked data and we
+ * retransmit it, we clear the revoked
+ * flag since this flag dictates whether
+ * we subtracted it from the flight size
+ */
+ if (data_list[i]->rec.data.chunk_was_revoked) {
+ /* Deflate the cwnd */
+ data_list[i]->whoTo->cwnd -= data_list[i]->book_size;
+ data_list[i]->rec.data.chunk_was_revoked = 0;
+ }
+ data_list[i]->snd_count++;
+ sctp_ucount_decr(asoc->sent_queue_retran_cnt);
+ /* record the time */
+ data_list[i]->sent_rcv_time = asoc->time_last_sent;
+ if (data_list[i]->book_size_scale) {
+ /*
+ * need to double the book size on
+ * this one
+ */
+ data_list[i]->book_size_scale = 0;
+ /*
+ * Since we double the booksize, we
+ * must also double the output queue
+ * size, since this gets shrunk by
+ * this amount when we free.
+ */
+ atomic_add_int(&((asoc)->total_output_queue_size), data_list[i]->book_size);
+ data_list[i]->book_size *= 2;
+
+
+ } else {
+ if (sctp_logging_level & SCTP_LOG_RWND_ENABLE) {
+ sctp_log_rwnd(SCTP_DECREASE_PEER_RWND,
+ asoc->peers_rwnd, data_list[i]->send_size, sctp_peer_chunk_oh);
+ }
+ asoc->peers_rwnd = sctp_sbspace_sub(asoc->peers_rwnd,
+ (uint32_t) (data_list[i]->send_size +
+ sctp_peer_chunk_oh));
+ }
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_UP_RSND,
+ data_list[i]->whoTo->flight_size,
+ data_list[i]->book_size,
+ (uintptr_t) data_list[i]->whoTo,
+ data_list[i]->rec.data.TSN_seq);
+ }
+ sctp_flight_size_increase(data_list[i]);
+ sctp_total_flight_increase(stcb, data_list[i]);
+ if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
+ /* SWS sender side engages */
+ asoc->peers_rwnd = 0;
+ }
+ if ((i == 0) &&
+ (data_list[i]->rec.data.doing_fast_retransmit)) {
+ SCTP_STAT_INCR(sctps_sendfastretrans);
+ if ((data_list[i] == TAILQ_FIRST(&asoc->sent_queue)) &&
+ (tmr_started == 0)) {
+ /*-
+ * ok we just fast-retrans'd
+ * the lowest TSN, i.e. the
+ * first on the list. In
+ * this case we want to give
+ * some more time to get a
+ * SACK back without a
+ * t3-expiring.
+ */
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, inp, stcb, net,
+ SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_4);
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ }
+ }
+ }
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, net, tsns_sent, SCTP_CWND_LOG_FROM_RESEND);
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(21, inp, stcb, NULL);
+#endif
+ } else {
+ /* None will fit */
+ return (1);
+ }
+ if (asoc->sent_queue_retran_cnt <= 0) {
+ /* all done we have no more to retran */
+ asoc->sent_queue_retran_cnt = 0;
+ break;
+ }
+ if (one_chunk) {
+ /* No more room in rwnd */
+ return (1);
+ }
+ /* stop the for loop here. we sent out a packet */
+ break;
+ }
+ return (0);
+}
+
+
+static int
+sctp_timer_validation(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int ret)
+{
+ struct sctp_nets *net;
+
+ /* Validate that a timer is running somewhere */
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) {
+ /* Here is a timer */
+ return (ret);
+ }
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ /* Gak, we did not have a timer somewhere */
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Deadlock avoided starting timer on a dest at retran\n");
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, asoc->primary_destination);
+ return (ret);
+}
+
+void
+sctp_chunk_output(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ int from_where,
+ int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ /*-
+ * Ok this is the generic chunk service queue. We must do the
+ * following:
+ * - See if there are retransmits pending, if so we must
+ * do these first.
+ * - Service the stream queue that is next, moving any
+ * message (note I must get a complete message i.e.
+ * FIRST/MIDDLE and LAST to the out queue in one pass) and assigning
+ * TSN's
+ * - Check to see if the cwnd/rwnd allows any output, if so we
+ * go ahead and formulate and send the low level chunks. Making sure
+ * to combine any control in the control chunk queue also.
+ */
+ struct sctp_association *asoc;
+ struct sctp_nets *net;
+ int error = 0, num_out = 0, tot_out = 0, ret = 0, reason_code = 0,
+ burst_cnt = 0, burst_limit = 0;
+ struct timeval now;
+ int now_filled = 0;
+ int cwnd_full = 0;
+ int nagle_on = 0;
+ int frag_point = sctp_get_frag_point(stcb, &stcb->asoc);
+ int un_sent = 0;
+ int fr_done, tot_frs = 0;
+
+ asoc = &stcb->asoc;
+ if (from_where == SCTP_OUTPUT_FROM_USR_SEND) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NODELAY)) {
+ nagle_on = 0;
+ } else {
+ nagle_on = 1;
+ }
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+
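+ /* un_sent is the byte count queued for the peer but not yet in flight */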
+ un_sent = (stcb->asoc.total_output_queue_size - stcb->asoc.total_flight);
+
+ if ((un_sent <= 0) &&
+ (TAILQ_EMPTY(&asoc->control_send_queue)) &&
+ (asoc->sent_queue_retran_cnt == 0)) {
+ /* Nothing to do unless there is something left to be sent */
+ return;
+ }
+ /*
+ * If we have something to send, data or control, AND a sack timer
+ * is running, piggy-back the sack.
+ */
+ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ sctp_send_sack(stcb);
+ (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer);
+ }
+ while (asoc->sent_queue_retran_cnt) {
+ /*-
+ * Ok, it is retransmission time only, we send out only ONE
+ * packet with a single call off to the retran code.
+ */
+ if (from_where == SCTP_OUTPUT_FROM_COOKIE_ACK) {
+ /*-
+ * Special hook for handling cookies discarded
+ * by the peer that carried data. Send cookie-ack only
+ * and then the next call will get the retransmissions.
+ */
+ (void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1,
+ &cwnd_full, from_where,
+ &now, &now_filled, frag_point, so_locked);
+ return;
+ } else if (from_where != SCTP_OUTPUT_FROM_HB_TMR) {
+ /* if it's not from a HB then do it */
+ fr_done = 0;
+ ret = sctp_chunk_retransmission(inp, stcb, asoc, &num_out, &now, &now_filled, &fr_done, so_locked);
+ if (fr_done) {
+ tot_frs++;
+ }
+ } else {
+ /*
+ * it's from any other place, we don't allow retran
+ * output (only control)
+ */
+ ret = 1;
+ }
+ if (ret > 0) {
+ /* Can't send anymore */
+ /*-
+ * now let's push out control by calling med-level
+ * output once. This assures that we WILL send HB's
+ * if queued too.
+ */
+ (void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1,
+ &cwnd_full, from_where,
+ &now, &now_filled, frag_point, so_locked);
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(8, inp, stcb, NULL);
+#endif
+ (void)sctp_timer_validation(inp, stcb, asoc, ret);
+ return;
+ }
+ if (ret < 0) {
+ /*-
+ * The count was off; retran is not happening, so fall
+ * through to the normal transmission path.
+ */
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(9, inp, stcb, NULL);
+#endif
+ if (ret == SCTP_RETRAN_EXIT) {
+ return;
+ }
+ break;
+ }
+ if (from_where == SCTP_OUTPUT_FROM_T3) {
+ /* Only one transmission allowed out of a timeout */
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(10, inp, stcb, NULL);
+#endif
+ /* Push out any control */
+ (void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1, &cwnd_full, from_where,
+ &now, &now_filled, frag_point, so_locked);
+ return;
+ }
+ if (tot_frs > asoc->max_burst) {
+ /* Hit FR burst limit */
+ return;
+ }
+ if ((num_out == 0) && (ret == 0)) {
+
+ /* No more retrans to send */
+ break;
+ }
+ }
+#ifdef SCTP_AUDITING_ENABLED
+ sctp_auditing(12, inp, stcb, NULL);
+#endif
+ /* Check for bad destinations, if they exist move chunks around. */
+ burst_limit = asoc->max_burst;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
+ SCTP_ADDR_NOT_REACHABLE) {
+ /*-
+ * If possible, move things off of this address. We
+ * may still send below due to the dormant state, but
+ * we try to find an alternate address to send to,
+ * and if we have one we move all queued data on the
+ * out wheel to this alternate address.
+ */
+ if (net->ref_count > 1)
+ sctp_move_to_an_alt(stcb, asoc, net);
+ } else if (sctp_cmt_on_off && sctp_cmt_pf && ((net->dest_state & SCTP_ADDR_PF) ==
+ SCTP_ADDR_PF)) {
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the current
+ * destination is in PF state, move all queued data
+ * to an alternate destination.
+ */
+ if (net->ref_count > 1)
+ sctp_move_to_an_alt(stcb, asoc, net);
+ } else {
+ /*-
+ * if ((asoc->sat_network) || (net->addr_is_local))
+ * { burst_limit = asoc->max_burst *
+ * SCTP_SAT_NETWORK_BURST_INCR; }
+ */
+ if (sctp_use_cwnd_based_maxburst) {
+ if ((net->flight_size + (burst_limit * net->mtu)) < net->cwnd) {
+ /*
+ * JRS - Use the congestion control
+ * given in the congestion control
+ * module
+ */
+ asoc->cc_functions.sctp_cwnd_update_after_output(stcb, net, burst_limit);
+ if (sctp_logging_level & SCTP_LOG_MAXBURST_ENABLE) {
+ sctp_log_maxburst(stcb, net, 0, burst_limit, SCTP_MAX_BURST_APPLIED);
+ }
+ SCTP_STAT_INCR(sctps_maxburstqueued);
+ }
+ net->fast_retran_ip = 0;
+ } else {
+ if (net->flight_size == 0) {
+ /* Should be decaying the cwnd here */
+ ;
+ }
+ }
+ }
+
+ }
+ burst_cnt = 0;
+ cwnd_full = 0;
+ do {
+ error = sctp_med_chunk_output(inp, stcb, asoc, &num_out,
+ &reason_code, 0, &cwnd_full, from_where,
+ &now, &now_filled, frag_point, so_locked);
+ if (error) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Error %d was returned from med-c-op\n", error);
+ if (sctp_logging_level & SCTP_LOG_MAXBURST_ENABLE) {
+ sctp_log_maxburst(stcb, asoc->primary_destination, error, burst_cnt, SCTP_MAX_BURST_ERROR_STOP);
+ }
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, NULL, error, SCTP_SEND_NOW_COMPLETES);
+ sctp_log_cwnd(stcb, NULL, 0xdeadbeef, SCTP_SEND_NOW_COMPLETES);
+ }
+ break;
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT3, "m-c-o put out %d\n", num_out);
+
+ tot_out += num_out;
+ burst_cnt++;
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, NULL, num_out, SCTP_SEND_NOW_COMPLETES);
+ if (num_out == 0) {
+ sctp_log_cwnd(stcb, NULL, reason_code, SCTP_SEND_NOW_COMPLETES);
+ }
+ }
+ if (nagle_on) {
+ /*-
+ * When nagle is on, we look at how much is un_sent, then
+ * if it's smaller than an MTU and we have data in
+ * flight we stop.
+ */
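+ /*
+ * un_sent here is the queued-but-not-in-flight bytes plus the data
+ * chunk header overhead for each chunk not yet in flight.
+ */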
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ ((stcb->asoc.chunks_on_out_queue - stcb->asoc.total_flight_count)
+ * sizeof(struct sctp_data_chunk)));
+ if ((un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD)) &&
+ (stcb->asoc.total_flight > 0)) {
+ break;
+ }
+ }
+ if (TAILQ_EMPTY(&asoc->control_send_queue) &&
+ TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->out_wheel)) {
+ /* Nothing left to send */
+ break;
+ }
+ if ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) <= 0) {
+ /* Nothing left to send */
+ break;
+ }
+ } while (num_out && (sctp_use_cwnd_based_maxburst ||
+ (burst_cnt < burst_limit)));
+
+ if (sctp_use_cwnd_based_maxburst == 0) {
+ if (burst_cnt >= burst_limit) {
+ SCTP_STAT_INCR(sctps_maxburstqueued);
+ asoc->burst_limit_applied = 1;
+ if (sctp_logging_level & SCTP_LOG_MAXBURST_ENABLE) {
+ sctp_log_maxburst(stcb, asoc->primary_destination, 0, burst_cnt, SCTP_MAX_BURST_APPLIED);
+ }
+ } else {
+ asoc->burst_limit_applied = 0;
+ }
+ }
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ sctp_log_cwnd(stcb, NULL, tot_out, SCTP_SEND_NOW_COMPLETES);
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, we have put out %d chunks\n",
+ tot_out);
+
+ /*-
+ * Now we need to clean up the control chunk chain if an ECNE is on
+ * it. It must be marked as UNSENT again so the next call will continue
+ * to send it until we get a CWR to remove it.
+ */
+ if (stcb->asoc.ecn_echo_cnt_onq)
+ sctp_fix_ecn_echo(asoc);
+ return;
+}
+
+
+int
+sctp_output(inp, m, addr, control, p, flags)
+ struct sctp_inpcb *inp;
+ struct mbuf *m;
+ struct sockaddr *addr;
+ struct mbuf *control;
+ struct thread *p;
+ int flags;
+{
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ if (inp->sctp_socket == NULL) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ return (sctp_sosend(inp->sctp_socket,
+ addr,
+ (struct uio *)NULL,
+ m,
+ control,
+ flags, p
+ ));
+}
+
+void
+send_forward_tsn(struct sctp_tcb *stcb,
+ struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+ struct sctp_forward_tsn_chunk *fwdtsn;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) {
+ /* mark it as unsent */
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ /* Do we correct its output location? */
+ if (chk->whoTo != asoc->primary_destination) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ }
+ goto sctp_fill_in_rest;
+ }
+ }
+ /* Ok if we reach here we must build one */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_FORWARD_CUM_TSN;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->asoc = asoc;
+ chk->whoTo = NULL;
+
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ TAILQ_INSERT_TAIL(&asoc->control_send_queue, chk, sctp_next);
+ asoc->ctrl_queue_cnt++;
+sctp_fill_in_rest:
+ /*-
+ * Here we go through and fill out the part that deals with
+ * stream/seq of the ones we skip.
+ */
+ SCTP_BUF_LEN(chk->data) = 0;
+ {
+ struct sctp_tmit_chunk *at, *tp1, *last;
+ struct sctp_strseq *strseq;
+ unsigned int cnt_of_space, i, ovh;
+ unsigned int space_needed;
+ unsigned int cnt_of_skipped = 0;
+
+ TAILQ_FOREACH(at, &asoc->sent_queue, sctp_next) {
+ if (at->sent != SCTP_FORWARD_TSN_SKIP) {
+ /* no more to look at */
+ break;
+ }
+ if (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED) {
+ /* We don't report these */
+ continue;
+ }
+ cnt_of_skipped++;
+ }
+ space_needed = (sizeof(struct sctp_forward_tsn_chunk) +
+ (cnt_of_skipped * sizeof(struct sctp_strseq)));
+
+ cnt_of_space = M_TRAILINGSPACE(chk->data);
+
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MIN_OVERHEAD;
+ } else {
+ ovh = SCTP_MIN_V4_OVERHEAD;
+ }
+ if (cnt_of_space > (asoc->smallest_mtu - ovh)) {
+ /* trim to a mtu size */
+ cnt_of_space = asoc->smallest_mtu - ovh;
+ }
+ if (cnt_of_space < space_needed) {
+ /*-
+ * ok we must trim down the chunk by lowering the
+ * advanced peer ack point.
+ */
+ cnt_of_skipped = (cnt_of_space -
+ ((sizeof(struct sctp_forward_tsn_chunk)) /
+ sizeof(struct sctp_strseq)));
+ /*-
+ * Go through and find the TSN that will be the one
+ * we report.
+ */
+ at = TAILQ_FIRST(&asoc->sent_queue);
+ for (i = 0; i < cnt_of_skipped; i++) {
+ tp1 = TAILQ_NEXT(at, sctp_next);
+ at = tp1;
+ }
+ last = at;
+ /*-
+ * last now points to last one I can report, update
+ * peer ack point
+ */
+ asoc->advanced_peer_ack_point = last->rec.data.TSN_seq;
+ space_needed -= (cnt_of_skipped * sizeof(struct sctp_strseq));
+ }
+ chk->send_size = space_needed;
+ /* Setup the chunk */
+ fwdtsn = mtod(chk->data, struct sctp_forward_tsn_chunk *);
+ fwdtsn->ch.chunk_length = htons(chk->send_size);
+ fwdtsn->ch.chunk_flags = 0;
+ fwdtsn->ch.chunk_type = SCTP_FORWARD_CUM_TSN;
+ fwdtsn->new_cumulative_tsn = htonl(asoc->advanced_peer_ack_point);
+ chk->send_size = (sizeof(struct sctp_forward_tsn_chunk) +
+ (cnt_of_skipped * sizeof(struct sctp_strseq)));
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ fwdtsn++;
+ /*-
+ * Move pointer to after the fwdtsn and transfer to the
+ * strseq pointer.
+ */
+ strseq = (struct sctp_strseq *)fwdtsn;
+ /*-
+ * Now populate the strseq list. This is done blindly
+ * without pulling out duplicate stream info. This is
+ * inefficient but won't harm the process since the peer will
+ * look at these in sequence and will thus release anything.
+ * It could mean we exceed the PMTU and chop off some that
+ * we could have included, but this is unlikely (aka 1432/4
+ * would mean 300+ stream seq's would have to be reported in
+ * one FWD-TSN). With a bit of work we can later FIX this to
+ * optimize and pull out duplicates, but it does add more
+ * overhead. So for now... not!
+ */
+ at = TAILQ_FIRST(&asoc->sent_queue);
+ for (i = 0; i < cnt_of_skipped; i++) {
+ tp1 = TAILQ_NEXT(at, sctp_next);
+ if (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED) {
+ /* We don't report these */
+ i--;
+ at = tp1;
+ continue;
+ }
+ strseq->stream = ntohs(at->rec.data.stream_number);
+ strseq->sequence = ntohs(at->rec.data.stream_seq);
+ strseq++;
+ at = tp1;
+ }
+ }
+ return;
+
+}
+
+void
+sctp_send_sack(struct sctp_tcb *stcb)
+{
+ /*-
+ * Queue up a SACK in the control queue. We must first check to see
+ * if a SACK is somehow on the control queue. If so, we will take
+ * and remove the old one.
+ */
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk, *a_chk;
+ struct sctp_sack_chunk *sack;
+ struct sctp_gap_ack_block *gap_descriptor;
+ struct sack_track *selector;
+ int mergeable = 0;
+ int offset;
+ caddr_t limit;
+ uint32_t *dup;
+ int limit_reached = 0;
+ unsigned int i, jstart, siz, j;
+ unsigned int num_gap_blocks = 0, space;
+ int num_dups = 0;
+ int space_req;
+
+ a_chk = NULL;
+ asoc = &stcb->asoc;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (asoc->last_data_chunk_from == NULL) {
+ /* Hmm we never received anything */
+ return;
+ }
+ sctp_set_rwnd(stcb, asoc);
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) {
+ /* Hmm, found a sack already on queue, remove it */
+ TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
+ asoc->ctrl_queue_cnt--;
+ a_chk = chk;
+ if (a_chk->data) {
+ sctp_m_freem(a_chk->data);
+ a_chk->data = NULL;
+ }
+ sctp_free_remote_addr(a_chk->whoTo);
+ a_chk->whoTo = NULL;
+ break;
+ }
+ }
+ if (a_chk == NULL) {
+ sctp_alloc_a_chunk(stcb, a_chk);
+ if (a_chk == NULL) {
+ /* No memory so we drop the idea, and set a timer */
+ if (stcb->asoc.delayed_ack) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_5);
+ sctp_timer_start(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL);
+ } else {
+ stcb->asoc.send_sack = 1;
+ }
+ return;
+ }
+ a_chk->copy_by_ref = 0;
+ /* a_chk->rec.chunk_id.id = SCTP_SELECTIVE_ACK; */
+ a_chk->rec.chunk_id.id = SCTP_SELECTIVE_ACK;
+ a_chk->rec.chunk_id.can_take_data = 1;
+ }
+ /* Clear our pkt counts */
+ asoc->data_pkts_seen = 0;
+
+ a_chk->asoc = asoc;
+ a_chk->snd_count = 0;
+ a_chk->send_size = 0; /* fill in later */
+ a_chk->sent = SCTP_DATAGRAM_UNSENT;
+ a_chk->whoTo = NULL;
+
+ if ((asoc->numduptsns) ||
+ (asoc->last_data_chunk_from->dest_state & SCTP_ADDR_NOT_REACHABLE)
+ ) {
+ /*-
+ * Ok, we have some duplicates or the destination for the
+ * sack is unreachable, let's see if we can select an
+ * alternate to asoc->last_data_chunk_from
+ */
+ if ((!(asoc->last_data_chunk_from->dest_state &
+ SCTP_ADDR_NOT_REACHABLE)) &&
+ (asoc->used_alt_onsack > asoc->numnets)) {
+ /* We used an alt last time, don't use one this time */
+ a_chk->whoTo = NULL;
+ } else {
+ asoc->used_alt_onsack++;
+ a_chk->whoTo = sctp_find_alternate_net(stcb, asoc->last_data_chunk_from, 0);
+ }
+ if (a_chk->whoTo == NULL) {
+ /* Nope, no alternate */
+ a_chk->whoTo = asoc->last_data_chunk_from;
+ asoc->used_alt_onsack = 0;
+ }
+ } else {
+ /*
+ * No duplicates so we use the last place we received data
+ * from.
+ */
+ asoc->used_alt_onsack = 0;
+ a_chk->whoTo = asoc->last_data_chunk_from;
+ }
+ if (a_chk->whoTo) {
+ atomic_add_int(&a_chk->whoTo->ref_count, 1);
+ }
+ if (asoc->highest_tsn_inside_map == asoc->cumulative_tsn) {
+ /* no gaps */
+ space_req = sizeof(struct sctp_sack_chunk);
+ } else {
+ /* gaps get a cluster */
+ space_req = MCLBYTES;
+ }
+ /* Ok now let's formulate an MBUF with our sack */
+ a_chk->data = sctp_get_mbuf_for_msg(space_req, 0, M_DONTWAIT, 1, MT_DATA);
+ if ((a_chk->data == NULL) ||
+ (a_chk->whoTo == NULL)) {
+ /* rats, no mbuf memory */
+ if (a_chk->data) {
+ /* was a problem with the destination */
+ sctp_m_freem(a_chk->data);
+ a_chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, a_chk);
+ /* sa_ignore NO_NULL_CHK */
+ if (stcb->asoc.delayed_ack) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_6);
+ sctp_timer_start(SCTP_TIMER_TYPE_RECV,
+ stcb->sctp_ep, stcb, NULL);
+ } else {
+ stcb->asoc.send_sack = 1;
+ }
+ return;
+ }
+ /* ok, let's go through and fill it in */
+ SCTP_BUF_RESV_UF(a_chk->data, SCTP_MIN_OVERHEAD);
+ space = M_TRAILINGSPACE(a_chk->data);
+ if (space > (a_chk->whoTo->mtu - SCTP_MIN_OVERHEAD)) {
+ space = (a_chk->whoTo->mtu - SCTP_MIN_OVERHEAD);
+ }
+ limit = mtod(a_chk->data, caddr_t);
+ limit += space;
+
+ sack = mtod(a_chk->data, struct sctp_sack_chunk *);
+ sack->ch.chunk_type = SCTP_SELECTIVE_ACK;
+ /* 0x01 is used by nonce for ecn */
+ if ((sctp_ecn_enable) &&
+ (sctp_ecn_nonce) &&
+ (asoc->peer_supports_ecn_nonce))
+ sack->ch.chunk_flags = (asoc->receiver_nonce_sum & SCTP_SACK_NONCE_SUM);
+ else
+ sack->ch.chunk_flags = 0;
+
+ if (sctp_cmt_on_off && sctp_cmt_use_dac) {
+ /*-
+ * CMT DAC algorithm: If 2 (i.e., binary 10) packets have been
+ * received, then set high bit to 1, else 0. Reset
+ * pkts_rcvd.
+ */
+ sack->ch.chunk_flags |= (asoc->cmt_dac_pkts_rcvd << 6);
+ asoc->cmt_dac_pkts_rcvd = 0;
+ }
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ stcb->asoc.cumack_logsnt[stcb->asoc.cumack_log_atsnt] = asoc->cumulative_tsn;
+ stcb->asoc.cumack_log_atsnt++;
+ if (stcb->asoc.cumack_log_atsnt >= SCTP_TSN_LOG_SIZE) {
+ stcb->asoc.cumack_log_atsnt = 0;
+ }
+#endif
+ sack->sack.cum_tsn_ack = htonl(asoc->cumulative_tsn);
+ sack->sack.a_rwnd = htonl(asoc->my_rwnd);
+ asoc->my_last_reported_rwnd = asoc->my_rwnd;
+
+ /* reset the reader's interpretation */
+ stcb->freed_by_sorcv_sincelast = 0;
+
+ gap_descriptor = (struct sctp_gap_ack_block *)((caddr_t)sack + sizeof(struct sctp_sack_chunk));
+
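+ /*
+ * siz is the number of mapping array bytes that cover the TSNs from
+ * the array base up to the highest TSN seen (8 TSNs per byte,
+ * rounded up).
+ */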
+ siz = (((asoc->highest_tsn_inside_map - asoc->mapping_array_base_tsn) + 1) + 7) / 8;
+ if (compare_with_wrap(asoc->mapping_array_base_tsn, asoc->cumulative_tsn, MAX_TSN)) {
+ offset = 1;
+ /*-
+ * cum-ack behind the mapping array, so we start and use all
+ * entries.
+ */
+ jstart = 0;
+ } else {
+ offset = asoc->mapping_array_base_tsn - asoc->cumulative_tsn;
+ /*-
+ * we skip the first one when the cum-ack is at or above the
+ * mapping array base. Note this only works if
+ */
+ jstart = 1;
+ }
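+ /*
+ * Walk the mapping array a byte (8 TSNs) at a time; the sack_array
+ * lookup table turns each byte value into its runs of received TSNs,
+ * which are emitted as gap-ack blocks offset from the cumulative TSN.
+ */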
+ if (compare_with_wrap(asoc->highest_tsn_inside_map, asoc->cumulative_tsn, MAX_TSN)) {
+ /* we have a gap .. maybe */
+ for (i = 0; i < siz; i++) {
+ selector = &sack_array[asoc->mapping_array[i]];
+ if (mergeable && selector->right_edge) {
+ /*
+ * Backup, left and right edges were ok to
+ * merge.
+ */
+ num_gap_blocks--;
+ gap_descriptor--;
+ }
+ if (selector->num_entries == 0)
+ mergeable = 0;
+ else {
+ for (j = jstart; j < selector->num_entries; j++) {
+ if (mergeable && selector->right_edge) {
+ /*
+ * do a merge by NOT setting
+ * the left side
+ */
+ mergeable = 0;
+ } else {
+ /*
+ * no merge, set the left
+ * side
+ */
+ mergeable = 0;
+ gap_descriptor->start = htons((selector->gaps[j].start + offset));
+ }
+ gap_descriptor->end = htons((selector->gaps[j].end + offset));
+ num_gap_blocks++;
+ gap_descriptor++;
+ if (((caddr_t)gap_descriptor + sizeof(struct sctp_gap_ack_block)) > limit) {
+ /* no more room */
+ limit_reached = 1;
+ break;
+ }
+ }
+ if (selector->left_edge) {
+ mergeable = 1;
+ }
+ }
+ if (limit_reached) {
+ /* Reached the limit stop */
+ break;
+ }
+ jstart = 0;
+ offset += 8;
+ }
+ if (num_gap_blocks == 0) {
+ /*
+ * slide not yet happened, and somehow we got called
+ * to send a sack. Cumack needs to move up.
+ */
+ int abort_flag = 0;
+
+ asoc->cumulative_tsn = asoc->highest_tsn_inside_map;
+ sack->sack.cum_tsn_ack = htonl(asoc->cumulative_tsn);
+ sctp_sack_check(stcb, 0, 0, &abort_flag);
+ }
+ }
+ /* now we must add any dups we are going to report. */
+ if ((limit_reached == 0) && (asoc->numduptsns)) {
+ dup = (uint32_t *) gap_descriptor;
+ for (i = 0; i < asoc->numduptsns; i++) {
+ *dup = htonl(asoc->dup_tsns[i]);
+ dup++;
+ num_dups++;
+ if (((caddr_t)dup + sizeof(uint32_t)) > limit) {
+ /* no more room */
+ break;
+ }
+ }
+ asoc->numduptsns = 0;
+ }
+ /*
+ * now that the chunk is prepared queue it to the control chunk
+ * queue.
+ */
+ a_chk->send_size = (sizeof(struct sctp_sack_chunk) +
+ (num_gap_blocks * sizeof(struct sctp_gap_ack_block)) +
+ (num_dups * sizeof(int32_t)));
+ SCTP_BUF_LEN(a_chk->data) = a_chk->send_size;
+ sack->sack.num_gap_ack_blks = htons(num_gap_blocks);
+ sack->sack.num_dup_tsns = htons(num_dups);
+ sack->ch.chunk_length = htons(a_chk->send_size);
+ TAILQ_INSERT_TAIL(&asoc->control_send_queue, a_chk, sctp_next);
+ asoc->ctrl_queue_cnt++;
+ asoc->send_sack = 0;
+ SCTP_STAT_INCR(sctps_sendsacks);
+ return;
+}
+
+
+void
+sctp_send_abort_tcb(struct sctp_tcb *stcb, struct mbuf *operr, int so_locked
+#if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
+ SCTP_UNUSED
+#endif
+)
+{
+ struct mbuf *m_abort;
+ struct mbuf *m_out = NULL, *m_end = NULL;
+ struct sctp_abort_chunk *abort = NULL;
+ int sz;
+ uint32_t auth_offset = 0;
+ struct sctp_auth_chunk *auth = NULL;
+ struct sctphdr *shdr;
+
+ /*-
+ * Add an AUTH chunk if the chunk requires it, and save the offset into
+ * the chain for AUTH
+ */
+ if (sctp_auth_is_required_chunk(SCTP_ABORT_ASSOCIATION,
+ stcb->asoc.peer_auth_chunks)) {
+ m_out = sctp_add_auth_chunk(m_out, &m_end, &auth, &auth_offset,
+ stcb, SCTP_ABORT_ASSOCIATION);
+ }
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ m_abort = sctp_get_mbuf_for_msg(sizeof(struct sctp_abort_chunk), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_abort == NULL) {
+ /* no mbuf's */
+ if (m_out)
+ sctp_m_freem(m_out);
+ return;
+ }
+ /* link in any error */
+ SCTP_BUF_NEXT(m_abort) = operr;
+ sz = 0;
+ if (operr) {
+ struct mbuf *n;
+
+ n = operr;
+ while (n) {
+ sz += SCTP_BUF_LEN(n);
+ n = SCTP_BUF_NEXT(n);
+ }
+ }
+ SCTP_BUF_LEN(m_abort) = sizeof(*abort);
+ if (m_out == NULL) {
+ /* NO Auth chunk prepended, so reserve space in front */
+ SCTP_BUF_RESV_UF(m_abort, SCTP_MIN_OVERHEAD);
+ m_out = m_abort;
+ } else {
+ /* Put AUTH chunk at the front of the chain */
+ SCTP_BUF_NEXT(m_end) = m_abort;
+ }
+
+ /* fill in the ABORT chunk */
+ abort = mtod(m_abort, struct sctp_abort_chunk *);
+ abort->ch.chunk_type = SCTP_ABORT_ASSOCIATION;
+ abort->ch.chunk_flags = 0;
+ abort->ch.chunk_length = htons(sizeof(*abort) + sz);
+
+ /* prepend and fill in the SCTP header */
+ SCTP_BUF_PREPEND(m_out, sizeof(struct sctphdr), M_DONTWAIT);
+ if (m_out == NULL) {
+ /* TSNH: no memory */
+ return;
+ }
+ shdr = mtod(m_out, struct sctphdr *);
+ shdr->src_port = stcb->sctp_ep->sctp_lport;
+ shdr->dest_port = stcb->rport;
+ shdr->v_tag = htonl(stcb->asoc.peer_vtag);
+ shdr->checksum = 0;
+ auth_offset += sizeof(struct sctphdr);
+
+ (void)sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb,
+ stcb->asoc.primary_destination,
+ (struct sockaddr *)&stcb->asoc.primary_destination->ro._l_addr,
+ m_out, auth_offset, auth, 1, 0, NULL, 0, so_locked);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+}
+
+void
+sctp_send_shutdown_complete(struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ /* formulate and SEND a SHUTDOWN-COMPLETE */
+ struct mbuf *m_shutdown_comp;
+ struct sctp_shutdown_complete_msg *comp_cp;
+
+ m_shutdown_comp = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_complete_msg), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_shutdown_comp == NULL) {
+ /* no mbuf's */
+ return;
+ }
+ comp_cp = mtod(m_shutdown_comp, struct sctp_shutdown_complete_msg *);
+ comp_cp->shut_cmp.ch.chunk_type = SCTP_SHUTDOWN_COMPLETE;
+ comp_cp->shut_cmp.ch.chunk_flags = 0;
+ comp_cp->shut_cmp.ch.chunk_length = htons(sizeof(struct sctp_shutdown_complete_chunk));
+ comp_cp->sh.src_port = stcb->sctp_ep->sctp_lport;
+ comp_cp->sh.dest_port = stcb->rport;
+ comp_cp->sh.v_tag = htonl(stcb->asoc.peer_vtag);
+ comp_cp->sh.checksum = 0;
+
+ SCTP_BUF_LEN(m_shutdown_comp) = sizeof(struct sctp_shutdown_complete_msg);
+ (void)sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb, net,
+ (struct sockaddr *)&net->ro._l_addr,
+ m_shutdown_comp, 0, NULL, 1, 0, NULL, 0, SCTP_SO_NOT_LOCKED);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ return;
+}
+
+void
+sctp_send_shutdown_complete2(struct mbuf *m, int iphlen, struct sctphdr *sh,
+ uint32_t vrf_id)
+{
+ /* formulate and SEND a SHUTDOWN-COMPLETE */
+ struct mbuf *o_pak;
+ struct mbuf *mout;
+ struct ip *iph, *iph_out;
+ struct ip6_hdr *ip6, *ip6_out;
+ int offset_out, len, mlen;
+ struct sctp_shutdown_complete_msg *comp_cp;
+
+ /* Get room for the largest message */
+ len = (sizeof(struct ip6_hdr) + sizeof(struct sctp_shutdown_complete_msg));
+ mout = sctp_get_mbuf_for_msg(len, 1, M_DONTWAIT, 1, MT_DATA);
+ if (mout == NULL) {
+ return;
+ }
+ SCTP_BUF_LEN(mout) = len;
+ iph = mtod(m, struct ip *);
+ iph_out = NULL;
+ ip6_out = NULL;
+ offset_out = 0;
+ if (iph->ip_v == IPVERSION) {
+ SCTP_BUF_LEN(mout) = sizeof(struct ip) +
+ sizeof(struct sctp_shutdown_complete_msg);
+ SCTP_BUF_NEXT(mout) = NULL;
+ iph_out = mtod(mout, struct ip *);
+
+ /* Fill in the IP header for the SHUTDOWN-COMPLETE */
+ iph_out->ip_v = IPVERSION;
+ iph_out->ip_hl = (sizeof(struct ip) / 4);
+ iph_out->ip_tos = (u_char)0;
+ iph_out->ip_id = 0;
+ iph_out->ip_off = 0;
+ iph_out->ip_ttl = MAXTTL;
+ iph_out->ip_p = IPPROTO_SCTP;
+ iph_out->ip_src.s_addr = iph->ip_dst.s_addr;
+ iph_out->ip_dst.s_addr = iph->ip_src.s_addr;
+
+ /* let IP layer calculate this */
+ iph_out->ip_sum = 0;
+ offset_out += sizeof(*iph_out);
+ comp_cp = (struct sctp_shutdown_complete_msg *)(
+ (caddr_t)iph_out + offset_out);
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ ip6 = (struct ip6_hdr *)iph;
+ SCTP_BUF_LEN(mout) = sizeof(struct ip6_hdr) +
+ sizeof(struct sctp_shutdown_complete_msg);
+ SCTP_BUF_NEXT(mout) = NULL;
+ ip6_out = mtod(mout, struct ip6_hdr *);
+
+ /* Fill in the IPv6 header for the SHUTDOWN-COMPLETE */
+ ip6_out->ip6_flow = ip6->ip6_flow;
+ ip6_out->ip6_hlim = ip6_defhlim;
+ ip6_out->ip6_nxt = IPPROTO_SCTP;
+ ip6_out->ip6_src = ip6->ip6_dst;
+ ip6_out->ip6_dst = ip6->ip6_src;
+ /*
+ * ?? The old code had both the iph len + payload, I think
+ * this is wrong and would never have worked
+ */
+ ip6_out->ip6_plen = sizeof(struct sctp_shutdown_complete_msg);
+ offset_out += sizeof(*ip6_out);
+ comp_cp = (struct sctp_shutdown_complete_msg *)(
+ (caddr_t)ip6_out + offset_out);
+ } else {
+ /* Currently not supported. */
+ return;
+ }
+ if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
+ /* no mbuf's */
+ sctp_m_freem(mout);
+ return;
+ }
+ /* Now copy in and fill in the SHUTDOWN-COMPLETE ports, tag, etc. */
+ comp_cp->sh.src_port = sh->dest_port;
+ comp_cp->sh.dest_port = sh->src_port;
+ comp_cp->sh.checksum = 0;
+ comp_cp->sh.v_tag = sh->v_tag;
+ comp_cp->shut_cmp.ch.chunk_flags = SCTP_HAD_NO_TCB;
+ comp_cp->shut_cmp.ch.chunk_type = SCTP_SHUTDOWN_COMPLETE;
+ comp_cp->shut_cmp.ch.chunk_length = htons(sizeof(struct sctp_shutdown_complete_chunk));
+
+ /* add checksum */
+ comp_cp->sh.checksum = sctp_calculate_sum(mout, NULL, offset_out);
+ if (iph_out != NULL) {
+ sctp_route_t ro;
+ int ret;
+ struct sctp_tcb *stcb = NULL;
+
+ mlen = SCTP_BUF_LEN(mout);
+ bzero(&ro, sizeof ro);
+ /* set IPv4 length */
+ iph_out->ip_len = mlen;
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, mlen);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, mlen);
+
+ /* out it goes */
+ SCTP_IP_OUTPUT(ret, o_pak, &ro, stcb, vrf_id);
+
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ } else if (ip6_out != NULL) {
+ struct route_in6 ro;
+ int ret;
+ struct sctp_tcb *stcb = NULL;
+ struct ifnet *ifp = NULL;
+
+ bzero(&ro, sizeof(ro));
+ mlen = SCTP_BUF_LEN(mout);
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, mlen);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, mlen);
+ SCTP_IP6_OUTPUT(ret, o_pak, &ro, &ifp, stcb, vrf_id);
+
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ }
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks);
+ return;
+
+}
+
+static struct sctp_nets *
+sctp_select_hb_destination(struct sctp_tcb *stcb, struct timeval *now)
+{
+ struct sctp_nets *net, *hnet;
+ int ms_goneby, highest_ms, state_overide = 0;
+
+ (void)SCTP_GETTIME_TIMEVAL(now);
+ highest_ms = 0;
+ hnet = NULL;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (
+ ((net->dest_state & SCTP_ADDR_NOHB) && ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0)) ||
+ (net->dest_state & SCTP_ADDR_OUT_OF_SCOPE)
+ ) {
+ /*
+ * Skip this guy from consideration if HB is off AND
+ * it's confirmed
+ */
+ continue;
+ }
+ if (sctp_destination_is_reachable(stcb, (struct sockaddr *)&net->ro._l_addr) == 0) {
+ /* skip this dest net from consideration */
+ continue;
+ }
+ if (net->last_sent_time.tv_sec) {
+ /* We have sent to it, so we subtract */
+ ms_goneby = (now->tv_sec - net->last_sent_time.tv_sec) * 1000;
+ } else
+ /* Never been sent to */
+ ms_goneby = 0x7fffffff;
+ /*-
+ * When the address state is unconfirmed but still
+ * considered reachable, we HB at a higher rate. Once it
+ * goes confirmed OR reaches the "unreachable" state, then
+ * we cut back to HB at a more normal pace.
+ */
+ if ((net->dest_state & (SCTP_ADDR_UNCONFIRMED | SCTP_ADDR_NOT_REACHABLE)) == SCTP_ADDR_UNCONFIRMED) {
+ state_overide = 1;
+ } else {
+ state_overide = 0;
+ }
+
+ if ((((unsigned int)ms_goneby >= net->RTO) || (state_overide)) &&
+ (ms_goneby > highest_ms)) {
+ highest_ms = ms_goneby;
+ hnet = net;
+ }
+ }
+ if (hnet &&
+ ((hnet->dest_state & (SCTP_ADDR_UNCONFIRMED | SCTP_ADDR_NOT_REACHABLE)) == SCTP_ADDR_UNCONFIRMED)) {
+ state_overide = 1;
+ } else {
+ state_overide = 0;
+ }
+
+ if (hnet && highest_ms && (((unsigned int)highest_ms >= hnet->RTO) || state_overide)) {
+ /*-
+ * Found the one with longest delay bounds OR it is
+ * unconfirmed and still not marked unreachable.
+ */
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "net:%p is the hb winner -", hnet);
+#ifdef SCTP_DEBUG
+ if (hnet) {
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT4,
+ (struct sockaddr *)&hnet->ro._l_addr);
+ } else {
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, " none\n");
+ }
+#endif
+ /* update the timer now */
+ hnet->last_sent_time = *now;
+ return (hnet);
+ }
+ /* Nothing to HB */
+ return (NULL);
+}
+
+int
+sctp_send_hb(struct sctp_tcb *stcb, int user_req, struct sctp_nets *u_net)
+{
+ struct sctp_tmit_chunk *chk;
+ struct sctp_nets *net;
+ struct sctp_heartbeat_chunk *hb;
+ struct timeval now;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (user_req == 0) {
+ net = sctp_select_hb_destination(stcb, &now);
+ if (net == NULL) {
+ /*-
+ * All are busy, none to send to; just start the
+ * timer again.
+ */
+ if (stcb->asoc.state == 0) {
+ return (0);
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT,
+ stcb->sctp_ep,
+ stcb,
+ net);
+ return (0);
+ }
+ } else {
+ net = u_net;
+ if (net == NULL) {
+ return (0);
+ }
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ }
+ sin = (struct sockaddr_in *)&net->ro._l_addr;
+ if (sin->sin_family != AF_INET) {
+ if (sin->sin_family != AF_INET6) {
+ /* huh */
+ return (0);
+ }
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Gak, can't get a chunk for hb\n");
+ return (0);
+ }
+ chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_HEARTBEAT_REQUEST;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->asoc = &stcb->asoc;
+ chk->send_size = sizeof(struct sctp_heartbeat_chunk);
+
+ chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return (0);
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = net;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ /* Now we have a mbuf that we can fill in with the details */
+ hb = mtod(chk->data, struct sctp_heartbeat_chunk *);
+ memset(hb, 0, sizeof(struct sctp_heartbeat_chunk));
+ /* fill out chunk header */
+ hb->ch.chunk_type = SCTP_HEARTBEAT_REQUEST;
+ hb->ch.chunk_flags = 0;
+ hb->ch.chunk_length = htons(chk->send_size);
+ /* Fill out hb parameter */
+ hb->heartbeat.hb_info.ph.param_type = htons(SCTP_HEARTBEAT_INFO);
+ hb->heartbeat.hb_info.ph.param_length = htons(sizeof(struct sctp_heartbeat_info_param));
+ hb->heartbeat.hb_info.time_value_1 = now.tv_sec;
+ hb->heartbeat.hb_info.time_value_2 = now.tv_usec;
+ /* Did our user request this one? Put it in */
+ hb->heartbeat.hb_info.user_req = user_req;
+ hb->heartbeat.hb_info.addr_family = sin->sin_family;
+ hb->heartbeat.hb_info.addr_len = sin->sin_len;
+ if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ /*
+ * we only take from the entropy pool if the address is not
+ * confirmed.
+ */
+ net->heartbeat_random1 = hb->heartbeat.hb_info.random_value1 = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
+ net->heartbeat_random2 = hb->heartbeat.hb_info.random_value2 = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
+ } else {
+ net->heartbeat_random1 = hb->heartbeat.hb_info.random_value1 = 0;
+ net->heartbeat_random2 = hb->heartbeat.hb_info.random_value2 = 0;
+ }
+ if (sin->sin_family == AF_INET) {
+ memcpy(hb->heartbeat.hb_info.address, &sin->sin_addr, sizeof(sin->sin_addr));
+ } else if (sin->sin_family == AF_INET6) {
+ /* We leave the scope the way it is in our lookup table. */
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ memcpy(hb->heartbeat.hb_info.address, &sin6->sin6_addr, sizeof(sin6->sin6_addr));
+ } else {
+ /* huh compiler bug */
+ return (0);
+ }
+
+ /*
+ * JRS 5/14/07 - In CMT PF, the T3 timer is used to track
+ * PF-heartbeats. Because of this, threshold management is done by
+ * the t3 timer handler, and does not need to be done upon the send
+ * of a PF-heartbeat. If CMT PF is on and the destination to which a
+ * heartbeat is being sent is in PF state, do NOT do threshold
+ * management.
+ */
+ if ((sctp_cmt_pf == 0) || ((net->dest_state & SCTP_ADDR_PF) != SCTP_ADDR_PF)) {
+ /* ok we have a destination that needs a beat */
+ /* let's do the threshold management Qiaobing style */
+ if (sctp_threshold_management(stcb->sctp_ep, stcb, net,
+ stcb->asoc.max_send_times)) {
+ /*-
+ * we have lost the association, in a way this is
+ * quite bad since we really are one less time since
+ * we really did not send yet. This is the down side
+ * to the Q's style as defined in the RFC and not my
+ * alternate style defined in the RFC.
+ */
+ if (chk->data != NULL) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ /*
+ * Here we do NOT use the macro since the
+ * association is now gone.
+ */
+ if (chk->whoTo) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = NULL;
+ }
+ sctp_free_a_chunk((struct sctp_tcb *)NULL, chk);
+ return (-1);
+ }
+ }
+ net->hb_responded = 0;
+ TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
+ stcb->asoc.ctrl_queue_cnt++;
+ SCTP_STAT_INCR(sctps_sendheartbeat);
+ /*-
+ * Call directly med level routine to put out the chunk. It will
+ * always tumble out control chunks aka HB but it may even tumble
+ * out data too.
+ */
+ return (1);
+}
+
+void
+sctp_send_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
+ uint32_t high_tsn)
+{
+ struct sctp_association *asoc;
+ struct sctp_ecne_chunk *ecne;
+ struct sctp_tmit_chunk *chk;
+
+ asoc = &stcb->asoc;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id == SCTP_ECN_ECHO) {
+ /* found a previous ECN_ECHO, update it if needed */
+ ecne = mtod(chk->data, struct sctp_ecne_chunk *);
+ ecne->tsn = htonl(high_tsn);
+ return;
+ }
+ }
+ /* nope could not find one to update so we must build one */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ return;
+ }
+ chk->copy_by_ref = 0;
+ SCTP_STAT_INCR(sctps_sendecne);
+ chk->rec.chunk_id.id = SCTP_ECN_ECHO;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->asoc = &stcb->asoc;
+ chk->send_size = sizeof(struct sctp_ecne_chunk);
+ chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = net;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ stcb->asoc.ecn_echo_cnt_onq++;
+ ecne = mtod(chk->data, struct sctp_ecne_chunk *);
+ ecne->ch.chunk_type = SCTP_ECN_ECHO;
+ ecne->ch.chunk_flags = 0;
+ ecne->ch.chunk_length = htons(sizeof(struct sctp_ecne_chunk));
+ ecne->tsn = htonl(high_tsn);
+ TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
+ asoc->ctrl_queue_cnt++;
+}
+
+void
+sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net,
+ struct mbuf *m, int iphlen, int bad_crc)
+{
+ struct sctp_association *asoc;
+ struct sctp_pktdrop_chunk *drp;
+ struct sctp_tmit_chunk *chk;
+ uint8_t *datap;
+ int len;
+ int was_trunc = 0;
+ struct ip *iph;
+ int fullsz = 0, extra = 0;
+ long spc;
+ int offset;
+ struct sctp_chunkhdr *ch, chunk_buf;
+ unsigned int chk_length;
+
+ if (!stcb) {
+ return;
+ }
+ asoc = &stcb->asoc;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ if (asoc->peer_supports_pktdrop == 0) {
+ /*-
+ * peer must declare support before I send one.
+ */
+ return;
+ }
+ if (stcb->sctp_socket == NULL) {
+ return;
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ return;
+ }
+ chk->copy_by_ref = 0;
+ iph = mtod(m, struct ip *);
+ if (iph == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ if (iph->ip_v == IPVERSION) {
+ /* IPv4 */
+ len = chk->send_size = iph->ip_len;
+ } else {
+ struct ip6_hdr *ip6h;
+
+ /* IPv6 */
+ ip6h = mtod(m, struct ip6_hdr *);
+ len = chk->send_size = htons(ip6h->ip6_plen);
+ }
+ /* Validate that we do not have an ABORT in here. */
+ offset = iphlen + sizeof(struct sctphdr);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ sizeof(*ch), (uint8_t *) & chunk_buf);
+ while (ch != NULL) {
+ chk_length = ntohs(ch->chunk_length);
+ if (chk_length < sizeof(*ch)) {
+ /* break to abort land */
+ break;
+ }
+ switch (ch->chunk_type) {
+ case SCTP_PACKET_DROPPED:
+ case SCTP_ABORT_ASSOCIATION:
+ /*-
+ * we don't respond with a PKT-DROP to an ABORT
+ * or PKT-DROP
+ */
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ default:
+ break;
+ }
+ offset += SCTP_SIZE32(chk_length);
+ ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset,
+ sizeof(*ch), (uint8_t *) & chunk_buf);
+ }
+
+ if ((len + SCTP_MAX_OVERHEAD + sizeof(struct sctp_pktdrop_chunk)) >
+ min(stcb->asoc.smallest_mtu, MCLBYTES)) {
+ /*
+ * only send 1 mtu worth, trim off the excess on the end.
+ */
+ fullsz = len - extra;
+ len = min(stcb->asoc.smallest_mtu, MCLBYTES) - SCTP_MAX_OVERHEAD;
+ was_trunc = 1;
+ }
+ chk->asoc = &stcb->asoc;
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (chk->data == NULL) {
+jump_out:
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ drp = mtod(chk->data, struct sctp_pktdrop_chunk *);
+ if (drp == NULL) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ goto jump_out;
+ }
+ chk->book_size = SCTP_SIZE32((chk->send_size + sizeof(struct sctp_pktdrop_chunk) +
+ sizeof(struct sctphdr) + SCTP_MED_OVERHEAD));
+ chk->book_size_scale = 0;
+ if (was_trunc) {
+ drp->ch.chunk_flags = SCTP_PACKET_TRUNCATED;
+ drp->trunc_len = htons(fullsz);
+ /*
+ * Len is already adjusted to size minus overhead above; take
+ * the pkt_drop chunk itself out of it.
+ */
+ chk->send_size = len - sizeof(struct sctp_pktdrop_chunk);
+ len = chk->send_size;
+ } else {
+ /* no truncation needed */
+ drp->ch.chunk_flags = 0;
+ drp->trunc_len = htons(0);
+ }
+ if (bad_crc) {
+ drp->ch.chunk_flags |= SCTP_BADCRC;
+ }
+ chk->send_size += sizeof(struct sctp_pktdrop_chunk);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ if (net) {
+ /* we should hit here */
+ chk->whoTo = net;
+ } else {
+ chk->whoTo = asoc->primary_destination;
+ }
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ chk->rec.chunk_id.id = SCTP_PACKET_DROPPED;
+ chk->rec.chunk_id.can_take_data = 1;
+ drp->ch.chunk_type = SCTP_PACKET_DROPPED;
+ drp->ch.chunk_length = htons(chk->send_size);
+ spc = SCTP_SB_LIMIT_RCV(stcb->sctp_socket);
+ if (spc < 0) {
+ spc = 0;
+ }
+ drp->bottle_bw = htonl(spc);
+ if (asoc->my_rwnd) {
+ drp->current_onq = htonl(asoc->size_on_reasm_queue +
+ asoc->size_on_all_streams +
+ asoc->my_rwnd_control_len +
+ stcb->sctp_socket->so_rcv.sb_cc);
+ } else {
+ /*-
+ * If my rwnd is 0, possibly from mbuf depletion as well as
+ * space used, tell the peer there is NO space aka onq == bw
+ */
+ drp->current_onq = htonl(spc);
+ }
+ drp->reserved = 0;
+ datap = drp->data;
+ m_copydata(m, iphlen, len, (caddr_t)datap);
+ TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
+ asoc->ctrl_queue_cnt++;
+}
+
+void
+sctp_send_cwr(struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t high_tsn)
+{
+ struct sctp_association *asoc;
+ struct sctp_cwr_chunk *cwr;
+ struct sctp_tmit_chunk *chk;
+
+ asoc = &stcb->asoc;
+ SCTP_TCB_LOCK_ASSERT(stcb);
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->rec.chunk_id.id == SCTP_ECN_CWR) {
+ /* found a previous ECN_CWR, update it if needed */
+ cwr = mtod(chk->data, struct sctp_cwr_chunk *);
+ if (compare_with_wrap(high_tsn, ntohl(cwr->tsn),
+ MAX_TSN)) {
+ cwr->tsn = htonl(high_tsn);
+ }
+ return;
+ }
+ }
+ /* nope could not find one to update so we must build one */
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ return;
+ }
+ chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_ECN_CWR;
+ chk->rec.chunk_id.can_take_data = 1;
+ chk->asoc = &stcb->asoc;
+ chk->send_size = sizeof(struct sctp_cwr_chunk);
+ chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_DONTWAIT, 1, MT_HEADER);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ return;
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = net;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+ cwr = mtod(chk->data, struct sctp_cwr_chunk *);
+ cwr->ch.chunk_type = SCTP_ECN_CWR;
+ cwr->ch.chunk_flags = 0;
+ cwr->ch.chunk_length = htons(sizeof(struct sctp_cwr_chunk));
+ cwr->tsn = htonl(high_tsn);
+ TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next);
+ asoc->ctrl_queue_cnt++;
+}
+
+void
+sctp_add_stream_reset_out(struct sctp_tmit_chunk *chk,
+ int number_entries, uint16_t * list,
+ uint32_t seq, uint32_t resp_seq, uint32_t last_sent)
+{
+ int len, old_len, i;
+ struct sctp_stream_reset_out_request *req_out;
+ struct sctp_chunkhdr *ch;
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+
+
+ old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
+
+ /* get to new offset for the param. */
+ req_out = (struct sctp_stream_reset_out_request *)((caddr_t)ch + len);
+ /* now how long will this param be? */
+ len = (sizeof(struct sctp_stream_reset_out_request) + (sizeof(uint16_t) * number_entries));
+ req_out->ph.param_type = htons(SCTP_STR_RESET_OUT_REQUEST);
+ req_out->ph.param_length = htons(len);
+ req_out->request_seq = htonl(seq);
+ req_out->response_seq = htonl(resp_seq);
+ req_out->send_reset_at_tsn = htonl(last_sent);
+ if (number_entries) {
+ for (i = 0; i < number_entries; i++) {
+ req_out->list_of_streams[i] = htons(list[i]);
+ }
+ }
+ if (SCTP_SIZE32(len) > len) {
+ /*-
+ * Need to worry about the pad we may end up adding to the
+ * end. This is easy since the struct is either aligned to 4
+ * bytes or 2 bytes off.
+ */
+ req_out->list_of_streams[number_entries] = 0;
+ }
+ /* now fix the chunk length */
+ ch->chunk_length = htons(len + old_len);
+ chk->book_size = len + old_len;
+ chk->book_size_scale = 0;
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ return;
+}
+
+
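Worked example for the padding logic above, assuming the usual 16-byte fixed part of the out-request (parameter header plus the three 32-bit sequence fields): with three streams listed, len = 16 + 3 * sizeof(uint16_t) = 22. The parameter and chunk lengths advertise the unpadded 22 bytes (SCTP lengths never include padding), the mbuf/send size is rounded up via SCTP_SIZE32() to 24, and the extra list slot is zeroed so the two pad bytes that actually go on the wire are deterministic.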
+void
+sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk,
+ int number_entries, uint16_t * list,
+ uint32_t seq)
+{
+ int len, old_len, i;
+ struct sctp_stream_reset_in_request *req_in;
+ struct sctp_chunkhdr *ch;
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+
+
+ old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
+
+ /* get to new offset for the param. */
+ req_in = (struct sctp_stream_reset_in_request *)((caddr_t)ch + len);
+ /* now how long will this param be? */
+ len = (sizeof(struct sctp_stream_reset_in_request) + (sizeof(uint16_t) * number_entries));
+ req_in->ph.param_type = htons(SCTP_STR_RESET_IN_REQUEST);
+ req_in->ph.param_length = htons(len);
+ req_in->request_seq = htonl(seq);
+ if (number_entries) {
+ for (i = 0; i < number_entries; i++) {
+ req_in->list_of_streams[i] = htons(list[i]);
+ }
+ }
+ if (SCTP_SIZE32(len) > len) {
+ /*-
+ * Need to worry about the pad we may end up adding to the
+ * end. This is easy since the struct is either aligned to 4
+ * bytes or 2 bytes off.
+ */
+ req_in->list_of_streams[number_entries] = 0;
+ }
+ /* now fix the chunk length */
+ ch->chunk_length = htons(len + old_len);
+ chk->book_size = len + old_len;
+ chk->book_size_scale = 0;
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ return;
+}
+
+
+void
+sctp_add_stream_reset_tsn(struct sctp_tmit_chunk *chk,
+ uint32_t seq)
+{
+ int len, old_len;
+ struct sctp_stream_reset_tsn_request *req_tsn;
+ struct sctp_chunkhdr *ch;
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+
+
+ old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
+
+ /* get to new offset for the param. */
+ req_tsn = (struct sctp_stream_reset_tsn_request *)((caddr_t)ch + len);
+ /* now how long will this param be? */
+ len = sizeof(struct sctp_stream_reset_tsn_request);
+ req_tsn->ph.param_type = htons(SCTP_STR_RESET_TSN_REQUEST);
+ req_tsn->ph.param_length = htons(len);
+ req_tsn->request_seq = htonl(seq);
+
+ /* now fix the chunk length */
+ ch->chunk_length = htons(len + old_len);
+ chk->send_size = len + old_len;
+ chk->book_size = SCTP_SIZE32(chk->send_size);
+ chk->book_size_scale = 0;
+ SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size);
+ return;
+}
+
+void
+sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk,
+ uint32_t resp_seq, uint32_t result)
+{
+ int len, old_len;
+ struct sctp_stream_reset_response *resp;
+ struct sctp_chunkhdr *ch;
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+
+
+ old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
+
+ /* get to new offset for the param. */
+ resp = (struct sctp_stream_reset_response *)((caddr_t)ch + len);
+ /* now how long will this param be? */
+ len = sizeof(struct sctp_stream_reset_response);
+ resp->ph.param_type = htons(SCTP_STR_RESET_RESPONSE);
+ resp->ph.param_length = htons(len);
+ resp->response_seq = htonl(resp_seq);
+ resp->result = htonl(result);
+
+ /* now fix the chunk length */
+ ch->chunk_length = htons(len + old_len);
+ chk->book_size = len + old_len;
+ chk->book_size_scale = 0;
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ return;
+
+}
+
+
+void
+sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk,
+ uint32_t resp_seq, uint32_t result,
+ uint32_t send_una, uint32_t recv_next)
+{
+ int len, old_len;
+ struct sctp_stream_reset_response_tsn *resp;
+ struct sctp_chunkhdr *ch;
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+
+
+ old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length));
+
+ /* get to new offset for the param. */
+ resp = (struct sctp_stream_reset_response_tsn *)((caddr_t)ch + len);
+ /* now how long will this param be? */
+ len = sizeof(struct sctp_stream_reset_response_tsn);
+ resp->ph.param_type = htons(SCTP_STR_RESET_RESPONSE);
+ resp->ph.param_length = htons(len);
+ resp->response_seq = htonl(resp_seq);
+ resp->result = htonl(result);
+ resp->senders_next_tsn = htonl(send_una);
+ resp->receivers_next_tsn = htonl(recv_next);
+
+ /* now fix the chunk length */
+ ch->chunk_length = htons(len + old_len);
+ chk->book_size = len + old_len;
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ chk->book_size_scale = 0;
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+ return;
+}
+
+
+int
+sctp_send_str_reset_req(struct sctp_tcb *stcb,
+ int number_entries, uint16_t * list,
+ uint8_t send_out_req, uint32_t resp_seq,
+ uint8_t send_in_req,
+ uint8_t send_tsn_req)
+{
+
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_chunkhdr *ch;
+ uint32_t seq;
+
+ asoc = &stcb->asoc;
+ if (asoc->stream_reset_outstanding) {
+ /*-
+ * Already one pending, must get ACK back to clear the flag.
+ */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EBUSY);
+ return (EBUSY);
+ }
+ if ((send_out_req == 0) && (send_in_req == 0) && (send_tsn_req == 0)) {
+ /* nothing to do */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ if (send_tsn_req && (send_out_req || send_in_req)) {
+ /* error, can't do that */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ sctp_alloc_a_chunk(stcb, chk);
+ if (chk == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ chk->copy_by_ref = 0;
+ chk->rec.chunk_id.id = SCTP_STREAM_RESET;
+ chk->rec.chunk_id.can_take_data = 0;
+ chk->asoc = &stcb->asoc;
+ chk->book_size = sizeof(struct sctp_chunkhdr);
+ chk->send_size = SCTP_SIZE32(chk->book_size);
+ chk->book_size_scale = 0;
+
+ chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_DONTWAIT, 1, MT_DATA);
+ if (chk->data == NULL) {
+ sctp_free_a_chunk(stcb, chk);
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
+
+ /* setup chunk parameters */
+ chk->sent = SCTP_DATAGRAM_UNSENT;
+ chk->snd_count = 0;
+ chk->whoTo = asoc->primary_destination;
+ atomic_add_int(&chk->whoTo->ref_count, 1);
+
+ ch = mtod(chk->data, struct sctp_chunkhdr *);
+ ch->chunk_type = SCTP_STREAM_RESET;
+ ch->chunk_flags = 0;
+ ch->chunk_length = htons(chk->book_size);
+ SCTP_BUF_LEN(chk->data) = chk->send_size;
+
+ seq = stcb->asoc.str_reset_seq_out;
+ if (send_out_req) {
+ sctp_add_stream_reset_out(chk, number_entries, list,
+ seq, resp_seq, (stcb->asoc.sending_seq - 1));
+ asoc->stream_reset_out_is_outstanding = 1;
+ seq++;
+ asoc->stream_reset_outstanding++;
+ }
+ if (send_in_req) {
+ sctp_add_stream_reset_in(chk, number_entries, list, seq);
+ asoc->stream_reset_outstanding++;
+ }
+ if (send_tsn_req) {
+ sctp_add_stream_reset_tsn(chk, seq);
+ asoc->stream_reset_outstanding++;
+ }
+ asoc->str_reset = chk;
+
+ /* insert the chunk for sending */
+ TAILQ_INSERT_TAIL(&asoc->control_send_queue,
+ chk,
+ sctp_next);
+ asoc->ctrl_queue_cnt++;
+ sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo);
+ return (0);
+}
+
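A hypothetical caller of sctp_send_str_reset_req(), for example a socket-option handler asking the peer to reset two outgoing streams, would look roughly like the sketch below. In the real callers the association (TCB) lock is held and resp_seq echoes the peer's last request sequence; the values here are purely illustrative.

    /* Hypothetical: request a reset of outgoing streams 1 and 4. */
    static int
    example_reset_out_streams(struct sctp_tcb *stcb)
    {
        uint16_t streams[2] = { 1, 4 };

        /* out-request only: no in-request, no TSN reset */
        return (sctp_send_str_reset_req(stcb, 2, streams,
            1 /* send_out_req */, 0 /* resp_seq */,
            0 /* send_in_req */, 0 /* send_tsn_req */));
    }

EBUSY from this call means a previous reset request is still waiting for its response.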
+void
+sctp_send_abort(struct mbuf *m, int iphlen, struct sctphdr *sh, uint32_t vtag,
+ struct mbuf *err_cause, uint32_t vrf_id)
+{
+ /*-
+ * Formulate the abort message, and send it back down.
+ */
+ struct mbuf *o_pak;
+ struct mbuf *mout;
+ struct sctp_abort_msg *abm;
+ struct ip *iph, *iph_out;
+ struct ip6_hdr *ip6, *ip6_out;
+ int iphlen_out, len;
+
+ /* don't respond to ABORT with ABORT */
+ if (sctp_is_there_an_abort_here(m, iphlen, &vtag)) {
+ if (err_cause)
+ sctp_m_freem(err_cause);
+ return;
+ }
+ len = (sizeof(struct ip6_hdr) + sizeof(struct sctp_abort_msg));
+
+ mout = sctp_get_mbuf_for_msg(len, 1, M_DONTWAIT, 1, MT_DATA);
+ if (mout == NULL) {
+ if (err_cause)
+ sctp_m_freem(err_cause);
+ return;
+ }
+ iph = mtod(m, struct ip *);
+ iph_out = NULL;
+ ip6_out = NULL;
+ if (iph->ip_v == IPVERSION) {
+ iph_out = mtod(mout, struct ip *);
+ SCTP_BUF_LEN(mout) = sizeof(*iph_out) + sizeof(*abm);
+ SCTP_BUF_NEXT(mout) = err_cause;
+
+ /* Fill in the IP header for the ABORT */
+ iph_out->ip_v = IPVERSION;
+ iph_out->ip_hl = (sizeof(struct ip) / 4);
+ iph_out->ip_tos = (u_char)0;
+ iph_out->ip_id = 0;
+ iph_out->ip_off = 0;
+ iph_out->ip_ttl = MAXTTL;
+ iph_out->ip_p = IPPROTO_SCTP;
+ iph_out->ip_src.s_addr = iph->ip_dst.s_addr;
+ iph_out->ip_dst.s_addr = iph->ip_src.s_addr;
+ /* let IP layer calculate this */
+ iph_out->ip_sum = 0;
+
+ iphlen_out = sizeof(*iph_out);
+ abm = (struct sctp_abort_msg *)((caddr_t)iph_out + iphlen_out);
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ ip6 = (struct ip6_hdr *)iph;
+ ip6_out = mtod(mout, struct ip6_hdr *);
+ SCTP_BUF_LEN(mout) = sizeof(*ip6_out) + sizeof(*abm);
+ SCTP_BUF_NEXT(mout) = err_cause;
+
+ /* Fill in the IP6 header for the ABORT */
+ ip6_out->ip6_flow = ip6->ip6_flow;
+ ip6_out->ip6_hlim = ip6_defhlim;
+ ip6_out->ip6_nxt = IPPROTO_SCTP;
+ ip6_out->ip6_src = ip6->ip6_dst;
+ ip6_out->ip6_dst = ip6->ip6_src;
+
+ iphlen_out = sizeof(*ip6_out);
+ abm = (struct sctp_abort_msg *)((caddr_t)ip6_out + iphlen_out);
+ } else {
+ /* Currently not supported */
+ if (err_cause)
+ sctp_m_freem(err_cause);
+ sctp_m_freem(mout);
+ return;
+ }
+
+ abm->sh.src_port = sh->dest_port;
+ abm->sh.dest_port = sh->src_port;
+ abm->sh.checksum = 0;
+ if (vtag == 0) {
+ abm->sh.v_tag = sh->v_tag;
+ abm->msg.ch.chunk_flags = SCTP_HAD_NO_TCB;
+ } else {
+ abm->sh.v_tag = htonl(vtag);
+ abm->msg.ch.chunk_flags = 0;
+ }
+ abm->msg.ch.chunk_type = SCTP_ABORT_ASSOCIATION;
+
+ if (err_cause) {
+ struct mbuf *m_tmp = err_cause;
+ int err_len = 0;
+
+ /* get length of the err_cause chain */
+ while (m_tmp != NULL) {
+ err_len += SCTP_BUF_LEN(m_tmp);
+ m_tmp = SCTP_BUF_NEXT(m_tmp);
+ }
+ len = SCTP_BUF_LEN(mout) + err_len;
+ if (err_len % 4) {
+ /* need pad at end of chunk */
+ uint32_t cpthis = 0;
+ int padlen;
+
+ padlen = 4 - (len % 4);
+ m_copyback(mout, len, padlen, (caddr_t)&cpthis);
+ len += padlen;
+ }
+ abm->msg.ch.chunk_length = htons(sizeof(abm->msg.ch) + err_len);
+ } else {
+ len = SCTP_BUF_LEN(mout);
+ abm->msg.ch.chunk_length = htons(sizeof(abm->msg.ch));
+ }
+
+ /* add checksum */
+ abm->sh.checksum = sctp_calculate_sum(mout, NULL, iphlen_out);
+ if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
+ /* no mbuf's */
+ sctp_m_freem(mout);
+ return;
+ }
+ if (iph_out != NULL) {
+ sctp_route_t ro;
+ struct sctp_tcb *stcb = NULL;
+ int ret;
+
+ /* zap the stack pointer to the route */
+ bzero(&ro, sizeof ro);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "sctp_send_abort calling ip_output:\n");
+ SCTPDBG_PKT(SCTP_DEBUG_OUTPUT2, iph_out, &abm->sh);
+ /* set IPv4 length */
+ iph_out->ip_len = len;
+ /* out it goes */
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, len);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, len);
+ SCTP_IP_OUTPUT(ret, o_pak, &ro, stcb, vrf_id);
+
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ } else if (ip6_out != NULL) {
+ struct route_in6 ro;
+ int ret;
+ struct sctp_tcb *stcb = NULL;
+ struct ifnet *ifp = NULL;
+
+ /* zap the stack pointer to the route */
+ bzero(&ro, sizeof(ro));
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "sctp_send_abort calling ip6_output:\n");
+ SCTPDBG_PKT(SCTP_DEBUG_OUTPUT2, (struct ip *)ip6_out, &abm->sh);
+ ip6_out->ip6_plen = len - sizeof(*ip6_out);
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, len);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, len);
+ SCTP_IP6_OUTPUT(ret, o_pak, &ro, &ifp, stcb, vrf_id);
+
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ }
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+}
+
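The error-cause padding in sctp_send_abort() works because the IP (or IPv6) header plus the SCTP common header and ABORT chunk header already in mout are all multiples of 4 bytes, so len % 4 equals err_len % 4; the pad is then appended from a zeroed word with m_copyback(). The same step, condensed into a stand-alone helper (a sketch, not the in-tree code):

    /* Sketch: append 1-3 zero bytes so a chunk ends on a 32-bit boundary. */
    static void
    pad_chunk_to_32bits(struct mbuf *m, int len)
    {
        uint32_t zero = 0;

        if (len % 4)
            m_copyback(m, len, 4 - (len % 4), (caddr_t)&zero);
    }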
+void
+sctp_send_operr_to(struct mbuf *m, int iphlen, struct mbuf *scm, uint32_t vtag,
+ uint32_t vrf_id)
+{
+ struct mbuf *o_pak;
+ struct sctphdr *ihdr;
+ int retcode;
+ struct sctphdr *ohdr;
+ struct sctp_chunkhdr *ophdr;
+ struct ip *iph;
+ struct mbuf *mout;
+
+#ifdef SCTP_DEBUG
+ struct sockaddr_in6 lsa6, fsa6;
+
+#endif
+ uint32_t val;
+ struct mbuf *at;
+ int len;
+
+ iph = mtod(m, struct ip *);
+ ihdr = (struct sctphdr *)((caddr_t)iph + iphlen);
+
+ SCTP_BUF_PREPEND(scm, (sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr)), M_DONTWAIT);
+ if (scm == NULL) {
+ /* can't send because we can't add an mbuf */
+ return;
+ }
+ ohdr = mtod(scm, struct sctphdr *);
+ ohdr->src_port = ihdr->dest_port;
+ ohdr->dest_port = ihdr->src_port;
+ ohdr->v_tag = vtag;
+ ohdr->checksum = 0;
+ ophdr = (struct sctp_chunkhdr *)(ohdr + 1);
+ ophdr->chunk_type = SCTP_OPERATION_ERROR;
+ ophdr->chunk_flags = 0;
+ len = 0;
+ at = scm;
+ while (at) {
+ len += SCTP_BUF_LEN(at);
+ at = SCTP_BUF_NEXT(at);
+ }
+ ophdr->chunk_length = htons(len - sizeof(struct sctphdr));
+ if (len % 4) {
+ /* need padding */
+ uint32_t cpthis = 0;
+ int padlen;
+
+ padlen = 4 - (len % 4);
+ m_copyback(scm, len, padlen, (caddr_t)&cpthis);
+ len += padlen;
+ }
+ val = sctp_calculate_sum(scm, NULL, 0);
+ mout = sctp_get_mbuf_for_msg(sizeof(struct ip6_hdr), 1, M_DONTWAIT, 1, MT_DATA);
+ if (mout == NULL) {
+ sctp_m_freem(scm);
+ return;
+ }
+ SCTP_BUF_NEXT(mout) = scm;
+ if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
+ sctp_m_freem(mout);
+ return;
+ }
+ ohdr->checksum = val;
+ if (iph->ip_v == IPVERSION) {
+ /* V4 */
+ struct ip *out;
+ sctp_route_t ro;
+ struct sctp_tcb *stcb = NULL;
+
+ SCTP_BUF_LEN(mout) = sizeof(struct ip);
+ len += sizeof(struct ip);
+
+ bzero(&ro, sizeof ro);
+ out = mtod(mout, struct ip *);
+ out->ip_v = iph->ip_v;
+ out->ip_hl = (sizeof(struct ip) / 4);
+ out->ip_tos = iph->ip_tos;
+ out->ip_id = iph->ip_id;
+ out->ip_off = 0;
+ out->ip_ttl = MAXTTL;
+ out->ip_p = IPPROTO_SCTP;
+ out->ip_sum = 0;
+ out->ip_src = iph->ip_dst;
+ out->ip_dst = iph->ip_src;
+ out->ip_len = len;
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, len);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, len);
+
+ SCTP_IP_OUTPUT(retcode, o_pak, &ro, stcb, vrf_id);
+
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ } else {
+ /* V6 */
+ struct route_in6 ro;
+ int ret;
+ struct sctp_tcb *stcb = NULL;
+ struct ifnet *ifp = NULL;
+ struct ip6_hdr *out6, *in6;
+
+ SCTP_BUF_LEN(mout) = sizeof(struct ip6_hdr);
+ len += sizeof(struct ip6_hdr);
+ bzero(&ro, sizeof ro);
+ in6 = mtod(m, struct ip6_hdr *);
+ out6 = mtod(mout, struct ip6_hdr *);
+ out6->ip6_flow = in6->ip6_flow;
+ out6->ip6_hlim = ip6_defhlim;
+ out6->ip6_nxt = IPPROTO_SCTP;
+ out6->ip6_src = in6->ip6_dst;
+ out6->ip6_dst = in6->ip6_src;
+ out6->ip6_plen = len - sizeof(struct ip6_hdr);
+
+#ifdef SCTP_DEBUG
+ bzero(&lsa6, sizeof(lsa6));
+ lsa6.sin6_len = sizeof(lsa6);
+ lsa6.sin6_family = AF_INET6;
+ lsa6.sin6_addr = out6->ip6_src;
+ bzero(&fsa6, sizeof(fsa6));
+ fsa6.sin6_len = sizeof(fsa6);
+ fsa6.sin6_family = AF_INET6;
+ fsa6.sin6_addr = out6->ip6_dst;
+#endif
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "sctp_operr_to calling ipv6 output:\n");
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "src: ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)&lsa6);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "dst ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)&fsa6);
+
+#ifdef SCTP_PACKET_LOGGING
+ if (sctp_logging_level & SCTP_LAST_PACKET_TRACING)
+ sctp_packet_log(mout, len);
+#endif
+ SCTP_ATTACH_CHAIN(o_pak, mout, len);
+ SCTP_IP6_OUTPUT(ret, o_pak, &ro, &ifp, stcb, vrf_id);
+
+ SCTP_STAT_INCR(sctps_sendpackets);
+ SCTP_STAT_INCR_COUNTER64(sctps_outpackets);
+ /* Free the route if we got one back */
+ if (ro.ro_rt)
+ RTFREE(ro.ro_rt);
+ }
+}
+
+static struct mbuf *
+sctp_copy_resume(struct sctp_stream_queue_pending *sp,
+ struct uio *uio,
+ struct sctp_sndrcvinfo *srcv,
+ int max_send_len,
+ int user_marks_eor,
+ int *error,
+ uint32_t * sndout,
+ struct mbuf **new_tail)
+{
+ struct mbuf *m;
+
+ m = m_uiotombuf(uio, M_WAITOK, max_send_len, 0,
+ (M_PKTHDR | (user_marks_eor ? M_EOR : 0)));
+ if (m == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ *error = ENOMEM;
+ } else {
+ *sndout = m_length(m, NULL);
+ *new_tail = m_last(m);
+ }
+ return (m);
+}
+
+static int
+sctp_copy_one(struct sctp_stream_queue_pending *sp,
+ struct uio *uio,
+ int resv_upfront)
+{
+ int left;
+
+ left = sp->length;
+ sp->data = m_uiotombuf(uio, M_WAITOK, sp->length,
+ resv_upfront, 0);
+ if (sp->data == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ return (ENOMEM);
+ }
+ sp->tail_mbuf = m_last(sp->data);
+ return (0);
+}
+
+
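Both copy helpers above build their mbuf chains with m_uiotombuf(9): sctp_copy_resume() asks for a packet header (and M_EOR when the user marks explicit EOR), while sctp_copy_one() instead uses the align argument to reserve leading space for the DATA chunk header that gets prepended later. A reduced sketch of the second pattern, assuming the kernel mbuf and uio headers are in scope:

    /* Sketch: copy a queued user message into mbufs, leaving room up front
     * for the SCTP DATA chunk header. */
    static struct mbuf *
    copy_user_msg(struct uio *uio, int len)
    {
        return (m_uiotombuf(uio, M_WAITOK, len,
            sizeof(struct sctp_data_chunk), 0));
    }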
+
+static struct sctp_stream_queue_pending *
+sctp_copy_it_in(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ struct sctp_sndrcvinfo *srcv,
+ struct uio *uio,
+ struct sctp_nets *net,
+ int max_send_len,
+ int user_marks_eor,
+ int *error,
+ int non_blocking)
+{
+ /*-
+ * This routine must be very careful in its work. Protocol
+ * processing is up and running so care must be taken to spl...()
+ * when you need to do something that may affect the stcb/asoc. The
+ * sb is locked however. When data is copied the protocol processing
+ * should be enabled since this is a slower operation...
+ */
+ struct sctp_stream_queue_pending *sp = NULL;
+ int resv_in_first;
+
+ *error = 0;
+ /* Now can we send this? */
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
+ /* got data while shutting down */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
+ *error = ECONNRESET;
+ goto out_now;
+ }
+ sctp_alloc_a_strmoq(stcb, sp);
+ if (sp == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ *error = ENOMEM;
+ goto out_now;
+ }
+ sp->act_flags = 0;
+ sp->sender_all_done = 0;
+ sp->sinfo_flags = srcv->sinfo_flags;
+ sp->timetolive = srcv->sinfo_timetolive;
+ sp->ppid = srcv->sinfo_ppid;
+ sp->context = srcv->sinfo_context;
+ sp->strseq = 0;
+ (void)SCTP_GETTIME_TIMEVAL(&sp->ts);
+
+ sp->stream = srcv->sinfo_stream;
+ sp->length = min(uio->uio_resid, max_send_len);
+ if ((sp->length == (uint32_t) uio->uio_resid) &&
+ ((user_marks_eor == 0) ||
+ (srcv->sinfo_flags & SCTP_EOF) ||
+ (user_marks_eor && (srcv->sinfo_flags & SCTP_EOR)))) {
+ sp->msg_is_complete = 1;
+ } else {
+ sp->msg_is_complete = 0;
+ }
+ sp->sender_all_done = 0;
+ sp->some_taken = 0;
+ sp->put_last_out = 0;
+ resv_in_first = sizeof(struct sctp_data_chunk);
+ sp->data = sp->tail_mbuf = NULL;
+ *error = sctp_copy_one(sp, uio, resv_in_first);
+ if (*error) {
+ sctp_free_a_strmoq(stcb, sp);
+ sp = NULL;
+ } else {
+ if (sp->sinfo_flags & SCTP_ADDR_OVER) {
+ sp->net = net;
+ sp->addr_over = 1;
+ } else {
+ sp->net = asoc->primary_destination;
+ sp->addr_over = 0;
+ }
+ atomic_add_int(&sp->net->ref_count, 1);
+ sctp_set_prsctp_policy(stcb, sp);
+ }
+out_now:
+ return (sp);
+}
+
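Restating the completeness test in sctp_copy_it_in() above: the queued block is marked as a whole message only when this copy consumed the entire user buffer and either explicit-EOR mode is off or the caller flagged SCTP_EOF/SCTP_EOR; otherwise later sends on the same stream are expected to finish it. A condensed predicate (names local to this sketch):

    static int
    msg_is_complete(size_t copied, size_t resid, int explicit_eor, int sinfo_flags)
    {
        return (copied == resid &&
            (explicit_eor == 0 || (sinfo_flags & (SCTP_EOF | SCTP_EOR))));
    }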
+
+int
+sctp_sosend(struct socket *so,
+ struct sockaddr *addr,
+ struct uio *uio,
+ struct mbuf *top,
+ struct mbuf *control,
+ int flags,
+ struct thread *p
+)
+{
+ struct sctp_inpcb *inp;
+ int error, use_rcvinfo = 0;
+ struct sctp_sndrcvinfo srcv;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (control) {
+ /* process cmsg snd/rcv info (maybe an assoc-id) */
+ if (sctp_find_cmsg(SCTP_SNDRCV, (void *)&srcv, control,
+ sizeof(srcv))) {
+ /* got one */
+ use_rcvinfo = 1;
+ }
+ }
+ error = sctp_lower_sosend(so, addr, uio, top,
+ control,
+ flags,
+ use_rcvinfo, &srcv
+ ,p
+ );
+ return (error);
+}
+
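For context, the control chain that sctp_sosend() hands to sctp_find_cmsg() is built by the application as SCTP_SNDRCV ancillary data. An illustrative userland counterpart (a one-to-one style socket already connected, error handling omitted):

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <netinet/in.h>
    #include <netinet/sctp.h>
    #include <string.h>

    ssize_t
    send_on_stream(int sd, void *buf, size_t len, uint16_t stream)
    {
        struct sctp_sndrcvinfo info;
        char cbuf[CMSG_SPACE(sizeof(info))];
        struct iovec iov;
        struct msghdr msg;
        struct cmsghdr *cmsg;

        memset(&info, 0, sizeof(info));
        info.sinfo_stream = stream;     /* everything else left at defaults */

        iov.iov_base = buf;
        iov.iov_len = len;

        memset(&msg, 0, sizeof(msg));
        memset(cbuf, 0, sizeof(cbuf));
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
        msg.msg_control = cbuf;
        msg.msg_controllen = sizeof(cbuf);

        cmsg = CMSG_FIRSTHDR(&msg);
        cmsg->cmsg_level = IPPROTO_SCTP;
        cmsg->cmsg_type = SCTP_SNDRCV;
        cmsg->cmsg_len = CMSG_LEN(sizeof(info));
        memcpy(CMSG_DATA(cmsg), &info, sizeof(info));

        return (sendmsg(sd, &msg, 0));
    }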
+
+int
+sctp_lower_sosend(struct socket *so,
+ struct sockaddr *addr,
+ struct uio *uio,
+ struct mbuf *i_pak,
+ struct mbuf *control,
+ int flags,
+ int use_rcvinfo,
+ struct sctp_sndrcvinfo *srcv
+ ,
+ struct thread *p
+)
+{
+ unsigned int sndlen = 0, max_len;
+ int error, len;
+ struct mbuf *top = NULL;
+
+#if defined(__NetBSD__) || defined(__OpenBSD__)
+ int s;
+
+#endif
+ int queue_only = 0, queue_only_for_init = 0;
+ int free_cnt_applied = 0;
+ int un_sent = 0;
+ int now_filled = 0;
+ unsigned int inqueue_bytes = 0;
+ struct sctp_block_entry be;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb = NULL;
+ struct timeval now;
+ struct sctp_nets *net;
+ struct sctp_association *asoc;
+ struct sctp_inpcb *t_inp;
+ int user_marks_eor;
+ int create_lock_applied = 0;
+ int nagle_applies = 0;
+ int some_on_control = 0;
+ int got_all_of_the_send = 0;
+ int hold_tcblock = 0;
+ int non_blocking = 0;
+ int temp_flags = 0;
+ uint32_t local_add_more, local_soresv = 0;
+
+ error = 0;
+ net = NULL;
+ stcb = NULL;
+ asoc = NULL;
+
+ t_inp = inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ error = EFAULT;
+ if (i_pak) {
+ SCTP_RELEASE_PKT(i_pak);
+ }
+ return (error);
+ }
+ if ((uio == NULL) && (i_pak == NULL)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ user_marks_eor = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR);
+ atomic_add_int(&inp->total_sends, 1);
+ if (uio) {
+ if (uio->uio_resid < 0) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ return (EINVAL);
+ }
+ sndlen = uio->uio_resid;
+ } else {
+ top = SCTP_HEADER_TO_CHAIN(i_pak);
+ sndlen = SCTP_HEADER_LEN(i_pak);
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Send called addr:%p send length %d\n",
+ addr,
+ sndlen);
+ /*-
+ * Pre-screen address, if one is given the sin-len
+ * must be set correctly!
+ */
+ if (addr) {
+ if ((addr->sa_family == AF_INET) &&
+ (addr->sa_len != sizeof(struct sockaddr_in))) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ } else if ((addr->sa_family == AF_INET6) &&
+ (addr->sa_len != sizeof(struct sockaddr_in6))) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ }
+ hold_tcblock = 0;
+
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (inp->sctp_socket->so_qlimit)) {
+ /* The listener can NOT send */
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ error = EFAULT;
+ goto out_unlocked;
+ }
+ if ((use_rcvinfo) && srcv) {
+ if (INVALID_SINFO_FLAG(srcv->sinfo_flags) ||
+ PR_SCTP_INVALID_POLICY(srcv->sinfo_flags)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ if (srcv->sinfo_flags)
+ SCTP_STAT_INCR(sctps_sends_with_flags);
+
+ if (srcv->sinfo_flags & SCTP_SENDALL) {
+ /* it's a sendall */
+ error = sctp_sendall(inp, uio, top, srcv);
+ top = NULL;
+ goto out_unlocked;
+ }
+ }
+ /* now we must find the assoc */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ SCTP_INP_RLOCK(inp);
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOTCONN);
+ error = ENOTCONN;
+ goto out_unlocked;
+ }
+ hold_tcblock = 0;
+ SCTP_INP_RUNLOCK(inp);
+ if (addr) {
+ /* Must locate the net structure if addr given */
+ net = sctp_findnet(stcb, addr);
+ if (net) {
+ /* validate port was 0 or correct */
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)addr;
+ if ((sin->sin_port != 0) &&
+ (sin->sin_port != stcb->rport)) {
+ net = NULL;
+ }
+ }
+ temp_flags |= SCTP_ADDR_OVER;
+ } else
+ net = stcb->asoc.primary_destination;
+ if (addr && (net == NULL)) {
+ /* Could not find address, was it legal */
+ if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)addr;
+ if (sin->sin_addr.s_addr == 0) {
+ if ((sin->sin_port == 0) ||
+ (sin->sin_port == stcb->rport)) {
+ net = stcb->asoc.primary_destination;
+ }
+ }
+ } else {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)addr;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ if ((sin6->sin6_port == 0) ||
+ (sin6->sin6_port == stcb->rport)) {
+ net = stcb->asoc.primary_destination;
+ }
+ }
+ }
+ }
+ if (net == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ } else if (use_rcvinfo && srcv && srcv->sinfo_assoc_id) {
+ stcb = sctp_findassociation_ep_asocid(inp, srcv->sinfo_assoc_id, 0);
+ if (stcb) {
+ if (addr)
+ /*
+ * Must locate the net structure if addr
+ * given
+ */
+ net = sctp_findnet(stcb, addr);
+ else
+ net = stcb->asoc.primary_destination;
+ if ((srcv->sinfo_flags & SCTP_ADDR_OVER) &&
+ ((net == NULL) || (addr == NULL))) {
+ struct sockaddr_in *sin;
+
+ if (addr == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ sin = (struct sockaddr_in *)addr;
+ /* Validate port is 0 or correct */
+ if ((sin->sin_port != 0) &&
+ (sin->sin_port != stcb->rport)) {
+ net = NULL;
+ }
+ }
+ }
+ hold_tcblock = 0;
+ } else if (addr) {
+ /*-
+ * Since we did not use findep we must
+ * increment it, and if we don't find a tcb
+ * decrement it.
+ */
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_INCR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ stcb = sctp_findassociation_ep_addr(&t_inp, addr, &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ hold_tcblock = 1;
+ }
+ }
+ if ((stcb == NULL) && (addr)) {
+ /* Possible implicit send? */
+ SCTP_ASOC_CREATE_LOCK(inp);
+ create_lock_applied = 1;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
+ /* Should I really unlock ? */
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ error = EFAULT;
+ goto out_unlocked;
+
+ }
+ if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) &&
+ (addr->sa_family == AF_INET6)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_INCR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ /* With the lock applied look again */
+ stcb = sctp_findassociation_ep_addr(&t_inp, addr, &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ } else {
+ hold_tcblock = 1;
+ }
+ if (t_inp != inp) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOTCONN);
+ error = ENOTCONN;
+ goto out_unlocked;
+ }
+ }
+ if (stcb == NULL) {
+ if (addr == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOENT);
+ error = ENOENT;
+ goto out_unlocked;
+ } else {
+ /*
+ * UDP style, we must go ahead and start the INIT
+ * process
+ */
+ uint32_t vrf_id;
+
+ if ((use_rcvinfo) && (srcv) &&
+ ((srcv->sinfo_flags & SCTP_ABORT) ||
+ ((srcv->sinfo_flags & SCTP_EOF) &&
+ (sndlen == 0)))) {
+ /*-
+ * User asks to abort a non-existent assoc,
+ * or EOF a non-existent assoc with no data
+ */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOENT);
+ error = ENOENT;
+ goto out_unlocked;
+ }
+ /* get an asoc/stcb struct */
+ vrf_id = inp->def_vrf_id;
+#ifdef INVARIANTS
+ if (create_lock_applied == 0) {
+ panic("Error, should hold create lock and I don't?");
+ }
+#endif
+ stcb = sctp_aloc_assoc(inp, addr, 1, &error, 0, vrf_id,
+ p
+ );
+ if (stcb == NULL) {
+ /* Error is setup for us in the call */
+ goto out_unlocked;
+ }
+ if (create_lock_applied) {
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ create_lock_applied = 0;
+ } else {
+ SCTP_PRINTF("Huh-3? create lock should have been on??\n");
+ }
+ /*
+ * Turn on queue only flag to prevent data from
+ * being sent
+ */
+ queue_only = 1;
+ asoc = &stcb->asoc;
+ SCTP_SET_STATE(asoc, SCTP_STATE_COOKIE_WAIT);
+ (void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
+
+ /* initialize authentication params for the assoc */
+ sctp_initialize_auth_params(inp, stcb);
+
+ if (control) {
+ /*
+ * see if a init structure exists in cmsg
+ * headers
+ */
+ struct sctp_initmsg initm;
+ int i;
+
+ if (sctp_find_cmsg(SCTP_INIT, (void *)&initm, control,
+ sizeof(initm))) {
+ /*
+ * we have an INIT override of the
+ * default
+ */
+ if (initm.sinit_max_attempts)
+ asoc->max_init_times = initm.sinit_max_attempts;
+ if (initm.sinit_num_ostreams)
+ asoc->pre_open_streams = initm.sinit_num_ostreams;
+ if (initm.sinit_max_instreams)
+ asoc->max_inbound_streams = initm.sinit_max_instreams;
+ if (initm.sinit_max_init_timeo)
+ asoc->initial_init_rto_max = initm.sinit_max_init_timeo;
+ if (asoc->streamoutcnt < asoc->pre_open_streams) {
+ /* Default is NOT correct */
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, defout:%d pre_open:%d\n",
+ asoc->streamoutcnt, asoc->pre_open_streams);
+ /*
+ * What happens if this
+ * fails? we panic ...
+ */
+ {
+ struct sctp_stream_out *tmp_str;
+ int had_lock = 0;
+
+ if (hold_tcblock) {
+ had_lock = 1;
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_MALLOC(tmp_str,
+ struct sctp_stream_out *,
+ (asoc->pre_open_streams *
+ sizeof(struct sctp_stream_out)),
+ SCTP_M_STRMO);
+ if (had_lock) {
+ SCTP_TCB_LOCK(stcb);
+ }
+ if (tmp_str != NULL) {
+ SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
+ asoc->strmout = tmp_str;
+ asoc->streamoutcnt = asoc->pre_open_streams;
+ } else {
+ asoc->pre_open_streams = asoc->streamoutcnt;
+ }
+ }
+ for (i = 0; i < asoc->streamoutcnt; i++) {
+ /*-
+ * inbound side must be set
+ * to 0xffff, also NOTE when
+ * we get the INIT-ACK back
+ * (for INIT sender) we MUST
+ * reduce the count
+ * (streamoutcnt) but first
+ * check if we sent to any
+ * of the upper streams that
+ * were dropped (if some
+ * were). Those that were
+ * dropped must be notified
+ * to the upper layer as
+ * failed to send.
+ */
+ asoc->strmout[i].next_sequence_sent = 0x0;
+ TAILQ_INIT(&asoc->strmout[i].outqueue);
+ asoc->strmout[i].stream_no = i;
+ asoc->strmout[i].last_msg_incomplete = 0;
+ asoc->strmout[i].next_spoke.tqe_next = 0;
+ asoc->strmout[i].next_spoke.tqe_prev = 0;
+ }
+ }
+ }
+ }
+ hold_tcblock = 1;
+ /* out with the INIT */
+ queue_only_for_init = 1;
+ /*-
+ * we may want to dig in after this call and adjust the MTU
+ * value. It defaulted to 1500 (constant) but the ro
+ * structure may now have an update and thus we may need to
+ * change it BEFORE we append the message.
+ */
+ net = stcb->asoc.primary_destination;
+ asoc = &stcb->asoc;
+ }
+ }
+ if ((SCTP_SO_IS_NBIO(so)
+ || (flags & MSG_NBIO)
+ )) {
+ non_blocking = 1;
+ }
+ asoc = &stcb->asoc;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NO_FRAGMENT)) {
+ if (sndlen > asoc->smallest_mtu) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE);
+ error = EMSGSIZE;
+ goto out_unlocked;
+ }
+ }
+ /* would we block? */
+ if (non_blocking) {
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ if ((SCTP_SB_LIMIT_SND(so) <
+ (sndlen + inqueue_bytes + stcb->asoc.sb_send_resv)) ||
+ (stcb->asoc.chunks_on_out_queue >
+ sctp_max_chunks_on_queue)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EWOULDBLOCK);
+ if (sndlen > SCTP_SB_LIMIT_SND(so))
+ error = EMSGSIZE;
+ else
+ error = EWOULDBLOCK;
+ goto out_unlocked;
+ }
+ stcb->asoc.sb_send_resv += sndlen;
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ } else {
+ atomic_add_int(&stcb->asoc.sb_send_resv, sndlen);
+ }
+ local_soresv = sndlen;
+ /* Keep the stcb from being freed under our feet */
+ if (free_cnt_applied) {
+#ifdef INVARIANTS
+ panic("refcnt already incremented");
+#else
+ printf("refcnt:1 already incremented?\n");
+#endif
+ } else {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ free_cnt_applied = 1;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
+ error = ECONNRESET;
+ goto out_unlocked;
+ }
+ if (create_lock_applied) {
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ create_lock_applied = 0;
+ }
+ if (asoc->stream_reset_outstanding) {
+ /*
+ * Can't queue any data while stream reset is underway.
+ */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EAGAIN);
+ error = EAGAIN;
+ goto out_unlocked;
+ }
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ queue_only = 1;
+ }
+ if ((use_rcvinfo == 0) || (srcv == NULL)) {
+ /* Grab the default stuff from the asoc */
+ srcv = (struct sctp_sndrcvinfo *)&stcb->asoc.def_send;
+ }
+ /* we are now done with all control */
+ if (control) {
+ sctp_m_freem(control);
+ control = NULL;
+ }
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) ||
+ (asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) {
+ if ((use_rcvinfo) &&
+ (srcv->sinfo_flags & SCTP_ABORT)) {
+ ;
+ } else {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
+ error = ECONNRESET;
+ goto out_unlocked;
+ }
+ }
+ /* Ok, we will attempt a msgsnd :> */
+ if (p) {
+ p->td_ru.ru_msgsnd++;
+ }
+ if (stcb) {
+ if (((srcv->sinfo_flags | temp_flags) & SCTP_ADDR_OVER) == 0) {
+ net = stcb->asoc.primary_destination;
+ }
+ }
+ if (net == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ if ((net->flight_size > net->cwnd) && (sctp_cmt_on_off == 0)) {
+ /*-
+ * CMT: Added check for CMT above. net above is the primary
+ * dest. If CMT is ON, sender should always attempt to send
+ * with the output routine sctp_fill_outqueue() that loops
+ * through all destination addresses. Therefore, if CMT is
+ * ON, queue_only is NOT set to 1 here, so that
+ * sctp_chunk_output() can be called below.
+ */
+ queue_only = 1;
+
+ } else if (asoc->ifp_had_enobuf) {
+ SCTP_STAT_INCR(sctps_ifnomemqueued);
+ if (net->flight_size > (net->mtu * 2))
+ queue_only = 1;
+ asoc->ifp_had_enobuf = 0;
+ } else {
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ ((stcb->asoc.chunks_on_out_queue - stcb->asoc.total_flight_count) * sizeof(struct sctp_data_chunk)));
+ }
+ /* Are we aborting? */
+ if (srcv->sinfo_flags & SCTP_ABORT) {
+ struct mbuf *mm;
+ int tot_demand, tot_out = 0, max_out;
+
+ SCTP_STAT_INCR(sctps_sends_with_abort);
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ /* It has to be up before we abort */
+ /* how big is the user initiated abort? */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out;
+ }
+ if (hold_tcblock) {
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ }
+ if (top) {
+ struct mbuf *cntm = NULL;
+
+ mm = sctp_get_mbuf_for_msg(1, 0, M_WAIT, 1, MT_DATA);
+ if (sndlen != 0) {
+ cntm = top;
+ while (cntm) {
+ tot_out += SCTP_BUF_LEN(cntm);
+ cntm = SCTP_BUF_NEXT(cntm);
+ }
+ }
+ tot_demand = (tot_out + sizeof(struct sctp_paramhdr));
+ } else {
+ /* Must fit in a MTU */
+ tot_out = sndlen;
+ tot_demand = (tot_out + sizeof(struct sctp_paramhdr));
+ if (tot_demand > SCTP_DEFAULT_ADD_MORE) {
+ /* Too big */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE);
+ error = EMSGSIZE;
+ goto out;
+ }
+ mm = sctp_get_mbuf_for_msg(tot_demand, 0, M_WAIT, 1, MT_DATA);
+ }
+ if (mm == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOMEM);
+ error = ENOMEM;
+ goto out;
+ }
+ max_out = asoc->smallest_mtu - sizeof(struct sctp_paramhdr);
+ max_out -= sizeof(struct sctp_abort_msg);
+ if (tot_out > max_out) {
+ tot_out = max_out;
+ }
+ if (mm) {
+ struct sctp_paramhdr *ph;
+
+ /* now move forward the data pointer */
+ ph = mtod(mm, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons((sizeof(struct sctp_paramhdr) + tot_out));
+ ph++;
+ SCTP_BUF_LEN(mm) = tot_out + sizeof(struct sctp_paramhdr);
+ if (top == NULL) {
+ error = uiomove((caddr_t)ph, (int)tot_out, uio);
+ if (error) {
+ /*-
+ * Here if we can't get the user's data we
+ * still abort; we just don't get to
+ * send the user's note :-0
+ */
+ sctp_m_freem(mm);
+ mm = NULL;
+ }
+ } else {
+ if (sndlen != 0) {
+ SCTP_BUF_NEXT(mm) = top;
+ }
+ }
+ }
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ free_cnt_applied = 0;
+ /* release this lock, otherwise we hang on ourselves */
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_RESPONSE_TO_USER_REQ,
+ mm, SCTP_SO_LOCKED);
+ /* now relock the stcb so everything is sane */
+ hold_tcblock = 0;
+ stcb = NULL;
+ /*
+ * In this case top is already chained to mm; avoid a double
+ * free, since we free it below if top != NULL and the driver
+ * would free it after sending the packet out
+ */
+ if (sndlen != 0) {
+ top = NULL;
+ }
+ goto out_unlocked;
+ }
+ /* Calculate the maximum we can send */
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes) {
+ if (non_blocking) {
+ /* we already checked for non-blocking above. */
+ max_len = sndlen;
+ } else {
+ max_len = SCTP_SB_LIMIT_SND(so) - stcb->asoc.total_output_queue_size;
+ }
+ } else {
+ max_len = 0;
+ }
+ if (hold_tcblock) {
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ }
+ /* Is the stream no. valid? */
+ if (srcv->sinfo_stream >= asoc->streamoutcnt) {
+ /* Invalid stream number */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ if (asoc->strmout == NULL) {
+ /* huh? software error */
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT);
+ error = EFAULT;
+ goto out_unlocked;
+ }
+ /* Unless E_EOR mode is on, we must make a send FIT in one call. */
+ if ((user_marks_eor == 0) &&
+ (sndlen > SCTP_SB_LIMIT_SND(stcb->sctp_socket))) {
+ /* It will NEVER fit */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE);
+ error = EMSGSIZE;
+ goto out_unlocked;
+ }
+ if ((uio == NULL) && user_marks_eor) {
+ /*-
+ * We do not support eeor mode for
+ * sending with mbuf chains (like sendfile).
+ */
+ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out_unlocked;
+ }
+ if (user_marks_eor) {
+ local_add_more = sctp_add_more_threshold;
+ } else {
+ /*-
+ * For non-eeor the whole message must fit in
+ * the socket send buffer.
+ */
+ local_add_more = sndlen;
+ }
+ len = 0;
+ if (non_blocking) {
+ goto skip_preblock;
+ }
+ if (((max_len <= local_add_more) &&
+ (SCTP_SB_LIMIT_SND(so) > local_add_more)) ||
+ ((stcb->asoc.chunks_on_out_queue + stcb->asoc.stream_queue_cnt) > sctp_max_chunks_on_queue)) { /* if */
+ /* No room right now! */
+ SOCKBUF_LOCK(&so->so_snd);
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ while ((SCTP_SB_LIMIT_SND(so) < (inqueue_bytes + sctp_add_more_threshold)) ||
+ ((stcb->asoc.stream_queue_cnt + stcb->asoc.chunks_on_out_queue) > sctp_max_chunks_on_queue /* while */ )) {
+
+ if (sctp_logging_level & SCTP_BLK_LOGGING_ENABLE) {
+ sctp_log_block(SCTP_BLOCK_LOG_INTO_BLKA,
+ so, asoc, sndlen);
+ }
+ be.error = 0;
+ stcb->block_entry = &be;
+ error = sbwait(&so->so_snd);
+ stcb->block_entry = NULL;
+ if (error || so->so_error || be.error) {
+ if (error == 0) {
+ if (so->so_error)
+ error = so->so_error;
+ if (be.error) {
+ error = be.error;
+ }
+ }
+ SOCKBUF_UNLOCK(&so->so_snd);
+ goto out_unlocked;
+ }
+ if (sctp_logging_level & SCTP_BLK_LOGGING_ENABLE) {
+ sctp_log_block(SCTP_BLOCK_LOG_OUTOF_BLK,
+ so, asoc, stcb->asoc.total_output_queue_size);
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ goto out_unlocked;
+ }
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ }
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes) {
+ max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
+ } else {
+ max_len = 0;
+ }
+ SOCKBUF_UNLOCK(&so->so_snd);
+ }
+skip_preblock:
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ goto out_unlocked;
+ }
+ atomic_add_int(&stcb->total_sends, 1);
+ /*
+ * sndlen covers the mbuf case; uio_resid covers the non-mbuf case.
+ * NOTE: uio will be NULL when top/mbuf is passed
+ */
+ if (sndlen == 0) {
+ if (srcv->sinfo_flags & SCTP_EOF) {
+ got_all_of_the_send = 1;
+ goto dataless_eof;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out;
+ }
+ }
+ if (top == NULL) {
+ struct sctp_stream_queue_pending *sp;
+ struct sctp_stream_out *strm;
+ uint32_t sndout, initial_out;
+
+ initial_out = uio->uio_resid;
+
+ SCTP_TCB_SEND_LOCK(stcb);
+ if ((asoc->stream_locked) &&
+ (asoc->stream_locked_on != srcv->sinfo_stream)) {
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL);
+ error = EINVAL;
+ goto out;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+
+ strm = &stcb->asoc.strmout[srcv->sinfo_stream];
+ if (strm->last_msg_incomplete == 0) {
+ do_a_copy_in:
+ sp = sctp_copy_it_in(stcb, asoc, srcv, uio, net, max_len, user_marks_eor, &error, non_blocking);
+ if ((sp == NULL) || (error)) {
+ goto out;
+ }
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (sp->msg_is_complete) {
+ strm->last_msg_incomplete = 0;
+ asoc->stream_locked = 0;
+ } else {
+ /*
+ * Just got locked to this guy in case of an
+ * interrupt.
+ */
+ strm->last_msg_incomplete = 1;
+ asoc->stream_locked = 1;
+ asoc->stream_locked_on = srcv->sinfo_stream;
+ sp->sender_all_done = 0;
+ }
+ sctp_snd_sb_alloc(stcb, sp->length);
+ atomic_add_int(&asoc->stream_queue_cnt, 1);
+ if ((srcv->sinfo_flags & SCTP_UNORDERED) == 0) {
+ sp->strseq = strm->next_sequence_sent;
+ if (sctp_logging_level & SCTP_LOG_AT_SEND_2_SCTP) {
+ sctp_misc_ints(SCTP_STRMOUT_LOG_ASSIGN,
+ (uintptr_t) stcb, sp->length,
+ (uint32_t) ((srcv->sinfo_stream << 16) | sp->strseq), 0);
+ }
+ strm->next_sequence_sent++;
+ } else {
+ SCTP_STAT_INCR(sctps_sends_with_unord);
+ }
+ TAILQ_INSERT_TAIL(&strm->outqueue, sp, next);
+ if ((strm->next_spoke.tqe_next == NULL) &&
+ (strm->next_spoke.tqe_prev == NULL)) {
+ /* Not on wheel, insert */
+ sctp_insert_on_wheel(stcb, asoc, strm, 1);
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ } else {
+ SCTP_TCB_SEND_LOCK(stcb);
+ sp = TAILQ_LAST(&strm->outqueue, sctp_streamhead);
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ if (sp == NULL) {
+ /* ???? Huh ??? last msg is gone */
+#ifdef INVARIANTS
+ panic("Warning: Last msg marked incomplete, yet nothing left?");
+#else
+ SCTP_PRINTF("Warning: Last msg marked incomplete, yet nothing left?\n");
+ strm->last_msg_incomplete = 0;
+#endif
+ goto do_a_copy_in;
+
+ }
+ }
+ while (uio->uio_resid > 0) {
+ /* How much room do we have? */
+ struct mbuf *new_tail, *mm;
+
+ if (SCTP_SB_LIMIT_SND(so) > stcb->asoc.total_output_queue_size)
+ max_len = SCTP_SB_LIMIT_SND(so) - stcb->asoc.total_output_queue_size;
+ else
+ max_len = 0;
+
+ if ((max_len > sctp_add_more_threshold) ||
+ (max_len && (SCTP_SB_LIMIT_SND(so) < sctp_add_more_threshold)) ||
+ (uio->uio_resid &&
+ (uio->uio_resid <= (int)max_len))) {
+ sndout = 0;
+ new_tail = NULL;
+ if (hold_tcblock) {
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ }
+ mm = sctp_copy_resume(sp, uio, srcv, max_len, user_marks_eor, &error, &sndout, &new_tail);
+ if ((mm == NULL) || error) {
+ if (mm) {
+ sctp_m_freem(mm);
+ }
+ goto out;
+ }
+ /* Update the mbuf and count */
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ /*
+ * we need to get out. Peer probably
+ * aborted.
+ */
+ sctp_m_freem(mm);
+ if (stcb->asoc.state & SCTP_PCB_FLAGS_WAS_ABORTED) {
+ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET);
+ error = ECONNRESET;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ goto out;
+ }
+ if (sp->tail_mbuf) {
+ /* tack it to the end */
+ SCTP_BUF_NEXT(sp->tail_mbuf) = mm;
+ sp->tail_mbuf = new_tail;
+ } else {
+ /* A stolen mbuf */
+ sp->data = mm;
+ sp->tail_mbuf = new_tail;
+ }
+ sctp_snd_sb_alloc(stcb, sndout);
+ atomic_add_int(&sp->length, sndout);
+ len += sndout;
+
+ /* Did we reach EOR? */
+ if ((uio->uio_resid == 0) &&
+ ((user_marks_eor == 0) ||
+ (srcv->sinfo_flags & SCTP_EOF) ||
+ (user_marks_eor && (srcv->sinfo_flags & SCTP_EOR)))
+ ) {
+ sp->msg_is_complete = 1;
+ } else {
+ sp->msg_is_complete = 0;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ }
+ if (uio->uio_resid == 0) {
+ /* got it all? */
+ continue;
+ }
+ /* PR-SCTP? */
+ if ((asoc->peer_supports_prsctp) && (asoc->sent_queue_cnt_removeable > 0)) {
+ /*
+ * This is ugly but we must assure locking
+ * order
+ */
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ sctp_prune_prsctp(stcb, asoc, srcv, sndlen);
+ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * sizeof(struct sctp_data_chunk));
+ if (SCTP_SB_LIMIT_SND(so) > stcb->asoc.total_output_queue_size)
+ max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes;
+ else
+ max_len = 0;
+ if (max_len > 0) {
+ continue;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ }
+ /* wait for space now */
+ if (non_blocking) {
+ /* Non-blocking io in place out */
+ goto skip_out_eof;
+ }
+ if ((net->flight_size > net->cwnd) &&
+ (sctp_cmt_on_off == 0)) {
+ queue_only = 1;
+ } else if (asoc->ifp_had_enobuf) {
+ SCTP_STAT_INCR(sctps_ifnomemqueued);
+ if (net->flight_size > (net->mtu * 2)) {
+ queue_only = 1;
+ } else {
+ queue_only = 0;
+ }
+ asoc->ifp_had_enobuf = 0;
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ ((stcb->asoc.chunks_on_out_queue - stcb->asoc.total_flight_count) *
+ sizeof(struct sctp_data_chunk)));
+ } else {
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ ((stcb->asoc.chunks_on_out_queue - stcb->asoc.total_flight_count) *
+ sizeof(struct sctp_data_chunk)));
+ if (net->flight_size > (net->mtu * stcb->asoc.max_burst)) {
+ queue_only = 1;
+ SCTP_STAT_INCR(sctps_send_burst_avoid);
+ } else if (net->flight_size > net->cwnd) {
+ queue_only = 1;
+ SCTP_STAT_INCR(sctps_send_cwnd_avoid);
+ } else {
+ queue_only = 0;
+ }
+ }
+ if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
+ (stcb->asoc.total_flight > 0) &&
+ (stcb->asoc.stream_queue_cnt < SCTP_MAX_DATA_BUNDLING) &&
+ (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))
+ ) {
+
+ /*-
+ * Ok, Nagle is set on and we have data outstanding.
+ * Don't send anything and let SACKs drive out the
+ * data unless we have a "full" segment to send.
+ */
+ if (sctp_logging_level & SCTP_NAGLE_LOGGING_ENABLE) {
+ sctp_log_nagle_event(stcb, SCTP_NAGLE_APPLIED);
+ }
+ SCTP_STAT_INCR(sctps_naglequeued);
+ nagle_applies = 1;
+ } else {
+ if (sctp_logging_level & SCTP_NAGLE_LOGGING_ENABLE) {
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY))
+ sctp_log_nagle_event(stcb, SCTP_NAGLE_SKIPPED);
+ }
+ SCTP_STAT_INCR(sctps_naglesent);
+ nagle_applies = 0;
+ }
+ /* What about the INIT, send it maybe */
+ if (sctp_logging_level & SCTP_BLK_LOGGING_ENABLE) {
+
+ sctp_misc_ints(SCTP_CWNDLOG_PRESEND, queue_only_for_init, queue_only,
+ nagle_applies, un_sent);
+ sctp_misc_ints(SCTP_CWNDLOG_PRESEND, stcb->asoc.total_output_queue_size,
+ stcb->asoc.total_flight,
+ stcb->asoc.chunks_on_out_queue, stcb->asoc.total_flight_count);
+ }
+ if (queue_only_for_init) {
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ /* a collision took us forward? */
+ queue_only_for_init = 0;
+ queue_only = 0;
+ } else {
+ sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
+ SCTP_SET_STATE(asoc, SCTP_STATE_COOKIE_WAIT);
+ queue_only_for_init = 0;
+ queue_only = 1;
+ }
+ }
+ if ((queue_only == 0) && (nagle_applies == 0)
+ ) {
+ /*-
+ * need to start chunk output
+ * before blocking.. note that if
+ * a lock is already applied, then
+ * the input via the net is happening
+ * and I don't need to start output :-D
+ */
+ if (hold_tcblock == 0) {
+ if (SCTP_TCB_TRYLOCK(stcb)) {
+ hold_tcblock = 1;
+ sctp_chunk_output(inp,
+ stcb,
+ SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
+ }
+ } else {
+ sctp_chunk_output(inp,
+ stcb,
+ SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
+ }
+ if (hold_tcblock == 1) {
+ SCTP_TCB_UNLOCK(stcb);
+ hold_tcblock = 0;
+ }
+ }
+ SOCKBUF_LOCK(&so->so_snd);
+ /*-
+ * This is a bit strange, but I think it will
+ * work. The total_output_queue_size is locked and
+ * protected by the TCB_LOCK, which we just released.
+ * There is a race that can occur between releasing it
+ * above, and me getting the socket lock, where sacks
+ * come in but we have not put the SB_WAIT on the
+ * so_snd buffer to get the wakeup. After the LOCK
+ * is applied the sack_processing will also need to
+ * LOCK the so->so_snd to do the actual sowwakeup(). So
+ * once we have the socket buffer lock if we recheck the
+ * size we KNOW we will get to sleep safely with the
+ * wakeup flag in place.
+ */
+ if (SCTP_SB_LIMIT_SND(so) <= (stcb->asoc.total_output_queue_size +
+ min(sctp_add_more_threshold, SCTP_SB_LIMIT_SND(so)))
+ ) {
+ if (sctp_logging_level & SCTP_BLK_LOGGING_ENABLE) {
+ sctp_log_block(SCTP_BLOCK_LOG_INTO_BLK,
+ so, asoc, uio->uio_resid);
+ }
+ be.error = 0;
+ stcb->block_entry = &be;
+ error = sbwait(&so->so_snd);
+ stcb->block_entry = NULL;
+
+ if (error || so->so_error || be.error) {
+ if (error == 0) {
+ if (so->so_error)
+ error = so->so_error;
+ if (be.error) {
+ error = be.error;
+ }
+ }
+ SOCKBUF_UNLOCK(&so->so_snd);
+ goto out_unlocked;
+ }
+ if (sctp_logging_level & SCTP_BLK_LOGGING_ENABLE) {
+ sctp_log_block(SCTP_BLOCK_LOG_OUTOF_BLK,
+ so, asoc, stcb->asoc.total_output_queue_size);
+ }
+ }
+ SOCKBUF_UNLOCK(&so->so_snd);
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ goto out_unlocked;
+ }
+ }
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (sp) {
+ if (sp->msg_is_complete == 0) {
+ strm->last_msg_incomplete = 1;
+ asoc->stream_locked = 1;
+ asoc->stream_locked_on = srcv->sinfo_stream;
+ } else {
+ sp->sender_all_done = 1;
+ strm->last_msg_incomplete = 0;
+ asoc->stream_locked = 0;
+ }
+ } else {
+ SCTP_PRINTF("Huh no sp TSNH?\n");
+ strm->last_msg_incomplete = 0;
+ asoc->stream_locked = 0;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ if (uio->uio_resid == 0) {
+ got_all_of_the_send = 1;
+ }
+ } else if (top) {
+ /* We send in a 0, since we do NOT have any locks */
+ error = sctp_msg_append(stcb, net, top, srcv, 0);
+ top = NULL;
+ if (srcv->sinfo_flags & SCTP_EOF) {
+ /*
+ * This should only happen for Panda for the mbuf
+ * send case, which does NOT yet support EEOR mode.
+ * Thus, we can just set this flag to do the proper
+ * EOF handling.
+ */
+ got_all_of_the_send = 1;
+ }
+ }
+ if (error) {
+ goto out;
+ }
+dataless_eof:
+ /* EOF thing ? */
+ if ((srcv->sinfo_flags & SCTP_EOF) &&
+ (got_all_of_the_send == 1) &&
+ (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE)
+ ) {
+ int cnt;
+
+ SCTP_STAT_INCR(sctps_sends_with_eof);
+ error = 0;
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ cnt = sctp_is_there_unsent_data(stcb);
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (cnt == 0)) {
+ if (asoc->locked_on_sending) {
+ goto abort_anyway;
+ }
+ /* there is nothing queued to send, so I'm done... */
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ /* only send SHUTDOWN the first time through */
+ sctp_send_shutdown(stcb, stcb->asoc.primary_destination);
+ if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ }
+ } else {
+ /*-
+ * we still got (or just got) data to send, so set
+ * SHUTDOWN_PENDING
+ */
+ /*-
+ * XXX sockets draft says that SCTP_EOF should be
+ * sent with no data. currently, we will allow user
+ * data to be sent first and move to
+ * SHUTDOWN-PENDING
+ */
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ if (asoc->locked_on_sending) {
+ /* Locked to send out the data */
+ struct sctp_stream_queue_pending *sp;
+
+ sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
+ if (sp) {
+ if ((sp->length == 0) && (sp->msg_is_complete == 0))
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ }
+ }
+ asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ abort_anyway:
+ if (free_cnt_applied) {
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ free_cnt_applied = 0;
+ }
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_RESPONSE_TO_USER_REQ,
+ NULL, SCTP_SO_LOCKED);
+ /*
+ * now relock the stcb so everything
+ * is sane
+ */
+ hold_tcblock = 0;
+ stcb = NULL;
+ goto out;
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_NODELAY);
+ }
+ }
+ }
+skip_out_eof:
+ if (!TAILQ_EMPTY(&stcb->asoc.control_send_queue)) {
+ some_on_control = 1;
+ }
+ if ((net->flight_size > net->cwnd) &&
+ (sctp_cmt_on_off == 0)) {
+ queue_only = 1;
+ } else if (asoc->ifp_had_enobuf) {
+ SCTP_STAT_INCR(sctps_ifnomemqueued);
+ if (net->flight_size > (net->mtu * 2)) {
+ queue_only = 1;
+ } else {
+ queue_only = 0;
+ }
+ asoc->ifp_had_enobuf = 0;
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ ((stcb->asoc.chunks_on_out_queue - stcb->asoc.total_flight_count) *
+ sizeof(struct sctp_data_chunk)));
+ } else {
+ un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) +
+ ((stcb->asoc.chunks_on_out_queue - stcb->asoc.total_flight_count) *
+ sizeof(struct sctp_data_chunk)));
+ if (net->flight_size > (net->mtu * stcb->asoc.max_burst)) {
+ queue_only = 1;
+ SCTP_STAT_INCR(sctps_send_burst_avoid);
+ } else if (net->flight_size > net->cwnd) {
+ queue_only = 1;
+ SCTP_STAT_INCR(sctps_send_cwnd_avoid);
+ } else {
+ queue_only = 0;
+ }
+ }
+ if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) &&
+ (stcb->asoc.total_flight > 0) &&
+ (stcb->asoc.stream_queue_cnt < SCTP_MAX_DATA_BUNDLING) &&
+ (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))
+ ) {
+ /*-
+ * Ok, Nagle is set on and we have data outstanding.
+ * Don't send anything and let SACKs drive out the
+ * data unless we have a "full" segment to send.
+ */
+ if (sctp_logging_level & SCTP_NAGLE_LOGGING_ENABLE) {
+ sctp_log_nagle_event(stcb, SCTP_NAGLE_APPLIED);
+ }
+ SCTP_STAT_INCR(sctps_naglequeued);
+ nagle_applies = 1;
+ } else {
+ if (sctp_logging_level & SCTP_NAGLE_LOGGING_ENABLE) {
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY))
+ sctp_log_nagle_event(stcb, SCTP_NAGLE_SKIPPED);
+ }
+ SCTP_STAT_INCR(sctps_naglesent);
+ nagle_applies = 0;
+ }
+ if (queue_only_for_init) {
+ if (hold_tcblock == 0) {
+ SCTP_TCB_LOCK(stcb);
+ hold_tcblock = 1;
+ }
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) {
+ /* a collision took us forward? */
+ queue_only_for_init = 0;
+ queue_only = 0;
+ } else {
+ sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
+ SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
+ queue_only_for_init = 0;
+ queue_only = 1;
+ }
+ }
+ if ((queue_only == 0) && (nagle_applies == 0) && (stcb->asoc.peers_rwnd && un_sent)) {
+ /* we can attempt to send too. */
+ if (hold_tcblock == 0) {
+ /*
+ * If there is activity recv'ing sacks no need to
+ * send
+ */
+ if (SCTP_TCB_TRYLOCK(stcb)) {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
+ hold_tcblock = 1;
+ }
+ } else {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
+ }
+ } else if ((queue_only == 0) &&
+ (stcb->asoc.peers_rwnd == 0) &&
+ (stcb->asoc.total_flight == 0)) {
+ /* We get to have a probe outstanding */
+ if (hold_tcblock == 0) {
+ hold_tcblock = 1;
+ SCTP_TCB_LOCK(stcb);
+ }
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED);
+ } else if (some_on_control) {
+ int num_out, reason, cwnd_full, frag_point;
+
+ /* Here we do control only */
+ if (hold_tcblock == 0) {
+ hold_tcblock = 1;
+ SCTP_TCB_LOCK(stcb);
+ }
+ frag_point = sctp_get_frag_point(stcb, &stcb->asoc);
+ (void)sctp_med_chunk_output(inp, stcb, &stcb->asoc, &num_out,
+ &reason, 1, &cwnd_full, 1, &now, &now_filled, frag_point, SCTP_SO_LOCKED);
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "USR Send complete qo:%d prw:%d unsent:%d tf:%d cooq:%d toqs:%d err:%d",
+ queue_only, stcb->asoc.peers_rwnd, un_sent,
+ stcb->asoc.total_flight, stcb->asoc.chunks_on_out_queue,
+ stcb->asoc.total_output_queue_size, error);
+
+out:
+out_unlocked:
+
+ if (local_soresv && stcb) {
+ atomic_subtract_int(&stcb->asoc.sb_send_resv, sndlen);
+ local_soresv = 0;
+ }
+ if (create_lock_applied) {
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ create_lock_applied = 0;
+ }
+ if ((stcb) && hold_tcblock) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ if (stcb && free_cnt_applied) {
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ }
+#ifdef INVARIANTS
+ if (stcb) {
+ if (mtx_owned(&stcb->tcb_mtx)) {
+ panic("Leaving with tcb mtx owned?");
+ }
+ if (mtx_owned(&stcb->tcb_send_mtx)) {
+ panic("Leaving with tcb send mtx owned?");
+ }
+ }
+#endif
+ if (top) {
+ sctp_m_freem(top);
+ }
+ if (control) {
+ sctp_m_freem(control);
+ }
+ return (error);
+}
+
+
+/*
+ * generate an AUTHentication chunk, if required
+ */
+struct mbuf *
+sctp_add_auth_chunk(struct mbuf *m, struct mbuf **m_end,
+ struct sctp_auth_chunk **auth_ret, uint32_t * offset,
+ struct sctp_tcb *stcb, uint8_t chunk)
+{
+ struct mbuf *m_auth;
+ struct sctp_auth_chunk *auth;
+ int chunk_len;
+
+ if ((m_end == NULL) || (auth_ret == NULL) || (offset == NULL) ||
+ (stcb == NULL))
+ return (m);
+
+ /* sysctl disabled auth? */
+ if (sctp_auth_disable)
+ return (m);
+
+ /* peer doesn't do auth... */
+ if (!stcb->asoc.peer_supports_auth) {
+ return (m);
+ }
+ /* does the requested chunk require auth? */
+ if (!sctp_auth_is_required_chunk(chunk, stcb->asoc.peer_auth_chunks)) {
+ return (m);
+ }
+ m_auth = sctp_get_mbuf_for_msg(sizeof(*auth), 0, M_DONTWAIT, 1, MT_HEADER);
+ if (m_auth == NULL) {
+ /* no mbuf's */
+ return (m);
+ }
+ /* reserve some space if this will be the first mbuf */
+ if (m == NULL)
+ SCTP_BUF_RESV_UF(m_auth, SCTP_MIN_OVERHEAD);
+ /* fill in the AUTH chunk details */
+ auth = mtod(m_auth, struct sctp_auth_chunk *);
+ bzero(auth, sizeof(*auth));
+ auth->ch.chunk_type = SCTP_AUTHENTICATION;
+ auth->ch.chunk_flags = 0;
+ chunk_len = sizeof(*auth) +
+ sctp_get_hmac_digest_len(stcb->asoc.peer_hmac_id);
+ auth->ch.chunk_length = htons(chunk_len);
+ auth->hmac_id = htons(stcb->asoc.peer_hmac_id);
+ /* key id and hmac digest will be computed and filled in upon send */
+
+ /* save the offset where the auth was inserted into the chain */
+ if (m != NULL) {
+ struct mbuf *cn;
+
+ *offset = 0;
+ cn = m;
+ while (cn) {
+ *offset += SCTP_BUF_LEN(cn);
+ cn = SCTP_BUF_NEXT(cn);
+ }
+ } else
+ *offset = 0;
+
+ /* update length and return pointer to the auth chunk */
+ SCTP_BUF_LEN(m_auth) = chunk_len;
+ m = sctp_copy_mbufchain(m_auth, m, m_end, 1, chunk_len, 0);
+ if (auth_ret != NULL)
+ *auth_ret = auth;
+
+ return (m);
+}
+
+int
+sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t * ro)
+{
+ struct nd_prefix *pfx = NULL;
+ struct nd_pfxrouter *pfxrtr = NULL;
+ struct sockaddr_in6 gw6;
+
+ if (ro == NULL || ro->ro_rt == NULL || src6->sin6_family != AF_INET6)
+ return (0);
+
+ /* get prefix entry of address */
+ LIST_FOREACH(pfx, &nd_prefix, ndpr_entry) {
+ if (pfx->ndpr_stateflags & NDPRF_DETACHED)
+ continue;
+ if (IN6_ARE_MASKED_ADDR_EQUAL(&pfx->ndpr_prefix.sin6_addr,
+ &src6->sin6_addr, &pfx->ndpr_mask))
+ break;
+ }
+ /* no prefix entry in the prefix list */
+ if (pfx == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "No prefix entry for ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6);
+ return (0);
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "v6src_match_nexthop(), Prefix entry is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6);
+
+ /* search installed gateway from prefix entry */
+ for (pfxrtr = pfx->ndpr_advrtrs.lh_first; pfxrtr; pfxrtr =
+ pfxrtr->pfr_next) {
+ memset(&gw6, 0, sizeof(struct sockaddr_in6));
+ gw6.sin6_family = AF_INET6;
+ gw6.sin6_len = sizeof(struct sockaddr_in6);
+ memcpy(&gw6.sin6_addr, &pfxrtr->router->rtaddr,
+ sizeof(struct in6_addr));
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "prefix router is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)&gw6);
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "installed router is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, ro->ro_rt->rt_gateway);
+ if (sctp_cmpaddr((struct sockaddr *)&gw6,
+ ro->ro_rt->rt_gateway)) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is installed\n");
+ return (1);
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is not installed\n");
+ return (0);
+}
+int
+sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t * ro)
+{
+ struct sockaddr_in *sin, *mask;
+ struct ifaddr *ifa;
+ struct in_addr srcnetaddr, gwnetaddr;
+
+ if (ro == NULL || ro->ro_rt == NULL ||
+ sifa->address.sa.sa_family != AF_INET) {
+ return (0);
+ }
+ ifa = (struct ifaddr *)sifa->ifa;
+ mask = (struct sockaddr_in *)(ifa->ifa_netmask);
+ sin = (struct sockaddr_in *)&sifa->address.sin;
+ srcnetaddr.s_addr = (sin->sin_addr.s_addr & mask->sin_addr.s_addr);
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: src address is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa);
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", srcnetaddr.s_addr);
+
+ sin = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
+ gwnetaddr.s_addr = (sin->sin_addr.s_addr & mask->sin_addr.s_addr);
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: nexthop is ");
+ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, ro->ro_rt->rt_gateway);
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", gwnetaddr.s_addr);
+ if (srcnetaddr.s_addr == gwnetaddr.s_addr) {
+ return (1);
+ }
+ return (0);
+}
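
Both *_match_nexthop() helpers above reduce to the same test: mask the candidate source address and the route's gateway with the relevant netmask and compare the results. A minimal user-space sketch of the IPv4 case follows; the helper name and example addresses are illustrative only and are not part of this commit.

#include <netinet/in.h>		/* struct in_addr */

/*
 * Stand-alone sketch of the check performed by sctp_v4src_match_nexthop():
 * the source and the gateway "match" when they fall in the same network
 * under the source interface's netmask.
 */
static int
same_subnet(struct in_addr src, struct in_addr gw, struct in_addr mask)
{
	return ((src.s_addr & mask.s_addr) == (gw.s_addr & mask.s_addr));
}

For example, 192.0.2.10 and a gateway of 192.0.2.1 match under a 255.255.255.0 mask, so that source address is considered usable for that next hop.
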
Index: ip_output.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -L sys/netinet/ip_output.c -L sys/netinet/ip_output.c -u -r1.6 -r1.7
--- sys/netinet/ip_output.c
+++ sys/netinet/ip_output.c
@@ -27,10 +27,11 @@
* SUCH DAMAGE.
*
* @(#)ip_output.c 8.3 (Berkeley) 1/21/94
- * $FreeBSD: src/sys/netinet/ip_output.c,v 1.242.2.8 2006/01/31 16:06:05 andre Exp $
- * $MidnightBSD$
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_output.c,v 1.276 2007/10/07 20:44:23 silby Exp $");
+
#include "opt_ipfw.h"
#include "opt_ipsec.h"
#include "opt_mac.h"
@@ -39,9 +40,9 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -58,26 +59,16 @@
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
-
-#include <machine/in_cksum.h>
-
-static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
+#include <netinet/ip_options.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#include <netkey/key.h>
-#ifdef IPSEC_DEBUG
-#include <netkey/key_debug.h>
-#else
-#define KEYDEBUG(lev,arg)
-#endif
-#endif /*IPSEC*/
-
-#ifdef FAST_IPSEC
+#include <netinet/ip_ipsec.h>
#include <netipsec/ipsec.h>
-#include <netipsec/xform.h>
-#include <netipsec/key.h>
-#endif /*FAST_IPSEC*/
+#endif /* IPSEC */
+
+#include <machine/in_cksum.h>
+
+#include <security/mac/mac_framework.h>
#define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\
x, (ntohl(a.s_addr)>>24)&0xFF,\
@@ -93,16 +84,8 @@
&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
#endif
-static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *);
-static struct ifnet *ip_multicast_if(struct in_addr *, int *);
static void ip_mloopback
(struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
-static int ip_getmoptions(struct inpcb *, struct sockopt *);
-static int ip_pcbopts(struct inpcb *, int, struct mbuf *);
-static int ip_setmoptions(struct inpcb *, struct sockopt *);
-static struct ip_moptions *ip_findmoptions(struct inpcb *inp);
-
-int ip_optcopy(struct ip *, struct ip *);
extern struct protosw inetsw[];
@@ -116,13 +99,14 @@
* inserted, so must have a NULL opt pointer.
*/
int
-ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro,
- int flags, struct ip_moptions *imo, struct inpcb *inp)
+ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
+ struct ip_moptions *imo, struct inpcb *inp)
{
struct ip *ip;
struct ifnet *ifp = NULL; /* keep compiler happy */
struct mbuf *m0;
int hlen = sizeof (struct ip);
+ int mtu;
int len, error = 0;
struct sockaddr_in *dst = NULL; /* keep compiler happy */
struct in_ifaddr *ia = NULL;
@@ -132,18 +116,8 @@
#ifdef IPFIREWALL_FORWARD
struct m_tag *fwd_tag = NULL;
#endif
-#ifdef IPSEC
- struct secpolicy *sp = NULL;
-#endif
-#ifdef FAST_IPSEC
- struct secpolicy *sp = NULL;
- struct tdb_ident *tdbi;
- struct m_tag *mtag;
- int s;
-#endif /* FAST_IPSEC */
-
M_ASSERTPKTHDR(m);
-
+
if (ro == NULL) {
ro = &iproute;
bzero(ro, sizeof (*ro));
@@ -193,7 +167,7 @@
dst->sin_family != AF_INET ||
dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
RTFREE(ro->ro_rt);
- ro->ro_rt = (struct rtentry *)0;
+ ro->ro_rt = (struct rtentry *)NULL;
}
#ifdef IPFIREWALL_FORWARD
if (ro->ro_rt == NULL && fwd_tag == NULL) {
@@ -206,10 +180,24 @@
dst->sin_addr = ip->ip_dst;
}
/*
- * If routing to interface only,
- * short circuit routing lookup.
+ * If routing to interface only, short circuit routing lookup.
+ * The use of an all-ones broadcast address implies this; an
+ * interface is specified by the broadcast address of an interface,
+ * or the destination address of a ptp interface.
*/
- if (flags & IP_ROUTETOIF) {
+ if (flags & IP_SENDONES) {
+ if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL &&
+ (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) {
+ ipstat.ips_noroute++;
+ error = ENETUNREACH;
+ goto bad;
+ }
+ ip->ip_dst.s_addr = INADDR_BROADCAST;
+ dst->sin_addr = ip->ip_dst;
+ ifp = ia->ia_ifp;
+ ip->ip_ttl = 1;
+ isbroadcast = 1;
+ } else if (flags & IP_ROUTETOIF) {
if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL &&
(ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) {
ipstat.ips_noroute++;
@@ -251,6 +239,24 @@
else
isbroadcast = in_broadcast(dst->sin_addr, ifp);
}
+ /*
+ * Calculate MTU. If we have a route that is up, use that,
+ * otherwise use the interface's MTU.
+ */
+ if (ro->ro_rt != NULL && (ro->ro_rt->rt_flags & (RTF_UP|RTF_HOST))) {
+ /*
+ * This case can happen if the user changed the MTU
+ * of an interface after enabling IP on it. Because
+ * most netifs don't keep track of routes pointing to
+ * them, there is no way for one to update all its
+ * routes when the MTU is changed.
+ */
+ if (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)
+ ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
+ mtu = ro->ro_rt->rt_rmx.rmx_mtu;
+ } else {
+ mtu = ifp->if_mtu;
+ }
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
struct in_multi *inm;
@@ -351,7 +357,7 @@
goto sendit;
}
-#ifndef notdef
+
/*
* If the source address is not specified yet, use the address
	 * of the outgoing interface.
@@ -362,7 +368,7 @@
ip->ip_src = IA_SIN(ia)->sin_addr;
}
}
-#endif /* notdef */
+
/*
* Verify that we have any chance at all of being able to queue the
* packet or packet fragments, unless ALTQ is enabled on the given
@@ -370,10 +376,10 @@
*/
#ifdef ALTQ
if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
- ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
+ ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
ifp->if_snd.ifq_maxlen))
#else
- if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
+ if ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
ifp->if_snd.ifq_maxlen)
#endif /* ALTQ */
{
@@ -398,12 +404,10 @@
goto bad;
}
/* don't allow broadcast messages to be fragmented */
- if (ip->ip_len > ifp->if_mtu) {
+ if (ip->ip_len > mtu) {
error = EMSGSIZE;
goto bad;
}
- if (flags & IP_SENDONES)
- ip->ip_dst.s_addr = INADDR_BROADCAST;
m->m_flags |= M_BCAST;
} else {
m->m_flags &= ~M_BCAST;
@@ -411,256 +415,22 @@
sendit:
#ifdef IPSEC
- /* get SP for this packet */
- if (inp == NULL)
- sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
- flags, &error);
- else
- sp = ipsec4_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
-
- if (sp == NULL) {
- ipsecstat.out_inval++;
- goto bad;
- }
-
- error = 0;
-
- /* check policy */
- switch (sp->policy) {
- case IPSEC_POLICY_DISCARD:
- /*
- * This packet is just discarded.
- */
- ipsecstat.out_polvio++;
+ switch(ip_ipsec_output(&m, inp, &flags, &error, &ro, &iproute, &dst, &ia, &ifp)) {
+ case 1:
goto bad;
-
- case IPSEC_POLICY_BYPASS:
- case IPSEC_POLICY_NONE:
- case IPSEC_POLICY_TCP:
- /* no need to do IPsec. */
- goto skip_ipsec;
-
- case IPSEC_POLICY_IPSEC:
- if (sp->req == NULL) {
- /* acquire a policy */
- error = key_spdacquire(sp);
- goto bad;
- }
- break;
-
- case IPSEC_POLICY_ENTRUST:
+ case -1:
+ goto done;
+ case 0:
default:
- printf("ip_output: Invalid policy found. %d\n", sp->policy);
- }
- {
- struct ipsec_output_state state;
- bzero(&state, sizeof(state));
- state.m = m;
- if (flags & IP_ROUTETOIF) {
- state.ro = &iproute;
- bzero(&iproute, sizeof(iproute));
- } else
- state.ro = ro;
- state.dst = (struct sockaddr *)dst;
-
- ip->ip_sum = 0;
-
- /*
- * XXX
- * delayed checksums are not currently compatible with IPsec
- */
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
- in_delayed_cksum(m);
- m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
- }
-
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
-
- error = ipsec4_output(&state, sp, flags);
-
- m = state.m;
- if (flags & IP_ROUTETOIF) {
- /*
- * if we have tunnel mode SA, we may need to ignore
- * IP_ROUTETOIF.
- */
- if (state.ro != &iproute || state.ro->ro_rt != NULL) {
- flags &= ~IP_ROUTETOIF;
- ro = state.ro;
- }
- } else
- ro = state.ro;
- dst = (struct sockaddr_in *)state.dst;
- if (error) {
- /* mbuf is already reclaimed in ipsec4_output. */
- m = NULL;
- switch (error) {
- case EHOSTUNREACH:
- case ENETUNREACH:
- case EMSGSIZE:
- case ENOBUFS:
- case ENOMEM:
- break;
- default:
- printf("ip4_output (ipsec): error code %d\n", error);
- /*fall through*/
- case ENOENT:
- /* don't show these error codes to the user */
- error = 0;
- break;
- }
- goto bad;
+ break; /* Continue with packet processing. */
}
-
- /* be sure to update variables that are affected by ipsec4_output() */
+ /* Update variables that are affected by ipsec4_output(). */
ip = mtod(m, struct ip *);
hlen = ip->ip_hl << 2;
- if (ro->ro_rt == NULL) {
- if ((flags & IP_ROUTETOIF) == 0) {
- printf("ip_output: "
- "can't update route after IPsec processing\n");
- error = EHOSTUNREACH; /*XXX*/
- goto bad;
- }
- } else {
- if (state.encap) {
- ia = ifatoia(ro->ro_rt->rt_ifa);
- ifp = ro->ro_rt->rt_ifp;
- }
- }
- }
-
- /* make it flipped, again. */
- ip->ip_len = ntohs(ip->ip_len);
- ip->ip_off = ntohs(ip->ip_off);
-skip_ipsec:
-#endif /*IPSEC*/
-#ifdef FAST_IPSEC
- /*
- * Check the security policy (SP) for the packet and, if
- * required, do IPsec-related processing. There are two
- * cases here; the first time a packet is sent through
- * it will be untagged and handled by ipsec4_checkpolicy.
- * If the packet is resubmitted to ip_output (e.g. after
- * AH, ESP, etc. processing), there will be a tag to bypass
- * the lookup and related policy checking.
- */
- mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
- s = splnet();
- if (mtag != NULL) {
- tdbi = (struct tdb_ident *)(mtag + 1);
- sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
- if (sp == NULL)
- error = -EINVAL; /* force silent drop */
- m_tag_delete(m, mtag);
- } else {
- sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND, flags,
- &error, inp);
- }
- /*
- * There are four return cases:
- * sp != NULL apply IPsec policy
- * sp == NULL, error == 0 no IPsec handling needed
- * sp == NULL, error == -EINVAL discard packet w/o error
- * sp == NULL, error != 0 discard packet, report error
- */
- if (sp != NULL) {
- /* Loop detection, check if ipsec processing already done */
- KASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
- for (mtag = m_tag_first(m); mtag != NULL;
- mtag = m_tag_next(m, mtag)) {
- if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
- continue;
- if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
- mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
- continue;
- /*
- * Check if policy has an SA associated with it.
- * This can happen when an SP has yet to acquire
- * an SA; e.g. on first reference. If it occurs,
- * then we let ipsec4_process_packet do its thing.
- */
- if (sp->req->sav == NULL)
- break;
- tdbi = (struct tdb_ident *)(mtag + 1);
- if (tdbi->spi == sp->req->sav->spi &&
- tdbi->proto == sp->req->sav->sah->saidx.proto &&
- bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
- sizeof (union sockaddr_union)) == 0) {
- /*
- * No IPsec processing is needed, free
- * reference to SP.
- *
- * NB: null pointer to avoid free at
- * done: below.
- */
- KEY_FREESP(&sp), sp = NULL;
- splx(s);
- goto spd_done;
- }
- }
-
- /*
- * Do delayed checksums now because we send before
- * this is done in the normal processing path.
- */
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
- in_delayed_cksum(m);
- m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
- }
-
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
-
- /* NB: callee frees mbuf */
- error = ipsec4_process_packet(m, sp->req, flags, 0);
- /*
- * Preserve KAME behaviour: ENOENT can be returned
- * when an SA acquire is in progress. Don't propagate
- * this to user-level; it confuses applications.
- *
- * XXX this will go away when the SADB is redone.
- */
- if (error == ENOENT)
- error = 0;
- splx(s);
- goto done;
- } else {
- splx(s);
-
- if (error != 0) {
- /*
- * Hack: -EINVAL is used to signal that a packet
- * should be silently discarded. This is typically
- * because we asked key management for an SA and
- * it was delayed (e.g. kicked up to IKE).
- */
- if (error == -EINVAL)
- error = 0;
- goto bad;
- } else {
- /* No IPsec processing for this packet. */
- }
-#ifdef notyet
- /*
- * If deferred crypto processing is needed, check that
- * the interface supports it.
- */
- mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
- if (mtag != NULL && (ifp->if_capenable & IFCAP_IPSEC) == 0) {
- /* notify IPsec to do its own crypto */
- ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
- error = EHOSTUNREACH;
- goto bad;
- }
-#endif
- }
-spd_done:
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
/* Jump over all PFIL processing if hooks are not active. */
- if (inet_pfil_hook.ph_busy_count == -1)
+ if (!PFIL_HOOKED(&inet_pfil_hook))
goto passout;
/* Run through list of hooks for output packets. */
@@ -712,20 +482,11 @@
/* Or forward to some other address? */
fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
if (fwd_tag) {
-#ifndef IPFIREWALL_FORWARD_EXTENDED
- if (!in_localip(ip->ip_src) && !in_localaddr(ip->ip_dst)) {
-#endif
- dst = (struct sockaddr_in *)&ro->ro_dst;
- bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
- m->m_flags |= M_SKIP_FIREWALL;
- m_tag_delete(m, fwd_tag);
- goto again;
-#ifndef IPFIREWALL_FORWARD_EXTENDED
- } else {
- m_tag_delete(m, fwd_tag);
- /* Continue. */
- }
-#endif
+ dst = (struct sockaddr_in *)&ro->ro_dst;
+ bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
+ m->m_flags |= M_SKIP_FIREWALL;
+ m_tag_delete(m, fwd_tag);
+ goto again;
}
#endif /* IPFIREWALL_FORWARD */
@@ -750,50 +511,49 @@
/*
* If small enough for interface, or the interface will take
- * care of the fragmentation for us, can just send directly.
+ * care of the fragmentation for us, we can just send directly.
*/
- if (ip->ip_len <= ifp->if_mtu || (ifp->if_hwassist & CSUM_FRAGMENT &&
- ((ip->ip_off & IP_DF) == 0))) {
+ if (ip->ip_len <= mtu ||
+ (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
+ ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP)
ip->ip_sum = in_cksum(m, hlen);
- /* Record statistics for this interface address. */
+ /*
+ * Record statistics for this interface address.
+ * With CSUM_TSO the byte/packet count will be slightly
+ * incorrect because we count the IP+TCP headers only
+ * once instead of for every generated packet.
+ */
if (!(flags & IP_FORWARDING) && ia) {
- ia->ia_ifa.if_opackets++;
+ if (m->m_pkthdr.csum_flags & CSUM_TSO)
+ ia->ia_ifa.if_opackets +=
+ m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
+ else
+ ia->ia_ifa.if_opackets++;
ia->ia_ifa.if_obytes += m->m_pkthdr.len;
}
-
-#ifdef IPSEC
- /* clean ipsec history once it goes out of the node */
- ipsec_delaux(m);
-#endif
-
#ifdef MBUF_STRESS_TEST
if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
#endif
+ /*
+ * Reset layer specific mbuf flags
+ * to avoid confusing lower layers.
+ */
+ m->m_flags &= ~(M_PROTOFLAGS);
+
error = (*ifp->if_output)(ifp, m,
(struct sockaddr *)dst, ro->ro_rt);
goto done;
}
- if (ip->ip_off & IP_DF) {
+ /* Balk when DF bit is set or the interface didn't support TSO. */
+ if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
error = EMSGSIZE;
- /*
- * This case can happen if the user changed the MTU
- * of an interface after enabling IP on it. Because
- * most netifs don't keep track of routes pointing to
- * them, there is no way for one to update all its
- * routes when the MTU is changed.
- */
- if (ro != NULL &&
- (ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
- (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
- ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
- }
ipstat.ips_cantfrag++;
goto bad;
}
@@ -802,23 +562,24 @@
* Too large for interface; fragment if possible. If successful,
* on return, m will point to a list of packets to be sent.
*/
- error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum);
+ error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum);
if (error)
goto bad;
for (; m; m = m0) {
m0 = m->m_nextpkt;
m->m_nextpkt = 0;
-#ifdef IPSEC
- /* clean ipsec history once it goes out of the node */
- ipsec_delaux(m);
-#endif
if (error == 0) {
/* Record statistics for this interface address. */
if (ia != NULL) {
ia->ia_ifa.if_opackets++;
ia->ia_ifa.if_obytes += m->m_pkthdr.len;
}
-
+ /*
+ * Reset layer specific mbuf flags
+ * to avoid confusing upper layers.
+ */
+ m->m_flags &= ~(M_PROTOFLAGS);
+
error = (*ifp->if_output)(ifp, m,
(struct sockaddr *)dst, ro->ro_rt);
} else
@@ -832,17 +593,6 @@
if (ro == &iproute && ro->ro_rt) {
RTFREE(ro->ro_rt);
}
-#ifdef IPSEC
- if (sp != NULL) {
- KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
- printf("DP ip_output call free SP:%p\n", sp));
- key_freesp(sp);
- }
-#endif
-#ifdef FAST_IPSEC
- if (sp != NULL)
- KEY_FREESP(&sp);
-#endif
return (error);
bad:
m_freem(m);
@@ -860,7 +610,7 @@
*/
int
ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
- u_long if_hwassist_flags, int sw_csum)
+ u_long if_hwassist_flags, int sw_csum)
{
int error = 0;
int hlen = ip->ip_hl << 2;
@@ -944,7 +694,7 @@
struct mbuf *m;
int mhlen = sizeof (struct ip);
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
ipstat.ips_odropped++;
@@ -1045,141 +795,19 @@
}
/*
- * Insert IP options into preformed packet.
- * Adjust IP destination as required for IP source routing,
- * as indicated by a non-zero in_addr at the start of the options.
- *
- * XXX This routine assumes that the packet has no options in place.
- */
-static struct mbuf *
-ip_insertoptions(m, opt, phlen)
- register struct mbuf *m;
- struct mbuf *opt;
- int *phlen;
-{
- register struct ipoption *p = mtod(opt, struct ipoption *);
- struct mbuf *n;
- register struct ip *ip = mtod(m, struct ip *);
- unsigned optlen;
-
- optlen = opt->m_len - sizeof(p->ipopt_dst);
- if (optlen + ip->ip_len > IP_MAXPACKET) {
- *phlen = 0;
- return (m); /* XXX should fail */
- }
- if (p->ipopt_dst.s_addr)
- ip->ip_dst = p->ipopt_dst;
- if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
- MGETHDR(n, M_DONTWAIT, MT_HEADER);
- if (n == NULL) {
- *phlen = 0;
- return (m);
- }
- M_MOVE_PKTHDR(n, m);
- n->m_pkthdr.rcvif = NULL;
-#ifdef MAC
- mac_copy_mbuf(m, n);
-#endif
- n->m_pkthdr.len += optlen;
- m->m_len -= sizeof(struct ip);
- m->m_data += sizeof(struct ip);
- n->m_next = m;
- m = n;
- m->m_len = optlen + sizeof(struct ip);
- m->m_data += max_linkhdr;
- bcopy(ip, mtod(m, void *), sizeof(struct ip));
- } else {
- m->m_data -= optlen;
- m->m_len += optlen;
- m->m_pkthdr.len += optlen;
- bcopy(ip, mtod(m, void *), sizeof(struct ip));
- }
- ip = mtod(m, struct ip *);
- bcopy(p->ipopt_list, ip + 1, optlen);
- *phlen = sizeof(struct ip) + optlen;
- ip->ip_v = IPVERSION;
- ip->ip_hl = *phlen >> 2;
- ip->ip_len += optlen;
- return (m);
-}
-
-/*
- * Copy options from ip to jp,
- * omitting those not copied during fragmentation.
- */
-int
-ip_optcopy(ip, jp)
- struct ip *ip, *jp;
-{
- register u_char *cp, *dp;
- int opt, optlen, cnt;
-
- cp = (u_char *)(ip + 1);
- dp = (u_char *)(jp + 1);
- cnt = (ip->ip_hl << 2) - sizeof (struct ip);
- for (; cnt > 0; cnt -= optlen, cp += optlen) {
- opt = cp[0];
- if (opt == IPOPT_EOL)
- break;
- if (opt == IPOPT_NOP) {
- /* Preserve for IP mcast tunnel's LSRR alignment. */
- *dp++ = IPOPT_NOP;
- optlen = 1;
- continue;
- }
-
- KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
- ("ip_optcopy: malformed ipv4 option"));
- optlen = cp[IPOPT_OLEN];
- KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
- ("ip_optcopy: malformed ipv4 option"));
-
- /* bogus lengths should have been caught by ip_dooptions */
- if (optlen > cnt)
- optlen = cnt;
- if (IPOPT_COPIED(opt)) {
- bcopy(cp, dp, optlen);
- dp += optlen;
- }
- }
- for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
- *dp++ = IPOPT_EOL;
- return (optlen);
-}
-
-/*
* IP socket option processing.
- *
- * There are two versions of this call in order to work around a race
- * condition in TCP in FreeBSD 6.x. In the TCP implementation, so->so_pcb
- * can become NULL if the pcb or pcbinfo lock isn't held. However, when
- * entering ip_ctloutput(), neither lock is held, and finding the pointer to
- * either lock requires follow so->so_pcb, which may be NULL.
- * ip_ctloutput_pcbinfo() accepts the pcbinfo pointer so that the lock can be
- * safely acquired. This is not required in FreeBSD 7.x because the
- * invariants on so->so_pcb are much stronger, so it cannot become NULL
- * while the socket is in use.
*/
int
-ip_ctloutput_pcbinfo(so, sopt, pcbinfo)
- struct socket *so;
- struct sockopt *sopt;
- struct inpcbinfo *pcbinfo;
+ip_ctloutput(struct socket *so, struct sockopt *sopt)
{
struct inpcb *inp = sotoinpcb(so);
int error, optval;
- if (pcbinfo == NULL)
- pcbinfo = inp->inp_pcbinfo;
-
error = optval = 0;
if (sopt->sopt_level != IPPROTO_IP) {
return (EINVAL);
}
- if (inp == NULL)
- return (EINVAL);
-
switch (sopt->sopt_dir) {
case SOPT_SET:
switch (sopt->sopt_name) {
@@ -1205,15 +833,7 @@
m_free(m);
break;
}
- INP_INFO_WLOCK(pcbinfo);
- if (so->so_pcb == NULL) {
- INP_INFO_WUNLOCK(pcbinfo);
- m_free(m);
- error = EINVAL;
- break;
- }
INP_LOCK(inp);
- INP_INFO_WUNLOCK(pcbinfo);
error = ip_pcbopts(inp, sopt->sopt_name, m);
INP_UNLOCK(inp);
return (error);
@@ -1234,14 +854,7 @@
sizeof optval);
if (error)
break;
- INP_INFO_WLOCK(pcbinfo);
- if (so->so_pcb == NULL) {
- INP_INFO_WUNLOCK(pcbinfo);
- error = EINVAL;
- break;
- }
- INP_LOCK(inp);
- INP_INFO_WUNLOCK(pcbinfo);
+
switch (sopt->sopt_name) {
case IP_TOS:
inp->inp_ip_tos = optval;
@@ -1298,17 +911,31 @@
OPTSET(INP_DONTFRAG);
break;
}
- INP_UNLOCK(inp);
break;
#undef OPTSET
+ /*
+ * Multicast socket options are processed by the in_mcast
+ * module.
+ */
case IP_MULTICAST_IF:
case IP_MULTICAST_VIF:
case IP_MULTICAST_TTL:
case IP_MULTICAST_LOOP:
case IP_ADD_MEMBERSHIP:
case IP_DROP_MEMBERSHIP:
- error = ip_setmoptions(inp, sopt);
+ case IP_ADD_SOURCE_MEMBERSHIP:
+ case IP_DROP_SOURCE_MEMBERSHIP:
+ case IP_BLOCK_SOURCE:
+ case IP_UNBLOCK_SOURCE:
+ case IP_MSFILTER:
+ case MCAST_JOIN_GROUP:
+ case MCAST_LEAVE_GROUP:
+ case MCAST_JOIN_SOURCE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP:
+ case MCAST_BLOCK_SOURCE:
+ case MCAST_UNBLOCK_SOURCE:
+ error = inp_setmoptions(inp, sopt);
break;
case IP_PORTRANGE:
@@ -1317,14 +944,7 @@
if (error)
break;
- INP_INFO_WLOCK(pcbinfo);
- if (so->so_pcb == NULL) {
- INP_INFO_WUNLOCK(pcbinfo);
- error = EINVAL;
- break;
- }
INP_LOCK(inp);
- INP_INFO_WUNLOCK(pcbinfo);
switch (optval) {
case IP_PORTRANGE_DEFAULT:
inp->inp_flags &= ~(INP_LOWPORT);
@@ -1348,7 +968,7 @@
INP_UNLOCK(inp);
break;
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
case IP_IPSEC_POLICY:
{
caddr_t req;
@@ -1361,21 +981,28 @@
break;
if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
break;
- priv = (sopt->sopt_td != NULL &&
- suser(sopt->sopt_td) != 0) ? 0 : 1;
+ if (sopt->sopt_td != NULL) {
+ /*
+ * XXXRW: Would be more desirable to do this
+ * one layer down so that we only exercise
+ * privilege if it is needed.
+ */
+ error = priv_check(sopt->sopt_td,
+ PRIV_NETINET_IPSEC);
+ if (error)
+ priv = 0;
+ else
+ priv = 1;
+ } else
+ priv = 1;
req = mtod(m, caddr_t);
len = m->m_len;
optname = sopt->sopt_name;
- if (so->so_pcb == NULL) {
- m_free(m);
- error = EINVAL;
- break;
- }
error = ipsec4_set_policy(inp, optname, req, len, priv);
m_freem(m);
break;
}
-#endif /*IPSEC*/
+#endif /* IPSEC */
default:
error = ENOPROTOOPT;
@@ -1467,16 +1094,19 @@
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
+ /*
+ * Multicast socket options are processed by the in_mcast
+ * module.
+ */
case IP_MULTICAST_IF:
case IP_MULTICAST_VIF:
case IP_MULTICAST_TTL:
case IP_MULTICAST_LOOP:
- case IP_ADD_MEMBERSHIP:
- case IP_DROP_MEMBERSHIP:
- error = ip_getmoptions(inp, sopt);
+ case IP_MSFILTER:
+ error = inp_getmoptions(inp, sopt);
break;
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
case IP_IPSEC_POLICY:
{
struct mbuf *m = NULL;
@@ -1494,7 +1124,7 @@
m_freem(m);
break;
}
-#endif /*IPSEC*/
+#endif /* IPSEC */
default:
error = ENOPROTOOPT;
@@ -1505,562 +1135,6 @@
return (error);
}
-int
-ip_ctloutput(struct socket *so, struct sockopt *sopt)
-{
- return (ip_ctloutput_pcbinfo(so, sopt, NULL));
-}
-
-/*
- * Set up IP options in pcb for insertion in output packets.
- * Store in mbuf with pointer in pcbopt, adding pseudo-option
- * with destination address if source routed.
- */
-static int
-ip_pcbopts(struct inpcb *inp, int optname, struct mbuf *m)
-{
- register int cnt, optlen;
- register u_char *cp;
- struct mbuf **pcbopt;
- u_char opt;
-
- INP_LOCK_ASSERT(inp);
-
- pcbopt = &inp->inp_options;
-
- /* turn off any old options */
- if (*pcbopt)
- (void)m_free(*pcbopt);
- *pcbopt = 0;
- if (m == NULL || m->m_len == 0) {
- /*
- * Only turning off any previous options.
- */
- if (m != NULL)
- (void)m_free(m);
- return (0);
- }
-
- if (m->m_len % sizeof(int32_t))
- goto bad;
- /*
- * IP first-hop destination address will be stored before
- * actual options; move other options back
- * and clear it when none present.
- */
- if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
- goto bad;
- cnt = m->m_len;
- m->m_len += sizeof(struct in_addr);
- cp = mtod(m, u_char *) + sizeof(struct in_addr);
- bcopy(mtod(m, void *), cp, (unsigned)cnt);
- bzero(mtod(m, void *), sizeof(struct in_addr));
-
- for (; cnt > 0; cnt -= optlen, cp += optlen) {
- opt = cp[IPOPT_OPTVAL];
- if (opt == IPOPT_EOL)
- break;
- if (opt == IPOPT_NOP)
- optlen = 1;
- else {
- if (cnt < IPOPT_OLEN + sizeof(*cp))
- goto bad;
- optlen = cp[IPOPT_OLEN];
- if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
- goto bad;
- }
- switch (opt) {
-
- default:
- break;
-
- case IPOPT_LSRR:
- case IPOPT_SSRR:
- /*
- * user process specifies route as:
- * ->A->B->C->D
- * D must be our final destination (but we can't
- * check that since we may not have connected yet).
- * A is first hop destination, which doesn't appear in
- * actual IP option, but is stored before the options.
- */
- if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
- goto bad;
- m->m_len -= sizeof(struct in_addr);
- cnt -= sizeof(struct in_addr);
- optlen -= sizeof(struct in_addr);
- cp[IPOPT_OLEN] = optlen;
- /*
- * Move first hop before start of options.
- */
- bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
- sizeof(struct in_addr));
- /*
- * Then copy rest of options back
- * to close up the deleted entry.
- */
- bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
- &cp[IPOPT_OFFSET+1],
- (unsigned)cnt - (IPOPT_MINOFF - 1));
- break;
- }
- }
- if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
- goto bad;
- *pcbopt = m;
- return (0);
-
-bad:
- (void)m_free(m);
- return (EINVAL);
-}
-
-/*
- * XXX
- * The whole multicast option thing needs to be re-thought.
- * Several of these options are equally applicable to non-multicast
- * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
- * standard option (IP_TTL).
- */
-
-/*
- * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
- */
-static struct ifnet *
-ip_multicast_if(a, ifindexp)
- struct in_addr *a;
- int *ifindexp;
-{
- int ifindex;
- struct ifnet *ifp;
-
- if (ifindexp)
- *ifindexp = 0;
- if (ntohl(a->s_addr) >> 24 == 0) {
- ifindex = ntohl(a->s_addr) & 0xffffff;
- if (ifindex < 0 || if_index < ifindex)
- return NULL;
- ifp = ifnet_byindex(ifindex);
- if (ifindexp)
- *ifindexp = ifindex;
- } else {
- INADDR_TO_IFP(*a, ifp);
- }
- return ifp;
-}
-
-/*
- * Given an inpcb, return its multicast options structure pointer. Accepts
- * an unlocked inpcb pointer, but will return it locked. May sleep.
- */
-static struct ip_moptions *
-ip_findmoptions(struct inpcb *inp)
-{
- struct ip_moptions *imo;
-
- INP_LOCK(inp);
- if (inp->inp_moptions != NULL)
- return (inp->inp_moptions);
-
- INP_UNLOCK(inp);
-
- imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
-
- imo->imo_multicast_ifp = NULL;
- imo->imo_multicast_addr.s_addr = INADDR_ANY;
- imo->imo_multicast_vif = -1;
- imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
- imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
- imo->imo_num_memberships = 0;
-
- INP_LOCK(inp);
- if (inp->inp_moptions != NULL) {
- free(imo, M_IPMOPTS);
- return (inp->inp_moptions);
- }
- inp->inp_moptions = imo;
- return (imo);
-}
-
-/*
- * Set the IP multicast options in response to user setsockopt().
- */
-static int
-ip_setmoptions(struct inpcb *inp, struct sockopt *sopt)
-{
- int error = 0;
- int i;
- struct in_addr addr;
- struct ip_mreq mreq;
- struct ifnet *ifp;
- struct ip_moptions *imo;
- struct route ro;
- struct sockaddr_in *dst;
- int ifindex;
- int s;
-
- switch (sopt->sopt_name) {
- /* store an index number for the vif you wanna use in the send */
- case IP_MULTICAST_VIF:
- if (legal_vif_num == 0) {
- error = EOPNOTSUPP;
- break;
- }
- error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
- if (error)
- break;
- if (!legal_vif_num(i) && (i != -1)) {
- error = EINVAL;
- break;
- }
- imo = ip_findmoptions(inp);
- imo->imo_multicast_vif = i;
- INP_UNLOCK(inp);
- break;
-
- case IP_MULTICAST_IF:
- /*
- * Select the interface for outgoing multicast packets.
- */
- error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
- if (error)
- break;
- /*
- * INADDR_ANY is used to remove a previous selection.
- * When no interface is selected, a default one is
- * chosen every time a multicast packet is sent.
- */
- imo = ip_findmoptions(inp);
- if (addr.s_addr == INADDR_ANY) {
- imo->imo_multicast_ifp = NULL;
- INP_UNLOCK(inp);
- break;
- }
- /*
- * The selected interface is identified by its local
- * IP address. Find the interface and confirm that
- * it supports multicasting.
- */
- s = splimp();
- ifp = ip_multicast_if(&addr, &ifindex);
- if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
- INP_UNLOCK(inp);
- splx(s);
- error = EADDRNOTAVAIL;
- break;
- }
- imo->imo_multicast_ifp = ifp;
- if (ifindex)
- imo->imo_multicast_addr = addr;
- else
- imo->imo_multicast_addr.s_addr = INADDR_ANY;
- INP_UNLOCK(inp);
- splx(s);
- break;
-
- case IP_MULTICAST_TTL:
- /*
- * Set the IP time-to-live for outgoing multicast packets.
- * The original multicast API required a char argument,
- * which is inconsistent with the rest of the socket API.
- * We allow either a char or an int.
- */
- if (sopt->sopt_valsize == 1) {
- u_char ttl;
- error = sooptcopyin(sopt, &ttl, 1, 1);
- if (error)
- break;
- imo = ip_findmoptions(inp);
- imo->imo_multicast_ttl = ttl;
- INP_UNLOCK(inp);
- } else {
- u_int ttl;
- error = sooptcopyin(sopt, &ttl, sizeof ttl,
- sizeof ttl);
- if (error)
- break;
- if (ttl > 255)
- error = EINVAL;
- else {
- imo = ip_findmoptions(inp);
- imo->imo_multicast_ttl = ttl;
- INP_UNLOCK(inp);
- }
- }
- break;
-
- case IP_MULTICAST_LOOP:
- /*
- * Set the loopback flag for outgoing multicast packets.
- * Must be zero or one. The original multicast API required a
- * char argument, which is inconsistent with the rest
- * of the socket API. We allow either a char or an int.
- */
- if (sopt->sopt_valsize == 1) {
- u_char loop;
- error = sooptcopyin(sopt, &loop, 1, 1);
- if (error)
- break;
- imo = ip_findmoptions(inp);
- imo->imo_multicast_loop = !!loop;
- INP_UNLOCK(inp);
- } else {
- u_int loop;
- error = sooptcopyin(sopt, &loop, sizeof loop,
- sizeof loop);
- if (error)
- break;
- imo = ip_findmoptions(inp);
- imo->imo_multicast_loop = !!loop;
- INP_UNLOCK(inp);
- }
- break;
-
- case IP_ADD_MEMBERSHIP:
- /*
- * Add a multicast group membership.
- * Group must be a valid IP multicast address.
- */
- error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
- if (error)
- break;
-
- if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
- error = EINVAL;
- break;
- }
- s = splimp();
- /*
- * If no interface address was provided, use the interface of
- * the route to the given multicast address.
- */
- if (mreq.imr_interface.s_addr == INADDR_ANY) {
- bzero((caddr_t)&ro, sizeof(ro));
- dst = (struct sockaddr_in *)&ro.ro_dst;
- dst->sin_len = sizeof(*dst);
- dst->sin_family = AF_INET;
- dst->sin_addr = mreq.imr_multiaddr;
- rtalloc_ign(&ro, RTF_CLONING);
- if (ro.ro_rt == NULL) {
- error = EADDRNOTAVAIL;
- splx(s);
- break;
- }
- ifp = ro.ro_rt->rt_ifp;
- RTFREE(ro.ro_rt);
- }
- else {
- ifp = ip_multicast_if(&mreq.imr_interface, NULL);
- }
-
- /*
- * See if we found an interface, and confirm that it
- * supports multicast.
- */
- if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
- error = EADDRNOTAVAIL;
- splx(s);
- break;
- }
- /*
- * See if the membership already exists or if all the
- * membership slots are full.
- */
- imo = ip_findmoptions(inp);
- for (i = 0; i < imo->imo_num_memberships; ++i) {
- if (imo->imo_membership[i]->inm_ifp == ifp &&
- imo->imo_membership[i]->inm_addr.s_addr
- == mreq.imr_multiaddr.s_addr)
- break;
- }
- if (i < imo->imo_num_memberships) {
- INP_UNLOCK(inp);
- error = EADDRINUSE;
- splx(s);
- break;
- }
- if (i == IP_MAX_MEMBERSHIPS) {
- INP_UNLOCK(inp);
- error = ETOOMANYREFS;
- splx(s);
- break;
- }
- /*
- * Everything looks good; add a new record to the multicast
- * address list for the given interface.
- */
- if ((imo->imo_membership[i] =
- in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
- INP_UNLOCK(inp);
- error = ENOBUFS;
- splx(s);
- break;
- }
- ++imo->imo_num_memberships;
- INP_UNLOCK(inp);
- splx(s);
- break;
-
- case IP_DROP_MEMBERSHIP:
- /*
- * Drop a multicast group membership.
- * Group must be a valid IP multicast address.
- */
- error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
- if (error)
- break;
-
- if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
- error = EINVAL;
- break;
- }
-
- s = splimp();
- /*
- * If an interface address was specified, get a pointer
- * to its ifnet structure.
- */
- if (mreq.imr_interface.s_addr == INADDR_ANY)
- ifp = NULL;
- else {
- ifp = ip_multicast_if(&mreq.imr_interface, NULL);
- if (ifp == NULL) {
- error = EADDRNOTAVAIL;
- splx(s);
- break;
- }
- }
- /*
- * Find the membership in the membership array.
- */
- imo = ip_findmoptions(inp);
- for (i = 0; i < imo->imo_num_memberships; ++i) {
- if ((ifp == NULL ||
- imo->imo_membership[i]->inm_ifp == ifp) &&
- imo->imo_membership[i]->inm_addr.s_addr ==
- mreq.imr_multiaddr.s_addr)
- break;
- }
- if (i == imo->imo_num_memberships) {
- INP_UNLOCK(inp);
- error = EADDRNOTAVAIL;
- splx(s);
- break;
- }
- /*
- * Give up the multicast address record to which the
- * membership points.
- */
- in_delmulti(imo->imo_membership[i]);
- /*
- * Remove the gap in the membership array.
- */
- for (++i; i < imo->imo_num_memberships; ++i)
- imo->imo_membership[i-1] = imo->imo_membership[i];
- --imo->imo_num_memberships;
- INP_UNLOCK(inp);
- splx(s);
- break;
-
- default:
- error = EOPNOTSUPP;
- break;
- }
-
- return (error);
-}
-
-/*
- * Return the IP multicast options in response to user getsockopt().
- */
-static int
-ip_getmoptions(struct inpcb *inp, struct sockopt *sopt)
-{
- struct ip_moptions *imo;
- struct in_addr addr;
- struct in_ifaddr *ia;
- int error, optval;
- u_char coptval;
-
- INP_LOCK(inp);
- imo = inp->inp_moptions;
-
- error = 0;
- switch (sopt->sopt_name) {
- case IP_MULTICAST_VIF:
- if (imo != NULL)
- optval = imo->imo_multicast_vif;
- else
- optval = -1;
- INP_UNLOCK(inp);
- error = sooptcopyout(sopt, &optval, sizeof optval);
- break;
-
- case IP_MULTICAST_IF:
- if (imo == NULL || imo->imo_multicast_ifp == NULL)
- addr.s_addr = INADDR_ANY;
- else if (imo->imo_multicast_addr.s_addr) {
- /* return the value user has set */
- addr = imo->imo_multicast_addr;
- } else {
- IFP_TO_IA(imo->imo_multicast_ifp, ia);
- addr.s_addr = (ia == NULL) ? INADDR_ANY
- : IA_SIN(ia)->sin_addr.s_addr;
- }
- INP_UNLOCK(inp);
- error = sooptcopyout(sopt, &addr, sizeof addr);
- break;
-
- case IP_MULTICAST_TTL:
- if (imo == 0)
- optval = coptval = IP_DEFAULT_MULTICAST_TTL;
- else
- optval = coptval = imo->imo_multicast_ttl;
- INP_UNLOCK(inp);
- if (sopt->sopt_valsize == 1)
- error = sooptcopyout(sopt, &coptval, 1);
- else
- error = sooptcopyout(sopt, &optval, sizeof optval);
- break;
-
- case IP_MULTICAST_LOOP:
- if (imo == 0)
- optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
- else
- optval = coptval = imo->imo_multicast_loop;
- INP_UNLOCK(inp);
- if (sopt->sopt_valsize == 1)
- error = sooptcopyout(sopt, &coptval, 1);
- else
- error = sooptcopyout(sopt, &optval, sizeof optval);
- break;
-
- default:
- INP_UNLOCK(inp);
- error = ENOPROTOOPT;
- break;
- }
- INP_UNLOCK_ASSERT(inp);
-
- return (error);
-}
-
-/*
- * Discard the IP multicast options.
- */
-void
-ip_freemoptions(imo)
- register struct ip_moptions *imo;
-{
- register int i;
-
- if (imo != NULL) {
- for (i = 0; i < imo->imo_num_memberships; ++i)
- in_delmulti(imo->imo_membership[i]);
- free(imo, M_IPMOPTS);
- }
-}
-
/*
* Routine called from ip_output() to loop back a copy of an IP multicast
* packet to the input queue of a specified interface. Note that this
@@ -2069,11 +1143,8 @@
* replicating that code here.
*/
static void
-ip_mloopback(ifp, m, dst, hlen)
- struct ifnet *ifp;
- register struct mbuf *m;
- register struct sockaddr_in *dst;
- int hlen;
+ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst,
+ int hlen)
{
register struct ip *ip;
struct mbuf *copym;
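
The MTU hunk added near the top of ip_output() above selects the path MTU once, instead of reading ifp->if_mtu at each use: the route's cached MTU is preferred when the route is usable, but it is clamped to the interface MTU because the cache can go stale after an interface MTU change. A hedged sketch of just that selection rule, with plain integers standing in for rt_rmx.rmx_mtu and ifp->if_mtu and an illustrative helper name:

/*
 * Sketch of the MTU selection added to ip_output(): use the route's
 * cached MTU when the route is usable, clamped to the interface MTU.
 */
static unsigned long
select_mtu(int route_usable, unsigned long *route_mtu, unsigned long if_mtu)
{
	if (route_usable) {
		if (*route_mtu > if_mtu)
			*route_mtu = if_mtu;	/* repair a stale cache entry */
		return (*route_mtu);
	}
	return (if_mtu);
}

The same mtu value then drives the queue-length estimate, the EMSGSIZE check for oversized broadcasts, and the ip_fragment() call later in the function.
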
Index: tcp_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/netinet/tcp_var.h -L sys/netinet/tcp_var.h -u -r1.3 -r1.4
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.126.2.1 2006/03/01 21:13:29 andre Exp $
+ * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.157.2.1 2007/12/07 05:46:09 kmacy Exp $
*/
#ifndef _NETINET_TCP_VAR_H_
@@ -75,6 +75,18 @@
#define tcp6cb tcpcb /* for KAME src sync over BSD*'s */
+/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
+#ifdef INET6
+#define ND6_HINT(tp) \
+do { \
+ if ((tp) && (tp)->t_inpcb && \
+ ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0) \
+ nd6_nud_hint(NULL, NULL, 0); \
+} while (0)
+#else
+#define ND6_HINT(tp)
+#endif
+
/*
* Tcp control block, one per tcp; fields:
* Organized for 16 byte cacheline efficiency.
@@ -84,11 +96,7 @@
int t_segqlen; /* segment reassembly queue length */
int t_dupacks; /* consecutive dup acks recd */
- struct callout *tt_rexmt; /* retransmit timer */
- struct callout *tt_persist; /* retransmit persistence */
- struct callout *tt_keep; /* keepalive */
- struct callout *tt_2msl; /* 2*msl TIME_WAIT timer */
- struct callout *tt_delack; /* delayed ACK timer */
+ struct tcp_timer *t_timers; /* All the TCP timers in one struct */
struct inpcb *t_inpcb; /* back pointer to internet pcb */
int t_state; /* state of this connection */
@@ -114,6 +122,7 @@
#define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */
#define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */
#define TF_FORCEDATA 0x800000 /* force out a byte */
+#define TF_TSO 0x1000000 /* TSO enabled on this connection */
tcp_seq snd_una; /* send unacknowledged */
tcp_seq snd_max; /* highest sequence number sent;
@@ -173,10 +182,10 @@
u_char snd_scale; /* window scaling for send window */
u_char rcv_scale; /* window scaling for recv window */
u_char request_r_scale; /* pending window scaling */
- u_char requested_s_scale;
- u_long ts_recent; /* timestamp echo data */
-
+ u_int32_t ts_recent; /* timestamp echo data */
u_long ts_recent_age; /* when last updated */
+ u_int32_t ts_offset; /* our timestamp offset */
+
tcp_seq last_ack_sent;
/* experimental */
u_long snd_cwnd_prev; /* cwnd prior to retransmit */
@@ -184,12 +193,7 @@
tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */
u_long t_badrxtwin; /* window for retransmit recovery */
u_char snd_limited; /* segments limited transmitted */
-/* anti DoS counters */
- u_long rcv_second; /* start of interval second */
- u_long rcv_pps; /* received packets per second */
- u_long rcv_byps; /* received bytes per second */
/* SACK related state */
- int sack_enable; /* enable SACK for this connection */
int snd_numholes; /* number of holes seen by sender */
TAILQ_HEAD(sackhole_head, sackhole) snd_holes;
/* SACK scoreboard (sorted) */
@@ -202,6 +206,7 @@
	int	t_rttlow;		/* smallest observed RTT */
u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */
int rfbuf_cnt; /* recv buffer autoscaling byte count */
+ void *t_pspare[5]; /* toe usrreqs / toepcb * / congestion algo / vimage / 1 general use */
};
#define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY)
@@ -227,58 +232,32 @@
* Structure to hold TCP options that are only used during segment
* processing (in tcp_input), but not held in the tcpcb.
* It's basically used to reduce the number of parameters
- * to tcp_dooptions.
+ * to tcp_dooptions and tcp_addoptions.
+ * The binary order of the to_flags is relevant for packing of the
+ * options in tcp_addoptions.
*/
struct tcpopt {
u_long to_flags; /* which options are present */
-#define TOF_TS 0x0001 /* timestamp */
-#define TOF_MSS 0x0010
-#define TOF_SCALE 0x0020
-#define TOF_SIGNATURE 0x0040 /* signature option present */
-#define TOF_SIGLEN 0x0080 /* signature length valid (RFC2385) */
-#define TOF_SACK 0x0100 /* Peer sent SACK option */
- u_int32_t to_tsval;
- u_int32_t to_tsecr;
- u_int16_t to_mss;
- u_int8_t to_requested_s_scale;
+#define TOF_MSS 0x0001 /* maximum segment size */
+#define TOF_SCALE 0x0002 /* window scaling */
+#define TOF_SACKPERM 0x0004 /* SACK permitted */
+#define TOF_TS 0x0010 /* timestamp */
+#define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */
+#define TOF_SACK 0x0080 /* Peer sent SACK option */
+#define TOF_MAXOPT 0x0100
+ u_int32_t to_tsval; /* new timestamp */
+ u_int32_t to_tsecr; /* reflected timestamp */
+ u_int16_t to_mss; /* maximum segment size */
+ u_int8_t to_wscale; /* window scaling */
u_int8_t to_nsacks; /* number of SACK blocks */
u_char *to_sacks; /* pointer to the first SACK blocks */
+ u_char *to_signature; /* pointer to the TCP-MD5 signature */
};
-#ifdef _NETINET_IN_PCB_H_
-struct syncache {
- inp_gen_t sc_inp_gencnt; /* pointer check */
- struct tcpcb *sc_tp; /* tcb for listening socket */
- struct mbuf *sc_ipopts; /* source route */
- struct in_conninfo sc_inc; /* addresses */
- u_int32_t sc_tsrecent;
- u_int32_t sc_flowlabel; /* IPv6 flowlabel */
- tcp_seq sc_irs; /* seq from peer */
- tcp_seq sc_iss; /* our ISS */
- u_long sc_rxttime; /* retransmit time */
- u_int16_t sc_rxtslot; /* retransmit counter */
- u_int16_t sc_peer_mss; /* peer's MSS */
- u_int16_t sc_wnd; /* advertised window */
- u_int8_t sc_requested_s_scale:4,
- sc_request_r_scale:4;
- u_int8_t sc_flags;
-#define SCF_NOOPT 0x01 /* no TCP options */
-#define SCF_WINSCALE 0x02 /* negotiated window scaling */
-#define SCF_TIMESTAMP 0x04 /* negotiated timestamps */
-#define SCF_UNREACH 0x10 /* icmp unreachable received */
-#define SCF_SIGNATURE 0x20 /* send MD5 digests */
-#define SCF_SACK 0x80 /* send SACK option */
- TAILQ_ENTRY(syncache) sc_hash;
- TAILQ_ENTRY(syncache) sc_timerq;
-};
-
-struct syncache_head {
- TAILQ_HEAD(, syncache) sch_bucket;
- u_int sch_length;
-};
-#else
-struct in_conninfo;
-#endif /* _NETINET_IN_PCB_H_ */
+/*
+ * Flags for tcp_dooptions.
+ */
+#define TO_SYN 0x01 /* parse SYN-only options */
struct hc_metrics_lite { /* must stay in sync with hc_metrics */
u_long rmx_mtu; /* MTU for this path */
@@ -291,6 +270,10 @@
u_long rmx_recvpipe; /* inbound delay-bandwidth product */
};
+#ifndef _NETINET_IN_PCB_H_
+struct in_conninfo;
+#endif /* _NETINET_IN_PCB_H_ */
+
struct tcptw {
struct inpcb *tw_inpcb; /* XXX back pointer to internet pcb */
tcp_seq snd_nxt;
@@ -301,9 +284,10 @@
u_short tw_so_options; /* copy of so_options */
struct ucred *tw_cred; /* user credentials */
u_long t_recent;
+ u_int32_t ts_offset; /* our timestamp offset */
u_long t_starttime;
int tw_time;
- LIST_ENTRY(tcptw) tw_2msl;
+ TAILQ_ENTRY(tcptw) tw_2msl;
};
#define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb)
@@ -437,6 +421,8 @@
u_long tcps_hc_added; /* entry added to hostcache */
u_long tcps_hc_bucketoverflow; /* hostcache per bucket limit hit */
+ u_long tcps_finwait2_drops; /* Drop FIN_WAIT_2 connection after time limit */
+
/* SACK related stats */
u_long tcps_sack_recovery_episode; /* SACK recovery episodes */
u_long tcps_sack_rexmits; /* SACK rexmit segments */
@@ -479,6 +465,7 @@
#define TCPCTL_SACK 14 /* Selective Acknowledgement,rfc 2018 */
#define TCPCTL_DROP 15 /* drop tcp connection */
#define TCPCTL_MAXID 16
+#define TCPCTL_FINWAIT2_TIMEOUT 17
#define TCPCTL_NAMES { \
{ 0, 0 }, \
@@ -502,28 +489,33 @@
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet_tcp);
SYSCTL_DECL(_net_inet_tcp_sack);
+MALLOC_DECLARE(M_TCPLOG);
#endif
extern struct inpcbhead tcb; /* head of queue of active tcpcb's */
extern struct inpcbinfo tcbinfo;
extern struct tcpstat tcpstat; /* tcp statistics */
+extern int tcp_log_in_vain;
extern int tcp_mssdflt; /* XXX */
extern int tcp_minmss;
-extern int tcp_minmssoverload;
extern int tcp_delack_enabled;
extern int tcp_do_newreno;
extern int path_mtu_discovery;
extern int ss_fltsz;
extern int ss_fltsz_local;
-extern int tcp_do_sack; /* SACK enabled/disabled */
+extern int tcp_do_sack; /* SACK enabled/disabled */
+extern int tcp_sc_rst_sock_fail; /* RST on sock alloc failure */
+int tcp_addoptions(struct tcpopt *, u_char *);
struct tcpcb *
tcp_close(struct tcpcb *);
+void tcp_discardcb(struct tcpcb *);
void tcp_twstart(struct tcpcb *);
+#if 0
int tcp_twrecycleable(struct tcptw *tw);
-struct tcptw *
- tcp_twclose(struct tcptw *_tw, int _reuse);
+#endif
+void tcp_twclose(struct tcptw *_tw, int _reuse);
void tcp_ctlinput(int, struct sockaddr *, void *);
int tcp_ctloutput(struct socket *, struct sockopt *);
struct tcpcb *
@@ -532,10 +524,13 @@
void tcp_fasttimo(void);
void tcp_init(void);
void tcp_fini(void *);
+char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *,
+ const void *);
+int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
void tcp_reass_init(void);
void tcp_input(struct mbuf *, int);
-u_long tcp_maxmtu(struct in_conninfo *);
-u_long tcp_maxmtu6(struct in_conninfo *);
+u_long tcp_maxmtu(struct in_conninfo *, int *);
+u_long tcp_maxmtu6(struct in_conninfo *, int *);
void tcp_mss(struct tcpcb *, int);
int tcp_mssopt(struct in_conninfo *);
struct inpcb *
@@ -547,6 +542,10 @@
int tcp_output(struct tcpcb *);
void tcp_respond(struct tcpcb *, void *,
struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
+void tcp_tw_init(void);
+void tcp_tw_zone_change(void);
+int tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *,
+ struct mbuf *, int);
int tcp_twrespond(struct tcptw *, int);
void tcp_setpersist(struct tcpcb *);
#ifdef TCP_SIGNATURE
@@ -556,18 +555,10 @@
struct tcptemp *
tcpip_maketemplate(struct inpcb *);
void tcpip_fillheaders(struct inpcb *, void *, void *);
-struct tcpcb *
- tcp_timers(struct tcpcb *, int);
-void tcp_trace(int, int, struct tcpcb *, void *, struct tcphdr *, int);
+void tcp_timer_activate(struct tcpcb *, int, u_int);
+int tcp_timer_active(struct tcpcb *, int);
+void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
void tcp_xmit_bandwidth_limit(struct tcpcb *tp, tcp_seq ack_seq);
-void syncache_init(void);
-void syncache_unreach(struct in_conninfo *, struct tcphdr *);
-int syncache_expand(struct in_conninfo *, struct tcphdr *,
- struct socket **, struct mbuf *);
-int syncache_add(struct in_conninfo *, struct tcpopt *,
- struct tcphdr *, struct socket **, struct mbuf *);
-void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
-void syncache_badack(struct in_conninfo *);
/*
* All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
*/
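
The reworked struct tcpopt above keeps every parsed option behind a TOF_* bit in to_flags, so a consumer is expected to test the bit before trusting the corresponding field. A small sketch of that pattern, assuming struct tcpopt is visible (e.g. in a kernel source file that includes <netinet/tcp_var.h>); the helper is illustrative and not part of this diff.

/*
 * Illustrative reader for the reworked struct tcpopt: each field is
 * only meaningful when its TOF_* bit is set in to_flags.
 */
static u_int32_t
echoed_timestamp(const struct tcpopt *to)
{
	if (to->to_flags & TOF_TS)	/* timestamp option was present */
		return (to->to_tsecr);	/* reflected timestamp value */
	return (0);			/* no timestamp negotiated */
}

The binary order of the TOF_* values also matters to tcp_addoptions(), as noted in the comment above, since options are packed into the header in flag order.
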
--- /dev/null
+++ sys/netinet/ip_ipsec.c
@@ -0,0 +1,387 @@
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_ipsec.c,v 1.8 2007/10/07 20:44:23 silby Exp $");
+
+#include "opt_ipsec.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
+#include <netinet/ip_ipsec.h>
+
+#include <machine/in_cksum.h>
+
+#ifdef IPSEC
+#include <netipsec/ipsec.h>
+#include <netipsec/xform.h>
+#include <netipsec/key.h>
+#endif /*IPSEC*/
+
+extern struct protosw inetsw[];
+
+/*
+ * Check if we have to jump over firewall processing for this packet.
+ * Called from ip_input().
+ * 1 = jump over firewall, 0 = packet goes through firewall.
+ */
+int
+ip_ipsec_filtertunnel(struct mbuf *m)
+{
+#if defined(IPSEC) && !defined(IPSEC_FILTERTUNNEL)
+ /*
+ * Bypass packet filtering for packets from a tunnel.
+ */
+ if (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL)
+ return 1;
+#endif
+ return 0;
+}
+
+/*
+ * Check if this packet has an active SA and needs to be dropped instead
+ * of forwarded.
+ * Called from ip_input().
+ * 1 = drop packet, 0 = forward packet.
+ */
+int
+ip_ipsec_fwd(struct mbuf *m)
+{
+#ifdef IPSEC
+ struct m_tag *mtag;
+ struct tdb_ident *tdbi;
+ struct secpolicy *sp;
+ int s, error;
+
+ mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
+ s = splnet();
+ if (mtag != NULL) {
+ tdbi = (struct tdb_ident *)(mtag + 1);
+ sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
+ } else {
+ sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
+ IP_FORWARDING, &error);
+ }
+ if (sp == NULL) { /* NB: can happen if error */
+ splx(s);
+ /*XXX error stat???*/
+ DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/
+ return 1;
+ }
+
+ /*
+ * Check security policy against packet attributes.
+ */
+ error = ipsec_in_reject(sp, m);
+ KEY_FREESP(&sp);
+ splx(s);
+ if (error) {
+ ipstat.ips_cantforward++;
+ return 1;
+ }
+#endif /* IPSEC */
+ return 0;
+}
+
+/*
+ * Check whether the protocol type has no further header and, if so, do IPSEC
+ * decryption or rejection right now. Protocols with further headers get
+ * their IPSEC treatment within the protocol-specific processing.
+ * Called from ip_input().
+ * 1 = drop packet, 0 = continue processing packet.
+ */
+int
+ip_ipsec_input(struct mbuf *m)
+{
+ struct ip *ip = mtod(m, struct ip *);
+#ifdef IPSEC
+ struct m_tag *mtag;
+ struct tdb_ident *tdbi;
+ struct secpolicy *sp;
+ int s, error;
+ /*
+ * Enforce IPsec policy checking if we are seeing the last header.
+ * Note that we do not visit this for protocols with pcb-layer
+ * code, like UDP/TCP/raw IP.
+ */
+ if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
+ /*
+ * Check if the packet has already had IPsec processing
+ * done. If so, then just pass it along. This tag gets
+ * set during AH, ESP, etc. input handling, before the
+ * packet is returned to the ip input queue for delivery.
+ */
+ mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
+ s = splnet();
+ if (mtag != NULL) {
+ tdbi = (struct tdb_ident *)(mtag + 1);
+ sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
+ } else {
+ sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
+ IP_FORWARDING, &error);
+ }
+ if (sp != NULL) {
+ /*
+ * Check security policy against packet attributes.
+ */
+ error = ipsec_in_reject(sp, m);
+ KEY_FREESP(&sp);
+ } else {
+ /* XXX error stat??? */
+ error = EINVAL;
+ DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
+ return 1;
+ }
+ splx(s);
+ if (error)
+ return 1;
+ }
+#endif /* IPSEC */
+ return 0;
+}
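
The three ip_input()-side helpers above (ip_ipsec_filtertunnel(), ip_ipsec_fwd() and ip_ipsec_input()) share one convention: return 1 to make the caller drop the packet or skip a stage, return 0 to continue normal processing. A tiny compilable model of a caller honouring that convention follows; the stub merely stands in for any of the three helpers.

#include <stdio.h>

/* Stand-in for an ip_ipsec_*() check: 1 = drop/skip, 0 = continue. */
static int
ipsec_check_stub(int policy_rejects)
{
	return (policy_rejects ? 1 : 0);
}

static void
forward_packet(int policy_rejects)
{
	if (ipsec_check_stub(policy_rejects)) {
		printf("dropped by IPsec policy\n");	/* real code would m_freem() */
		return;
	}
	printf("forwarded\n");
}

int
main(void)
{
	forward_packet(0);
	forward_packet(1);
	return (0);
}
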
+
+/*
+ * Compute the MTU for a forwarded packet that gets IPSEC encapsulated.
+ * Called from ip_forward().
+ * Returns MTU suggestion for ICMP needfrag reply.
+ */
+int
+ip_ipsec_mtu(struct mbuf *m)
+{
+ int mtu = 0;
+ /*
+ * If the packet is routed over an IPsec tunnel, tell the
+ * originator the tunnel MTU.
+ * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
+ * XXX quickhack!!!
+ */
+ struct secpolicy *sp = NULL;
+ int ipsecerror;
+ int ipsechdr;
+ struct route *ro;
+ sp = ipsec_getpolicybyaddr(m,
+ IPSEC_DIR_OUTBOUND,
+ IP_FORWARDING,
+ &ipsecerror);
+ if (sp != NULL) {
+ /* count IPsec header size */
+ ipsechdr = ipsec4_hdrsiz(m,
+ IPSEC_DIR_OUTBOUND,
+ NULL);
+
+ /*
+ * find the correct route for outer IPv4
+ * header, compute tunnel MTU.
+ */
+ if (sp->req != NULL &&
+ sp->req->sav != NULL &&
+ sp->req->sav->sah != NULL) {
+ ro = &sp->req->sav->sah->sa_route;
+ if (ro->ro_rt && ro->ro_rt->rt_ifp) {
+ mtu =
+ ro->ro_rt->rt_rmx.rmx_mtu ?
+ ro->ro_rt->rt_rmx.rmx_mtu :
+ ro->ro_rt->rt_ifp->if_mtu;
+ mtu -= ipsechdr;
+ }
+ }
+ KEY_FREESP(&sp);
+ }
+ return mtu;
+}
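
The suggestion returned above is just the outer path MTU minus the IPsec header overhead that ipsec4_hdrsiz() reports, preferring the route's cached MTU (rmx_mtu) over the interface MTU when one is set. A standalone illustration of that arithmetic follows; the 73-byte overhead is only an example figure, not a value taken from the stack.

#include <stdio.h>

/* Mirrors the selection in ip_ipsec_mtu(): prefer the cached route MTU,
 * fall back to the interface MTU, then subtract the IPsec overhead. */
static int
tunnel_mtu(unsigned int rmx_mtu, unsigned int if_mtu, unsigned int ipsec_hdrsiz)
{
	unsigned int mtu = rmx_mtu ? rmx_mtu : if_mtu;

	return ((int)(mtu - ipsec_hdrsiz));
}

int
main(void)
{
	/* 1500-byte Ethernet, no cached route MTU, 73 bytes of example
	 * tunnel-mode overhead -> suggest 1427 in the ICMP needfrag reply. */
	printf("suggested MTU: %d\n", tunnel_mtu(0, 1500, 73));
	return (0);
}
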
+
+/*
+ * Check the outbound security policy and do any required IPsec processing.
+ * Called from ip_output().
+ * 1 = drop packet, 0 = continue processing packet,
+ * -1 = packet was reinjected and stop processing packet
+ */
+int
+ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error,
+ struct route **ro, struct route *iproute, struct sockaddr_in **dst,
+ struct in_ifaddr **ia, struct ifnet **ifp)
+{
+#ifdef IPSEC
+ struct secpolicy *sp = NULL;
+ struct ip *ip = mtod(*m, struct ip *);
+ struct tdb_ident *tdbi;
+ struct m_tag *mtag;
+ int s;
+ /*
+ * Check the security policy (SP) for the packet and, if
+ * required, do IPsec-related processing. There are two
+ * cases here; the first time a packet is sent through,
+ * it will be untagged and handled by ipsec4_checkpolicy.
+ * If the packet is resubmitted to ip_output (e.g. after
+ * AH, ESP, etc. processing), there will be a tag to bypass
+ * the lookup and related policy checking.
+ */
+ mtag = m_tag_find(*m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
+ s = splnet();
+ if (mtag != NULL) {
+ tdbi = (struct tdb_ident *)(mtag + 1);
+ sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
+ if (sp == NULL)
+ *error = -EINVAL; /* force silent drop */
+ m_tag_delete(*m, mtag);
+ } else {
+ sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, *flags,
+ error, inp);
+ }
+ /*
+ * There are four return cases:
+ * sp != NULL apply IPsec policy
+ * sp == NULL, error == 0 no IPsec handling needed
+ * sp == NULL, error == -EINVAL discard packet w/o error
+ * sp == NULL, error != 0 discard packet, report error
+ */
+ if (sp != NULL) {
+ /* Loop detection, check if ipsec processing already done */
+ KASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
+ for (mtag = m_tag_first(*m); mtag != NULL;
+ mtag = m_tag_next(*m, mtag)) {
+ if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
+ continue;
+ if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
+ mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
+ continue;
+ /*
+ * Check if policy has an SA associated with it.
+ * This can happen when an SP has yet to acquire
+ * an SA; e.g. on first reference. If it occurs,
+ * then we let ipsec4_process_packet do its thing.
+ */
+ if (sp->req->sav == NULL)
+ break;
+ tdbi = (struct tdb_ident *)(mtag + 1);
+ if (tdbi->spi == sp->req->sav->spi &&
+ tdbi->proto == sp->req->sav->sah->saidx.proto &&
+ bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
+ sizeof (union sockaddr_union)) == 0) {
+ /*
+ * No IPsec processing is needed, free
+ * reference to SP.
+ *
+ * NB: null pointer to avoid free at
+ * done: below.
+ */
+ KEY_FREESP(&sp), sp = NULL;
+ splx(s);
+ goto done;
+ }
+ }
+
+ /*
+ * Do delayed checksums now because we send before
+ * this is done in the normal processing path.
+ */
+ if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+ in_delayed_cksum(*m);
+ (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ }
+
+ ip->ip_len = htons(ip->ip_len);
+ ip->ip_off = htons(ip->ip_off);
+
+ /* NB: callee frees mbuf */
+ *error = ipsec4_process_packet(*m, sp->req, *flags, 0);
+ /*
+ * Preserve KAME behaviour: ENOENT can be returned
+ * when an SA acquire is in progress. Don't propagate
+ * this to user-level; it confuses applications.
+ *
+ * XXX this will go away when the SADB is redone.
+ */
+ if (*error == ENOENT)
+ *error = 0;
+ splx(s);
+ goto reinjected;
+ } else { /* sp == NULL */
+ splx(s);
+
+ if (*error != 0) {
+ /*
+ * Hack: -EINVAL is used to signal that a packet
+ * should be silently discarded. This is typically
+ * because we asked key management for an SA and
+ * it was delayed (e.g. kicked up to IKE).
+ */
+ if (*error == -EINVAL)
+ *error = 0;
+ goto bad;
+ } else {
+ /* No IPsec processing for this packet. */
+ }
+#ifdef notyet
+ /*
+ * If deferred crypto processing is needed, check that
+ * the interface supports it.
+ */
+ mtag = m_tag_find(*m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL);
+ if (mtag != NULL && ((*ifp)->if_capenable & IFCAP_IPSEC) == 0) {
+ /* notify IPsec to do its own crypto */
+ ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
+ *error = EHOSTUNREACH;
+ goto bad;
+ }
+#endif
+ }
+done:
+ if (sp != NULL)
+ KEY_FREESP(&sp);
+ return 0;
+reinjected:
+ if (sp != NULL)
+ KEY_FREESP(&sp);
+ return -1;
+bad:
+ if (sp != NULL)
+ KEY_FREESP(&sp);
+ return 1;
+#endif /* IPSEC */
+ return 0;
+}
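
Unlike the ip_input()-side helpers, ip_ipsec_output() has a three-way contract: 1 tells ip_output() to drop the packet, 0 to continue building and sending it, and -1 that the packet was handed to ipsec4_process_packet() and ip_output() should just return *error. A compilable sketch of a caller dispatching on that contract follows; the stub and the error value are illustrative only, and the real ip_output() integration obviously does more.

#include <stdio.h>

/* Stand-in producing one of ip_ipsec_output()'s three results. */
static int
ipsec_output_stub(int scenario, int *error)
{
	if (scenario == 1) {
		*error = 0;
		return (-1);	/* packet reinjected through IPsec */
	}
	if (scenario == 2) {
		*error = 1;	/* arbitrary example errno */
		return (1);	/* drop */
	}
	*error = 0;
	return (0);		/* continue normal output processing */
}

static int
output_packet(int scenario)
{
	int error;

	switch (ipsec_output_stub(scenario, &error)) {
	case -1:
		return (error);		/* IPsec consumed the packet */
	case 1:
		printf("drop\n");	/* 'goto bad' in the real code */
		return (error);
	default:
		printf("continue with the normal send path\n");
		return (0);
	}
}

int
main(void)
{
	output_packet(0);
	output_packet(1);
	output_packet(2);
	return (0);
}
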
--- /dev/null
+++ sys/netinet/sctp_pcb.h
@@ -0,0 +1,589 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_pcb.h,v 1.21 2005/07/16 01:18:47 suz Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_pcb.h,v 1.31.2.1 2007/11/06 02:48:03 rrs Exp $");
+
+#ifndef __sctp_pcb_h__
+#define __sctp_pcb_h__
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp.h>
+#include <netinet/sctp_constants.h>
+
+LIST_HEAD(sctppcbhead, sctp_inpcb);
+LIST_HEAD(sctpasochead, sctp_tcb);
+LIST_HEAD(sctpladdr, sctp_laddr);
+LIST_HEAD(sctpvtaghead, sctp_tagblock);
+LIST_HEAD(sctp_vrflist, sctp_vrf);
+LIST_HEAD(sctp_ifnlist, sctp_ifn);
+LIST_HEAD(sctp_ifalist, sctp_ifa);
+TAILQ_HEAD(sctp_readhead, sctp_queued_to_read);
+TAILQ_HEAD(sctp_streamhead, sctp_stream_queue_pending);
+
+#include <netinet/sctp_structs.h>
+#include <netinet/sctp_auth.h>
+
+#define SCTP_PCBHASH_ALLADDR(port, mask) (port & mask)
+#define SCTP_PCBHASH_ASOC(tag, mask) (tag & mask)
+
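
The two hash macros above rely on the usual hashinit()-style convention: the table size is a power of two and the stored mark is size - 1, so the AND is a cheap modulo. A standalone illustration follows; the table size of 1024 is an arbitrary example, not SCTP's actual tuning.

#include <stdio.h>
#include <stdint.h>

#define SCTP_PCBHASH_ALLADDR(port, mask) (port & mask)

int
main(void)
{
	uint32_t hashsize = 1024;		/* must be a power of two */
	uint32_t hashmark = hashsize - 1;	/* 0x3ff */
	uint16_t lport = 36412;

	printf("port %u hashes to bucket %u of %u\n", (unsigned int)lport,
	    (unsigned int)SCTP_PCBHASH_ALLADDR(lport, hashmark),
	    (unsigned int)hashsize);
	return (0);
}
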
+struct sctp_vrf {
+ LIST_ENTRY(sctp_vrf) next_vrf;
+ struct sctp_ifalist *vrf_addr_hash;
+ struct sctp_ifnlist ifnlist;
+ uint32_t vrf_id;
+ uint32_t tbl_id_v4; /* default v4 table id */
+ uint32_t tbl_id_v6; /* default v6 table id */
+ uint32_t total_ifa_count;
+ u_long vrf_addr_hashmark;
+ uint32_t refcount;
+};
+
+struct sctp_ifn {
+ struct sctp_ifalist ifalist;
+ struct sctp_vrf *vrf;
+ LIST_ENTRY(sctp_ifn) next_ifn;
+ LIST_ENTRY(sctp_ifn) next_bucket;
+ void *ifn_p; /* never access without appropriate lock */
+ uint32_t ifn_mtu;
+ uint32_t ifn_type;
+ uint32_t ifn_index; /* shorthand way to look at ifn for reference */
+ uint32_t refcount; /* number of references held; should be >=
+ * ifa_count */
+ uint32_t ifa_count; /* IFA's we hold (in our list - ifalist) */
+ uint32_t num_v6; /* number of v6 addresses */
+ uint32_t num_v4; /* number of v4 addresses */
+ uint32_t registered_af; /* registered address family for i/f events */
+ char ifn_name[SCTP_IFNAMSIZ];
+};
+
+/* SCTP local IFA flags */
+#define SCTP_ADDR_VALID 0x00000001 /* it's up and active */
+#define SCTP_BEING_DELETED 0x00000002 /* being deleted, when
+ * refcount = 0. Note that it
+ * is pulled from the ifn list
+ * and ifa_p is nulled right
+ * away but it cannot be freed
+ * until the last *net
+ * pointing to it is deleted. */
+#define SCTP_ADDR_DEFER_USE 0x00000004 /* Hold off using this one */
+#define SCTP_ADDR_IFA_UNUSEABLE 0x00000008
+
+struct sctp_ifa {
+ LIST_ENTRY(sctp_ifa) next_ifa;
+ LIST_ENTRY(sctp_ifa) next_bucket;
+ struct sctp_ifn *ifn_p; /* back pointer to parent ifn */
+ void *ifa; /* pointer to ifa; for flag updates on it
+ * we MUST hold the appropriate locks. This
+ * is for V6. */
+ union sctp_sockstore address;
+ uint32_t refcount; /* number of folks referring to this */
+ uint32_t flags;
+ uint32_t localifa_flags;
+ uint32_t vrf_id; /* vrf_id of this addr (for deleting) */
+ uint8_t src_is_loop;
+ uint8_t src_is_priv;
+ uint8_t src_is_glob;
+ uint8_t resv;
+};
+
+struct sctp_laddr {
+ LIST_ENTRY(sctp_laddr) sctp_nxt_addr; /* next in list */
+ struct sctp_ifa *ifa;
+ uint32_t action; /* Used during asconf and adding; if non-zero,
+ * src-addr selection will not consider this
+ * address. */
+ struct timeval start_time; /* time when this address was created */
+};
+
+struct sctp_block_entry {
+ int error;
+};
+
+struct sctp_timewait {
+ uint32_t tv_sec_at_expire; /* the seconds from boot to expire */
+ uint32_t v_tag; /* the vtag that can not be reused */
+};
+
+struct sctp_tagblock {
+ LIST_ENTRY(sctp_tagblock) sctp_nxt_tagblock;
+ struct sctp_timewait vtag_block[SCTP_NUMBER_IN_VTAG_BLOCK];
+};
+
+struct sctp_epinfo {
+ struct sctpasochead *sctp_asochash;
+ u_long hashasocmark;
+
+ struct sctppcbhead *sctp_ephash;
+ u_long hashmark;
+
+ struct sctpasochead *sctp_restarthash;
+ u_long hashrestartmark;
+ /*-
+ * The TCP model represents a substantial overhead in that we get an
+ * additional hash table to keep explicit connections in. The
+ * listening TCP endpoint will exist in the usual ephash above and
+ * accept only INIT's. It will be incapable of sending off an INIT.
+ * When a dg arrives we must look in the normal ephash. If we find a
+ * TCP endpoint, that will tell us to go to the specific endpoint
+ * hash and re-hash to find the right assoc/socket. If we find a UDP
+ * model socket, we then must complete the lookup. If this fails,
+ * i.e. no association can be found, then we must continue to see if
+ * a sctp_peeloff()'d socket is in the tcpephash (a spun-off socket
+ * acts like a TCP-model connected socket).
+ */
+ struct sctppcbhead *sctp_tcpephash;
+ u_long hashtcpmark;
+ uint32_t hashtblsize;
+
+ struct sctp_vrflist *sctp_vrfhash;
+ u_long hashvrfmark;
+
+ struct sctp_ifnlist *vrf_ifn_hash;
+ u_long vrf_ifn_hashmark;
+
+ struct sctppcbhead listhead;
+ struct sctpladdr addr_wq;
+
+ struct sctpiterators iteratorhead;
+
+ /* ep zone info */
+ sctp_zone_t ipi_zone_ep;
+ sctp_zone_t ipi_zone_asoc;
+ sctp_zone_t ipi_zone_laddr;
+ sctp_zone_t ipi_zone_net;
+ sctp_zone_t ipi_zone_chunk;
+ sctp_zone_t ipi_zone_readq;
+ sctp_zone_t ipi_zone_strmoq;
+ sctp_zone_t ipi_zone_asconf_ack;
+
+ struct rwlock ipi_ep_mtx;
+ struct mtx it_mtx;
+ struct mtx ipi_iterator_wq_mtx;
+ struct rwlock ipi_addr_mtx;
+ struct mtx ipi_pktlog_mtx;
+ uint32_t ipi_count_ep;
+
+ /* assoc/tcb zone info */
+ uint32_t ipi_count_asoc;
+
+ /* local addrlist zone info */
+ uint32_t ipi_count_laddr;
+
+ /* remote addrlist zone info */
+ uint32_t ipi_count_raddr;
+
+ /* chunk structure list for output */
+ uint32_t ipi_count_chunk;
+
+ /* socket queue zone info */
+ uint32_t ipi_count_readq;
+
+ /* socket queue zone info */
+ uint32_t ipi_count_strmoq;
+
+ /* Number of vrfs */
+ uint32_t ipi_count_vrfs;
+
+ /* Number of ifns */
+ uint32_t ipi_count_ifns;
+
+ /* Number of ifas */
+ uint32_t ipi_count_ifas;
+
+ /* system wide number of free chunks hanging around */
+ uint32_t ipi_free_chunks;
+ uint32_t ipi_free_strmoq;
+
+
+ struct sctpvtaghead vtag_timewait[SCTP_STACK_VTAG_HASH_SIZE_A];
+
+ /* address work queue handling */
+#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
+ uint32_t iterator_running;
+ SCTP_PROCESS_STRUCT thread_proc;
+#endif
+ struct sctp_timer addr_wq_timer;
+
+};
+
+/*-
+ * Here we have all the relevant information for each SCTP entity created. We
+ * will need to modify this as appropriate. We also need to figure out how to
+ * access /dev/random.
+ */
+struct sctp_pcb {
+ unsigned int time_of_secret_change; /* number of seconds from
+ * timeval.tv_sec */
+ uint32_t secret_key[SCTP_HOW_MANY_SECRETS][SCTP_NUMBER_OF_SECRETS];
+ unsigned int size_of_a_cookie;
+
+ unsigned int sctp_timeoutticks[SCTP_NUM_TMRS];
+ unsigned int sctp_minrto;
+ unsigned int sctp_maxrto;
+ unsigned int initial_rto;
+ int initial_init_rto_max;
+
+ unsigned int sctp_sack_freq;
+ uint32_t sctp_sws_sender;
+ uint32_t sctp_sws_receiver;
+
+ uint32_t sctp_default_cc_module;
+ /* authentication related fields */
+ struct sctp_keyhead shared_keys;
+ sctp_auth_chklist_t *local_auth_chunks;
+ sctp_hmaclist_t *local_hmacs;
+ uint16_t default_keyid;
+
+ /* various thresholds */
+ /* Max times I will init at a guy */
+ uint16_t max_init_times;
+
+ /* Max times I will send before we consider someone dead */
+ uint16_t max_send_times;
+
+ uint16_t def_net_failure;
+
+ /* number of streams to pre-open on an association */
+ uint16_t pre_open_stream_count;
+ uint16_t max_open_streams_intome;
+
+ /* random number generator */
+ uint32_t random_counter;
+ uint8_t random_numbers[SCTP_SIGNATURE_ALOC_SIZE];
+ uint8_t random_store[SCTP_SIGNATURE_ALOC_SIZE];
+
+ /*
+ * This timer is kept running per endpoint. When it fires it will
+ * change the secret key. The default is once an hour.
+ */
+ struct sctp_timer signature_change;
+
+ /* Zero copy full buffer timer */
+ struct sctp_timer zero_copy_timer;
+ /* Zero copy app to transport (sendq) read repulse timer */
+ struct sctp_timer zero_copy_sendq_timer;
+ uint32_t def_cookie_life;
+ /* defaults to 0 */
+ int auto_close_time;
+ uint32_t initial_sequence_debug;
+ uint32_t adaptation_layer_indicator;
+ uint32_t store_at;
+ uint8_t max_burst;
+ char current_secret_number;
+ char last_secret_number;
+};
+
+#ifndef SCTP_ALIGNMENT
+#define SCTP_ALIGNMENT 32
+#endif
+
+#ifndef SCTP_ALIGNM1
+#define SCTP_ALIGNM1 (SCTP_ALIGNMENT-1)
+#endif
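
SCTP_ALIGNMENT and SCTP_ALIGNM1 feed the classic round-up idiom used in the ip_inp union of struct sctp_inpcb below: (size + SCTP_ALIGNM1) & ~SCTP_ALIGNM1 rounds a size up to the next multiple of SCTP_ALIGNMENT, which works because the alignment is a power of two. A quick standalone check of the idiom:

#include <stdio.h>
#include <stddef.h>

#define SCTP_ALIGNMENT	32
#define SCTP_ALIGNM1	(SCTP_ALIGNMENT - 1)
#define ROUNDUP(x)	(((x) + SCTP_ALIGNM1) & ~SCTP_ALIGNM1)

int
main(void)
{
	/* A 41-byte object pads out to 64; an exact multiple stays put. */
	printf("%zu -> %zu\n", (size_t)41, (size_t)ROUNDUP(41));
	printf("%zu -> %zu\n", (size_t)64, (size_t)ROUNDUP(64));
	return (0);
}
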
+
+#define sctp_lport ip_inp.inp.inp_lport
+
+struct sctp_pcbtsn_rlog {
+ uint32_t vtag;
+ uint16_t strm;
+ uint16_t seq;
+ uint16_t sz;
+ uint16_t flgs;
+};
+
+#define SCTP_READ_LOG_SIZE 135 /* we choose the number to make a pcb a page */
+
+
+struct sctp_inpcb {
+ /*-
+ * put an inpcb in front of it all; kind of a waste, but we need it
+ * for compatibility with all the other stuff.
+ */
+ union {
+ struct inpcb inp;
+ char align[(sizeof(struct in6pcb) + SCTP_ALIGNM1) &
+ ~SCTP_ALIGNM1];
+ } ip_inp;
+
+
+ /* Socket buffer lock protects read_queue and of course sb_cc */
+ struct sctp_readhead read_queue;
+
+ LIST_ENTRY(sctp_inpcb) sctp_list; /* lists all endpoints */
+ /* hash of all endpoints for model */
+ LIST_ENTRY(sctp_inpcb) sctp_hash;
+ /* count of local addresses bound, 0 if bound all */
+ int laddr_count;
+
+ /* list of addrs in use by the EP, NULL if bound-all */
+ struct sctpladdr sctp_addr_list;
+ /*
+ * used for source address selection rotation when we are subset
+ * bound
+ */
+ struct sctp_laddr *next_addr_touse;
+
+ /* back pointer to our socket */
+ struct socket *sctp_socket;
+ uint32_t sctp_flags; /* INP state flag set */
+ uint32_t sctp_features; /* Feature flags */
+ uint32_t sctp_mobility_features; /* Mobility Feature flags */
+ struct sctp_pcb sctp_ep;/* SCTP ep data */
+ /* head of the hash of all associations */
+ struct sctpasochead *sctp_tcbhash;
+ u_long sctp_hashmark;
+ /* head of the list of all associations */
+ struct sctpasochead sctp_asoc_list;
+#ifdef SCTP_TRACK_FREED_ASOCS
+ struct sctpasochead sctp_asoc_free_list;
+#endif
+ struct sctp_iterator *inp_starting_point_for_iterator;
+ uint32_t sctp_frag_point;
+ uint32_t partial_delivery_point;
+ uint32_t sctp_context;
+ struct sctp_nonpad_sndrcvinfo def_send;
+ /*-
+ * These three (pkt, pkt_last and control) are here
+ * for the sosend_dgram routine. However, I don't
+ * think anyone in the current FreeBSD kernel calls
+ * this, so they are candidates, along with sctp_sendm,
+ * for de-supporting.
+ */
+ struct mbuf *pkt, *pkt_last;
+ struct mbuf *control;
+ struct mtx inp_mtx;
+ struct mtx inp_create_mtx;
+ struct mtx inp_rdata_mtx;
+ int32_t refcount;
+ uint32_t def_vrf_id;
+ uint32_t total_sends;
+ uint32_t total_recvs;
+ uint32_t last_abort_code;
+ uint32_t total_nospaces;
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ struct sctp_pcbtsn_rlog readlog[SCTP_READ_LOG_SIZE];
+ uint32_t readlog_index;
+#endif
+};
+
+struct sctp_tcb {
+ struct socket *sctp_socket; /* back pointer to socket */
+ struct sctp_inpcb *sctp_ep; /* back pointer to ep */
+ LIST_ENTRY(sctp_tcb) sctp_tcbhash; /* next link in hash
+ * table */
+ LIST_ENTRY(sctp_tcb) sctp_tcblist; /* list of all of the
+ * TCB's */
+ LIST_ENTRY(sctp_tcb) sctp_tcbrestarhash; /* next link in restart
+ * hash table */
+ LIST_ENTRY(sctp_tcb) sctp_asocs; /* vtag hash list */
+ struct sctp_block_entry *block_entry; /* pointer locked by socket
+ * send buffer */
+ struct sctp_association asoc;
+ /*-
+ * freed_by_sorcv_sincelast is protected by the sockbuf_lock NOT the
+ * tcb_lock. It's special in this way to help avoid extra mutex calls
+ * in the reading of data.
+ */
+ uint32_t freed_by_sorcv_sincelast;
+ uint32_t total_sends;
+ uint32_t total_recvs;
+ int freed_from_where;
+ uint16_t rport; /* remote port in network format */
+ uint16_t resv;
+ struct mtx tcb_mtx;
+ struct mtx tcb_send_mtx;
+};
+
+
+
+#include <netinet/sctp_lock_bsd.h>
+
+
+#if defined(_KERNEL)
+
+extern struct sctp_epinfo sctppcbinfo;
+
+int SCTP6_ARE_ADDR_EQUAL(struct in6_addr *a, struct in6_addr *b);
+
+void sctp_fill_pcbinfo(struct sctp_pcbinfo *);
+
+struct sctp_ifn *
+ sctp_find_ifn(void *ifn, uint32_t ifn_index);
+
+struct sctp_vrf *sctp_allocate_vrf(int vrfid);
+struct sctp_vrf *sctp_find_vrf(uint32_t vrfid);
+void sctp_free_vrf(struct sctp_vrf *vrf);
+
+/*-
+ * Change address state, can be used if
+ * O/S supports telling transports about
+ * changes to IFA/IFN's (link layer triggers).
+ * If an ifn goes down, we will do src-addr-selection
+ * and NOT use that as a source address. This does
+ * not stop the routing system from routing out
+ * that interface, but we won't put it as a source.
+ */
+void sctp_mark_ifa_addr_down(uint32_t vrf_id, struct sockaddr *addr, const char *if_name, uint32_t ifn_index);
+void sctp_mark_ifa_addr_up(uint32_t vrf_id, struct sockaddr *addr, const char *if_name, uint32_t ifn_index);
+
+struct sctp_ifa *
+sctp_add_addr_to_vrf(uint32_t vrfid,
+ void *ifn, uint32_t ifn_index, uint32_t ifn_type,
+ const char *if_name,
+ void *ifa, struct sockaddr *addr, uint32_t ifa_flags,
+ int dynamic_add);
+
+void sctp_update_ifn_mtu(uint32_t ifn_index, uint32_t mtu);
+
+void sctp_free_ifn(struct sctp_ifn *sctp_ifnp);
+void sctp_free_ifa(struct sctp_ifa *sctp_ifap);
+
+
+void
+sctp_del_addr_from_vrf(uint32_t vrfid, struct sockaddr *addr,
+ uint32_t ifn_index, const char *if_name);
+
+
+
+struct sctp_nets *sctp_findnet(struct sctp_tcb *, struct sockaddr *);
+
+struct sctp_inpcb *sctp_pcb_findep(struct sockaddr *, int, int, uint32_t);
+
+int
+sctp_inpcb_bind(struct socket *, struct sockaddr *,
+ struct sctp_ifa *, struct thread *);
+
+struct sctp_tcb *
+sctp_findassociation_addr(struct mbuf *, int, int,
+ struct sctphdr *, struct sctp_chunkhdr *, struct sctp_inpcb **,
+ struct sctp_nets **, uint32_t vrf_id);
+
+struct sctp_tcb *
+sctp_findassociation_addr_sa(struct sockaddr *,
+ struct sockaddr *, struct sctp_inpcb **, struct sctp_nets **, int, uint32_t);
+
+void
+sctp_move_pcb_and_assoc(struct sctp_inpcb *, struct sctp_inpcb *,
+ struct sctp_tcb *);
+
+/*-
+ * For this call (ep_addr), 'to' is the destination endpoint address of the
+ * peer (relative to outbound). The 'from' field is only used if the TCP model
+ * is enabled and helps distinguish amongst the subset bound (non-boundall).
+ * The TCP model MAY change the actual ep field; this is why it is passed.
+ */
+struct sctp_tcb *
+sctp_findassociation_ep_addr(struct sctp_inpcb **,
+ struct sockaddr *, struct sctp_nets **, struct sockaddr *,
+ struct sctp_tcb *);
+
+struct sctp_tcb *
+sctp_findassociation_ep_asocid(struct sctp_inpcb *,
+ sctp_assoc_t, int);
+
+struct sctp_tcb *
+sctp_findassociation_ep_asconf(struct mbuf *, int, int,
+ struct sctphdr *, struct sctp_inpcb **, struct sctp_nets **);
+
+int sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id);
+
+int sctp_is_address_on_local_host(struct sockaddr *addr, uint32_t vrf_id);
+
+void sctp_inpcb_free(struct sctp_inpcb *, int, int);
+
+struct sctp_tcb *
+sctp_aloc_assoc(struct sctp_inpcb *, struct sockaddr *,
+ int, int *, uint32_t, uint32_t, struct thread *);
+
+int sctp_free_assoc(struct sctp_inpcb *, struct sctp_tcb *, int, int);
+
+
+void sctp_delete_from_timewait(uint32_t);
+
+int sctp_is_in_timewait(uint32_t tag);
+
+void
+ sctp_add_vtag_to_timewait(uint32_t, uint32_t);
+
+void sctp_add_local_addr_ep(struct sctp_inpcb *, struct sctp_ifa *, uint32_t);
+
+int sctp_insert_laddr(struct sctpladdr *, struct sctp_ifa *, uint32_t);
+
+void sctp_remove_laddr(struct sctp_laddr *);
+
+void sctp_del_local_addr_ep(struct sctp_inpcb *, struct sctp_ifa *);
+
+int sctp_add_remote_addr(struct sctp_tcb *, struct sockaddr *, int, int);
+
+void sctp_remove_net(struct sctp_tcb *, struct sctp_nets *);
+
+int sctp_del_remote_addr(struct sctp_tcb *, struct sockaddr *);
+
+void sctp_pcb_init(void);
+
+
+void sctp_add_local_addr_restricted(struct sctp_tcb *, struct sctp_ifa *);
+void sctp_del_local_addr_restricted(struct sctp_tcb *, struct sctp_ifa *);
+
+int
+sctp_load_addresses_from_init(struct sctp_tcb *, struct mbuf *, int, int,
+ int, struct sctphdr *, struct sockaddr *);
+
+int
+sctp_set_primary_addr(struct sctp_tcb *, struct sockaddr *,
+ struct sctp_nets *);
+
+int sctp_is_vtag_good(struct sctp_inpcb *, uint32_t, struct timeval *, int);
+
+/* void sctp_drain(void); */
+
+int sctp_destination_is_reachable(struct sctp_tcb *, struct sockaddr *);
+
+/*-
+ * Null in last arg inpcb indicates run on ALL ep's. Specific inp in last arg
+ * indicates run on ONLY assoc's of the specified endpoint.
+ */
+int
+sctp_initiate_iterator(inp_func inpf,
+ asoc_func af,
+ inp_func inpe,
+ uint32_t, uint32_t,
+ uint32_t, void *,
+ uint32_t,
+ end_func ef,
+ struct sctp_inpcb *,
+ uint8_t co_off);
+
+#endif /* _KERNEL */
+#endif /* !__sctp_pcb_h__ */
Index: tcp_usrreq.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -L sys/netinet/tcp_usrreq.c -L sys/netinet/tcp_usrreq.c -u -r1.7 -r1.8
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -1,6 +1,8 @@
/*-
* Copyright (c) 1982, 1986, 1988, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * Copyright (c) 2006-2007 Robert N. M. Watson
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,12 +29,12 @@
* SUCH DAMAGE.
*
* From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
- * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.124.2.2 2005/11/04 20:26:14 ume Exp $
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.163.2.1.2.1 2008/01/26 13:57:33 rwatson Exp $");
+#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_tcpdebug.h"
@@ -52,6 +54,10 @@
#include <sys/proc.h>
#include <sys/jail.h>
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
#include <net/if.h>
#include <net/route.h>
@@ -83,10 +89,6 @@
/*
* TCP protocol interface to socket abstraction.
*/
-#ifdef TCPDEBUG
-extern char *tcpstates[]; /* XXX ??? */
-#endif
-
static int tcp_attach(struct socket *);
static int tcp_connect(struct tcpcb *, struct sockaddr *,
struct thread *td);
@@ -94,10 +96,8 @@
static int tcp6_connect(struct tcpcb *, struct sockaddr *,
struct thread *td);
#endif /* INET6 */
-static struct tcpcb *
- tcp_disconnect(struct tcpcb *);
-static struct tcpcb *
- tcp_usrclosed(struct tcpcb *);
+static void tcp_disconnect(struct tcpcb *);
+static void tcp_usrclosed(struct tcpcb *);
static void tcp_fill_info(struct tcpcb *, struct tcp_info *);
#ifdef TCPDEBUG
@@ -118,16 +118,14 @@
static int
tcp_usr_attach(struct socket *so, int proto, struct thread *td)
{
- int error;
struct inpcb *inp;
- struct tcpcb *tp = 0;
+ struct tcpcb *tp = NULL;
+ int error;
+ TCPDEBUG0;
- INP_INFO_WLOCK(&tcbinfo);
inp = sotoinpcb(so);
- if (inp) {
- error = EISCONN;
- goto out;
- }
+ KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
+ TCPDEBUG1();
error = tcp_attach(so);
if (error)
@@ -139,80 +137,128 @@
inp = sotoinpcb(so);
tp = intotcpcb(inp);
out:
- INP_INFO_WUNLOCK(&tcbinfo);
+ TCPDEBUG2(PRU_ATTACH);
return error;
}
/*
+ * tcp_detach is called when the socket layer loses its final reference
+ * to the socket, be it a file descriptor reference, a reference from TCP,
+ * etc. At this point, there is only one case in which we will keep around
+ * inpcb state: time wait.
+ *
+ * This function can probably be re-absorbed back into tcp_usr_detach() now
+ * that there is a single detach path.
+ */
+static void
+tcp_detach(struct socket *so, struct inpcb *inp)
+{
+ struct tcpcb *tp;
+#ifdef INET6
+ int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0;
+#endif
+
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ INP_LOCK_ASSERT(inp);
+
+ KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp"));
+ KASSERT(inp->inp_socket == so, ("tcp_detach: inp_socket != so"));
+
+ tp = intotcpcb(inp);
+
+ if (inp->inp_vflag & INP_TIMEWAIT) {
+ /*
+ * There are two cases to handle: one in which the time wait
+ * state is being discarded (INP_DROPPED), and one in which
+ * this connection will remain in timewait. In the former,
+ * it is time to discard all state (except tcptw, which has
+ * already been discarded by the timewait close code, which
+ * should be further up the call stack somewhere). In the
+ * latter case, we detach from the socket, but leave the pcb
+ * present until timewait ends.
+ *
+ * XXXRW: Would it be cleaner to free the tcptw here?
+ */
+ if (inp->inp_vflag & INP_DROPPED) {
+ KASSERT(tp == NULL, ("tcp_detach: INP_TIMEWAIT && "
+ "INP_DROPPED && tp != NULL"));
+#ifdef INET6
+ if (isipv6) {
+ in6_pcbdetach(inp);
+ in6_pcbfree(inp);
+ } else {
+#endif
+ in_pcbdetach(inp);
+ in_pcbfree(inp);
+#ifdef INET6
+ }
+#endif
+ } else {
+#ifdef INET6
+ if (isipv6)
+ in6_pcbdetach(inp);
+ else
+#endif
+ in_pcbdetach(inp);
+ INP_UNLOCK(inp);
+ }
+ } else {
+ /*
+ * If the connection is not in timewait, we consider two
+ * conditions: one in which no further processing is
+ * necessary (dropped || embryonic), and one in which TCP is
+ * not yet done, but no longer requires the socket, so the
+ * pcb will persist for the time being.
+ *
+ * XXXRW: Does the second case still occur?
+ */
+ if (inp->inp_vflag & INP_DROPPED ||
+ tp->t_state < TCPS_SYN_SENT) {
+ tcp_discardcb(tp);
+#ifdef INET6
+ if (isipv6) {
+ in6_pcbdetach(inp);
+ in6_pcbfree(inp);
+ } else {
+#endif
+ in_pcbdetach(inp);
+ in_pcbfree(inp);
+#ifdef INET6
+ }
+#endif
+ } else {
+#ifdef INET6
+ if (isipv6)
+ in6_pcbdetach(inp);
+ else
+#endif
+ in_pcbdetach(inp);
+ }
+ }
+}
+
+/*
* pru_detach() detaches the TCP protocol from the socket.
* If the protocol state is non-embryonic, then can't
* do this directly: have to initiate a pru_disconnect(),
* which may finish later; embryonic TCB's can just
* be discarded here.
*/
-static int
+static void
tcp_usr_detach(struct socket *so)
{
- int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- TCPDEBUG0;
- INP_INFO_WLOCK(&tcbinfo);
inp = sotoinpcb(so);
- if (inp == NULL) {
- INP_INFO_WUNLOCK(&tcbinfo);
- return error;
- }
+ KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
+ INP_INFO_WLOCK(&tcbinfo);
INP_LOCK(inp);
- tp = intotcpcb(inp);
- TCPDEBUG1();
- tp = tcp_disconnect(tp);
-
- TCPDEBUG2(PRU_DETACH);
- if (tp)
- INP_UNLOCK(inp);
+ KASSERT(inp->inp_socket != NULL,
+ ("tcp_usr_detach: inp_socket == NULL"));
+ tcp_detach(so, inp);
INP_INFO_WUNLOCK(&tcbinfo);
- return error;
}
-#define INI_NOLOCK 0
-#define INI_READ 1
-#define INI_WRITE 2
-
-#define COMMON_START() \
- TCPDEBUG0; \
- do { \
- if (inirw == INI_READ) \
- INP_INFO_RLOCK(&tcbinfo); \
- else if (inirw == INI_WRITE) \
- INP_INFO_WLOCK(&tcbinfo); \
- inp = sotoinpcb(so); \
- if (inp == 0) { \
- if (inirw == INI_READ) \
- INP_INFO_RUNLOCK(&tcbinfo); \
- else if (inirw == INI_WRITE) \
- INP_INFO_WUNLOCK(&tcbinfo); \
- return EINVAL; \
- } \
- INP_LOCK(inp); \
- if (inirw == INI_READ) \
- INP_INFO_RUNLOCK(&tcbinfo); \
- tp = intotcpcb(inp); \
- TCPDEBUG1(); \
-} while(0)
-
-#define COMMON_END(req) \
-out: TCPDEBUG2(req); \
- do { \
- if (tp) \
- INP_UNLOCK(inp); \
- if (inirw == INI_WRITE) \
- INP_INFO_WUNLOCK(&tcbinfo); \
- return error; \
- goto out; \
-} while(0)
-
/*
* Give the socket an address.
*/
@@ -221,9 +267,8 @@
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
+ struct tcpcb *tp = NULL;
struct sockaddr_in *sinp;
- const int inirw = INI_WRITE;
sinp = (struct sockaddr_in *)nam;
if (nam->sa_len != sizeof (*sinp))
@@ -236,11 +281,24 @@
IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
return (EAFNOSUPPORT);
- COMMON_START();
- error = in_pcbbind(inp, nam, td->td_ucred);
- if (error)
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
goto out;
- COMMON_END(PRU_BIND);
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ error = in_pcbbind(inp, nam, td->td_ucred);
+out:
+ TCPDEBUG2(PRU_BIND);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+
+ return (error);
}
#ifdef INET6
@@ -249,9 +307,8 @@
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
+ struct tcpcb *tp = NULL;
struct sockaddr_in6 *sin6p;
- const int inirw = INI_WRITE;
sin6p = (struct sockaddr_in6 *)nam;
if (nam->sa_len != sizeof (*sin6p))
@@ -264,7 +321,17 @@
IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
return (EAFNOSUPPORT);
- COMMON_START();
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
@@ -282,9 +349,11 @@
}
}
error = in6_pcbbind(inp, nam, td->td_ucred);
- if (error)
- goto out;
- COMMON_END(PRU_BIND);
+out:
+ TCPDEBUG2(PRU_BIND);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+ return (error);
}
#endif /* INET6 */
@@ -292,36 +361,59 @@
* Prepare to accept connections.
*/
static int
-tcp_usr_listen(struct socket *so, struct thread *td)
+tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- const int inirw = INI_WRITE;
+ struct tcpcb *tp = NULL;
- COMMON_START();
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
SOCK_LOCK(so);
error = solisten_proto_check(so);
if (error == 0 && inp->inp_lport == 0)
error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
if (error == 0) {
tp->t_state = TCPS_LISTEN;
- solisten_proto(so);
+ solisten_proto(so, backlog);
}
SOCK_UNLOCK(so);
- COMMON_END(PRU_LISTEN);
+
+out:
+ TCPDEBUG2(PRU_LISTEN);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+ return (error);
}
#ifdef INET6
static int
-tcp6_usr_listen(struct socket *so, struct thread *td)
+tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- const int inirw = INI_WRITE;
+ struct tcpcb *tp = NULL;
- COMMON_START();
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
SOCK_LOCK(so);
error = solisten_proto_check(so);
if (error == 0 && inp->inp_lport == 0) {
@@ -332,10 +424,15 @@
}
if (error == 0) {
tp->t_state = TCPS_LISTEN;
- solisten_proto(so);
+ solisten_proto(so, backlog);
}
SOCK_UNLOCK(so);
- COMMON_END(PRU_LISTEN);
+
+out:
+ TCPDEBUG2(PRU_LISTEN);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+ return (error);
}
#endif /* INET6 */
@@ -351,9 +448,8 @@
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
+ struct tcpcb *tp = NULL;
struct sockaddr_in *sinp;
- const int inirw = INI_WRITE;
sinp = (struct sockaddr_in *)nam;
if (nam->sa_len != sizeof (*sinp))
@@ -367,11 +463,25 @@
if (jailed(td->td_ucred))
prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr);
- COMMON_START();
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
if ((error = tcp_connect(tp, nam, td)) != 0)
goto out;
error = tcp_output(tp);
- COMMON_END(PRU_CONNECT);
+out:
+ TCPDEBUG2(PRU_CONNECT);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+ return (error);
}
#ifdef INET6
@@ -380,9 +490,10 @@
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
+ struct tcpcb *tp = NULL;
struct sockaddr_in6 *sin6p;
- const int inirw = INI_WRITE;
+
+ TCPDEBUG0;
sin6p = (struct sockaddr_in6 *)nam;
if (nam->sa_len != sizeof (*sin6p))
@@ -394,7 +505,16 @@
&& IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
return (EAFNOSUPPORT);
- COMMON_START();
+ INP_INFO_WLOCK(&tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = EINVAL;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
struct sockaddr_in sin;
@@ -417,7 +537,12 @@
if ((error = tcp6_connect(tp, nam, td)) != 0)
goto out;
error = tcp_output(tp);
- COMMON_END(PRU_CONNECT);
+
+out:
+ TCPDEBUG2(PRU_CONNECT);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+ return (error);
}
#endif /* INET6 */
@@ -435,14 +560,27 @@
static int
tcp_usr_disconnect(struct socket *so)
{
- int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- const int inirw = INI_WRITE;
+ struct tcpcb *tp = NULL;
+ int error = 0;
- COMMON_START();
- tp = tcp_disconnect(tp);
- COMMON_END(PRU_DISCONNECT);
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNRESET;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ tcp_disconnect(tp);
+out:
+ TCPDEBUG2(PRU_DISCONNECT);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+ return (error);
}
/*
@@ -460,33 +598,32 @@
in_port_t port = 0;
TCPDEBUG0;
- if (so->so_state & SS_ISDISCONNECTED) {
- error = ECONNABORTED;
- goto out;
- }
+ if (so->so_state & SS_ISDISCONNECTED)
+ return (ECONNABORTED);
- INP_INFO_RLOCK(&tcbinfo);
inp = sotoinpcb(so);
- if (!inp) {
- INP_INFO_RUNLOCK(&tcbinfo);
- return (EINVAL);
- }
+ KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
+ INP_INFO_RLOCK(&tcbinfo);
INP_LOCK(inp);
- INP_INFO_RUNLOCK(&tcbinfo);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNABORTED;
+ goto out;
+ }
tp = intotcpcb(inp);
TCPDEBUG1();
/*
- * We inline in_setpeeraddr and COMMON_END here, so that we can
+ * We inline in_getpeeraddr and COMMON_END here, so that we can
* copy the data of interest and defer the malloc until after we
* release the lock.
*/
port = inp->inp_fport;
addr = inp->inp_faddr;
-out: TCPDEBUG2(PRU_ACCEPT);
- if (tp)
- INP_UNLOCK(inp);
+out:
+ TCPDEBUG2(PRU_ACCEPT);
+ INP_UNLOCK(inp);
+ INP_INFO_RUNLOCK(&tcbinfo);
if (error == 0)
*nam = in_sockaddr(port, &addr);
return error;
@@ -505,21 +642,19 @@
int v4 = 0;
TCPDEBUG0;
- if (so->so_state & SS_ISDISCONNECTED) {
- error = ECONNABORTED;
- goto out;
- }
+ if (so->so_state & SS_ISDISCONNECTED)
+ return (ECONNABORTED);
- INP_INFO_RLOCK(&tcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_RUNLOCK(&tcbinfo);
- return (EINVAL);
- }
+ KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
INP_LOCK(inp);
- INP_INFO_RUNLOCK(&tcbinfo);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNABORTED;
+ goto out;
+ }
tp = intotcpcb(inp);
TCPDEBUG1();
+
/*
* We inline in6_mapped_peeraddr and COMMON_END here, so that we can
* copy the data of interest and defer the malloc until after we
@@ -534,9 +669,9 @@
addr6 = inp->in6p_faddr;
}
-out: TCPDEBUG2(PRU_ACCEPT);
- if (tp)
- INP_UNLOCK(inp);
+out:
+ TCPDEBUG2(PRU_ACCEPT);
+ INP_UNLOCK(inp);
if (error == 0) {
if (v4)
*nam = in6_v4mapsin6_sockaddr(port, &addr);
@@ -548,27 +683,6 @@
#endif /* INET6 */
/*
- * This is the wrapper function for in_setsockaddr. We just pass down
- * the pcbinfo for in_setsockaddr to lock. We don't want to do the locking
- * here because in_setsockaddr will call malloc and can block.
- */
-static int
-tcp_sockaddr(struct socket *so, struct sockaddr **nam)
-{
- return (in_setsockaddr(so, nam, &tcbinfo));
-}
-
-/*
- * This is the wrapper function for in_setpeeraddr. We just pass down
- * the pcbinfo for in_setpeeraddr to lock.
- */
-static int
-tcp_peeraddr(struct socket *so, struct sockaddr **nam)
-{
- return (in_setpeeraddr(so, nam, &tcbinfo));
-}
-
-/*
* Mark the connection as being incapable of further output.
*/
static int
@@ -576,15 +690,29 @@
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- const int inirw = INI_WRITE;
+ struct tcpcb *tp = NULL;
- COMMON_START();
+ TCPDEBUG0;
+ INP_INFO_WLOCK(&tcbinfo);
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNRESET;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
socantsendmore(so);
- tp = tcp_usrclosed(tp);
- if (tp)
- error = tcp_output(tp);
- COMMON_END(PRU_SHUTDOWN);
+ tcp_usrclosed(tp);
+ error = tcp_output(tp);
+
+out:
+ TCPDEBUG2(PRU_SHUTDOWN);
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+
+ return (error);
}
/*
@@ -593,14 +721,26 @@
static int
tcp_usr_rcvd(struct socket *so, int flags)
{
- int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- const int inirw = INI_READ;
+ struct tcpcb *tp = NULL;
+ int error = 0;
- COMMON_START();
+ TCPDEBUG0;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNRESET;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
tcp_output(tp);
- COMMON_END(PRU_RCVD);
+
+out:
+ TCPDEBUG2(PRU_RCVD);
+ INP_UNLOCK(inp);
+ return (error);
}
/*
@@ -612,41 +752,41 @@
*/
static int
tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
- struct sockaddr *nam, struct mbuf *control, struct thread *td)
+ struct sockaddr *nam, struct mbuf *control, struct thread *td)
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- int unlocked = 0;
+ struct tcpcb *tp = NULL;
+ int headlocked = 0;
#ifdef INET6
int isipv6;
#endif
TCPDEBUG0;
/*
- * Need write lock here because this function might call
- * tcp_connect or tcp_usrclosed.
- * We really want to have to this function upgrade from read lock
- * to write lock. XXX
+ * We require the pcbinfo lock in two cases:
+ *
+ * (1) An implied connect is taking place, which can result in
+ * binding IPs and ports and hence modification of the pcb hash
+ * chains.
+ *
+ * (2) PRUS_EOF is set, resulting in explicit close on the send.
*/
- INP_INFO_WLOCK(&tcbinfo);
+ if ((nam != NULL) || (flags & PRUS_EOF)) {
+ INP_INFO_WLOCK(&tcbinfo);
+ headlocked = 1;
+ }
inp = sotoinpcb(so);
- if (inp == NULL) {
- /*
- * OOPS! we lost a race, the TCP session got reset after
- * we checked SBS_CANTSENDMORE, eg: while doing uiomove or a
- * network interrupt in the non-splnet() section of sosend().
- */
- if (m)
- m_freem(m);
+ KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
if (control)
m_freem(control);
- error = ECONNRESET; /* XXX EPIPE? */
- tp = NULL;
- TCPDEBUG1();
+ if (m)
+ m_freem(m);
+ error = ECONNRESET;
goto out;
}
- INP_LOCK(inp);
#ifdef INET6
isipv6 = nam && nam->sa_family == AF_INET6;
#endif /* INET6 */
@@ -672,6 +812,7 @@
* initialize maxseg/maxopd using peer's cached
* MSS.
*/
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
#ifdef INET6
if (isipv6)
error = tcp6_connect(tp, nam, td);
@@ -683,17 +824,19 @@
tp->snd_wnd = TTCP_CLIENT_SND_WND;
tcp_mss(tp, -1);
}
-
if (flags & PRUS_EOF) {
/*
* Close the send side of the connection after
* the data is sent.
*/
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
socantsendmore(so);
- tp = tcp_usrclosed(tp);
+ tcp_usrclosed(tp);
+ }
+ if (headlocked) {
+ INP_INFO_WUNLOCK(&tcbinfo);
+ headlocked = 0;
}
- INP_INFO_WUNLOCK(&tcbinfo);
- unlocked = 1;
if (tp != NULL) {
if (flags & PRUS_MORETOCOME)
tp->t_flags |= TF_MORETOCOME;
@@ -702,6 +845,9 @@
tp->t_flags &= ~TF_MORETOCOME;
}
} else {
+ /*
+ * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
+ */
SOCKBUF_LOCK(&so->so_snd);
if (sbspace(&so->so_snd) < -512) {
SOCKBUF_UNLOCK(&so->so_snd);
@@ -726,6 +872,7 @@
* initialize maxseg/maxopd using peer's cached
* MSS.
*/
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
#ifdef INET6
if (isipv6)
error = tcp6_connect(tp, nam, td);
@@ -736,9 +883,12 @@
goto out;
tp->snd_wnd = TTCP_CLIENT_SND_WND;
tcp_mss(tp, -1);
+ INP_INFO_WUNLOCK(&tcbinfo);
+ headlocked = 0;
+ } else if (nam) {
+ INP_INFO_WUNLOCK(&tcbinfo);
+ headlocked = 0;
}
- INP_INFO_WUNLOCK(&tcbinfo);
- unlocked = 1;
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
tp->t_flags |= TF_FORCEDATA;
error = tcp_output(tp);
@@ -747,27 +897,87 @@
out:
TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
- if (tp)
- INP_UNLOCK(inp);
- if (!unlocked)
+ INP_UNLOCK(inp);
+ if (headlocked)
INP_INFO_WUNLOCK(&tcbinfo);
return (error);
}
/*
- * Abort the TCP.
+ * Abort the TCP. Drop the connection abruptly.
*/
-static int
+static void
tcp_usr_abort(struct socket *so)
{
- int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- const int inirw = INI_WRITE;
+ struct tcpcb *tp = NULL;
+ TCPDEBUG0;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
+
+ INP_INFO_WLOCK(&tcbinfo);
+ INP_LOCK(inp);
+ KASSERT(inp->inp_socket != NULL,
+ ("tcp_usr_abort: inp_socket == NULL"));
+
+ /*
+ * If we still have full TCP state, and we're not dropped, drop.
+ */
+ if (!(inp->inp_vflag & INP_TIMEWAIT) &&
+ !(inp->inp_vflag & INP_DROPPED)) {
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ tcp_drop(tp, ECONNABORTED);
+ TCPDEBUG2(PRU_ABORT);
+ }
+ if (!(inp->inp_vflag & INP_DROPPED)) {
+ SOCK_LOCK(so);
+ so->so_state |= SS_PROTOREF;
+ SOCK_UNLOCK(so);
+ inp->inp_vflag |= INP_SOCKREF;
+ }
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+}
+
+/*
+ * TCP socket is closed. Start friendly disconnect.
+ */
+static void
+tcp_usr_close(struct socket *so)
+{
+ struct inpcb *inp;
+ struct tcpcb *tp = NULL;
+ TCPDEBUG0;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
+
+ INP_INFO_WLOCK(&tcbinfo);
+ INP_LOCK(inp);
+ KASSERT(inp->inp_socket != NULL,
+ ("tcp_usr_close: inp_socket == NULL"));
- COMMON_START();
- tp = tcp_drop(tp, ECONNABORTED);
- COMMON_END(PRU_ABORT);
+ /*
+ * If we still have full TCP state, and we're not dropped, initiate
+ * a disconnect.
+ */
+ if (!(inp->inp_vflag & INP_TIMEWAIT) &&
+ !(inp->inp_vflag & INP_DROPPED)) {
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
+ tcp_disconnect(tp);
+ TCPDEBUG2(PRU_CLOSE);
+ }
+ if (!(inp->inp_vflag & INP_DROPPED)) {
+ SOCK_LOCK(so);
+ so->so_state |= SS_PROTOREF;
+ SOCK_UNLOCK(so);
+ inp->inp_vflag |= INP_SOCKREF;
+ }
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
}
/*
@@ -778,10 +988,18 @@
{
int error = 0;
struct inpcb *inp;
- struct tcpcb *tp;
- const int inirw = INI_READ;
+ struct tcpcb *tp = NULL;
- COMMON_START();
+ TCPDEBUG0;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL"));
+ INP_LOCK(inp);
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNRESET;
+ goto out;
+ }
+ tp = intotcpcb(inp);
+ TCPDEBUG1();
if ((so->so_oobmark == 0 &&
(so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
so->so_options & SO_OOBINLINE ||
@@ -797,7 +1015,11 @@
*mtod(m, caddr_t) = tp->t_iobc;
if ((flags & MSG_PEEK) == 0)
tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
- COMMON_END(PRU_RCVOOB);
+
+out:
+ TCPDEBUG2(PRU_RCVOOB);
+ INP_UNLOCK(inp);
+ return (error);
}
struct pr_usrreqs tcp_usrreqs = {
@@ -810,13 +1032,14 @@
.pru_detach = tcp_usr_detach,
.pru_disconnect = tcp_usr_disconnect,
.pru_listen = tcp_usr_listen,
- .pru_peeraddr = tcp_peeraddr,
+ .pru_peeraddr = in_getpeeraddr,
.pru_rcvd = tcp_usr_rcvd,
.pru_rcvoob = tcp_usr_rcvoob,
.pru_send = tcp_usr_send,
.pru_shutdown = tcp_usr_shutdown,
- .pru_sockaddr = tcp_sockaddr,
- .pru_sosetlabel = in_pcbsosetlabel
+ .pru_sockaddr = in_getsockaddr,
+ .pru_sosetlabel = in_pcbsosetlabel,
+ .pru_close = tcp_usr_close,
};
#ifdef INET6
@@ -836,7 +1059,8 @@
.pru_send = tcp_usr_send,
.pru_shutdown = tcp_usr_shutdown,
.pru_sockaddr = in6_mapped_sockaddr,
- .pru_sosetlabel = in_pcbsosetlabel
+ .pru_sosetlabel = in_pcbsosetlabel,
+ .pru_close = tcp_usr_close,
};
#endif /* INET6 */
@@ -859,6 +1083,9 @@
u_short lport;
int error;
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ INP_LOCK_ASSERT(inp);
+
if (inp->inp_lport == 0) {
error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
if (error)
@@ -884,16 +1111,16 @@
/*
* Compute window scaling to request:
* Scale to fit into sweet spot. See tcp_syncache.c.
- * XXX: This should be moved to tcp_output().
+ * XXX: This should move to tcp_output().
*/
while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
- (TCP_MAXWIN << tp->request_r_scale) < sb_max) /* XXX */
+ (TCP_MAXWIN << tp->request_r_scale) < sb_max)
tp->request_r_scale++;
soisconnecting(so);
tcpstat.tcps_connattempt++;
tp->t_state = TCPS_SYN_SENT;
- callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
tp->iss = tcp_new_isn(tp);
tp->t_bw_rtseq = tp->iss;
tcp_sendseqinit(tp);
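
The window-scale loop in the hunk above picks the smallest shift such that TCP_MAXWIN (65535) shifted left covers the socket buffer limit sb_max, capped at TCP_MAX_WINSHIFT (14); those two constants come from netinet/tcp.h. A standalone reproduction of the computation follows; the 2 MB sb_max is just an example input.

#include <stdio.h>

#define TCP_MAXWIN		65535
#define TCP_MAX_WINSHIFT	14

static int
request_r_scale(unsigned long sb_max)
{
	int scale = 0;

	while (scale < TCP_MAX_WINSHIFT &&
	    ((unsigned long)TCP_MAXWIN << scale) < sb_max)
		scale++;
	return (scale);
}

int
main(void)
{
	unsigned long sb_max = 2UL * 1024 * 1024;

	/* 65535 << 5 is still short of 2 MB, so the loop settles on 6. */
	printf("sb_max=%lu -> request_r_scale=%d\n",
	    sb_max, request_r_scale(sb_max));
	return (0);
}
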
@@ -911,6 +1138,9 @@
struct in6_addr *addr6;
int error;
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ INP_LOCK_ASSERT(inp);
+
if (inp->inp_lport == 0) {
error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
if (error)
@@ -953,7 +1183,7 @@
soisconnecting(so);
tcpstat.tcps_connattempt++;
tp->t_state = TCPS_SYN_SENT;
- callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
tp->iss = tcp_new_isn(tp);
tp->t_bw_rtseq = tp->iss;
tcp_sendseqinit(tp);
@@ -979,13 +1209,17 @@
ti->tcpi_state = tp->t_state;
if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
- if (tp->sack_enable)
+ if (tp->t_flags & TF_SACK_PERMIT)
ti->tcpi_options |= TCPI_OPT_SACK;
if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
ti->tcpi_options |= TCPI_OPT_WSCALE;
ti->tcpi_snd_wscale = tp->snd_scale;
ti->tcpi_rcv_wscale = tp->rcv_scale;
}
+
+ ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
+ ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
+
ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
ti->tcpi_snd_cwnd = tp->snd_cwnd;
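
The new tcpi_rtt/tcpi_rttvar fields convert the scaled averages kept in the tcpcb into microseconds: t_srtt is stored as clock ticks left-shifted by TCP_RTT_SHIFT (5 in tcp_var.h, with TCP_RTTVAR_SHIFT being 4), and tick is the number of microseconds per clock tick, so (t_srtt * tick) >> TCP_RTT_SHIFT yields microseconds. A standalone arithmetic check follows; HZ=1000 and the sample t_srtt are example inputs.

#include <stdio.h>
#include <stdint.h>

#define TCP_RTT_SHIFT	5	/* t_srtt is ticks << TCP_RTT_SHIFT */

int
main(void)
{
	uint64_t hz = 1000;
	uint64_t tick = 1000000 / hz;		/* microseconds per tick */
	uint64_t t_srtt = 10 << TCP_RTT_SHIFT;	/* smoothed RTT of 10 ticks */

	/* Same expression as the tcp_fill_info() hunk above. */
	uint64_t tcpi_rtt = (t_srtt * tick) >> TCP_RTT_SHIFT;

	printf("tcpi_rtt = %llu us (%.1f ms)\n",
	    (unsigned long long)tcpi_rtt, tcpi_rtt / 1000.0);
	return (0);
}
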
@@ -1016,14 +1250,9 @@
struct tcp_info ti;
error = 0;
- INP_INFO_RLOCK(&tcbinfo);
inp = sotoinpcb(so);
- if (inp == NULL) {
- INP_INFO_RUNLOCK(&tcbinfo);
- return (ECONNRESET);
- }
+ KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
INP_LOCK(inp);
- INP_INFO_RUNLOCK(&tcbinfo);
if (sopt->sopt_level != IPPROTO_TCP) {
INP_UNLOCK(inp);
#ifdef INET6
@@ -1031,9 +1260,13 @@
error = ip6_ctloutput(so, sopt);
else
#endif /* INET6 */
- error = ip_ctloutput_pcbinfo(so, sopt, &tcbinfo);
+ error = ip_ctloutput(so, sopt);
return (error);
}
+ if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) {
+ error = ECONNRESET;
+ goto out;
+ }
tp = intotcpcb(inp);
switch (sopt->sopt_dir) {
@@ -1148,6 +1381,7 @@
}
break;
}
+out:
INP_UNLOCK(inp);
return (error);
}
@@ -1158,10 +1392,10 @@
* be set by the route).
*/
u_long tcp_sendspace = 1024*32;
-SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
+SYSCTL_ULONG(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
&tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
u_long tcp_recvspace = 1024*64;
-SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
+SYSCTL_ULONG(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
&tcp_recvspace , 0, "Maximum incoming TCP datagram size");
/*
@@ -1179,8 +1413,6 @@
int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0;
#endif
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
-
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
error = soreserve(so, tcp_sendspace, tcp_recvspace);
if (error)
@@ -1188,9 +1420,12 @@
}
so->so_rcv.sb_flags |= SB_AUTOSIZE;
so->so_snd.sb_flags |= SB_AUTOSIZE;
- error = in_pcballoc(so, &tcbinfo, "tcpinp");
- if (error)
+ INP_INFO_WLOCK(&tcbinfo);
+ error = in_pcballoc(so, &tcbinfo);
+ if (error) {
+ INP_INFO_WUNLOCK(&tcbinfo);
return (error);
+ }
inp = sotoinpcb(so);
#ifdef INET6
if (isipv6) {
@@ -1201,22 +1436,24 @@
#endif
inp->inp_vflag |= INP_IPV4;
tp = tcp_newtcpcb(inp);
- if (tp == 0) {
- int nofd = so->so_state & SS_NOFDREF; /* XXX */
-
- so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
-
- INP_LOCK(inp);
+ if (tp == NULL) {
#ifdef INET6
- if (isipv6)
+ if (isipv6) {
in6_pcbdetach(inp);
- else
+ in6_pcbfree(inp);
+ } else {
#endif
- in_pcbdetach(inp);
- so->so_state |= nofd;
+ in_pcbdetach(inp);
+ in_pcbfree(inp);
+#ifdef INET6
+ }
+#endif
+ INP_INFO_WUNLOCK(&tcbinfo);
return (ENOBUFS);
}
tp->t_state = TCPS_CLOSED;
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
return (0);
}
@@ -1228,7 +1465,7 @@
* current input data; switch states based on user close, and
* send segment to peer (with FIN).
*/
-static struct tcpcb *
+static void
tcp_disconnect(struct tcpcb *tp)
{
struct inpcb *inp = tp->t_inpcb;
@@ -1237,18 +1474,25 @@
INP_INFO_WLOCK_ASSERT(&tcbinfo);
INP_LOCK_ASSERT(inp);
- if (tp->t_state < TCPS_ESTABLISHED)
+ /*
+ * Neither tcp_close() nor tcp_drop() should return NULL, as the
+ * socket is still open.
+ */
+ if (tp->t_state < TCPS_ESTABLISHED) {
tp = tcp_close(tp);
- else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
+ KASSERT(tp != NULL,
+ ("tcp_disconnect: tcp_close() returned NULL"));
+ } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
tp = tcp_drop(tp, 0);
- else {
+ KASSERT(tp != NULL,
+ ("tcp_disconnect: tcp_drop() returned NULL"));
+ } else {
soisdisconnecting(so);
sbflush(&so->so_rcv);
- tp = tcp_usrclosed(tp);
- if (tp)
- (void) tcp_output(tp);
+ tcp_usrclosed(tp);
+ if (!(inp->inp_vflag & INP_DROPPED))
+ tcp_output(tp);
}
- return (tp);
}
/*
@@ -1261,7 +1505,7 @@
* for peer to send FIN or not respond to keep-alives, etc.
* We can let the user exit from the close as soon as the FIN is acked.
*/
-static struct tcpcb *
+static void
tcp_usrclosed(struct tcpcb *tp)
{
@@ -1273,6 +1517,12 @@
case TCPS_LISTEN:
tp->t_state = TCPS_CLOSED;
tp = tcp_close(tp);
+ /*
+ * tcp_close() should never return NULL here as the socket is
+ * still open.
+ */
+ KASSERT(tp != NULL,
+ ("tcp_usrclosed: tcp_close() returned NULL"));
break;
case TCPS_SYN_SENT:
@@ -1291,9 +1541,322 @@
if (tp->t_state >= TCPS_FIN_WAIT_2) {
soisdisconnected(tp->t_inpcb->inp_socket);
/* Prevent the connection hanging in FIN_WAIT_2 forever. */
- if (tp->t_state == TCPS_FIN_WAIT_2)
- callout_reset(tp->tt_2msl, tcp_maxidle,
- tcp_timer_2msl, tp);
+ if (tp->t_state == TCPS_FIN_WAIT_2) {
+ int timeout;
+
+ timeout = (tcp_fast_finwait2_recycle) ?
+ tcp_finwait2_timeout : tcp_maxidle;
+ tcp_timer_activate(tp, TT_2MSL, timeout);
+ }
+ }
+}
+
+#ifdef DDB
+static void
+db_print_indent(int indent)
+{
+ int i;
+
+ for (i = 0; i < indent; i++)
+ db_printf(" ");
+}
+
+static void
+db_print_tstate(int t_state)
+{
+
+ switch (t_state) {
+ case TCPS_CLOSED:
+ db_printf("TCPS_CLOSED");
+ return;
+
+ case TCPS_LISTEN:
+ db_printf("TCPS_LISTEN");
+ return;
+
+ case TCPS_SYN_SENT:
+ db_printf("TCPS_SYN_SENT");
+ return;
+
+ case TCPS_SYN_RECEIVED:
+ db_printf("TCPS_SYN_RECEIVED");
+ return;
+
+ case TCPS_ESTABLISHED:
+ db_printf("TCPS_ESTABLISHED");
+ return;
+
+ case TCPS_CLOSE_WAIT:
+ db_printf("TCPS_CLOSE_WAIT");
+ return;
+
+ case TCPS_FIN_WAIT_1:
+ db_printf("TCPS_FIN_WAIT_1");
+ return;
+
+ case TCPS_CLOSING:
+ db_printf("TCPS_CLOSING");
+ return;
+
+ case TCPS_LAST_ACK:
+ db_printf("TCPS_LAST_ACK");
+ return;
+
+ case TCPS_FIN_WAIT_2:
+ db_printf("TCPS_FIN_WAIT_2");
+ return;
+
+ case TCPS_TIME_WAIT:
+ db_printf("TCPS_TIME_WAIT");
+ return;
+
+ default:
+ db_printf("unknown");
+ return;
+ }
+}
+
+static void
+db_print_tflags(u_int t_flags)
+{
+ int comma;
+
+ comma = 0;
+ if (t_flags & TF_ACKNOW) {
+ db_printf("%sTF_ACKNOW", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_DELACK) {
+ db_printf("%sTF_DELACK", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_NODELAY) {
+ db_printf("%sTF_NODELAY", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_NOOPT) {
+ db_printf("%sTF_NOOPT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_SENTFIN) {
+ db_printf("%sTF_SENTFIN", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_REQ_SCALE) {
+ db_printf("%sTF_REQ_SCALE", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_RCVD_SCALE) {
+ db_printf("%sTF_RCVD_SCALE", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_REQ_TSTMP) {
+ db_printf("%sTF_REQ_TSTMP", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_RCVD_TSTMP) {
+ db_printf("%sTF_RCVD_TSTMP", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_SACK_PERMIT) {
+ db_printf("%sTF_SACK_PERMIT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_NEEDSYN) {
+ db_printf("%sTF_NEEDSYN", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_NEEDFIN) {
+ db_printf("%sTF_NEEDFIN", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_NOPUSH) {
+ db_printf("%sTF_NOPUSH", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_MORETOCOME) {
+ db_printf("%sTF_MORETOCOME", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_LQ_OVERFLOW) {
+ db_printf("%sTF_LQ_OVERFLOW", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_LASTIDLE) {
+ db_printf("%sTF_LASTIDLE", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_RXWIN0SENT) {
+ db_printf("%sTF_RXWIN0SENT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_FASTRECOVERY) {
+ db_printf("%sTF_FASTRECOVERY", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_WASFRECOVERY) {
+ db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_SIGNATURE) {
+ db_printf("%sTF_SIGNATURE", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_FORCEDATA) {
+ db_printf("%sTF_FORCEDATA", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags & TF_TSO) {
+ db_printf("%sTF_TSO", comma ? ", " : "");
+ comma = 1;
+ }
+}
+
+static void
+db_print_toobflags(char t_oobflags)
+{
+ int comma;
+
+ comma = 0;
+ if (t_oobflags & TCPOOB_HAVEDATA) {
+ db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_oobflags & TCPOOB_HADDATA) {
+ db_printf("%sTCPOOB_HADDATA", comma ? ", " : "");
+ comma = 1;
+ }
+}
+
+static void
+db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
+{
+
+ db_print_indent(indent);
+ db_printf("%s at %p\n", name, tp);
+
+ indent += 2;
+
+ db_print_indent(indent);
+ db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n",
+ LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
+
+ db_print_indent(indent);
+ db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n",
+ &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep);
+
+ db_print_indent(indent);
+ db_printf("tt_2msl: %p tt_delack: %p t_inpcb: %p\n", &tp->t_timers->tt_2msl,
+ &tp->t_timers->tt_delack, tp->t_inpcb);
+
+ db_print_indent(indent);
+ db_printf("t_state: %d (", tp->t_state);
+ db_print_tstate(tp->t_state);
+ db_printf(")\n");
+
+ db_print_indent(indent);
+ db_printf("t_flags: 0x%x (", tp->t_flags);
+ db_print_tflags(tp->t_flags);
+ db_printf(")\n");
+
+ db_print_indent(indent);
+ db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: 0x%08x\n",
+ tp->snd_una, tp->snd_max, tp->snd_nxt);
+
+ db_print_indent(indent);
+ db_printf("snd_up: 0x%08x snd_wl1: 0x%08x snd_wl2: 0x%08x\n",
+ tp->snd_up, tp->snd_wl1, tp->snd_wl2);
+
+ db_print_indent(indent);
+ db_printf("iss: 0x%08x irs: 0x%08x rcv_nxt: 0x%08x\n",
+ tp->iss, tp->irs, tp->rcv_nxt);
+
+ db_print_indent(indent);
+ db_printf("rcv_adv: 0x%08x rcv_wnd: %lu rcv_up: 0x%08x\n",
+ tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
+
+ db_print_indent(indent);
+ db_printf("snd_wnd: %lu snd_cwnd: %lu snd_bwnd: %lu\n",
+ tp->snd_wnd, tp->snd_cwnd, tp->snd_bwnd);
+
+ db_print_indent(indent);
+ db_printf("snd_ssthresh: %lu snd_bandwidth: %lu snd_recover: "
+ "0x%08x\n", tp->snd_ssthresh, tp->snd_bandwidth,
+ tp->snd_recover);
+
+ db_print_indent(indent);
+ db_printf("t_maxopd: %u t_rcvtime: %lu t_starttime: %lu\n",
+ tp->t_maxopd, tp->t_rcvtime, tp->t_starttime);
+
+ db_print_indent(indent);
+ db_printf("t_rtttime: %d t_rtseq: 0x%08x t_bw_rtttime: %d\n",
+ tp->t_rtttime, tp->t_rtseq, tp->t_bw_rtttime);
+
+ db_print_indent(indent);
+ db_printf("t_bw_rtseq: 0x%08x t_rxtcur: %d t_maxseg: %u "
+ "t_srtt: %d\n", tp->t_bw_rtseq, tp->t_rxtcur, tp->t_maxseg,
+ tp->t_srtt);
+
+ db_print_indent(indent);
+ db_printf("t_rttvar: %d t_rxtshift: %d t_rttmin: %u "
+ "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
+ tp->t_rttbest);
+
+ db_print_indent(indent);
+ db_printf("t_rttupdated: %lu max_sndwnd: %lu t_softerror: %d\n",
+ tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
+
+ db_print_indent(indent);
+ db_printf("t_oobflags: 0x%x (", tp->t_oobflags);
+ db_print_toobflags(tp->t_oobflags);
+ db_printf(") t_iobc: 0x%02x\n", tp->t_iobc);
+
+ db_print_indent(indent);
+ db_printf("snd_scale: %u rcv_scale: %u request_r_scale: %u\n",
+ tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
+
+ db_print_indent(indent);
+ db_printf("ts_recent: %u ts_recent_age: %lu\n",
+ tp->ts_recent, tp->ts_recent_age);
+
+ db_print_indent(indent);
+ db_printf("ts_offset: %u last_ack_sent: 0x%08x snd_cwnd_prev: "
+ "%lu\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev);
+
+ db_print_indent(indent);
+ db_printf("snd_ssthresh_prev: %lu snd_recover_prev: 0x%08x "
+ "t_badrxtwin: %lu\n", tp->snd_ssthresh_prev,
+ tp->snd_recover_prev, tp->t_badrxtwin);
+
+ db_print_indent(indent);
+ db_printf("snd_numholes: %d snd_holes first: %p\n",
+ tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes));
+
+ db_print_indent(indent);
+ db_printf("snd_fack: 0x%08x rcv_numsacks: %d sack_newdata: "
+ "0x%08x\n", tp->snd_fack, tp->rcv_numsacks, tp->sack_newdata);
+
+ /* Skip sackblks, sackhint. */
+
+ db_print_indent(indent);
+ db_printf("t_rttlow: %d rfbuf_ts: %u rfbuf_cnt: %d\n",
+ tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt);
+}
+
+DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
+{
+ struct tcpcb *tp;
+
+ if (!have_addr) {
+ db_printf("usage: show tcpcb <addr>\n");
+ return;
}
- return (tp);
+ tp = (struct tcpcb *)addr;
+
+ db_print_tcpcb(tp, "tcpcb", 0);
}
+#endif
--- /dev/null
+++ sys/netinet/sctp_input.h
@@ -0,0 +1,57 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_input.h,v 1.6 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_input.h,v 1.7 2007/07/02 19:22:22 rrs Exp $");
+
+#ifndef __sctp_input_h__
+#define __sctp_input_h__
+
+#if defined(_KERNEL)
+void
+sctp_common_input_processing(struct mbuf **, int, int, int,
+ struct sctphdr *, struct sctp_chunkhdr *, struct sctp_inpcb *,
+ struct sctp_tcb *, struct sctp_nets *, uint8_t, uint32_t);
+
+struct sctp_stream_reset_out_request *
+sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq,
+ struct sctp_tmit_chunk **bchk);
+
+void
+sctp_reset_in_stream(struct sctp_tcb *stcb, int number_entries,
+ uint16_t * list);
+
+
+int sctp_is_there_unsent_data(struct sctp_tcb *stcb);
+
+#endif
+#endif
--- /dev/null
+++ sys/netinet/sctp_os_bsd.h
@@ -0,0 +1,465 @@
+/*-
+ * Copyright (c) 2006-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_os_bsd.h,v 1.33 2007/09/18 15:16:38 rrs Exp $");
+#ifndef __sctp_os_bsd_h__
+#define __sctp_os_bsd_h__
+/*
+ * includes
+ */
+#include "opt_ipsec.h"
+#include "opt_compat.h"
+#include "opt_inet6.h"
+#include "opt_inet.h"
+#include "opt_sctp.h"
+#include <sys/param.h>
+#include <sys/ktr.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/jail.h>
+#include <sys/resourcevar.h>
+#include <sys/uio.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/kthread.h>
+#include <sys/priv.h>
+#include <sys/random.h>
+#include <sys/limits.h>
+#include <sys/queue.h>
+#include <machine/cpu.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/icmp_var.h>
+
+
+#ifdef IPSEC
+#include <netipsec/ipsec.h>
+#include <netipsec/key.h>
+#endif /* IPSEC */
+
+#ifdef INET6
+#include <sys/domain.h>
+#ifdef IPSEC
+#include <netipsec/ipsec6.h>
+#endif
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/in6_pcb.h>
+#include <netinet/icmp6.h>
+#include <netinet6/ip6protosw.h>
+#include <netinet6/nd6.h>
+#include <netinet6/scope6_var.h>
+#endif /* INET6 */
+
+
+#include <netinet/ip_options.h>
+
+#ifndef in6pcb
+#define in6pcb inpcb
+#endif
+/* Declare all the malloc names for all the various mallocs */
+MALLOC_DECLARE(SCTP_M_MAP);
+MALLOC_DECLARE(SCTP_M_STRMI);
+MALLOC_DECLARE(SCTP_M_STRMO);
+MALLOC_DECLARE(SCTP_M_ASC_ADDR);
+MALLOC_DECLARE(SCTP_M_ASC_IT);
+MALLOC_DECLARE(SCTP_M_AUTH_CL);
+MALLOC_DECLARE(SCTP_M_AUTH_KY);
+MALLOC_DECLARE(SCTP_M_AUTH_HL);
+MALLOC_DECLARE(SCTP_M_AUTH_IF);
+MALLOC_DECLARE(SCTP_M_STRESET);
+MALLOC_DECLARE(SCTP_M_CMSG);
+MALLOC_DECLARE(SCTP_M_COPYAL);
+MALLOC_DECLARE(SCTP_M_VRF);
+MALLOC_DECLARE(SCTP_M_IFA);
+MALLOC_DECLARE(SCTP_M_IFN);
+MALLOC_DECLARE(SCTP_M_TIMW);
+MALLOC_DECLARE(SCTP_M_MVRF);
+MALLOC_DECLARE(SCTP_M_ITER);
+MALLOC_DECLARE(SCTP_M_SOCKOPT);
+
+#if defined(SCTP_LOCAL_TRACE_BUF)
+
+#define SCTP_GET_CYCLECOUNT get_cyclecount()
+#define SCTP_CTR6 sctp_log_trace
+
+#else
+#define SCTP_CTR6 CTR6
+#endif
+
+
+/*
+ *
+ */
+#define USER_ADDR_NULL (NULL) /* FIX ME: temp */
+#define SCTP_LIST_EMPTY(list) LIST_EMPTY(list)
+
+#if defined(SCTP_DEBUG)
+#define SCTPDBG(level, params...) \
+{ \
+ do { \
+ if (sctp_debug_on & level ) { \
+ printf(params); \
+ } \
+ } while (0); \
+}
+#define SCTPDBG_ADDR(level, addr) \
+{ \
+ do { \
+ if (sctp_debug_on & level ) { \
+ sctp_print_address(addr); \
+ } \
+ } while (0); \
+}
+#define SCTPDBG_PKT(level, iph, sh) \
+{ \
+ do { \
+ if (sctp_debug_on & level) { \
+ sctp_print_address_pkt(iph, sh); \
+ } \
+ } while (0); \
+}
+#else
+#define SCTPDBG(level, params...)
+#define SCTPDBG_ADDR(level, addr)
+#define SCTPDBG_PKT(level, iph, sh)
+#endif
+#define SCTP_PRINTF(params...) printf(params)
+
+#ifdef SCTP_LTRACE_CHUNKS
+#define SCTP_LTRACE_CHK(a, b, c, d) if(sctp_logging_level & SCTP_LTRACE_CHUNK_ENABLE) CTR6(KTR_SUBSYS, "SCTP:%d[%d]:%x-%x-%x-%x", SCTP_LOG_CHUNK_PROC, 0, a, b, c, d)
+#else
+#define SCTP_LTRACE_CHK(a, b, c, d)
+#endif
+
+#ifdef SCTP_LTRACE_ERRORS
+#define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err) if(sctp_logging_level & SCTP_LTRACE_ERROR_ENABLE) \
+ printf("mbuf:%p inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
+ m, inp, stcb, net, file, __LINE__, err);
+#define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err) if(sctp_logging_level & SCTP_LTRACE_ERROR_ENABLE) \
+ printf("inp:%p stcb:%p net:%p file:%x line:%d error:%d\n", \
+ inp, stcb, net, file, __LINE__, err);
+#else
+#define SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, file, err)
+#define SCTP_LTRACE_ERR_RET(inp, stcb, net, file, err)
+#endif
+
+
+/*
+ * Local address and interface list handling
+ */
+#define SCTP_MAX_VRF_ID 0
+#define SCTP_SIZE_OF_VRF_HASH 3
+#define SCTP_IFNAMSIZ IFNAMSIZ
+#define SCTP_DEFAULT_VRFID 0
+#define SCTP_VRF_ADDR_HASH_SIZE 16
+#define SCTP_VRF_IFN_HASH_SIZE 3
+#define SCTP_INIT_VRF_TABLEID(vrf)
+
+#define SCTP_IFN_IS_IFT_LOOP(ifn) ((ifn)->ifn_type == IFT_LOOP)
+
+/*
+ * Access to IFN's to help with src-addr-selection
+ */
+/* This could return VOID if the index works but for BSD we provide both. */
+#define SCTP_GET_IFN_VOID_FROM_ROUTE(ro) (void *)ro->ro_rt->rt_ifp
+#define SCTP_GET_IF_INDEX_FROM_ROUTE(ro) (ro)->ro_rt->rt_ifp->if_index
+#define SCTP_ROUTE_HAS_VALID_IFN(ro) ((ro)->ro_rt && (ro)->ro_rt->rt_ifp)
+
+/*
+ * general memory allocation
+ */
+#define SCTP_MALLOC(var, type, size, name) \
+ do { \
+ MALLOC(var, type, size, name, M_NOWAIT); \
+ } while (0)
+
+#define SCTP_FREE(var, type) FREE(var, type)
+
+#define SCTP_MALLOC_SONAME(var, type, size) \
+ do { \
+ MALLOC(var, type, size, M_SONAME, M_WAITOK | M_ZERO); \
+ } while (0)
+
+#define SCTP_FREE_SONAME(var) FREE(var, M_SONAME)
+
+#define SCTP_PROCESS_STRUCT struct proc *
+
+/*
+ * zone allocation functions
+ */
+#include <vm/uma.h>
+/* SCTP_ZONE_INIT: initialize the zone */
+typedef struct uma_zone *sctp_zone_t;
+
+#define UMA_ZFLAG_FULL 0x0020
+#define SCTP_ZONE_INIT(zone, name, size, number) { \
+ zone = uma_zcreate(name, size, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,\
+ UMA_ZFLAG_FULL); \
+ uma_zone_set_max(zone, number); \
+}
+
+/* SCTP_ZONE_GET: allocate element from the zone */
+#define SCTP_ZONE_GET(zone, type) \
+ (type *)uma_zalloc(zone, M_NOWAIT);
+
+/* SCTP_ZONE_FREE: free element from the zone */
+#define SCTP_ZONE_FREE(zone, element) \
+ uma_zfree(zone, element);
+#define SCTP_HASH_INIT(size, hashmark) hashinit_flags(size, M_PCB, hashmark, HASH_NOWAIT)
+#define SCTP_HASH_FREE(table, hashmark) hashdestroy(table, M_PCB, hashmark)
+
+#define SCTP_M_COPYM m_copym
+
+/*
+ * timers
+ */
+#include <sys/callout.h>
+typedef struct callout sctp_os_timer_t;
+
+#define SCTP_OS_TIMER_INIT(tmr) callout_init(tmr, 1)
+#define SCTP_OS_TIMER_START callout_reset
+#define SCTP_OS_TIMER_STOP callout_stop
+#define SCTP_OS_TIMER_STOP_DRAIN callout_drain
+#define SCTP_OS_TIMER_PENDING callout_pending
+#define SCTP_OS_TIMER_ACTIVE callout_active
+#define SCTP_OS_TIMER_DEACTIVATE callout_deactivate
+
+#define sctp_get_tick_count() (ticks)
+
+/* The packed define for 64 bit platforms */
+#define SCTP_PACKED __attribute__((packed))
+#define SCTP_UNUSED __attribute__((unused))
+
+/*
+ * Functions
+ */
+/* Mbuf manipulation and access macros */
+#define SCTP_BUF_LEN(m) (m->m_len)
+#define SCTP_BUF_NEXT(m) (m->m_next)
+#define SCTP_BUF_NEXT_PKT(m) (m->m_nextpkt)
+#define SCTP_BUF_RESV_UF(m, size) m->m_data += size
+#define SCTP_BUF_AT(m, size) m->m_data + size
+#define SCTP_BUF_IS_EXTENDED(m) (m->m_flags & M_EXT)
+#define SCTP_BUF_EXTEND_SIZE(m) (m->m_ext.ext_size)
+#define SCTP_BUF_TYPE(m) (m->m_type)
+#define SCTP_BUF_RECVIF(m) (m->m_pkthdr.rcvif)
+#define SCTP_BUF_PREPEND M_PREPEND
+
+#define SCTP_ALIGN_TO_END(m, len) if(m->m_flags & M_PKTHDR) { \
+ MH_ALIGN(m, len); \
+ } else if ((m->m_flags & M_EXT) == 0) { \
+ M_ALIGN(m, len); \
+ }
+
+/* We make it so that if you have up to 4 threads
+ * writing, based on the default packet log size of
+ * 65k, that would be 4 16k packets before we would
+ * hit a problem.
+ */
+#define SCTP_PKTLOG_WRITERS_NEED_LOCK 3
+
+/*************************/
+/* MTU */
+/*************************/
+#define SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, af) ((struct ifnet *)ifn)->if_mtu
+#define SCTP_GATHER_MTU_FROM_ROUTE(sctp_ifa, sa, rt) ((rt != NULL) ? rt->rt_rmx.rmx_mtu : 0)
+#define SCTP_GATHER_MTU_FROM_INTFC(sctp_ifn) ((sctp_ifn->ifn_p != NULL) ? ((struct ifnet *)(sctp_ifn->ifn_p))->if_mtu : 0)
+#define SCTP_SET_MTU_OF_ROUTE(sa, rt, mtu) do { \
+ if (rt != NULL) \
+ rt->rt_rmx.rmx_mtu = mtu; \
+ } while(0)
+
+/* (de-)register interface event notifications */
+#define SCTP_REGISTER_INTERFACE(ifhandle, af)
+#define SCTP_DEREGISTER_INTERFACE(ifhandle, af)
+
+
+/*************************/
+/* These are for logging */
+/*************************/
+/* return the base ext data pointer */
+#define SCTP_BUF_EXTEND_BASE(m) (m->m_ext.ext_buf)
+ /* return the refcnt of the data pointer */
+#define SCTP_BUF_EXTEND_REFCNT(m) (*m->m_ext.ref_cnt)
+/* return any buffer related flags, this is
+ * used beyond logging for apple only.
+ */
+#define SCTP_BUF_GET_FLAGS(m) (m->m_flags)
+
+/* For BSD this just accesses the M_PKTHDR length,
+ * so it operates on an mbuf with the hdr flag. Other
+ * O/S's may have separate packet header and mbuf
+ * chain pointers... thus the macro.
+ */
+#define SCTP_HEADER_TO_CHAIN(m) (m)
+#define SCTP_DETACH_HEADER_FROM_CHAIN(m)
+#define SCTP_HEADER_LEN(m) (m->m_pkthdr.len)
+#define SCTP_GET_HEADER_FOR_OUTPUT(o_pak) 0
+#define SCTP_RELEASE_HEADER(m)
+#define SCTP_RELEASE_PKT(m) sctp_m_freem(m)
+
+#define SCTP_GET_PKT_VRFID(m, vrf_id) ((vrf_id = SCTP_DEFAULT_VRFID) != SCTP_DEFAULT_VRFID)
+
+
+
+/* Attach the chain of data into the sendable packet. */
+#define SCTP_ATTACH_CHAIN(pak, m, packet_length) do { \
+ pak = m; \
+ pak->m_pkthdr.len = packet_length; \
+ } while(0)
+
+/* Other m_pkthdr type things */
+#define SCTP_IS_IT_BROADCAST(dst, m) ((m->m_flags & M_PKTHDR) ? in_broadcast(dst, m->m_pkthdr.rcvif) : 0)
+#define SCTP_IS_IT_LOOPBACK(m) ((m->m_flags & M_PKTHDR) && ((m->m_pkthdr.rcvif == NULL) || (m->m_pkthdr.rcvif->if_type == IFT_LOOP)))
+
+
+/* This converts any input packet header
+ * into the chain of data holders, for BSD
+ * its a NOP.
+ */
+
+/* Macros for getting length from V6/V4 headers */
+#define SCTP_GET_IPV4_LENGTH(iph) (iph->ip_len)
+#define SCTP_GET_IPV6_LENGTH(ip6) (ntohs(ip6->ip6_plen))
+
+/* get the v6 hop limit */
+#define SCTP_GET_HLIM(inp, ro) in6_selecthlim((struct in6pcb *)&inp->ip_inp.inp, (ro ? (ro->ro_rt ? (ro->ro_rt->rt_ifp) : (NULL)) : (NULL)));
+
+/* is the endpoint v6only? */
+#define SCTP_IPV6_V6ONLY(inp) (((struct inpcb *)inp)->inp_flags & IN6P_IPV6_V6ONLY)
+/* is the socket non-blocking? */
+#define SCTP_SO_IS_NBIO(so) ((so)->so_state & SS_NBIO)
+#define SCTP_SET_SO_NBIO(so) ((so)->so_state |= SS_NBIO)
+#define SCTP_CLEAR_SO_NBIO(so) ((so)->so_state &= ~SS_NBIO)
+/* get the socket type */
+#define SCTP_SO_TYPE(so) ((so)->so_type)
+/* reserve sb space for a socket */
+#define SCTP_SORESERVE(so, send, recv) soreserve(so, send, recv)
+/* wakeup a socket */
+#define SCTP_SOWAKEUP(so) wakeup(&(so)->so_timeo)
+/* clear the socket buffer state */
+#define SCTP_SB_CLEAR(sb) \
+ (sb).sb_cc = 0; \
+ (sb).sb_mb = NULL; \
+ (sb).sb_mbcnt = 0;
+
+#define SCTP_SB_LIMIT_RCV(so) so->so_rcv.sb_hiwat
+#define SCTP_SB_LIMIT_SND(so) so->so_snd.sb_hiwat
+
+/*
+ * routes, output, etc.
+ */
+typedef struct route sctp_route_t;
+typedef struct rtentry sctp_rtentry_t;
+
+#define SCTP_RTALLOC(ro, vrf_id) rtalloc_ign((struct route *)ro, 0UL)
+
+/* Future zero copy wakeup/send function */
+#define SCTP_ZERO_COPY_EVENT(inp, so)
+/* This is re-pulse ourselves for sendbuf */
+#define SCTP_ZERO_COPY_SENDQ_EVENT(inp, so)
+
+/*
+ * IP output routines
+ */
+#define SCTP_IP_OUTPUT(result, o_pak, ro, stcb, vrf_id) \
+{ \
+ int o_flgs = 0; \
+ if (stcb && stcb->sctp_ep && stcb->sctp_ep->sctp_socket) { \
+ o_flgs = IP_RAWOUTPUT | (stcb->sctp_ep->sctp_socket->so_options & SO_DONTROUTE); \
+ } else { \
+ o_flgs = IP_RAWOUTPUT; \
+ } \
+ result = ip_output(o_pak, NULL, ro, o_flgs, 0, NULL); \
+}
+
+#define SCTP_IP6_OUTPUT(result, o_pak, ro, ifp, stcb, vrf_id) \
+{ \
+ if (stcb && stcb->sctp_ep) \
+ result = ip6_output(o_pak, \
+ ((struct in6pcb *)(stcb->sctp_ep))->in6p_outputopts, \
+ (ro), 0, 0, ifp, NULL); \
+ else \
+ result = ip6_output(o_pak, NULL, (ro), 0, 0, ifp, NULL); \
+}
+
+struct mbuf *
+sctp_get_mbuf_for_msg(unsigned int space_needed,
+ int want_header, int how, int allonebuf, int type);
+
+
+/*
+ * SCTP AUTH
+ */
+#define HAVE_SHA2
+
+#define SCTP_READ_RANDOM(buf, len) read_random(buf, len)
+
+#ifdef USE_SCTP_SHA1
+#include <netinet/sctp_sha1.h>
+#else
+#include <crypto/sha1.h>
+/* map standard crypto API names */
+#define SHA1_Init SHA1Init
+#define SHA1_Update SHA1Update
+#define SHA1_Final(x,y) SHA1Final((caddr_t)x, y)
+#endif
+
+#if defined(HAVE_SHA2)
+#include <crypto/sha2/sha2.h>
+#endif
+
+#include <sys/md5.h>
+/* map standard crypto API names */
+#define MD5_Init MD5Init
+#define MD5_Update MD5Update
+#define MD5_Final MD5Final
+
+#endif
--- /dev/null
+++ sys/netinet/sctp_usrreq.c
@@ -0,0 +1,4444 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_usrreq.c,v 1.48 2005/03/07 23:26:08 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_usrreq.c,v 1.48.2.3 2007/12/09 20:23:47 rrs Exp $");
+#include <netinet/sctp_os.h>
+#include <sys/proc.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctp_var.h>
+#if defined(INET6)
+#include <netinet6/sctp6_var.h>
+#endif
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_uio.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_indata.h>
+#include <netinet/sctp_timer.h>
+#include <netinet/sctp_auth.h>
+#include <netinet/sctp_bsd_addr.h>
+#include <netinet/sctp_cc_functions.h>
+
+
+
+
+void
+sctp_init(void)
+{
+ /* Init the SCTP pcb in sctp_pcb.c */
+ u_long sb_max_adj;
+
+ sctp_pcb_init();
+
+
+ if ((nmbclusters / 8) > SCTP_ASOC_MAX_CHUNKS_ON_QUEUE)
+ sctp_max_chunks_on_queue = (nmbclusters / 8);
+ /*
+ * Allow a user to take no more than 1/2 the number of clusters or
+ * SB_MAX, whichever is smaller, for the send window.
+ */
+ sb_max_adj = (u_long)((u_quad_t) (SB_MAX) * MCLBYTES / (MSIZE + MCLBYTES));
+ sctp_sendspace = min(sb_max_adj,
+ (((uint32_t) nmbclusters / 2) * SCTP_DEFAULT_MAXSEGMENT));
+ /*
+ * Now for the recv window: should we take the same amount, or
+ * should we use 1/2 of SB_MAX instead in the SB_MAX min above?
+ * For now just copy the send-window value.
+ */
+ sctp_recvspace = sctp_sendspace;
+
+}
+
+
+
+/*
+ * cleanup of the sctppcbinfo structure.
+ * Assumes that the sctppcbinfo lock is held.
+ */
+void
+sctp_pcbinfo_cleanup(void)
+{
+ /* free the hash tables */
+ if (sctppcbinfo.sctp_asochash != NULL)
+ SCTP_HASH_FREE(sctppcbinfo.sctp_asochash, sctppcbinfo.hashasocmark);
+ if (sctppcbinfo.sctp_ephash != NULL)
+ SCTP_HASH_FREE(sctppcbinfo.sctp_ephash, sctppcbinfo.hashmark);
+ if (sctppcbinfo.sctp_tcpephash != NULL)
+ SCTP_HASH_FREE(sctppcbinfo.sctp_tcpephash, sctppcbinfo.hashtcpmark);
+ if (sctppcbinfo.sctp_restarthash != NULL)
+ SCTP_HASH_FREE(sctppcbinfo.sctp_restarthash, sctppcbinfo.hashrestartmark);
+}
+
+
+static void
+sctp_pathmtu_adjustment(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ uint16_t nxtsz)
+{
+ struct sctp_tmit_chunk *chk;
+
+ /* Adjust that too */
+ stcb->asoc.smallest_mtu = nxtsz;
+ /* now off to subtract IP_DF flag if needed */
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("sctp_pathmtu_adjust called inp:%p stcb:%p net:%p nxtsz:%d\n",
+ inp, stcb, net, nxtsz);
+#endif
+ TAILQ_FOREACH(chk, &stcb->asoc.send_queue, sctp_next) {
+ if ((chk->send_size + IP_HDR_SIZE) > nxtsz) {
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
+ }
+ }
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if ((chk->send_size + IP_HDR_SIZE) > nxtsz) {
+ /*
+ * For this guy we also mark for immediate resend
+ * since we sent too big a chunk.
+ */
+ chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
+ if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ }
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ chk->rec.data.doing_fast_retransmit = 0;
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PMTU,
+ chk->whoTo->flight_size,
+ chk->book_size,
+ (uintptr_t) chk->whoTo,
+ chk->rec.data.TSN_seq);
+ }
+ /* Clear any time so NO RTT is being done */
+ chk->do_rtt = 0;
+ sctp_flight_size_decrease(chk);
+ sctp_total_flight_decrease(stcb, chk);
+ }
+ }
+}
+
+static void
+sctp_notify_mbuf(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ struct ip *ip,
+ struct sctphdr *sh)
+{
+ struct icmp *icmph;
+ int totsz, tmr_stopped = 0;
+ uint16_t nxtsz;
+
+ /* protection */
+ if ((inp == NULL) || (stcb == NULL) || (net == NULL) ||
+ (ip == NULL) || (sh == NULL)) {
+ if (stcb != NULL) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ return;
+ }
+ /* First job is to verify the vtag matches what I would send */
+ if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag)) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ icmph = (struct icmp *)((caddr_t)ip - (sizeof(struct icmp) -
+ sizeof(struct ip)));
+ if (icmph->icmp_type != ICMP_UNREACH) {
+ /* We only care about unreachable */
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ if (icmph->icmp_code != ICMP_UNREACH_NEEDFRAG) {
+ /* not an unreachable message due to fragmentation */
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ totsz = ip->ip_len;
+
+ nxtsz = ntohs(icmph->icmp_nextmtu);
+ if (nxtsz == 0) {
+ /*
+ * old-type router that does not tell us what the next MTU
+ * size is. Rats, we will have to guess (in an educated
+ * fashion, of course).
+ */
+ nxtsz = find_next_best_mtu(totsz);
+ }
+ /* Stop any PMTU timer */
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ tmr_stopped = 1;
+ sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_1);
+ }
+ /* Adjust destination size limit */
+ if (net->mtu > nxtsz) {
+ net->mtu = nxtsz;
+ }
+ /* now what about the ep? */
+ if (stcb->asoc.smallest_mtu > nxtsz) {
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("notify_mbuf (ICMP) calls sctp_pathmtu_adjust mtu:%d\n",
+ nxtsz);
+#endif
+ sctp_pathmtu_adjustment(inp, stcb, net, nxtsz);
+ }
+ if (tmr_stopped)
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
+
+ SCTP_TCB_UNLOCK(stcb);
+}
+
+
+void
+sctp_notify(struct sctp_inpcb *inp,
+ struct ip *ip,
+ struct sctphdr *sh,
+ struct sockaddr *to,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ struct socket *so;
+
+#endif
+ /* protection */
+ int reason;
+ struct icmp *icmph;
+
+
+ if ((inp == NULL) || (stcb == NULL) || (net == NULL) ||
+ (sh == NULL) || (to == NULL)) {
+ if (stcb)
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ /* First job is to verify the vtag matches what I would send */
+ if (ntohl(sh->v_tag) != (stcb->asoc.peer_vtag)) {
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ icmph = (struct icmp *)((caddr_t)ip - (sizeof(struct icmp) -
+ sizeof(struct ip)));
+ if (icmph->icmp_type != ICMP_UNREACH) {
+ /* We only care about unreachable */
+ SCTP_TCB_UNLOCK(stcb);
+ return;
+ }
+ if ((icmph->icmp_code == ICMP_UNREACH_NET) ||
+ (icmph->icmp_code == ICMP_UNREACH_HOST) ||
+ (icmph->icmp_code == ICMP_UNREACH_NET_UNKNOWN) ||
+ (icmph->icmp_code == ICMP_UNREACH_HOST_UNKNOWN) ||
+ (icmph->icmp_code == ICMP_UNREACH_ISOLATED) ||
+ (icmph->icmp_code == ICMP_UNREACH_NET_PROHIB) ||
+ (icmph->icmp_code == ICMP_UNREACH_HOST_PROHIB) ||
+ (icmph->icmp_code == ICMP_UNREACH_FILTER_PROHIB)) {
+
+ /*
+ * Hmm, reachability problems we must examine closely. If it is
+ * not reachable, we may have lost a network. Or if there is
+ * NO protocol at the other end named SCTP, then we consider
+ * it an OOTB abort.
+ */
+ if (net->dest_state & SCTP_ADDR_REACHABLE) {
+ /* Ok that destination is NOT reachable */
+ SCTP_PRINTF("ICMP (thresh %d/%d) takes interface %p down\n",
+ net->error_count,
+ net->failure_threshold,
+ net);
+
+ net->dest_state &= ~SCTP_ADDR_REACHABLE;
+ net->dest_state |= SCTP_ADDR_NOT_REACHABLE;
+ /*
+ * JRS 5/14/07 - If a destination is unreachable,
+ * the PF bit is turned off. This allows an
+ * unambiguous use of the PF bit for destinations
+ * that are reachable but potentially failed. If the
+ * destination is set to the unreachable state, also
+ * set the destination to the PF state.
+ */
+ /*
+ * Add debug message here if destination is not in
+ * PF state.
+ */
+ /* Stop any running T3 timers here? */
+ if (sctp_cmt_on_off && sctp_cmt_pf) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to unreachable.\n",
+ net);
+ }
+ net->error_count = net->failure_threshold + 1;
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
+ stcb, SCTP_FAILED_THRESHOLD,
+ (void *)net, SCTP_SO_NOT_LOCKED);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else if ((icmph->icmp_code == ICMP_UNREACH_PROTOCOL) ||
+ (icmph->icmp_code == ICMP_UNREACH_PORT)) {
+ /*
+ * Here the peer is either playing tricks on us, including using
+ * an address that belongs to someone who does not support
+ * SCTP, OR it was a userland implementation that shut down and
+ * is now dead. In either case treat it like an OOTB abort
+ * with no TCB.
+ */
+ reason = SCTP_PEER_FAULTY;
+ sctp_abort_notification(stcb, reason, SCTP_SO_NOT_LOCKED);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ so = SCTP_INP_SO(inp);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_SOCKET_LOCK(so, 1);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+#endif
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
+#if defined (__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
+ SCTP_SOCKET_UNLOCK(so, 1);
+ /* SCTP_TCB_UNLOCK(stcb); MT: I think this is not needed. */
+#endif
+ /* no need to unlock here, since the TCB is gone */
+ } else {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+}
+
+void
+sctp_ctlinput(cmd, sa, vip)
+ int cmd;
+ struct sockaddr *sa;
+ void *vip;
+{
+ struct ip *ip = vip;
+ struct sctphdr *sh;
+ uint32_t vrf_id;
+
+ /* FIX, for non-bsd is this right? */
+ vrf_id = SCTP_DEFAULT_VRFID;
+ if (sa->sa_family != AF_INET ||
+ ((struct sockaddr_in *)sa)->sin_addr.s_addr == INADDR_ANY) {
+ return;
+ }
+ if (PRC_IS_REDIRECT(cmd)) {
+ ip = 0;
+ } else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) {
+ return;
+ }
+ if (ip) {
+ struct sctp_inpcb *inp = NULL;
+ struct sctp_tcb *stcb = NULL;
+ struct sctp_nets *net = NULL;
+ struct sockaddr_in to, from;
+
+ sh = (struct sctphdr *)((caddr_t)ip + (ip->ip_hl << 2));
+ bzero(&to, sizeof(to));
+ bzero(&from, sizeof(from));
+ from.sin_family = to.sin_family = AF_INET;
+ from.sin_len = to.sin_len = sizeof(to);
+ from.sin_port = sh->src_port;
+ from.sin_addr = ip->ip_src;
+ to.sin_port = sh->dest_port;
+ to.sin_addr = ip->ip_dst;
+
+ /*
+ * 'to' holds the dest of the packet that failed to be sent.
+ * 'from' holds our local endpoint address. Thus we reverse
+ * the to and the from in the lookup.
+ */
+ stcb = sctp_findassociation_addr_sa((struct sockaddr *)&from,
+ (struct sockaddr *)&to,
+ &inp, &net, 1, vrf_id);
+ if (stcb != NULL && inp && (inp->sctp_socket != NULL)) {
+ if (cmd != PRC_MSGSIZE) {
+ sctp_notify(inp, ip, sh,
+ (struct sockaddr *)&to, stcb,
+ net);
+ } else {
+ /* handle possible ICMP size messages */
+ sctp_notify_mbuf(inp, stcb, net, ip, sh);
+ }
+ } else {
+ if ((stcb == NULL) && (inp != NULL)) {
+ /* reduce ref-count */
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ }
+ return;
+}
+
+static int
+sctp_getcred(SYSCTL_HANDLER_ARGS)
+{
+ struct xucred xuc;
+ struct sockaddr_in addrs[2];
+ struct sctp_inpcb *inp;
+ struct sctp_nets *net;
+ struct sctp_tcb *stcb;
+ int error;
+ uint32_t vrf_id;
+
+ /* FIX, for non-bsd is this right? */
+ vrf_id = SCTP_DEFAULT_VRFID;
+
+ error = priv_check(req->td, PRIV_NETINET_GETCRED);
+
+ if (error)
+ return (error);
+
+ error = SYSCTL_IN(req, addrs, sizeof(addrs));
+ if (error)
+ return (error);
+
+ stcb = sctp_findassociation_addr_sa(sintosa(&addrs[0]),
+ sintosa(&addrs[1]),
+ &inp, &net, 1, vrf_id);
+ if (stcb == NULL || inp == NULL || inp->sctp_socket == NULL) {
+ if ((inp != NULL) && (stcb == NULL)) {
+ /* reduce ref-count */
+ SCTP_INP_WLOCK(inp);
+ SCTP_INP_DECR_REF(inp);
+ goto cred_can_cont;
+ }
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ goto out;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ /*
+ * We use the write lock here only because in the error leg we need
+ * it. If we used RLOCK, then we would have to
+ * wlock/decr/unlock/rlock, which in theory could create a hole.
+ * Better to use the heavier wlock.
+ */
+ SCTP_INP_WLOCK(inp);
+cred_can_cont:
+ error = cr_canseesocket(req->td->td_ucred, inp->sctp_socket);
+ if (error) {
+ SCTP_INP_WUNLOCK(inp);
+ goto out;
+ }
+ cru2x(inp->sctp_socket->so_cred, &xuc);
+ SCTP_INP_WUNLOCK(inp);
+ error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
+out:
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, getcred, CTLTYPE_OPAQUE | CTLFLAG_RW,
+ 0, 0, sctp_getcred, "S,ucred", "Get the ucred of a SCTP connection");
+
+
+static void
+sctp_abort(struct socket *so)
+{
+ struct sctp_inpcb *inp;
+ uint32_t flags;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ return;
+ }
+sctp_must_try_again:
+ flags = inp->sctp_flags;
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 17);
+#endif
+ if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
+ (atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) {
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 16);
+#endif
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
+ SOCK_LOCK(so);
+ SCTP_SB_CLEAR(so->so_snd);
+ /*
+ * same for the rcv ones, they are only here for the
+ * accounting/select.
+ */
+ SCTP_SB_CLEAR(so->so_rcv);
+
+ /* Now null out the reference, we are completely detached. */
+ so->so_pcb = NULL;
+ SOCK_UNLOCK(so);
+ } else {
+ flags = inp->sctp_flags;
+ if ((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) {
+ goto sctp_must_try_again;
+ }
+ }
+ return;
+}
+
+static int
+sctp_attach(struct socket *so, int proto, struct thread *p)
+{
+ struct sctp_inpcb *inp;
+ struct inpcb *ip_inp;
+ int error;
+ uint32_t vrf_id = SCTP_DEFAULT_VRFID;
+
+#ifdef IPSEC
+ uint32_t flags;
+
+#endif
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp != 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ error = SCTP_SORESERVE(so, sctp_sendspace, sctp_recvspace);
+ if (error) {
+ return error;
+ }
+ error = sctp_inpcb_alloc(so, vrf_id);
+ if (error) {
+ return error;
+ }
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_BOUND_V6; /* I'm not v6! */
+ ip_inp = &inp->ip_inp.inp;
+ ip_inp->inp_vflag |= INP_IPV4;
+ ip_inp->inp_ip_ttl = ip_defttl;
+#ifdef IPSEC
+ error = ipsec_init_policy(so, &ip_inp->inp_sp);
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 17);
+#endif
+ if (error != 0) {
+ flags = inp->sctp_flags;
+ if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
+ (atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) {
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 15);
+#endif
+ SCTP_INP_WUNLOCK(inp);
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
+ } else {
+ SCTP_INP_WUNLOCK(inp);
+ }
+ return error;
+ }
+#endif /* IPSEC */
+ SCTP_INP_WUNLOCK(inp);
+ return 0;
+}
+
+static int
+sctp_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
+{
+ struct sctp_inpcb *inp = NULL;
+ int error;
+
+#ifdef INET6
+ if (addr && addr->sa_family != AF_INET) {
+ /* must be a v4 address! */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+#endif /* INET6 */
+ if (addr && (addr->sa_len != sizeof(struct sockaddr_in))) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ error = sctp_inpcb_bind(so, addr, NULL, p);
+ return error;
+}
+
+void
+sctp_close(struct socket *so)
+{
+ struct sctp_inpcb *inp;
+ uint32_t flags;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0)
+ return;
+
+ /*
+ * Inform all the lower-layer associations that we are done.
+ */
+sctp_must_try_again:
+ flags = inp->sctp_flags;
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 17);
+#endif
+ if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
+ (atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) {
+ if (((so->so_options & SO_LINGER) && (so->so_linger == 0)) ||
+ (so->so_rcv.sb_cc > 0)) {
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 13);
+#endif
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
+ SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
+ } else {
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 14);
+#endif
+ sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE,
+ SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
+ }
+ /*
+ * The socket is now detached, no matter what the state of
+ * the SCTP association.
+ */
+ SOCK_LOCK(so);
+ SCTP_SB_CLEAR(so->so_snd);
+ /*
+ * same for the rcv ones, they are only here for the
+ * accounting/select.
+ */
+ SCTP_SB_CLEAR(so->so_rcv);
+
+ /* Now null out the reference, we are completely detached. */
+ so->so_pcb = NULL;
+ SOCK_UNLOCK(so);
+ } else {
+ flags = inp->sctp_flags;
+ if ((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) {
+ goto sctp_must_try_again;
+ }
+ }
+ return;
+}
+
+
+int
+sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
+ struct mbuf *control, struct thread *p);
+
+
+int
+sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
+ struct mbuf *control, struct thread *p)
+{
+ struct sctp_inpcb *inp;
+ int error;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ if (control) {
+ sctp_m_freem(control);
+ control = NULL;
+ }
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ sctp_m_freem(m);
+ return EINVAL;
+ }
+ /* Got to have a to-address if we are NOT a connected socket */
+ if ((addr == NULL) &&
+ ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE))
+ ) {
+ goto connected_type;
+ } else if (addr == NULL) {
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EDESTADDRREQ);
+ error = EDESTADDRREQ;
+ sctp_m_freem(m);
+ if (control) {
+ sctp_m_freem(control);
+ control = NULL;
+ }
+ return (error);
+ }
+#ifdef INET6
+ if (addr->sa_family != AF_INET) {
+ /* must be a v4 address! */
+ SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EDESTADDRREQ);
+ sctp_m_freem(m);
+ if (control) {
+ sctp_m_freem(control);
+ control = NULL;
+ }
+ error = EDESTADDRREQ;
+ return EDESTADDRREQ;
+ }
+#endif /* INET6 */
+connected_type:
+ /* now what about control */
+ if (control) {
+ if (inp->control) {
+ SCTP_PRINTF("huh? control set?\n");
+ sctp_m_freem(inp->control);
+ inp->control = NULL;
+ }
+ inp->control = control;
+ }
+ /* Place the data */
+ if (inp->pkt) {
+ SCTP_BUF_NEXT(inp->pkt_last) = m;
+ inp->pkt_last = m;
+ } else {
+ inp->pkt_last = inp->pkt = m;
+ }
+ if (
+ /* FreeBSD uses a flag passed */
+ ((flags & PRUS_MORETOCOME) == 0)
+ ) {
+ /*
+ * Note: with the current version this code will only be used
+ * by OpenBSD; NetBSD, FreeBSD, and MacOS have methods for
+ * re-defining sosend to use sctp_sosend. One can
+ * optionally switch back to this code (by changing back the
+ * definitions) but this is not advisable. This code is used
+ * by FreeBSD when sending a file with sendfile() though.
+ */
+ int ret;
+
+ ret = sctp_output(inp, inp->pkt, addr, inp->control, p, flags);
+ inp->pkt = NULL;
+ inp->control = NULL;
+ return (ret);
+ } else {
+ return (0);
+ }
+}
+
+int
+sctp_disconnect(struct socket *so)
+{
+ struct sctp_inpcb *inp;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ return (ENOTCONN);
+ }
+ SCTP_INP_RLOCK(inp);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ if (SCTP_LIST_EMPTY(&inp->sctp_asoc_list)) {
+ /* No connection */
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ } else {
+ struct sctp_association *asoc;
+ struct sctp_tcb *stcb;
+
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ SCTP_TCB_LOCK(stcb);
+ asoc = &stcb->asoc;
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ /* We are about to be freed, out of here */
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ }
+ if (((so->so_options & SO_LINGER) &&
+ (so->so_linger == 0)) ||
+ (so->so_rcv.sb_cc > 0)) {
+ if (SCTP_GET_STATE(asoc) !=
+ SCTP_STATE_COOKIE_WAIT) {
+ /* Left with Data unread */
+ struct mbuf *err;
+
+ err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_DONTWAIT, 1, MT_DATA);
+ if (err) {
+ /*
+ * Fill in the user
+ * initiated abort
+ */
+ struct sctp_paramhdr *ph;
+
+ ph = mtod(err, struct sctp_paramhdr *);
+ SCTP_BUF_LEN(err) = sizeof(struct sctp_paramhdr);
+ ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(err));
+ }
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("disconnect does an abort");
+#endif
+ sctp_send_abort_tcb(stcb, err, SCTP_SO_LOCKED);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_3);
+ /* No unlock tcb assoc is gone */
+ return (0);
+ }
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->stream_queue_cnt == 0)) {
+ /* there is nothing queued to send, so done */
+ if (asoc->locked_on_sending) {
+ goto abort_anyway;
+ }
+ if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ /* only send SHUTDOWN 1st time thru */
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown(stcb,
+ stcb->asoc.primary_destination);
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED);
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ }
+ } else {
+ /*
+ * we still got (or just got) data to send,
+ * so set SHUTDOWN_PENDING
+ */
+ /*
+ * XXX sockets draft says that SCTP_EOF
+ * should be sent with no data. Currently,
+ * we will allow user data to be sent first
+ * and move to SHUTDOWN-PENDING
+ */
+ asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ if (asoc->locked_on_sending) {
+ /* Locked to send out the data */
+ struct sctp_stream_queue_pending *sp;
+
+ sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
+ if (sp == NULL) {
+ SCTP_PRINTF("Error, sp is NULL, locked on sending is non-null strm:%d\n",
+ asoc->locked_on_sending->stream_no);
+ } else {
+ if ((sp->length == 0) && (sp->msg_is_complete == 0))
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ }
+ }
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ struct mbuf *op_err;
+
+ abort_anyway:
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ /*
+ * Fill in the user
+ * initiated abort
+ */
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(op_err) =
+ (sizeof(struct sctp_paramhdr) + sizeof(uint32_t));
+ ph = mtod(op_err,
+ struct sctp_paramhdr *);
+ ph->param_type = htons(
+ SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4);
+ }
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("disconnect does an abort");
+#endif
+
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4;
+ sctp_send_abort_tcb(stcb, op_err, SCTP_SO_LOCKED);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ (void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_5);
+ return (0);
+ } else {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ }
+ /* not reached */
+ } else {
+ /* UDP model does not support this */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ return EOPNOTSUPP;
+ }
+}
+
+int
+sctp_shutdown(struct socket *so)
+{
+ struct sctp_inpcb *inp;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ SCTP_INP_RLOCK(inp);
+ /* For the UDP model this is an invalid call */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
+ /* Restore the flags that the soshutdown took away. */
+ so->so_rcv.sb_state &= ~SBS_CANTRCVMORE;
+ /* This proc will wakeup for read and do nothing (I hope) */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ return (EOPNOTSUPP);
+ }
+ /*
+ * OK, if we reach here it is the TCP model and it is either a SHUT_WR
+ * or SHUT_RDWR. This means we put the shutdown flag against it.
+ */
+ {
+ struct sctp_tcb *stcb;
+ struct sctp_association *asoc;
+
+ socantsendmore(so);
+
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ /*
+ * Ok we hit the case that the shutdown call was
+ * made after an abort or something. Nothing to do
+ * now.
+ */
+ SCTP_INP_RUNLOCK(inp);
+ return (0);
+ }
+ SCTP_TCB_LOCK(stcb);
+ asoc = &stcb->asoc;
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->stream_queue_cnt == 0)) {
+ if (asoc->locked_on_sending) {
+ goto abort_anyway;
+ }
+ /* there is nothing queued to send, so I'm done... */
+ if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
+ /* only send SHUTDOWN the first time through */
+ sctp_stop_timers_for_shutdown(stcb);
+ sctp_send_shutdown(stcb,
+ stcb->asoc.primary_destination);
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED);
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ }
+ } else {
+ /*
+ * we still got (or just got) data to send, so set
+ * SHUTDOWN_PENDING
+ */
+ asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+
+ if (asoc->locked_on_sending) {
+ /* Locked to send out the data */
+ struct sctp_stream_queue_pending *sp;
+
+ sp = TAILQ_LAST(&asoc->locked_on_sending->outqueue, sctp_streamhead);
+ if (sp == NULL) {
+ SCTP_PRINTF("Error, sp is NULL, locked on sending is non-null strm:%d\n",
+ asoc->locked_on_sending->stream_no);
+ } else {
+ if ((sp->length == 0) && (sp->msg_is_complete == 0)) {
+ asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ }
+ }
+ }
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue) &&
+ (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ struct mbuf *op_err;
+
+ abort_anyway:
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ /* Fill in the user initiated abort */
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(op_err) =
+ sizeof(struct sctp_paramhdr) + sizeof(uint32_t);
+ ph = mtod(op_err,
+ struct sctp_paramhdr *);
+ ph->param_type = htons(
+ SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6);
+ }
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("shutdown does an abort");
+#endif
+ stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6;
+ sctp_abort_an_association(stcb->sctp_ep, stcb,
+ SCTP_RESPONSE_TO_USER_REQ,
+ op_err, SCTP_SO_LOCKED);
+ goto skip_unlock;
+ } else {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+skip_unlock:
+ SCTP_INP_RUNLOCK(inp);
+ return 0;
+}
+
+/*
+ * copies a "user" presentable address and removes embedded scope, etc.
+ * returns 0 on success, 1 on error
+ */
+static uint32_t
+sctp_fill_user_address(struct sockaddr_storage *ss, struct sockaddr *sa)
+{
+ struct sockaddr_in6 lsa6;
+
+ sa = (struct sockaddr *)sctp_recover_scope((struct sockaddr_in6 *)sa,
+ &lsa6);
+ memcpy(ss, sa, sa->sa_len);
+ return (0);
+}
+
+
+
+/*
+ * NOTE: assumes addr lock is held
+ */
+static size_t
+sctp_fill_up_addresses_vrf(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ size_t limit,
+ struct sockaddr_storage *sas,
+ uint32_t vrf_id)
+{
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+ int loopback_scope, ipv4_local_scope, local_scope, site_scope;
+ size_t actual;
+ int ipv4_addr_legal, ipv6_addr_legal;
+ struct sctp_vrf *vrf;
+
+ actual = 0;
+ if (limit <= 0)
+ return (actual);
+
+ if (stcb) {
+ /* Turn on all the appropriate scope */
+ loopback_scope = stcb->asoc.loopback_scope;
+ ipv4_local_scope = stcb->asoc.ipv4_local_scope;
+ local_scope = stcb->asoc.local_scope;
+ site_scope = stcb->asoc.site_scope;
+ } else {
+ /* Turn on ALL scope, since we look at the EP */
+ loopback_scope = ipv4_local_scope = local_scope =
+ site_scope = 1;
+ }
+ ipv4_addr_legal = ipv6_addr_legal = 0;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ipv6_addr_legal = 1;
+ if (SCTP_IPV6_V6ONLY(inp) == 0) {
+ ipv4_addr_legal = 1;
+ }
+ } else {
+ ipv4_addr_legal = 1;
+ }
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ return (0);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if ((loopback_scope == 0) &&
+ SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
+ /* Skip loopback if loopback_scope not set */
+ continue;
+ }
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if (stcb) {
+ /*
+ * For the BOUND-ALL case, the list
+ * associated with a TCB is always
+ * considered a reverse list, i.e.
+ * it lists addresses that are NOT
+ * part of the association. If this
+ * is one of those we must skip it.
+ */
+ if (sctp_is_addr_restricted(stcb,
+ sctp_ifa)) {
+ continue;
+ }
+ }
+ if ((sctp_ifa->address.sa.sa_family == AF_INET) &&
+ (ipv4_addr_legal)) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0) {
+ /*
+ * we skip unspecified
+ * addresses
+ */
+ continue;
+ }
+ if ((ipv4_local_scope == 0) &&
+ (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
+ continue;
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4) {
+ in6_sin_2_v4mapsin6(sin, (struct sockaddr_in6 *)sas);
+ ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
+ sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(struct sockaddr_in6));
+ actual += sizeof(struct sockaddr_in6);
+ } else {
+ memcpy(sas, sin, sizeof(*sin));
+ ((struct sockaddr_in *)sas)->sin_port = inp->sctp_lport;
+ sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(*sin));
+ actual += sizeof(*sin);
+ }
+ if (actual >= limit) {
+ return (actual);
+ }
+ } else if ((sctp_ifa->address.sa.sa_family == AF_INET6) &&
+ (ipv6_addr_legal)) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /*
+ * we skip unspecified
+ * addresses
+ */
+ continue;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (local_scope == 0)
+ continue;
+ if (sin6->sin6_scope_id == 0) {
+ if (sa6_recoverscope(sin6) != 0)
+ /*
+ * bad link
+ * local
+ * address
+ */
+ continue;
+ }
+ }
+ if ((site_scope == 0) &&
+ (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
+ continue;
+ }
+ memcpy(sas, sin6, sizeof(*sin6));
+ ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
+ sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(*sin6));
+ actual += sizeof(*sin6);
+ if (actual >= limit) {
+ return (actual);
+ }
+ }
+ }
+ }
+ } else {
+ struct sctp_laddr *laddr;
+
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (stcb) {
+ if (sctp_is_addr_restricted(stcb, laddr->ifa)) {
+ continue;
+ }
+ }
+ if (sctp_fill_user_address(sas, &laddr->ifa->address.sa))
+ continue;
+
+ ((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
+ sas = (struct sockaddr_storage *)((caddr_t)sas +
+ laddr->ifa->address.sa.sa_len);
+ actual += laddr->ifa->address.sa.sa_len;
+ if (actual >= limit) {
+ return (actual);
+ }
+ }
+ }
+ return (actual);
+}
+
+static size_t
+sctp_fill_up_addresses(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ size_t limit,
+ struct sockaddr_storage *sas)
+{
+ size_t size = 0;
+
+ SCTP_IPI_ADDR_RLOCK();
+ /* fill up addresses for the endpoint's default vrf */
+ size = sctp_fill_up_addresses_vrf(inp, stcb, limit, sas,
+ inp->def_vrf_id);
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (size);
+}
+
+/*
+ * NOTE: assumes addr lock is held
+ */
+static int
+sctp_count_max_addresses_vrf(struct sctp_inpcb *inp, uint32_t vrf_id)
+{
+ int cnt = 0;
+ struct sctp_vrf *vrf = NULL;
+
+ /*
+ * In both sub-set bound and bound_all cases we return the MAXIMUM
+ * number of addresses that you COULD get. In reality the sub-set
+ * bound may have an exclusion list for a given TCB OR in the
+ * bound-all case a TCB may NOT include the loopback or other
+ * addresses as well.
+ */
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ return (0);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ /* Count them if they are the right type */
+ if (sctp_ifa->address.sa.sa_family == AF_INET) {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)
+ cnt += sizeof(struct sockaddr_in6);
+ else
+ cnt += sizeof(struct sockaddr_in);
+
+ } else if (sctp_ifa->address.sa.sa_family == AF_INET6)
+ cnt += sizeof(struct sockaddr_in6);
+ }
+ }
+ } else {
+ struct sctp_laddr *laddr;
+
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa->address.sa.sa_family == AF_INET) {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)
+ cnt += sizeof(struct sockaddr_in6);
+ else
+ cnt += sizeof(struct sockaddr_in);
+
+ } else if (laddr->ifa->address.sa.sa_family == AF_INET6)
+ cnt += sizeof(struct sockaddr_in6);
+ }
+ }
+ return (cnt);
+}
+
+static int
+sctp_count_max_addresses(struct sctp_inpcb *inp)
+{
+ int cnt = 0;
+
+ SCTP_IPI_ADDR_RLOCK();
+ /* count addresses for the endpoint's default VRF */
+ cnt = sctp_count_max_addresses_vrf(inp, inp->def_vrf_id);
+ SCTP_IPI_ADDR_RUNLOCK();
+ return (cnt);
+}
+
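+/*
+ * Worker for the sctp_connectx() options: validates the packed address
+ * list, creates the association, and either sends the INIT immediately or
+ * (for the delayed variant) just arms the INIT timer until the caller
+ * issues SCTP_CONNECT_X_COMPLETE.
+ */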
+static int
+sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
+ size_t optsize, void *p, int delay)
+{
+ int error = 0;
+ int creat_lock_on = 0;
+ struct sctp_tcb *stcb = NULL;
+ struct sockaddr *sa;
+ int num_v6 = 0, num_v4 = 0, *totaddrp, totaddr;
+ int added = 0;
+ uint32_t vrf_id;
+ int bad_addresses = 0;
+ sctp_assoc_t *a_id;
+
+ SCTPDBG(SCTP_DEBUG_PCB1, "Connectx called\n");
+
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
+ /* We are already connected AND the TCP model */
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
+ SCTP_INP_RLOCK(inp);
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ SCTP_INP_RUNLOCK(inp);
+ }
+ if (stcb) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ return (EALREADY);
+ }
+ SCTP_INP_INCR_REF(inp);
+ SCTP_ASOC_CREATE_LOCK(inp);
+ creat_lock_on = 1;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EFAULT);
+ error = EFAULT;
+ goto out_now;
+ }
+ totaddrp = (int *)optval;
+ totaddr = *totaddrp;
+ sa = (struct sockaddr *)(totaddrp + 1);
+ stcb = sctp_connectx_helper_find(inp, sa, &totaddr, &num_v4, &num_v6, &error, (optsize - sizeof(int)), &bad_addresses);
+ if ((stcb != NULL) || bad_addresses) {
+ /* Already have or are bringing up an association */
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ creat_lock_on = 0;
+ if (stcb)
+ SCTP_TCB_UNLOCK(stcb);
+ if (bad_addresses == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ error = EALREADY;
+ }
+ goto out_now;
+ }
+#ifdef INET6
+ if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) &&
+ (num_v6 > 0)) {
+ error = EINVAL;
+ goto out_now;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ (num_v4 > 0)) {
+ struct in6pcb *inp6;
+
+ inp6 = (struct in6pcb *)inp;
+ if (SCTP_IPV6_V6ONLY(inp6)) {
+ /*
+ * if IPV6_V6ONLY flag, ignore connections destined
+ * to a v4 addr or v4-mapped addr
+ */
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_now;
+ }
+ }
+#endif /* INET6 */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) ==
+ SCTP_PCB_FLAGS_UNBOUND) {
+ /* Bind an ephemeral port */
+ error = sctp_inpcb_bind(so, NULL, NULL, p);
+ if (error) {
+ goto out_now;
+ }
+ }
+ /* FIX ME: do we want to pass in a vrf on the connect call? */
+ vrf_id = inp->def_vrf_id;
+
+ /* We are GOOD to go */
+ stcb = sctp_aloc_assoc(inp, sa, 1, &error, 0, vrf_id,
+ (struct thread *)p
+ );
+ if (stcb == NULL) {
+ /* Gak! no memory */
+ goto out_now;
+ }
+ SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
+ /* move to second address */
+ if (sa->sa_family == AF_INET)
+ sa = (struct sockaddr *)((caddr_t)sa + sizeof(struct sockaddr_in));
+ else
+ sa = (struct sockaddr *)((caddr_t)sa + sizeof(struct sockaddr_in6));
+
+ error = 0;
+ added = sctp_connectx_helper_add(stcb, sa, (totaddr - 1), &error);
+ /* Fill in the return id */
+ if (error) {
+ (void)sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_USRREQ + SCTP_LOC_12);
+ goto out_now;
+ }
+ a_id = (sctp_assoc_t *) optval;
+ *a_id = sctp_get_associd(stcb);
+
+ /* initialize authentication parameters for the assoc */
+ sctp_initialize_auth_params(inp, stcb);
+
+ if (delay) {
+ /* doing delayed connection */
+ stcb->asoc.delayed_connection = 1;
+ sctp_timer_start(SCTP_TIMER_TYPE_INIT, inp, stcb, stcb->asoc.primary_destination);
+ } else {
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
+ sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
+ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
+ /* Set the connected flag so we can queue data */
+ soisconnecting(so);
+ }
+out_now:
+ if (creat_lock_on) {
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ }
+ SCTP_INP_DECR_REF(inp);
+ return error;
+}
+
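+/*
+ * Locate the TCB a socket option applies to: one-to-one style sockets use
+ * their single association, otherwise the association id is looked up.
+ * The TCB, if any, is returned locked; a zero id on a one-to-many socket
+ * leaves stcb NULL so the option applies to the endpoint.
+ */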
+#define SCTP_FIND_STCB(inp, stcb, assoc_id) { \
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||\
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { \
+ SCTP_INP_RLOCK(inp); \
+ stcb = LIST_FIRST(&inp->sctp_asoc_list); \
+ if (stcb) { \
+ SCTP_TCB_LOCK(stcb); \
+ } \
+ SCTP_INP_RUNLOCK(inp); \
+ } else if (assoc_id != 0) { \
+ stcb = sctp_findassociation_ep_asocid(inp, assoc_id, 1); \
+ if (stcb == NULL) { \
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); \
+ error = ENOENT; \
+ break; \
+ } \
+ } else { \
+ stcb = NULL; \
+ } \
+ }
+
+
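+/*
+ * Verify that the caller supplied at least sizeof(type) bytes of option
+ * data; on success view the buffer through a typed pointer, otherwise
+ * fail the surrounding switch case with EINVAL.
+ */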
+#define SCTP_CHECK_AND_CAST(destp, srcp, type, size) {\
+ if (size < sizeof(type)) { \
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); \
+ error = EINVAL; \
+ break; \
+ } else { \
+ destp = (type *)srcp; \
+ } \
+ }
+
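+/*
+ * getsockopt() handler for IPPROTO_SCTP options.  Each case validates the
+ * user buffer with SCTP_CHECK_AND_CAST, resolves the association (if any)
+ * with SCTP_FIND_STCB, copies the values out and reports the number of
+ * bytes written through *optsize.
+ */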
+static int
+sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize,
+ void *p)
+{
+ struct sctp_inpcb *inp = NULL;
+ int error, val = 0;
+ struct sctp_tcb *stcb = NULL;
+
+ if (optval == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ error = 0;
+
+ switch (optname) {
+ case SCTP_NODELAY:
+ case SCTP_AUTOCLOSE:
+ case SCTP_EXPLICIT_EOR:
+ case SCTP_AUTO_ASCONF:
+ case SCTP_DISABLE_FRAGMENTS:
+ case SCTP_I_WANT_MAPPED_V4_ADDR:
+ case SCTP_USE_EXT_RCVINFO:
+ SCTP_INP_RLOCK(inp);
+ switch (optname) {
+ case SCTP_DISABLE_FRAGMENTS:
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NO_FRAGMENT);
+ break;
+ case SCTP_I_WANT_MAPPED_V4_ADDR:
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4);
+ break;
+ case SCTP_AUTO_ASCONF:
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* only valid for bound all sockets */
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto flags_out;
+ }
+ break;
+ case SCTP_EXPLICIT_EOR:
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR);
+ break;
+ case SCTP_NODELAY:
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NODELAY);
+ break;
+ case SCTP_USE_EXT_RCVINFO:
+ val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO);
+ break;
+ case SCTP_AUTOCLOSE:
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE))
+ val = TICKS_TO_SEC(inp->sctp_ep.auto_close_time);
+ else
+ val = 0;
+ break;
+
+ default:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
+ error = ENOPROTOOPT;
+ } /* end switch (sopt->sopt_name) */
+ if (optname != SCTP_AUTOCLOSE) {
+ /* make it an "on/off" value */
+ val = (val != 0);
+ }
+ if (*optsize < sizeof(val)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+flags_out:
+ SCTP_INP_RUNLOCK(inp);
+ if (error == 0) {
+ /* return the option value */
+ *(int *)optval = val;
+ *optsize = sizeof(val);
+ }
+ break;
+ case SCTP_GET_PACKET_LOG:
+ {
+#ifdef SCTP_PACKET_LOGGING
+ uint8_t *target;
+ int ret;
+
+ SCTP_CHECK_AND_CAST(target, optval, uint8_t, *optsize);
+ ret = sctp_copy_out_packet_log(target, (int)*optsize);
+ *optsize = ret;
+#else
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ error = EOPNOTSUPP;
+#endif
+ break;
+ }
+ case SCTP_PARTIAL_DELIVERY_POINT:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ *value = inp->partial_delivery_point;
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_FRAGMENT_INTERLEAVE:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
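+ /*
+ * The two feature flags encode the three interleave levels:
+ * both on = level 2, FRAG_INTERLEAVE alone = level 1, neither = level 0.
+ */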
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE)) {
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS)) {
+ *value = SCTP_FRAG_LEVEL_2;
+ } else {
+ *value = SCTP_FRAG_LEVEL_1;
+ }
+ } else {
+ *value = SCTP_FRAG_LEVEL_0;
+ }
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_CMT_ON_OFF:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ if (sctp_cmt_on_off) {
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+ if (stcb) {
+ av->assoc_value = stcb->asoc.sctp_cmt_on_off;
+ SCTP_TCB_UNLOCK(stcb);
+
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ error = ENOTCONN;
+ }
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
+ error = ENOPROTOOPT;
+ }
+ *optsize = sizeof(*av);
+ }
+ break;
+ /* JRS - Get socket option for pluggable congestion control */
+ case SCTP_PLUGGABLE_CC:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+ if (stcb) {
+ av->assoc_value = stcb->asoc.congestion_control_module;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ av->assoc_value = inp->sctp_ep.sctp_default_cc_module;
+ }
+ *optsize = sizeof(*av);
+ }
+ break;
+ case SCTP_GET_ADDR_LEN:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ error = EINVAL;
+#ifdef INET
+ if (av->assoc_value == AF_INET) {
+ av->assoc_value = sizeof(struct sockaddr_in);
+ error = 0;
+ }
+#endif
+#ifdef INET6
+ if (av->assoc_value == AF_INET6) {
+ av->assoc_value = sizeof(struct sockaddr_in6);
+ error = 0;
+ }
+#endif
+ if (error) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ }
+ *optsize = sizeof(*av);
+ }
+ break;
+ case SCTP_GET_ASSOC_NUMBER:
+ {
+ uint32_t *value, cnt;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ cnt = 0;
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ cnt++;
+ }
+ SCTP_INP_RUNLOCK(inp);
+ *value = cnt;
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+
+ case SCTP_GET_ASSOC_ID_LIST:
+ {
+ struct sctp_assoc_ids *ids;
+ unsigned int at, limit;
+
+ SCTP_CHECK_AND_CAST(ids, optval, struct sctp_assoc_ids, *optsize);
+ at = 0;
+ limit = *optsize / sizeof(sctp_assoc_t);
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ if (at < limit) {
+ ids->gaids_assoc_id[at++] = sctp_get_associd(stcb);
+ } else {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ }
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = at * sizeof(sctp_assoc_t);
+ }
+ break;
+ case SCTP_CONTEXT:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = stcb->asoc.context;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ av->assoc_value = inp->sctp_context;
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*av);
+ }
+ break;
+ case SCTP_VRF_ID:
+ {
+ uint32_t *default_vrfid;
+
+ SCTP_CHECK_AND_CAST(default_vrfid, optval, uint32_t, *optsize);
+ *default_vrfid = inp->def_vrf_id;
+ break;
+ }
+ case SCTP_GET_ASOC_VRF:
+ {
+ struct sctp_assoc_value *id;
+
+ SCTP_CHECK_AND_CAST(id, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, id->assoc_id);
+ if (stcb == NULL) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ id->assoc_value = stcb->asoc.vrf_id;
+ break;
+ }
+ case SCTP_GET_VRF_IDS:
+ {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ error = EOPNOTSUPP;
+ break;
+ }
+ case SCTP_GET_NONCE_VALUES:
+ {
+ struct sctp_get_nonce_values *gnv;
+
+ SCTP_CHECK_AND_CAST(gnv, optval, struct sctp_get_nonce_values, *optsize);
+ SCTP_FIND_STCB(inp, stcb, gnv->gn_assoc_id);
+
+ if (stcb) {
+ gnv->gn_peers_tag = stcb->asoc.peer_vtag;
+ gnv->gn_local_tag = stcb->asoc.my_vtag;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ error = ENOTCONN;
+ }
+ *optsize = sizeof(*gnv);
+ }
+ break;
+ case SCTP_DELAYED_SACK:
+ {
+ struct sctp_sack_info *sack;
+
+ SCTP_CHECK_AND_CAST(sack, optval, struct sctp_sack_info, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sack->sack_assoc_id);
+ if (stcb) {
+ sack->sack_delay = stcb->asoc.delayed_ack;
+ sack->sack_freq = stcb->asoc.sack_freq;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ sack->sack_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]);
+ sack->sack_freq = inp->sctp_ep.sctp_sack_freq;
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*sack);
+ }
+ break;
+
+ case SCTP_GET_SNDBUF_USE:
+ {
+ struct sctp_sockstat *ss;
+
+ SCTP_CHECK_AND_CAST(ss, optval, struct sctp_sockstat, *optsize);
+ SCTP_FIND_STCB(inp, stcb, ss->ss_assoc_id);
+
+ if (stcb) {
+ ss->ss_total_sndbuf = stcb->asoc.total_output_queue_size;
+ ss->ss_total_recv_buf = (stcb->asoc.size_on_reasm_queue +
+ stcb->asoc.size_on_all_streams);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ error = ENOTCONN;
+ }
+ *optsize = sizeof(struct sctp_sockstat);
+ }
+ break;
+ case SCTP_MAX_BURST:
+ {
+ uint8_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint8_t, *optsize);
+
+ SCTP_INP_RLOCK(inp);
+ *value = inp->sctp_ep.max_burst;
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(uint8_t);
+ }
+ break;
+ case SCTP_MAXSEG:
+ {
+ struct sctp_assoc_value *av;
+ int ovh;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ av->assoc_value = sctp_get_frag_point(stcb, &stcb->asoc);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MED_OVERHEAD;
+ } else {
+ ovh = SCTP_MED_V4_OVERHEAD;
+ }
+ if (inp->sctp_frag_point >= SCTP_DEFAULT_MAXSEGMENT)
+ av->assoc_value = 0;
+ else
+ av->assoc_value = inp->sctp_frag_point - ovh;
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(struct sctp_assoc_value);
+ }
+ break;
+ case SCTP_GET_STAT_LOG:
+ error = sctp_fill_stat_log(optval, optsize);
+ break;
+ case SCTP_EVENTS:
+ {
+ struct sctp_event_subscribe *events;
+
+ SCTP_CHECK_AND_CAST(events, optval, struct sctp_event_subscribe, *optsize);
+ memset(events, 0, sizeof(*events));
+ SCTP_INP_RLOCK(inp);
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT))
+ events->sctp_data_io_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVASSOCEVNT))
+ events->sctp_association_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVPADDREVNT))
+ events->sctp_address_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVSENDFAILEVNT))
+ events->sctp_send_failure_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVPEERERR))
+ events->sctp_peer_error_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT))
+ events->sctp_shutdown_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT))
+ events->sctp_partial_delivery_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ADAPTATIONEVNT))
+ events->sctp_adaptation_layer_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTHEVNT))
+ events->sctp_authentication_event = 1;
+
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT))
+ events->sctp_stream_reset_events = 1;
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(struct sctp_event_subscribe);
+ }
+ break;
+
+ case SCTP_ADAPTATION_LAYER:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+
+ SCTP_INP_RLOCK(inp);
+ *value = inp->sctp_ep.adaptation_layer_indicator;
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_SET_INITIAL_DBG_SEQ:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ SCTP_INP_RLOCK(inp);
+ *value = inp->sctp_ep.initial_sequence_debug;
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_GET_LOCAL_ADDR_SIZE:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ SCTP_INP_RLOCK(inp);
+ *value = sctp_count_max_addresses(inp);
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_GET_REMOTE_ADDR_SIZE:
+ {
+ uint32_t *value;
+ size_t size;
+ struct sctp_nets *net;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
+ /* FIXME MT: change to sctp_assoc_value? */
+ SCTP_FIND_STCB(inp, stcb, (sctp_assoc_t) * value);
+
+ if (stcb) {
+ size = 0;
+ /* Count the sizes */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4) ||
+ (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET6)) {
+ size += sizeof(struct sockaddr_in6);
+ } else if (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET) {
+ size += sizeof(struct sockaddr_in);
+ } else {
+ /* huh */
+ break;
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ *value = (uint32_t) size;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ error = ENOTCONN;
+ }
+ *optsize = sizeof(uint32_t);
+ }
+ break;
+ case SCTP_GET_PEER_ADDRESSES:
+ /*
+ * Get the address information; an array is passed in for us
+ * to fill up, and we pack it.
+ */
+ {
+ size_t cpsz, left;
+ struct sockaddr_storage *sas;
+ struct sctp_nets *net;
+ struct sctp_getaddresses *saddr;
+
+ SCTP_CHECK_AND_CAST(saddr, optval, struct sctp_getaddresses, *optsize);
+ SCTP_FIND_STCB(inp, stcb, saddr->sget_assoc_id);
+
+ if (stcb) {
+ left = (*optsize) - sizeof(struct sctp_getaddresses);
+ *optsize = sizeof(struct sctp_getaddresses);
+ sas = (struct sockaddr_storage *)&saddr->addr[0];
+
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4) ||
+ (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET6)) {
+ cpsz = sizeof(struct sockaddr_in6);
+ } else if (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET) {
+ cpsz = sizeof(struct sockaddr_in);
+ } else {
+ /* huh */
+ break;
+ }
+ if (left < cpsz) {
+ /* not enough room. */
+ break;
+ }
+ if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_NEEDS_MAPPED_V4) &&
+ (((struct sockaddr *)&net->ro._l_addr)->sa_family == AF_INET)) {
+ /* Must map the address */
+ in6_sin_2_v4mapsin6((struct sockaddr_in *)&net->ro._l_addr,
+ (struct sockaddr_in6 *)sas);
+ } else {
+ memcpy(sas, &net->ro._l_addr, cpsz);
+ }
+ ((struct sockaddr_in *)sas)->sin_port = stcb->rport;
+
+ sas = (struct sockaddr_storage *)((caddr_t)sas + cpsz);
+ left -= cpsz;
+ *optsize += cpsz;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ }
+ }
+ break;
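+ /*
+ * Illustrative userland sketch (not part of this file): a caller would
+ * normally size the buffer via SCTP_GET_REMOTE_ADDR_SIZE first, e.g.
+ *
+ * uint32_t sz = assoc_id;
+ * socklen_t len = sizeof(sz);
+ * getsockopt(sd, IPPROTO_SCTP, SCTP_GET_REMOTE_ADDR_SIZE, &sz, &len);
+ * len = sizeof(struct sctp_getaddresses) + sz;
+ * ...allocate len bytes, set sget_assoc_id, then...
+ * getsockopt(sd, IPPROTO_SCTP, SCTP_GET_PEER_ADDRESSES, addrs, &len);
+ */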
+ case SCTP_GET_LOCAL_ADDRESSES:
+ {
+ size_t limit, actual;
+ struct sockaddr_storage *sas;
+ struct sctp_getaddresses *saddr;
+
+ SCTP_CHECK_AND_CAST(saddr, optval, struct sctp_getaddresses, *optsize);
+ SCTP_FIND_STCB(inp, stcb, saddr->sget_assoc_id);
+
+ sas = (struct sockaddr_storage *)&saddr->addr[0];
+ limit = *optsize - sizeof(sctp_assoc_t);
+ actual = sctp_fill_up_addresses(inp, stcb, limit, sas);
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ *optsize = sizeof(struct sockaddr_storage) + actual;
+ }
+ break;
+ case SCTP_PEER_ADDR_PARAMS:
+ {
+ struct sctp_paddrparams *paddrp;
+ struct sctp_nets *net;
+
+ SCTP_CHECK_AND_CAST(paddrp, optval, struct sctp_paddrparams, *optsize);
+ SCTP_FIND_STCB(inp, stcb, paddrp->spp_assoc_id);
+
+ net = NULL;
+ if (stcb) {
+ net = sctp_findnet(stcb, (struct sockaddr *)&paddrp->spp_address);
+ } else {
+ /*
+ * We increment here since
+ * sctp_findassociation_ep_addr() will do a
+ * decrement if it finds the stcb as long as
+ * the locked tcb (last argument) is NOT a
+ * TCB, i.e. NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&paddrp->spp_address, &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+ if (stcb && (net == NULL)) {
+ struct sockaddr *sa;
+
+ sa = (struct sockaddr *)&paddrp->spp_address;
+ if (sa->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)sa;
+ if (sin->sin_addr.s_addr) {
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ } else if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ } else {
+ error = EAFNOSUPPORT;
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ }
+ if (stcb) {
+ /* Applies to the specific association */
+ paddrp->spp_flags = 0;
+ if (net) {
+ int ovh;
+
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MED_OVERHEAD;
+ } else {
+ ovh = SCTP_MED_V4_OVERHEAD;
+ }
+
+
+ paddrp->spp_pathmaxrxt = net->failure_threshold;
+ paddrp->spp_pathmtu = net->mtu - ovh;
+ /* get flags for HB */
+ if (net->dest_state & SCTP_ADDR_NOHB)
+ paddrp->spp_flags |= SPP_HB_DISABLE;
+ else
+ paddrp->spp_flags |= SPP_HB_ENABLE;
+ /* get flags for PMTU */
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ paddrp->spp_flags |= SPP_PMTUD_ENABLE;
+ } else {
+ paddrp->spp_flags |= SPP_PMTUD_DISABLE;
+ }
+#ifdef INET
+ if (net->ro._l_addr.sin.sin_family == AF_INET) {
+ paddrp->spp_ipv4_tos = net->tos_flowlabel & 0x000000fc;
+ paddrp->spp_flags |= SPP_IPV4_TOS;
+ }
+#endif
+#ifdef INET6
+ if (net->ro._l_addr.sin6.sin6_family == AF_INET6) {
+ paddrp->spp_ipv6_flowlabel = net->tos_flowlabel;
+ paddrp->spp_flags |= SPP_IPV6_FLOWLABEL;
+ }
+#endif
+ } else {
+ /*
+ * No destination so return default
+ * value
+ */
+ int cnt = 0;
+
+ paddrp->spp_pathmaxrxt = stcb->asoc.def_net_failure;
+ paddrp->spp_pathmtu = sctp_get_frag_point(stcb, &stcb->asoc);
+#ifdef INET
+ paddrp->spp_ipv4_tos = stcb->asoc.default_tos & 0x000000fc;
+ paddrp->spp_flags |= SPP_IPV4_TOS;
+#endif
+#ifdef INET6
+ paddrp->spp_ipv6_flowlabel = stcb->asoc.default_flowlabel;
+ paddrp->spp_flags |= SPP_IPV6_FLOWLABEL;
+#endif
+ /* default settings should be these */
+ if (stcb->asoc.hb_is_disabled == 0) {
+ paddrp->spp_flags |= SPP_HB_ENABLE;
+ } else {
+ paddrp->spp_flags |= SPP_HB_DISABLE;
+ }
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ cnt++;
+ }
+ }
+ if (cnt) {
+ paddrp->spp_flags |= SPP_PMTUD_ENABLE;
+ }
+ }
+ paddrp->spp_hbinterval = stcb->asoc.heart_beat_delay;
+ paddrp->spp_assoc_id = sctp_get_associd(stcb);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* Use endpoint defaults */
+ SCTP_INP_RLOCK(inp);
+ paddrp->spp_pathmaxrxt = inp->sctp_ep.def_net_failure;
+ paddrp->spp_hbinterval = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
+ paddrp->spp_assoc_id = (sctp_assoc_t) 0;
+ /* get inp's default */
+#ifdef INET
+ paddrp->spp_ipv4_tos = inp->ip_inp.inp.inp_ip_tos;
+ paddrp->spp_flags |= SPP_IPV4_TOS;
+#endif
+#ifdef INET6
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ paddrp->spp_ipv6_flowlabel = ((struct in6pcb *)inp)->in6p_flowinfo;
+ paddrp->spp_flags |= SPP_IPV6_FLOWLABEL;
+ }
+#endif
+ /* can't return this */
+ paddrp->spp_pathmtu = 0;
+
+ /* default behavior, no stcb */
+ paddrp->spp_flags = SPP_PMTUD_ENABLE;
+
+ if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) {
+ paddrp->spp_flags |= SPP_HB_ENABLE;
+ } else {
+ paddrp->spp_flags |= SPP_HB_DISABLE;
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(struct sctp_paddrparams);
+ }
+ break;
+ case SCTP_GET_PEER_ADDR_INFO:
+ {
+ struct sctp_paddrinfo *paddri;
+ struct sctp_nets *net;
+
+ SCTP_CHECK_AND_CAST(paddri, optval, struct sctp_paddrinfo, *optsize);
+ SCTP_FIND_STCB(inp, stcb, paddri->spinfo_assoc_id);
+
+ net = NULL;
+ if (stcb) {
+ net = sctp_findnet(stcb, (struct sockaddr *)&paddri->spinfo_address);
+ } else {
+ /*
+ * We increment here since
+ * sctp_findassociation_ep_addr() will do a
+ * decrement if it finds the stcb as long as
+ * the locked tcb (last argument) is NOT a
+ * TCB, i.e. NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp, (struct sockaddr *)&paddri->spinfo_address, &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+
+ if ((stcb) && (net)) {
+ paddri->spinfo_state = net->dest_state & (SCTP_REACHABLE_MASK | SCTP_ADDR_NOHB);
+ paddri->spinfo_cwnd = net->cwnd;
+ paddri->spinfo_srtt = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ paddri->spinfo_rto = net->RTO;
+ paddri->spinfo_assoc_id = sctp_get_associd(stcb);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ }
+ *optsize = sizeof(struct sctp_paddrinfo);
+ }
+ break;
+ case SCTP_PCB_STATUS:
+ {
+ struct sctp_pcbinfo *spcb;
+
+ SCTP_CHECK_AND_CAST(spcb, optval, struct sctp_pcbinfo, *optsize);
+ sctp_fill_pcbinfo(spcb);
+ *optsize = sizeof(struct sctp_pcbinfo);
+ }
+ break;
+
+ case SCTP_STATUS:
+ {
+ struct sctp_nets *net;
+ struct sctp_status *sstat;
+
+ SCTP_CHECK_AND_CAST(sstat, optval, struct sctp_status, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sstat->sstat_assoc_id);
+
+ if (stcb == NULL) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ /*
+ * I think passing the state is fine since
+ * sctp_constants.h will be available to the user
+ * land.
+ */
+ sstat->sstat_state = stcb->asoc.state;
+ sstat->sstat_assoc_id = sctp_get_associd(stcb);
+ sstat->sstat_rwnd = stcb->asoc.peers_rwnd;
+ sstat->sstat_unackdata = stcb->asoc.sent_queue_cnt;
+ /*
+ * We can't include chunks that have been passed to
+ * the socket layer. Only things in queue.
+ */
+ sstat->sstat_penddata = (stcb->asoc.cnt_on_reasm_queue +
+ stcb->asoc.cnt_on_all_streams);
+
+
+ sstat->sstat_instrms = stcb->asoc.streamincnt;
+ sstat->sstat_outstrms = stcb->asoc.streamoutcnt;
+ sstat->sstat_fragmentation_point = sctp_get_frag_point(stcb, &stcb->asoc);
+ memcpy(&sstat->sstat_primary.spinfo_address,
+ &stcb->asoc.primary_destination->ro._l_addr,
+ ((struct sockaddr *)(&stcb->asoc.primary_destination->ro._l_addr))->sa_len);
+ net = stcb->asoc.primary_destination;
+ ((struct sockaddr_in *)&sstat->sstat_primary.spinfo_address)->sin_port = stcb->rport;
+ /*
+ * Again the user can get info from sctp_constants.h
+ * for what the state of the network is.
+ */
+ sstat->sstat_primary.spinfo_state = net->dest_state & SCTP_REACHABLE_MASK;
+ sstat->sstat_primary.spinfo_cwnd = net->cwnd;
+ sstat->sstat_primary.spinfo_srtt = net->lastsa;
+ sstat->sstat_primary.spinfo_rto = net->RTO;
+ sstat->sstat_primary.spinfo_mtu = net->mtu;
+ sstat->sstat_primary.spinfo_assoc_id = sctp_get_associd(stcb);
+ SCTP_TCB_UNLOCK(stcb);
+ *optsize = sizeof(*sstat);
+ }
+ break;
+ case SCTP_RTOINFO:
+ {
+ struct sctp_rtoinfo *srto;
+
+ SCTP_CHECK_AND_CAST(srto, optval, struct sctp_rtoinfo, *optsize);
+ SCTP_FIND_STCB(inp, stcb, srto->srto_assoc_id);
+
+ if (stcb) {
+ srto->srto_initial = stcb->asoc.initial_rto;
+ srto->srto_max = stcb->asoc.maxrto;
+ srto->srto_min = stcb->asoc.minrto;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ srto->srto_initial = inp->sctp_ep.initial_rto;
+ srto->srto_max = inp->sctp_ep.sctp_maxrto;
+ srto->srto_min = inp->sctp_ep.sctp_minrto;
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*srto);
+ }
+ break;
+ case SCTP_ASSOCINFO:
+ {
+ struct sctp_assocparams *sasoc;
+ uint32_t oldval;
+
+ SCTP_CHECK_AND_CAST(sasoc, optval, struct sctp_assocparams, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sasoc->sasoc_assoc_id);
+
+ if (stcb) {
+ oldval = sasoc->sasoc_cookie_life;
+ sasoc->sasoc_cookie_life = TICKS_TO_MSEC(stcb->asoc.cookie_life);
+ sasoc->sasoc_asocmaxrxt = stcb->asoc.max_send_times;
+ sasoc->sasoc_number_peer_destinations = stcb->asoc.numnets;
+ sasoc->sasoc_peer_rwnd = stcb->asoc.peers_rwnd;
+ sasoc->sasoc_local_rwnd = stcb->asoc.my_rwnd;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ sasoc->sasoc_cookie_life = TICKS_TO_MSEC(inp->sctp_ep.def_cookie_life);
+ sasoc->sasoc_asocmaxrxt = inp->sctp_ep.max_send_times;
+ sasoc->sasoc_number_peer_destinations = 0;
+ sasoc->sasoc_peer_rwnd = 0;
+ sasoc->sasoc_local_rwnd = sbspace(&inp->sctp_socket->so_rcv);
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*sasoc);
+ }
+ break;
+ case SCTP_DEFAULT_SEND_PARAM:
+ {
+ struct sctp_sndrcvinfo *s_info;
+
+ SCTP_CHECK_AND_CAST(s_info, optval, struct sctp_sndrcvinfo, *optsize);
+ SCTP_FIND_STCB(inp, stcb, s_info->sinfo_assoc_id);
+
+ if (stcb) {
+ memcpy(s_info, &stcb->asoc.def_send, sizeof(stcb->asoc.def_send));
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_RLOCK(inp);
+ memcpy(s_info, &inp->def_send, sizeof(inp->def_send));
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*s_info);
+ }
+ break;
+ case SCTP_INITMSG:
+ {
+ struct sctp_initmsg *sinit;
+
+ SCTP_CHECK_AND_CAST(sinit, optval, struct sctp_initmsg, *optsize);
+ SCTP_INP_RLOCK(inp);
+ sinit->sinit_num_ostreams = inp->sctp_ep.pre_open_stream_count;
+ sinit->sinit_max_instreams = inp->sctp_ep.max_open_streams_intome;
+ sinit->sinit_max_attempts = inp->sctp_ep.max_init_times;
+ sinit->sinit_max_init_timeo = inp->sctp_ep.initial_init_rto_max;
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = sizeof(*sinit);
+ }
+ break;
+ case SCTP_PRIMARY_ADDR:
+ /* we allow a "get" operation on this */
+ {
+ struct sctp_setprim *ssp;
+
+ SCTP_CHECK_AND_CAST(ssp, optval, struct sctp_setprim, *optsize);
+ SCTP_FIND_STCB(inp, stcb, ssp->ssp_assoc_id);
+
+ if (stcb) {
+ /* simply copy out the sockaddr_storage... */
+ int len;
+
+ len = *optsize;
+ if (len > stcb->asoc.primary_destination->ro._l_addr.sa.sa_len)
+ len = stcb->asoc.primary_destination->ro._l_addr.sa.sa_len;
+
+ memcpy(&ssp->ssp_addr,
+ &stcb->asoc.primary_destination->ro._l_addr,
+ len);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ }
+ *optsize = sizeof(*ssp);
+ }
+ break;
+
+ case SCTP_HMAC_IDENT:
+ {
+ struct sctp_hmacalgo *shmac;
+ sctp_hmaclist_t *hmaclist;
+ uint32_t size;
+ int i;
+
+ SCTP_CHECK_AND_CAST(shmac, optval, struct sctp_hmacalgo, *optsize);
+
+ SCTP_INP_RLOCK(inp);
+ hmaclist = inp->sctp_ep.local_hmacs;
+ if (hmaclist == NULL) {
+ /* no HMACs to return */
+ *optsize = sizeof(*shmac);
+ SCTP_INP_RUNLOCK(inp);
+ break;
+ }
+ /* is there room for all of the hmac ids? */
+ size = sizeof(*shmac) + (hmaclist->num_algo *
+ sizeof(shmac->shmac_idents[0]));
+ if ((size_t)(*optsize) < size) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ SCTP_INP_RUNLOCK(inp);
+ break;
+ }
+ /* copy out the list */
+ for (i = 0; i < hmaclist->num_algo; i++)
+ shmac->shmac_idents[i] = hmaclist->hmac[i];
+ SCTP_INP_RUNLOCK(inp);
+ *optsize = size;
+ break;
+ }
+ case SCTP_AUTH_ACTIVE_KEY:
+ {
+ struct sctp_authkeyid *scact;
+
+ SCTP_CHECK_AND_CAST(scact, optval, struct sctp_authkeyid, *optsize);
+ SCTP_FIND_STCB(inp, stcb, scact->scact_assoc_id);
+
+ if (stcb) {
+ /* get the active key on the assoc */
+ scact->scact_keynumber = stcb->asoc.authinfo.assoc_keyid;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* get the endpoint active key */
+ SCTP_INP_RLOCK(inp);
+ scact->scact_keynumber = inp->sctp_ep.default_keyid;
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(*scact);
+ break;
+ }
+ case SCTP_LOCAL_AUTH_CHUNKS:
+ {
+ struct sctp_authchunks *sac;
+ sctp_auth_chklist_t *chklist = NULL;
+ size_t size = 0;
+
+ SCTP_CHECK_AND_CAST(sac, optval, struct sctp_authchunks, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sac->gauth_assoc_id);
+
+ if (stcb) {
+ /* get off the assoc */
+ chklist = stcb->asoc.local_auth_chunks;
+ /* is there enough space? */
+ size = sctp_auth_get_chklist_size(chklist);
+ if (*optsize < (sizeof(struct sctp_authchunks) + size)) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ } else {
+ /* copy out the chunks */
+ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* get off the endpoint */
+ SCTP_INP_RLOCK(inp);
+ chklist = inp->sctp_ep.local_auth_chunks;
+ /* is there enough space? */
+ size = sctp_auth_get_chklist_size(chklist);
+ if (*optsize < (sizeof(struct sctp_authchunks) + size)) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ } else {
+ /* copy out the chunks */
+ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ *optsize = sizeof(struct sctp_authchunks) + size;
+ break;
+ }
+ case SCTP_PEER_AUTH_CHUNKS:
+ {
+ struct sctp_authchunks *sac;
+ sctp_auth_chklist_t *chklist = NULL;
+ size_t size = 0;
+
+ SCTP_CHECK_AND_CAST(sac, optval, struct sctp_authchunks, *optsize);
+ SCTP_FIND_STCB(inp, stcb, sac->gauth_assoc_id);
+
+ if (stcb) {
+ /* get off the assoc */
+ chklist = stcb->asoc.peer_auth_chunks;
+ /* is there enough space? */
+ size = sctp_auth_get_chklist_size(chklist);
+ if (*optsize < (sizeof(struct sctp_authchunks) + size)) {
+ error = EINVAL;
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ } else {
+ /* copy out the chunks */
+ (void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ }
+ *optsize = sizeof(struct sctp_authchunks) + size;
+ break;
+ }
+
+
+ default:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
+ error = ENOPROTOOPT;
+ *optsize = 0;
+ break;
+ } /* end switch (sopt->sopt_name) */
+ return (error);
+}
+
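+/*
+ * setsockopt() handler for IPPROTO_SCTP options; per-association cases
+ * locate and lock the TCB with SCTP_FIND_STCB and fall back to the
+ * endpoint defaults when no association is specified.
+ */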
+static int
+sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
+ void *p)
+{
+ int error, set_opt;
+ uint32_t *mopt;
+ struct sctp_tcb *stcb = NULL;
+ struct sctp_inpcb *inp = NULL;
+ uint32_t vrf_id;
+
+ if (optval == NULL) {
+ SCTP_PRINTF("optval is NULL\n");
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ SCTP_PRINTF("inp is NULL?\n");
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ vrf_id = inp->def_vrf_id;
+
+ error = 0;
+ switch (optname) {
+ case SCTP_NODELAY:
+ case SCTP_AUTOCLOSE:
+ case SCTP_AUTO_ASCONF:
+ case SCTP_EXPLICIT_EOR:
+ case SCTP_DISABLE_FRAGMENTS:
+ case SCTP_USE_EXT_RCVINFO:
+ case SCTP_I_WANT_MAPPED_V4_ADDR:
+ /* copy in the option value */
+ SCTP_CHECK_AND_CAST(mopt, optval, uint32_t, optsize);
+ set_opt = 0;
+ if (error)
+ break;
+ switch (optname) {
+ case SCTP_DISABLE_FRAGMENTS:
+ set_opt = SCTP_PCB_FLAGS_NO_FRAGMENT;
+ break;
+ case SCTP_AUTO_ASCONF:
+ /*
+ * NOTE: we don't really support this flag
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* only valid for bound all sockets */
+ set_opt = SCTP_PCB_FLAGS_AUTO_ASCONF;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ break;
+ case SCTP_EXPLICIT_EOR:
+ set_opt = SCTP_PCB_FLAGS_EXPLICIT_EOR;
+ break;
+ case SCTP_USE_EXT_RCVINFO:
+ set_opt = SCTP_PCB_FLAGS_EXT_RCVINFO;
+ break;
+ case SCTP_I_WANT_MAPPED_V4_ADDR:
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ set_opt = SCTP_PCB_FLAGS_NEEDS_MAPPED_V4;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ break;
+ case SCTP_NODELAY:
+ set_opt = SCTP_PCB_FLAGS_NODELAY;
+ break;
+ case SCTP_AUTOCLOSE:
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ set_opt = SCTP_PCB_FLAGS_AUTOCLOSE;
+ /*
+ * The option value is in seconds and is stored internally in ticks.
+ * Note this does not affect old associations, only new ones.
+ */
+ inp->sctp_ep.auto_close_time = SEC_TO_TICKS(*mopt);
+ break;
+ }
+ SCTP_INP_WLOCK(inp);
+ if (*mopt != 0) {
+ sctp_feature_on(inp, set_opt);
+ } else {
+ sctp_feature_off(inp, set_opt);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ break;
+ case SCTP_PARTIAL_DELIVERY_POINT:
+ {
+ uint32_t *value;
+
+ SCTP_CHECK_AND_CAST(value, optval, uint32_t, optsize);
+ if (*value > SCTP_SB_LIMIT_RCV(so)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ inp->partial_delivery_point = *value;
+ }
+ break;
+ case SCTP_FRAGMENT_INTERLEAVE:
+ /* not yet until we re-write sctp_recvmsg() */
+ {
+ uint32_t *level;
+
+ SCTP_CHECK_AND_CAST(level, optval, uint32_t, optsize);
+ if (*level == SCTP_FRAG_LEVEL_2) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+ } else if (*level == SCTP_FRAG_LEVEL_1) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+ } else if (*level == SCTP_FRAG_LEVEL_0) {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ }
+ break;
+ case SCTP_CMT_ON_OFF:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ if (sctp_cmt_on_off) {
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+ if (stcb) {
+ stcb->asoc.sctp_cmt_on_off = (uint8_t) av->assoc_value;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ error = ENOTCONN;
+ }
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
+ error = ENOPROTOOPT;
+ }
+ }
+ break;
+ /* JRS - Set socket option for pluggable congestion control */
+ case SCTP_PLUGGABLE_CC:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+ if (stcb) {
+ switch (av->assoc_value) {
+ /*
+ * JRS - Standard TCP congestion
+ * control
+ */
+ case SCTP_CC_RFC2581:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_RFC2581;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ /*
+ * JRS - High Speed TCP congestion
+ * control (Floyd)
+ */
+ case SCTP_CC_HSTCP:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_HSTCP;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_hs_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_hs_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_cwnd_update_after_fr_timer;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ /* JRS - HTCP congestion control */
+ case SCTP_CC_HTCP:
+ {
+ stcb->asoc.congestion_control_module = SCTP_CC_HTCP;
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param = &sctp_htcp_set_initial_cc_param;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_sack = &sctp_htcp_cwnd_update_after_sack;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr = &sctp_htcp_cwnd_update_after_fr;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout = &sctp_htcp_cwnd_update_after_timeout;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo = &sctp_htcp_cwnd_update_after_ecn_echo;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped = &sctp_cwnd_update_after_packet_dropped;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_output = &sctp_cwnd_update_after_output;
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer = &sctp_htcp_cwnd_update_after_fr_timer;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ /*
+ * JRS - All other values are
+ * invalid
+ */
+ default:
+ {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ }
+ } else {
+ switch (av->assoc_value) {
+ case SCTP_CC_RFC2581:
+ case SCTP_CC_HSTCP:
+ case SCTP_CC_HTCP:
+ inp->sctp_ep.sctp_default_cc_module = av->assoc_value;
+ break;
+ default:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ }
+ }
+ break;
+ case SCTP_CLR_STAT_LOG:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ error = EOPNOTSUPP;
+ break;
+ case SCTP_CONTEXT:
+ {
+ struct sctp_assoc_value *av;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (stcb) {
+ stcb->asoc.context = av->assoc_value;
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_context = av->assoc_value;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_VRF_ID:
+ {
+ uint32_t *default_vrfid;
+
+ SCTP_CHECK_AND_CAST(default_vrfid, optval, uint32_t, optsize);
+ if (*default_vrfid > SCTP_MAX_VRF_ID) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ inp->def_vrf_id = *default_vrfid;
+ break;
+ }
+ case SCTP_DEL_VRF_ID:
+ {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ error = EOPNOTSUPP;
+ break;
+ }
+ case SCTP_ADD_VRF_ID:
+ {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ error = EOPNOTSUPP;
+ break;
+ }
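+ /*
+ * sack_delay is in milliseconds and sack_freq in packets; a zero
+ * field leaves the corresponding setting unchanged.
+ */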
+ case SCTP_DELAYED_SACK:
+ {
+ struct sctp_sack_info *sack;
+
+ SCTP_CHECK_AND_CAST(sack, optval, struct sctp_sack_info, optsize);
+ SCTP_FIND_STCB(inp, stcb, sack->sack_assoc_id);
+ if (sack->sack_delay) {
+ if (sack->sack_delay > SCTP_MAX_SACK_DELAY)
+ sack->sack_delay = SCTP_MAX_SACK_DELAY;
+ }
+ if (stcb) {
+ if (sack->sack_delay) {
+ if (MSEC_TO_TICKS(sack->sack_delay) < 1) {
+ sack->sack_delay = TICKS_TO_MSEC(1);
+ }
+ stcb->asoc.delayed_ack = sack->sack_delay;
+ }
+ if (sack->sack_freq) {
+ stcb->asoc.sack_freq = sack->sack_freq;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (sack->sack_delay) {
+ if (MSEC_TO_TICKS(sack->sack_delay) < 1) {
+ sack->sack_delay = TICKS_TO_MSEC(1);
+ }
+ inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(sack->sack_delay);
+ }
+ if (sack->sack_freq) {
+ inp->sctp_ep.sctp_sack_freq = sack->sack_freq;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ }
+ case SCTP_AUTH_CHUNK:
+ {
+ struct sctp_authchunk *sauth;
+
+ SCTP_CHECK_AND_CAST(sauth, optval, struct sctp_authchunk, optsize);
+
+ SCTP_INP_WLOCK(inp);
+ if (sctp_auth_add_chunk(sauth->sauth_chunk, inp->sctp_ep.local_auth_chunks)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ break;
+ }
+ case SCTP_AUTH_KEY:
+ {
+ struct sctp_authkey *sca;
+ struct sctp_keyhead *shared_keys;
+ sctp_sharedkey_t *shared_key;
+ sctp_key_t *key = NULL;
+ size_t size;
+
+ SCTP_CHECK_AND_CAST(sca, optval, struct sctp_authkey, optsize);
+ SCTP_FIND_STCB(inp, stcb, sca->sca_assoc_id);
+ size = optsize - sizeof(*sca);
+
+ if (stcb) {
+ /* set it on the assoc */
+ shared_keys = &stcb->asoc.shared_keys;
+ /* clear the cached keys for this key id */
+ sctp_clear_cachedkeys(stcb, sca->sca_keynumber);
+ /*
+ * create the new shared key and
+ * insert/replace it
+ */
+ if (size > 0) {
+ key = sctp_set_key(sca->sca_key, (uint32_t) size);
+ if (key == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ }
+ shared_key = sctp_alloc_sharedkey();
+ if (shared_key == NULL) {
+ sctp_free_key(key);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ shared_key->key = key;
+ shared_key->keyid = sca->sca_keynumber;
+ sctp_insert_sharedkey(shared_keys, shared_key);
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* set it on the endpoint */
+ SCTP_INP_WLOCK(inp);
+ shared_keys = &inp->sctp_ep.shared_keys;
+ /*
+ * clear the cached keys on all assocs for
+ * this key id
+ */
+ sctp_clear_cachedkeys_ep(inp, sca->sca_keynumber);
+ /*
+ * create the new shared key and
+ * insert/replace it
+ */
+ if (size > 0) {
+ key = sctp_set_key(sca->sca_key, (uint32_t) size);
+ if (key == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ SCTP_INP_WUNLOCK(inp);
+ break;
+ }
+ }
+ shared_key = sctp_alloc_sharedkey();
+ if (shared_key == NULL) {
+ sctp_free_key(key);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ SCTP_INP_WUNLOCK(inp);
+ break;
+ }
+ shared_key->key = key;
+ shared_key->keyid = sca->sca_keynumber;
+ sctp_insert_sharedkey(shared_keys, shared_key);
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ }
+ case SCTP_HMAC_IDENT:
+ {
+ struct sctp_hmacalgo *shmac;
+ sctp_hmaclist_t *hmaclist;
+ uint32_t hmacid;
+ size_t size, i, found;
+
+ SCTP_CHECK_AND_CAST(shmac, optval, struct sctp_hmacalgo, optsize);
+ size = (optsize - sizeof(*shmac)) / sizeof(shmac->shmac_idents[0]);
+ hmaclist = sctp_alloc_hmaclist(size);
+ if (hmaclist == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
+ error = ENOMEM;
+ break;
+ }
+ for (i = 0; i < size; i++) {
+ hmacid = shmac->shmac_idents[i];
+ if (sctp_auth_add_hmacid(hmaclist, (uint16_t) hmacid)) {
+ /* invalid HMACs were found */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ sctp_free_hmaclist(hmaclist);
+ goto sctp_set_hmac_done;
+ }
+ }
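+ /*
+ * HMAC-SHA1 is mandatory for SCTP-AUTH, so reject any list
+ * that does not include it.
+ */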
+ found = 0;
+ for (i = 0; i < hmaclist->num_algo; i++) {
+ if (hmaclist->hmac[i] == SCTP_AUTH_HMAC_ID_SHA1) {
+ /* already in list */
+ found = 1;
+ }
+ }
+ if (!found) {
+ sctp_free_hmaclist(hmaclist);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ /* set it on the endpoint */
+ SCTP_INP_WLOCK(inp);
+ if (inp->sctp_ep.local_hmacs)
+ sctp_free_hmaclist(inp->sctp_ep.local_hmacs);
+ inp->sctp_ep.local_hmacs = hmaclist;
+ SCTP_INP_WUNLOCK(inp);
+ sctp_set_hmac_done:
+ break;
+ }
+ case SCTP_AUTH_ACTIVE_KEY:
+ {
+ struct sctp_authkeyid *scact;
+
+ SCTP_CHECK_AND_CAST(scact, optval, struct sctp_authkeyid, optsize);
+ SCTP_FIND_STCB(inp, stcb, scact->scact_assoc_id);
+
+ /* set the active key on the right place */
+ if (stcb) {
+ /* set the active key on the assoc */
+ if (sctp_auth_setactivekey(stcb, scact->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* set the active key on the endpoint */
+ SCTP_INP_WLOCK(inp);
+ if (sctp_auth_setactivekey_ep(inp, scact->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ }
+ case SCTP_AUTH_DELETE_KEY:
+ {
+ struct sctp_authkeyid *scdel;
+
+ SCTP_CHECK_AND_CAST(scdel, optval, struct sctp_authkeyid, optsize);
+ SCTP_FIND_STCB(inp, stcb, scdel->scact_assoc_id);
+
+ /* delete the key from the right place */
+ if (stcb) {
+ if (sctp_delete_sharedkey(stcb, scdel->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (sctp_delete_sharedkey_ep(inp, scdel->scact_keynumber)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ }
+
+ case SCTP_RESET_STREAMS:
+ {
+ struct sctp_stream_reset *strrst;
+ uint8_t send_in = 0, send_tsn = 0, send_out = 0;
+ int i;
+
+ SCTP_CHECK_AND_CAST(strrst, optval, struct sctp_stream_reset, optsize);
+ SCTP_FIND_STCB(inp, stcb, strrst->strrst_assoc_id);
+
+ if (stcb == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ break;
+ }
+ if (stcb->asoc.peer_supports_strreset == 0) {
+ /*
+ * Peer does not support it, we return
+ * protocol not supported since this is true
+ * for this feature and this peer, not the
+ * socket request in general.
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EPROTONOSUPPORT);
+ error = EPROTONOSUPPORT;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ if (stcb->asoc.stream_reset_outstanding) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ error = EALREADY;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ if (strrst->strrst_flags == SCTP_RESET_LOCAL_RECV) {
+ send_in = 1;
+ } else if (strrst->strrst_flags == SCTP_RESET_LOCAL_SEND) {
+ send_out = 1;
+ } else if (strrst->strrst_flags == SCTP_RESET_BOTH) {
+ send_in = 1;
+ send_out = 1;
+ } else if (strrst->strrst_flags == SCTP_RESET_TSN) {
+ send_tsn = 1;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ for (i = 0; i < strrst->strrst_num_streams; i++) {
+ if ((send_in) &&
+
+ (strrst->strrst_list[i] > stcb->asoc.streamincnt)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto get_out;
+ }
+ if ((send_out) &&
+ (strrst->strrst_list[i] > stcb->asoc.streamoutcnt)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto get_out;
+ }
+ }
+ if (error) {
+ get_out:
+ SCTP_TCB_UNLOCK(stcb);
+ break;
+ }
+ error = sctp_send_str_reset_req(stcb, strrst->strrst_num_streams,
+ strrst->strrst_list,
+ send_out, (stcb->asoc.str_reset_seq_in - 3),
+ send_in, send_tsn);
+
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ break;
+
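+ /*
+ * The connectx option value is a packed blob: an int giving the number
+ * of addresses followed by the sockaddrs themselves; on success the
+ * new association id is written back over the start of the buffer.
+ */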
+ case SCTP_CONNECT_X:
+ if (optsize < (sizeof(int) + sizeof(struct sockaddr_in))) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ error = sctp_do_connect_x(so, inp, optval, optsize, p, 0);
+ break;
+
+ case SCTP_CONNECT_X_DELAYED:
+ if (optsize < (sizeof(int) + sizeof(struct sockaddr_in))) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ error = sctp_do_connect_x(so, inp, optval, optsize, p, 1);
+ break;
+
+ case SCTP_CONNECT_X_COMPLETE:
+ {
+ struct sockaddr *sa;
+ struct sctp_nets *net;
+
+ /* FIXME MT: check correct? */
+ SCTP_CHECK_AND_CAST(sa, optval, struct sockaddr, optsize);
+
+ /* find tcb */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
+ SCTP_INP_RLOCK(inp);
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb) {
+ SCTP_TCB_LOCK(stcb);
+ net = sctp_findnet(stcb, sa);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ /*
+ * We increment here since
+ * sctp_findassociation_ep_addr() will do a
+ * decrement if it finds the stcb as long as
+ * the locked tcb (last argument) is NOT a
+ * TCB, i.e. NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp, sa, &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+
+ if (stcb == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ error = ENOENT;
+ break;
+ }
+ if (stcb->asoc.delayed_connection == 1) {
+ stcb->asoc.delayed_connection = 0;
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
+ sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb,
+ stcb->asoc.primary_destination,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_9);
+ sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
+ } else {
+ /*
+ * already expired or did not use delayed
+ * connectx
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ error = EALREADY;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ break;
+ case SCTP_MAX_BURST:
+ {
+ uint8_t *burst;
+
+ SCTP_CHECK_AND_CAST(burst, optval, uint8_t, optsize);
+
+ SCTP_INP_WLOCK(inp);
+ if (*burst) {
+ inp->sctp_ep.max_burst = *burst;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ case SCTP_MAXSEG:
+ {
+ struct sctp_assoc_value *av;
+ int ovh;
+
+ SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
+ SCTP_FIND_STCB(inp, stcb, av->assoc_id);
+
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MED_OVERHEAD;
+ } else {
+ ovh = SCTP_MED_V4_OVERHEAD;
+ }
+ if (stcb) {
+ if (av->assoc_value) {
+ stcb->asoc.sctp_frag_point = (av->assoc_value + ovh);
+ } else {
+ stcb->asoc.sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ /*
+ * FIXME MT: I think this is not in tune
+ * with the API ID
+ */
+ if (av->assoc_value) {
+ inp->sctp_frag_point = (av->assoc_value + ovh);
+ } else {
+ inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_EVENTS:
+ {
+ struct sctp_event_subscribe *events;
+
+ SCTP_CHECK_AND_CAST(events, optval, struct sctp_event_subscribe, optsize);
+
+ SCTP_INP_WLOCK(inp);
+ if (events->sctp_data_io_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT);
+ }
+
+ if (events->sctp_association_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVASSOCEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVASSOCEVNT);
+ }
+
+ if (events->sctp_address_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVPADDREVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVPADDREVNT);
+ }
+
+ if (events->sctp_send_failure_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVSENDFAILEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVSENDFAILEVNT);
+ }
+
+ if (events->sctp_peer_error_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVPEERERR);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVPEERERR);
+ }
+
+ if (events->sctp_shutdown_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT);
+ }
+
+ if (events->sctp_partial_delivery_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_PDAPIEVNT);
+ }
+
+ if (events->sctp_adaptation_layer_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_ADAPTATIONEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_ADAPTATIONEVNT);
+ }
+
+ if (events->sctp_authentication_event) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_AUTHEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTHEVNT);
+ }
+
+ if (events->sctp_stream_reset_events) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT);
+ } else {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+
+ case SCTP_ADAPTATION_LAYER:
+ {
+ struct sctp_setadaptation *adap_bits;
+
+ SCTP_CHECK_AND_CAST(adap_bits, optval, struct sctp_setadaptation, optsize);
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_ep.adaptation_layer_indicator = adap_bits->ssb_adaptation_ind;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+#ifdef SCTP_DEBUG
+ case SCTP_SET_INITIAL_DBG_SEQ:
+ {
+ uint32_t *vvv;
+
+ SCTP_CHECK_AND_CAST(vvv, optval, uint32_t, optsize);
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_ep.initial_sequence_debug = *vvv;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+#endif
+ case SCTP_DEFAULT_SEND_PARAM:
+ {
+ struct sctp_sndrcvinfo *s_info;
+
+ SCTP_CHECK_AND_CAST(s_info, optval, struct sctp_sndrcvinfo, optsize);
+ SCTP_FIND_STCB(inp, stcb, s_info->sinfo_assoc_id);
+
+ if (stcb) {
+ if (s_info->sinfo_stream <= stcb->asoc.streamoutcnt) {
+ memcpy(&stcb->asoc.def_send, s_info, min(optsize, sizeof(stcb->asoc.def_send)));
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ memcpy(&inp->def_send, s_info, min(optsize, sizeof(inp->def_send)));
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_PEER_ADDR_PARAMS:
+ /* Applies to the specific association */
+ {
+ struct sctp_paddrparams *paddrp;
+ struct sctp_nets *net;
+
+ SCTP_CHECK_AND_CAST(paddrp, optval, struct sctp_paddrparams, optsize);
+ SCTP_FIND_STCB(inp, stcb, paddrp->spp_assoc_id);
+ net = NULL;
+ if (stcb) {
+ net = sctp_findnet(stcb, (struct sockaddr *)&paddrp->spp_address);
+ } else {
+ /*
+ * We increment here since
+ * sctp_findassociation_ep_addr() will do a
+ * decrement if it finds the stcb as long as
+ * the locked tcb (last argument) is NOT a
+ * TCB.. aka NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp,
+ (struct sockaddr *)&paddrp->spp_address,
+ &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+ if (stcb && (net == NULL)) {
+ struct sockaddr *sa;
+
+ sa = (struct sockaddr *)&paddrp->spp_address;
+ if (sa->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)sa;
+ if (sin->sin_addr.s_addr) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ SCTP_TCB_UNLOCK(stcb);
+ error = EINVAL;
+ break;
+ }
+ } else if (sa->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)sa;
+ if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ SCTP_TCB_UNLOCK(stcb);
+ error = EINVAL;
+ break;
+ }
+ } else {
+ error = EAFNOSUPPORT;
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
+ break;
+ }
+ }
+ /* sanity checks */
+ if ((paddrp->spp_flags & SPP_HB_ENABLE) && (paddrp->spp_flags & SPP_HB_DISABLE)) {
+ if (stcb)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ if ((paddrp->spp_flags & SPP_PMTUD_ENABLE) && (paddrp->spp_flags & SPP_PMTUD_DISABLE)) {
+ if (stcb)
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ if (stcb) {
+ /************************TCB SPECIFIC SET ******************/
+ /*
+ * do we change the timer for HB? We run
+ * only one.
+ */
+ int ovh = 0;
+
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ovh = SCTP_MED_OVERHEAD;
+ } else {
+ ovh = SCTP_MED_V4_OVERHEAD;
+ }
+
+ if (paddrp->spp_hbinterval)
+ stcb->asoc.heart_beat_delay = paddrp->spp_hbinterval;
+ else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO)
+ stcb->asoc.heart_beat_delay = 0;
+
+ /* network sets ? */
+ if (net) {
+ /************************NET SPECIFIC SET ******************/
+ if (paddrp->spp_flags & SPP_HB_DEMAND) {
+ /* on demand HB */
+ if (sctp_send_hb(stcb, 1, net) < 0) {
+ /* asoc destroyed */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ }
+ if (paddrp->spp_flags & SPP_HB_DISABLE) {
+ net->dest_state |= SCTP_ADDR_NOHB;
+ }
+ if (paddrp->spp_flags & SPP_HB_ENABLE) {
+ net->dest_state &= ~SCTP_ADDR_NOHB;
+ }
+ if ((paddrp->spp_flags & SPP_PMTUD_DISABLE) && (paddrp->spp_pathmtu >= SCTP_SMALLEST_PMTU)) {
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+ }
+ if (paddrp->spp_pathmtu > SCTP_DEFAULT_MINSEGMENT) {
+ net->mtu = paddrp->spp_pathmtu + ovh;
+ if (net->mtu < stcb->asoc.smallest_mtu) {
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("SCTP_PMTU_DISABLE calls sctp_pathmtu_adjustment:%d\n",
+ net->mtu);
+#endif
+ sctp_pathmtu_adjustment(inp, stcb, net, net->mtu);
+ }
+ }
+ }
+ if (paddrp->spp_flags & SPP_PMTUD_ENABLE) {
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
+ }
+ }
+ if (paddrp->spp_pathmaxrxt)
+ net->failure_threshold = paddrp->spp_pathmaxrxt;
+#ifdef INET
+ if (paddrp->spp_flags & SPP_IPV4_TOS) {
+ if (net->ro._l_addr.sin.sin_family == AF_INET) {
+ net->tos_flowlabel = paddrp->spp_ipv4_tos & 0x000000fc;
+ }
+ }
+#endif
+#ifdef INET6
+ if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL) {
+ if (net->ro._l_addr.sin6.sin6_family == AF_INET6) {
+ net->tos_flowlabel = paddrp->spp_ipv6_flowlabel;
+ }
+ }
+#endif
+ } else {
+ /************************ASSOC ONLY -- NO NET SPECIFIC SET ******************/
+ if (paddrp->spp_pathmaxrxt)
+ stcb->asoc.def_net_failure = paddrp->spp_pathmaxrxt;
+
+ if (paddrp->spp_flags & SPP_HB_ENABLE) {
+ /* Turn back on the timer */
+ stcb->asoc.hb_is_disabled = 0;
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
+ }
+ if ((paddrp->spp_flags & SPP_PMTUD_DISABLE) && (paddrp->spp_pathmtu >= SCTP_SMALLEST_PMTU)) {
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
+ }
+ if (paddrp->spp_pathmtu > SCTP_DEFAULT_MINSEGMENT) {
+ net->mtu = paddrp->spp_pathmtu + ovh;
+ if (net->mtu < stcb->asoc.smallest_mtu) {
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("SCTP_PMTU_DISABLE calls sctp_pathmtu_adjustment:%d\n",
+ net->mtu);
+#endif
+ sctp_pathmtu_adjustment(inp, stcb, net, net->mtu);
+ }
+ }
+ }
+ }
+ if (paddrp->spp_flags & SPP_PMTUD_ENABLE) {
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
+ }
+ }
+ }
+ if (paddrp->spp_flags & SPP_HB_DISABLE) {
+ int cnt_of_unconf = 0;
+ struct sctp_nets *lnet;
+
+ stcb->asoc.hb_is_disabled = 1;
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ if (lnet->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ cnt_of_unconf++;
+ }
+ }
+ /*
+ * stop the timer ONLY if we
+ * have no unconfirmed
+ * addresses
+ */
+ if (cnt_of_unconf == 0) {
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
+ SCTP_FROM_SCTP_USRREQ + SCTP_LOC_11);
+ }
+ }
+ }
+ if (paddrp->spp_flags & SPP_HB_ENABLE) {
+ /* start up the timer. */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
+ }
+ }
+#ifdef INET
+ if (paddrp->spp_flags & SPP_IPV4_TOS)
+ stcb->asoc.default_tos = paddrp->spp_ipv4_tos & 0x000000fc;
+#endif
+#ifdef INET6
+ if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL)
+ stcb->asoc.default_flowlabel = paddrp->spp_ipv6_flowlabel;
+#endif
+
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /************************NO TCB, SET TO default stuff ******************/
+ SCTP_INP_WLOCK(inp);
+ /*
+ * For the TOS/FLOWLABEL stuff you set it
+ * with the options on the socket
+ */
+ if (paddrp->spp_pathmaxrxt) {
+ inp->sctp_ep.def_net_failure = paddrp->spp_pathmaxrxt;
+ }
+ if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO)
+ inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0;
+ else if (paddrp->spp_hbinterval) {
+ if (paddrp->spp_hbinterval > SCTP_MAX_HB_INTERVAL)
+ paddrp->spp_hbinterval = SCTP_MAX_HB_INTERVAL;
+ inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(paddrp->spp_hbinterval);
+ }
+ if (paddrp->spp_flags & SPP_HB_ENABLE) {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
+
+ } else if (paddrp->spp_flags & SPP_HB_DISABLE) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_RTOINFO:
+ {
+ struct sctp_rtoinfo *srto;
+ uint32_t new_init, new_min, new_max;
+
+ SCTP_CHECK_AND_CAST(srto, optval, struct sctp_rtoinfo, optsize);
+ SCTP_FIND_STCB(inp, stcb, srto->srto_assoc_id);
+
+ if (stcb) {
+ if (srto->srto_initial)
+ new_init = srto->srto_initial;
+ else
+ new_init = stcb->asoc.initial_rto;
+ if (srto->srto_max)
+ new_max = srto->srto_max;
+ else
+ new_max = stcb->asoc.maxrto;
+ if (srto->srto_min)
+ new_min = srto->srto_min;
+ else
+ new_min = stcb->asoc.minrto;
+ if ((new_min <= new_init) && (new_init <= new_max)) {
+ stcb->asoc.initial_rto = new_init;
+ stcb->asoc.maxrto = new_max;
+ stcb->asoc.minrto = new_min;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EDOM);
+ error = EDOM;
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (srto->srto_initial)
+ new_init = srto->srto_initial;
+ else
+ new_init = inp->sctp_ep.initial_rto;
+ if (srto->srto_max)
+ new_max = srto->srto_max;
+ else
+ new_max = inp->sctp_ep.sctp_maxrto;
+ if (srto->srto_min)
+ new_min = srto->srto_min;
+ else
+ new_min = inp->sctp_ep.sctp_minrto;
+ if ((new_min <= new_init) && (new_init <= new_max)) {
+ inp->sctp_ep.initial_rto = new_init;
+ inp->sctp_ep.sctp_maxrto = new_max;
+ inp->sctp_ep.sctp_minrto = new_min;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EDOM);
+ error = EDOM;
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_ASSOCINFO:
+ {
+ struct sctp_assocparams *sasoc;
+
+ SCTP_CHECK_AND_CAST(sasoc, optval, struct sctp_assocparams, optsize);
+ SCTP_FIND_STCB(inp, stcb, sasoc->sasoc_assoc_id);
+ if (sasoc->sasoc_cookie_life) {
+ /* boundary check the cookie life */
+ if (sasoc->sasoc_cookie_life < 1000)
+ sasoc->sasoc_cookie_life = 1000;
+ if (sasoc->sasoc_cookie_life > SCTP_MAX_COOKIE_LIFE) {
+ sasoc->sasoc_cookie_life = SCTP_MAX_COOKIE_LIFE;
+ }
+ }
+ if (stcb) {
+ if (sasoc->sasoc_asocmaxrxt)
+ stcb->asoc.max_send_times = sasoc->sasoc_asocmaxrxt;
+ sasoc->sasoc_number_peer_destinations = stcb->asoc.numnets;
+ sasoc->sasoc_peer_rwnd = 0;
+ sasoc->sasoc_local_rwnd = 0;
+ if (sasoc->sasoc_cookie_life) {
+ stcb->asoc.cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (sasoc->sasoc_asocmaxrxt)
+ inp->sctp_ep.max_send_times = sasoc->sasoc_asocmaxrxt;
+ sasoc->sasoc_number_peer_destinations = 0;
+ sasoc->sasoc_peer_rwnd = 0;
+ sasoc->sasoc_local_rwnd = 0;
+ if (sasoc->sasoc_cookie_life) {
+ inp->sctp_ep.def_cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ }
+ break;
+ case SCTP_INITMSG:
+ {
+ struct sctp_initmsg *sinit;
+
+ SCTP_CHECK_AND_CAST(sinit, optval, struct sctp_initmsg, optsize);
+ SCTP_INP_WLOCK(inp);
+ if (sinit->sinit_num_ostreams)
+ inp->sctp_ep.pre_open_stream_count = sinit->sinit_num_ostreams;
+
+ if (sinit->sinit_max_instreams)
+ inp->sctp_ep.max_open_streams_intome = sinit->sinit_max_instreams;
+
+ if (sinit->sinit_max_attempts)
+ inp->sctp_ep.max_init_times = sinit->sinit_max_attempts;
+
+ if (sinit->sinit_max_init_timeo)
+ inp->sctp_ep.initial_init_rto_max = sinit->sinit_max_init_timeo;
+ SCTP_INP_WUNLOCK(inp);
+ }
+ break;
+ case SCTP_PRIMARY_ADDR:
+ {
+ struct sctp_setprim *spa;
+ struct sctp_nets *net, *lnet;
+
+ SCTP_CHECK_AND_CAST(spa, optval, struct sctp_setprim, optsize);
+ SCTP_FIND_STCB(inp, stcb, spa->ssp_assoc_id);
+
+ net = NULL;
+ if (stcb) {
+ net = sctp_findnet(stcb, (struct sockaddr *)&spa->ssp_addr);
+ } else {
+ /*
+ * We increment here since
+ * sctp_findassociation_ep_addr() will do a
+ * decrement if it finds the stcb as long as
+ * the locked tcb (last argument) is NOT a
+ * TCB.. aka NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp,
+ (struct sockaddr *)&spa->ssp_addr,
+ &net, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+
+ if ((stcb) && (net)) {
+ if ((net != stcb->asoc.primary_destination) &&
+ (!(net->dest_state & SCTP_ADDR_UNCONFIRMED))) {
+ /* Ok we need to set it */
+ lnet = stcb->asoc.primary_destination;
+ if (sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net) == 0) {
+ if (net->dest_state & SCTP_ADDR_SWITCH_PRIMARY) {
+ net->dest_state |= SCTP_ADDR_DOUBLE_SWITCH;
+ }
+ net->dest_state |= SCTP_ADDR_SWITCH_PRIMARY;
+ }
+ }
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ if (stcb) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ }
+ break;
+ case SCTP_SET_DYNAMIC_PRIMARY:
+ {
+ union sctp_sockstore *ss;
+
+ error = priv_check(curthread,
+ PRIV_NETINET_RESERVEDPORT);
+ if (error)
+ break;
+
+ SCTP_CHECK_AND_CAST(ss, optval, union sctp_sockstore, optsize);
+ /* SUPER USER CHECK? */
+ error = sctp_dynamic_set_primary(&ss->sa, vrf_id);
+ }
+ break;
+ case SCTP_SET_PEER_PRIMARY_ADDR:
+ {
+ struct sctp_setpeerprim *sspp;
+
+ SCTP_CHECK_AND_CAST(sspp, optval, struct sctp_setpeerprim, optsize);
+ SCTP_FIND_STCB(inp, stcb, sspp->sspp_assoc_id);
+ if (stcb != NULL) {
+ struct sctp_ifa *ifa;
+
+ ifa = sctp_find_ifa_by_addr((struct sockaddr *)&sspp->sspp_addr,
+ stcb->asoc.vrf_id, SCTP_ADDR_NOT_LOCKED);
+ if (ifa == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_of_it;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
+ /*
+ * Must validate the ifa found is in
+ * our ep
+ */
+ struct sctp_laddr *laddr;
+ int found = 0;
+
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
+ __FUNCTION__);
+ continue;
+ }
+ if (laddr->ifa == ifa) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_of_it;
+ }
+ }
+ if (sctp_set_primary_ip_address_sa(stcb,
+ (struct sockaddr *)&sspp->sspp_addr) != 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ out_of_it:
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+
+ }
+ break;
+ case SCTP_BINDX_ADD_ADDR:
+ {
+ struct sctp_getaddresses *addrs;
+ size_t sz;
+ struct thread *td;
+ int prison = 0;
+
+ td = (struct thread *)p;
+ if (jailed(td->td_ucred)) {
+ prison = 1;
+ }
+ SCTP_CHECK_AND_CAST(addrs, optval, struct sctp_getaddresses,
+ optsize);
+ if (addrs->addr->sa_family == AF_INET) {
+ sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in);
+ if (optsize < sz) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ if (prison && prison_ip(td->td_ucred, 0, &(((struct sockaddr_in *)(addrs->addr))->sin_addr.s_addr))) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EADDRNOTAVAIL);
+ error = EADDRNOTAVAIL;
+ }
+ } else if (addrs->addr->sa_family == AF_INET6) {
+ sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in6);
+ if (optsize < sz) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ /* JAIL XXXX Add else here for V6 */
+ }
+ sctp_bindx_add_address(so, inp, addrs->addr,
+ addrs->sget_assoc_id, vrf_id,
+ &error, p);
+ }
+ break;
+ case SCTP_BINDX_REM_ADDR:
+ {
+ struct sctp_getaddresses *addrs;
+ size_t sz;
+ struct thread *td;
+ int prison = 0;
+
+ td = (struct thread *)p;
+ if (jailed(td->td_ucred)) {
+ prison = 1;
+ }
+ SCTP_CHECK_AND_CAST(addrs, optval, struct sctp_getaddresses, optsize);
+ if (addrs->addr->sa_family == AF_INET) {
+ sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in);
+ if (optsize < sz) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ if (prison && prison_ip(td->td_ucred, 0, &(((struct sockaddr_in *)(addrs->addr))->sin_addr.s_addr))) {
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EADDRNOTAVAIL);
+ error = EADDRNOTAVAIL;
+ }
+ } else if (addrs->addr->sa_family == AF_INET6) {
+ sz = sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in6);
+ if (optsize < sz) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ break;
+ }
+ /* JAIL XXXX Add else here for V6 */
+ }
+ sctp_bindx_delete_address(so, inp, addrs->addr,
+ addrs->sget_assoc_id, vrf_id,
+ &error);
+ }
+ break;
+ default:
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
+ error = ENOPROTOOPT;
+ break;
+ } /* end switch (opt) */
+ return (error);
+}
+
+
+int
+sctp_ctloutput(struct socket *so, struct sockopt *sopt)
+{
+ void *optval = NULL;
+ size_t optsize = 0;
+ struct sctp_inpcb *inp;
+ void *p;
+ int error = 0;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ /* I made this the same as TCP since we are not set up? */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (ECONNRESET);
+ }
+ if (sopt->sopt_level != IPPROTO_SCTP) {
+ /* wrong proto level... send back up to IP */
+#ifdef INET6
+ if (INP_CHECK_SOCKAF(so, AF_INET6))
+ error = ip6_ctloutput(so, sopt);
+ else
+#endif /* INET6 */
+ error = ip_ctloutput(so, sopt);
+ return (error);
+ }
+ optsize = sopt->sopt_valsize;
+ if (optsize) {
+ SCTP_MALLOC(optval, void *, optsize, SCTP_M_SOCKOPT);
+ if (optval == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOBUFS);
+ return (ENOBUFS);
+ }
+ error = sooptcopyin(sopt, optval, optsize, optsize);
+ if (error) {
+ SCTP_FREE(optval, SCTP_M_SOCKOPT);
+ goto out;
+ }
+ }
+ p = (void *)sopt->sopt_td;
+ if (sopt->sopt_dir == SOPT_SET) {
+ error = sctp_setopt(so, sopt->sopt_name, optval, optsize, p);
+ } else if (sopt->sopt_dir == SOPT_GET) {
+ error = sctp_getopt(so, sopt->sopt_name, optval, &optsize, p);
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ }
+ if ((error == 0) && (optval != NULL)) {
+ error = sooptcopyout(sopt, optval, optsize);
+ SCTP_FREE(optval, SCTP_M_SOCKOPT);
+ } else if (optval != NULL) {
+ SCTP_FREE(optval, SCTP_M_SOCKOPT);
+ }
+out:
+ return (error);
+}
+
+
+static int
+sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
+{
+ int error = 0;
+ int create_lock_on = 0;
+ uint32_t vrf_id;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb = NULL;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ /* I made this the same as TCP since we are not set up? */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (ECONNRESET);
+ }
+ if (addr == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return EINVAL;
+ }
+ if ((addr->sa_family == AF_INET6) && (addr->sa_len != sizeof(struct sockaddr_in6))) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ if ((addr->sa_family == AF_INET) && (addr->sa_len != sizeof(struct sockaddr_in))) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (EINVAL);
+ }
+ SCTP_ASOC_CREATE_LOCK(inp);
+ create_lock_on = 1;
+
+ SCTP_INP_INCR_REF(inp);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
+ /* Should I really unlock ? */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EFAULT);
+ error = EFAULT;
+ goto out_now;
+ }
+#ifdef INET6
+ if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) &&
+ (addr->sa_family == AF_INET6)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_now;
+ }
+#endif /* INET6 */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) ==
+ SCTP_PCB_FLAGS_UNBOUND) {
+ /* Bind an ephemeral port */
+ error = sctp_inpcb_bind(so, NULL, NULL, p);
+ if (error) {
+ goto out_now;
+ }
+ }
+ /* Now do we connect? */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ error = EINVAL;
+ goto out_now;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
+ /* We are already connected AND using the TCP model */
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
+ error = EADDRINUSE;
+ goto out_now;
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
+ SCTP_INP_RLOCK(inp);
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ SCTP_INP_RUNLOCK(inp);
+ } else {
+ /*
+ * We increment here since sctp_findassociation_ep_addr()
+ * will do a decrement if it finds the stcb as long as the
+ * locked tcb (last argument) is NOT a TCB.. aka NULL.
+ */
+ SCTP_INP_INCR_REF(inp);
+ stcb = sctp_findassociation_ep_addr(&inp, addr, NULL, NULL, NULL);
+ if (stcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ } else {
+ SCTP_TCB_LOCK(stcb);
+ }
+ }
+ if (stcb != NULL) {
+ /* Already have or am bringing up an association */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
+ error = EALREADY;
+ goto out_now;
+ }
+ vrf_id = inp->def_vrf_id;
+ /* We are GOOD to go */
+ stcb = sctp_aloc_assoc(inp, addr, 1, &error, 0, vrf_id, p);
+ if (stcb == NULL) {
+ /* Gak! no memory */
+ goto out_now;
+ }
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
+ stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
+ /* Set the connected flag so we can queue data */
+ soisconnecting(so);
+ }
+ SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
+ (void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
+
+ /* initialize authentication parameters for the assoc */
+ sctp_initialize_auth_params(inp, stcb);
+
+ sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
+ SCTP_TCB_UNLOCK(stcb);
+out_now:
+ if (create_lock_on) {
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ }
+ SCTP_INP_DECR_REF(inp);
+ return error;
+}
+
+int
+sctp_listen(struct socket *so, int backlog, struct thread *p)
+{
+ /*
+ * Note this module depends on the protocol processing being called
+ * AFTER any socket level flags and backlog are applied to the
+ * socket. The traditional way that the socket flags are applied is
+ * AFTER protocol processing. We have made a change to the
+ * sys/kern/uipc_socket.c module to reverse this, but this MUST be in
+ * place if the socket API for SCTP is to work properly.
+ */
+
+ int error = 0;
+ struct sctp_inpcb *inp;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (inp == 0) {
+ /* I made this the same as TCP since we are not set up? */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (ECONNRESET);
+ }
+ SCTP_INP_RLOCK(inp);
+#ifdef SCTP_LOCK_LOGGING
+ if (sctp_logging_level & SCTP_LOCK_LOGGING_ENABLE) {
+ sctp_log_lock(inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_SOCK);
+ }
+#endif
+ SOCK_LOCK(so);
+ error = solisten_proto_check(so);
+ if (error) {
+ SOCK_UNLOCK(so);
+ SCTP_INP_RUNLOCK(inp);
+ return (error);
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
+ (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
+ /* We are already connected AND using the TCP model */
+ SCTP_INP_RUNLOCK(inp);
+ SOCK_UNLOCK(so);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
+ /* We must do a bind. */
+ SOCK_UNLOCK(so);
+ SCTP_INP_RUNLOCK(inp);
+ if ((error = sctp_inpcb_bind(so, NULL, NULL, p))) {
+ /* bind error, probably perm */
+ return (error);
+ }
+ SOCK_LOCK(so);
+ } else {
+ if (backlog != 0) {
+ inp->sctp_flags |= SCTP_PCB_FLAGS_LISTENING;
+ } else {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_LISTENING;
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ /* It appears for 7.0 and on, we must always call this. */
+ solisten_proto(so, backlog);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
+ /* remove the ACCEPTCONN flag for one-to-many sockets */
+ so->so_options &= ~SO_ACCEPTCONN;
+ }
+ if (backlog == 0) {
+ /* turning off listen */
+ so->so_options &= ~SO_ACCEPTCONN;
+ }
+ SOCK_UNLOCK(so);
+ return (error);
+}
+
+static int sctp_defered_wakeup_cnt = 0;
+
+int
+sctp_accept(struct socket *so, struct sockaddr **addr)
+{
+ struct sctp_tcb *stcb;
+ struct sctp_inpcb *inp;
+ union sctp_sockstore store;
+
+ int error;
+
+ inp = (struct sctp_inpcb *)so->so_pcb;
+
+ if (inp == 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (ECONNRESET);
+ }
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
+ return (EOPNOTSUPP);
+ }
+ if (so->so_state & SS_ISDISCONNECTED) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ECONNABORTED);
+ return (ECONNABORTED);
+ }
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return (ECONNRESET);
+ }
+ SCTP_TCB_LOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ store = stcb->asoc.primary_destination->ro._l_addr;
+ SCTP_TCB_UNLOCK(stcb);
+ if (store.sa.sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ SCTP_MALLOC_SONAME(sin, struct sockaddr_in *, sizeof *sin);
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_port = ((struct sockaddr_in *)&store)->sin_port;
+ sin->sin_addr = ((struct sockaddr_in *)&store)->sin_addr;
+ *addr = (struct sockaddr *)sin;
+ } else {
+ struct sockaddr_in6 *sin6;
+
+ SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6);
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
+ sin6->sin6_port = ((struct sockaddr_in6 *)&store)->sin6_port;
+
+ sin6->sin6_addr = ((struct sockaddr_in6 *)&store)->sin6_addr;
+ if ((error = sa6_recoverscope(sin6)) != 0) {
+ SCTP_FREE_SONAME(sin6);
+ return (error);
+ }
+ *addr = (struct sockaddr *)sin6;
+ }
+ /* Wake any delayed sleep action */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) {
+ SCTP_INP_WLOCK(inp);
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_DONT_WAKE;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_WAKEOUTPUT) {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEOUTPUT;
+ SCTP_INP_WUNLOCK(inp);
+ SOCKBUF_LOCK(&inp->sctp_socket->so_snd);
+ if (sowriteable(inp->sctp_socket)) {
+ sowwakeup_locked(inp->sctp_socket);
+ } else {
+ SOCKBUF_UNLOCK(&inp->sctp_socket->so_snd);
+ }
+ SCTP_INP_WLOCK(inp);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_WAKEINPUT) {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEINPUT;
+ SCTP_INP_WUNLOCK(inp);
+ SOCKBUF_LOCK(&inp->sctp_socket->so_rcv);
+ if (soreadable(inp->sctp_socket)) {
+ sctp_defered_wakeup_cnt++;
+ sorwakeup_locked(inp->sctp_socket);
+ } else {
+ SOCKBUF_UNLOCK(&inp->sctp_socket->so_rcv);
+ }
+ SCTP_INP_WLOCK(inp);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ }
+ return (0);
+}
+
+int
+sctp_ingetaddr(struct socket *so, struct sockaddr **addr)
+{
+ struct sockaddr_in *sin;
+ uint32_t vrf_id;
+ struct sctp_inpcb *inp;
+ struct sctp_ifa *sctp_ifa;
+
+ /*
+ * Do the malloc first in case it blocks.
+ */
+ SCTP_MALLOC_SONAME(sin, struct sockaddr_in *, sizeof *sin);
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (!inp) {
+ SCTP_FREE_SONAME(sin);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return ECONNRESET;
+ }
+ SCTP_INP_RLOCK(inp);
+ sin->sin_port = inp->sctp_lport;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
+ struct sctp_tcb *stcb;
+ struct sockaddr_in *sin_a;
+ struct sctp_nets *net;
+ int fnd;
+
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ goto notConn;
+ }
+ fnd = 0;
+ sin_a = NULL;
+ SCTP_TCB_LOCK(stcb);
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sin_a = (struct sockaddr_in *)&net->ro._l_addr;
+ if (sin_a == NULL)
+ /* this will make coverity happy */
+ continue;
+
+ if (sin_a->sin_family == AF_INET) {
+ fnd = 1;
+ break;
+ }
+ }
+ if ((!fnd) || (sin_a == NULL)) {
+ /* punt */
+ SCTP_TCB_UNLOCK(stcb);
+ goto notConn;
+ }
+ vrf_id = inp->def_vrf_id;
+ sctp_ifa = sctp_source_address_selection(inp,
+ stcb,
+ (sctp_route_t *) & net->ro,
+ net, 0, vrf_id);
+ if (sctp_ifa) {
+ sin->sin_addr = sctp_ifa->address.sin.sin_addr;
+ sctp_free_ifa(sctp_ifa);
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } else {
+ /* For the bound all case you get back 0 */
+ notConn:
+ sin->sin_addr.s_addr = 0;
+ }
+
+ } else {
+ /* Take the first IPv4 address in the list */
+ struct sctp_laddr *laddr;
+ int fnd = 0;
+
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa->address.sa.sa_family == AF_INET) {
+ struct sockaddr_in *sin_a;
+
+ sin_a = (struct sockaddr_in *)&laddr->ifa->address.sa;
+ sin->sin_addr = sin_a->sin_addr;
+ fnd = 1;
+ break;
+ }
+ }
+ if (!fnd) {
+ SCTP_FREE_SONAME(sin);
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ return ENOENT;
+ }
+ }
+ SCTP_INP_RUNLOCK(inp);
+ (*addr) = (struct sockaddr *)sin;
+ return (0);
+}
+
+int
+sctp_peeraddr(struct socket *so, struct sockaddr **addr)
+{
+ struct sockaddr_in *sin = (struct sockaddr_in *)*addr;
+ int fnd;
+ struct sockaddr_in *sin_a;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
+
+ /* Do the malloc first in case it blocks. */
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if ((inp == NULL) ||
+ ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0)) {
+ /* UDP type and listeners will drop out here */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
+ return (ENOTCONN);
+ }
+ SCTP_MALLOC_SONAME(sin, struct sockaddr_in *, sizeof *sin);
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+
+ /* We must recapture the inp in case we blocked */
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ if (!inp) {
+ SCTP_FREE_SONAME(sin);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return ECONNRESET;
+ }
+ SCTP_INP_RLOCK(inp);
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb) {
+ SCTP_TCB_LOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ if (stcb == NULL) {
+ SCTP_FREE_SONAME(sin);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
+ return ECONNRESET;
+ }
+ fnd = 0;
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sin_a = (struct sockaddr_in *)&net->ro._l_addr;
+ if (sin_a->sin_family == AF_INET) {
+ fnd = 1;
+ sin->sin_port = stcb->rport;
+ sin->sin_addr = sin_a->sin_addr;
+ break;
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ if (!fnd) {
+ /* No IPv4 address */
+ SCTP_FREE_SONAME(sin);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
+ return ENOENT;
+ }
+ (*addr) = (struct sockaddr *)sin;
+ return (0);
+}
+
+struct pr_usrreqs sctp_usrreqs = {
+ .pru_abort = sctp_abort,
+ .pru_accept = sctp_accept,
+ .pru_attach = sctp_attach,
+ .pru_bind = sctp_bind,
+ .pru_connect = sctp_connect,
+ .pru_control = in_control,
+ .pru_close = sctp_close,
+ .pru_detach = sctp_close,
+ .pru_sopoll = sopoll_generic,
+ .pru_disconnect = sctp_disconnect,
+ .pru_listen = sctp_listen,
+ .pru_peeraddr = sctp_peeraddr,
+ .pru_send = sctp_sendm,
+ .pru_shutdown = sctp_shutdown,
+ .pru_sockaddr = sctp_ingetaddr,
+ .pru_sosend = sctp_sosend,
+ .pru_soreceive = sctp_soreceive
+};
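
For readers of the new sctp_usrreq.c above, here is a minimal userland sketch of how the one-to-one (TCP-style) paths implemented by sctp_connect() and sctp_setopt() might be driven. It is illustrative only and not part of this commit; the peer port and loopback address are placeholders, and <netinet/sctp_uio.h> is included explicitly in case <netinet/sctp.h> does not pull in the option structures on its own.

/*
 * Illustrative sketch (not part of this commit): exercise a few of the
 * socket options and the connect path handled by the code above.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>
#include <netinet/sctp_uio.h>	/* struct sctp_event_subscribe, sctp_assoc_value */
#include <string.h>
#include <err.h>

int
main(void)
{
	struct sockaddr_in peer;
	struct sctp_event_subscribe events;
	struct sctp_assoc_value maxseg;
	int fd;

	/* One-to-one style socket; lands in the SCTP_PCB_FLAGS_TCPTYPE paths. */
	fd = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);
	if (fd == -1)
		err(1, "socket");

	/* Subscribe to a couple of notifications (the SCTP_EVENTS case). */
	memset(&events, 0, sizeof(events));
	events.sctp_association_event = 1;
	events.sctp_shutdown_event = 1;
	if (setsockopt(fd, IPPROTO_SCTP, SCTP_EVENTS, &events,
	    sizeof(events)) == -1)
		err(1, "setsockopt(SCTP_EVENTS)");

	/* Cap the endpoint fragmentation point (the SCTP_MAXSEG case). */
	memset(&maxseg, 0, sizeof(maxseg));
	maxseg.assoc_value = 1200;
	if (setsockopt(fd, IPPROTO_SCTP, SCTP_MAXSEG, &maxseg,
	    sizeof(maxseg)) == -1)
		err(1, "setsockopt(SCTP_MAXSEG)");

	/* Start the association; this is serviced by sctp_connect() above. */
	memset(&peer, 0, sizeof(peer));
	peer.sin_family = AF_INET;
	peer.sin_len = sizeof(peer);
	peer.sin_port = htons(5001);			/* placeholder port */
	peer.sin_addr.s_addr = htonl(INADDR_LOOPBACK);	/* placeholder peer */
	if (connect(fd, (struct sockaddr *)&peer, sizeof(peer)) == -1)
		err(1, "connect");

	return (0);
}
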
Index: icmp6.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/icmp6.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/icmp6.h -L sys/netinet/icmp6.h -u -r1.1.1.2 -r1.2
--- sys/netinet/icmp6.h
+++ sys/netinet/icmp6.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet/icmp6.h,v 1.16.2.4 2005/12/25 14:03:37 suz Exp $ */
+/* $FreeBSD: src/sys/netinet/icmp6.h,v 1.21 2007/05/17 21:20:23 jinmei Exp $ */
/* $KAME: icmp6.h,v 1.46 2001/04/27 15:09:48 itojun Exp $ */
/*-
@@ -709,6 +709,11 @@
extern int icmp6_rediraccept; /* accept/process redirects */
extern int icmp6_redirtimeout; /* cache time for redirect routes */
+
+#define ICMP6_NODEINFO_FQDNOK 0x1
+#define ICMP6_NODEINFO_NODEADDROK 0x2
+#define ICMP6_NODEINFO_TMPADDROK 0x4
+#define ICMP6_NODEINFO_GLOBALOK 0x8
#endif /* _KERNEL */
#endif /* not _NETINET_ICMP6_H_ */
Index: ip_input.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/ip_input.c -L sys/netinet/ip_input.c -u -r1.2 -r1.3
--- sys/netinet/ip_input.c
+++ sys/netinet/ip_input.c
@@ -27,9 +27,11 @@
* SUCH DAMAGE.
*
* @(#)ip_input.c 8.2 (Berkeley) 1/4/94
- * $FreeBSD: src/sys/netinet/ip_input.c,v 1.301.2.7 2006/03/04 09:15:08 oleg Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_input.c,v 1.332.2.1 2007/12/19 08:10:30 guido Exp $");
+
#include "opt_bootp.h"
#include "opt_ipfw.h"
#include "opt_ipstealth.h"
@@ -40,7 +42,6 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
-#include <sys/mac.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/domain.h>
@@ -66,10 +67,14 @@
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
+#include <netinet/ip_options.h>
#include <machine/in_cksum.h>
#ifdef DEV_CARP
#include <netinet/ip_carp.h>
#endif
+#ifdef IPSEC
+#include <netinet/ip_ipsec.h>
+#endif /* IPSEC */
#include <sys/socketvar.h>
@@ -77,15 +82,7 @@
#include <netinet/ip_fw.h>
#include <netinet/ip_dummynet.h>
-#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#include <netkey/key.h>
-#endif
-
-#ifdef FAST_IPSEC
-#include <netipsec/ipsec.h>
-#include <netipsec/key.h>
-#endif
+#include <security/mac/mac_framework.h>
int rsvp_on = 0;
@@ -101,33 +98,20 @@
SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
&ip_defttl, 0, "Maximum TTL on IP packets");
-static int ip_dosourceroute = 0;
-SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
- &ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
-
-static int ip_acceptsourceroute = 0;
-SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
- CTLFLAG_RW, &ip_acceptsourceroute, 0,
- "Enable accepting source routed IP packets");
-
-int ip_doopts = 1; /* 0 = ignore, 1 = process, 2 = reject */
-SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_RW,
- &ip_doopts, 0, "Enable IP options processing ([LS]SRR, RR, TS)");
-
static int ip_keepfaith = 0;
SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
- &ip_keepfaith, 0,
- "Enable packet capture for FAITH IPv4->IPv6 translater daemon");
+ &ip_keepfaith, 0,
+ "Enable packet capture for FAITH IPv4->IPv6 translater daemon");
static int ip_sendsourcequench = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW,
- &ip_sendsourcequench, 0,
- "Enable the transmission of source quench packets");
+ &ip_sendsourcequench, 0,
+ "Enable the transmission of source quench packets");
int ip_do_randomid = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW,
- &ip_do_randomid, 0,
- "Assign random ip_id values");
+ &ip_do_randomid, 0,
+ "Assign random ip_id values");
/*
* XXX - Setting ip_checkinterface mostly implements the receive side of
@@ -146,10 +130,6 @@
SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
&ip_checkinterface, 0, "Verify packet arrives on correct interface");
-#ifdef DIAGNOSTIC
-static int ipprintfs = 0;
-#endif
-
struct pfil_head inet_pfil_hook; /* Packet filter hooks */
static struct ifqueue ipintrq;
@@ -165,7 +145,8 @@
SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW,
&ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue");
SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD,
- &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue");
+ &ipintrq.ifq_drops, 0,
+ "Number of packets dropped from the IP input queue");
struct ipstat ipstat;
SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
@@ -190,16 +171,17 @@
#define IPQ_LOCK_ASSERT() mtx_assert(&ipqlock, MA_OWNED)
static void maxnipq_update(void);
+static void ipq_zone_change(void *);
static int maxnipq; /* Administrative limit on # reass queues. */
static int nipq = 0; /* Total # of reass queues */
-SYSCTL_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD, &nipq, 0,
- "Current number of IPv4 fragment reassembly queue entries");
+SYSCTL_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD,
+ &nipq, 0, "Current number of IPv4 fragment reassembly queue entries");
static int maxfragsperpacket;
SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
- &maxfragsperpacket, 0,
- "Maximum number of IPv4 fragments allowed per packet");
+ &maxfragsperpacket, 0,
+ "Maximum number of IPv4 fragments allowed per packet");
struct callout ipport_tick_callout;
@@ -211,7 +193,7 @@
#ifdef IPSTEALTH
int ipstealth = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
- &ipstealth, 0, "");
+ &ipstealth, 0, "IP stealth mode, no TTL decrementation on forwarding");
#endif
/*
@@ -220,28 +202,8 @@
*/
ip_fw_chk_t *ip_fw_chk_ptr = NULL;
ip_dn_io_t *ip_dn_io_ptr = NULL;
-int fw_enable = 1;
int fw_one_pass = 1;
-/*
- * XXX this is ugly. IP options source routing magic.
- */
-struct ipoptrt {
- struct in_addr dst; /* final destination */
- char nop; /* one NOP to align */
- char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
- struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
-};
-
-struct ipopt_tag {
- struct m_tag tag;
- int ip_nhops;
- struct ipoptrt ip_srcrt;
-};
-
-static void save_rte(struct mbuf *, u_char *, struct in_addr);
-static int ip_dooptions(struct mbuf *m, int);
-static void ip_forward(struct mbuf *m, int srcrt);
static void ip_freef(struct ipqhead *, struct ipq *);
/*
@@ -249,10 +211,10 @@
* All protocols not implemented in kernel go to raw IP protocol handler.
*/
void
-ip_init()
+ip_init(void)
{
- register struct protosw *pr;
- register int i;
+ struct protosw *pr;
+ int i;
TAILQ_INIT(&in_ifaddrhead);
in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask);
@@ -298,6 +260,8 @@
ipport_tick(NULL);
EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
SHUTDOWN_PRI_DEFAULT);
+ EVENTHANDLER_REGISTER(nmbclusters_change, ipq_zone_change,
+ NULL, EVENTHANDLER_PRI_ANY);
/* Initialize various other remaining things. */
ip_id = time_second & 0xffff;
@@ -306,9 +270,10 @@
netisr_register(NETISR_IP, ip_input, &ipintrq, NETISR_MPSAFE);
}
-void ip_fini(xtp)
- void *xtp;
+void
+ip_fini(void *xtp)
{
+
callout_stop(&ipport_tick_callout);
}
@@ -326,15 +291,9 @@
u_short sum;
int dchg = 0; /* dest changed after fw */
struct in_addr odst; /* original dst address */
-#ifdef FAST_IPSEC
- struct m_tag *mtag;
- struct tdb_ident *tdbi;
- struct secpolicy *sp;
- int s, error;
-#endif /* FAST_IPSEC */
- M_ASSERTPKTHDR(m);
-
+ M_ASSERTPKTHDR(m);
+
if (m->m_flags & M_FASTFWD_OURS) {
/*
* Firewall or NAT changed destination to local.
@@ -344,8 +303,8 @@
/* Set up some basics that will be used later. */
ip = mtod(m, struct ip *);
hlen = ip->ip_hl << 2;
- goto ours;
- }
+ goto ours;
+ }
ipstat.ips_total++;
@@ -434,20 +393,13 @@
} else
m_adj(m, ip->ip_len - m->m_pkthdr.len);
}
-#if defined(IPSEC) && !defined(IPSEC_FILTERGIF)
- /*
- * Bypass packet filtering for packets from a tunnel (gif).
- */
- if (ipsec_getnhist(m))
- goto passin;
-#endif
-#if defined(FAST_IPSEC) && !defined(IPSEC_FILTERGIF)
+#ifdef IPSEC
/*
* Bypass packet filtering for packets from a tunnel (gif).
*/
- if (m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL)
+ if (ip_ipsec_filtertunnel(m))
goto passin;
-#endif
+#endif /* IPSEC */
/*
* Run through list of hooks for input packets.
@@ -458,7 +410,7 @@
*/
/* Jump over all PFIL processing if hooks are not active. */
- if (inet_pfil_hook.ph_busy_count == -1)
+ if (!PFIL_HOOKED(&inet_pfil_hook))
goto passin;
odst = ip->ip_dst;
@@ -476,9 +428,6 @@
m->m_flags &= ~M_FASTFWD_OURS;
goto ours;
}
-#ifndef IPFIREWALL_FORWARD_EXTENDED
- dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL);
-#else
if ((dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL)) != 0) {
/*
* Directly ship on the packet. This allows to forward packets
@@ -488,7 +437,6 @@
ip_forward(m, dchg);
return;
}
-#endif /* IPFIREWALL_FORWARD_EXTENDED */
#endif /* IPFIREWALL_FORWARD */
passin:
@@ -584,6 +532,12 @@
#endif
}
}
+ /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */
+ if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
+ ipstat.ips_cantforward++;
+ m_freem(m);
+ return;
+ }
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
struct in_multi *inm;
if (ip_mrouter) {
@@ -650,42 +604,9 @@
m_freem(m);
} else {
#ifdef IPSEC
- /*
- * Enforce inbound IPsec SPD.
- */
- if (ipsec4_in_reject(m, NULL)) {
- ipsecstat.in_polvio++;
+ if (ip_ipsec_fwd(m))
goto bad;
- }
#endif /* IPSEC */
-#ifdef FAST_IPSEC
- mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
- s = splnet();
- if (mtag != NULL) {
- tdbi = (struct tdb_ident *)(mtag + 1);
- sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
- } else {
- sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
- IP_FORWARDING, &error);
- }
- if (sp == NULL) { /* NB: can happen if error */
- splx(s);
- /*XXX error stat???*/
- DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/
- goto bad;
- }
-
- /*
- * Check security policy against packet attributes.
- */
- error = ipsec_in_reject(sp, m);
- KEY_FREESP(&sp);
- splx(s);
- if (error) {
- ipstat.ips_cantforward++;
- goto bad;
- }
-#endif /* FAST_IPSEC */
ip_forward(m, dchg);
}
return;
@@ -732,51 +653,9 @@
* note that we do not visit this with protocols with pcb layer
* code - like udp/tcp/raw ip.
*/
- if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
- ipsec4_in_reject(m, NULL)) {
- ipsecstat.in_polvio++;
+ if (ip_ipsec_input(m))
goto bad;
- }
-#endif
-#if FAST_IPSEC
- /*
- * enforce IPsec policy checking if we are seeing last header.
- * note that we do not visit this with protocols with pcb layer
- * code - like udp/tcp/raw ip.
- */
- if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
- /*
- * Check if the packet has already had IPsec processing
- * done. If so, then just pass it along. This tag gets
- * set during AH, ESP, etc. input handling, before the
- * packet is returned to the ip input queue for delivery.
- */
- mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
- s = splnet();
- if (mtag != NULL) {
- tdbi = (struct tdb_ident *)(mtag + 1);
- sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
- } else {
- sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
- IP_FORWARDING, &error);
- }
- if (sp != NULL) {
- /*
- * Check security policy against packet attributes.
- */
- error = ipsec_in_reject(sp, m);
- KEY_FREESP(&sp);
- } else {
- /* XXX error stat??? */
- error = EINVAL;
-DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
- goto bad;
- }
- splx(s);
- if (error)
- goto bad;
- }
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
/*
* Switch out to protocol's input routine.
@@ -817,6 +696,16 @@
uma_zone_set_max(ipq_zone, 1);
}
+static void
+ipq_zone_change(void *tag)
+{
+
+ if (maxnipq > 0 && maxnipq < (nmbclusters / 32)) {
+ maxnipq = nmbclusters / 32;
+ maxnipq_update();
+ }
+}
+
static int
sysctl_maxnipq(SYSCTL_HANDLER_ARGS)
{
@@ -852,7 +741,6 @@
* to the first packet/fragment are preserved.
* The IP header is *NOT* adjusted out of iplen.
*/
-
struct mbuf *
ip_reass(struct mbuf *m)
{
@@ -966,6 +854,7 @@
#ifdef MAC
if (mac_init_ipq(fp, M_NOWAIT) != 0) {
uma_zfree(ipq_zone, fp);
+ fp = NULL;
goto dropfrag;
}
mac_create_ipq(m, fp);
@@ -1167,11 +1056,9 @@
* associated datagrams.
*/
static void
-ip_freef(fhp, fp)
- struct ipqhead *fhp;
- struct ipq *fp;
+ip_freef(struct ipqhead *fhp, struct ipq *fp)
{
- register struct mbuf *q;
+ struct mbuf *q;
IPQ_LOCK_ASSERT();
@@ -1191,9 +1078,9 @@
* queue, discard it.
*/
void
-ip_slowtimo()
+ip_slowtimo(void)
{
- register struct ipq *fp;
+ struct ipq *fp;
int i;
IPQ_LOCK();
@@ -1230,7 +1117,7 @@
* Drain off all datagram fragments.
*/
void
-ip_drain()
+ip_drain(void)
{
int i;
@@ -1306,296 +1193,12 @@
return (0);
}
-
-/*
- * Do option processing on a datagram,
- * possibly discarding it if bad options are encountered,
- * or forwarding it if source-routed.
- * The pass argument is used when operating in the IPSTEALTH
- * mode to tell what options to process:
- * [LS]SRR (pass 0) or the others (pass 1).
- * The reason for as many as two passes is that when doing IPSTEALTH,
- * non-routing options should be processed only if the packet is for us.
- * Returns 1 if packet has been forwarded/freed,
- * 0 if the packet should be processed further.
- */
-static int
-ip_dooptions(struct mbuf *m, int pass)
-{
- struct ip *ip = mtod(m, struct ip *);
- u_char *cp;
- struct in_ifaddr *ia;
- int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
- struct in_addr *sin, dst;
- n_time ntime;
- struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
-
- /* ignore or reject packets with IP options */
- if (ip_doopts == 0)
- return 0;
- else if (ip_doopts == 2) {
- type = ICMP_UNREACH;
- code = ICMP_UNREACH_FILTER_PROHIB;
- goto bad;
- }
-
- dst = ip->ip_dst;
- cp = (u_char *)(ip + 1);
- cnt = (ip->ip_hl << 2) - sizeof (struct ip);
- for (; cnt > 0; cnt -= optlen, cp += optlen) {
- opt = cp[IPOPT_OPTVAL];
- if (opt == IPOPT_EOL)
- break;
- if (opt == IPOPT_NOP)
- optlen = 1;
- else {
- if (cnt < IPOPT_OLEN + sizeof(*cp)) {
- code = &cp[IPOPT_OLEN] - (u_char *)ip;
- goto bad;
- }
- optlen = cp[IPOPT_OLEN];
- if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
- code = &cp[IPOPT_OLEN] - (u_char *)ip;
- goto bad;
- }
- }
- switch (opt) {
-
- default:
- break;
-
- /*
- * Source routing with record.
- * Find interface with current destination address.
- * If none on this machine then drop if strictly routed,
- * or do nothing if loosely routed.
- * Record interface address and bring up next address
- * component. If strictly routed make sure next
- * address is on directly accessible net.
- */
- case IPOPT_LSRR:
- case IPOPT_SSRR:
-#ifdef IPSTEALTH
- if (ipstealth && pass > 0)
- break;
-#endif
- if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
- code = &cp[IPOPT_OLEN] - (u_char *)ip;
- goto bad;
- }
- if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
- code = &cp[IPOPT_OFFSET] - (u_char *)ip;
- goto bad;
- }
- ipaddr.sin_addr = ip->ip_dst;
- ia = (struct in_ifaddr *)
- ifa_ifwithaddr((struct sockaddr *)&ipaddr);
- if (ia == NULL) {
- if (opt == IPOPT_SSRR) {
- type = ICMP_UNREACH;
- code = ICMP_UNREACH_SRCFAIL;
- goto bad;
- }
- if (!ip_dosourceroute)
- goto nosourcerouting;
- /*
- * Loose routing, and not at next destination
- * yet; nothing to do except forward.
- */
- break;
- }
- off--; /* 0 origin */
- if (off > optlen - (int)sizeof(struct in_addr)) {
- /*
- * End of source route. Should be for us.
- */
- if (!ip_acceptsourceroute)
- goto nosourcerouting;
- save_rte(m, cp, ip->ip_src);
- break;
- }
-#ifdef IPSTEALTH
- if (ipstealth)
- goto dropit;
-#endif
- if (!ip_dosourceroute) {
- if (ipforwarding) {
- char buf[16]; /* aaa.bbb.ccc.ddd\0 */
- /*
- * Acting as a router, so generate ICMP
- */
-nosourcerouting:
- strcpy(buf, inet_ntoa(ip->ip_dst));
- log(LOG_WARNING,
- "attempted source route from %s to %s\n",
- inet_ntoa(ip->ip_src), buf);
- type = ICMP_UNREACH;
- code = ICMP_UNREACH_SRCFAIL;
- goto bad;
- } else {
- /*
- * Not acting as a router, so silently drop.
- */
-#ifdef IPSTEALTH
-dropit:
-#endif
- ipstat.ips_cantforward++;
- m_freem(m);
- return (1);
- }
- }
-
- /*
- * locate outgoing interface
- */
- (void)memcpy(&ipaddr.sin_addr, cp + off,
- sizeof(ipaddr.sin_addr));
-
- if (opt == IPOPT_SSRR) {
-#define INA struct in_ifaddr *
-#define SA struct sockaddr *
- if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == NULL)
- ia = (INA)ifa_ifwithnet((SA)&ipaddr);
- } else
- ia = ip_rtaddr(ipaddr.sin_addr);
- if (ia == NULL) {
- type = ICMP_UNREACH;
- code = ICMP_UNREACH_SRCFAIL;
- goto bad;
- }
- ip->ip_dst = ipaddr.sin_addr;
- (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
- sizeof(struct in_addr));
- cp[IPOPT_OFFSET] += sizeof(struct in_addr);
- /*
- * Let ip_intr's mcast routing check handle mcast pkts
- */
- forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
- break;
-
- case IPOPT_RR:
-#ifdef IPSTEALTH
- if (ipstealth && pass == 0)
- break;
-#endif
- if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
- code = &cp[IPOPT_OFFSET] - (u_char *)ip;
- goto bad;
- }
- if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
- code = &cp[IPOPT_OFFSET] - (u_char *)ip;
- goto bad;
- }
- /*
- * If no space remains, ignore.
- */
- off--; /* 0 origin */
- if (off > optlen - (int)sizeof(struct in_addr))
- break;
- (void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
- sizeof(ipaddr.sin_addr));
- /*
- * locate outgoing interface; if we're the destination,
- * use the incoming interface (should be same).
- */
- if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == NULL &&
- (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) {
- type = ICMP_UNREACH;
- code = ICMP_UNREACH_HOST;
- goto bad;
- }
- (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
- sizeof(struct in_addr));
- cp[IPOPT_OFFSET] += sizeof(struct in_addr);
- break;
-
- case IPOPT_TS:
-#ifdef IPSTEALTH
- if (ipstealth && pass == 0)
- break;
-#endif
- code = cp - (u_char *)ip;
- if (optlen < 4 || optlen > 40) {
- code = &cp[IPOPT_OLEN] - (u_char *)ip;
- goto bad;
- }
- if ((off = cp[IPOPT_OFFSET]) < 5) {
- code = &cp[IPOPT_OLEN] - (u_char *)ip;
- goto bad;
- }
- if (off > optlen - (int)sizeof(int32_t)) {
- cp[IPOPT_OFFSET + 1] += (1 << 4);
- if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
- code = &cp[IPOPT_OFFSET] - (u_char *)ip;
- goto bad;
- }
- break;
- }
- off--; /* 0 origin */
- sin = (struct in_addr *)(cp + off);
- switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
-
- case IPOPT_TS_TSONLY:
- break;
-
- case IPOPT_TS_TSANDADDR:
- if (off + sizeof(n_time) +
- sizeof(struct in_addr) > optlen) {
- code = &cp[IPOPT_OFFSET] - (u_char *)ip;
- goto bad;
- }
- ipaddr.sin_addr = dst;
- ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
- m->m_pkthdr.rcvif);
- if (ia == NULL)
- continue;
- (void)memcpy(sin, &IA_SIN(ia)->sin_addr,
- sizeof(struct in_addr));
- cp[IPOPT_OFFSET] += sizeof(struct in_addr);
- off += sizeof(struct in_addr);
- break;
-
- case IPOPT_TS_PRESPEC:
- if (off + sizeof(n_time) +
- sizeof(struct in_addr) > optlen) {
- code = &cp[IPOPT_OFFSET] - (u_char *)ip;
- goto bad;
- }
- (void)memcpy(&ipaddr.sin_addr, sin,
- sizeof(struct in_addr));
- if (ifa_ifwithaddr((SA)&ipaddr) == NULL)
- continue;
- cp[IPOPT_OFFSET] += sizeof(struct in_addr);
- off += sizeof(struct in_addr);
- break;
-
- default:
- code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
- goto bad;
- }
- ntime = iptime();
- (void)memcpy(cp + off, &ntime, sizeof(n_time));
- cp[IPOPT_OFFSET] += sizeof(n_time);
- }
- }
- if (forward && ipforwarding) {
- ip_forward(m, 1);
- return (1);
- }
- return (0);
-bad:
- icmp_error(m, type, code, 0, 0);
- ipstat.ips_badoptions++;
- return (1);
-}
-
/*
* Given address of next destination (final or next hop),
* return internet address info of interface to be used to get there.
*/
struct in_ifaddr *
-ip_rtaddr(dst)
- struct in_addr dst;
+ip_rtaddr(struct in_addr dst)
{
struct route sro;
struct sockaddr_in *sin;
@@ -1616,143 +1219,6 @@
return (ifa);
}
-/*
- * Save incoming source route for use in replies,
- * to be picked up later by ip_srcroute if the receiver is interested.
- */
-static void
-save_rte(m, option, dst)
- struct mbuf *m;
- u_char *option;
- struct in_addr dst;
-{
- unsigned olen;
- struct ipopt_tag *opts;
-
- opts = (struct ipopt_tag *)m_tag_get(PACKET_TAG_IPOPTIONS,
- sizeof(struct ipopt_tag), M_NOWAIT);
- if (opts == NULL)
- return;
-
- olen = option[IPOPT_OLEN];
-#ifdef DIAGNOSTIC
- if (ipprintfs)
- printf("save_rte: olen %d\n", olen);
-#endif
- if (olen > sizeof(opts->ip_srcrt) - (1 + sizeof(dst))) {
- m_tag_free((struct m_tag *)opts);
- return;
- }
- bcopy(option, opts->ip_srcrt.srcopt, olen);
- opts->ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
- opts->ip_srcrt.dst = dst;
- m_tag_prepend(m, (struct m_tag *)opts);
-}
-
-/*
- * Retrieve incoming source route for use in replies,
- * in the same form used by setsockopt.
- * The first hop is placed before the options, will be removed later.
- */
-struct mbuf *
-ip_srcroute(m0)
- struct mbuf *m0;
-{
- register struct in_addr *p, *q;
- register struct mbuf *m;
- struct ipopt_tag *opts;
-
- opts = (struct ipopt_tag *)m_tag_find(m0, PACKET_TAG_IPOPTIONS, NULL);
- if (opts == NULL)
- return (NULL);
-
- if (opts->ip_nhops == 0)
- return (NULL);
- m = m_get(M_DONTWAIT, MT_DATA);
- if (m == NULL)
- return (NULL);
-
-#define OPTSIZ (sizeof(opts->ip_srcrt.nop) + sizeof(opts->ip_srcrt.srcopt))
-
- /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
- m->m_len = opts->ip_nhops * sizeof(struct in_addr) +
- sizeof(struct in_addr) + OPTSIZ;
-#ifdef DIAGNOSTIC
- if (ipprintfs)
- printf("ip_srcroute: nhops %d mlen %d", opts->ip_nhops, m->m_len);
-#endif
-
- /*
- * First save first hop for return route
- */
- p = &(opts->ip_srcrt.route[opts->ip_nhops - 1]);
- *(mtod(m, struct in_addr *)) = *p--;
-#ifdef DIAGNOSTIC
- if (ipprintfs)
- printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr));
-#endif
-
- /*
- * Copy option fields and padding (nop) to mbuf.
- */
- opts->ip_srcrt.nop = IPOPT_NOP;
- opts->ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
- (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
- &(opts->ip_srcrt.nop), OPTSIZ);
- q = (struct in_addr *)(mtod(m, caddr_t) +
- sizeof(struct in_addr) + OPTSIZ);
-#undef OPTSIZ
- /*
- * Record return path as an IP source route,
- * reversing the path (pointers are now aligned).
- */
- while (p >= opts->ip_srcrt.route) {
-#ifdef DIAGNOSTIC
- if (ipprintfs)
- printf(" %lx", (u_long)ntohl(q->s_addr));
-#endif
- *q++ = *p--;
- }
- /*
- * Last hop goes to final destination.
- */
- *q = opts->ip_srcrt.dst;
-#ifdef DIAGNOSTIC
- if (ipprintfs)
- printf(" %lx\n", (u_long)ntohl(q->s_addr));
-#endif
- m_tag_delete(m0, (struct m_tag *)opts);
- return (m);
-}
-
-/*
- * Strip out IP options, at higher
- * level protocol in the kernel.
- * Second argument is buffer to which options
- * will be moved, and return value is their length.
- * XXX should be deleted; last arg currently ignored.
- */
-void
-ip_stripoptions(m, mopt)
- register struct mbuf *m;
- struct mbuf *mopt;
-{
- register int i;
- struct ip *ip = mtod(m, struct ip *);
- register caddr_t opts;
- int olen;
-
- olen = (ip->ip_hl << 2) - sizeof (struct ip);
- opts = (caddr_t)(ip + 1);
- i = m->m_len - (sizeof (struct ip) + olen);
- bcopy(opts + olen, opts, (unsigned)i);
- m->m_len -= olen;
- if (m->m_flags & M_PKTHDR)
- m->m_pkthdr.len -= olen;
- ip->ip_v = IPVERSION;
- ip->ip_hl = sizeof(struct ip) >> 2;
-}
-
u_char inetctlerrmap[PRC_NCMDS] = {
0, 0, 0, 0,
0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
@@ -1785,14 +1251,6 @@
struct in_addr dest;
int error, type = 0, code = 0, mtu = 0;
-#ifdef DIAGNOSTIC
- if (ipprintfs)
- printf("forward: src %lx dst %lx ttl %x\n",
- (u_long)ip->ip_src.s_addr, (u_long)ip->ip_dst.s_addr,
- ip->ip_ttl);
-#endif
-
-
if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
ipstat.ips_cantforward++;
m_freem(m);
@@ -1810,7 +1268,8 @@
}
#endif
- if (!srcrt && (ia = ip_rtaddr(ip->ip_dst)) == NULL) {
+ ia = ip_rtaddr(ip->ip_dst);
+ if (!srcrt && ia == NULL) {
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
return;
}
@@ -1831,7 +1290,7 @@
* assume exclusive access to the IP header in `m', so any
* data in a cluster may change before we reach icmp_error().
*/
- MGET(mcopy, M_DONTWAIT, m->m_type);
+ MGETHDR(mcopy, M_DONTWAIT, m->m_type);
if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
/*
* It's probably ok if the pkthdr dup fails (because
@@ -1843,8 +1302,7 @@
mcopy = NULL;
}
if (mcopy != NULL) {
- mcopy->m_len = imin((ip->ip_hl << 2) + 8,
- (int)ip->ip_len);
+ mcopy->m_len = min(ip->ip_len, M_TRAILINGSPACE(mcopy));
mcopy->m_pkthdr.len = mcopy->m_len;
m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
}
@@ -1894,10 +1352,6 @@
/* Router requirements says to only send host redirects */
type = ICMP_REDIRECT;
code = ICMP_REDIRECT_HOST;
-#ifdef DIAGNOSTIC
- if (ipprintfs)
- printf("redirect (%d) to %lx\n", code, (u_long)dest.s_addr);
-#endif
}
}
if (rt)
@@ -1938,63 +1392,10 @@
case EMSGSIZE:
type = ICMP_UNREACH;
code = ICMP_UNREACH_NEEDFRAG;
-#if defined(IPSEC) || defined(FAST_IPSEC)
- /*
- * If the packet is routed over IPsec tunnel, tell the
- * originator the tunnel MTU.
- * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
- * XXX quickhack!!!
- */
- {
- struct secpolicy *sp = NULL;
- int ipsecerror;
- int ipsechdr;
- struct route *ro;
-
-#ifdef IPSEC
- sp = ipsec4_getpolicybyaddr(mcopy,
- IPSEC_DIR_OUTBOUND,
- IP_FORWARDING,
- &ipsecerror);
-#else /* FAST_IPSEC */
- sp = ipsec_getpolicybyaddr(mcopy,
- IPSEC_DIR_OUTBOUND,
- IP_FORWARDING,
- &ipsecerror);
-#endif
- if (sp != NULL) {
- /* count IPsec header size */
- ipsechdr = ipsec4_hdrsiz(mcopy,
- IPSEC_DIR_OUTBOUND,
- NULL);
-
- /*
- * find the correct route for outer IPv4
- * header, compute tunnel MTU.
- */
- if (sp->req != NULL
- && sp->req->sav != NULL
- && sp->req->sav->sah != NULL) {
- ro = &sp->req->sav->sah->sa_route;
- if (ro->ro_rt && ro->ro_rt->rt_ifp) {
- mtu =
- ro->ro_rt->rt_rmx.rmx_mtu ?
- ro->ro_rt->rt_rmx.rmx_mtu :
- ro->ro_rt->rt_ifp->if_mtu;
- mtu -= ipsechdr;
- }
- }
#ifdef IPSEC
- key_freesp(sp);
-#else /* FAST_IPSEC */
- KEY_FREESP(&sp);
-#endif
- ipstat.ips_cantfrag++;
- break;
- }
- }
-#endif /*IPSEC || FAST_IPSEC*/
+ mtu = ip_ipsec_mtu(m);
+#endif /* IPSEC */
/*
* If the MTU wasn't set before use the interface mtu or
* fall back to the next smaller mtu step compared to the
@@ -2035,11 +1436,8 @@
}
void
-ip_savecontrol(inp, mp, ip, m)
- register struct inpcb *inp;
- register struct mbuf **mp;
- register struct ip *ip;
- register struct mbuf *m;
+ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
+ struct mbuf *m)
{
if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
struct bintime bt;
@@ -2104,8 +1502,7 @@
if (((ifp = m->m_pkthdr.rcvif))
&& ( ifp->if_index && (ifp->if_index <= if_index))) {
- sdp = (struct sockaddr_dl *)
- (ifaddr_byindex(ifp->if_index)->ifa_addr);
+ sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
/*
* Change our mind and don't try copy.
*/
@@ -2130,11 +1527,10 @@
}
/*
- * XXX these routines are called from the upper part of the kernel.
- * They need to be locked when we remove Giant.
- *
- * They could also be moved to ip_mroute.c, since all the RSVP
- * handling is done there already.
+ * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
+ * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on
+ * locking. This code remains in ip_input.c as ip_mroute.c is optionally
+ * compiled.
*/
static int ip_rsvp_on;
struct socket *ip_rsvpd;
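
For reference, the ip_srcroute() code removed above builds the reply route by reversing the recorded hops: the first hop of the reply is the last hop taken on input, the earlier hops follow in reverse order, and the original destination becomes the final entry. A minimal userland sketch of that reversal (the file name, addresses and variable names are invented for illustration; this is not the kernel code):

	/* srcroute_demo.c -- toy model of the reply-route reversal done by
	 * the removed ip_srcroute() above.  Userland only. */
	#include <stdio.h>
	#include <netinet/in.h>
	#include <arpa/inet.h>

	#define NHOPS 3

	int
	main(void)
	{
		struct in_addr route[NHOPS];	/* hops recorded from the incoming header */
		struct in_addr dst;		/* original destination (opts->ip_srcrt.dst) */
		struct in_addr reply[NHOPS + 1];
		int i, n = 0;

		inet_aton("10.0.0.1", &route[0]);
		inet_aton("10.0.0.2", &route[1]);
		inet_aton("10.0.0.3", &route[2]);
		inet_aton("192.0.2.7", &dst);

		/* First hop of the reply is the last recorded hop... */
		reply[n++] = route[NHOPS - 1];
		/* ...then the earlier hops in reverse order... */
		for (i = NHOPS - 2; i >= 0; i--)
			reply[n++] = route[i];
		/* ...and the last entry is the original destination. */
		reply[n++] = dst;

		for (i = 0; i < n; i++)
			printf("hop %d: %s\n", i, inet_ntoa(reply[i]));
		return (0);
	}

Compiled with plain cc, this prints 10.0.0.3, 10.0.0.2, 10.0.0.1, 192.0.2.7 -- the recorded route turned around for the reply.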
--- /dev/null
+++ sys/netinet/tcp_syncache.h
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
+ * $FreeBSD: src/sys/netinet/tcp_syncache.h,v 1.1 2007/07/27 00:57:06 silby Exp $
+ */
+
+#ifndef _NETINET_TCP_SYNCACHE_H_
+#define _NETINET_TCP_SYNCACHE_H_
+#ifdef _KERNEL
+
+void syncache_init(void);
+void syncache_unreach(struct in_conninfo *, struct tcphdr *);
+int syncache_expand(struct in_conninfo *, struct tcpopt *,
+ struct tcphdr *, struct socket **, struct mbuf *);
+void syncache_add(struct in_conninfo *, struct tcpopt *,
+ struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *);
+void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
+void syncache_badack(struct in_conninfo *);
+int syncache_pcbcount(void);
+int syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported);
+
+#endif /* _KERNEL */
+#endif /* _NETINET_TCP_SYNCACHE_H_ */
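
The header added above is the interface the rest of the stack uses. Judging from the assertions and comments in the tcp_syncache.c diff further down, syncache_add() handles the initial SYN arriving on a listening socket, syncache_expand() handles the ACK that completes the handshake, and syncache_chkrst()/syncache_badack()/syncache_unreach() clean up entries after RSTs, bad ACKs and ICMP errors. A small userland toy that mirrors that dispatch; the real demultiplexing lives in tcp_input(), and everything below is invented for illustration:

	/* dispatch_demo.c -- which tcp_syncache.h entry point would handle a
	 * given segment on a listening socket, per the KASSERTs in the
	 * tcp_syncache.c diff below.  Userland toy, not kernel code. */
	#include <stdio.h>

	#define TH_SYN	0x02
	#define TH_RST	0x04
	#define TH_ACK	0x10

	static const char *
	syncache_entry_point(int flags)
	{
		if (flags & TH_RST)
			return ("syncache_chkrst");	/* RST against a pending entry */
		if ((flags & (TH_SYN | TH_ACK | TH_RST)) == TH_SYN)
			return ("syncache_add");	/* pure SYN: create/refresh entry */
		if ((flags & (TH_SYN | TH_ACK | TH_RST)) == TH_ACK)
			return ("syncache_expand");	/* pure ACK: complete handshake */
		return ("not a syncache case");
	}

	int
	main(void)
	{
		printf("SYN     -> %s\n", syncache_entry_point(TH_SYN));
		printf("ACK     -> %s\n", syncache_entry_point(TH_ACK));
		printf("RST     -> %s\n", syncache_entry_point(TH_RST));
		printf("SYN|ACK -> %s\n", syncache_entry_point(TH_SYN | TH_ACK));
		return (0);
	}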
Index: pim_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/pim_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/pim_var.h -L sys/netinet/pim_var.h -u -r1.1.1.1 -r1.2
--- sys/netinet/pim_var.h
+++ sys/netinet/pim_var.h
@@ -27,7 +27,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/netinet/pim_var.h,v 1.2 2005/01/07 01:45:44 imp Exp $
+ * $FreeBSD: src/sys/netinet/pim_var.h,v 1.3 2005/08/10 07:10:02 obrien Exp $
*/
#ifndef _NETINET_PIM_VAR_H_
@@ -71,7 +71,6 @@
}
#ifdef _KERNEL
-extern struct pimstat pimstat;
void pim_input(struct mbuf *, int);
SYSCTL_DECL(_net_inet_pim);
Index: ip_ecn.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_ecn.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/ip_ecn.c -L sys/netinet/ip_ecn.c -u -r1.1.1.1 -r1.2
--- sys/netinet/ip_ecn.c
+++ sys/netinet/ip_ecn.c
@@ -1,4 +1,3 @@
-/* $FreeBSD: src/sys/netinet/ip_ecn.c,v 1.7 2005/01/07 01:45:44 imp Exp $ */
/* $KAME: ip_ecn.c,v 1.12 2002/01/07 11:34:47 kjc Exp $ */
/*-
@@ -35,6 +34,9 @@
* http://www.aciri.org/floyd/papers/draft-ipsec-ecn-00.txt
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_ecn.c,v 1.9 2007/10/07 20:44:22 silby Exp $");
+
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -90,11 +92,9 @@
* modify outer ECN (TOS) field on ingress operation (tunnel encapsulation).
*/
void
-ip_ecn_ingress(mode, outer, inner)
- int mode;
- u_int8_t *outer;
- const u_int8_t *inner;
+ip_ecn_ingress(int mode, u_int8_t *outer, const u_int8_t *inner)
{
+
if (!outer || !inner)
panic("NULL pointer passed to ip_ecn_ingress");
@@ -124,11 +124,9 @@
* the caller should drop the packet if the return value is 0.
*/
int
-ip_ecn_egress(mode, outer, inner)
- int mode;
- const u_int8_t *outer;
- u_int8_t *inner;
+ip_ecn_egress(int mode, const u_int8_t *outer, u_int8_t *inner)
{
+
if (!outer || !inner)
panic("NULL pointer passed to ip_ecn_egress");
@@ -160,10 +158,7 @@
#ifdef INET6
void
-ip6_ecn_ingress(mode, outer, inner)
- int mode;
- u_int32_t *outer;
- const u_int32_t *inner;
+ip6_ecn_ingress(int mode, u_int32_t *outer, const u_int32_t *inner)
{
u_int8_t outer8, inner8;
@@ -177,10 +172,7 @@
}
int
-ip6_ecn_egress(mode, outer, inner)
- int mode;
- const u_int32_t *outer;
- u_int32_t *inner;
+ip6_ecn_egress(int mode, const u_int32_t *outer, u_int32_t *inner)
{
u_int8_t outer8, inner8, oinner8;
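
The tcp_syncache.c diff below replaces the global timer queues with per-bucket state; as the updated syncache_insert() shows, new entries now go to the head of a bucket row and, when the row hits its limit, the oldest entry at the tail is dropped. A minimal userland model of that eviction policy using the same sys/queue.h macros (the limit value and all names here are made up):

	/* bucket_demo.c -- toy model of the per-bucket overflow policy in the
	 * updated syncache_insert() below.  Userland only. */
	#include <sys/queue.h>
	#include <err.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define BUCKET_LIMIT 4

	struct entry {
		int id;
		TAILQ_ENTRY(entry) link;
	};

	static TAILQ_HEAD(bucket_head, entry) bucket = TAILQ_HEAD_INITIALIZER(bucket);
	static unsigned length;

	static void
	bucket_insert(int id)
	{
		struct entry *e, *old;

		if (length >= BUCKET_LIMIT) {
			/* Bucket full: drop the oldest entry, which sits at the tail. */
			old = TAILQ_LAST(&bucket, bucket_head);
			TAILQ_REMOVE(&bucket, old, link);
			free(old);
			length--;
		}
		if ((e = malloc(sizeof(*e))) == NULL)
			err(1, "malloc");
		e->id = id;
		/* New entries always go to the head of the bucket row. */
		TAILQ_INSERT_HEAD(&bucket, e, link);
		length++;
	}

	int
	main(void)
	{
		struct entry *e;
		int i;

		for (i = 1; i <= 6; i++)
			bucket_insert(i);
		/* Entries 1 and 2 have been evicted; 6 is newest, 3 is oldest. */
		TAILQ_FOREACH(e, &bucket, link)
			printf("%d ", e->id);
		printf("\n");
		return (0);
	}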
Index: tcp_syncache.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_syncache.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -L sys/netinet/tcp_syncache.c -L sys/netinet/tcp_syncache.c -u -r1.8 -r1.9
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -1,5 +1,6 @@
/*-
* Copyright (c) 2001 McAfee, Inc.
+ * Copyright (c) 2006 Andre Oppermann, Internet Business Solutions AG
* All rights reserved.
*
* This software was developed for the FreeBSD Project by Jonathan Lemon
@@ -27,32 +28,33 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/tcp_syncache.c,v 1.74.2.5 2006/02/16 01:06:22 qingli Exp $
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_syncache.c,v 1.130.2.6 2007/12/20 12:34:32 ru Exp $");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_mac.h"
-#include "opt_tcpdebug.h"
-#include "opt_tcp_sack.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/malloc.h>
-#include <sys/mac.h>
#include <sys/mbuf.h>
#include <sys/md5.h>
#include <sys/proc.h> /* for proc0 declaration */
#include <sys/random.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
+#include <sys/syslog.h>
+
+#include <vm/uma.h>
#include <net/if.h>
#include <net/route.h>
@@ -63,6 +65,7 @@
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
@@ -71,62 +74,108 @@
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/tcp.h>
-#ifdef TCPDEBUG
-#include <netinet/tcpip.h>
-#endif
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
-#ifdef TCPDEBUG
-#include <netinet/tcp_debug.h>
-#endif
+#include <netinet/tcp_syncache.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#ifdef INET6
-#include <netinet6/ipsec6.h>
-#endif
-#endif /*IPSEC*/
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#ifdef INET6
#include <netipsec/ipsec6.h>
#endif
#include <netipsec/key.h>
-#endif /*FAST_IPSEC*/
+#endif /*IPSEC*/
#include <machine/in_cksum.h>
-#include <vm/uma.h>
+
+#include <security/mac/mac_framework.h>
static int tcp_syncookies = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_RW,
&tcp_syncookies, 0,
"Use TCP SYN cookies if the syncache overflows");
+static int tcp_syncookiesonly = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_RW,
+ &tcp_syncookiesonly, 0,
+ "Use only TCP SYN cookies");
+
+#define SYNCOOKIE_SECRET_SIZE 8 /* dwords */
+#define SYNCOOKIE_LIFETIME 16 /* seconds */
+
+struct syncache {
+ TAILQ_ENTRY(syncache) sc_hash;
+ struct in_conninfo sc_inc; /* addresses */
+ int sc_rxttime; /* retransmit time */
+ u_int16_t sc_rxmits; /* retransmit counter */
+
+ u_int32_t sc_tsreflect; /* timestamp to reflect */
+ u_int32_t sc_ts; /* our timestamp to send */
+ u_int32_t sc_tsoff; /* ts offset w/ syncookies */
+ u_int32_t sc_flowlabel; /* IPv6 flowlabel */
+ tcp_seq sc_irs; /* seq from peer */
+ tcp_seq sc_iss; /* our ISS */
+ struct mbuf *sc_ipopts; /* source route */
+
+ u_int16_t sc_peer_mss; /* peer's MSS */
+ u_int16_t sc_wnd; /* advertised window */
+ u_int8_t sc_ip_ttl; /* IPv4 TTL */
+ u_int8_t sc_ip_tos; /* IPv4 TOS */
+ u_int8_t sc_requested_s_scale:4,
+ sc_requested_r_scale:4;
+ u_int8_t sc_flags;
+#define SCF_NOOPT 0x01 /* no TCP options */
+#define SCF_WINSCALE 0x02 /* negotiated window scaling */
+#define SCF_TIMESTAMP 0x04 /* negotiated timestamps */
+ /* MSS is implicit */
+#define SCF_UNREACH 0x10 /* icmp unreachable received */
+#define SCF_SIGNATURE 0x20 /* send MD5 digests */
+#define SCF_SACK 0x80 /* send SACK option */
+#ifndef DISABLE_TCP_OFFLOAD
+ void *sc_pspare[2]; /* toepcb / toe_usrreqs */
+#endif
+#ifdef MAC
+ struct label *sc_label; /* MAC label reference */
+#endif
+};
+
+struct syncache_head {
+ struct mtx sch_mtx;
+ TAILQ_HEAD(sch_head, syncache) sch_bucket;
+ struct callout sch_timer;
+ int sch_nextc;
+ u_int sch_length;
+ u_int sch_oddeven;
+ u_int32_t sch_secbits_odd[SYNCOOKIE_SECRET_SIZE];
+ u_int32_t sch_secbits_even[SYNCOOKIE_SECRET_SIZE];
+ u_int sch_reseed; /* time_uptime, seconds */
+};
+
static void syncache_drop(struct syncache *, struct syncache_head *);
static void syncache_free(struct syncache *);
static void syncache_insert(struct syncache *, struct syncache_head *);
struct syncache *syncache_lookup(struct in_conninfo *, struct syncache_head **);
-#ifdef TCPDEBUG
-static int syncache_respond(struct syncache *, struct mbuf *, struct socket *);
-#else
-static int syncache_respond(struct syncache *, struct mbuf *);
-#endif
+static int syncache_respond(struct syncache *);
static struct socket *syncache_socket(struct syncache *, struct socket *,
struct mbuf *m);
+static void syncache_timeout(struct syncache *sc, struct syncache_head *sch,
+ int docallout);
static void syncache_timer(void *);
-static u_int32_t syncookie_generate(struct syncache *, u_int32_t *);
-static struct syncache *syncookie_lookup(struct in_conninfo *,
- struct tcphdr *, struct socket *);
+static void syncookie_generate(struct syncache_head *, struct syncache *,
+ u_int32_t *);
+static struct syncache
+ *syncookie_lookup(struct in_conninfo *, struct syncache_head *,
+ struct syncache *, struct tcpopt *, struct tcphdr *,
+ struct socket *);
/*
* Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies.
- * 3 retransmits corresponds to a timeout of (1 + 2 + 4 + 8 == 15) seconds,
+ * 3 retransmits corresponds to a timeout of 3 * (1 + 2 + 4 + 8) == 45 seconds,
* the odds are that the user has given up attempting to connect by then.
*/
#define SYNCACHE_MAXREXMTS 3
@@ -141,12 +190,10 @@
u_int hashsize;
u_int hashmask;
u_int bucket_limit;
- u_int cache_count;
+ u_int cache_count; /* XXX: unprotected */
u_int cache_limit;
u_int rexmt_limit;
u_int hash_secret;
- TAILQ_HEAD(, syncache) timerq[SYNCACHE_MAXREXMTS + 1];
- struct callout tt_timerq[SYNCACHE_MAXREXMTS + 1];
};
static struct tcp_syncache tcp_syncache;
@@ -167,6 +214,10 @@
SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW,
&tcp_syncache.rexmt_limit, 0, "Limit on SYN/ACK retransmissions");
+int tcp_sc_rst_sock_fail = 1;
+SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rst_on_sock_fail, CTLFLAG_RW,
+ &tcp_sc_rst_sock_fail, 0, "Send reset on socket allocation failure");
+
static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache");
#define SYNCACHE_HASH(inc, mask) \
@@ -190,21 +241,21 @@
#define ENDPTS6_EQ(a, b) (memcmp(a, b, sizeof(*a)) == 0)
-#define SYNCACHE_TIMEOUT(sc, slot) do { \
- sc->sc_rxtslot = (slot); \
- sc->sc_rxttime = ticks + TCPTV_RTOBASE * tcp_backoff[(slot)]; \
- TAILQ_INSERT_TAIL(&tcp_syncache.timerq[(slot)], sc, sc_timerq); \
- if (!callout_active(&tcp_syncache.tt_timerq[(slot)])) \
- callout_reset(&tcp_syncache.tt_timerq[(slot)], \
- TCPTV_RTOBASE * tcp_backoff[(slot)], \
- syncache_timer, (void *)((intptr_t)(slot))); \
-} while (0)
+#define SCH_LOCK(sch) mtx_lock(&(sch)->sch_mtx)
+#define SCH_UNLOCK(sch) mtx_unlock(&(sch)->sch_mtx)
+#define SCH_LOCK_ASSERT(sch) mtx_assert(&(sch)->sch_mtx, MA_OWNED)
+/*
+ * Requires the syncache entry to be already removed from the bucket list.
+ */
static void
syncache_free(struct syncache *sc)
{
if (sc->sc_ipopts)
(void) m_free(sc->sc_ipopts);
+#ifdef MAC
+ mac_destroy_syncache(&sc->sc_label);
+#endif
uma_zfree(tcp_syncache.zone, sc);
}
@@ -217,15 +268,11 @@
tcp_syncache.cache_count = 0;
tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT;
- tcp_syncache.cache_limit =
- tcp_syncache.hashsize * tcp_syncache.bucket_limit;
tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS;
tcp_syncache.hash_secret = arc4random();
TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize",
&tcp_syncache.hashsize);
- TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit",
- &tcp_syncache.cache_limit);
TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit",
&tcp_syncache.bucket_limit);
if (!powerof2(tcp_syncache.hashsize) || tcp_syncache.hashsize == 0) {
@@ -234,6 +281,12 @@
}
tcp_syncache.hashmask = tcp_syncache.hashsize - 1;
+ /* Set limits. */
+ tcp_syncache.cache_limit =
+ tcp_syncache.hashsize * tcp_syncache.bucket_limit;
+ TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit",
+ &tcp_syncache.cache_limit);
+
/* Allocate the hash table. */
MALLOC(tcp_syncache.hashbase, struct syncache_head *,
tcp_syncache.hashsize * sizeof(struct syncache_head),
@@ -242,180 +295,174 @@
/* Initialize the hash buckets. */
for (i = 0; i < tcp_syncache.hashsize; i++) {
TAILQ_INIT(&tcp_syncache.hashbase[i].sch_bucket);
+ mtx_init(&tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head",
+ NULL, MTX_DEF);
+ callout_init_mtx(&tcp_syncache.hashbase[i].sch_timer,
+ &tcp_syncache.hashbase[i].sch_mtx, 0);
tcp_syncache.hashbase[i].sch_length = 0;
}
- /* Initialize the timer queues. */
- for (i = 0; i <= SYNCACHE_MAXREXMTS; i++) {
- TAILQ_INIT(&tcp_syncache.timerq[i]);
- callout_init(&tcp_syncache.tt_timerq[i], NET_CALLOUT_MPSAFE);
- }
-
- /*
- * Allocate the syncache entries. Allow the zone to allocate one
- * more entry than cache limit, so a new entry can bump out an
- * older one.
- */
+ /* Create the syncache entry zone. */
tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
uma_zone_set_max(tcp_syncache.zone, tcp_syncache.cache_limit);
- tcp_syncache.cache_limit -= 1;
}
+/*
+ * Inserts a syncache entry into the specified bucket row.
+ * Locks and unlocks the syncache_head autonomously.
+ */
static void
-syncache_insert(sc, sch)
- struct syncache *sc;
- struct syncache_head *sch;
+syncache_insert(struct syncache *sc, struct syncache_head *sch)
{
struct syncache *sc2;
- int i;
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ SCH_LOCK(sch);
/*
- * Make sure that we don't overflow the per-bucket
- * limit or the total cache size limit.
+ * Make sure that we don't overflow the per-bucket limit.
+ * If the bucket is full, toss the oldest element.
*/
if (sch->sch_length >= tcp_syncache.bucket_limit) {
- /*
- * The bucket is full, toss the oldest element.
- */
- sc2 = TAILQ_FIRST(&sch->sch_bucket);
- sc2->sc_tp->ts_recent = ticks;
+ KASSERT(!TAILQ_EMPTY(&sch->sch_bucket),
+ ("sch->sch_length incorrect"));
+ sc2 = TAILQ_LAST(&sch->sch_bucket, sch_head);
syncache_drop(sc2, sch);
tcpstat.tcps_sc_bucketoverflow++;
- } else if (tcp_syncache.cache_count >= tcp_syncache.cache_limit) {
- /*
- * The cache is full. Toss the oldest entry in the
- * entire cache. This is the front entry in the
- * first non-empty timer queue with the largest
- * timeout value.
- */
- for (i = SYNCACHE_MAXREXMTS; i >= 0; i--) {
- sc2 = TAILQ_FIRST(&tcp_syncache.timerq[i]);
- if (sc2 != NULL)
- break;
- }
- sc2->sc_tp->ts_recent = ticks;
- syncache_drop(sc2, NULL);
- tcpstat.tcps_sc_cacheoverflow++;
}
- /* Initialize the entry's timer. */
- SYNCACHE_TIMEOUT(sc, 0);
-
/* Put it into the bucket. */
- TAILQ_INSERT_TAIL(&sch->sch_bucket, sc, sc_hash);
+ TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
sch->sch_length++;
+
+ /* Reinitialize the bucket row's timer. */
+ if (sch->sch_length == 1)
+ sch->sch_nextc = ticks + INT_MAX;
+ syncache_timeout(sc, sch, 1);
+
+ SCH_UNLOCK(sch);
+
tcp_syncache.cache_count++;
tcpstat.tcps_sc_added++;
}
+/*
+ * Remove and free entry from syncache bucket row.
+ * Expects locked syncache head.
+ */
static void
-syncache_drop(sc, sch)
- struct syncache *sc;
- struct syncache_head *sch;
+syncache_drop(struct syncache *sc, struct syncache_head *sch)
{
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
- if (sch == NULL) {
-#ifdef INET6
- if (sc->sc_inc.inc_isipv6) {
- sch = &tcp_syncache.hashbase[
- SYNCACHE_HASH6(&sc->sc_inc, tcp_syncache.hashmask)];
- } else
-#endif
- {
- sch = &tcp_syncache.hashbase[
- SYNCACHE_HASH(&sc->sc_inc, tcp_syncache.hashmask)];
- }
- }
+ SCH_LOCK_ASSERT(sch);
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
- tcp_syncache.cache_count--;
-
- TAILQ_REMOVE(&tcp_syncache.timerq[sc->sc_rxtslot], sc, sc_timerq);
- if (TAILQ_EMPTY(&tcp_syncache.timerq[sc->sc_rxtslot]))
- callout_stop(&tcp_syncache.tt_timerq[sc->sc_rxtslot]);
syncache_free(sc);
+ tcp_syncache.cache_count--;
+}
+
+/*
+ * Engage/reengage time on bucket row.
+ */
+static void
+syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout)
+{
+ sc->sc_rxttime = ticks +
+ TCPTV_RTOBASE * (tcp_backoff[sc->sc_rxmits]);
+ sc->sc_rxmits++;
+ if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc)) {
+ sch->sch_nextc = sc->sc_rxttime;
+ if (docallout)
+ callout_reset(&sch->sch_timer, sch->sch_nextc - ticks,
+ syncache_timer, (void *)sch);
+ }
}
/*
* Walk the timer queues, looking for SYN,ACKs that need to be retransmitted.
* If we have retransmitted an entry the maximum number of times, expire it.
+ * One separate timer for each bucket row.
*/
static void
-syncache_timer(xslot)
- void *xslot;
+syncache_timer(void *xsch)
{
- intptr_t slot = (intptr_t)xslot;
+ struct syncache_head *sch = (struct syncache_head *)xsch;
struct syncache *sc, *nsc;
- struct inpcb *inp;
+ int tick = ticks;
+ char *s;
- INP_INFO_WLOCK(&tcbinfo);
- if (callout_pending(&tcp_syncache.tt_timerq[slot]) ||
- !callout_active(&tcp_syncache.tt_timerq[slot])) {
- /* XXX can this happen? */
- INP_INFO_WUNLOCK(&tcbinfo);
- return;
- }
- callout_deactivate(&tcp_syncache.tt_timerq[slot]);
+ /* NB: syncache_head has already been locked by the callout. */
+ SCH_LOCK_ASSERT(sch);
- nsc = TAILQ_FIRST(&tcp_syncache.timerq[slot]);
- while (nsc != NULL) {
- if (ticks < nsc->sc_rxttime)
- break;
- sc = nsc;
- inp = sc->sc_tp->t_inpcb;
- if (slot == SYNCACHE_MAXREXMTS ||
- slot >= tcp_syncache.rexmt_limit ||
- inp == NULL || inp->inp_gencnt != sc->sc_inp_gencnt) {
- nsc = TAILQ_NEXT(sc, sc_timerq);
- syncache_drop(sc, NULL);
+ /*
+ * In the following cycle we may remove some entries and/or
+ * advance some timeouts, so re-initialize the bucket timer.
+ */
+ sch->sch_nextc = tick + INT_MAX;
+
+ TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc) {
+ /*
+ * We do not check if the listen socket still exists
+ * and accept the case where the listen socket may be
+ * gone by the time we resend the SYN/ACK. We do
+ * not expect this to happen often. If it does,
+ * then the RST will be sent by the time the remote
+ * host does the SYN/ACK->ACK.
+ */
+ if (TSTMP_GT(sc->sc_rxttime, tick)) {
+ if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc))
+ sch->sch_nextc = sc->sc_rxttime;
+ continue;
+ }
+
+ if (sc->sc_rxmits > tcp_syncache.rexmt_limit) {
+ if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Retransmits exhausted, "
+ "giving up and removing syncache entry\n",
+ s, __func__);
+ free(s, M_TCPLOG);
+ }
+ syncache_drop(sc, sch);
tcpstat.tcps_sc_stale++;
continue;
}
- /*
- * syncache_respond() may call back into the syncache to
- * to modify another entry, so do not obtain the next
- * entry on the timer chain until it has completed.
- */
-#ifdef TCPDEBUG
- (void) syncache_respond(sc, NULL, NULL);
-#else
- (void) syncache_respond(sc, NULL);
-#endif
- nsc = TAILQ_NEXT(sc, sc_timerq);
+ if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Response timeout, "
+ "retransmitting (%u) SYN|ACK\n",
+ s, __func__, sc->sc_rxmits);
+ free(s, M_TCPLOG);
+ }
+
+ (void) syncache_respond(sc);
tcpstat.tcps_sc_retransmitted++;
- TAILQ_REMOVE(&tcp_syncache.timerq[slot], sc, sc_timerq);
- SYNCACHE_TIMEOUT(sc, slot + 1);
+ syncache_timeout(sc, sch, 0);
}
- if (nsc != NULL)
- callout_reset(&tcp_syncache.tt_timerq[slot],
- nsc->sc_rxttime - ticks, syncache_timer, (void *)(slot));
- INP_INFO_WUNLOCK(&tcbinfo);
+ if (!TAILQ_EMPTY(&(sch)->sch_bucket))
+ callout_reset(&(sch)->sch_timer, (sch)->sch_nextc - tick,
+ syncache_timer, (void *)(sch));
}
/*
* Find an entry in the syncache.
+ * Always returns with a locked syncache_head plus a matching entry or NULL.
*/
struct syncache *
-syncache_lookup(inc, schp)
- struct in_conninfo *inc;
- struct syncache_head **schp;
+syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
{
struct syncache *sc;
struct syncache_head *sch;
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
-
#ifdef INET6
if (inc->inc_isipv6) {
sch = &tcp_syncache.hashbase[
SYNCACHE_HASH6(inc, tcp_syncache.hashmask)];
*schp = sch;
+
+ SCH_LOCK(sch);
+
+ /* Circle through bucket row to find matching entry. */
TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
if (ENDPTS6_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie))
return (sc);
@@ -426,6 +473,10 @@
sch = &tcp_syncache.hashbase[
SYNCACHE_HASH(inc, tcp_syncache.hashmask)];
*schp = sch;
+
+ SCH_LOCK(sch);
+
+ /* Circle through bucket row to find matching entry. */
TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
#ifdef INET6
if (sc->sc_inc.inc_isipv6)
@@ -435,7 +486,8 @@
return (sc);
}
}
- return (NULL);
+ SCH_LOCK_ASSERT(*schp);
+ return (NULL); /* always returns with locked sch */
}
/*
@@ -444,18 +496,44 @@
* connection is in the syn cache. If it is, zap it.
*/
void
-syncache_chkrst(inc, th)
- struct in_conninfo *inc;
- struct tcphdr *th;
+syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th)
{
struct syncache *sc;
struct syncache_head *sch;
+ char *s = NULL;
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ sc = syncache_lookup(inc, &sch); /* returns locked sch */
+ SCH_LOCK_ASSERT(sch);
+
+ /*
+ * Any RST to our SYN|ACK must not carry ACK, SYN or FIN flags.
+ * See RFC 793 page 65, section SEGMENT ARRIVES.
+ */
+ if (th->th_flags & (TH_ACK|TH_SYN|TH_FIN)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious RST with ACK, SYN or "
+ "FIN flag set, segment ignored\n", s, __func__);
+ tcpstat.tcps_badrst++;
+ goto done;
+ }
+
+ /*
+ * No corresponding connection was found in syncache.
+ * If syncookies are enabled and possibly exclusively
+ * used, or we are under memory pressure, a valid RST
+ * may not find a syncache entry. In that case we're
+ * done and no SYN|ACK retransmissions will happen.
+ * Otherwise the RST was misdirected or spoofed.
+ */
+ if (sc == NULL) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious RST without matching "
+ "syncache entry (possibly syncookie only), "
+ "segment ignored\n", s, __func__);
+ tcpstat.tcps_badrst++;
+ goto done;
+ }
- sc = syncache_lookup(inc, &sch);
- if (sc == NULL)
- return;
/*
* If the RST bit is set, check the sequence number to see
* if this is a valid reset segment.
@@ -472,43 +550,53 @@
if (SEQ_GEQ(th->th_seq, sc->sc_irs) &&
SEQ_LEQ(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
syncache_drop(sc, sch);
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Our SYN|ACK was rejected, "
+ "connection attempt aborted by remote endpoint\n",
+ s, __func__);
tcpstat.tcps_sc_reset++;
+ } else if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: RST with invalid SEQ %u != IRS %u "
+ "(+WND %u), segment ignored\n",
+ s, __func__, th->th_seq, sc->sc_irs, sc->sc_wnd);
+ tcpstat.tcps_badrst++;
}
+
+done:
+ if (s != NULL)
+ free(s, M_TCPLOG);
+ SCH_UNLOCK(sch);
}
void
-syncache_badack(inc)
- struct in_conninfo *inc;
+syncache_badack(struct in_conninfo *inc)
{
struct syncache *sc;
struct syncache_head *sch;
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
-
- sc = syncache_lookup(inc, &sch);
+ sc = syncache_lookup(inc, &sch); /* returns locked sch */
+ SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
syncache_drop(sc, sch);
tcpstat.tcps_sc_badack++;
}
+ SCH_UNLOCK(sch);
}
void
-syncache_unreach(inc, th)
- struct in_conninfo *inc;
- struct tcphdr *th;
+syncache_unreach(struct in_conninfo *inc, struct tcphdr *th)
{
struct syncache *sc;
struct syncache_head *sch;
- INP_INFO_WLOCK_ASSERT(&tcbinfo);
-
- sc = syncache_lookup(inc, &sch);
+ sc = syncache_lookup(inc, &sch); /* returns locked sch */
+ SCH_LOCK_ASSERT(sch);
if (sc == NULL)
- return;
+ goto done;
/* If the sequence number != sc_iss, then it's a bogus ICMP msg */
if (ntohl(th->th_seq) != sc->sc_iss)
- return;
+ goto done;
/*
 * If we've retransmitted 3 times and this is our second error,
@@ -518,28 +606,27 @@
*
* See tcp_notify().
*/
- if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxtslot < 3) {
+ if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxmits < 3 + 1) {
sc->sc_flags |= SCF_UNREACH;
- return;
+ goto done;
}
syncache_drop(sc, sch);
tcpstat.tcps_sc_unreach++;
+done:
+ SCH_UNLOCK(sch);
}
/*
* Build a new TCP socket structure from a syncache entry.
*/
static struct socket *
-syncache_socket(sc, lso, m)
- struct syncache *sc;
- struct socket *lso;
- struct mbuf *m;
+syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
{
struct inpcb *inp = NULL;
struct socket *so;
struct tcpcb *tp;
+ char *s;
- NET_ASSERT_GIANT();
INP_INFO_WLOCK_ASSERT(&tcbinfo);
/*
@@ -551,10 +638,17 @@
so = sonewconn(lso, SS_ISCONNECTED);
if (so == NULL) {
/*
- * Drop the connection; we will send a RST if the peer
- * retransmits the ACK,
+ * Drop the connection; we will either send a RST or
+ * have the peer retransmit its SYN again after its
+ * RTO and try again.
*/
tcpstat.tcps_listendrop++;
+ if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Socket create failed "
+ "due to limits or memory shortage\n",
+ s, __func__);
+ free(s, M_TCPLOG);
+ }
goto abort2;
}
#ifdef MAC
@@ -566,9 +660,7 @@
inp = sotoinpcb(so);
INP_LOCK(inp);
- /*
- * Insert new socket into hash list.
- */
+ /* Insert new socket into PCB hash list. */
inp->inp_inc.inc_isipv6 = sc->sc_inc.inc_isipv6;
#ifdef INET6
if (sc->sc_inc.inc_isipv6) {
@@ -597,14 +689,9 @@
goto abort;
}
#ifdef IPSEC
- /* copy old policy into new socket's */
- if (ipsec_copy_pcbpolicy(sotoinpcb(lso)->inp_sp, inp->inp_sp))
- printf("syncache_expand: could not copy policy\n");
-#endif
-#ifdef FAST_IPSEC
- /* copy old policy into new socket's */
+ /* Copy old policy into new socket's. */
if (ipsec_copy_policy(sotoinpcb(lso)->inp_sp, inp->inp_sp))
- printf("syncache_expand: could not copy policy\n");
+ printf("syncache_socket: could not copy policy\n");
#endif
#ifdef INET6
if (sc->sc_inc.inc_isipv6) {
@@ -667,7 +754,6 @@
goto abort;
}
}
-
tp = intotcpcb(inp);
tp->t_state = TCPS_SYN_RECEIVED;
tp->iss = sc->sc_iss;
@@ -675,31 +761,36 @@
tcp_rcvseqinit(tp);
tcp_sendseqinit(tp);
tp->snd_wl1 = sc->sc_irs;
+ tp->snd_max = tp->iss + 1;
+ tp->snd_nxt = tp->iss + 1;
tp->rcv_up = sc->sc_irs + 1;
tp->rcv_wnd = sc->sc_wnd;
tp->rcv_adv += tp->rcv_wnd;
+ tp->last_ack_sent = tp->rcv_nxt;
tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH|TF_NODELAY);
if (sc->sc_flags & SCF_NOOPT)
tp->t_flags |= TF_NOOPT;
- if (sc->sc_flags & SCF_WINSCALE) {
- tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
- tp->requested_s_scale = sc->sc_requested_s_scale;
- tp->request_r_scale = sc->sc_request_r_scale;
- }
- if (sc->sc_flags & SCF_TIMESTAMP) {
- tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
- tp->ts_recent = sc->sc_tsrecent;
- tp->ts_recent_age = ticks;
- }
+ else {
+ if (sc->sc_flags & SCF_WINSCALE) {
+ tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
+ tp->snd_scale = sc->sc_requested_s_scale;
+ tp->request_r_scale = sc->sc_requested_r_scale;
+ }
+ if (sc->sc_flags & SCF_TIMESTAMP) {
+ tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
+ tp->ts_recent = sc->sc_tsreflect;
+ tp->ts_recent_age = ticks;
+ tp->ts_offset = sc->sc_tsoff;
+ }
#ifdef TCP_SIGNATURE
- if (sc->sc_flags & SCF_SIGNATURE)
- tp->t_flags |= TF_SIGNATURE;
+ if (sc->sc_flags & SCF_SIGNATURE)
+ tp->t_flags |= TF_SIGNATURE;
#endif
- if (sc->sc_flags & SCF_SACK) {
- tp->sack_enable = 1;
- tp->t_flags |= TF_SACK_PERMIT;
+ if (sc->sc_flags & SCF_SACK)
+ tp->t_flags |= TF_SACK_PERMIT;
}
+
/*
* Set up MSS and get cached values from tcp_hostcache.
* This might overwrite some of the defaults we just set.
@@ -709,9 +800,9 @@
/*
* If the SYN,ACK was retransmitted, reset cwnd to 1 segment.
*/
- if (sc->sc_rxtslot != 0)
+ if (sc->sc_rxmits)
tp->snd_cwnd = tp->t_maxseg;
- callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp);
+ tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
INP_UNLOCK(inp);
@@ -722,7 +813,7 @@
INP_UNLOCK(inp);
abort2:
if (so != NULL)
- (void) soabort(so);
+ soabort(so);
return (NULL);
}
@@ -734,15 +825,24 @@
* the SYN-RECEIVED state.
*/
int
-syncache_expand(struct in_conninfo *inc, struct tcphdr *th, struct socket **sop, struct mbuf *m)
+syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+ struct socket **lsop, struct mbuf *m)
{
struct syncache *sc;
struct syncache_head *sch;
- struct socket *so;
+ struct syncache scs;
+ char *s;
+ /*
+ * Global TCP locks are held because we manipulate the PCB lists
+ * and create a new socket.
+ */
INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
+ ("%s: can handle only ACK", __func__));
- sc = syncache_lookup(inc, &sch);
+ sc = syncache_lookup(inc, &sch); /* returns locked sch */
+ SCH_LOCK_ASSERT(sch);
if (sc == NULL) {
/*
* There is no syncache entry, so see if this ACK is
@@ -753,43 +853,105 @@
* B. check that the syncookie is valid. If it is, then
* cobble up a fake syncache entry, and return.
*/
- if (!tcp_syncookies)
- return (0);
- sc = syncookie_lookup(inc, th, *sop);
- if (sc == NULL)
- return (0);
- sch = NULL;
- tcpstat.tcps_sc_recvcookie++;
+ if (!tcp_syncookies) {
+ SCH_UNLOCK(sch);
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Spurious ACK, "
+ "segment rejected (syncookies disabled)\n",
+ s, __func__);
+ goto failed;
+ }
+ bzero(&scs, sizeof(scs));
+ sc = syncookie_lookup(inc, sch, &scs, to, th, *lsop);
+ SCH_UNLOCK(sch);
+ if (sc == NULL) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Segment failed "
+ "SYNCOOKIE authentication, segment rejected "
+ "(probably spoofed)\n", s, __func__);
+ goto failed;
+ }
+ } else {
+ /* Pull out the entry to unlock the bucket row. */
+ TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
+ sch->sch_length--;
+ tcp_syncache.cache_count--;
+ SCH_UNLOCK(sch);
}
/*
- * If seg contains an ACK, but not for our SYN/ACK, send a RST.
+ * Segment validation:
+ * ACK must match our initial sequence number + 1 (the SYN|ACK).
*/
if (th->th_ack != sc->sc_iss + 1) {
- if (sch == NULL)
- syncache_free(sc);
- return (0);
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
+ "rejected\n", s, __func__, th->th_ack, sc->sc_iss);
+ goto failed;
+ }
+ /*
+ * The SEQ must match the received initial receive sequence
+ * number + 1 (the SYN) because we didn't ACK any data that
+ * may have come with the SYN.
+ */
+ if (th->th_seq != sc->sc_irs + 1) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
+ "rejected\n", s, __func__, th->th_seq, sc->sc_irs);
+ goto failed;
}
-
- so = syncache_socket(sc, *sop, m);
- if (so == NULL) {
#if 0
-resetandabort:
- /* XXXjlemon check this - is this correct? */
- (void) tcp_respond(NULL, m, m, th,
- th->th_seq + tlen, (tcp_seq)0, TH_RST|TH_ACK);
+ /*
+ * If timestamps were present in the SYN and we accepted
+ * them in our SYN|ACK we require them to be present from
+ * now on. And vice versa.
+ *
+ * Unfortunately, during testing of 7.0 some users found
+ * network devices that violate this constraint, so it must
+ * be disabled.
+ */
+ if ((sc->sc_flags & SCF_TIMESTAMP) && !(to->to_flags & TOF_TS)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Timestamp missing, "
+ "segment rejected\n", s, __func__);
+ goto failed;
+ }
#endif
- m_freem(m); /* XXX only needed for above */
+ if (!(sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
+ "segment rejected\n", s, __func__);
+ goto failed;
+ }
+ /*
+ * If timestamps were negotiated the reflected timestamp
+ * must be equal to what we actually sent in the SYN|ACK.
+ */
+ if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts) {
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
+ "segment rejected\n",
+ s, __func__, to->to_tsecr, sc->sc_ts);
+ goto failed;
+ }
+
+ *lsop = syncache_socket(sc, *lsop, m);
+
+ if (*lsop == NULL)
tcpstat.tcps_sc_aborted++;
- } else
+ else
tcpstat.tcps_sc_completed++;
- if (sch == NULL)
+ if (sc != &scs)
syncache_free(sc);
- else
- syncache_drop(sc, sch);
- *sop = so;
return (1);
+failed:
+ if (sc != NULL && sc != &scs)
+ syncache_free(sc);
+ if (s != NULL)
+ free(s, M_TCPLOG);
+ *lsop = NULL;
+ return (0);
}
/*
@@ -805,13 +967,9 @@
* consume all available buffer space if it were ACKed. By not ACKing
* the data, we avoid this DoS scenario.
*/
-int
-syncache_add(inc, to, th, sop, m)
- struct in_conninfo *inc;
- struct tcpopt *to;
- struct tcphdr *th;
- struct socket **sop;
- struct mbuf *m;
+void
+syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+ struct inpcb *inp, struct socket **lsop, struct mbuf *m)
{
struct tcpcb *tp;
struct socket *so;
@@ -819,13 +977,53 @@
struct syncache_head *sch;
struct mbuf *ipopts = NULL;
u_int32_t flowtmp;
- int i, win;
+ int win, sb_hiwat, ip_ttl, ip_tos, noopt;
+ char *s;
+#ifdef INET6
+ int autoflowlabel = 0;
+#endif
+#ifdef MAC
+ struct label *maclabel;
+#endif
+ struct syncache scs;
INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ INP_LOCK_ASSERT(inp); /* listen socket */
+ KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
+ ("%s: unexpected tcp flags", __func__));
- so = *sop;
+ /*
+ * Combine all so/tp operations very early to drop the INP lock as
+ * soon as possible.
+ */
+ so = *lsop;
tp = sototcpcb(so);
+#ifdef INET6
+ if (inc->inc_isipv6 &&
+ (inp->in6p_flags & IN6P_AUTOFLOWLABEL))
+ autoflowlabel = 1;
+#endif
+ ip_ttl = inp->inp_ip_ttl;
+ ip_tos = inp->inp_ip_tos;
+ win = sbspace(&so->so_rcv);
+ sb_hiwat = so->so_rcv.sb_hiwat;
+ noopt = (tp->t_flags & TF_NOOPT);
+
+ so = NULL;
+ tp = NULL;
+
+#ifdef MAC
+ if (mac_init_syncache(&maclabel) != 0) {
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+ goto done;
+ } else
+ mac_init_syncache_from_inpcb(maclabel, inp);
+#endif
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&tcbinfo);
+
/*
* Remember the IP options, if any.
*/
@@ -838,11 +1036,16 @@
* See if we already have an entry for this connection.
* If we do, resend the SYN,ACK, and reset the retransmit timer.
*
- * XXX
- * should the syncache be re-initialized with the contents
+ * XXX: should the syncache be re-initialized with the contents
* of the new SYN here (which may have different options?)
+ *
+ * XXX: We do not check the sequence number to see if this is a
+ * real retransmit or a new connection attempt. The question is
+ * how to handle such a case; either ignore it as spoofed, or
+ * drop the current entry and create a new one?
*/
- sc = syncache_lookup(inc, &sch);
+ sc = syncache_lookup(inc, &sch); /* returns locked entry */
+ SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
tcpstat.tcps_sc_dupsyn++;
if (ipopts) {
@@ -857,27 +1060,35 @@
/*
* Update timestamp if present.
*/
- if (sc->sc_flags & SCF_TIMESTAMP)
- sc->sc_tsrecent = to->to_tsval;
+ if ((sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS))
+ sc->sc_tsreflect = to->to_tsval;
+ else
+ sc->sc_flags &= ~SCF_TIMESTAMP;
+#ifdef MAC
/*
- * PCB may have changed, pick up new values.
+ * Since we have already unconditionally allocated label
+ * storage, free it up. The syncache entry will already
+ * have an initialized label we can use.
*/
- sc->sc_tp = tp;
- sc->sc_inp_gencnt = tp->t_inpcb->inp_gencnt;
-#ifdef TCPDEBUG
- if (syncache_respond(sc, m, so) == 0) {
-#else
- if (syncache_respond(sc, m) == 0) {
-#endif
- /* NB: guarded by INP_INFO_WLOCK(&tcbinfo) */
- TAILQ_REMOVE(&tcp_syncache.timerq[sc->sc_rxtslot],
- sc, sc_timerq);
- SYNCACHE_TIMEOUT(sc, sc->sc_rxtslot);
+ mac_destroy_syncache(&maclabel);
+ KASSERT(sc->sc_label != NULL,
+ ("%s: label not initialized", __func__));
+#endif
+ /* Retransmit SYN|ACK and reset retransmit count. */
+ if ((s = tcp_log_addrs(&sc->sc_inc, th, NULL, NULL))) {
+ log(LOG_DEBUG, "%s; %s: Received duplicate SYN, "
+ "resetting timer and retransmitting SYN|ACK\n",
+ s, __func__);
+ free(s, M_TCPLOG);
+ }
+ if (syncache_respond(sc) == 0) {
+ sc->sc_rxmits = 0;
+ syncache_timeout(sc, sch, 1);
tcpstat.tcps_sndacks++;
tcpstat.tcps_sndtotal++;
}
- *sop = NULL;
- return (1);
+ SCH_UNLOCK(sch);
+ goto done;
}
sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT | M_ZERO);
@@ -887,67 +1098,48 @@
* Treat this as if the cache was full; drop the oldest
* entry and insert the new one.
*/
- /* NB: guarded by INP_INFO_WLOCK(&tcbinfo) */
- for (i = SYNCACHE_MAXREXMTS; i >= 0; i--) {
- sc = TAILQ_FIRST(&tcp_syncache.timerq[i]);
- if (sc != NULL)
- break;
- }
- sc->sc_tp->ts_recent = ticks;
- syncache_drop(sc, NULL);
tcpstat.tcps_sc_zonefail++;
+ if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL)
+ syncache_drop(sc, sch);
sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT | M_ZERO);
if (sc == NULL) {
- if (ipopts)
- (void) m_free(ipopts);
- return (0);
+ if (tcp_syncookies) {
+ bzero(&scs, sizeof(scs));
+ sc = &scs;
+ } else {
+ SCH_UNLOCK(sch);
+ if (ipopts)
+ (void) m_free(ipopts);
+ goto done;
+ }
}
}
/*
* Fill in the syncache values.
*/
- sc->sc_tp = tp;
- sc->sc_inp_gencnt = tp->t_inpcb->inp_gencnt;
+#ifdef MAC
+ sc->sc_label = maclabel;
+#endif
sc->sc_ipopts = ipopts;
- sc->sc_inc.inc_fport = inc->inc_fport;
- sc->sc_inc.inc_lport = inc->inc_lport;
+ bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
#ifdef INET6
- sc->sc_inc.inc_isipv6 = inc->inc_isipv6;
- if (inc->inc_isipv6) {
- sc->sc_inc.inc6_faddr = inc->inc6_faddr;
- sc->sc_inc.inc6_laddr = inc->inc6_laddr;
- } else
+ if (!inc->inc_isipv6)
#endif
{
- sc->sc_inc.inc_faddr = inc->inc_faddr;
- sc->sc_inc.inc_laddr = inc->inc_laddr;
+ sc->sc_ip_tos = ip_tos;
+ sc->sc_ip_ttl = ip_ttl;
}
+
sc->sc_irs = th->th_seq;
+ sc->sc_iss = arc4random();
sc->sc_flags = 0;
- sc->sc_peer_mss = to->to_flags & TOF_MSS ? to->to_mss : 0;
sc->sc_flowlabel = 0;
- if (tcp_syncookies) {
- sc->sc_iss = syncookie_generate(sc, &flowtmp);
-#ifdef INET6
- if (inc->inc_isipv6 &&
- (sc->sc_tp->t_inpcb->in6p_flags & IN6P_AUTOFLOWLABEL)) {
- sc->sc_flowlabel = flowtmp & IPV6_FLOWLABEL_MASK;
- }
-#endif
- } else {
- sc->sc_iss = arc4random();
-#ifdef INET6
- if (inc->inc_isipv6 &&
- (sc->sc_tp->t_inpcb->in6p_flags & IN6P_AUTOFLOWLABEL)) {
- sc->sc_flowlabel =
- (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
- }
-#endif
- }
- /* Initial receive window: clip sbspace to [0 .. TCP_MAXWIN] */
- win = sbspace(&so->so_rcv);
+ /*
+ * Initial receive window: clip sbspace to [0 .. TCP_MAXWIN].
+ * win was derived from socket earlier in the function.
+ */
win = imax(win, 0);
win = imin(win, TCP_MAXWIN);
sc->sc_wnd = win;
@@ -958,7 +1150,8 @@
* it ok to send timestamp requests and replies.
*/
if (to->to_flags & TOF_TS) {
- sc->sc_tsrecent = to->to_tsval;
+ sc->sc_tsreflect = to->to_tsval;
+ sc->sc_ts = ticks;
sc->sc_flags |= SCF_TIMESTAMP;
}
if (to->to_flags & TOF_SCALE) {
@@ -981,73 +1174,86 @@
* of 3 will be chosen by this algorithm. Those who
* choose a larger maxsockbuf should watch out
 * for the compatibility problems mentioned above.
- *
- * RFC1323: The Window field in a SYN (i.e., a <SYN>
- * or <SYN,ACK>) segment itself is never scaled.
+ *
+ * RFC1323: The Window field in a SYN (i.e., a <SYN>
+ * or <SYN,ACK>) segment itself is never scaled.
*/
while (wscale < TCP_MAX_WINSHIFT &&
(TCP_MAXWIN << wscale) < sb_max)
wscale++;
- sc->sc_request_r_scale = wscale;
- sc->sc_requested_s_scale = to->to_requested_s_scale;
+ sc->sc_requested_r_scale = wscale;
+ sc->sc_requested_s_scale = to->to_wscale;
sc->sc_flags |= SCF_WINSCALE;
}
}
- if (tp->t_flags & TF_NOOPT)
- sc->sc_flags = SCF_NOOPT;
#ifdef TCP_SIGNATURE
/*
* If listening socket requested TCP digests, and received SYN
* contains the option, flag this in the syncache so that
* syncache_respond() will do the right thing with the SYN+ACK.
- * XXX Currently we always record the option by default and will
+ * XXX: Currently we always record the option by default and will
* attempt to use it in syncache_respond().
*/
if (to->to_flags & TOF_SIGNATURE)
sc->sc_flags |= SCF_SIGNATURE;
#endif
-
- if (to->to_flags & TOF_SACK)
+ if (to->to_flags & TOF_SACKPERM)
sc->sc_flags |= SCF_SACK;
+ if (to->to_flags & TOF_MSS)
+ sc->sc_peer_mss = to->to_mss; /* peer mss may be zero */
+ if (noopt)
+ sc->sc_flags |= SCF_NOOPT;
+
+ if (tcp_syncookies) {
+ syncookie_generate(sch, sc, &flowtmp);
+#ifdef INET6
+ if (autoflowlabel)
+ sc->sc_flowlabel = flowtmp;
+#endif
+ } else {
+#ifdef INET6
+ if (autoflowlabel)
+ sc->sc_flowlabel =
+ (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
+#endif
+ }
+ SCH_UNLOCK(sch);
/*
* Do a standard 3-way handshake.
*/
-#ifdef TCPDEBUG
- if (syncache_respond(sc, m, so) == 0) {
-#else
- if (syncache_respond(sc, m) == 0) {
-#endif
- syncache_insert(sc, sch);
+ if (syncache_respond(sc) == 0) {
+ if (tcp_syncookies && tcp_syncookiesonly && sc != &scs)
+ syncache_free(sc);
+ else if (sc != &scs)
+ syncache_insert(sc, sch); /* locks and unlocks sch */
tcpstat.tcps_sndacks++;
tcpstat.tcps_sndtotal++;
} else {
- syncache_free(sc);
+ if (sc != &scs)
+ syncache_free(sc);
tcpstat.tcps_sc_dropped++;
}
- *sop = NULL;
- return (1);
+
+done:
+#ifdef MAC
+ if (sc == &scs)
+ mac_destroy_syncache(&maclabel);
+#endif
+ *lsop = NULL;
+ m_freem(m);
+ return;
}
-#ifdef TCPDEBUG
static int
-syncache_respond(sc, m, so)
- struct syncache *sc;
- struct mbuf *m;
- struct socket *so;
-#else
-static int
-syncache_respond(sc, m)
- struct syncache *sc;
- struct mbuf *m;
-#endif
+syncache_respond(struct syncache *sc)
{
- u_int8_t *optp;
- int optlen, error;
- u_int16_t tlen, hlen, mssopt;
struct ip *ip = NULL;
+ struct mbuf *m;
struct tcphdr *th;
- struct inpcb *inp;
+ int optlen, error;
+ u_int16_t hlen, tlen, mssopt;
+ struct tcpopt to;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
#endif
@@ -1057,54 +1263,28 @@
(sc->sc_inc.inc_isipv6) ? sizeof(struct ip6_hdr) :
#endif
sizeof(struct ip);
+ tlen = hlen + sizeof(struct tcphdr);
- KASSERT((&sc->sc_inc) != NULL, ("syncache_respond with NULL in_conninfo pointer"));
-
- /* Determine MSS we advertize to other end of connection */
+ /* Determine MSS we advertize to other end of connection. */
mssopt = tcp_mssopt(&sc->sc_inc);
+ if (sc->sc_peer_mss)
+ mssopt = max( min(sc->sc_peer_mss, mssopt), tcp_minmss);
- /* Compute the size of the TCP options. */
- if (sc->sc_flags & SCF_NOOPT) {
- optlen = 0;
- } else {
- optlen = TCPOLEN_MAXSEG +
- ((sc->sc_flags & SCF_WINSCALE) ? 4 : 0) +
- ((sc->sc_flags & SCF_TIMESTAMP) ? TCPOLEN_TSTAMP_APPA : 0);
-#ifdef TCP_SIGNATURE
- if (sc->sc_flags & SCF_SIGNATURE)
- optlen += TCPOLEN_SIGNATURE;
-#endif
- if (sc->sc_flags & SCF_SACK)
- optlen += TCPOLEN_SACK_PERMITTED;
- optlen = roundup2(optlen, 4);
- }
- tlen = hlen + sizeof(struct tcphdr) + optlen;
-
- /*
- * XXX
- * assume that the entire packet will fit in a header mbuf
- */
- KASSERT(max_linkhdr + tlen <= MHLEN, ("syncache: mbuf too small"));
+ /* XXX: Assume that the entire packet will fit in a header mbuf. */
+ KASSERT(max_linkhdr + tlen + TCP_MAXOLEN <= MHLEN,
+ ("syncache: mbuf too small"));
- /*
- * XXX shouldn't this reuse the mbuf if possible ?
- * Create the IP+TCP header from scratch.
- */
- if (m)
- m_freem(m);
-
- m = m_gethdr(M_DONTWAIT, MT_HEADER);
+ /* Create the IP+TCP header from scratch. */
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
if (m == NULL)
return (ENOBUFS);
+#ifdef MAC
+ mac_create_mbuf_from_syncache(sc->sc_label, m);
+#endif
m->m_data += max_linkhdr;
m->m_len = tlen;
m->m_pkthdr.len = tlen;
m->m_pkthdr.rcvif = NULL;
- inp = sc->sc_tp->t_inpcb;
- INP_LOCK(inp);
-#ifdef MAC
- mac_create_mbuf_from_inpcb(inp, m);
-#endif
#ifdef INET6
if (sc->sc_inc.inc_isipv6) {
@@ -1132,8 +1312,8 @@
ip->ip_p = IPPROTO_TCP;
ip->ip_src = sc->sc_inc.inc_laddr;
ip->ip_dst = sc->sc_inc.inc_faddr;
- ip->ip_ttl = inp->inp_ip_ttl; /* XXX */
- ip->ip_tos = inp->inp_ip_tos; /* XXX */
+ ip->ip_ttl = sc->sc_ip_ttl;
+ ip->ip_tos = sc->sc_ip_tos;
/*
* See if we should do MTU discovery. Route lookups are
@@ -1152,277 +1332,397 @@
th->th_seq = htonl(sc->sc_iss);
th->th_ack = htonl(sc->sc_irs + 1);
- th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
+ th->th_off = sizeof(struct tcphdr) >> 2;
th->th_x2 = 0;
th->th_flags = TH_SYN|TH_ACK;
th->th_win = htons(sc->sc_wnd);
th->th_urp = 0;
/* Tack on the TCP options. */
- if (optlen != 0) {
- optp = (u_int8_t *)(th + 1);
- *optp++ = TCPOPT_MAXSEG;
- *optp++ = TCPOLEN_MAXSEG;
- *optp++ = (mssopt >> 8) & 0xff;
- *optp++ = mssopt & 0xff;
+ if ((sc->sc_flags & SCF_NOOPT) == 0) {
+ to.to_flags = 0;
+ to.to_mss = mssopt;
+ to.to_flags = TOF_MSS;
if (sc->sc_flags & SCF_WINSCALE) {
- *((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 |
- TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 |
- sc->sc_request_r_scale);
- optp += 4;
+ to.to_wscale = sc->sc_requested_r_scale;
+ to.to_flags |= TOF_SCALE;
}
-
if (sc->sc_flags & SCF_TIMESTAMP) {
- u_int32_t *lp = (u_int32_t *)(optp);
-
- /* Form timestamp option per appendix A of RFC 1323. */
- *lp++ = htonl(TCPOPT_TSTAMP_HDR);
- *lp++ = htonl(ticks);
- *lp = htonl(sc->sc_tsrecent);
- optp += TCPOLEN_TSTAMP_APPA;
+ /* Virgin timestamp or TCP cookie enhanced one. */
+ to.to_tsval = sc->sc_ts;
+ to.to_tsecr = sc->sc_tsreflect;
+ to.to_flags |= TOF_TS;
}
-
+ if (sc->sc_flags & SCF_SACK)
+ to.to_flags |= TOF_SACKPERM;
#ifdef TCP_SIGNATURE
- /*
- * Handle TCP-MD5 passive opener response.
- */
- if (sc->sc_flags & SCF_SIGNATURE) {
- u_int8_t *bp = optp;
- int i;
-
- *bp++ = TCPOPT_SIGNATURE;
- *bp++ = TCPOLEN_SIGNATURE;
- for (i = 0; i < TCP_SIGLEN; i++)
- *bp++ = 0;
- tcp_signature_compute(m, sizeof(struct ip), 0, optlen,
- optp + 2, IPSEC_DIR_OUTBOUND);
- optp += TCPOLEN_SIGNATURE;
- }
-#endif /* TCP_SIGNATURE */
+ if (sc->sc_flags & SCF_SIGNATURE)
+ to.to_flags |= TOF_SIGNATURE;
+#endif
+ optlen = tcp_addoptions(&to, (u_char *)(th + 1));
- if (sc->sc_flags & SCF_SACK) {
- *optp++ = TCPOPT_SACK_PERMITTED;
- *optp++ = TCPOLEN_SACK_PERMITTED;
- }
+ /* Adjust headers by option size. */
+ th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
+ m->m_len += optlen;
+ m->m_pkthdr.len += optlen;
- {
- /* Pad TCP options to a 4 byte boundary */
- int padlen = optlen - (optp - (u_int8_t *)(th + 1));
- while (padlen-- > 0)
- *optp++ = TCPOPT_EOL;
- }
- }
+#ifdef TCP_SIGNATURE
+ if (sc->sc_flags & SCF_SIGNATURE)
+ tcp_signature_compute(m, sizeof(struct ip), 0, optlen,
+ to.to_signature, IPSEC_DIR_OUTBOUND);
+#endif
+#ifdef INET6
+ if (sc->sc_inc.inc_isipv6)
+ ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) + optlen);
+ else
+#endif
+ ip->ip_len += optlen;
+ } else
+ optlen = 0;
#ifdef INET6
if (sc->sc_inc.inc_isipv6) {
th->th_sum = 0;
- th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen, tlen - hlen);
+ th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen,
+ tlen + optlen - hlen);
ip6->ip6_hlim = in6_selecthlim(NULL, NULL);
- error = ip6_output(m, NULL, NULL, 0, NULL, NULL, inp);
+ error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
} else
#endif
{
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(tlen - hlen + IPPROTO_TCP));
+ htons(tlen + optlen - hlen + IPPROTO_TCP));
m->m_pkthdr.csum_flags = CSUM_TCP;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
-#ifdef TCPDEBUG
- /*
- * Trace.
- */
- if (so != NULL && so->so_options & SO_DEBUG) {
- struct tcpcb *tp = sototcpcb(so);
- tcp_trace(TA_OUTPUT, tp->t_state, tp,
- mtod(m, void *), th, 0);
- }
-#endif
- error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, inp);
+ error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
}
- INP_UNLOCK(inp);
return (error);
}
/*
- * cookie layers:
+ * The purpose of SYN cookies is to avoid keeping track of all SYN's we
+ * receive and to be able to handle SYN floods from bogus source addresses
+ * (where we will never receive any reply). SYN floods try to exhaust all
+ * our memory and available slots in the SYN cache table to cause a denial
+ * of service to legitimate users of the local host.
*
- * |. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .|
- * | peer iss |
- * | MD5(laddr,faddr,secret,lport,fport) |. . . . . . .|
- * | 0 |(A)| |
- * (A): peer mss index
- */
-
-/*
- * The values below are chosen to minimize the size of the tcp_secret
- * table, as well as providing roughly a 16 second lifetime for the cookie.
- */
-
-#define SYNCOOKIE_WNDBITS 5 /* exposed bits for window indexing */
-#define SYNCOOKIE_TIMESHIFT 1 /* scale ticks to window time units */
-
-#define SYNCOOKIE_WNDMASK ((1 << SYNCOOKIE_WNDBITS) - 1)
-#define SYNCOOKIE_NSECRETS (1 << SYNCOOKIE_WNDBITS)
-#define SYNCOOKIE_TIMEOUT \
- (hz * (1 << SYNCOOKIE_WNDBITS) / (1 << SYNCOOKIE_TIMESHIFT))
-#define SYNCOOKIE_DATAMASK ((3 << SYNCOOKIE_WNDBITS) | SYNCOOKIE_WNDMASK)
-
-static struct {
- u_int32_t ts_secbits[4];
- u_int ts_expire;
-} tcp_secret[SYNCOOKIE_NSECRETS];
-
-static int tcp_msstab[] = { 0, 536, 1460, 8960 };
-
-#define MD5Add(v) MD5Update(&syn_ctx, (u_char *)&v, sizeof(v))
-
-struct md5_add {
- u_int32_t laddr, faddr;
- u_int32_t secbits[4];
- u_int16_t lport, fport;
-};
-
-#ifdef CTASSERT
-CTASSERT(sizeof(struct md5_add) == 28);
-#endif
-
-/*
+ * The idea of SYN cookies is to encode and include all necessary information
+ * about the connection setup state within the SYN-ACK we send back and thus
+ * to get along without keeping any local state until the ACK to the SYN-ACK
+ * arrives (if ever). Everything we need to know should be available from
+ * the information we encoded in the SYN-ACK.
+ *
+ * More information about the theory behind SYN cookies and its first
+ * discussion and specification can be found at:
+ * http://cr.yp.to/syncookies.html (overview)
+ * http://cr.yp.to/syncookies/archive (gory details)
+ *
+ * This implementation extends the original idea and first implementation
+ * of FreeBSD by using not only the initial sequence number field to store
+ * information but also the timestamp field if present. This way we can
+ * keep track of the entire state we need to know to recreate the session in
+ * its original form. Almost all TCP speakers implement RFC1323 timestamps
+ * these days. For those that do not we still have to live with the known
+ * shortcomings of the ISN only SYN cookies.
+ *
+ * Cookie layers:
+ *
+ * Initial sequence number we send:
+ * 31|................................|0
+ * DDDDDDDDDDDDDDDDDDDDDDDDDMMMRRRP
+ * D = MD5 Digest (first dword)
+ * M = MSS index
+ * R = Rotation of secret
+ * P = Odd or Even secret
+ *
+ * The MD5 Digest is computed over the following parameters:
+ * a) randomly rotated secret
+ * b) struct in_conninfo containing the remote/local ip/port (IPv4&IPv6)
+ * c) the received initial sequence number from remote host
+ * d) the rotation offset and odd/even bit
+ *
+ * Timestamp we send:
+ * 31|................................|0
+ * DDDDDDDDDDDDDDDDDDDDDDSSSSRRRRA5
+ * D = MD5 Digest (third dword) (only as filler)
+ * S = Requested send window scale
+ * R = Requested receive window scale
+ * A = SACK allowed
+ * 5 = TCP-MD5 enabled (not implemented yet)
+ * XORed with MD5 Digest (fourth dword)
+ *
+ * The timestamp isn't cryptographically secure and doesn't need to be.
+ * The double use of the MD5 digest dwords ties it to a specific remote/
+ * local host/port, remote initial sequence number and our local time
+ * limited secret. A received timestamp is reverted (XORed) and then
+ * the contained MD5 dword is compared to the computed one to ensure the
+ * timestamp belongs to the SYN-ACK we sent. The other parameters may
+ * have been tampered with but this isn't different from supplying bogus
+ * values in the SYN in the first place.
+ *
+ * Some problems with SYN cookies remain however:
* Consider the problem of a recreated (and retransmitted) cookie. If the
* original SYN was accepted, the connection is established. The second
* SYN is inflight, and if it arrives with an ISN that falls within the
* receive window, the connection is killed.
*
- * However, since cookies have other problems, this may not be worth
- * worrying about.
+ * Notes:
+ * A heuristic to determine when to accept syn cookies is not necessary.
+ * An ACK flood would cause the syncookie verification to be attempted,
+ * but a SYN flood causes syncookies to be generated. Both are of equal
+ * cost, so there's no point in trying to optimize the ACK flood case.
+ * Also, if you don't process certain ACKs for some reason, then all someone
+ * would have to do is launch a SYN and ACK flood at the same time, which
+ * would stop cookie verification and defeat the entire purpose of syncookies.
*/
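To make the layout above concrete, here is a minimal stand-alone sketch of the ISN packing; the helper names and the sample digest value are invented for the example, only the bit widths come from the layout comment:

#include <stdio.h>
#include <stdint.h>

/*
 * ISN layout per the comment above:
 *   bits 31..7 = MD5 digest (first dword), bits 6..4 = MSS table index,
 *   bits 3..1  = secret rotation offset,  bit 0    = odd/even secret.
 * The timestamp word is packed similarly (TCP-MD5 bit, SACK bit, two 4-bit
 * window scales, digest filler) and then XORed with a further digest dword.
 */
static uint32_t
cookie_isn_pack(uint32_t md5_dword0, unsigned mss_idx, unsigned rot,
    unsigned oddeven)
{
	uint32_t data;

	data = oddeven & 0x1;		/* odd or even secret, 1 bit */
	data |= (rot & 0x7) << 1;	/* secret rotation offset, 3 bits */
	data |= (mss_idx & 0x7) << 4;	/* MSS table index, 3 bits */
	data |= md5_dword0 << 7;	/* digest fills the remaining 25 bits */
	return (data);
}

static void
cookie_isn_unpack(uint32_t isn, unsigned *mss_idx, unsigned *rot,
    unsigned *oddeven)
{
	*oddeven = isn & 0x1;
	*rot = (isn >> 1) & 0x7;
	*mss_idx = (isn >> 4) & 0x7;
	/* (isn & ~0x7f) is what gets compared against (digest << 7) later. */
}

int
main(void)
{
	unsigned mss_idx, rot, oddeven;
	uint32_t isn = cookie_isn_pack(0x12345678, 6, 3, 1);

	cookie_isn_unpack(isn, &mss_idx, &rot, &oddeven);
	printf("mss_idx=%u rot=%u oddeven=%u\n", mss_idx, rot, oddeven);
	return (0);
}
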
+static int tcp_sc_msstab[] = { 0, 256, 468, 536, 996, 1452, 1460, 8960 };
-static u_int32_t
-syncookie_generate(struct syncache *sc, u_int32_t *flowid)
+static void
+syncookie_generate(struct syncache_head *sch, struct syncache *sc,
+ u_int32_t *flowlabel)
{
- u_int32_t md5_buffer[4];
+ MD5_CTX ctx;
+ u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)];
u_int32_t data;
- int idx, i;
- struct md5_add add;
- MD5_CTX syn_ctx;
-
- /* NB: single threaded; could add INP_INFO_WLOCK_ASSERT(&tcbinfo) */
-
- idx = ((ticks << SYNCOOKIE_TIMESHIFT) / hz) & SYNCOOKIE_WNDMASK;
- if (tcp_secret[idx].ts_expire < ticks) {
- for (i = 0; i < 4; i++)
- tcp_secret[idx].ts_secbits[i] = arc4random();
- tcp_secret[idx].ts_expire = ticks + SYNCOOKIE_TIMEOUT;
- }
- for (data = sizeof(tcp_msstab) / sizeof(int) - 1; data > 0; data--)
- if (tcp_msstab[data] <= sc->sc_peer_mss)
+ u_int32_t *secbits;
+ u_int off, pmss, mss;
+ int i;
+
+ SCH_LOCK_ASSERT(sch);
+
+ /* Which of the two secrets to use. */
+ secbits = sch->sch_oddeven ?
+ sch->sch_secbits_odd : sch->sch_secbits_even;
+
+ /* Reseed secret if too old. */
+ if (sch->sch_reseed < time_uptime) {
+ sch->sch_oddeven = sch->sch_oddeven ? 0 : 1; /* toggle */
+ secbits = sch->sch_oddeven ?
+ sch->sch_secbits_odd : sch->sch_secbits_even;
+ for (i = 0; i < SYNCOOKIE_SECRET_SIZE; i++)
+ secbits[i] = arc4random();
+ sch->sch_reseed = time_uptime + SYNCOOKIE_LIFETIME;
+ }
+
+ /* Secret rotation offset. */
+ off = sc->sc_iss & 0x7; /* iss was randomized before */
+
+ /* Maximum segment size calculation. */
+ pmss = max( min(sc->sc_peer_mss, tcp_mssopt(&sc->sc_inc)), tcp_minmss);
+ for (mss = sizeof(tcp_sc_msstab) / sizeof(int) - 1; mss > 0; mss--)
+ if (tcp_sc_msstab[mss] <= pmss)
break;
- data = (data << SYNCOOKIE_WNDBITS) | idx;
- data ^= sc->sc_irs; /* peer's iss */
- MD5Init(&syn_ctx);
+
+ /* Fold parameters and MD5 digest into the ISN we will send. */
+ data = sch->sch_oddeven;/* odd or even secret, 1 bit */
+ data |= off << 1; /* secret offset, derived from iss, 3 bits */
+ data |= mss << 4; /* mss, 3 bits */
+
+ MD5Init(&ctx);
+ MD5Update(&ctx, ((u_int8_t *)secbits) + off,
+ SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off);
+ MD5Update(&ctx, secbits, off);
+ MD5Update(&ctx, &sc->sc_inc, sizeof(sc->sc_inc));
+ MD5Update(&ctx, &sc->sc_irs, sizeof(sc->sc_irs));
+ MD5Update(&ctx, &data, sizeof(data));
+ MD5Final((u_int8_t *)&md5_buffer, &ctx);
+
+ data |= (md5_buffer[0] << 7);
+ sc->sc_iss = data;
+
#ifdef INET6
- if (sc->sc_inc.inc_isipv6) {
- MD5Add(sc->sc_inc.inc6_laddr);
- MD5Add(sc->sc_inc.inc6_faddr);
- add.laddr = 0;
- add.faddr = 0;
- } else
+ *flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK;
#endif
- {
- add.laddr = sc->sc_inc.inc_laddr.s_addr;
- add.faddr = sc->sc_inc.inc_faddr.s_addr;
+
+ /* Additional parameters are stored in the timestamp if present. */
+ if (sc->sc_flags & SCF_TIMESTAMP) {
+ data = ((sc->sc_flags & SCF_SIGNATURE) ? 1 : 0); /* TCP-MD5, 1 bit */
+ data |= ((sc->sc_flags & SCF_SACK) ? 1 : 0) << 1; /* SACK, 1 bit */
+ data |= sc->sc_requested_s_scale << 2; /* SWIN scale, 4 bits */
+ data |= sc->sc_requested_r_scale << 6; /* RWIN scale, 4 bits */
+ data |= md5_buffer[2] << 10; /* more digest bits */
+ data ^= md5_buffer[3];
+ sc->sc_ts = data;
+ sc->sc_tsoff = data - ticks; /* after XOR */
}
- add.lport = sc->sc_inc.inc_lport;
- add.fport = sc->sc_inc.inc_fport;
- add.secbits[0] = tcp_secret[idx].ts_secbits[0];
- add.secbits[1] = tcp_secret[idx].ts_secbits[1];
- add.secbits[2] = tcp_secret[idx].ts_secbits[2];
- add.secbits[3] = tcp_secret[idx].ts_secbits[3];
- MD5Add(add);
- MD5Final((u_char *)&md5_buffer, &syn_ctx);
- data ^= (md5_buffer[0] & ~SYNCOOKIE_WNDMASK);
- *flowid = md5_buffer[1];
+
tcpstat.tcps_sc_sendcookie++;
- return (data);
+ return;
}
static struct syncache *
-syncookie_lookup(inc, th, so)
- struct in_conninfo *inc;
- struct tcphdr *th;
- struct socket *so;
+syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
+ struct syncache *sc, struct tcpopt *to, struct tcphdr *th,
+ struct socket *so)
{
- u_int32_t md5_buffer[4];
- struct syncache *sc;
- u_int32_t data;
- int wnd, idx;
- struct md5_add add;
- MD5_CTX syn_ctx;
-
- /* NB: single threaded; could add INP_INFO_WLOCK_ASSERT(&tcbinfo) */
-
- data = (th->th_ack - 1) ^ (th->th_seq - 1); /* remove ISS */
- idx = data & SYNCOOKIE_WNDMASK;
- if (tcp_secret[idx].ts_expire < ticks ||
- sototcpcb(so)->ts_recent + SYNCOOKIE_TIMEOUT < ticks)
+ MD5_CTX ctx;
+ u_int32_t md5_buffer[MD5_DIGEST_LENGTH / sizeof(u_int32_t)];
+ u_int32_t data = 0;
+ u_int32_t *secbits;
+ tcp_seq ack, seq;
+ int off, mss, wnd, flags;
+
+ SCH_LOCK_ASSERT(sch);
+
+ /*
+ * Pull information out of SYN-ACK/ACK and
+ * revert sequence number advances.
+ */
+ ack = th->th_ack - 1;
+ seq = th->th_seq - 1;
+ off = (ack >> 1) & 0x7;
+ mss = (ack >> 4) & 0x7;
+ flags = ack & 0x7f;
+
+ /* Which of the two secrets to use. */
+ secbits = (flags & 0x1) ? sch->sch_secbits_odd : sch->sch_secbits_even;
+
+ /*
+ * The secret wasn't updated for the lifetime of a syncookie,
+ * so this SYN-ACK/ACK is either too old (replay) or totally bogus.
+ */
+ if (sch->sch_reseed < time_uptime) {
return (NULL);
- MD5Init(&syn_ctx);
-#ifdef INET6
- if (inc->inc_isipv6) {
- MD5Add(inc->inc6_laddr);
- MD5Add(inc->inc6_faddr);
- add.laddr = 0;
- add.faddr = 0;
- } else
-#endif
- {
- add.laddr = inc->inc_laddr.s_addr;
- add.faddr = inc->inc_faddr.s_addr;
}
- add.lport = inc->inc_lport;
- add.fport = inc->inc_fport;
- add.secbits[0] = tcp_secret[idx].ts_secbits[0];
- add.secbits[1] = tcp_secret[idx].ts_secbits[1];
- add.secbits[2] = tcp_secret[idx].ts_secbits[2];
- add.secbits[3] = tcp_secret[idx].ts_secbits[3];
- MD5Add(add);
- MD5Final((u_char *)&md5_buffer, &syn_ctx);
- data ^= md5_buffer[0];
- if ((data & ~SYNCOOKIE_DATAMASK) != 0)
- return (NULL);
- data = data >> SYNCOOKIE_WNDBITS;
- sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT | M_ZERO);
- if (sc == NULL)
+ /* Recompute the digest so we can compare it. */
+ MD5Init(&ctx);
+ MD5Update(&ctx, ((u_int8_t *)secbits) + off,
+ SYNCOOKIE_SECRET_SIZE * sizeof(*secbits) - off);
+ MD5Update(&ctx, secbits, off);
+ MD5Update(&ctx, inc, sizeof(*inc));
+ MD5Update(&ctx, &seq, sizeof(seq));
+ MD5Update(&ctx, &flags, sizeof(flags));
+ MD5Final((u_int8_t *)&md5_buffer, &ctx);
+
+ /* Does the digest part of our ACK'ed ISS match? */
+ if ((ack & (~0x7f)) != (md5_buffer[0] << 7))
return (NULL);
- /*
- * Fill in the syncache values.
- * XXX duplicate code from syncache_add
- */
+
+ /* Does the digest part of our reflected timestamp match? */
+ if (to->to_flags & TOF_TS) {
+ data = md5_buffer[3] ^ to->to_tsecr;
+ if ((data & (~0x3ff)) != (md5_buffer[2] << 10))
+ return (NULL);
+ }
+
+ /* Fill in the syncache values. */
+ bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
sc->sc_ipopts = NULL;
- sc->sc_inc.inc_fport = inc->inc_fport;
- sc->sc_inc.inc_lport = inc->inc_lport;
- sc->sc_tp = sototcpcb(so);
+
+ sc->sc_irs = seq;
+ sc->sc_iss = ack;
+
#ifdef INET6
- sc->sc_inc.inc_isipv6 = inc->inc_isipv6;
if (inc->inc_isipv6) {
- sc->sc_inc.inc6_faddr = inc->inc6_faddr;
- sc->sc_inc.inc6_laddr = inc->inc6_laddr;
- if (sc->sc_tp->t_inpcb->in6p_flags & IN6P_AUTOFLOWLABEL)
+ if (sotoinpcb(so)->in6p_flags & IN6P_AUTOFLOWLABEL)
sc->sc_flowlabel = md5_buffer[1] & IPV6_FLOWLABEL_MASK;
} else
#endif
{
- sc->sc_inc.inc_faddr = inc->inc_faddr;
- sc->sc_inc.inc_laddr = inc->inc_laddr;
+ sc->sc_ip_ttl = sotoinpcb(so)->inp_ip_ttl;
+ sc->sc_ip_tos = sotoinpcb(so)->inp_ip_tos;
}
- sc->sc_irs = th->th_seq - 1;
- sc->sc_iss = th->th_ack - 1;
+
+ /* Additional parameters that were encoded in the timestamp. */
+ if (data) {
+ sc->sc_flags |= SCF_TIMESTAMP;
+ sc->sc_tsreflect = to->to_tsval;
+ sc->sc_ts = to->to_tsecr;
+ sc->sc_tsoff = to->to_tsecr - ticks;
+ sc->sc_flags |= (data & 0x1) ? SCF_SIGNATURE : 0;
+ sc->sc_flags |= ((data >> 1) & 0x1) ? SCF_SACK : 0;
+ sc->sc_requested_s_scale = min((data >> 2) & 0xf,
+ TCP_MAX_WINSHIFT);
+ sc->sc_requested_r_scale = min((data >> 6) & 0xf,
+ TCP_MAX_WINSHIFT);
+ if (sc->sc_requested_s_scale || sc->sc_requested_r_scale)
+ sc->sc_flags |= SCF_WINSCALE;
+ } else
+ sc->sc_flags |= SCF_NOOPT;
+
wnd = sbspace(&so->so_rcv);
wnd = imax(wnd, 0);
wnd = imin(wnd, TCP_MAXWIN);
sc->sc_wnd = wnd;
- sc->sc_flags = 0;
- sc->sc_rxtslot = 0;
- sc->sc_peer_mss = tcp_msstab[data];
+
+ sc->sc_rxmits = 0;
+ sc->sc_peer_mss = tcp_sc_msstab[mss];
+
+ tcpstat.tcps_sc_recvcookie++;
return (sc);
}
+
+/*
+ * Returns the current number of syncache entries. This number
+ * will probably change before you get around to calling
+ * syncache_pcblist.
+ */
+
+int
+syncache_pcbcount(void)
+{
+ struct syncache_head *sch;
+ int count, i;
+
+ for (count = 0, i = 0; i < tcp_syncache.hashsize; i++) {
+ /* No need to lock for a read. */
+ sch = &tcp_syncache.hashbase[i];
+ count += sch->sch_length;
+ }
+ return count;
+}
+
+/*
+ * Exports the syncache entries to userland so that netstat can display
+ * them alongside the other sockets. This function is intended to be
+ * called only from tcp_pcblist.
+ *
+ * Due to concurrency on an active system, the number of pcbs exported
+ * may have no relation to max_pcbs. max_pcbs merely indicates the
+ * amount of space the caller allocated for this function to use.
+ */
+int
+syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported)
+{
+ struct xtcpcb xt;
+ struct syncache *sc;
+ struct syncache_head *sch;
+ int count, error, i;
+
+ for (count = 0, error = 0, i = 0; i < tcp_syncache.hashsize; i++) {
+ sch = &tcp_syncache.hashbase[i];
+ SCH_LOCK(sch);
+ TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
+ if (count >= max_pcbs) {
+ SCH_UNLOCK(sch);
+ goto exit;
+ }
+ bzero(&xt, sizeof(xt));
+ xt.xt_len = sizeof(xt);
+ if (sc->sc_inc.inc_isipv6)
+ xt.xt_inp.inp_vflag = INP_IPV6;
+ else
+ xt.xt_inp.inp_vflag = INP_IPV4;
+ bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc, sizeof (struct in_conninfo));
+ xt.xt_tp.t_inpcb = &xt.xt_inp;
+ xt.xt_tp.t_state = TCPS_SYN_RECEIVED;
+ xt.xt_socket.xso_protocol = IPPROTO_TCP;
+ xt.xt_socket.xso_len = sizeof (struct xsocket);
+ xt.xt_socket.so_type = SOCK_STREAM;
+ xt.xt_socket.so_state = SS_ISCONNECTING;
+ error = SYSCTL_OUT(req, &xt, sizeof xt);
+ if (error) {
+ SCH_UNLOCK(sch);
+ goto exit;
+ }
+ count++;
+ }
+ SCH_UNLOCK(sch);
+ }
+exit:
+ *pcbs_exported = count;
+ return error;
+}
+
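A rough sketch of how a tcp_pcblist()-style handler might use the two functions above; everything except syncache_pcbcount() and syncache_pcblist() (the helper name, the sizing variables) is assumed for illustration:

/*
 * Hypothetical caller sketch: size the sysctl buffer with
 * syncache_pcbcount() and then let syncache_pcblist() fill whatever
 * room is left after the real PCBs have been exported.
 */
static int
export_syncache_tail(struct sysctl_req *req, int space_for_pcbs,
    int tcp_pcbs_written)
{
	int error, sc_exported;

	/* space_for_pcbs was sized earlier using syncache_pcbcount(). */
	error = syncache_pcblist(req, space_for_pcbs - tcp_pcbs_written,
	    &sc_exported);
	return (error);
}
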
Index: ip_fw_pfil.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_fw_pfil.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_fw_pfil.c -L sys/netinet/ip_fw_pfil.c -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_fw_pfil.c
+++ sys/netinet/ip_fw_pfil.c
@@ -22,10 +22,11 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/ip_fw_pfil.c,v 1.19.2.1 2006/02/11 08:19:37 ume Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_fw_pfil.c,v 1.25 2007/10/07 20:44:23 silby Exp $");
+
#if !defined(KLD_MODULE)
#include "opt_ipfw.h"
#include "opt_ipdn.h"
@@ -64,7 +65,12 @@
#include <machine/in_cksum.h>
-static int ipfw_pfil_hooked = 0;
+int fw_enable = 1;
+#ifdef INET6
+int fw6_enable = 1;
+#endif
+
+int ipfw_chg_hook(SYSCTL_HANDLER_ARGS);
/* Dummynet hooks. */
ip_dn_ruledel_t *ip_dn_ruledel_ptr = NULL;
@@ -96,9 +102,6 @@
KASSERT(dir == PFIL_IN, ("ipfw_check_in wrong direction!"));
- if (!fw_enable)
- goto pass;
-
bzero(&args, sizeof(args));
dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL);
@@ -187,6 +190,9 @@
if (!NG_IPFW_LOADED)
goto drop;
return ng_ipfw_input_p(m0, NG_IPFW_IN, &args, 0);
+
+ case IP_FW_NAT:
+ goto again; /* continue with packet */
default:
KASSERT(0, ("%s: unknown retval", __func__));
@@ -217,9 +223,6 @@
KASSERT(dir == PFIL_OUT, ("ipfw_check_out wrong direction!"));
- if (!fw_enable)
- goto pass;
-
bzero(&args, sizeof(args));
dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL);
@@ -316,6 +319,9 @@
goto drop;
return ng_ipfw_input_p(m0, NG_IPFW_OUT, &args, 0);
+ case IP_FW_NAT:
+ goto again; /* continue with packet */
+
default:
KASSERT(0, ("%s: unknown retval", __func__));
}
@@ -417,28 +423,13 @@
ipfw_hook(void)
{
struct pfil_head *pfh_inet;
-#ifdef INET6
- struct pfil_head *pfh_inet6;
-#endif
-
- if (ipfw_pfil_hooked)
- return EEXIST;
pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
if (pfh_inet == NULL)
return ENOENT;
-#ifdef INET6
- pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
- if (pfh_inet6 == NULL)
- return ENOENT;
-#endif
pfil_add_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet);
pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet);
-#ifdef INET6
- pfil_add_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6);
- pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6);
-#endif
return 0;
}
@@ -447,31 +438,86 @@
ipfw_unhook(void)
{
struct pfil_head *pfh_inet;
-#ifdef INET6
- struct pfil_head *pfh_inet6;
-#endif
-
- if (!ipfw_pfil_hooked)
- return ENOENT;
pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
if (pfh_inet == NULL)
return ENOENT;
+
+ pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet);
+ pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet);
+
+ return 0;
+}
+
#ifdef INET6
+static int
+ipfw6_hook(void)
+{
+ struct pfil_head *pfh_inet6;
+
+ pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
+ if (pfh_inet6 == NULL)
+ return ENOENT;
+
+ pfil_add_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6);
+ pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6);
+
+ return 0;
+}
+
+static int
+ipfw6_unhook(void)
+{
+ struct pfil_head *pfh_inet6;
+
pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
if (pfh_inet6 == NULL)
return ENOENT;
-#endif
- pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet);
- pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet);
-#ifdef INET6
pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6);
pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6);
-#endif
return 0;
}
+#endif /* INET6 */
+
+int
+ipfw_chg_hook(SYSCTL_HANDLER_ARGS)
+{
+ int enable = *(int *)arg1;
+ int error;
+
+ error = sysctl_handle_int(oidp, &enable, 0, req);
+ if (error)
+ return (error);
+
+ enable = (enable) ? 1 : 0;
+
+ if (enable == *(int *)arg1)
+ return (0);
+
+ if (arg1 == &fw_enable) {
+ if (enable)
+ error = ipfw_hook();
+ else
+ error = ipfw_unhook();
+ }
+#ifdef INET6
+ if (arg1 == &fw6_enable) {
+ if (enable)
+ error = ipfw6_hook();
+ else
+ error = ipfw6_unhook();
+ }
+#endif
+
+ if (error)
+ return (error);
+
+ *(int *)arg1 = enable;
+
+ return (0);
+}
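The handler above only takes effect once it is attached to the fw_enable/fw6_enable knobs; that wiring lives outside this file (ip_fw2.c in FreeBSD), so the following declarations are an approximation rather than a quote of the actual code:

/* Approximate sysctl wiring; the OID paths and flags are assumptions. */
SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable,
    CTLTYPE_INT | CTLFLAG_RW, &fw_enable, 0,
    ipfw_chg_hook, "I", "Enable ipfw");
#ifdef INET6
SYSCTL_PROC(_net_inet6_ip6_fw, OID_AUTO, enable,
    CTLTYPE_INT | CTLFLAG_RW, &fw6_enable, 0,
    ipfw_chg_hook, "I", "Enable ipfw6");
#endif
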
static int
ipfw_modevent(module_t mod, int type, void *unused)
@@ -480,31 +526,30 @@
switch (type) {
case MOD_LOAD:
- if (ipfw_pfil_hooked) {
- printf("IP firewall already loaded\n");
- err = EEXIST;
- } else {
- if ((err = ipfw_init()) != 0) {
- printf("ipfw_init() error\n");
- break;
- }
- if ((err = ipfw_hook()) != 0) {
- printf("ipfw_hook() error\n");
- break;
- }
- ipfw_pfil_hooked = 1;
+ if ((err = ipfw_init()) != 0) {
+ printf("ipfw_init() error\n");
+ break;
+ }
+ if ((err = ipfw_hook()) != 0) {
+ printf("ipfw_hook() error\n");
+ break;
}
+#ifdef INET6
+ if ((err = ipfw6_hook()) != 0) {
+ printf("ipfw_hook() error\n");
+ break;
+ }
+#endif
break;
case MOD_UNLOAD:
- if (ipfw_pfil_hooked) {
- if ((err = ipfw_unhook()) > 0)
- break;
- ipfw_destroy();
- ipfw_pfil_hooked = 0;
- } else {
- printf("IP firewall already unloaded\n");
- }
+ if ((err = ipfw_unhook()) > 0)
+ break;
+#ifdef INET6
+ if ((err = ipfw6_unhook()) > 0)
+ break;
+#endif
+ ipfw_destroy();
break;
default:
Index: ip_fw.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_fw.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_fw.h -L sys/netinet/ip_fw.h -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_fw.h
+++ sys/netinet/ip_fw.h
@@ -22,7 +22,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/netinet/ip_fw.h,v 1.100.2.3 2006/02/17 16:46:47 ru Exp $
+ * $FreeBSD: src/sys/netinet/ip_fw.h,v 1.110.4.1 2008/01/28 17:44:30 rwatson Exp $
*/
#ifndef _IPFW2_H
@@ -124,6 +124,7 @@
O_TEE, /* arg1=port number */
O_FORWARD_IP, /* fwd sockaddr */
O_FORWARD_MAC, /* fwd mac */
+ O_NAT, /* nope */
/*
* More opcodes.
@@ -157,6 +158,9 @@
O_UNREACH6, /* arg1=icmpv6 code arg (deny) */
+ O_TAG, /* arg1=tag number */
+ O_TAGGED, /* arg1=tag number */
+
O_LAST_OPCODE /* not an opcode! */
};
@@ -170,6 +174,8 @@
#define EXT_AH 0x8
#define EXT_ESP 0x10
#define EXT_DSTOPTS 0x20
+#define EXT_RTHDR0 0x40
+#define EXT_RTHDR2 0x80
/*
* Template for instructions.
@@ -215,6 +221,8 @@
*/
#define F_INSN_SIZE(t) ((sizeof (t))/sizeof(u_int32_t))
+#define MTAG_IPFW 1148380143 /* IPFW-tagged cookie */
+
/*
* This is used to store an array of 16-bit entries (ports etc.)
*/
@@ -271,19 +279,6 @@
} ipfw_insn_if;
/*
- * This is used for pipe and queue actions, which need to store
- * a single pointer (which can have different size on different
- * architectures.
- * Note that, because of previous instructions, pipe_ptr might
- * be unaligned in the overall structure, so it needs to be
- * manipulated with care.
- */
-typedef struct _ipfw_insn_pipe {
- ipfw_insn o;
- void *pipe_ptr; /* XXX */
-} ipfw_insn_pipe;
-
-/*
* This is used for storing an altq queue id number.
*/
typedef struct _ipfw_insn_altq {
@@ -315,6 +310,75 @@
u_int32_t log_left; /* how many left to log */
} ipfw_insn_log;
+/*
+ * Data structures required by both ipfw(8) and ipfw(4) but not part of the
+ * management API are protected by IPFW_INTERNAL.
+ */
+#ifdef IPFW_INTERNAL
+/* Server pool support (LSNAT). */
+struct cfg_spool {
+ LIST_ENTRY(cfg_spool) _next; /* chain of spool instances */
+ struct in_addr addr;
+ u_short port;
+};
+#endif
+
+/* Redirect modes id. */
+#define REDIR_ADDR 0x01
+#define REDIR_PORT 0x02
+#define REDIR_PROTO 0x04
+
+#ifdef IPFW_INTERNAL
+/* Nat redirect configuration. */
+struct cfg_redir {
+ LIST_ENTRY(cfg_redir) _next; /* chain of redir instances */
+ u_int16_t mode; /* type of redirect mode */
+ struct in_addr laddr; /* local ip address */
+ struct in_addr paddr; /* public ip address */
+ struct in_addr raddr; /* remote ip address */
+ u_short lport; /* local port */
+ u_short pport; /* public port */
+ u_short rport; /* remote port */
+ u_short pport_cnt; /* number of public ports */
+ u_short rport_cnt; /* number of remote ports */
+ int proto; /* protocol: tcp/udp */
+ struct alias_link **alink;
+ /* number of entries in spool chain */
+ u_int16_t spool_cnt;
+ /* chain of spool instances */
+ LIST_HEAD(spool_chain, cfg_spool) spool_chain;
+};
+#endif
+
+#define NAT_BUF_LEN 1024
+
+#ifdef IPFW_INTERNAL
+/* Nat configuration data struct. */
+struct cfg_nat {
+ /* chain of nat instances */
+ LIST_ENTRY(cfg_nat) _next;
+ int id; /* nat id */
+ struct in_addr ip; /* nat ip address */
+ char if_name[IF_NAMESIZE]; /* interface name */
+ int mode; /* aliasing mode */
+ struct libalias *lib; /* libalias instance */
+ /* number of entries in redir chain */
+ int redir_cnt;
+ /* chain of redir instances */
+ LIST_HEAD(redir_chain, cfg_redir) redir_chain;
+};
+#endif
+
+#define SOF_NAT sizeof(struct cfg_nat)
+#define SOF_REDIR sizeof(struct cfg_redir)
+#define SOF_SPOOL sizeof(struct cfg_spool)
+
+/* Nat command. */
+typedef struct _ipfw_insn_nat {
+ ipfw_insn o;
+ struct cfg_nat *nat;
+} ipfw_insn_nat;
+
/* Apply ipv6 mask on ipv6 addr */
#define APPLY_MASK(addr,mask) \
(addr)->__u6_addr.__u6_addr32[0] &= (mask)->__u6_addr.__u6_addr32[0]; \
@@ -359,6 +423,7 @@
* + if a rule has a "log" option, then the first action
* (at ACTION_PTR(r)) MUST be O_LOG
* + if a rule has an "altq" option, it comes after "log"
+ * + if a rule has an O_TAG option, it comes after "log" and "altq"
*
* NOTE: we use a simple linked list of rules because we never need
* to delete a rule without scanning the list. We do not use
@@ -490,6 +555,7 @@
IP_FW_DUMMYNET,
IP_FW_NETGRAPH,
IP_FW_NGTEE,
+ IP_FW_NAT,
};
/* flags for divert mtag */
@@ -528,6 +594,7 @@
struct inpcb *inp;
struct _ip6dn_args dummypar; /* dummynet->ip6_output */
+ struct sockaddr_in hopstore; /* store here if cannot use a pointer */
};
/*
@@ -546,12 +613,13 @@
int ipfw_init(void);
void ipfw_destroy(void);
-void flush_pipe_ptrs(struct dn_flow_set *match); /* used by dummynet */
-
typedef int ip_fw_ctl_t(struct sockopt *);
extern ip_fw_ctl_t *ip_fw_ctl_ptr;
extern int fw_one_pass;
extern int fw_enable;
+#ifdef INET6
+extern int fw6_enable;
+#endif
/* For kernel ipfw_ether and ipfw_bridge. */
typedef int ip_fw_chk_t(struct ip_fw_args *args);
--- /dev/null
+++ sys/netinet/sctp_structs.h
@@ -0,0 +1,1047 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_structs.h,v 1.13 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_structs.h,v 1.25.2.2 2007/12/09 20:23:47 rrs Exp $");
+
+#ifndef __sctp_structs_h__
+#define __sctp_structs_h__
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctp_auth.h>
+
+struct sctp_timer {
+ sctp_os_timer_t timer;
+
+ int type;
+ /*
+ * Depending on the timer type these will be set up and cast to the
+ * appropriate entity.
+ */
+ void *ep;
+ void *tcb;
+ void *net;
+
+ /* for sanity checking */
+ void *self;
+ uint32_t ticks;
+ uint32_t stopped_from;
+};
+
+
+struct sctp_foo_stuff {
+ struct sctp_inpcb *inp;
+ uint32_t lineno;
+ uint32_t ticks;
+ int updown;
+};
+
+
+/*
+ * This is the information we track on each interface that we know about from
+ * the distant end.
+ */
+TAILQ_HEAD(sctpnetlisthead, sctp_nets);
+
+struct sctp_stream_reset_list {
+ TAILQ_ENTRY(sctp_stream_reset_list) next_resp;
+ uint32_t tsn;
+ int number_entries;
+ struct sctp_stream_reset_out_request req;
+};
+
+TAILQ_HEAD(sctp_resethead, sctp_stream_reset_list);
+
+/*
+ * Users of the iterator need to malloc an iterator with a call to
+ * sctp_initiate_iterator(inp_func, assoc_func, inp_func, pcb_flags, pcb_features,
+ * asoc_state, void-ptr-arg, uint32-arg, end_func, inp);
+ *
+ * Use the following two defines if you don't care what pcb flags are on the EP
+ * and/or you don't care what state the association is in.
+ *
+ * Note that if you specify an INP as the last argument then ONLY each
+ * association of that single INP will be executed upon. Note that the pcb
+ * flags STILL apply, so if the inp you specify has different pcb_flags than
+ * what you put in pcb_flags, nothing will happen. Use SCTP_PCB_ANY_FLAGS to
+ * ensure the inp you specify gets processed. (A hypothetical call sketch
+ * follows the typedefs below.)
+ */
+#define SCTP_PCB_ANY_FLAGS 0x00000000
+#define SCTP_PCB_ANY_FEATURES 0x00000000
+#define SCTP_ASOC_ANY_STATE 0x00000000
+
+typedef void (*asoc_func) (struct sctp_inpcb *, struct sctp_tcb *, void *ptr,
+ uint32_t val);
+typedef int (*inp_func) (struct sctp_inpcb *, void *ptr, uint32_t val);
+typedef void (*end_func) (void *ptr, uint32_t val);
+
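Following the parameter order documented in the comment above, a hypothetical invocation might look like this; the callback bodies and the exact prototype of sctp_initiate_iterator() are assumptions, only the typedefs come from this header:

/* Hypothetical callbacks matching the asoc_func/inp_func/end_func typedefs. */
static void
my_asoc_func(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
    uint32_t val)
{
	/* per-association work goes here */
}

static int
my_inp_func(struct sctp_inpcb *inp, void *ptr, uint32_t val)
{
	/* per-endpoint work goes here */
	return (0);
}

static void
my_end_func(void *ptr, uint32_t val)
{
	/* called once the iterator has finished */
}

/*
 * Walk every association of every endpoint (argument order as documented
 * in the comment above; not a verified prototype):
 *
 * sctp_initiate_iterator(my_inp_func, my_asoc_func, my_inp_func,
 *     SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES, SCTP_ASOC_ANY_STATE,
 *     my_arg_ptr, my_arg_val, my_end_func, NULL);
 */
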
+struct sctp_iterator {
+ TAILQ_ENTRY(sctp_iterator) sctp_nxt_itr;
+ struct sctp_timer tmr;
+ struct sctp_inpcb *inp; /* current endpoint */
+ struct sctp_tcb *stcb; /* current assoc */
+ asoc_func function_assoc; /* per assoc function */
+ inp_func function_inp; /* per endpoint function */
+ inp_func function_inp_end; /* end INP function */
+ end_func function_atend;/* iterator completion function */
+ void *pointer; /* pointer for apply func to use */
+ uint32_t val; /* value for apply func to use */
+ uint32_t pcb_flags; /* endpoint flags being checked */
+ uint32_t pcb_features; /* endpoint features being checked */
+ uint32_t asoc_state; /* assoc state being checked */
+ uint32_t iterator_flags;
+ uint8_t no_chunk_output;
+ uint8_t done_current_ep;
+};
+
+/* iterator_flags values */
+#define SCTP_ITERATOR_DO_ALL_INP 0x00000001
+#define SCTP_ITERATOR_DO_SINGLE_INP 0x00000002
+
+TAILQ_HEAD(sctpiterators, sctp_iterator);
+
+struct sctp_copy_all {
+ struct sctp_inpcb *inp; /* ep */
+ struct mbuf *m;
+ struct sctp_sndrcvinfo sndrcv;
+ int sndlen;
+ int cnt_sent;
+ int cnt_failed;
+};
+
+struct sctp_asconf_iterator {
+ struct sctpladdr list_of_work;
+ int cnt;
+};
+
+struct sctp_net_route {
+ sctp_rtentry_t *ro_rt;
+ union sctp_sockstore _l_addr; /* remote peer addr */
+ struct sctp_ifa *_s_addr; /* our selected src addr */
+};
+
+struct htcp {
+ uint16_t alpha; /* Fixed point arith, << 7 */
+ uint8_t beta; /* Fixed point arith, << 7 */
+ uint8_t modeswitch; /* Delay modeswitch until we had at least one
+ * congestion event */
+ uint32_t last_cong; /* Time since last congestion event end */
+ uint32_t undo_last_cong;
+ uint16_t bytes_acked;
+ uint32_t bytecount;
+ uint32_t minRTT;
+ uint32_t maxRTT;
+
+ uint32_t undo_maxRTT;
+ uint32_t undo_old_maxB;
+
+ /* Bandwidth estimation */
+ uint32_t minB;
+ uint32_t maxB;
+ uint32_t old_maxB;
+ uint32_t Bi;
+ uint32_t lasttime;
+};
+
+
+struct sctp_nets {
+ TAILQ_ENTRY(sctp_nets) sctp_next; /* next link */
+
+ /*
+ * Things on the top half may be able to be split into a common
+ * structure shared by all.
+ */
+ struct sctp_timer pmtu_timer;
+
+ /*
+ * The following two in combination equate to a route entry for v6
+ * or v4.
+ */
+ struct sctp_net_route ro;
+
+ /* mtu discovered so far */
+ uint32_t mtu;
+ uint32_t ssthresh; /* not sure about this one for split */
+
+ /* smoothed average things for RTT and RTO itself */
+ int lastsa;
+ int lastsv;
+ unsigned int RTO;
+
+ /* This is used for SHUTDOWN/SHUTDOWN-ACK/SEND or INIT timers */
+ struct sctp_timer rxt_timer;
+ struct sctp_timer fr_timer; /* for early fr */
+
+ /* last time in seconds I sent to it */
+ struct timeval last_sent_time;
+ int ref_count;
+
+ /* Congestion stats per destination */
+ /*
+ * flight size variables and such, sorry Vern, I could not avoid
+ * this if I wanted performance :>
+ */
+ uint32_t flight_size;
+ uint32_t cwnd; /* actual cwnd */
+ uint32_t prev_cwnd; /* cwnd before any processing */
+ uint32_t partial_bytes_acked; /* in CA tracks when to incr a MTU */
+ uint32_t prev_rtt;
+ /* tracking variables to avoid the aloc/free in sack processing */
+ unsigned int net_ack;
+ unsigned int net_ack2;
+
+ /*
+ * JRS - 5/8/07 - Variable to track last time a destination was
+ * active for CMT PF
+ */
+ uint32_t last_active;
+
+ /*
+ * CMT variables (iyengar at cis.udel.edu)
+ */
+ uint32_t this_sack_highest_newack; /* tracks highest TSN newly
+ * acked for a given dest in
+ * the current SACK. Used in
+ * SFR and HTNA algos */
+ uint32_t pseudo_cumack; /* CMT CUC algorithm. Maintains next expected
+ * pseudo-cumack for this destination */
+ uint32_t rtx_pseudo_cumack; /* CMT CUC algorithm. Maintains next
+ * expected pseudo-cumack for this
+ * destination */
+
+ /* CMT fast recovery variables */
+ uint32_t fast_recovery_tsn;
+ uint32_t heartbeat_random1;
+ uint32_t heartbeat_random2;
+ uint32_t tos_flowlabel;
+
+ struct timeval start_time; /* time when this net was created */
+
+ uint32_t marked_retrans;/* number of DATA chunks marked for timer
+ * based retransmissions */
+ uint32_t marked_fastretrans;
+
+ /* if this guy is ok or not ... status */
+ uint16_t dest_state;
+ /* number of transmit failures to down this guy */
+ uint16_t failure_threshold;
+ /* error stats on destination */
+ uint16_t error_count;
+
+ uint8_t fast_retran_loss_recovery;
+ uint8_t will_exit_fast_recovery;
+ /* Flags that probably can be combined into dest_state */
+ uint8_t fast_retran_ip; /* fast retransmit in progress */
+ uint8_t hb_responded;
+ uint8_t saw_newack; /* CMT's SFR algorithm flag */
+ uint8_t src_addr_selected; /* if we split we move */
+ uint8_t indx_of_eligible_next_to_use;
+ uint8_t addr_is_local; /* it's a local address (if known) could move
+ * in split */
+
+ /*
+ * CMT variables (iyengar at cis.udel.edu)
+ */
+ uint8_t find_pseudo_cumack; /* CMT CUC algorithm. Flag used to
+ * find a new pseudocumack. This flag
+ * is set after a new pseudo-cumack
+ * has been received and indicates
+ * that the sender should find the
+ * next pseudo-cumack expected for
+ * this destination */
+ uint8_t find_rtx_pseudo_cumack; /* CMT CUCv2 algorithm. Flag used to
+ * find a new rtx-pseudocumack. This
+ * flag is set after a new
+ * rtx-pseudo-cumack has been received
+ * and indicates that the sender
+ * should find the next
+ * rtx-pseudo-cumack expected for this
+ * destination */
+ uint8_t new_pseudo_cumack; /* CMT CUC algorithm. Flag used to
+ * indicate if a new pseudo-cumack or
+ * rtx-pseudo-cumack has been received */
+ uint8_t window_probe; /* Doing a window probe? */
+ uint8_t RTO_measured; /* Have we done the first measure */
+ uint8_t last_hs_used; /* index into the last HS table entry we used */
+ /* JRS - struct used in HTCP algorithm */
+ struct htcp htcp_ca;
+};
+
+
+struct sctp_data_chunkrec {
+ uint32_t TSN_seq; /* the TSN of this transmit */
+ uint16_t stream_seq; /* the stream sequence number of this transmit */
+ uint16_t stream_number; /* the stream number of this guy */
+ uint32_t payloadtype;
+ uint32_t context; /* from send */
+
+ /* ECN Nonce: Nonce Value for this chunk */
+ uint8_t ect_nonce;
+
+ /*
+ * part of the Highest sacked algorithm to be able to stroke counts
+ * on ones that are FR'd.
+ */
+ uint32_t fast_retran_tsn; /* sending_seq at the time of FR */
+ struct timeval timetodrop; /* time we drop it from queue */
+ uint8_t doing_fast_retransmit;
+ uint8_t rcv_flags; /* flags pulled from data chunk on inbound for
+ * outbound holds sending flags for PR-SCTP. */
+ uint8_t state_flags;
+ uint8_t chunk_was_revoked;
+};
+
+TAILQ_HEAD(sctpchunk_listhead, sctp_tmit_chunk);
+
+/* The lower byte is used to enumerate PR_SCTP policies */
+#define CHUNK_FLAGS_PR_SCTP_TTL SCTP_PR_SCTP_TTL
+#define CHUNK_FLAGS_PR_SCTP_BUF SCTP_PR_SCTP_BUF
+#define CHUNK_FLAGS_PR_SCTP_RTX SCTP_PR_SCTP_RTX
+
+/* The upper byte is used as a bit mask */
+#define CHUNK_FLAGS_FRAGMENT_OK 0x0100
+
+struct chk_id {
+ uint16_t id;
+ uint16_t can_take_data;
+};
+
+
+struct sctp_tmit_chunk {
+ union {
+ struct sctp_data_chunkrec data;
+ struct chk_id chunk_id;
+ } rec;
+ struct sctp_association *asoc; /* bp to asoc this belongs to */
+ struct timeval sent_rcv_time; /* filled in if RTT being calculated */
+ struct mbuf *data; /* pointer to mbuf chain of data */
+ struct mbuf *last_mbuf; /* pointer to last mbuf in chain */
+ struct sctp_nets *whoTo;
+ TAILQ_ENTRY(sctp_tmit_chunk) sctp_next; /* next link */
+ int32_t sent; /* the send status */
+ uint16_t snd_count; /* number of times I sent */
+ uint16_t flags; /* flags, such as FRAGMENT_OK */
+ uint16_t send_size;
+ uint16_t book_size;
+ uint16_t mbcnt;
+ uint8_t pad_inplace;
+ uint8_t do_rtt;
+ uint8_t book_size_scale;
+ uint8_t addr_over; /* flag which is set if the dest address for
+ * this chunk is overridden by user. Used for
+ * CMT (iyengar at cis.udel.edu, 2005/06/21) */
+ uint8_t no_fr_allowed;
+ uint8_t pr_sctp_on;
+ uint8_t copy_by_ref;
+ uint8_t window_probe;
+};
+
+/*
+ * The first part of this structure MUST be the entire sinfo structure. Maybe
+ * I should have made it a sub structure... we can circle back later and do
+ * that if we want.
+ */
+struct sctp_queued_to_read { /* sinfo structure plus more */
+ uint16_t sinfo_stream; /* off the wire */
+ uint16_t sinfo_ssn; /* off the wire */
+ uint16_t sinfo_flags; /* SCTP_UNORDERED from wire use SCTP_EOF for
+ * EOR */
+ uint32_t sinfo_ppid; /* off the wire */
+ uint32_t sinfo_context; /* pick this up from assoc def context? */
+ uint32_t sinfo_timetolive; /* not used by kernel */
+ uint32_t sinfo_tsn; /* Use this in reassembly as first TSN */
+ uint32_t sinfo_cumtsn; /* Use this in reassembly as last TSN */
+ sctp_assoc_t sinfo_assoc_id; /* our assoc id */
+ /* Non sinfo stuff */
+ uint32_t length; /* length of data */
+ uint32_t held_length; /* length held in sb */
+ struct sctp_nets *whoFrom; /* where it came from */
+ struct mbuf *data; /* front of the mbuf chain of data with
+ * PKT_HDR */
+ struct mbuf *tail_mbuf; /* used for multi-part data */
+ struct mbuf *aux_data; /* used to hold/cache control if o/s does not
+ * take it from us */
+ struct sctp_tcb *stcb; /* assoc, used for window update */
+ TAILQ_ENTRY(sctp_queued_to_read) next;
+ uint16_t port_from;
+ uint16_t spec_flags; /* Flags to hold the notification field */
+ uint8_t do_not_ref_stcb;
+ uint8_t end_added;
+ uint8_t pdapi_aborted;
+ uint8_t some_taken;
+};
+
+/* This data structure will be on the outbound
+ * stream queues. Data will be pulled off from
+ * the front of the mbuf data and chunk-ified
+ * by the output routines. We will custom
+ * fit every chunk we pull to the send/sent
+ * queue to make up the next full packet
+ * if we can. An entry cannot be removed
+ * from the stream_out queue until
+ * the msg_is_complete flag is set. This
+ * means at times data/tail_mbuf MIGHT
+ * be NULL.. If that occurs it happens
+ * for one of two reasons. Either the user
+ * is blocked on a send() call and has not
+ * awoken to copy more data down... OR
+ * the user is in the explicit MSG_EOR mode
+ * and wrote some data, but has not completed
+ * sending.
+ */
+struct sctp_stream_queue_pending {
+ struct mbuf *data;
+ struct mbuf *tail_mbuf;
+ struct timeval ts;
+ struct sctp_nets *net;
+ TAILQ_ENTRY(sctp_stream_queue_pending) next;
+ uint32_t length;
+ uint32_t timetolive;
+ uint32_t ppid;
+ uint32_t context;
+ uint16_t sinfo_flags;
+ uint16_t stream;
+ uint16_t strseq;
+ uint16_t act_flags;
+ uint8_t msg_is_complete;
+ uint8_t some_taken;
+ uint8_t addr_over;
+ uint8_t pr_sctp_on;
+ uint8_t sender_all_done;
+ uint8_t put_last_out;
+};
+
+/*
+ * this struct contains info that is used to track inbound stream data and
+ * help with ordering.
+ */
+TAILQ_HEAD(sctpwheelunrel_listhead, sctp_stream_in);
+struct sctp_stream_in {
+ struct sctp_readhead inqueue;
+ uint16_t stream_no;
+ uint16_t last_sequence_delivered; /* used for re-order */
+ uint8_t delivery_started;
+};
+
+/* This struct is used to track the traffic on outbound streams */
+TAILQ_HEAD(sctpwheel_listhead, sctp_stream_out);
+struct sctp_stream_out {
+ struct sctp_streamhead outqueue;
+ TAILQ_ENTRY(sctp_stream_out) next_spoke; /* next link in wheel */
+ uint16_t stream_no;
+ uint16_t next_sequence_sent; /* next one I expect to send out */
+ uint8_t last_msg_incomplete;
+};
+
+/* used to keep track of the addresses yet to try to add/delete */
+TAILQ_HEAD(sctp_asconf_addrhead, sctp_asconf_addr);
+struct sctp_asconf_addr {
+ TAILQ_ENTRY(sctp_asconf_addr) next;
+ struct sctp_asconf_addr_param ap;
+ struct sctp_ifa *ifa; /* save the ifa for add/del ip */
+ uint8_t sent; /* has this been sent yet? */
+};
+
+struct sctp_scoping {
+ uint8_t ipv4_addr_legal;
+ uint8_t ipv6_addr_legal;
+ uint8_t loopback_scope;
+ uint8_t ipv4_local_scope;
+ uint8_t local_scope;
+ uint8_t site_scope;
+};
+
+#define SCTP_TSN_LOG_SIZE 40
+
+struct sctp_tsn_log {
+ void *stcb;
+ uint32_t tsn;
+ uint16_t strm;
+ uint16_t seq;
+ uint16_t sz;
+ uint16_t flgs;
+ uint16_t in_pos;
+ uint16_t in_out;
+};
+
+#define SCTP_FS_SPEC_LOG_SIZE 200
+struct sctp_fs_spec_log {
+ uint32_t sent;
+ uint32_t total_flight;
+ uint32_t tsn;
+ uint16_t book;
+ uint8_t incr;
+ uint8_t decr;
+};
+
+/* This struct is here to cut out the compatibility
+ * pad that bulks up both the inp and stcb. The non
+ * pad portion MUST stay in complete sync with
+ * sctp_sndrcvinfo... i.e. if sinfo_xxxx is added
+ * this must be done here too.
+ */
+struct sctp_nonpad_sndrcvinfo {
+ uint16_t sinfo_stream;
+ uint16_t sinfo_ssn;
+ uint16_t sinfo_flags;
+ uint32_t sinfo_ppid;
+ uint32_t sinfo_context;
+ uint32_t sinfo_timetolive;
+ uint32_t sinfo_tsn;
+ uint32_t sinfo_cumtsn;
+ sctp_assoc_t sinfo_assoc_id;
+};
+
+/*
+ * JRS - Structure to hold function pointers to the functions responsible
+ * for congestion control.
+ */
+
+struct sctp_cc_functions {
+ void (*sctp_set_initial_cc_param) (struct sctp_tcb *stcb, struct sctp_nets *net);
+ void (*sctp_cwnd_update_after_sack) (struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit);
+ void (*sctp_cwnd_update_after_fr) (struct sctp_tcb *stcb,
+ struct sctp_association *asoc);
+ void (*sctp_cwnd_update_after_timeout) (struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+ void (*sctp_cwnd_update_after_ecn_echo) (struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+ void (*sctp_cwnd_update_after_packet_dropped) (struct sctp_tcb *stcb,
+ struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
+ uint32_t * bottle_bw, uint32_t * on_queue);
+ void (*sctp_cwnd_update_after_output) (struct sctp_tcb *stcb,
+ struct sctp_nets *net, int burst_limit);
+ void (*sctp_cwnd_update_after_fr_timer) (struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets *net);
+};
+
+/* used to save ASCONF-ACK chunks for retransmission */
+TAILQ_HEAD(sctp_asconf_ackhead, sctp_asconf_ack);
+struct sctp_asconf_ack {
+ TAILQ_ENTRY(sctp_asconf_ack) next;
+ uint32_t serial_number;
+ struct sctp_nets *last_sent_to;
+ struct mbuf *data;
+ uint16_t len;
+};
+
+/*
+ * Here we have information about each individual association that we track.
+ * In production we would probably be more dynamic, but for ease of
+ * implementation we will have a fixed array that we hunt for in a linear
+ * fashion.
+ */
+struct sctp_association {
+ /* association state */
+ int state;
+
+ /* queue of pending addrs to add/delete */
+ struct sctp_asconf_addrhead asconf_queue;
+
+ struct timeval time_entered; /* time we entered state */
+ struct timeval time_last_rcvd;
+ struct timeval time_last_sent;
+ struct timeval time_last_sat_advance;
+ struct sctp_nonpad_sndrcvinfo def_send;
+
+ /* timers and such */
+ struct sctp_timer hb_timer; /* hb timer */
+ struct sctp_timer dack_timer; /* Delayed ack timer */
+ struct sctp_timer asconf_timer; /* asconf */
+ struct sctp_timer strreset_timer; /* stream reset */
+ struct sctp_timer shut_guard_timer; /* shutdown guard */
+ struct sctp_timer autoclose_timer; /* automatic close timer */
+ struct sctp_timer delayed_event_timer; /* timer for delayed events */
+ struct sctp_timer delete_prim_timer; /* deleting primary dst */
+
+ /* list of restricted local addresses */
+ struct sctpladdr sctp_restricted_addrs;
+
+ /* last local address pending deletion (waiting for an address add) */
+ struct sctp_ifa *asconf_addr_del_pending;
+ /* Deleted primary destination (used to stop timer) */
+ struct sctp_nets *deleted_primary;
+
+ struct sctpnetlisthead nets; /* remote address list */
+
+ /* Free chunk list */
+ struct sctpchunk_listhead free_chunks;
+
+ /* Control chunk queue */
+ struct sctpchunk_listhead control_send_queue;
+
+ /*
+ * Once a TSN hits the wire it is moved to the sent_queue. We
+ * maintain two counts here (don't know if any but retran_cnt is
+ * needed). The idea is that the sent_queue_retran_cnt reflects how
+ * many chunks have been marked for retransmission by either T3-rxt
+ * or FR.
+ */
+ struct sctpchunk_listhead sent_queue;
+ struct sctpchunk_listhead send_queue;
+
+ /* re-assembly queue for fragmented chunks on the inbound path */
+ struct sctpchunk_listhead reasmqueue;
+
+ /*
+ * this queue is used when we reach a condition that we can NOT put
+ * data into the socket buffer. We track the size of this queue and
+ * set our rwnd to the space in the socket minus also the
+ * size_on_delivery_queue.
+ */
+ struct sctpwheel_listhead out_wheel;
+
+ /*
+ * This pointer will be set to NULL most of the time. But when we
+ * have a fragmented message, where we could not get out all of the
+ * message at the last send then this will point to the stream to go
+ * get data from.
+ */
+ struct sctp_stream_out *locked_on_sending;
+
+ /* If an iterator is looking at me, this is it */
+ struct sctp_iterator *stcb_starting_point_for_iterator;
+
+ /* ASCONF save the last ASCONF-ACK so we can resend it if necessary */
+ struct sctp_asconf_ackhead asconf_ack_sent;
+
+ /*
+ * pointer to last stream reset queued to control queue by us with
+ * requests.
+ */
+ struct sctp_tmit_chunk *str_reset;
+ /*
+ * if Source Address Selection happening, this will rotate through
+ * the link list.
+ */
+ struct sctp_laddr *last_used_address;
+
+ /* stream arrays */
+ struct sctp_stream_in *strmin;
+ struct sctp_stream_out *strmout;
+ uint8_t *mapping_array;
+ /* primary destination to use */
+ struct sctp_nets *primary_destination;
+ /* For CMT */
+ struct sctp_nets *last_net_data_came_from;
+ /* last place I got a data chunk from */
+ struct sctp_nets *last_data_chunk_from;
+ /* last place I got a control from */
+ struct sctp_nets *last_control_chunk_from;
+
+ /* circular looking for output selection */
+ struct sctp_stream_out *last_out_stream;
+
+ /*
+ * wait to the point the cum-ack passes req->send_reset_at_tsn for
+ * any req on the list.
+ */
+ struct sctp_resethead resetHead;
+
+ /* queue of chunks waiting to be sent into the local stack */
+ struct sctp_readhead pending_reply_queue;
+
+ /* JRS - the congestion control functions are in this struct */
+ struct sctp_cc_functions cc_functions;
+ /*
+ * JRS - value to store the currently loaded congestion control
+ * module
+ */
+ uint32_t congestion_control_module;
+
+ uint32_t vrf_id;
+
+ uint32_t cookie_preserve_req;
+ /* ASCONF next seq I am sending out, inits at init-tsn */
+ uint32_t asconf_seq_out;
+ /* ASCONF last received ASCONF from peer, starts at peer's TSN-1 */
+ uint32_t asconf_seq_in;
+
+ /* next seq I am sending in str reset messages */
+ uint32_t str_reset_seq_out;
+ /* next seq I am expecting in str reset messages */
+ uint32_t str_reset_seq_in;
+
+ /* various verification tag information */
+ uint32_t my_vtag; /* The tag to be used. if assoc is re-initited
+ * by remote end, and I have unlocked this
+ * will be regenerated to a new random value. */
+ uint32_t peer_vtag; /* The peers last tag */
+
+ uint32_t my_vtag_nonce;
+ uint32_t peer_vtag_nonce;
+
+ uint32_t assoc_id;
+
+ /* This is the SCTP fragmentation threshold */
+ uint32_t smallest_mtu;
+
+ /*
+ * Special hook for Fast retransmit, allows us to track the highest
+ * TSN that is NEW in this SACK if gap ack blocks are present.
+ */
+ uint32_t this_sack_highest_gap;
+
+ /*
+ * The highest consecutive TSN that has been acked by peer on my
+ * sends
+ */
+ uint32_t last_acked_seq;
+
+ /* The next TSN that I will use in sending. */
+ uint32_t sending_seq;
+
+ /* Original seq number I used ??questionable to keep?? */
+ uint32_t init_seq_number;
+
+
+ /* The Advanced Peer Ack Point, as required by the PR-SCTP */
+ /* (A1 in Section 4.2) */
+ uint32_t advanced_peer_ack_point;
+
+ /*
+ * The highest consecutive TSN at the bottom of the mapping array
+ * (for his sends).
+ */
+ uint32_t cumulative_tsn;
+ /*
+ * Used to track the mapping array and its offset bits. This MAY be
+ * lower than cumulative_tsn.
+ */
+ uint32_t mapping_array_base_tsn;
+ /*
+ * used to track highest TSN we have received and is listed in the
+ * mapping array.
+ */
+ uint32_t highest_tsn_inside_map;
+
+ uint32_t last_echo_tsn;
+ uint32_t last_cwr_tsn;
+ uint32_t fast_recovery_tsn;
+ uint32_t sat_t3_recovery_tsn;
+ uint32_t tsn_last_delivered;
+ /*
+ * For the pd-api we should rewrite this a bit more efficiently. We
+ * could have multiple sctp_queued_to_read's that we are building at
+ * once. Now we only do this when we get ready to deliver to the
+ * socket buffer. Note that we depend on the fact that the struct is
+ * "stuck" on the read queue until we finish all the pd-api.
+ */
+ struct sctp_queued_to_read *control_pdapi;
+
+ uint32_t tsn_of_pdapi_last_delivered;
+ uint32_t pdapi_ppid;
+ uint32_t context;
+ uint32_t last_reset_action[SCTP_MAX_RESET_PARAMS];
+ uint32_t last_sending_seq[SCTP_MAX_RESET_PARAMS];
+ uint32_t last_base_tsnsent[SCTP_MAX_RESET_PARAMS];
+#ifdef SCTP_ASOCLOG_OF_TSNS
+ /*
+ * special log - This adds considerable size to the asoc, but
+ * provides a log that you can use to detect problems via kgdb.
+ */
+ struct sctp_tsn_log in_tsnlog[SCTP_TSN_LOG_SIZE];
+ struct sctp_tsn_log out_tsnlog[SCTP_TSN_LOG_SIZE];
+ uint32_t cumack_log[SCTP_TSN_LOG_SIZE];
+ uint32_t cumack_logsnt[SCTP_TSN_LOG_SIZE];
+ uint16_t tsn_in_at;
+ uint16_t tsn_out_at;
+ uint16_t tsn_in_wrapped;
+ uint16_t tsn_out_wrapped;
+ uint16_t cumack_log_at;
+ uint16_t cumack_log_atsnt;
+#endif /* SCTP_ASOCLOG_OF_TSNS */
+#ifdef SCTP_FS_SPEC_LOG
+ struct sctp_fs_spec_log fslog[SCTP_FS_SPEC_LOG_SIZE];
+ uint16_t fs_index;
+#endif
+
+ /*
+ * window state information and smallest MTU that I use to bound
+ * segmentation
+ */
+ uint32_t peers_rwnd;
+ uint32_t my_rwnd;
+ uint32_t my_last_reported_rwnd;
+ uint32_t sctp_frag_point;
+
+ uint32_t total_output_queue_size;
+
+ uint32_t sb_cc; /* shadow of sb_cc */
+ uint32_t sb_send_resv; /* amount reserved on a send */
+ uint32_t my_rwnd_control_len; /* shadow of sb_mbcnt used for rwnd
+ * control */
+ /* 32 bit nonce stuff */
+ uint32_t nonce_resync_tsn;
+ uint32_t nonce_wait_tsn;
+ uint32_t default_flowlabel;
+ uint32_t pr_sctp_cnt;
+ int ctrl_queue_cnt; /* could be removed REM */
+ /*
+ * All outbound datagrams queue into this list from the individual
+ * stream queue. Here they get assigned a TSN and then await
+ * sending. The stream seq comes when it is first put in the
+ * individual str queue
+ */
+ unsigned int stream_queue_cnt;
+ unsigned int send_queue_cnt;
+ unsigned int sent_queue_cnt;
+ unsigned int sent_queue_cnt_removeable;
+ /*
+ * Number on sent queue that are marked for retran until this value
+ * is 0 we only send one packet of retran'ed data.
+ */
+ unsigned int sent_queue_retran_cnt;
+
+ unsigned int size_on_reasm_queue;
+ unsigned int cnt_on_reasm_queue;
+ /* amount of data (bytes) currently in flight (on all destinations) */
+ unsigned int total_flight;
+ /* Total book size in flight */
+ unsigned int total_flight_count; /* count of chunks used with
+ * book total */
+ /* count of destination nets and list of destination nets */
+ unsigned int numnets;
+
+ /* Total error count on this association */
+ unsigned int overall_error_count;
+
+ unsigned int cnt_msg_on_sb;
+
+ /* All stream count of chunks for delivery */
+ unsigned int size_on_all_streams;
+ unsigned int cnt_on_all_streams;
+
+ /* Heart Beat delay in ticks */
+ unsigned int heart_beat_delay;
+
+ /* autoclose */
+ unsigned int sctp_autoclose_ticks;
+
+ /* how many preopen streams we have */
+ unsigned int pre_open_streams;
+
+ /* How many streams I support coming into me */
+ unsigned int max_inbound_streams;
+
+ /* the cookie life I award for any cookie, in seconds */
+ unsigned int cookie_life;
+ /* time to delay acks for */
+ unsigned int delayed_ack;
+ unsigned int old_delayed_ack;
+ unsigned int sack_freq;
+ unsigned int data_pkts_seen;
+
+ unsigned int numduptsns;
+ int dup_tsns[SCTP_MAX_DUP_TSNS];
+ unsigned int initial_init_rto_max; /* initial RTO for INIT's */
+ unsigned int initial_rto; /* initial send RTO */
+ unsigned int minrto; /* per assoc RTO-MIN */
+ unsigned int maxrto; /* per assoc RTO-MAX */
+
+ /* authentication fields */
+ sctp_auth_chklist_t *local_auth_chunks;
+ sctp_auth_chklist_t *peer_auth_chunks;
+ sctp_hmaclist_t *local_hmacs; /* local HMACs supported */
+ sctp_hmaclist_t *peer_hmacs; /* peer HMACs supported */
+ struct sctp_keyhead shared_keys; /* assoc's shared keys */
+ sctp_authinfo_t authinfo; /* randoms, cached keys */
+ /*
+ * refcnt to block freeing when a sender or receiver is off copying
+ * user data in.
+ */
+ uint32_t refcnt;
+ uint32_t chunks_on_out_queue; /* total chunks floating around,
+ * locked by send socket buffer */
+ uint32_t peers_adaptation;
+ uint16_t peer_hmac_id; /* peer HMAC id to send */
+
+ /*
+ * Being that we have no bag to collect stale cookies, and that we
+ * really would not want to anyway.. we will count them in this
+ * counter. We of course feed them to the pigeons right away (I have
+ * always thought of pigeons as flying rats).
+ */
+ uint16_t stale_cookie_count;
+
+ /*
+ * For the partial delivery API: if it has been invoked, this is the
+ * stream and SSN of the last TSN I delivered.
+ */
+ uint16_t str_of_pdapi;
+ uint16_t ssn_of_pdapi;
+
+ /* counts of actual built streams. Allocation may be more however */
+ /* could re-arrange to optimize space here. */
+ uint16_t streamincnt;
+ uint16_t streamoutcnt;
+
+ /* my maximum number of retrans of INIT and SEND */
+ /* copied from SCTP but should be individually settable */
+ uint16_t max_init_times;
+ uint16_t max_send_times;
+
+ uint16_t def_net_failure;
+
+ /*
+ * lock flag: 0 is ok to send, 1+ (doubles as a retran count) is
+ * awaiting ACK
+ */
+ uint16_t asconf_sent;
+
+ uint16_t mapping_array_size;
+
+ uint16_t last_strm_seq_delivered;
+ uint16_t last_strm_no_delivered;
+
+ uint16_t last_revoke_count;
+ int16_t num_send_timers_up;
+
+ uint16_t stream_locked_on;
+ uint16_t ecn_echo_cnt_onq;
+
+ uint16_t free_chunk_cnt;
+
+ uint8_t stream_locked;
+ uint8_t authenticated; /* packet authenticated ok */
+ /*
+ * This flag indicates that a SACK needs to be sent. Initially this
+ * is 1 to send the first SACK immediately.
+ */
+ uint8_t send_sack;
+
+ /* max burst after fast retransmit completes */
+ uint8_t max_burst;
+
+ uint8_t sat_network; /* RTT is in range of sat net or greater */
+ uint8_t sat_network_lockout; /* lockout code */
+ uint8_t burst_limit_applied; /* Burst limit in effect at last send? */
+ /* flag goes on when we are doing a partial delivery api */
+ uint8_t hb_random_values[4];
+ uint8_t fragmented_delivery_inprogress;
+ uint8_t fragment_flags;
+ uint8_t last_flags_delivered;
+ uint8_t hb_ect_randombit;
+ uint8_t hb_random_idx;
+ uint8_t hb_is_disabled; /* is the hb disabled? */
+ uint8_t default_tos;
+ uint8_t asconf_del_pending; /* asconf delete last addr pending */
+
+ /* ECN Nonce stuff */
+ uint8_t receiver_nonce_sum; /* nonce I sum and put in my sack */
+ uint8_t ecn_nonce_allowed; /* Tells us if ECN nonce is on */
+ uint8_t nonce_sum_check;/* On off switch used during re-sync */
+ uint8_t nonce_wait_for_ecne; /* flag when we expect a ECN */
+ uint8_t peer_supports_ecn_nonce;
+
+ /*
+ * This value, plus all other ack'd but above cum-ack is added
+ * together to cross check against the bit that we have yet to
+ * define (probably in the SACK). When the cum-ack is updated, this
+ * sum is updated as well.
+ */
+ uint8_t nonce_sum_expect_base;
+ /* Flag to tell if ECN is allowed */
+ uint8_t ecn_allowed;
+
+ /* flag to indicate if peer can do asconf */
+ uint8_t peer_supports_asconf;
+ /* pr-sctp support flag */
+ uint8_t peer_supports_prsctp;
+ /* peer authentication support flag */
+ uint8_t peer_supports_auth;
+ /* stream resets are supported by the peer */
+ uint8_t peer_supports_strreset;
+
+ /*
+ * packet drop's are supported by the peer, we don't really care
+ * about this but we bookkeep it anyway.
+ */
+ uint8_t peer_supports_pktdrop;
+
+ /* Do we allow V6/V4? */
+ uint8_t ipv4_addr_legal;
+ uint8_t ipv6_addr_legal;
+ /* Address scoping flags */
+ /* scope value for IPv4 */
+ uint8_t ipv4_local_scope;
+ /* scope values for IPv6 */
+ uint8_t local_scope;
+ uint8_t site_scope;
+ /* loopback scope */
+ uint8_t loopback_scope;
+ /* flags to handle send alternate net tracking */
+ uint8_t used_alt_onsack;
+ uint8_t used_alt_asconfack;
+ uint8_t fast_retran_loss_recovery;
+ uint8_t sat_t3_loss_recovery;
+ uint8_t dropped_special_cnt;
+ uint8_t seen_a_sack_this_pkt;
+ uint8_t stream_reset_outstanding;
+ uint8_t stream_reset_out_is_outstanding;
+ uint8_t delayed_connection;
+ uint8_t ifp_had_enobuf;
+ uint8_t saw_sack_with_frags;
+ uint8_t in_restart_hash;
+ uint8_t assoc_up_sent;
+ uint8_t adaptation_needed;
+ uint8_t adaptation_sent;
+ /* CMT variables */
+ uint8_t cmt_dac_pkts_rcvd;
+ uint8_t sctp_cmt_on_off;
+ uint8_t iam_blocking;
+ uint8_t cookie_how[8];
+ /* JRS 5/21/07 - CMT PF variable */
+ uint8_t sctp_cmt_pf;
+ /*
+ * The mapping array is used to track out of order sequences above
+ * last_acked_seq. 0 indicates packet missing, 1 indicates packet
+ * rec'd. We slide it up every time we raise last_acked_seq and 0
+ * trailing locations out. If I get a TSN above the array
+ * mappingArraySz, I discard the datagram and let retransmit happen.
+ * (A small bit-addressing sketch follows at the end of this header.)
+ */
+ uint32_t marked_retrans;
+ uint32_t timoinit;
+ uint32_t timodata;
+ uint32_t timosack;
+ uint32_t timoshutdown;
+ uint32_t timoheartbeat;
+ uint32_t timocookie;
+ uint32_t timoshutdownack;
+ struct timeval start_time;
+ struct timeval discontinuity_time;
+};
+
+#endif
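As noted in the mapping array comment above, a small sketch of the usual bit addressing: the TSN minus mapping_array_base_tsn gives the bit offset into mapping_array. The helper name is hypothetical, only the addressing scheme is taken from the comments:

#include <stdint.h>

/*
 * Hypothetical helper: gap 0 is mapping_array_base_tsn itself, each later
 * TSN is one bit further along in the array.
 */
int
mapping_array_bit_is_set(const uint8_t *mapping_array, uint32_t base_tsn,
    uint32_t tsn)
{
	uint32_t gap = tsn - base_tsn;	/* serial arithmetic handles wrap */

	return ((mapping_array[gap >> 3] >> (gap & 0x07)) & 0x01);
}
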
Index: igmp.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/igmp.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/igmp.h -L sys/netinet/igmp.h -u -r1.1.1.1 -r1.2
--- sys/netinet/igmp.h
+++ sys/netinet/igmp.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)igmp.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/igmp.h,v 1.13 2005/01/07 01:45:44 imp Exp $
+ * $FreeBSD: src/sys/netinet/igmp.h,v 1.15 2007/06/15 18:59:10 bms Exp $
*/
#ifndef _NETINET_IGMP_H_
@@ -55,7 +55,44 @@
struct in_addr igmp_group; /* group address being reported */
}; /* (zero for queries) */
-#define IGMP_MINLEN 8
+struct igmpv3 {
+ u_char igmp_type; /* version & type of IGMP message */
+ u_char igmp_code; /* subtype for routing msgs */
+ u_short igmp_cksum; /* IP-style checksum */
+ struct in_addr igmp_group; /* group address being reported */
+ /* (zero for queries) */
+ u_char igmp_misc; /* reserved/suppress/robustness */
+ u_char igmp_qqi; /* querier's query interval */
+ u_short igmp_numsrc; /* number of sources */
+ /*struct in_addr igmp_sources[1];*/ /* source addresses */
+};
+
+struct igmp_grouprec {
+ u_char ig_type; /* record type */
+ u_char ig_datalen; /* length of auxiliary data */
+ u_short ig_numsrc; /* number of sources */
+ struct in_addr ig_group; /* group address being reported */
+ /*struct in_addr ig_sources[1];*/ /* source addresses */
+};
+
+struct igmp_report {
+ u_char ir_type; /* record type */
+ u_char ir_rsv1; /* reserved */
+ u_short ir_cksum; /* checksum */
+ u_short ir_rsv2; /* reserved */
+ u_short ir_numgrps; /* number of group records */
+ struct igmp_grouprec ir_groups[1]; /* group records */
+};
+
+#define IGMP_MINLEN 8
+#define IGMP_HDRLEN 8
+#define IGMP_GRPREC_HDRLEN 8
+#define IGMP_PREPEND 0
+
+#if 0
+#define IGMP_QRV(pigmp) ((pigmp)->igmp_misc & (0x07)) /* XXX */
+#define IGMP_MAXSOURCES(len) (((len) - 12) >> 2) /* XXX */
+#endif
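The disabled macros above follow directly from struct igmpv3: the querier's robustness variable sits in the low three bits of igmp_misc, and the fixed header is 12 bytes followed by 4-byte source addresses. A small worked example with invented sample values:

#include <stdio.h>

int
main(void)
{
	unsigned char igmp_misc = 0x12;	/* sample value off the wire */
	int len = 20;			/* 12-byte fixed header + 2 sources */

	printf("QRV = %d\n", igmp_misc & 0x07);	       /* -> 2 */
	printf("max sources = %d\n", (len - 12) >> 2); /* -> 2 */
	return (0);
}
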
/*
* Message types, including version number.
@@ -71,6 +108,8 @@
#define IGMP_MTRACE_RESP 0x1e /* traceroute resp.(to sender)*/
#define IGMP_MTRACE 0x1f /* mcast traceroute messages */
+#define IGMP_V3_MEMBERSHIP_REPORT 0x22 /* Ver. 3 membership report */
+
#define IGMP_MAX_HOST_REPORT_DELAY 10 /* max delay for response to */
/* query (in seconds) according */
/* to RFC1112 */
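The new struct igmpv3 above is only the fixed 12-byte header; the per-source
addresses follow on the wire, and the disabled IGMP_MAXSOURCES macro shows how
the source count is bounded by the message length. A small userland sketch
(hypothetical wire bytes, with a local struct mirroring the diff) that pulls
the robustness value and source count out of a v3 query:

    #include <stdint.h>
    #include <stdio.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    /* Mirrors the fixed part of struct igmpv3 from the diff (12 bytes). */
    struct igmpv3_hdr {
        uint8_t  igmp_type;
        uint8_t  igmp_code;
        uint16_t igmp_cksum;
        struct in_addr igmp_group;
        uint8_t  igmp_misc;         /* reserved/suppress/robustness */
        uint8_t  igmp_qqi;
        uint16_t igmp_numsrc;
    } __attribute__((packed));

    int
    main(void)
    {
        /* Hypothetical wire bytes: a v3 query for 224.0.0.1 with 2 sources. */
        unsigned char wire[12] = { 0x11, 0x64, 0x00, 0x00,
                                   0xe0, 0x00, 0x00, 0x01,
                                   0x02, 0x7d, 0x00, 0x02 };
        const struct igmpv3_hdr *q = (const void *)wire;
        unsigned int qrv = q->igmp_misc & 0x07;     /* as the #if 0 macro hints */
        unsigned int numsrc = ntohs(q->igmp_numsrc);
        unsigned int payload_len = 12 + numsrc * 4;

        printf("QRV=%u numsrc=%u IGMP_MAXSOURCES(%u)=%u\n",
            qrv, numsrc, payload_len, (payload_len - 12) >> 2);
        return (0);
    }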
--- /dev/null
+++ sys/netinet/sctp_bsd_addr.h
@@ -0,0 +1,59 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_bsd_addr.h,v 1.6 2007/05/29 09:29:02 rrs Exp $");
+
+#ifndef __sctp_bsd_addr_h__
+#define __sctp_bsd_addr_h__
+#include <netinet/sctp_pcb.h>
+
+#if defined(_KERNEL)
+
+#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
+void sctp_wakeup_iterator(void);
+
+void sctp_startup_iterator(void);
+
+#endif
+
+void sctp_gather_internal_ifa_flags(struct sctp_ifa *ifa);
+
+#ifdef SCTP_PACKET_LOGGING
+
+void sctp_packet_log(struct mbuf *m, int length);
+int sctp_copy_out_packet_log(uint8_t * target, int length);
+
+#endif
+
+void sctp_addr_change(struct ifaddr *ifa, int cmd);
+
+#endif
+#endif
--- /dev/null
+++ sys/netinet/sctp_crc32.h
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_crc32.h,v 1.5 2004/08/17 04:06:16 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_crc32.h,v 1.3 2007/05/08 17:01:10 rrs Exp $");
+
+#ifndef __crc32c_h__
+#define __crc32c_h__
+
+#ifndef SCTP_USE_ADLER32
+
+#if defined(_KERNEL)
+uint32_t update_crc32(uint32_t, unsigned char *, unsigned int);
+
+uint32_t old_update_crc32(uint32_t, unsigned char *, unsigned int);
+
+uint32_t sctp_csum_finalize(uint32_t);
+
+
+#endif /* _KERNEL */
+#endif /* !SCTP_USE_ADLER32 */
+#endif /* __crc32c_h__ */
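The header above declares update_crc32() and sctp_csum_finalize() for SCTP's
CRC32c checksum when SCTP_USE_ADLER32 is not defined. A standalone sketch of
the same calculation, where crc32c_update() is a plain bitwise stand-in for the
kernel's table-driven update_crc32() and the final complement plays the role of
sctp_csum_finalize():

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Illustrative stand-in for update_crc32(): bitwise, reflected CRC32c
     * (polynomial 0x1EDC6F41, reflected 0x82F63B78).
     */
    static uint32_t
    crc32c_update(uint32_t crc, const unsigned char *buf, unsigned int len)
    {
        unsigned int i;
        int bit;

        for (i = 0; i < len; i++) {
            crc ^= buf[i];
            for (bit = 0; bit < 8; bit++)
                crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78 : 0);
        }
        return (crc);
    }

    int
    main(void)
    {
        const char pkt[] = "123456789";     /* standard CRC check vector */
        uint32_t crc = 0xFFFFFFFF;          /* seed, as the SCTP code does */

        crc = crc32c_update(crc, (const unsigned char *)pkt, 9);
        /* sctp_csum_finalize() complements (and byte-swaps) the result. */
        printf("crc32c = 0x%08x\n", ~crc);  /* expect 0xe3069283 */
        return (0);
    }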
Index: udp.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/udp.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/udp.h -L sys/netinet/udp.h -u -r1.1.1.1 -r1.2
--- sys/netinet/udp.h
+++ sys/netinet/udp.h
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 1982, 1986, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,14 +28,14 @@
* SUCH DAMAGE.
*
* @(#)udp.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/udp.h,v 1.9 2005/01/07 01:45:45 imp Exp $
+ * $FreeBSD: src/sys/netinet/udp.h,v 1.10 2007/02/20 10:13:11 rwatson Exp $
*/
#ifndef _NETINET_UDP_H_
-#define _NETINET_UDP_H_
+#define _NETINET_UDP_H_
/*
- * Udp protocol header.
+ * UDP protocol header.
* Per RFC 768, September, 1981.
*/
struct udphdr {
Index: tcp_seq.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_seq.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/tcp_seq.h -L sys/netinet/tcp_seq.h -u -r1.1.1.1 -r1.2
--- sys/netinet/tcp_seq.h
+++ sys/netinet/tcp_seq.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_seq.h 8.3 (Berkeley) 6/21/95
- * $FreeBSD: src/sys/netinet/tcp_seq.h,v 1.25 2005/04/10 05:24:59 ps Exp $
+ * $FreeBSD: src/sys/netinet/tcp_seq.h,v 1.26 2006/06/18 14:24:12 andre Exp $
*/
#ifndef _NETINET_TCP_SEQ_H_
@@ -51,19 +51,6 @@
#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0)
/*
- * TCP connection counts are 32 bit integers operated
- * on with modular arithmetic. These macros can be
- * used to compare such integers.
- */
-#define CC_LT(a,b) ((int)((a)-(b)) < 0)
-#define CC_LEQ(a,b) ((int)((a)-(b)) <= 0)
-#define CC_GT(a,b) ((int)((a)-(b)) > 0)
-#define CC_GEQ(a,b) ((int)((a)-(b)) >= 0)
-
-/* Macro to increment a CC: skip 0 which has a special meaning */
-#define CC_INC(c) (++(c) == 0 ? ++(c) : (c))
-
-/*
* Macros to initialize tcp sequence numbers for
* send and receive from initial send and receive
* sequence numbers.
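The surviving TSTMP_* macros (and the TCP SEQ_* macros elsewhere in this
header) use the same comparison trick the removed CC_* macros did: subtract two
32-bit counters and look at the sign of the result as a signed int, which stays
correct across wraparound as long as the two values are within 2^31 of each
other. A tiny standalone check of that property:

    #include <stdint.h>
    #include <stdio.h>

    /* Same comparison trick used by TSTMP_GEQ()/SEQ_LT() in tcp_seq.h. */
    #define SEQ32_LT(a, b)  ((int32_t)((a) - (b)) < 0)

    int
    main(void)
    {
        uint32_t a = 0xfffffff0;    /* just before wrap */
        uint32_t b = 0x00000010;    /* just after wrap  */

        /* Naive "<" gets the order wrong; the modular compare does not. */
        printf("naive: %d  modular: %d\n", a < b, SEQ32_LT(a, b));
        return (0);
    }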
Index: in.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/in.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/in.c -L sys/netinet/in.c -u -r1.1.1.2 -r1.2
--- sys/netinet/in.c
+++ sys/netinet/in.c
@@ -28,15 +28,18 @@
* SUCH DAMAGE.
*
* @(#)in.c 8.4 (Berkeley) 1/9/95
- * $FreeBSD: src/sys/netinet/in.c,v 1.85.2.6 2006/01/31 16:11:37 andre Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/in.c,v 1.102 2007/10/07 20:44:22 silby Exp $");
+
#include "opt_carp.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/malloc.h>
+#include <sys/priv.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
@@ -48,10 +51,7 @@
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
-
-#include <netinet/igmp_var.h>
-
-static MALLOC_DEFINE(M_IPMADDR, "in_multi", "internet multicast address");
+#include <netinet/ip_var.h>
static int in_mask2len(struct in_addr *);
static void in_len2mask(struct in_addr *, int);
@@ -63,6 +63,7 @@
static void in_socktrim(struct sockaddr_in *);
static int in_ifinit(struct ifnet *,
struct in_ifaddr *, struct sockaddr_in *, int);
+static void in_purgemaddrs(struct ifnet *);
static int subnetsarelocal = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, subnets_are_local, CTLFLAG_RW,
@@ -72,17 +73,6 @@
&sameprefixcarponly, 0,
"Refuse to create same prefixes on different interfaces");
-/*
- * The IPv4 multicast list (in_multihead and associated structures) are
- * protected by the global in_multi_mtx. See in_var.h for more details. For
- * now, in_multi_mtx is marked as recursible due to IGMP's calling back into
- * ip_output() to send IGMP packets while holding the lock; this probably is
- * not quite desirable.
- */
-struct in_multihead in_multihead; /* XXX BSS initialization */
-struct mtx in_multi_mtx;
-MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF | MTX_RECURSE);
-
extern struct inpcbinfo ripcbinfo;
extern struct inpcbinfo udbinfo;
@@ -93,8 +83,7 @@
* Otherwise, it includes only the directly-connected (sub)nets.
*/
int
-in_localaddr(in)
- struct in_addr in;
+in_localaddr(struct in_addr in)
{
register u_long i = ntohl(in.s_addr);
register struct in_ifaddr *ia;
@@ -116,8 +105,7 @@
* on one of its interfaces.
*/
int
-in_localip(in)
- struct in_addr in;
+in_localip(struct in_addr in)
{
struct in_ifaddr *ia;
@@ -134,13 +122,12 @@
* may be forwarded.
*/
int
-in_canforward(in)
- struct in_addr in;
+in_canforward(struct in_addr in)
{
register u_long i = ntohl(in.s_addr);
register u_long net;
- if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i))
+ if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i))
return (0);
if (IN_CLASSA(i)) {
net = i & IN_CLASSA_NET;
@@ -154,8 +141,7 @@
* Trim a mask in a sockaddr
*/
static void
-in_socktrim(ap)
-struct sockaddr_in *ap;
+in_socktrim(struct sockaddr_in *ap)
{
register char *cplim = (char *) &ap->sin_addr;
register char *cp = (char *) (&ap->sin_addr + 1);
@@ -191,9 +177,7 @@
}
static void
-in_len2mask(mask, len)
- struct in_addr *mask;
- int len;
+in_len2mask(struct in_addr *mask, int len)
{
int i;
u_char *p;
@@ -212,30 +196,45 @@
*/
/* ARGSUSED */
int
-in_control(so, cmd, data, ifp, td)
- struct socket *so;
- u_long cmd;
- caddr_t data;
- register struct ifnet *ifp;
- struct thread *td;
+in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
+ struct thread *td)
{
register struct ifreq *ifr = (struct ifreq *)data;
register struct in_ifaddr *ia = 0, *iap;
register struct ifaddr *ifa;
+ struct in_addr allhosts_addr;
struct in_addr dst;
struct in_ifaddr *oia;
struct in_aliasreq *ifra = (struct in_aliasreq *)data;
struct sockaddr_in oldaddr;
int error, hostIsNew, iaIsNew, maskIsNew, s;
+ int iaIsFirst;
+ iaIsFirst = 0;
iaIsNew = 0;
+ allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
switch (cmd) {
case SIOCALIFADDR:
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NET_ADDIFADDR);
+ if (error)
+ return (error);
+ }
+ if (!ifp)
+ return EINVAL;
+ return in_lifaddr_ioctl(so, cmd, data, ifp, td);
+
case SIOCDLIFADDR:
- if (td && (error = suser(td)) != 0)
- return error;
- /*fall through*/
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NET_DELIFADDR);
+ if (error)
+ return (error);
+ }
+ if (!ifp)
+ return EINVAL;
+ return in_lifaddr_ioctl(so, cmd, data, ifp, td);
+
case SIOCGLIFADDR:
if (!ifp)
return EINVAL;
@@ -264,6 +263,8 @@
break;
}
}
+ if (ia == NULL)
+ iaIsFirst = 1;
}
switch (cmd) {
@@ -292,8 +293,11 @@
case SIOCSIFADDR:
case SIOCSIFNETMASK:
case SIOCSIFDSTADDR:
- if (td && (error = suser(td)) != 0)
- return error;
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NET_ADDIFADDR);
+ if (error)
+ return (error);
+ }
if (ifp == 0)
return (EADDRNOTAVAIL);
@@ -330,8 +334,11 @@
break;
case SIOCSIFBRDADDR:
- if (td && (error = suser(td)) != 0)
- return error;
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NET_ADDIFADDR);
+ if (error)
+ return (error);
+ }
/* FALLTHROUGH */
case SIOCGIFADDR:
@@ -399,8 +406,11 @@
(struct sockaddr_in *) &ifr->ifr_addr, 1);
if (error != 0 && iaIsNew)
break;
- if (error == 0)
+ if (error == 0) {
+ if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST) != 0)
+ in_addmulti(&allhosts_addr, ifp);
EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+ }
return (0);
case SIOCSIFNETMASK:
@@ -443,8 +453,11 @@
if ((ifp->if_flags & IFF_BROADCAST) &&
(ifra->ifra_broadaddr.sin_family == AF_INET))
ia->ia_broadaddr = ifra->ifra_broadaddr;
- if (error == 0)
+ if (error == 0) {
+ if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST) != 0)
+ in_addmulti(&allhosts_addr, ifp);
EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+ }
return (error);
case SIOCDIFADDR:
@@ -479,8 +492,27 @@
s = splnet();
TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
TAILQ_REMOVE(&in_ifaddrhead, ia, ia_link);
- if (ia->ia_addr.sin_family == AF_INET)
+ if (ia->ia_addr.sin_family == AF_INET) {
LIST_REMOVE(ia, ia_hash);
+ /*
+ * If this is the last IPv4 address configured on this
+ * interface, leave the all-hosts group.
+ * XXX: This is quite ugly because of locking and structure.
+ */
+ oia = NULL;
+ IFP_TO_IA(ifp, oia);
+ if (oia == NULL) {
+ struct in_multi *inm;
+
+ IFF_LOCKGIANT(ifp);
+ IN_MULTI_LOCK();
+ IN_LOOKUP_MULTI(allhosts_addr, ifp, inm);
+ if (inm != NULL)
+ in_delmulti_locked(inm);
+ IN_MULTI_UNLOCK();
+ IFF_UNLOCKGIANT(ifp);
+ }
+ }
IFAFREE(&ia->ia_ifa);
splx(s);
@@ -504,12 +536,8 @@
* other values may be returned from in_ioctl()
*/
static int
-in_lifaddr_ioctl(so, cmd, data, ifp, td)
- struct socket *so;
- u_long cmd;
- caddr_t data;
- struct ifnet *ifp;
- struct thread *td;
+in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
+ struct ifnet *ifp, struct thread *td)
{
struct if_laddrreq *iflr = (struct if_laddrreq *)data;
struct ifaddr *ifa;
@@ -579,9 +607,9 @@
struct in_ifaddr *ia;
struct in_addr mask, candidate, match;
struct sockaddr_in *sin;
- int cmp;
bzero(&mask, sizeof(mask));
+ bzero(&match, sizeof(match));
if (iflr->flags & IFLR_PREFIX) {
/* lookup a prefix rather than address. */
in_len2mask(&mask, iflr->prefixlen);
@@ -594,25 +622,20 @@
if (match.s_addr != sin->sin_addr.s_addr)
return EINVAL;
- cmp = 1;
} else {
- if (cmd == SIOCGLIFADDR) {
- /* on getting an address, take the 1st match */
- cmp = 0; /*XXX*/
- } else {
- /* on deleting an address, do exact match */
+ /* on getting an address, take the 1st match */
+ /* on deleting an address, do exact match */
+ if (cmd != SIOCGLIFADDR) {
in_len2mask(&mask, 32);
sin = (struct sockaddr_in *)&iflr->addr;
match.s_addr = sin->sin_addr.s_addr;
-
- cmp = 1;
}
}
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
- if (!cmp)
+ if (match.s_addr == 0)
break;
candidate.s_addr = ((struct sockaddr_in *)&ifa->ifa_addr)->sin_addr.s_addr;
candidate.s_addr &= mask.s_addr;
@@ -669,10 +692,9 @@
* Delete any existing route for an interface.
*/
void
-in_ifscrub(ifp, ia)
- register struct ifnet *ifp;
- register struct in_ifaddr *ia;
+in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia)
{
+
in_scrubprefix(ia);
}
@@ -681,11 +703,8 @@
* and routing table entry.
*/
static int
-in_ifinit(ifp, ia, sin, scrub)
- register struct ifnet *ifp;
- register struct in_ifaddr *ia;
- struct sockaddr_in *sin;
- int scrub;
+in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
+ int scrub)
{
register u_long i = ntohl(sin->sin_addr.s_addr);
struct sockaddr_in oldaddr;
@@ -769,16 +788,6 @@
if ((error = in_addprefix(ia, flags)) != 0)
return (error);
- /*
- * If the interface supports multicast, join the "all hosts"
- * multicast group on that interface.
- */
- if (ifp->if_flags & IFF_MULTICAST) {
- struct in_addr addr;
-
- addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
- in_addmulti(&addr, ifp);
- }
return (error);
}
@@ -786,21 +795,19 @@
((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
? RTF_HOST : 0)
/*
- * Check if we have a route for the given prefix already or add a one
- * accordingly.
+ * Check if we have a route for the given prefix already or add one accordingly.
*/
static int
-in_addprefix(target, flags)
- struct in_ifaddr *target;
- int flags;
+in_addprefix(struct in_ifaddr *target, int flags)
{
struct in_ifaddr *ia;
struct in_addr prefix, mask, p, m;
int error;
- if ((flags & RTF_HOST) != 0)
+ if ((flags & RTF_HOST) != 0) {
prefix = target->ia_dstaddr.sin_addr;
- else {
+ mask.s_addr = 0;
+ } else {
prefix = target->ia_addr.sin_addr;
mask = target->ia_sockmask.sin_addr;
prefix.s_addr &= mask.s_addr;
@@ -851,8 +858,7 @@
* otherwise.
*/
static int
-in_scrubprefix(target)
- struct in_ifaddr *target;
+in_scrubprefix(struct in_ifaddr *target)
{
struct in_ifaddr *ia;
struct in_addr prefix, mask, p;
@@ -918,9 +924,7 @@
* Return 1 if the address might be a local broadcast address.
*/
int
-in_broadcast(in, ifp)
- struct in_addr in;
- struct ifnet *ifp;
+in_broadcast(struct in_addr in, struct ifnet *ifp)
{
register struct ifaddr *ifa;
u_long t;
@@ -954,102 +958,26 @@
return (0);
#undef ia
}
-/*
- * Add an address to the list of IP multicast addresses for a given interface.
- */
-struct in_multi *
-in_addmulti(ap, ifp)
- register struct in_addr *ap;
- register struct ifnet *ifp;
-{
- register struct in_multi *inm;
- int error;
- struct sockaddr_in sin;
- struct ifmultiaddr *ifma;
-
- IFF_LOCKGIANT(ifp);
- IN_MULTI_LOCK();
- /*
- * Call generic routine to add membership or increment
- * refcount. It wants addresses in the form of a sockaddr,
- * so we build one here (being careful to zero the unused bytes).
- */
- bzero(&sin, sizeof sin);
- sin.sin_family = AF_INET;
- sin.sin_len = sizeof sin;
- sin.sin_addr = *ap;
- error = if_addmulti(ifp, (struct sockaddr *)&sin, &ifma);
- if (error) {
- IN_MULTI_UNLOCK();
- IFF_UNLOCKGIANT(ifp);
- return 0;
- }
-
- /*
- * If ifma->ifma_protospec is null, then if_addmulti() created
- * a new record. Otherwise, we are done.
- */
- if (ifma->ifma_protospec != NULL) {
- IN_MULTI_UNLOCK();
- IFF_UNLOCKGIANT(ifp);
- return ifma->ifma_protospec;
- }
-
- inm = (struct in_multi *)malloc(sizeof(*inm), M_IPMADDR,
- M_NOWAIT | M_ZERO);
- if (inm == NULL) {
- IN_MULTI_UNLOCK();
- IFF_UNLOCKGIANT(ifp);
- return (NULL);
- }
-
- inm->inm_addr = *ap;
- inm->inm_ifp = ifp;
- inm->inm_ifma = ifma;
- ifma->ifma_protospec = inm;
- LIST_INSERT_HEAD(&in_multihead, inm, inm_link);
-
- /*
- * Let IGMP know that we have joined a new IP multicast group.
- */
- igmp_joingroup(inm);
- IN_MULTI_UNLOCK();
- IFF_UNLOCKGIANT(ifp);
- return (inm);
-}
/*
- * Delete a multicast address record.
+ * Delete all IPv4 multicast address records, and associated link-layer
+ * multicast address records, associated with ifp.
*/
-void
-in_delmulti(inm)
- register struct in_multi *inm;
+static void
+in_purgemaddrs(struct ifnet *ifp)
{
- struct ifmultiaddr *ifma;
- struct in_multi my_inm;
- struct ifnet *ifp;
+ struct in_multi *inm;
+ struct in_multi *oinm;
- ifp = inm->inm_ifp;
+#ifdef DIAGNOSTIC
+ printf("%s: purging ifp %p\n", __func__, ifp);
+#endif
IFF_LOCKGIANT(ifp);
IN_MULTI_LOCK();
- ifma = inm->inm_ifma;
- my_inm.inm_ifp = NULL ; /* don't send the leave msg */
- if (ifma->ifma_refcount == 1) {
- /*
- * No remaining claims to this record; let IGMP know that
- * we are leaving the multicast group.
- * But do it after the if_delmulti() which might reset
- * the interface and nuke the packet.
- */
- my_inm = *inm ;
- ifma->ifma_protospec = NULL;
- LIST_REMOVE(inm, inm_link);
- free(inm, M_IPMADDR);
- }
- /* XXX - should be separate API for when we have an ifma? */
- if_delmulti(ifma->ifma_ifp, ifma->ifma_addr);
- if (my_inm.inm_ifp != NULL)
- igmp_leavegroup(&my_inm);
+ LIST_FOREACH_SAFE(inm, &in_multihead, inm_link, oinm) {
+ if (inm->inm_ifp == ifp)
+ in_delmulti_locked(inm);
+ }
IN_MULTI_UNLOCK();
IFF_UNLOCKGIANT(ifp);
}
@@ -1058,10 +986,10 @@
* On interface removal, clean up IPv4 data structures hung off of the ifnet.
*/
void
-in_ifdetach(ifp)
- struct ifnet *ifp;
+in_ifdetach(struct ifnet *ifp)
{
in_pcbpurgeif0(&ripcbinfo, ifp);
in_pcbpurgeif0(&udbinfo, ifp);
+ in_purgemaddrs(ifp);
}
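The in.c changes above replace the suser() checks with per-operation
priv_check() calls and move the all-hosts group membership out of in_ifinit():
the group is now joined only when the first IPv4 address is configured on a
multicast-capable interface, and left when the last address is removed. For
reference, the group address those paths build is the usual 224.0.0.1, as this
small userland snippet shows:

    #include <stdio.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>

    int
    main(void)
    {
        /* Same construction as the new in_control() code above. */
        struct in_addr allhosts_addr;

        allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
        printf("all-hosts group: %s\n", inet_ntoa(allhosts_addr)); /* 224.0.0.1 */
        return (0);
    }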
Index: ip_dummynet.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_dummynet.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_dummynet.h -L sys/netinet/ip_dummynet.h -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_dummynet.h
+++ sys/netinet/ip_dummynet.h
@@ -24,7 +24,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/netinet/ip_dummynet.h,v 1.36.2.2 2006/02/17 16:46:47 ru Exp $
+ * $FreeBSD: src/sys/netinet/ip_dummynet.h,v 1.40 2007/06/17 00:33:34 mjacob Exp $
*/
#ifndef _IP_DUMMYNET_H
@@ -70,11 +70,6 @@
* virtual time wraps every 15 days.
*/
-/*
- * The OFFSET_OF macro is used to return the offset of a field within
- * a structure. It is used by the heap management routines.
- */
-#define OFFSET_OF(type, field) ((int)&( ((type *)0)->field) )
/*
* The maximum hash table size for queues. This value must be a power
@@ -121,7 +116,7 @@
int dn_dir; /* action when packet comes out. */
#define DN_TO_IP_OUT 1
#define DN_TO_IP_IN 2
-#define DN_TO_BDG_FWD 3
+/* Obsolete: #define DN_TO_BDG_FWD 3 */
#define DN_TO_ETH_DEMUX 4
#define DN_TO_ETH_OUT 5
#define DN_TO_IP6_IN 6
Index: ip.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/ip.h -L sys/netinet/ip.h -u -r1.1.1.1 -r1.2
--- sys/netinet/ip.h
+++ sys/netinet/ip.h
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 1982, 1986, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,16 +28,17 @@
* SUCH DAMAGE.
*
* @(#)ip.h 8.2 (Berkeley) 6/1/94
- * $FreeBSD: src/sys/netinet/ip.h,v 1.29 2005/01/07 01:45:44 imp Exp $
+ * $FreeBSD: src/sys/netinet/ip.h,v 1.31 2007/05/11 11:00:48 rwatson Exp $
*/
#ifndef _NETINET_IP_H_
-#define _NETINET_IP_H_
+#define _NETINET_IP_H_
#include <sys/cdefs.h>
/*
* Definitions for internet protocol version 4.
+ *
* Per RFC 791, September 1981.
*/
#define IPVERSION 4
@@ -65,7 +67,7 @@
u_char ip_p; /* protocol */
u_short ip_sum; /* checksum */
struct in_addr ip_src,ip_dst; /* source and dest address */
-} __packed;
+} __packed __aligned(4);
#ifdef CTASSERT
CTASSERT(sizeof (struct ip) == 20);
@@ -74,7 +76,7 @@
#define IP_MAXPACKET 65535 /* maximum packet size */
/*
- * Definitions for IP type of service (ip_tos)
+ * Definitions for IP type of service (ip_tos).
*/
#define IPTOS_LOWDELAY 0x10
#define IPTOS_THROUGHPUT 0x08
@@ -87,7 +89,7 @@
#endif
/*
- * Definitions for IP precedence (also in ip_tos) (hopefully unused)
+ * Definitions for IP precedence (also in ip_tos) (hopefully unused).
*/
#define IPTOS_PREC_NETCONTROL 0xe0
#define IPTOS_PREC_INTERNETCONTROL 0xc0
@@ -99,8 +101,8 @@
#define IPTOS_PREC_ROUTINE 0x00
/*
- * ECN (Explicit Congestion Notification) codepoints in RFC3168
- * mapped to the lower 2 bits of the TOS field.
+ * ECN (Explicit Congestion Notification) codepoints in RFC3168 mapped to the
+ * lower 2 bits of the TOS field.
*/
#define IPTOS_ECN_NOTECT 0x00 /* not-ECT */
#define IPTOS_ECN_ECT1 0x01 /* ECN-capable transport (1) */
@@ -138,7 +140,7 @@
*/
#define IPOPT_OPTVAL 0 /* option ID */
#define IPOPT_OLEN 1 /* option length */
-#define IPOPT_OFFSET 2 /* offset within option */
+#define IPOPT_OFFSET 2 /* offset within option */
#define IPOPT_MINOFF 4 /* min value of above */
/*
@@ -165,12 +167,12 @@
} ipt_timestamp;
};
-/* flag bits for ipt_flg */
+/* Flag bits for ipt_flg. */
#define IPOPT_TS_TSONLY 0 /* timestamps only */
#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */
#define IPOPT_TS_PRESPEC 3 /* specified modules only */
-/* bits for security (not byte swapped) */
+/* Bits for security (not byte swapped). */
#define IPOPT_SECUR_UNCLASS 0x0000
#define IPOPT_SECUR_CONFID 0xf135
#define IPOPT_SECUR_EFTO 0x789a
@@ -186,7 +188,6 @@
#define IPDEFTTL 64 /* default ttl, from RFC 1340 */
#define IPFRAGTTL 60 /* time to live for frags, slowhz */
#define IPTTLDEC 1 /* subtracted when forwarding */
-
#define IP_MSS 576 /* default maximum segment size */
/*
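The ip header struct above gains __aligned(4) alongside __packed, so the
CTASSERT that sizeof(struct ip) is 20 keeps holding while the compiler may
still assume 4-byte alignment for member access. A quick userland check of the
same two properties (on platforms whose netinet/ip.h differs, the alignment
printed may not be 4):

    #include <stdio.h>
    #include <netinet/in.h>
    #include <netinet/ip.h>

    int
    main(void)
    {
        /* Mirrors the CTASSERT(sizeof(struct ip) == 20) in the header. */
        printf("sizeof(struct ip) = %zu, alignment = %zu\n",
            sizeof(struct ip), _Alignof(struct ip));
        return (0);
    }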
--- /dev/null
+++ sys/netinet/sctp_sysctl.c
@@ -0,0 +1,809 @@
+/*-
+ * Copyright (c) 2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_sysctl.c,v 1.16 2007/09/13 14:43:54 rrs Exp $");
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_constants.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_output.h>
+/*
+ * sysctl tunable variables
+ */
+uint32_t sctp_sendspace = SCTPCTL_MAXDGRAM_DEFAULT;
+uint32_t sctp_recvspace = SCTPCTL_RECVSPACE_DEFAULT;
+uint32_t sctp_auto_asconf = SCTPCTL_AUTOASCONF_DEFAULT;
+uint32_t sctp_ecn_enable = SCTPCTL_ECN_ENABLE_DEFAULT;
+uint32_t sctp_ecn_nonce = SCTPCTL_ECN_NONCE_DEFAULT;
+uint32_t sctp_strict_sacks = SCTPCTL_STRICT_SACKS_DEFAULT;
+uint32_t sctp_no_csum_on_loopback = SCTPCTL_LOOPBACK_NOCSUM_DEFAULT;
+uint32_t sctp_strict_init = SCTPCTL_STRICT_INIT_DEFAULT;
+uint32_t sctp_peer_chunk_oh = SCTPCTL_PEER_CHKOH_DEFAULT;
+uint32_t sctp_max_burst_default = SCTPCTL_MAXBURST_DEFAULT;
+uint32_t sctp_max_chunks_on_queue = SCTPCTL_MAXCHUNKS_DEFAULT;
+uint32_t sctp_hashtblsize = SCTPCTL_TCBHASHSIZE_DEFAULT;
+uint32_t sctp_pcbtblsize = SCTPCTL_PCBHASHSIZE_DEFAULT;
+uint32_t sctp_min_split_point = SCTPCTL_MIN_SPLIT_POINT_DEFAULT;
+uint32_t sctp_chunkscale = SCTPCTL_CHUNKSCALE_DEFAULT;
+uint32_t sctp_delayed_sack_time_default = SCTPCTL_DELAYED_SACK_TIME_DEFAULT;
+uint32_t sctp_sack_freq_default = SCTPCTL_SACK_FREQ_DEFAULT;
+uint32_t sctp_system_free_resc_limit = SCTPCTL_SYS_RESOURCE_DEFAULT;
+uint32_t sctp_asoc_free_resc_limit = SCTPCTL_ASOC_RESOURCE_DEFAULT;
+uint32_t sctp_heartbeat_interval_default = SCTPCTL_HEARTBEAT_INTERVAL_DEFAULT;
+uint32_t sctp_pmtu_raise_time_default = SCTPCTL_PMTU_RAISE_TIME_DEFAULT;
+uint32_t sctp_shutdown_guard_time_default = SCTPCTL_SHUTDOWN_GUARD_TIME_DEFAULT;
+uint32_t sctp_secret_lifetime_default = SCTPCTL_SECRET_LIFETIME_DEFAULT;
+uint32_t sctp_rto_max_default = SCTPCTL_RTO_MAX_DEFAULT;
+uint32_t sctp_rto_min_default = SCTPCTL_RTO_MIN_DEFAULT;
+uint32_t sctp_rto_initial_default = SCTPCTL_RTO_INITIAL_DEFAULT;
+uint32_t sctp_init_rto_max_default = SCTPCTL_INIT_RTO_MAX_DEFAULT;
+uint32_t sctp_valid_cookie_life_default = SCTPCTL_VALID_COOKIE_LIFE_DEFAULT;
+uint32_t sctp_init_rtx_max_default = SCTPCTL_INIT_RTX_MAX_DEFAULT;
+uint32_t sctp_assoc_rtx_max_default = SCTPCTL_ASSOC_RTX_MAX_DEFAULT;
+uint32_t sctp_path_rtx_max_default = SCTPCTL_PATH_RTX_MAX_DEFAULT;
+uint32_t sctp_add_more_threshold = SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT;
+uint32_t sctp_nr_outgoing_streams_default = SCTPCTL_OUTGOING_STREAMS_DEFAULT;
+uint32_t sctp_cmt_on_off = SCTPCTL_CMT_ON_OFF_DEFAULT;
+uint32_t sctp_cmt_use_dac = SCTPCTL_CMT_USE_DAC_DEFAULT;
+uint32_t sctp_cmt_pf = SCTPCTL_CMT_PF_DEFAULT;
+uint32_t sctp_use_cwnd_based_maxburst = SCTPCTL_CWND_MAXBURST_DEFAULT;
+uint32_t sctp_early_fr = SCTPCTL_EARLY_FAST_RETRAN_DEFAULT;
+uint32_t sctp_early_fr_msec = SCTPCTL_EARLY_FAST_RETRAN_MSEC_DEFAULT;
+uint32_t sctp_asconf_auth_nochk = SCTPCTL_ASCONF_AUTH_NOCHK_DEFAULT;
+uint32_t sctp_auth_disable = SCTPCTL_AUTH_DISABLE_DEFAULT;
+uint32_t sctp_nat_friendly = SCTPCTL_NAT_FRIENDLY_DEFAULT;
+uint32_t sctp_L2_abc_variable = SCTPCTL_ABC_L_VAR_DEFAULT;
+uint32_t sctp_mbuf_threshold_count = SCTPCTL_MAX_CHAINED_MBUFS_DEFAULT;
+uint32_t sctp_do_drain = SCTPCTL_DO_SCTP_DRAIN_DEFAULT;
+uint32_t sctp_hb_maxburst = SCTPCTL_HB_MAX_BURST_DEFAULT;
+uint32_t sctp_abort_if_one_2_one_hits_limit = SCTPCTL_ABORT_AT_LIMIT_DEFAULT;
+uint32_t sctp_strict_data_order = SCTPCTL_STRICT_DATA_ORDER_DEFAULT;
+uint32_t sctp_min_residual = SCTPCTL_MIN_RESIDUAL_DEFAULT;
+uint32_t sctp_max_retran_chunk = SCTPCTL_MAX_RETRAN_CHUNK_DEFAULT;
+uint32_t sctp_logging_level = SCTPCTL_LOGGING_LEVEL_DEFAULT;
+
+/* JRS - Variable for default congestion control module */
+uint32_t sctp_default_cc_module = SCTPCTL_DEFAULT_CC_MODULE_DEFAULT;
+uint32_t sctp_default_frag_interleave = SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DEFAULT;
+uint32_t sctp_mobility_base = SCTPCTL_MOBILITY_BASE_DEFAULT;
+uint32_t sctp_mobility_fasthandoff = SCTPCTL_MOBILITY_FASTHANDOFF_DEFAULT;
+
+#if defined(SCTP_LOCAL_TRACE_BUF)
+struct sctp_log sctp_log;
+
+#endif
+#ifdef SCTP_DEBUG
+uint32_t sctp_debug_on = SCTPCTL_DEBUG_DEFAULT;
+
+#endif
+struct sctpstat sctpstat;
+
+
+/* It returns an upper limit. No filtering is done here */
+static unsigned int
+number_of_addresses(struct sctp_inpcb *inp)
+{
+ int cnt;
+ struct sctp_vrf *vrf;
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+ struct sctp_laddr *laddr;
+
+ cnt = 0;
+	/* neither Mac OS X nor FreeBSD supports multiple routing functions */
+ if ((vrf = sctp_find_vrf(inp->def_vrf_id)) == NULL) {
+ return (0);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if ((sctp_ifa->address.sa.sa_family == AF_INET) ||
+ (sctp_ifa->address.sa.sa_family == AF_INET6)) {
+ cnt++;
+ }
+ }
+ }
+ } else {
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if ((laddr->ifa->address.sa.sa_family == AF_INET) ||
+ (laddr->ifa->address.sa.sa_family == AF_INET6)) {
+ cnt++;
+ }
+ }
+ }
+ return (cnt);
+}
+
+static int
+copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sysctl_req *req)
+{
+ struct sctp_ifn *sctp_ifn;
+ struct sctp_ifa *sctp_ifa;
+ int loopback_scope, ipv4_local_scope, local_scope, site_scope;
+ int ipv4_addr_legal, ipv6_addr_legal;
+ struct sctp_vrf *vrf;
+ struct xsctp_laddr xladdr;
+ struct sctp_laddr *laddr;
+ int error;
+
+ /* Turn on all the appropriate scope */
+ if (stcb) {
+ /* use association specific values */
+ loopback_scope = stcb->asoc.loopback_scope;
+ ipv4_local_scope = stcb->asoc.ipv4_local_scope;
+ local_scope = stcb->asoc.local_scope;
+ site_scope = stcb->asoc.site_scope;
+ } else {
+ /* use generic values for endpoints */
+ loopback_scope = 1;
+ ipv4_local_scope = 1;
+ local_scope = 1;
+ site_scope = 1;
+ }
+
+ /* use only address families of interest */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ ipv6_addr_legal = 1;
+ if (SCTP_IPV6_V6ONLY(inp)) {
+ ipv4_addr_legal = 0;
+ } else {
+ ipv4_addr_legal = 1;
+ }
+ } else {
+ ipv4_addr_legal = 1;
+ ipv6_addr_legal = 0;
+ }
+
+ error = 0;
+
+	/* neither Mac OS X nor FreeBSD supports multiple routing functions */
+ if ((vrf = sctp_find_vrf(inp->def_vrf_id)) == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ return (-1);
+ }
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
+ if ((loopback_scope == 0) && SCTP_IFN_IS_IFT_LOOP(sctp_ifn))
+ /* Skip loopback if loopback_scope not set */
+ continue;
+ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
+ if (stcb) {
+ /*
+ * ignore if blacklisted at
+ * association level
+ */
+ if (sctp_is_addr_restricted(stcb, sctp_ifa))
+ continue;
+ }
+ if ((sctp_ifa->address.sa.sa_family == AF_INET) && (ipv4_addr_legal)) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&sctp_ifa->address.sa;
+ if (sin->sin_addr.s_addr == 0)
+ continue;
+ if ((ipv4_local_scope == 0) && (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)))
+ continue;
+ } else if ((sctp_ifa->address.sa.sa_family == AF_INET6) && (ipv6_addr_legal)) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&sctp_ifa->address.sa;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ continue;
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ if (local_scope == 0)
+ continue;
+ if (sin6->sin6_scope_id == 0) {
+ /*
+ * bad link local
+ * address
+ */
+ if (sa6_recoverscope(sin6) != 0)
+ continue;
+ }
+ }
+ if ((site_scope == 0) && (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)))
+ continue;
+ } else
+ continue;
+ memset((void *)&xladdr, 0, sizeof(struct xsctp_laddr));
+ memcpy((void *)&xladdr.address, (const void *)&sctp_ifa->address, sizeof(union sctp_sockstore));
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xladdr, sizeof(struct xsctp_laddr));
+ if (error) {
+ return (error);
+ } else {
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ }
+ }
+ }
+ } else {
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ /* ignore if blacklisted at association level */
+ if (stcb && sctp_is_addr_restricted(stcb, laddr->ifa))
+ continue;
+ memset((void *)&xladdr, 0, sizeof(struct xsctp_laddr));
+ memcpy((void *)&xladdr.address, (const void *)&laddr->ifa->address, sizeof(union sctp_sockstore));
+ xladdr.start_time.tv_sec = (uint32_t) laddr->start_time.tv_sec;
+ xladdr.start_time.tv_usec = (uint32_t) laddr->start_time.tv_usec;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xladdr, sizeof(struct xsctp_laddr));
+ if (error) {
+ return (error);
+ } else {
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ }
+ }
+ }
+ memset((void *)&xladdr, 0, sizeof(struct xsctp_laddr));
+ xladdr.last = 1;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xladdr, sizeof(struct xsctp_laddr));
+
+ if (error) {
+ return (error);
+ } else {
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ return (0);
+ }
+}
+
+/*
+ * sysctl functions
+ */
+static int
+sctp_assoclist(SYSCTL_HANDLER_ARGS)
+{
+ unsigned int number_of_endpoints;
+ unsigned int number_of_local_addresses;
+ unsigned int number_of_associations;
+ unsigned int number_of_remote_addresses;
+ unsigned int n;
+ int error;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
+ struct xsctp_inpcb xinpcb;
+ struct xsctp_tcb xstcb;
+ struct xsctp_raddr xraddr;
+
+ number_of_endpoints = 0;
+ number_of_local_addresses = 0;
+ number_of_associations = 0;
+ number_of_remote_addresses = 0;
+
+ SCTP_INP_INFO_RLOCK();
+ if (req->oldptr == USER_ADDR_NULL) {
+ LIST_FOREACH(inp, &sctppcbinfo.listhead, sctp_list) {
+ SCTP_INP_RLOCK(inp);
+ number_of_endpoints++;
+ number_of_local_addresses += number_of_addresses(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ number_of_associations++;
+ number_of_local_addresses += number_of_addresses(inp);
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ number_of_remote_addresses++;
+ }
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ SCTP_INP_INFO_RUNLOCK();
+ n = (number_of_endpoints + 1) * sizeof(struct xsctp_inpcb) +
+ (number_of_local_addresses + number_of_endpoints + number_of_associations) * sizeof(struct xsctp_laddr) +
+ (number_of_associations + number_of_endpoints) * sizeof(struct xsctp_tcb) +
+ (number_of_remote_addresses + number_of_associations) * sizeof(struct xsctp_raddr);
+
+ /* request some more memory than needed */
+ req->oldidx = (n + n / 8);
+ return 0;
+ }
+ if (req->newptr != USER_ADDR_NULL) {
+ SCTP_INP_INFO_RUNLOCK();
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_SYSCTL, EPERM);
+ return EPERM;
+ }
+ LIST_FOREACH(inp, &sctppcbinfo.listhead, sctp_list) {
+ SCTP_INP_RLOCK(inp);
+ xinpcb.last = 0;
+ xinpcb.local_port = ntohs(inp->sctp_lport);
+ xinpcb.flags = inp->sctp_flags;
+ xinpcb.features = inp->sctp_features;
+ xinpcb.total_sends = inp->total_sends;
+ xinpcb.total_recvs = inp->total_recvs;
+ xinpcb.total_nospaces = inp->total_nospaces;
+ xinpcb.fragmentation_point = inp->sctp_frag_point;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
+ xinpcb.qlen = 0;
+ xinpcb.maxqlen = 0;
+ } else {
+ xinpcb.qlen = inp->sctp_socket->so_qlen;
+ xinpcb.maxqlen = inp->sctp_socket->so_qlimit;
+ }
+ SCTP_INP_INCR_REF(inp);
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xinpcb, sizeof(struct xsctp_inpcb));
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ return error;
+ }
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ error = copy_out_local_addresses(inp, NULL, req);
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ return error;
+ }
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ SCTP_TCB_LOCK(stcb);
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ xstcb.last = 0;
+ xstcb.local_port = ntohs(inp->sctp_lport);
+ xstcb.remote_port = ntohs(stcb->rport);
+ if (stcb->asoc.primary_destination != NULL)
+ xstcb.primary_addr = stcb->asoc.primary_destination->ro._l_addr;
+ xstcb.heartbeat_interval = stcb->asoc.heart_beat_delay;
+ xstcb.state = SCTP_GET_STATE(&stcb->asoc); /* FIXME */
+ xstcb.in_streams = stcb->asoc.streamincnt;
+ xstcb.out_streams = stcb->asoc.streamoutcnt;
+ xstcb.max_nr_retrans = stcb->asoc.overall_error_count;
+ xstcb.primary_process = 0; /* not really supported
+ * yet */
+ xstcb.T1_expireries = stcb->asoc.timoinit + stcb->asoc.timocookie;
+ xstcb.T2_expireries = stcb->asoc.timoshutdown + stcb->asoc.timoshutdownack;
+ xstcb.retransmitted_tsns = stcb->asoc.marked_retrans;
+ xstcb.start_time.tv_sec = (uint32_t) stcb->asoc.start_time.tv_sec;
+ xstcb.start_time.tv_usec = (uint32_t) stcb->asoc.start_time.tv_usec;
+ xstcb.discontinuity_time.tv_sec = (uint32_t) stcb->asoc.discontinuity_time.tv_sec;
+ xstcb.discontinuity_time.tv_usec = (uint32_t) stcb->asoc.discontinuity_time.tv_usec;
+ xstcb.total_sends = stcb->total_sends;
+ xstcb.total_recvs = stcb->total_recvs;
+ xstcb.local_tag = stcb->asoc.my_vtag;
+ xstcb.remote_tag = stcb->asoc.peer_vtag;
+ xstcb.initial_tsn = stcb->asoc.init_seq_number;
+ xstcb.highest_tsn = stcb->asoc.sending_seq - 1;
+ xstcb.cumulative_tsn = stcb->asoc.last_acked_seq;
+ xstcb.cumulative_tsn_ack = stcb->asoc.cumulative_tsn;
+ xstcb.mtu = stcb->asoc.smallest_mtu;
+ xstcb.refcnt = stcb->asoc.refcnt;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xstcb, sizeof(struct xsctp_tcb));
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return error;
+ }
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ error = copy_out_local_addresses(inp, stcb, req);
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return error;
+ }
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ xraddr.last = 0;
+ xraddr.address = net->ro._l_addr;
+ xraddr.active = ((net->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE);
+ xraddr.confirmed = ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0);
+ xraddr.heartbeat_enabled = ((net->dest_state & SCTP_ADDR_NOHB) == 0);
+ xraddr.rto = net->RTO;
+ xraddr.max_path_rtx = net->failure_threshold;
+ xraddr.rtx = net->marked_retrans;
+ xraddr.error_counter = net->error_count;
+ xraddr.cwnd = net->cwnd;
+ xraddr.flight_size = net->flight_size;
+ xraddr.mtu = net->mtu;
+ xraddr.start_time.tv_sec = (uint32_t) net->start_time.tv_sec;
+ xraddr.start_time.tv_usec = (uint32_t) net->start_time.tv_usec;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xraddr, sizeof(struct xsctp_raddr));
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ return error;
+ }
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ }
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ memset((void *)&xraddr, 0, sizeof(struct xsctp_raddr));
+ xraddr.last = 1;
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ error = SYSCTL_OUT(req, &xraddr, sizeof(struct xsctp_raddr));
+ if (error) {
+ SCTP_INP_DECR_REF(inp);
+ return error;
+ }
+ SCTP_INP_INFO_RLOCK();
+ SCTP_INP_RLOCK(inp);
+ }
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ memset((void *)&xstcb, 0, sizeof(struct xsctp_tcb));
+ xstcb.last = 1;
+ error = SYSCTL_OUT(req, &xstcb, sizeof(struct xsctp_tcb));
+ if (error) {
+ return error;
+ }
+ SCTP_INP_INFO_RLOCK();
+ }
+ SCTP_INP_INFO_RUNLOCK();
+
+ memset((void *)&xinpcb, 0, sizeof(struct xsctp_inpcb));
+ xinpcb.last = 1;
+ error = SYSCTL_OUT(req, &xinpcb, sizeof(struct xsctp_inpcb));
+ return error;
+}
+
+#define RANGECHK(var, min, max) \
+ if ((var) < (min)) { (var) = (min); } \
+ else if ((var) > (max)) { (var) = (max); }
+
+static int
+sysctl_sctp_check(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+
+ error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
+ if (error == 0) {
+ RANGECHK(sctp_sendspace, SCTPCTL_MAXDGRAM_MIN, SCTPCTL_MAXDGRAM_MAX);
+ RANGECHK(sctp_recvspace, SCTPCTL_RECVSPACE_MIN, SCTPCTL_RECVSPACE_MAX);
+#if defined(__FreeBSD__) || defined(SCTP_APPLE_AUTO_ASCONF)
+ RANGECHK(sctp_auto_asconf, SCTPCTL_AUTOASCONF_MIN, SCTPCTL_AUTOASCONF_MAX);
+#endif
+ RANGECHK(sctp_ecn_enable, SCTPCTL_ECN_ENABLE_MIN, SCTPCTL_ECN_ENABLE_MAX);
+ RANGECHK(sctp_ecn_nonce, SCTPCTL_ECN_NONCE_MIN, SCTPCTL_ECN_NONCE_MAX);
+ RANGECHK(sctp_strict_sacks, SCTPCTL_STRICT_SACKS_MIN, SCTPCTL_STRICT_SACKS_MAX);
+ RANGECHK(sctp_no_csum_on_loopback, SCTPCTL_LOOPBACK_NOCSUM_MIN, SCTPCTL_LOOPBACK_NOCSUM_MAX);
+ RANGECHK(sctp_strict_init, SCTPCTL_STRICT_INIT_MIN, SCTPCTL_STRICT_INIT_MAX);
+ RANGECHK(sctp_peer_chunk_oh, SCTPCTL_PEER_CHKOH_MIN, SCTPCTL_PEER_CHKOH_MAX);
+ RANGECHK(sctp_max_burst_default, SCTPCTL_MAXBURST_MIN, SCTPCTL_MAXBURST_MAX);
+ RANGECHK(sctp_max_chunks_on_queue, SCTPCTL_MAXCHUNKS_MIN, SCTPCTL_MAXCHUNKS_MAX);
+ RANGECHK(sctp_hashtblsize, SCTPCTL_TCBHASHSIZE_MIN, SCTPCTL_TCBHASHSIZE_MAX);
+ RANGECHK(sctp_pcbtblsize, SCTPCTL_PCBHASHSIZE_MIN, SCTPCTL_PCBHASHSIZE_MAX);
+ RANGECHK(sctp_min_split_point, SCTPCTL_MIN_SPLIT_POINT_MIN, SCTPCTL_MIN_SPLIT_POINT_MAX);
+ RANGECHK(sctp_chunkscale, SCTPCTL_CHUNKSCALE_MIN, SCTPCTL_CHUNKSCALE_MAX);
+ RANGECHK(sctp_delayed_sack_time_default, SCTPCTL_DELAYED_SACK_TIME_MIN, SCTPCTL_DELAYED_SACK_TIME_MAX);
+ RANGECHK(sctp_sack_freq_default, SCTPCTL_SACK_FREQ_MIN, SCTPCTL_SACK_FREQ_MAX);
+ RANGECHK(sctp_system_free_resc_limit, SCTPCTL_SYS_RESOURCE_MIN, SCTPCTL_SYS_RESOURCE_MAX);
+ RANGECHK(sctp_asoc_free_resc_limit, SCTPCTL_ASOC_RESOURCE_MIN, SCTPCTL_ASOC_RESOURCE_MAX);
+ RANGECHK(sctp_heartbeat_interval_default, SCTPCTL_HEARTBEAT_INTERVAL_MIN, SCTPCTL_HEARTBEAT_INTERVAL_MAX);
+ RANGECHK(sctp_pmtu_raise_time_default, SCTPCTL_PMTU_RAISE_TIME_MIN, SCTPCTL_PMTU_RAISE_TIME_MAX);
+ RANGECHK(sctp_shutdown_guard_time_default, SCTPCTL_SHUTDOWN_GUARD_TIME_MIN, SCTPCTL_SHUTDOWN_GUARD_TIME_MAX);
+ RANGECHK(sctp_secret_lifetime_default, SCTPCTL_SECRET_LIFETIME_MIN, SCTPCTL_SECRET_LIFETIME_MAX);
+ RANGECHK(sctp_rto_max_default, SCTPCTL_RTO_MAX_MIN, SCTPCTL_RTO_MAX_MAX);
+ RANGECHK(sctp_rto_min_default, SCTPCTL_RTO_MIN_MIN, SCTPCTL_RTO_MIN_MAX);
+ RANGECHK(sctp_rto_initial_default, SCTPCTL_RTO_INITIAL_MIN, SCTPCTL_RTO_INITIAL_MAX);
+ RANGECHK(sctp_init_rto_max_default, SCTPCTL_INIT_RTO_MAX_MIN, SCTPCTL_INIT_RTO_MAX_MAX);
+ RANGECHK(sctp_valid_cookie_life_default, SCTPCTL_VALID_COOKIE_LIFE_MIN, SCTPCTL_VALID_COOKIE_LIFE_MAX);
+ RANGECHK(sctp_init_rtx_max_default, SCTPCTL_INIT_RTX_MAX_MIN, SCTPCTL_INIT_RTX_MAX_MAX);
+ RANGECHK(sctp_assoc_rtx_max_default, SCTPCTL_ASSOC_RTX_MAX_MIN, SCTPCTL_ASSOC_RTX_MAX_MAX);
+ RANGECHK(sctp_path_rtx_max_default, SCTPCTL_PATH_RTX_MAX_MIN, SCTPCTL_PATH_RTX_MAX_MAX);
+ RANGECHK(sctp_add_more_threshold, SCTPCTL_ADD_MORE_ON_OUTPUT_MIN, SCTPCTL_ADD_MORE_ON_OUTPUT_MAX);
+ RANGECHK(sctp_nr_outgoing_streams_default, SCTPCTL_OUTGOING_STREAMS_MIN, SCTPCTL_OUTGOING_STREAMS_MAX);
+ RANGECHK(sctp_cmt_on_off, SCTPCTL_CMT_ON_OFF_MIN, SCTPCTL_CMT_ON_OFF_MAX);
+ RANGECHK(sctp_cmt_use_dac, SCTPCTL_CMT_USE_DAC_MIN, SCTPCTL_CMT_USE_DAC_MAX);
+ RANGECHK(sctp_cmt_pf, SCTPCTL_CMT_PF_MIN, SCTPCTL_CMT_PF_MAX);
+ RANGECHK(sctp_use_cwnd_based_maxburst, SCTPCTL_CWND_MAXBURST_MIN, SCTPCTL_CWND_MAXBURST_MAX);
+ RANGECHK(sctp_early_fr, SCTPCTL_EARLY_FAST_RETRAN_MIN, SCTPCTL_EARLY_FAST_RETRAN_MAX);
+ RANGECHK(sctp_early_fr_msec, SCTPCTL_EARLY_FAST_RETRAN_MSEC_MIN, SCTPCTL_EARLY_FAST_RETRAN_MSEC_MAX);
+ RANGECHK(sctp_asconf_auth_nochk, SCTPCTL_ASCONF_AUTH_NOCHK_MIN, SCTPCTL_ASCONF_AUTH_NOCHK_MAX);
+ RANGECHK(sctp_auth_disable, SCTPCTL_AUTH_DISABLE_MIN, SCTPCTL_AUTH_DISABLE_MAX);
+ RANGECHK(sctp_nat_friendly, SCTPCTL_NAT_FRIENDLY_MIN, SCTPCTL_NAT_FRIENDLY_MAX);
+ RANGECHK(sctp_L2_abc_variable, SCTPCTL_ABC_L_VAR_MIN, SCTPCTL_ABC_L_VAR_MAX);
+ RANGECHK(sctp_mbuf_threshold_count, SCTPCTL_MAX_CHAINED_MBUFS_MIN, SCTPCTL_MAX_CHAINED_MBUFS_MAX);
+ RANGECHK(sctp_do_drain, SCTPCTL_DO_SCTP_DRAIN_MIN, SCTPCTL_DO_SCTP_DRAIN_MAX);
+ RANGECHK(sctp_hb_maxburst, SCTPCTL_HB_MAX_BURST_MIN, SCTPCTL_HB_MAX_BURST_MAX);
+ RANGECHK(sctp_abort_if_one_2_one_hits_limit, SCTPCTL_ABORT_AT_LIMIT_MIN, SCTPCTL_ABORT_AT_LIMIT_MAX);
+ RANGECHK(sctp_strict_data_order, SCTPCTL_STRICT_DATA_ORDER_MIN, SCTPCTL_STRICT_DATA_ORDER_MAX);
+ RANGECHK(sctp_min_residual, SCTPCTL_MIN_RESIDUAL_MIN, SCTPCTL_MIN_RESIDUAL_MAX);
+ RANGECHK(sctp_max_retran_chunk, SCTPCTL_MAX_RETRAN_CHUNK_MIN, SCTPCTL_MAX_RETRAN_CHUNK_MAX);
+ RANGECHK(sctp_logging_level, SCTPCTL_LOGGING_LEVEL_MIN, SCTPCTL_LOGGING_LEVEL_MAX);
+ RANGECHK(sctp_default_cc_module, SCTPCTL_DEFAULT_CC_MODULE_MIN, SCTPCTL_DEFAULT_CC_MODULE_MAX);
+ RANGECHK(sctp_default_frag_interleave, SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MIN, SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MAX);
+#if defined(__FreeBSD__) || defined(SCTP_APPLE_MOBILITY_BASE)
+ RANGECHK(sctp_mobility_base, SCTPCTL_MOBILITY_BASE_MIN, SCTPCTL_MOBILITY_BASE_MAX);
+#endif
+#if defined(__FreeBSD__) || defined(SCTP_APPLE_MOBILITY_FASTHANDOFF)
+ RANGECHK(sctp_mobility_fasthandoff, SCTPCTL_MOBILITY_FASTHANDOFF_MIN, SCTPCTL_MOBILITY_FASTHANDOFF_MAX);
+#endif
+#ifdef SCTP_DEBUG
+ RANGECHK(sctp_debug_on, SCTPCTL_DEBUG_MIN, SCTPCTL_DEBUG_MAX);
+#endif
+ }
+ return (error);
+}
+
+/*
+ * sysctl definitions
+ */
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, sendspace, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_sendspace, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MAXDGRAM_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, recvspace, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_recvspace, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_RECVSPACE_DESC);
+
+#if defined(__FreeBSD__) || defined(SCTP_APPLE_AUTO_ASCONF)
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, auto_asconf, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_auto_asconf, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_AUTOASCONF_DESC);
+#endif
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, ecn_enable, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_ecn_enable, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ECN_ENABLE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, ecn_nonce, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_ecn_nonce, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ECN_NONCE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, strict_sacks, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_strict_sacks, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_STRICT_SACKS_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, loopback_nocsum, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_no_csum_on_loopback, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_LOOPBACK_NOCSUM_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, strict_init, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_strict_init, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_STRICT_INIT_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, peer_chkoh, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_peer_chunk_oh, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_PEER_CHKOH_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, maxburst, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_max_burst_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MAXBURST_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, maxchunks, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_max_chunks_on_queue, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MAXCHUNKS_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, tcbhashsize, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_hashtblsize, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_TCBHASHSIZE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, pcbhashsize, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_pcbtblsize, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_PCBHASHSIZE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, min_split_point, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_min_split_point, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MIN_SPLIT_POINT_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, chunkscale, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_chunkscale, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_CHUNKSCALE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, delayed_sack_time, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_delayed_sack_time_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_DELAYED_SACK_TIME_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, sack_freq, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_sack_freq_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_SACK_FREQ_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, sys_resource, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_system_free_resc_limit, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_SYS_RESOURCE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, asoc_resource, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_asoc_free_resc_limit, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ASOC_RESOURCE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, heartbeat_interval, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_heartbeat_interval_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_HEARTBEAT_INTERVAL_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, pmtu_raise_time, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_pmtu_raise_time_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_PMTU_RAISE_TIME_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, shutdown_guard_time, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_shutdown_guard_time_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_SHUTDOWN_GUARD_TIME_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, secret_lifetime, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_secret_lifetime_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_SECRET_LIFETIME_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, rto_max, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_rto_max_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_RTO_MAX_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, rto_min, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_rto_min_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_RTO_MIN_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, rto_initial, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_rto_initial_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_RTO_INITIAL_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, init_rto_max, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_init_rto_max_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_INIT_RTO_MAX_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, valid_cookie_life, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_valid_cookie_life_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_VALID_COOKIE_LIFE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, init_rtx_max, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_init_rtx_max_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_INIT_RTX_MAX_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, assoc_rtx_max, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_assoc_rtx_max_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ASSOC_RTX_MAX_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, path_rtx_max, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_path_rtx_max_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_PATH_RTX_MAX_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, add_more_on_output, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_add_more_threshold, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ADD_MORE_ON_OUTPUT_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, outgoing_streams, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_nr_outgoing_streams_default, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_OUTGOING_STREAMS_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, cmt_on_off, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_cmt_on_off, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_CMT_ON_OFF_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, cmt_use_dac, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_cmt_use_dac, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_CMT_USE_DAC_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, cmt_pf, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_cmt_pf, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_CMT_PF_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, cwnd_maxburst, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_use_cwnd_based_maxburst, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_CWND_MAXBURST_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, early_fast_retran, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_early_fr, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_EARLY_FAST_RETRAN_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, early_fast_retran_msec, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_early_fr_msec, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_EARLY_FAST_RETRAN_MSEC_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, asconf_auth_nochk, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_asconf_auth_nochk, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ASCONF_AUTH_NOCHK_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, auth_disable, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_auth_disable, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_AUTH_DISABLE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, nat_friendly, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_nat_friendly, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_NAT_FRIENDLY_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, abc_l_var, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_L2_abc_variable, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ABC_L_VAR_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, max_chained_mbufs, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_mbuf_threshold_count, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MAX_CHAINED_MBUFS_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, do_sctp_drain, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_do_drain, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_DO_SCTP_DRAIN_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, hb_max_burst, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_hb_maxburst, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_HB_MAX_BURST_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, abort_at_limit, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_abort_if_one_2_one_hits_limit, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_ABORT_AT_LIMIT_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, strict_data_order, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_strict_data_order, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_STRICT_DATA_ORDER_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, min_residual, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_min_residual, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MIN_RESIDUAL_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, max_retran_chunk, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_max_retran_chunk, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MAX_RETRAN_CHUNK_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, log_level, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_logging_level, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_LOGGING_LEVEL_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, default_cc_module, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_default_cc_module, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_DEFAULT_CC_MODULE_DESC);
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, default_frag_interleave, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_default_frag_interleave, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DESC);
+
+#if defined(__FreeBSD__) || defined(SCTP_APPLE_MOBILITY_BASE)
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, mobility_base, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_mobility_base, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MOBILITY_BASE_DESC);
+#endif
+
+#if defined(__FreeBSD__) || defined(SCTP_APPLE_MOBILITY_FASTHANDOFF)
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, mobility_fasthandoff, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_mobility_fasthandoff, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_MOBILITY_FASTHANDOFF_DESC);
+#endif
+
+#if defined(SCTP_LOCAL_TRACE_BUF)
+SYSCTL_STRUCT(_net_inet_sctp, OID_AUTO, log, CTLFLAG_RD,
+ &sctp_log, sctp_log,
+ "SCTP logging (struct sctp_log)");
+#endif
+
+#ifdef SCTP_DEBUG
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, debug, CTLTYPE_INT | CTLFLAG_RW,
+ &sctp_debug_on, 0, sysctl_sctp_check, "IU",
+ SCTPCTL_DEBUG_DESC);
+#endif /* SCTP_DEBUG */
+
+
+SYSCTL_STRUCT(_net_inet_sctp, OID_AUTO, stats, CTLFLAG_RW,
+ &sctpstat, sctpstat,
+ "SCTP statistics (struct sctp_stat)");
+
+SYSCTL_PROC(_net_inet_sctp, OID_AUTO, assoclist, CTLFLAG_RD,
+ 0, 0, sctp_assoclist,
+ "S,xassoc", "List of active SCTP associations");
Index: ip_gre.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_gre.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_gre.h -L sys/netinet/ip_gre.h -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_gre.h
+++ sys/netinet/ip_gre.h
@@ -1,5 +1,5 @@
/* $NetBSD: ip_gre.h,v 1.5 2002/06/09 16:33:40 itojun Exp $ */
-/* $FreeBSD: src/sys/netinet/ip_gre.h,v 1.3.2.1 2006/01/27 21:50:10 bz Exp $ */
+/* $FreeBSD: src/sys/netinet/ip_gre.h,v 1.4 2006/01/21 10:44:34 bz Exp $ */
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
Index: tcp.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/tcp.h -L sys/netinet/tcp.h -u -r1.1.1.1 -r1.2
--- sys/netinet/tcp.h
+++ sys/netinet/tcp.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/tcp.h,v 1.31.2.1 2005/10/09 03:22:51 delphij Exp $
+ * $FreeBSD: src/sys/netinet/tcp.h,v 1.40 2007/05/25 21:28:49 andre Exp $
*/
#ifndef _NETINET_TCP_H_
@@ -68,7 +68,8 @@
#define TH_URG 0x20
#define TH_ECE 0x40
#define TH_CWR 0x80
-#define TH_FLAGS (TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG|TH_ECE|TH_CWR)
+#define TH_FLAGS (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|TH_ECE|TH_CWR)
+#define PRINT_TH_FLAGS "\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR"
u_short th_win; /* window */
u_short th_sum; /* checksum */
@@ -76,30 +77,24 @@
};
#define TCPOPT_EOL 0
+#define TCPOLEN_EOL 1
#define TCPOPT_NOP 1
+#define TCPOLEN_NOP 1
#define TCPOPT_MAXSEG 2
#define TCPOLEN_MAXSEG 4
#define TCPOPT_WINDOW 3
#define TCPOLEN_WINDOW 3
-#define TCPOPT_SACK_PERMITTED 4 /* Experimental */
+#define TCPOPT_SACK_PERMITTED 4
#define TCPOLEN_SACK_PERMITTED 2
-#define TCPOPT_SACK 5 /* Experimental */
+#define TCPOPT_SACK 5
+#define TCPOLEN_SACKHDR 2
#define TCPOLEN_SACK 8 /* 2*sizeof(tcp_seq) */
#define TCPOPT_TIMESTAMP 8
#define TCPOLEN_TIMESTAMP 10
#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */
-#define TCPOPT_TSTAMP_HDR \
- (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)
-
-#define MAX_TCPOPTLEN 40 /* Absolute maximum TCP options len */
-
-#define TCPOPT_SIGNATURE 19 /* Keyed MD5: RFC 2385 */
+#define TCPOPT_SIGNATURE 19 /* Keyed MD5: RFC 2385 */
#define TCPOLEN_SIGNATURE 18
-/* Option definitions */
-#define TCPOPT_SACK_PERMIT_HDR \
-(TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_SACK_PERMITTED<<8|TCPOLEN_SACK_PERMITTED)
-#define TCPOPT_SACK_HDR (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_SACK<<8)
/* Miscellaneous constants */
#define MAX_SACK_BLKS 6 /* Max # SACK blocks stored at receiver side */
#define TCP_MAX_SACK 4 /* MAX # SACKs sent in any segment */
@@ -121,14 +116,6 @@
* Setting this to "0" disables the minmss check.
*/
#define TCP_MINMSS 216
-/*
- * TCP_MINMSSOVERLOAD is defined to be 1000 which should cover any type
- * of interactive TCP session.
- * See tcp_subr.c tcp_minmssoverload SYSCTL declaration and tcp_input.c
- * for more comments.
- * Setting this to "0" disables the minmssoverload check.
- */
-#define TCP_MINMSSOVERLOAD 0 /* XXX: Disabled until refined */
/*
* Default maximum segment size for TCP6.
@@ -208,8 +195,8 @@
/* Metrics; variable units. */
u_int32_t __tcpi_pmtu;
u_int32_t __tcpi_rcv_ssthresh;
- u_int32_t __tcpi_rtt;
- u_int32_t __tcpi_rttvar;
+ u_int32_t tcpi_rtt; /* Smoothed RTT in usecs. */
+ u_int32_t tcpi_rttvar; /* RTT variance in usecs. */
u_int32_t tcpi_snd_ssthresh; /* Slow start threshold. */
u_int32_t tcpi_snd_cwnd; /* Send congestion window. */
u_int32_t __tcpi_advmss;
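[Editor's note] The new PRINT_TH_FLAGS string added above follows the kernel's "%b" bit-decoding convention: the leading \20 selects base 16 for printing the raw value, and each following octal \N names bit N (counting from 1 at the least significant bit). A hedged sketch of a debug printout using it with printf(9)'s %b specifier - not code from this commit, and "th" is assumed to point at a struct tcphdr:

    /* Decodes the flag byte, e.g. 0x12 prints roughly as "12<SYN,ACK>". */
    printf("tcp flags %b\n", th->th_flags, PRINT_TH_FLAGS);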
--- /dev/null
+++ sys/netinet/sctp_pcb.c
@@ -0,0 +1,6163 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_pcb.c,v 1.38 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_pcb.c,v 1.62.2.2.2.1 2008/01/31 17:21:50 rrs Exp $");
+
+#include <netinet/sctp_os.h>
+#include <sys/proc.h>
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_timer.h>
+#include <netinet/sctp_bsd_addr.h>
+
+
+struct sctp_epinfo sctppcbinfo;
+
+/* FIX: we don't handle multiple link local scopes */
+/* "scopeless" replacement IN6_ARE_ADDR_EQUAL */
+int
+SCTP6_ARE_ADDR_EQUAL(struct in6_addr *a, struct in6_addr *b)
+{
+ struct in6_addr tmp_a, tmp_b;
+
+ /* use a copy of a and b */
+ tmp_a = *a;
+ tmp_b = *b;
+ in6_clearscope(&tmp_a);
+ in6_clearscope(&tmp_b);
+ return (IN6_ARE_ADDR_EQUAL(&tmp_a, &tmp_b));
+}
+
+void
+sctp_fill_pcbinfo(struct sctp_pcbinfo *spcb)
+{
+ /*
+ * We really don't need to lock this, but I will just because it
+ * does not hurt.
+ */
+ SCTP_INP_INFO_RLOCK();
+ spcb->ep_count = sctppcbinfo.ipi_count_ep;
+ spcb->asoc_count = sctppcbinfo.ipi_count_asoc;
+ spcb->laddr_count = sctppcbinfo.ipi_count_laddr;
+ spcb->raddr_count = sctppcbinfo.ipi_count_raddr;
+ spcb->chk_count = sctppcbinfo.ipi_count_chunk;
+ spcb->readq_count = sctppcbinfo.ipi_count_readq;
+ spcb->stream_oque = sctppcbinfo.ipi_count_strmoq;
+ spcb->free_chunks = sctppcbinfo.ipi_free_chunks;
+
+ SCTP_INP_INFO_RUNLOCK();
+}
+
+/*
+ * Addresses are added to VRF's (Virtual Router's). For BSD we
+ * have only the default VRF 0. We maintain a hash list of
+ * VRF's. Each VRF has its own list of sctp_ifn's. Each of
+ * these has a list of addresses. When we add a new address
+ * to a VRF we lookup the ifn/ifn_index, if the ifn does
+ * not exist we create it and add it to the list of IFN's
+ * within the VRF. Once we have the sctp_ifn, we add the
+ * address to the list. So we look something like:
+ *
+ * hash-vrf-table
+ * vrf-> ifn-> ifn -> ifn
+ * vrf |
+ * ... +--ifa-> ifa -> ifa
+ * vrf
+ *
+ * We keep these separate lists since the SCTP subsystem will
+ * point to these from its source address selection nets structure.
+ * When an address is deleted it does not happen right away on
+ * the SCTP side, it gets scheduled. What we do when a
+ * delete happens is immediately remove the address from
+ * the master list and decrement the refcount. As our
+ * addip iterator works through and frees the src address
+ * selection pointing to the sctp_ifa, eventually the refcount
+ * will reach 0 and we will delete it. Note that it is assumed
+ * that any locking on system level ifn/ifa is done at the
+ * caller of these functions and these routines will only
+ * lock the SCTP structures as they add or delete things.
+ *
+ * Other notes on VRF concepts.
+ * - An endpoint can be in multiple VRF's
+ * - An association lives within a VRF and only one VRF.
+ * - Any incoming packet we can deduce the VRF for by
+ * looking at the mbuf/pak inbound (for BSD its VRF=0 :D)
+ * - Any downward send call or connect call must supply the
+ * VRF via ancillary data or via some sort of set default
+ * VRF socket option call (again for BSD no brainer since
+ * the VRF is always 0).
+ * - An endpoint may add multiple VRF's to it.
+ * - Listening sockets can accept associations in any
+ * of the VRF's they are in but the assoc will end up
+ * in only one VRF (gotten from the packet or connect/send).
+ *
+ */
+
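[Editor's note] The comment above describes the containment hierarchy (VRF -> sctp_ifn -> sctp_ifa) and the refcount-deferred deletion scheme. A minimal sketch, not part of the commit, of walking that hierarchy for the default VRF (0 on BSD, per the comment), using only structures and list links defined in this file; the caller is assumed to hold the address read lock:

    struct sctp_vrf *vrf;
    struct sctp_ifn *ifn;
    struct sctp_ifa *ifa;

    vrf = sctp_find_vrf(0);         /* BSD only has the default VRF 0 */
    if (vrf != NULL) {
            LIST_FOREACH(ifn, &vrf->ifnlist, next_ifn) {
                    LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) {
                            /* each sctp_ifa carries one local address plus a refcount */
                    }
            }
    }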
+struct sctp_vrf *
+sctp_allocate_vrf(int vrf_id)
+{
+ struct sctp_vrf *vrf = NULL;
+ struct sctp_vrflist *bucket;
+
+ /* First allocate the VRF structure */
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf) {
+ /* Already allocated */
+ return (vrf);
+ }
+ SCTP_MALLOC(vrf, struct sctp_vrf *, sizeof(struct sctp_vrf),
+ SCTP_M_VRF);
+ if (vrf == NULL) {
+ /* No memory */
+#ifdef INVARIANTS
+ panic("No memory for VRF:%d", vrf_id);
+#endif
+ return (NULL);
+ }
+ /* setup the VRF */
+ memset(vrf, 0, sizeof(struct sctp_vrf));
+ vrf->vrf_id = vrf_id;
+ LIST_INIT(&vrf->ifnlist);
+ vrf->total_ifa_count = 0;
+ vrf->refcount = 0;
+ /* now also setup table ids */
+ SCTP_INIT_VRF_TABLEID(vrf);
+ /* Init the HASH of addresses */
+ vrf->vrf_addr_hash = SCTP_HASH_INIT(SCTP_VRF_ADDR_HASH_SIZE,
+ &vrf->vrf_addr_hashmark);
+ if (vrf->vrf_addr_hash == NULL) {
+ /* No memory */
+#ifdef INVARIANTS
+ panic("No memory for VRF:%d", vrf_id);
+#endif
+ SCTP_FREE(vrf, SCTP_M_VRF);
+ return (NULL);
+ }
+ /* Add it to the hash table */
+ bucket = &sctppcbinfo.sctp_vrfhash[(vrf_id & sctppcbinfo.hashvrfmark)];
+ LIST_INSERT_HEAD(bucket, vrf, next_vrf);
+ atomic_add_int(&sctppcbinfo.ipi_count_vrfs, 1);
+ return (vrf);
+}
+
+
+struct sctp_ifn *
+sctp_find_ifn(void *ifn, uint32_t ifn_index)
+{
+ struct sctp_ifn *sctp_ifnp;
+ struct sctp_ifnlist *hash_ifn_head;
+
+ /*
+	 * We assume the lock is held for the addresses; if that's wrong,
+	 * problems could occur :-)
+ */
+ hash_ifn_head = &sctppcbinfo.vrf_ifn_hash[(ifn_index & sctppcbinfo.vrf_ifn_hashmark)];
+ LIST_FOREACH(sctp_ifnp, hash_ifn_head, next_bucket) {
+ if (sctp_ifnp->ifn_index == ifn_index) {
+ return (sctp_ifnp);
+ }
+ if (sctp_ifnp->ifn_p && ifn && (sctp_ifnp->ifn_p == ifn)) {
+ return (sctp_ifnp);
+ }
+ }
+ return (NULL);
+}
+
+
+
+struct sctp_vrf *
+sctp_find_vrf(uint32_t vrf_id)
+{
+ struct sctp_vrflist *bucket;
+ struct sctp_vrf *liste;
+
+ bucket = &sctppcbinfo.sctp_vrfhash[(vrf_id & sctppcbinfo.hashvrfmark)];
+ LIST_FOREACH(liste, bucket, next_vrf) {
+ if (vrf_id == liste->vrf_id) {
+ return (liste);
+ }
+ }
+ return (NULL);
+}
+
+void
+sctp_free_vrf(struct sctp_vrf *vrf)
+{
+ int ret;
+
+ ret = atomic_fetchadd_int(&vrf->refcount, -1);
+ if (ret == 1) {
+ /* We zero'd the count */
+ LIST_REMOVE(vrf, next_vrf);
+ SCTP_FREE(vrf, SCTP_M_VRF);
+ atomic_subtract_int(&sctppcbinfo.ipi_count_vrfs, 1);
+ }
+}
+
+void
+sctp_free_ifn(struct sctp_ifn *sctp_ifnp)
+{
+ int ret;
+
+ ret = atomic_fetchadd_int(&sctp_ifnp->refcount, -1);
+ if (ret == 1) {
+ /* We zero'd the count */
+ if (sctp_ifnp->vrf) {
+ sctp_free_vrf(sctp_ifnp->vrf);
+ }
+ SCTP_FREE(sctp_ifnp, SCTP_M_IFN);
+ atomic_subtract_int(&sctppcbinfo.ipi_count_ifns, 1);
+ }
+}
+
+void
+sctp_update_ifn_mtu(uint32_t ifn_index, uint32_t mtu)
+{
+ struct sctp_ifn *sctp_ifnp;
+
+ sctp_ifnp = sctp_find_ifn((void *)NULL, ifn_index);
+ if (sctp_ifnp != NULL) {
+ sctp_ifnp->ifn_mtu = mtu;
+ }
+}
+
+
+void
+sctp_free_ifa(struct sctp_ifa *sctp_ifap)
+{
+ int ret;
+
+ ret = atomic_fetchadd_int(&sctp_ifap->refcount, -1);
+ if (ret == 1) {
+ /* We zero'd the count */
+ if (sctp_ifap->ifn_p) {
+ sctp_free_ifn(sctp_ifap->ifn_p);
+ }
+ SCTP_FREE(sctp_ifap, SCTP_M_IFA);
+ atomic_subtract_int(&sctppcbinfo.ipi_count_ifas, 1);
+ }
+}
+
+static void
+sctp_delete_ifn(struct sctp_ifn *sctp_ifnp, int hold_addr_lock)
+{
+ struct sctp_ifn *found;
+
+ found = sctp_find_ifn(sctp_ifnp->ifn_p, sctp_ifnp->ifn_index);
+ if (found == NULL) {
+ /* Not in the list.. sorry */
+ return;
+ }
+ if (hold_addr_lock == 0)
+ SCTP_IPI_ADDR_WLOCK();
+ LIST_REMOVE(sctp_ifnp, next_bucket);
+ LIST_REMOVE(sctp_ifnp, next_ifn);
+ SCTP_DEREGISTER_INTERFACE(sctp_ifnp->ifn_index,
+ sctp_ifnp->registered_af);
+ if (hold_addr_lock == 0)
+ SCTP_IPI_ADDR_WUNLOCK();
+ /* Take away the reference, and possibly free it */
+ sctp_free_ifn(sctp_ifnp);
+}
+
+void
+sctp_mark_ifa_addr_down(uint32_t vrf_id, struct sockaddr *addr,
+ const char *if_name, uint32_t ifn_index)
+{
+ struct sctp_vrf *vrf;
+ struct sctp_ifa *sctp_ifap = NULL;
+
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id);
+ goto out;
+
+ }
+ sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED);
+ if (sctp_ifap == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Can't find sctp_ifap for address\n");
+ goto out;
+ }
+ if (sctp_ifap->ifn_p == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unuseable\n");
+ goto out;
+ }
+ if (if_name) {
+ int len1, len2;
+
+ len1 = strlen(if_name);
+ len2 = strlen(sctp_ifap->ifn_p->ifn_name);
+ if (len1 != len2) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFN of ifa names different lenght %d vs %d - ignored\n",
+ len1, len2);
+ goto out;
+ }
+ if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) != 0) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFN %s of IFA not the same as %s\n",
+ sctp_ifap->ifn_p->ifn_name,
+ if_name);
+ goto out;
+ }
+ } else {
+ if (sctp_ifap->ifn_p->ifn_index != ifn_index) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFA owned by ifn_index:%d down command for ifn_index:%d - ignored\n",
+ sctp_ifap->ifn_p->ifn_index, ifn_index);
+ goto out;
+ }
+ }
+
+ sctp_ifap->localifa_flags &= (~SCTP_ADDR_VALID);
+ sctp_ifap->localifa_flags |= SCTP_ADDR_IFA_UNUSEABLE;
+out:
+ SCTP_IPI_ADDR_RUNLOCK();
+}
+
+void
+sctp_mark_ifa_addr_up(uint32_t vrf_id, struct sockaddr *addr,
+ const char *if_name, uint32_t ifn_index)
+{
+ struct sctp_vrf *vrf;
+ struct sctp_ifa *sctp_ifap = NULL;
+
+ SCTP_IPI_ADDR_RLOCK();
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id);
+ goto out;
+
+ }
+ sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED);
+ if (sctp_ifap == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Can't find sctp_ifap for address\n");
+ goto out;
+ }
+ if (sctp_ifap->ifn_p == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFA has no IFN - can't mark unuseable\n");
+ goto out;
+ }
+ if (if_name) {
+ int len1, len2;
+
+ len1 = strlen(if_name);
+ len2 = strlen(sctp_ifap->ifn_p->ifn_name);
+ if (len1 != len2) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFN of ifa names different lenght %d vs %d - ignored\n",
+ len1, len2);
+ goto out;
+ }
+ if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) != 0) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFN %s of IFA not the same as %s\n",
+ sctp_ifap->ifn_p->ifn_name,
+ if_name);
+ goto out;
+ }
+ } else {
+ if (sctp_ifap->ifn_p->ifn_index != ifn_index) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "IFA owned by ifn_index:%d down command for ifn_index:%d - ignored\n",
+ sctp_ifap->ifn_p->ifn_index, ifn_index);
+ goto out;
+ }
+ }
+
+ sctp_ifap->localifa_flags &= (~SCTP_ADDR_IFA_UNUSEABLE);
+ sctp_ifap->localifa_flags |= SCTP_ADDR_VALID;
+out:
+ SCTP_IPI_ADDR_RUNLOCK();
+}
+
+/*-
+ * Add an ifa to an ifn.
+ * Register the interface as necessary.
+ * NOTE: ADDR write lock MUST be held.
+ */
+static void
+sctp_add_ifa_to_ifn(struct sctp_ifn *sctp_ifnp, struct sctp_ifa *sctp_ifap)
+{
+ int ifa_af;
+
+ LIST_INSERT_HEAD(&sctp_ifnp->ifalist, sctp_ifap, next_ifa);
+ sctp_ifap->ifn_p = sctp_ifnp;
+ atomic_add_int(&sctp_ifap->ifn_p->refcount, 1);
+ /* update address counts */
+ sctp_ifnp->ifa_count++;
+ ifa_af = sctp_ifap->address.sa.sa_family;
+ if (ifa_af == AF_INET)
+ sctp_ifnp->num_v4++;
+ else
+ sctp_ifnp->num_v6++;
+ if (sctp_ifnp->ifa_count == 1) {
+ /* register the new interface */
+ SCTP_REGISTER_INTERFACE(sctp_ifnp->ifn_index, ifa_af);
+ sctp_ifnp->registered_af = ifa_af;
+ }
+}
+
+/*-
+ * Remove an ifa from its ifn.
+ * If no more addresses exist, remove the ifn too. Otherwise, re-register
+ * the interface based on the remaining address families left.
+ * NOTE: ADDR write lock MUST be held.
+ */
+static void
+sctp_remove_ifa_from_ifn(struct sctp_ifa *sctp_ifap)
+{
+ uint32_t ifn_index;
+
+ LIST_REMOVE(sctp_ifap, next_ifa);
+ if (sctp_ifap->ifn_p) {
+ /* update address counts */
+ sctp_ifap->ifn_p->ifa_count--;
+ if (sctp_ifap->address.sa.sa_family == AF_INET6)
+ sctp_ifap->ifn_p->num_v6--;
+ else if (sctp_ifap->address.sa.sa_family == AF_INET)
+ sctp_ifap->ifn_p->num_v4--;
+
+ ifn_index = sctp_ifap->ifn_p->ifn_index;
+ if (SCTP_LIST_EMPTY(&sctp_ifap->ifn_p->ifalist)) {
+ /* remove the ifn, possibly freeing it */
+ sctp_delete_ifn(sctp_ifap->ifn_p, SCTP_ADDR_LOCKED);
+ } else {
+ /* re-register address family type, if needed */
+ if ((sctp_ifap->ifn_p->num_v6 == 0) &&
+ (sctp_ifap->ifn_p->registered_af == AF_INET6)) {
+ SCTP_DEREGISTER_INTERFACE(ifn_index, AF_INET6);
+ SCTP_REGISTER_INTERFACE(ifn_index, AF_INET);
+ sctp_ifap->ifn_p->registered_af = AF_INET;
+ } else if ((sctp_ifap->ifn_p->num_v4 == 0) &&
+ (sctp_ifap->ifn_p->registered_af == AF_INET)) {
+ SCTP_DEREGISTER_INTERFACE(ifn_index, AF_INET);
+ SCTP_REGISTER_INTERFACE(ifn_index, AF_INET6);
+ sctp_ifap->ifn_p->registered_af = AF_INET6;
+ }
+ /* free the ifn refcount */
+ sctp_free_ifn(sctp_ifap->ifn_p);
+ }
+ sctp_ifap->ifn_p = NULL;
+ }
+}
+
+struct sctp_ifa *
+sctp_add_addr_to_vrf(uint32_t vrf_id, void *ifn, uint32_t ifn_index,
+ uint32_t ifn_type, const char *if_name, void *ifa,
+ struct sockaddr *addr, uint32_t ifa_flags,
+ int dynamic_add)
+{
+ struct sctp_vrf *vrf;
+ struct sctp_ifn *sctp_ifnp = NULL;
+ struct sctp_ifa *sctp_ifap = NULL;
+ struct sctp_ifalist *hash_addr_head;
+ struct sctp_ifnlist *hash_ifn_head;
+ uint32_t hash_of_addr;
+ int new_ifn_af = 0;
+
+#ifdef SCTP_DEBUG
+ SCTPDBG(SCTP_DEBUG_PCB4, "vrf_id 0x%x: adding address: ", vrf_id);
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB4, addr);
+#endif
+ SCTP_IPI_ADDR_WLOCK();
+ sctp_ifnp = sctp_find_ifn(ifn, ifn_index);
+ if (sctp_ifnp) {
+ vrf = sctp_ifnp->vrf;
+ } else {
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ vrf = sctp_allocate_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTP_IPI_ADDR_WUNLOCK();
+ return (NULL);
+ }
+ }
+ }
+ if (sctp_ifnp == NULL) {
+ /*
+		 * build one and add it; we can't hold the lock across the
+		 * malloc, though.
+ */
+ SCTP_IPI_ADDR_WUNLOCK();
+ SCTP_MALLOC(sctp_ifnp, struct sctp_ifn *,
+ sizeof(struct sctp_ifn), SCTP_M_IFN);
+ if (sctp_ifnp == NULL) {
+#ifdef INVARIANTS
+ panic("No memory for IFN:%u", sctp_ifnp->ifn_index);
+#endif
+ return (NULL);
+ }
+ memset(sctp_ifnp, 0, sizeof(struct sctp_ifn));
+ sctp_ifnp->ifn_index = ifn_index;
+ sctp_ifnp->ifn_p = ifn;
+ sctp_ifnp->ifn_type = ifn_type;
+ sctp_ifnp->refcount = 0;
+ sctp_ifnp->vrf = vrf;
+ atomic_add_int(&vrf->refcount, 1);
+ sctp_ifnp->ifn_mtu = SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, addr->sa_family);
+ if (if_name != NULL) {
+ memcpy(sctp_ifnp->ifn_name, if_name, SCTP_IFNAMSIZ);
+ } else {
+ memcpy(sctp_ifnp->ifn_name, "unknown", min(7, SCTP_IFNAMSIZ));
+ }
+ hash_ifn_head = &sctppcbinfo.vrf_ifn_hash[(ifn_index & sctppcbinfo.vrf_ifn_hashmark)];
+ LIST_INIT(&sctp_ifnp->ifalist);
+ SCTP_IPI_ADDR_WLOCK();
+ LIST_INSERT_HEAD(hash_ifn_head, sctp_ifnp, next_bucket);
+ LIST_INSERT_HEAD(&vrf->ifnlist, sctp_ifnp, next_ifn);
+ atomic_add_int(&sctppcbinfo.ipi_count_ifns, 1);
+ new_ifn_af = 1;
+ }
+ sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED);
+ if (sctp_ifap) {
+ /* Hmm, it already exists? */
+ if ((sctp_ifap->ifn_p) &&
+ (sctp_ifap->ifn_p->ifn_index == ifn_index)) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Using existing ifn %s (0x%x) for ifa %p\n",
+ sctp_ifap->ifn_p->ifn_name, ifn_index,
+ sctp_ifap);
+ if (new_ifn_af) {
+ /* Remove the created one that we don't want */
+ sctp_delete_ifn(sctp_ifnp, SCTP_ADDR_LOCKED);
+ }
+ if (sctp_ifap->localifa_flags & SCTP_BEING_DELETED) {
+ /* easy to solve, just switch back to active */
+ SCTPDBG(SCTP_DEBUG_PCB4, "Clearing deleted ifa flag\n");
+ sctp_ifap->localifa_flags = SCTP_ADDR_VALID;
+ sctp_ifap->ifn_p = sctp_ifnp;
+ atomic_add_int(&sctp_ifap->ifn_p->refcount, 1);
+ }
+ exit_stage_left:
+ SCTP_IPI_ADDR_WUNLOCK();
+ return (sctp_ifap);
+ } else {
+ if (sctp_ifap->ifn_p) {
+ /*
+				 * The last IFN gets the address, remove
+ * the old one
+ */
+ SCTPDBG(SCTP_DEBUG_PCB4, "Moving ifa %p from %s (0x%x) to %s (0x%x)\n",
+ sctp_ifap, sctp_ifap->ifn_p->ifn_name,
+ sctp_ifap->ifn_p->ifn_index, if_name,
+ ifn_index);
+ /* remove the address from the old ifn */
+ sctp_remove_ifa_from_ifn(sctp_ifap);
+ /* move the address over to the new ifn */
+ sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap);
+ goto exit_stage_left;
+ } else {
+ /* repair ifnp which was NULL ? */
+ sctp_ifap->localifa_flags = SCTP_ADDR_VALID;
+ SCTPDBG(SCTP_DEBUG_PCB4, "Repairing ifn %p for ifa %p\n",
+ sctp_ifnp, sctp_ifap);
+ sctp_add_ifa_to_ifn(sctp_ifnp, sctp_ifap);
+ }
+ goto exit_stage_left;
+ }
+ }
+ SCTP_IPI_ADDR_WUNLOCK();
+ SCTP_MALLOC(sctp_ifap, struct sctp_ifa *, sizeof(struct sctp_ifa), SCTP_M_IFA);
+ if (sctp_ifap == NULL) {
+#ifdef INVARIANTS
+ panic("No memory for IFA");
+#endif
+ return (NULL);
+ }
+ memset(sctp_ifap, 0, sizeof(struct sctp_ifa));
+ sctp_ifap->ifn_p = sctp_ifnp;
+ atomic_add_int(&sctp_ifnp->refcount, 1);
+ sctp_ifap->vrf_id = vrf_id;
+ sctp_ifap->ifa = ifa;
+ memcpy(&sctp_ifap->address, addr, addr->sa_len);
+ sctp_ifap->localifa_flags = SCTP_ADDR_VALID | SCTP_ADDR_DEFER_USE;
+ sctp_ifap->flags = ifa_flags;
+ /* Set scope */
+ if (sctp_ifap->address.sa.sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&sctp_ifap->address.sin;
+ if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) ||
+ (IN4_ISLOOPBACK_ADDRESS(&sin->sin_addr))) {
+ sctp_ifap->src_is_loop = 1;
+ }
+ if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
+ sctp_ifap->src_is_priv = 1;
+ }
+ sctp_ifnp->num_v4++;
+ if (new_ifn_af)
+ new_ifn_af = AF_INET;
+ } else if (sctp_ifap->address.sa.sa_family == AF_INET6) {
+ /* ok to use deprecated addresses? */
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&sctp_ifap->address.sin6;
+ if (SCTP_IFN_IS_IFT_LOOP(sctp_ifap->ifn_p) ||
+ (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))) {
+ sctp_ifap->src_is_loop = 1;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ sctp_ifap->src_is_priv = 1;
+ }
+ sctp_ifnp->num_v6++;
+ if (new_ifn_af)
+ new_ifn_af = AF_INET6;
+ } else {
+ new_ifn_af = 0;
+ }
+ hash_of_addr = sctp_get_ifa_hash_val(&sctp_ifap->address.sa);
+
+ if ((sctp_ifap->src_is_priv == 0) &&
+ (sctp_ifap->src_is_loop == 0)) {
+ sctp_ifap->src_is_glob = 1;
+ }
+ SCTP_IPI_ADDR_WLOCK();
+ hash_addr_head = &vrf->vrf_addr_hash[(hash_of_addr & vrf->vrf_addr_hashmark)];
+ LIST_INSERT_HEAD(hash_addr_head, sctp_ifap, next_bucket);
+ sctp_ifap->refcount = 1;
+ LIST_INSERT_HEAD(&sctp_ifnp->ifalist, sctp_ifap, next_ifa);
+ sctp_ifnp->ifa_count++;
+ vrf->total_ifa_count++;
+ atomic_add_int(&sctppcbinfo.ipi_count_ifas, 1);
+ if (new_ifn_af) {
+ SCTP_REGISTER_INTERFACE(ifn_index, new_ifn_af);
+ sctp_ifnp->registered_af = new_ifn_af;
+ }
+ SCTP_IPI_ADDR_WUNLOCK();
+ if (dynamic_add) {
+ /*
+ * Bump up the refcount so that when the timer completes it
+ * will drop back down.
+ */
+ struct sctp_laddr *wi;
+
+ atomic_add_int(&sctp_ifap->refcount, 1);
+ wi = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_laddr, struct sctp_laddr);
+ if (wi == NULL) {
+ /*
+ * Gak, what can we do? We have lost an address
+			 * change; can you say HOSED?
+ */
+ SCTPDBG(SCTP_DEBUG_PCB4, "Lost an address change?\n");
+			/* Oops, must decrement the count */
+ sctp_del_addr_from_vrf(vrf_id, addr, ifn_index,
+ if_name);
+ return (NULL);
+ }
+ SCTP_INCR_LADDR_COUNT();
+ bzero(wi, sizeof(*wi));
+ (void)SCTP_GETTIME_TIMEVAL(&wi->start_time);
+ wi->ifa = sctp_ifap;
+ wi->action = SCTP_ADD_IP_ADDRESS;
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ /*
+ * Should this really be a tailq? As it is we will process
+ * the newest first :-0
+ */
+ LIST_INSERT_HEAD(&sctppcbinfo.addr_wq, wi, sctp_nxt_addr);
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
+ (struct sctp_inpcb *)NULL,
+ (struct sctp_tcb *)NULL,
+ (struct sctp_nets *)NULL);
+ } else {
+ /* it's ready for use */
+ sctp_ifap->localifa_flags &= ~SCTP_ADDR_DEFER_USE;
+ }
+ return (sctp_ifap);
+}
+
+void
+sctp_del_addr_from_vrf(uint32_t vrf_id, struct sockaddr *addr,
+ uint32_t ifn_index, const char *if_name)
+{
+ struct sctp_vrf *vrf;
+ struct sctp_ifa *sctp_ifap = NULL;
+
+ SCTP_IPI_ADDR_WLOCK();
+ vrf = sctp_find_vrf(vrf_id);
+ if (vrf == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Can't find vrf_id 0x%x\n", vrf_id);
+ goto out_now;
+ }
+#ifdef SCTP_DEBUG
+ SCTPDBG(SCTP_DEBUG_PCB4, "vrf_id 0x%x: deleting address:", vrf_id);
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB4, addr);
+#endif
+ sctp_ifap = sctp_find_ifa_by_addr(addr, vrf->vrf_id, SCTP_ADDR_LOCKED);
+ if (sctp_ifap) {
+ /* Validate the delete */
+ if (sctp_ifap->ifn_p) {
+ int valid = 0;
+
+ /*-
+ * The name has priority over the ifn_index
+			 * if it's given. We do this especially for
+ * panda who might recycle indexes fast.
+ */
+ if (if_name) {
+ int len1, len2;
+
+ len1 = min(SCTP_IFNAMSIZ, strlen(if_name));
+ len2 = min(SCTP_IFNAMSIZ, strlen(sctp_ifap->ifn_p->ifn_name));
+ if (len1 && len2 && (len1 == len2)) {
+ /* we can compare them */
+ if (strncmp(if_name, sctp_ifap->ifn_p->ifn_name, len1) == 0) {
+ /*
+ * They match its a correct
+ * delete
+ */
+ valid = 1;
+ }
+ }
+ }
+ if (!valid) {
+ /* last ditch check ifn_index */
+ if (ifn_index == sctp_ifap->ifn_p->ifn_index) {
+ valid = 1;
+ }
+ }
+ if (!valid) {
+ SCTPDBG(SCTP_DEBUG_PCB4, "ifn:%d ifname:%s does not match addresses\n",
+ ifn_index, ((if_name == NULL) ? "NULL" : if_name));
+ SCTPDBG(SCTP_DEBUG_PCB4, "ifn:%d ifname:%s - ignoring delete\n",
+ sctp_ifap->ifn_p->ifn_index, sctp_ifap->ifn_p->ifn_name);
+ SCTP_IPI_ADDR_WUNLOCK();
+ return;
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_PCB4, "Deleting ifa %p\n", sctp_ifap);
+ sctp_ifap->localifa_flags &= SCTP_ADDR_VALID;
+ sctp_ifap->localifa_flags |= SCTP_BEING_DELETED;
+ vrf->total_ifa_count--;
+ LIST_REMOVE(sctp_ifap, next_bucket);
+ sctp_remove_ifa_from_ifn(sctp_ifap);
+ }
+#ifdef SCTP_DEBUG
+ else {
+ SCTPDBG(SCTP_DEBUG_PCB4, "Del Addr-ifn:%d Could not find address:",
+ ifn_index);
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB1, addr);
+ }
+#endif
+
+out_now:
+ SCTP_IPI_ADDR_WUNLOCK();
+ if (sctp_ifap) {
+ struct sctp_laddr *wi;
+
+ wi = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_laddr, struct sctp_laddr);
+ if (wi == NULL) {
+ /*
+ * Gak, what can we do? We have lost an address
+			 * change; can you say HOSED?
+ */
+ SCTPDBG(SCTP_DEBUG_PCB4, "Lost an address change?\n");
+
+ /* Oops, must decrement the count */
+ sctp_free_ifa(sctp_ifap);
+ return;
+ }
+ SCTP_INCR_LADDR_COUNT();
+ bzero(wi, sizeof(*wi));
+ (void)SCTP_GETTIME_TIMEVAL(&wi->start_time);
+ wi->ifa = sctp_ifap;
+ wi->action = SCTP_DEL_IP_ADDRESS;
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ /*
+ * Should this really be a tailq? As it is we will process
+ * the newest first :-0
+ */
+ LIST_INSERT_HEAD(&sctppcbinfo.addr_wq, wi, sctp_nxt_addr);
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+
+ sctp_timer_start(SCTP_TIMER_TYPE_ADDR_WQ,
+ (struct sctp_inpcb *)NULL,
+ (struct sctp_tcb *)NULL,
+ (struct sctp_nets *)NULL);
+ }
+ return;
+}
+
+
+static struct sctp_tcb *
+sctp_tcb_special_locate(struct sctp_inpcb **inp_p, struct sockaddr *from,
+ struct sockaddr *to, struct sctp_nets **netp, uint32_t vrf_id)
+{
+ /**** ASSUMES THE CALLER holds the INP_INFO_RLOCK */
+ /*
+ * If we support the TCP model, then we must now dig through to see
+ * if we can find our endpoint in the list of tcp ep's.
+ */
+ uint16_t lport, rport;
+ struct sctppcbhead *ephead;
+ struct sctp_inpcb *inp;
+ struct sctp_laddr *laddr;
+ struct sctp_tcb *stcb;
+ struct sctp_nets *net;
+
+ if ((to == NULL) || (from == NULL)) {
+ return (NULL);
+ }
+ if (to->sa_family == AF_INET && from->sa_family == AF_INET) {
+ lport = ((struct sockaddr_in *)to)->sin_port;
+ rport = ((struct sockaddr_in *)from)->sin_port;
+ } else if (to->sa_family == AF_INET6 && from->sa_family == AF_INET6) {
+ lport = ((struct sockaddr_in6 *)to)->sin6_port;
+ rport = ((struct sockaddr_in6 *)from)->sin6_port;
+ } else {
+ return NULL;
+ }
+ ephead = &sctppcbinfo.sctp_tcpephash[SCTP_PCBHASH_ALLADDR(
+ (lport + rport), sctppcbinfo.hashtcpmark)];
+ /*
+ * Ok now for each of the guys in this bucket we must look and see:
+ * - Does the remote port match. - Does there single association's
+ * addresses match this address (to). If so we update p_ep to point
+ * to this ep and return the tcb from it.
+ */
+ LIST_FOREACH(inp, ephead, sctp_hash) {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if (lport != inp->sctp_lport) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if (inp->def_vrf_id != vrf_id) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /* check to see if the ep has one of the addresses */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
+ /* We are NOT bound all, so look further */
+ int match = 0;
+
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n", __FUNCTION__);
+ continue;
+ }
+ if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "ifa being deleted\n");
+ continue;
+ }
+ if (laddr->ifa->address.sa.sa_family ==
+ to->sa_family) {
+ /* see if it matches */
+ struct sockaddr_in *intf_addr, *sin;
+
+ intf_addr = &laddr->ifa->address.sin;
+ sin = (struct sockaddr_in *)to;
+ if (from->sa_family == AF_INET) {
+ if (sin->sin_addr.s_addr ==
+ intf_addr->sin_addr.s_addr) {
+ match = 1;
+ break;
+ }
+ } else {
+ struct sockaddr_in6 *intf_addr6;
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)
+ to;
+ intf_addr6 = &laddr->ifa->address.sin6;
+
+ if (SCTP6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
+ &intf_addr6->sin6_addr)) {
+ match = 1;
+ break;
+ }
+ }
+ }
+ }
+ if (match == 0) {
+ /* This endpoint does not have this address */
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ }
+ /*
+ * Ok if we hit here the ep has the address, does it hold
+ * the tcb?
+ */
+
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ SCTP_TCB_LOCK(stcb);
+ if (stcb->rport != rport) {
+ /* remote port does not match. */
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /* Does this TCB have a matching address? */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+
+ if (net->ro._l_addr.sa.sa_family != from->sa_family) {
+ /* not the same family, can't be a match */
+ continue;
+ }
+ if (from->sa_family == AF_INET) {
+ struct sockaddr_in *sin, *rsin;
+
+ sin = (struct sockaddr_in *)&net->ro._l_addr;
+ rsin = (struct sockaddr_in *)from;
+ if (sin->sin_addr.s_addr ==
+ rsin->sin_addr.s_addr) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ /* Update the endpoint pointer */
+ *inp_p = inp;
+ SCTP_INP_RUNLOCK(inp);
+ return (stcb);
+ }
+ } else {
+ struct sockaddr_in6 *sin6, *rsin6;
+
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ rsin6 = (struct sockaddr_in6 *)from;
+ if (SCTP6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
+ &rsin6->sin6_addr)) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ /* Update the endpoint pointer */
+ *inp_p = inp;
+ SCTP_INP_RUNLOCK(inp);
+ return (stcb);
+ }
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_RUNLOCK(inp);
+ }
+ return (NULL);
+}
+
+/*
+ * rules for use
+ *
+ * 1) If I return a NULL you must decrement any INP ref cnt. 2) If I find an
+ * stcb, both will be locked (locked_tcb and stcb) but decrement will be done
+ * (if locked == NULL). 3) Decrement happens on return ONLY if locked ==
+ * NULL.
+ */
+
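[Editor's note] A hedged sketch, not part of this commit, of a caller honoring the rules above in the locked_tcb == NULL case; "inp" is assumed to already carry a reference, e.g. obtained from sctp_pcb_findep() defined later in this file:

    static void
    example_lookup(struct sctp_inpcb *inp, struct sockaddr *remote)
    {
            struct sctp_tcb *stcb;
            struct sctp_nets *net;

            stcb = sctp_findassociation_ep_addr(&inp, remote, &net, NULL, NULL);
            if (stcb == NULL) {
                    /* Rule 1: nothing found, so this caller drops the inp ref. */
                    SCTP_INP_DECR_REF(inp);
                    return;
            }
            /* Rules 2/3: stcb comes back locked and the ref was already dropped. */
            SCTP_TCB_UNLOCK(stcb);
    }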
+struct sctp_tcb *
+sctp_findassociation_ep_addr(struct sctp_inpcb **inp_p, struct sockaddr *remote,
+ struct sctp_nets **netp, struct sockaddr *local, struct sctp_tcb *locked_tcb)
+{
+ struct sctpasochead *head;
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb = NULL;
+ struct sctp_nets *net;
+ uint16_t rport;
+
+ inp = *inp_p;
+ if (remote->sa_family == AF_INET) {
+ rport = (((struct sockaddr_in *)remote)->sin_port);
+ } else if (remote->sa_family == AF_INET6) {
+ rport = (((struct sockaddr_in6 *)remote)->sin6_port);
+ } else {
+ return (NULL);
+ }
+ if (locked_tcb) {
+ /*
+ * UN-lock so we can do proper locking here this occurs when
+ * called from load_addresses_from_init.
+ */
+ atomic_add_int(&locked_tcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(locked_tcb);
+ }
+ SCTP_INP_INFO_RLOCK();
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
+ /*-
+ * Now either this guy is our listener or it's the
+ * connector. If it is the one that issued the connect, then
+		 * its only chance is to be the first TCB in the list. If
+ * it is the acceptor, then do the special_lookup to hash
+ * and find the real inp.
+ */
+ if ((inp->sctp_socket) && (inp->sctp_socket->so_qlimit)) {
+ /* to is peer addr, from is my addr */
+ stcb = sctp_tcb_special_locate(inp_p, remote, local,
+ netp, inp->def_vrf_id);
+ if ((stcb != NULL) && (locked_tcb == NULL)) {
+ /* we have a locked tcb, lower refcount */
+ SCTP_INP_DECR_REF(inp);
+ }
+ if ((locked_tcb != NULL) && (locked_tcb != stcb)) {
+ SCTP_INP_RLOCK(locked_tcb->sctp_ep);
+ SCTP_TCB_LOCK(locked_tcb);
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ SCTP_INP_RUNLOCK(locked_tcb->sctp_ep);
+ }
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ goto null_return;
+ }
+ stcb = LIST_FIRST(&inp->sctp_asoc_list);
+ if (stcb == NULL) {
+ goto null_return;
+ }
+ SCTP_TCB_LOCK(stcb);
+
+ if (stcb->rport != rport) {
+ /* remote port does not match. */
+ SCTP_TCB_UNLOCK(stcb);
+ goto null_return;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_UNLOCK(stcb);
+ goto null_return;
+ }
+ /* now look at the list of remote addresses */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+#ifdef INVARIANTS
+ if (net == (TAILQ_NEXT(net, sctp_next))) {
+ panic("Corrupt net list");
+ }
+#endif
+ if (net->ro._l_addr.sa.sa_family !=
+ remote->sa_family) {
+ /* not the same family */
+ continue;
+ }
+ if (remote->sa_family == AF_INET) {
+ struct sockaddr_in *sin, *rsin;
+
+ sin = (struct sockaddr_in *)
+ &net->ro._l_addr;
+ rsin = (struct sockaddr_in *)remote;
+ if (sin->sin_addr.s_addr ==
+ rsin->sin_addr.s_addr) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ if (locked_tcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ } else if (locked_tcb != stcb) {
+ SCTP_TCB_LOCK(locked_tcb);
+ }
+ if (locked_tcb) {
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ } else if (remote->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6, *rsin6;
+
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ rsin6 = (struct sockaddr_in6 *)remote;
+ if (SCTP6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
+ &rsin6->sin6_addr)) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ if (locked_tcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ } else if (locked_tcb != stcb) {
+ SCTP_TCB_LOCK(locked_tcb);
+ }
+ if (locked_tcb) {
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ } else {
+ SCTP_INP_WLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ goto null_return;
+ }
+ head = &inp->sctp_tcbhash[SCTP_PCBHASH_ALLADDR(rport,
+ inp->sctp_hashmark)];
+ if (head == NULL) {
+ goto null_return;
+ }
+ LIST_FOREACH(stcb, head, sctp_tcbhash) {
+ if (stcb->rport != rport) {
+ /* remote port does not match */
+ continue;
+ }
+ SCTP_TCB_LOCK(stcb);
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_UNLOCK(stcb);
+ continue;
+ }
+ /* now look at the list of remote addresses */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+#ifdef INVARIANTS
+ if (net == (TAILQ_NEXT(net, sctp_next))) {
+ panic("Corrupt net list");
+ }
+#endif
+ if (net->ro._l_addr.sa.sa_family !=
+ remote->sa_family) {
+ /* not the same family */
+ continue;
+ }
+ if (remote->sa_family == AF_INET) {
+ struct sockaddr_in *sin, *rsin;
+
+ sin = (struct sockaddr_in *)
+ &net->ro._l_addr;
+ rsin = (struct sockaddr_in *)remote;
+ if (sin->sin_addr.s_addr ==
+ rsin->sin_addr.s_addr) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ if (locked_tcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ } else if (locked_tcb != stcb) {
+ SCTP_TCB_LOCK(locked_tcb);
+ }
+ if (locked_tcb) {
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ } else if (remote->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6, *rsin6;
+
+ sin6 = (struct sockaddr_in6 *)
+ &net->ro._l_addr;
+ rsin6 = (struct sockaddr_in6 *)remote;
+ if (SCTP6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
+ &rsin6->sin6_addr)) {
+ /* found it */
+ if (netp != NULL) {
+ *netp = net;
+ }
+ if (locked_tcb == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ } else if (locked_tcb != stcb) {
+ SCTP_TCB_LOCK(locked_tcb);
+ }
+ if (locked_tcb) {
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ }
+null_return:
+ /* clean up for returning null */
+ if (locked_tcb) {
+ SCTP_TCB_LOCK(locked_tcb);
+ atomic_subtract_int(&locked_tcb->asoc.refcnt, 1);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_RUNLOCK();
+ /* not found */
+ return (NULL);
+}
+
+/*
+ * Find an association for a specific endpoint using the association id given
+ * out in the COMM_UP notification
+ */
+
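[Editor's note] For context, the association id consumed here is the same sctp_assoc_t a userland application receives in the SCTP_ASSOC_CHANGE (SCTP_COMM_UP) notification and later hands back to the stack through socket options or sendmsg() ancillary data. A hedged userland sketch of extracting it - not part of this file, and real code would use recvmsg() and check MSG_NOTIFICATION before treating the buffer as a notification:

    union sctp_notification notif;
    sctp_assoc_t id;
    ssize_t n;

    n = recv(sd, &notif, sizeof(notif), 0);
    if (n > 0 && notif.sn_header.sn_type == SCTP_ASSOC_CHANGE &&
        notif.sn_assoc_change.sac_state == SCTP_COMM_UP) {
            /* This id names the association in later calls on the socket. */
            id = notif.sn_assoc_change.sac_assoc_id;
    }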
+struct sctp_tcb *
+sctp_findassociation_ep_asocid(struct sctp_inpcb *inp, sctp_assoc_t asoc_id, int want_lock)
+{
+ /*
+	 * Use the assoc_id to find an endpoint
+ */
+ struct sctpasochead *head;
+ struct sctp_tcb *stcb;
+ uint32_t id;
+
+ if (asoc_id == 0 || inp == NULL) {
+ return (NULL);
+ }
+ SCTP_INP_INFO_RLOCK();
+ id = (uint32_t) asoc_id;
+ head = &sctppcbinfo.sctp_asochash[SCTP_PCBHASH_ASOC(id,
+ sctppcbinfo.hashasocmark)];
+ if (head == NULL) {
+ /* invalid id TSNH */
+ SCTP_INP_INFO_RUNLOCK();
+ return (NULL);
+ }
+ LIST_FOREACH(stcb, head, sctp_asocs) {
+ SCTP_INP_RLOCK(stcb->sctp_ep);
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ SCTP_INP_INFO_RUNLOCK();
+ return (NULL);
+ }
+ if (stcb->asoc.assoc_id == id) {
+ /* candidate */
+ if (inp != stcb->sctp_ep) {
+ /*
+ * some other guy has the same id active (id
+ * collision ??).
+ */
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ continue;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ continue;
+ }
+ if (want_lock) {
+ SCTP_TCB_LOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ }
+ /* Ok if we missed here, lets try the restart hash */
+ head = &sctppcbinfo.sctp_restarthash[SCTP_PCBHASH_ASOC(id, sctppcbinfo.hashrestartmark)];
+ if (head == NULL) {
+ /* invalid id TSNH */
+ SCTP_INP_INFO_RUNLOCK();
+ return (NULL);
+ }
+ LIST_FOREACH(stcb, head, sctp_tcbrestarhash) {
+ SCTP_INP_RLOCK(stcb->sctp_ep);
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ continue;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ continue;
+ }
+ if (want_lock) {
+ SCTP_TCB_LOCK(stcb);
+ }
+ if (stcb->asoc.assoc_id == id) {
+ /* candidate */
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ if (inp != stcb->sctp_ep) {
+ /*
+ * some other guy has the same id active (id
+ * collision ??).
+ */
+ if (want_lock) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ continue;
+ }
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ } else {
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ }
+ if (want_lock) {
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ }
+ SCTP_INP_INFO_RUNLOCK();
+ return (NULL);
+}
+
+
+static struct sctp_inpcb *
+sctp_endpoint_probe(struct sockaddr *nam, struct sctppcbhead *head,
+ uint16_t lport, uint32_t vrf_id)
+{
+ struct sctp_inpcb *inp;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+ struct sctp_laddr *laddr;
+ int fnd;
+
+ /*
+	 * Endpoint probe expects that the INP_INFO is locked.
+ */
+ if (nam->sa_family == AF_INET) {
+ sin = (struct sockaddr_in *)nam;
+ sin6 = NULL;
+ } else if (nam->sa_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)nam;
+ sin = NULL;
+ } else {
+ /* unsupported family */
+ return (NULL);
+ }
+ if (head == NULL)
+ return (NULL);
+ LIST_FOREACH(inp, head, sctp_hash) {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) &&
+ (inp->sctp_lport == lport)) {
+ /* got it */
+ if ((nam->sa_family == AF_INET) &&
+ (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+ /* IPv4 on a IPv6 socket with ONLY IPv6 set */
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /* A V6 address and the endpoint is NOT bound V6 */
+ if (nam->sa_family == AF_INET6 &&
+ (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /* does a VRF id match? */
+ fnd = 0;
+ if (inp->def_vrf_id == vrf_id)
+ fnd = 1;
+
+ SCTP_INP_RUNLOCK(inp);
+ if (!fnd)
+ continue;
+ return (inp);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+
+ if ((nam->sa_family == AF_INET) &&
+ (sin->sin_addr.s_addr == INADDR_ANY)) {
+ /* Can't hunt for one that has no address specified */
+ return (NULL);
+ } else if ((nam->sa_family == AF_INET6) &&
+ (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))) {
+ /* Can't hunt for one that has no address specified */
+ return (NULL);
+ }
+ /*
+	 * ok, not bound to all so see if we can find an EP bound to this
+ * address.
+ */
+ LIST_FOREACH(inp, head, sctp_hash) {
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL)) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /*
+ * Ok this could be a likely candidate, look at all of its
+ * addresses
+ */
+ if (inp->sctp_lport != lport) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ /* does a VRF id match? */
+ fnd = 0;
+ if (inp->def_vrf_id == vrf_id)
+ fnd = 1;
+
+ if (!fnd) {
+ SCTP_INP_RUNLOCK(inp);
+ continue;
+ }
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n",
+ __FUNCTION__);
+ continue;
+ }
+ SCTPDBG(SCTP_DEBUG_PCB1, "Ok laddr->ifa:%p is possible, ",
+ laddr->ifa);
+ if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "Huh IFA being deleted\n");
+ continue;
+ }
+ if (laddr->ifa->address.sa.sa_family == nam->sa_family) {
+ /* possible, see if it matches */
+ struct sockaddr_in *intf_addr;
+
+ intf_addr = &laddr->ifa->address.sin;
+ if (nam->sa_family == AF_INET) {
+ if (sin->sin_addr.s_addr ==
+ intf_addr->sin_addr.s_addr) {
+ SCTP_INP_RUNLOCK(inp);
+ return (inp);
+ }
+ } else if (nam->sa_family == AF_INET6) {
+ struct sockaddr_in6 *intf_addr6;
+
+ intf_addr6 = &laddr->ifa->address.sin6;
+ if (SCTP6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
+ &intf_addr6->sin6_addr)) {
+ SCTP_INP_RUNLOCK(inp);
+ return (inp);
+ }
+ }
+ }
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ return (NULL);
+}
+
+struct sctp_inpcb *
+sctp_pcb_findep(struct sockaddr *nam, int find_tcp_pool, int have_lock,
+ uint32_t vrf_id)
+{
+ /*
+ * First we check the hash table to see if someone has this port
+ * bound with just the port.
+ */
+ struct sctp_inpcb *inp;
+ struct sctppcbhead *head;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+ int lport;
+
+ if (nam->sa_family == AF_INET) {
+ sin = (struct sockaddr_in *)nam;
+ lport = ((struct sockaddr_in *)nam)->sin_port;
+ } else if (nam->sa_family == AF_INET6) {
+ sin6 = (struct sockaddr_in6 *)nam;
+ lport = ((struct sockaddr_in6 *)nam)->sin6_port;
+ } else {
+ /* unsupported family */
+ return (NULL);
+ }
+ /*
+ * I could cheat here and just cast to one of the types but we will
+ * do it right. It also provides the check against an Unsupported
+ * type too.
+ */
+ /* Find the head of the ALLADDR chain */
+ if (have_lock == 0) {
+ SCTP_INP_INFO_RLOCK();
+ }
+ head = &sctppcbinfo.sctp_ephash[SCTP_PCBHASH_ALLADDR(lport,
+ sctppcbinfo.hashmark)];
+ inp = sctp_endpoint_probe(nam, head, lport, vrf_id);
+
+ /*
+ * If the TCP model exists it could be that the main listening
+ * endpoint is gone but there exists a connected socket for this guy
+ * yet. If so we can return the first one that we find. This may NOT
+ * be the correct one but the sctp_findassociation_ep_addr has
+ * further code to look at all TCP models.
+ */
+ if (inp == NULL && find_tcp_pool) {
+ unsigned int i;
+
+ for (i = 0; i < sctppcbinfo.hashtblsize; i++) {
+ /*
+ * This is real gross, but we do NOT have a remote
+ * port at this point depending on who is calling.
+ * We must therefore look for ANY one that matches
+ * our local port :/
+ */
+ head = &sctppcbinfo.sctp_tcpephash[i];
+ if (LIST_FIRST(head)) {
+ inp = sctp_endpoint_probe(nam, head, lport, vrf_id);
+ if (inp) {
+ /* Found one */
+ break;
+ }
+ }
+ }
+ }
+ if (inp) {
+ SCTP_INP_INCR_REF(inp);
+ }
+ if (have_lock == 0) {
+ SCTP_INP_INFO_RUNLOCK();
+ }
+ return (inp);
+}
+
+/*
+ * Find an association for an endpoint with the pointer to whom you want to
+ * send to and the endpoint pointer. The address can be IPv4 or IPv6. We may
+ * need to change the *to to some other struct like an mbuf...
+ */
+struct sctp_tcb *
+sctp_findassociation_addr_sa(struct sockaddr *to, struct sockaddr *from,
+ struct sctp_inpcb **inp_p, struct sctp_nets **netp, int find_tcp_pool,
+ uint32_t vrf_id)
+{
+ struct sctp_inpcb *inp = NULL;
+ struct sctp_tcb *retval;
+
+ SCTP_INP_INFO_RLOCK();
+ if (find_tcp_pool) {
+ if (inp_p != NULL) {
+ retval = sctp_tcb_special_locate(inp_p, from, to, netp,
+ vrf_id);
+ } else {
+ retval = sctp_tcb_special_locate(&inp, from, to, netp,
+ vrf_id);
+ }
+ if (retval != NULL) {
+ SCTP_INP_INFO_RUNLOCK();
+ return (retval);
+ }
+ }
+ inp = sctp_pcb_findep(to, 0, 1, vrf_id);
+ if (inp_p != NULL) {
+ *inp_p = inp;
+ }
+ SCTP_INP_INFO_RUNLOCK();
+
+ if (inp == NULL) {
+ return (NULL);
+ }
+ /*
+	 * ok, we have an endpoint; now let's find the assoc for it (if any).
+	 * We place the source address ("from") in the "to" of the find
+	 * endpoint call, since in reality this chain is used from the
+	 * inbound packet side.
+ */
+ if (inp_p != NULL) {
+ retval = sctp_findassociation_ep_addr(inp_p, from, netp, to,
+ NULL);
+ } else {
+ retval = sctp_findassociation_ep_addr(&inp, from, netp, to,
+ NULL);
+ }
+ return retval;
+}
+
+
+/*
+ * This routine will grub through the mbuf that is an INIT or INIT-ACK and
+ * find all addresses that the sender has specified in any address list. Each
+ * address will be used to look up the TCB and see if one exists.
+ */
+static struct sctp_tcb *
+sctp_findassociation_special_addr(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp,
+ struct sockaddr *dest)
+{
+ struct sockaddr_in sin4;
+ struct sockaddr_in6 sin6;
+ struct sctp_paramhdr *phdr, parm_buf;
+ struct sctp_tcb *retval;
+ uint32_t ptype, plen;
+
+ memset(&sin4, 0, sizeof(sin4));
+ memset(&sin6, 0, sizeof(sin6));
+ sin4.sin_len = sizeof(sin4);
+ sin4.sin_family = AF_INET;
+ sin4.sin_port = sh->src_port;
+ sin6.sin6_len = sizeof(sin6);
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_port = sh->src_port;
+
+ retval = NULL;
+ offset += sizeof(struct sctp_init_chunk);
+
+ phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf));
+ while (phdr != NULL) {
+ /* now we must see if we want the parameter */
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+ if (plen == 0) {
+ break;
+ }
+ if (ptype == SCTP_IPV4_ADDRESS &&
+ plen == sizeof(struct sctp_ipv4addr_param)) {
+ /* Get the rest of the address */
+ struct sctp_ipv4addr_param ip4_parm, *p4;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&ip4_parm, min(plen, sizeof(ip4_parm)));
+ if (phdr == NULL) {
+ return (NULL);
+ }
+ p4 = (struct sctp_ipv4addr_param *)phdr;
+ memcpy(&sin4.sin_addr, &p4->addr, sizeof(p4->addr));
+ /* look it up */
+ retval = sctp_findassociation_ep_addr(inp_p,
+ (struct sockaddr *)&sin4, netp, dest, NULL);
+ if (retval != NULL) {
+ return (retval);
+ }
+ } else if (ptype == SCTP_IPV6_ADDRESS &&
+ plen == sizeof(struct sctp_ipv6addr_param)) {
+ /* Get the rest of the address */
+ struct sctp_ipv6addr_param ip6_parm, *p6;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&ip6_parm, min(plen, sizeof(ip6_parm)));
+ if (phdr == NULL) {
+ return (NULL);
+ }
+ p6 = (struct sctp_ipv6addr_param *)phdr;
+ memcpy(&sin6.sin6_addr, &p6->addr, sizeof(p6->addr));
+ /* look it up */
+ retval = sctp_findassociation_ep_addr(inp_p,
+ (struct sockaddr *)&sin6, netp, dest, NULL);
+ if (retval != NULL) {
+ return (retval);
+ }
+ }
+ offset += SCTP_SIZE32(plen);
+ phdr = sctp_get_next_param(m, offset, &parm_buf,
+ sizeof(parm_buf));
+ }
+ return (NULL);
+}
+
+
+static struct sctp_tcb *
+sctp_findassoc_by_vtag(struct sockaddr *from, uint32_t vtag,
+ struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint16_t rport,
+ uint16_t lport, int skip_src_check)
+{
+ /*
+ * Use my vtag to hash. If we find it we then verify the source addr
+	 * is in the assoc. If all goes well we save a bit on receipt of a
+ * packet.
+ */
+ struct sctpasochead *head;
+ struct sctp_nets *net;
+ struct sctp_tcb *stcb;
+
+ *netp = NULL;
+ *inp_p = NULL;
+ SCTP_INP_INFO_RLOCK();
+ head = &sctppcbinfo.sctp_asochash[SCTP_PCBHASH_ASOC(vtag,
+ sctppcbinfo.hashasocmark)];
+ if (head == NULL) {
+ /* invalid vtag */
+ SCTP_INP_INFO_RUNLOCK();
+ return (NULL);
+ }
+ LIST_FOREACH(stcb, head, sctp_asocs) {
+ SCTP_INP_RLOCK(stcb->sctp_ep);
+ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ continue;
+ }
+ SCTP_TCB_LOCK(stcb);
+ SCTP_INP_RUNLOCK(stcb->sctp_ep);
+ if (stcb->asoc.my_vtag == vtag) {
+ /* candidate */
+ if (stcb->rport != rport) {
+ /*
+ * we could remove this if vtags are unique
+ * across the system.
+ */
+ SCTP_TCB_UNLOCK(stcb);
+ continue;
+ }
+ if (stcb->sctp_ep->sctp_lport != lport) {
+ /*
+ * we could remove this if vtags are unique
+ * across the system.
+ */
+ SCTP_TCB_UNLOCK(stcb);
+ continue;
+ }
+ if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ SCTP_TCB_UNLOCK(stcb);
+ continue;
+ }
+ if (skip_src_check) {
+ *netp = NULL; /* unknown */
+ if (inp_p)
+ *inp_p = stcb->sctp_ep;
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ }
+ net = sctp_findnet(stcb, from);
+ if (net) {
+ /* yep its him. */
+ *netp = net;
+ SCTP_STAT_INCR(sctps_vtagexpress);
+ *inp_p = stcb->sctp_ep;
+ SCTP_INP_INFO_RUNLOCK();
+ return (stcb);
+ } else {
+ /*
+ * not him, this should only happen in rare
+ * cases so I peg it.
+ */
+ SCTP_STAT_INCR(sctps_vtagbogus);
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_INFO_RUNLOCK();
+ return (NULL);
+}
+
+/*
+ * Find an association with the pointer to the inbound IP packet. This can be
+ * an IPv4 or an IPv6 packet.
+ */
+struct sctp_tcb *
+sctp_findassociation_addr(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_chunkhdr *ch,
+ struct sctp_inpcb **inp_p, struct sctp_nets **netp, uint32_t vrf_id)
+{
+ int find_tcp_pool;
+ struct ip *iph;
+ struct sctp_tcb *retval;
+ struct sockaddr_storage to_store, from_store;
+ struct sockaddr *to = (struct sockaddr *)&to_store;
+ struct sockaddr *from = (struct sockaddr *)&from_store;
+ struct sctp_inpcb *inp;
+
+ iph = mtod(m, struct ip *);
+ if (iph->ip_v == IPVERSION) {
+ /* its IPv4 */
+ struct sockaddr_in *from4;
+
+ from4 = (struct sockaddr_in *)&from_store;
+ bzero(from4, sizeof(*from4));
+ from4->sin_family = AF_INET;
+ from4->sin_len = sizeof(struct sockaddr_in);
+ from4->sin_addr.s_addr = iph->ip_src.s_addr;
+ from4->sin_port = sh->src_port;
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ /* its IPv6 */
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 *from6;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ from6 = (struct sockaddr_in6 *)&from_store;
+ bzero(from6, sizeof(*from6));
+ from6->sin6_family = AF_INET6;
+ from6->sin6_len = sizeof(struct sockaddr_in6);
+ from6->sin6_addr = ip6->ip6_src;
+ from6->sin6_port = sh->src_port;
+ /* Get the scopes in properly to the sin6 addr's */
+ /* we probably don't need these operations */
+ (void)sa6_recoverscope(from6);
+ sa6_embedscope(from6, ip6_use_defzone);
+ } else {
+ /* Currently not supported. */
+ return (NULL);
+ }
+ if (sh->v_tag) {
+ /* we only go down this path if vtag is non-zero */
+ retval = sctp_findassoc_by_vtag(from, ntohl(sh->v_tag),
+ inp_p, netp, sh->src_port, sh->dest_port, 0);
+ if (retval) {
+ return (retval);
+ }
+ }
+ if (iph->ip_v == IPVERSION) {
+ /* its IPv4 */
+ struct sockaddr_in *to4;
+
+ to4 = (struct sockaddr_in *)&to_store;
+ bzero(to4, sizeof(*to4));
+ to4->sin_family = AF_INET;
+ to4->sin_len = sizeof(struct sockaddr_in);
+ to4->sin_addr.s_addr = iph->ip_dst.s_addr;
+ to4->sin_port = sh->dest_port;
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ /* its IPv6 */
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 *to6;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ to6 = (struct sockaddr_in6 *)&to_store;
+ bzero(to6, sizeof(*to6));
+ to6->sin6_family = AF_INET6;
+ to6->sin6_len = sizeof(struct sockaddr_in6);
+ to6->sin6_addr = ip6->ip6_dst;
+ to6->sin6_port = sh->dest_port;
+ /* Get the scopes in properly to the sin6 addr's */
+ /* we probably don't need these operations */
+ (void)sa6_recoverscope(to6);
+ sa6_embedscope(to6, ip6_use_defzone);
+ }
+ find_tcp_pool = 0;
+ if ((ch->chunk_type != SCTP_INITIATION) &&
+ (ch->chunk_type != SCTP_INITIATION_ACK) &&
+ (ch->chunk_type != SCTP_COOKIE_ACK) &&
+ (ch->chunk_type != SCTP_COOKIE_ECHO)) {
+ /* Other chunk types go to the tcp pool. */
+ find_tcp_pool = 1;
+ }
+ if (inp_p) {
+ retval = sctp_findassociation_addr_sa(to, from, inp_p, netp,
+ find_tcp_pool, vrf_id);
+ inp = *inp_p;
+ } else {
+ retval = sctp_findassociation_addr_sa(to, from, &inp, netp,
+ find_tcp_pool, vrf_id);
+ }
+ SCTPDBG(SCTP_DEBUG_PCB1, "retval:%p inp:%p\n", retval, inp);
+ if (retval == NULL && inp) {
+ /* Found a EP but not this address */
+ if ((ch->chunk_type == SCTP_INITIATION) ||
+ (ch->chunk_type == SCTP_INITIATION_ACK)) {
+ /*-
+ * special hook, we do NOT return linp or an
+ * association that is linked to an existing
+ * association that is under the TCP pool (i.e. no
+ * listener exists). The endpoint finding routine
+			 * will always find a listener before examining the
+ * TCP pool.
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) {
+ if (inp_p) {
+ *inp_p = NULL;
+ }
+ return (NULL);
+ }
+ retval = sctp_findassociation_special_addr(m, iphlen,
+ offset, sh, &inp, netp, to);
+ if (inp_p != NULL) {
+ *inp_p = inp;
+ }
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_PCB1, "retval is %p\n", retval);
+ return (retval);
+}
+
+/*
+ * Look up an association by an ASCONF lookup address.
+ * If the lookup address is 0.0.0.0 or ::0, use the vtag to do the lookup.
+ */
+struct sctp_tcb *
+sctp_findassociation_ep_asconf(struct mbuf *m, int iphlen, int offset,
+ struct sctphdr *sh, struct sctp_inpcb **inp_p, struct sctp_nets **netp)
+{
+ struct sctp_tcb *stcb;
+ struct sockaddr_in *sin;
+ struct sockaddr_in6 *sin6;
+ struct sockaddr_storage local_store, remote_store;
+ struct ip *iph;
+ struct sctp_paramhdr parm_buf, *phdr;
+ int ptype;
+ int zero_address = 0;
+
+
+ memset(&local_store, 0, sizeof(local_store));
+ memset(&remote_store, 0, sizeof(remote_store));
+
+ /* First get the destination address setup too. */
+ iph = mtod(m, struct ip *);
+ if (iph->ip_v == IPVERSION) {
+ /* its IPv4 */
+ sin = (struct sockaddr_in *)&local_store;
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_port = sh->dest_port;
+ sin->sin_addr.s_addr = iph->ip_dst.s_addr;
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ /* its IPv6 */
+ struct ip6_hdr *ip6;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ sin6 = (struct sockaddr_in6 *)&local_store;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
+ sin6->sin6_port = sh->dest_port;
+ sin6->sin6_addr = ip6->ip6_dst;
+ } else {
+ return NULL;
+ }
+
+ phdr = sctp_get_next_param(m, offset + sizeof(struct sctp_asconf_chunk),
+ &parm_buf, sizeof(struct sctp_paramhdr));
+ if (phdr == NULL) {
+ SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf lookup addr\n",
+ __FUNCTION__);
+ return NULL;
+ }
+ ptype = (int)((uint32_t) ntohs(phdr->param_type));
+ /* get the correlation address */
+ if (ptype == SCTP_IPV6_ADDRESS) {
+ /* ipv6 address param */
+ struct sctp_ipv6addr_param *p6, p6_buf;
+
+ if (ntohs(phdr->param_length) != sizeof(struct sctp_ipv6addr_param)) {
+ return NULL;
+ }
+ p6 = (struct sctp_ipv6addr_param *)sctp_get_next_param(m,
+ offset + sizeof(struct sctp_asconf_chunk),
+ &p6_buf.ph, sizeof(*p6));
+ if (p6 == NULL) {
+ SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v6 lookup addr\n",
+ __FUNCTION__);
+ return (NULL);
+ }
+ sin6 = (struct sockaddr_in6 *)&remote_store;
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_len = sizeof(*sin6);
+ sin6->sin6_port = sh->src_port;
+ memcpy(&sin6->sin6_addr, &p6->addr, sizeof(struct in6_addr));
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
+ zero_address = 1;
+ } else if (ptype == SCTP_IPV4_ADDRESS) {
+ /* ipv4 address param */
+ struct sctp_ipv4addr_param *p4, p4_buf;
+
+ if (ntohs(phdr->param_length) != sizeof(struct sctp_ipv4addr_param)) {
+ return NULL;
+ }
+ p4 = (struct sctp_ipv4addr_param *)sctp_get_next_param(m,
+ offset + sizeof(struct sctp_asconf_chunk),
+ &p4_buf.ph, sizeof(*p4));
+ if (p4 == NULL) {
+ SCTPDBG(SCTP_DEBUG_INPUT3, "%s: failed to get asconf v4 lookup addr\n",
+ __FUNCTION__);
+ return (NULL);
+ }
+ sin = (struct sockaddr_in *)&remote_store;
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_port = sh->src_port;
+ memcpy(&sin->sin_addr, &p4->addr, sizeof(struct in_addr));
+ if (sin->sin_addr.s_addr == INADDR_ANY)
+ zero_address = 1;
+ } else {
+ /* invalid address param type */
+ return NULL;
+ }
+
+ if (zero_address) {
+ stcb = sctp_findassoc_by_vtag(NULL, ntohl(sh->v_tag), inp_p,
+ netp, sh->src_port, sh->dest_port, 1);
+ /*
+ * printf("findassociation_ep_asconf: zero lookup address
+ * finds stcb 0x%x\n", (uint32_t)stcb);
+ */
+ } else {
+ stcb = sctp_findassociation_ep_addr(inp_p,
+ (struct sockaddr *)&remote_store, netp,
+ (struct sockaddr *)&local_store, NULL);
+ }
+ return (stcb);
+}
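The ASCONF lookup above walks the chunk's parameters with sctp_get_next_param(), which follows the standard SCTP parameter TLV layout (16-bit type, 16-bit length including the header, value padded to a 4-byte boundary). A minimal sketch of that walk over a contiguous buffer, assuming nothing beyond the RFC 4960 parameter format; the buffer, length, and function names here are illustrative only, not kernel identifiers:

#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>

struct param_hdr {             /* mirrors the on-wire parameter header */
	uint16_t param_type;   /* network byte order */
	uint16_t param_length; /* includes this header */
};

/* Walk TLV parameters in buf[0..len); return the offset of the first
 * parameter whose (host-order) type matches 'want', or -1 if none. */
static int
find_param(const uint8_t *buf, size_t len, uint16_t want)
{
	size_t off = 0;

	while (off + sizeof(struct param_hdr) <= len) {
		const struct param_hdr *ph = (const void *)(buf + off);
		uint16_t ptype = ntohs(ph->param_type);
		uint16_t plen = ntohs(ph->param_length);

		if (plen < sizeof(*ph) || off + plen > len)
			return (-1);            /* malformed parameter */
		if (ptype == want)
			return ((int)off);
		off += (plen + 3) & ~3;         /* parameters are 4-byte aligned */
	}
	return (-1);
}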
+
+
+/*
+ * allocate an sctp_inpcb and set up a temporary binding to a port/all
+ * addresses. This way, if we don't get a bind, we by default pick an
+ * ephemeral port with all addresses bound.
+ */
+int
+sctp_inpcb_alloc(struct socket *so, uint32_t vrf_id)
+{
+ /*
+ * we get called when a new endpoint starts up. We need to allocate
+ * the sctp_inpcb structure from the zone and init it. Mark it as
+ * unbound and find a port that we can use as an ephemeral with
+ * INADDR_ANY. If the user binds later no problem we can then add in
+ * the specific addresses. And setup the default parameters for the
+ * EP.
+ */
+ int i, error;
+ struct sctp_inpcb *inp;
+ struct sctp_pcb *m;
+ struct timeval time;
+ sctp_sharedkey_t *null_key;
+
+ error = 0;
+
+ SCTP_INP_INFO_WLOCK();
+ inp = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_ep, struct sctp_inpcb);
+ if (inp == NULL) {
+ SCTP_PRINTF("Out of SCTP-INPCB structures - no resources\n");
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
+ return (ENOBUFS);
+ }
+ /* zap it */
+ bzero(inp, sizeof(*inp));
+
+ /* bump generations */
+ /* setup socket pointers */
+ inp->sctp_socket = so;
+ inp->ip_inp.inp.inp_socket = so;
+
+ inp->partial_delivery_point = SCTP_SB_LIMIT_RCV(so) >> SCTP_PARTIAL_DELIVERY_SHIFT;
+ inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
+
+#ifdef IPSEC
+ {
+ struct inpcbpolicy *pcb_sp = NULL;
+
+ error = ipsec_init_policy(so, &pcb_sp);
+ /* Arrange to share the policy */
+ inp->ip_inp.inp.inp_sp = pcb_sp;
+ ((struct in6pcb *)(&inp->ip_inp.inp))->in6p_sp = pcb_sp;
+ }
+ if (error != 0) {
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_ep, inp);
+ SCTP_INP_INFO_WUNLOCK();
+ return error;
+ }
+#endif /* IPSEC */
+ SCTP_INCR_EP_COUNT();
+ inp->ip_inp.inp.inp_ip_ttl = ip_defttl;
+ SCTP_INP_INFO_WUNLOCK();
+
+ so->so_pcb = (caddr_t)inp;
+
+ if ((SCTP_SO_TYPE(so) == SOCK_DGRAM) ||
+ (SCTP_SO_TYPE(so) == SOCK_SEQPACKET)) {
+ /* UDP style socket */
+ inp->sctp_flags = (SCTP_PCB_FLAGS_UDPTYPE |
+ SCTP_PCB_FLAGS_UNBOUND);
+ /* Be sure it is NON-BLOCKING IO for UDP */
+ /* SCTP_SET_SO_NBIO(so); */
+ } else if (SCTP_SO_TYPE(so) == SOCK_STREAM) {
+ /* TCP style socket */
+ inp->sctp_flags = (SCTP_PCB_FLAGS_TCPTYPE |
+ SCTP_PCB_FLAGS_UNBOUND);
+ /* Be sure we have blocking IO by default */
+ SCTP_CLEAR_SO_NBIO(so);
+ } else {
+ /*
+ * unsupported socket type (RAW, etc.) - in case we missed it
+ * in protosw
+ */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EOPNOTSUPP);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_ep, inp);
+ return (EOPNOTSUPP);
+ }
+ if (sctp_default_frag_interleave == SCTP_FRAG_LEVEL_1) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+ } else if (sctp_default_frag_interleave == SCTP_FRAG_LEVEL_2) {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+ } else if (sctp_default_frag_interleave == SCTP_FRAG_LEVEL_0) {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
+ }
+ inp->sctp_tcbhash = SCTP_HASH_INIT(sctp_pcbtblsize,
+ &inp->sctp_hashmark);
+ if (inp->sctp_tcbhash == NULL) {
+ SCTP_PRINTF("Out of SCTP-INPCB->hashinit - no resources\n");
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_ep, inp);
+ return (ENOBUFS);
+ }
+ inp->def_vrf_id = vrf_id;
+
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_LOCK_INIT(inp);
+ INP_LOCK_INIT(&inp->ip_inp.inp, "inp", "sctpinp");
+ SCTP_INP_READ_INIT(inp);
+ SCTP_ASOC_CREATE_LOCK_INIT(inp);
+ /* lock the new ep */
+ SCTP_INP_WLOCK(inp);
+
+ /* add it to the info area */
+ LIST_INSERT_HEAD(&sctppcbinfo.listhead, inp, sctp_list);
+ SCTP_INP_INFO_WUNLOCK();
+
+ TAILQ_INIT(&inp->read_queue);
+ LIST_INIT(&inp->sctp_addr_list);
+
+ LIST_INIT(&inp->sctp_asoc_list);
+
+#ifdef SCTP_TRACK_FREED_ASOCS
+ /* TEMP CODE */
+ LIST_INIT(&inp->sctp_asoc_free_list);
+#endif
+ /* Init the timer structure for signature change */
+ SCTP_OS_TIMER_INIT(&inp->sctp_ep.signature_change.timer);
+ inp->sctp_ep.signature_change.type = SCTP_TIMER_TYPE_NEWCOOKIE;
+
+ /* now init the actual endpoint default data */
+ m = &inp->sctp_ep;
+
+ /* setup the base timeout information */
+ m->sctp_timeoutticks[SCTP_TIMER_SEND] = SEC_TO_TICKS(SCTP_SEND_SEC); /* needed ? */
+ m->sctp_timeoutticks[SCTP_TIMER_INIT] = SEC_TO_TICKS(SCTP_INIT_SEC); /* needed ? */
+ m->sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(sctp_delayed_sack_time_default);
+ m->sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(sctp_heartbeat_interval_default);
+ m->sctp_timeoutticks[SCTP_TIMER_PMTU] = SEC_TO_TICKS(sctp_pmtu_raise_time_default);
+ m->sctp_timeoutticks[SCTP_TIMER_MAXSHUTDOWN] = SEC_TO_TICKS(sctp_shutdown_guard_time_default);
+ m->sctp_timeoutticks[SCTP_TIMER_SIGNATURE] = SEC_TO_TICKS(sctp_secret_lifetime_default);
+ /* all max/min max are in ms */
+ m->sctp_maxrto = sctp_rto_max_default;
+ m->sctp_minrto = sctp_rto_min_default;
+ m->initial_rto = sctp_rto_initial_default;
+ m->initial_init_rto_max = sctp_init_rto_max_default;
+ m->sctp_sack_freq = sctp_sack_freq_default;
+
+ m->max_open_streams_intome = MAX_SCTP_STREAMS;
+
+ m->max_init_times = sctp_init_rtx_max_default;
+ m->max_send_times = sctp_assoc_rtx_max_default;
+ m->def_net_failure = sctp_path_rtx_max_default;
+ m->sctp_sws_sender = SCTP_SWS_SENDER_DEF;
+ m->sctp_sws_receiver = SCTP_SWS_RECEIVER_DEF;
+ m->max_burst = sctp_max_burst_default;
+ if ((sctp_default_cc_module >= SCTP_CC_RFC2581) &&
+ (sctp_default_cc_module <= SCTP_CC_HTCP)) {
+ m->sctp_default_cc_module = sctp_default_cc_module;
+ } else {
+ /* sysctl done with invalid value, set to 2581 */
+ m->sctp_default_cc_module = SCTP_CC_RFC2581;
+ }
+ /* number of streams to pre-open on an association */
+ m->pre_open_stream_count = sctp_nr_outgoing_streams_default;
+
+ /* Add adaptation cookie */
+ m->adaptation_layer_indicator = 0x504C5253;
+
+ /* seed random number generator */
+ m->random_counter = 1;
+ m->store_at = SCTP_SIGNATURE_SIZE;
+ SCTP_READ_RANDOM(m->random_numbers, sizeof(m->random_numbers));
+ sctp_fill_random_store(m);
+
+ /* Minimum cookie size */
+ m->size_of_a_cookie = (sizeof(struct sctp_init_msg) * 2) +
+ sizeof(struct sctp_state_cookie);
+ m->size_of_a_cookie += SCTP_SIGNATURE_SIZE;
+
+ /* Setup the initial secret */
+ (void)SCTP_GETTIME_TIMEVAL(&time);
+ m->time_of_secret_change = time.tv_sec;
+
+ for (i = 0; i < SCTP_NUMBER_OF_SECRETS; i++) {
+ m->secret_key[0][i] = sctp_select_initial_TSN(m);
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_NEWCOOKIE, inp, NULL, NULL);
+
+ /* How long is a cookie good for? */
+ m->def_cookie_life = MSEC_TO_TICKS(sctp_valid_cookie_life_default);
+ /*
+ * Initialize authentication parameters
+ */
+ m->local_hmacs = sctp_default_supported_hmaclist();
+ m->local_auth_chunks = sctp_alloc_chunklist();
+ sctp_auth_set_default_chunks(m->local_auth_chunks);
+ LIST_INIT(&m->shared_keys);
+ /* add default NULL key as key id 0 */
+ null_key = sctp_alloc_sharedkey();
+ sctp_insert_sharedkey(&m->shared_keys, null_key);
+ SCTP_INP_WUNLOCK(inp);
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 12);
+#endif
+ return (error);
+}
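For reference, the UDP-style/TCP-style split made above is driven entirely by the socket type the application asked for; the RFC 6458 socket API maps SOCK_SEQPACKET to a one-to-many endpoint and SOCK_STREAM to a one-to-one endpoint. A small user-space sketch (standard calls only, nothing specific to this diff):

#include <sys/socket.h>
#include <netinet/in.h>

int
open_sctp_endpoints(void)
{
	/* One-to-many ("UDP style") socket: takes the
	 * SCTP_PCB_FLAGS_UDPTYPE branch in sctp_inpcb_alloc(). */
	int s_many = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);

	/* One-to-one ("TCP style") socket: takes the
	 * SCTP_PCB_FLAGS_TCPTYPE branch. */
	int s_one = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);

	return ((s_many >= 0 && s_one >= 0) ? 0 : -1);
}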
+
+
+void
+sctp_move_pcb_and_assoc(struct sctp_inpcb *old_inp, struct sctp_inpcb *new_inp,
+ struct sctp_tcb *stcb)
+{
+ struct sctp_nets *net;
+ uint16_t lport, rport;
+ struct sctppcbhead *head;
+ struct sctp_laddr *laddr, *oladdr;
+
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ SCTP_TCB_UNLOCK(stcb);
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_WLOCK(old_inp);
+ SCTP_INP_WLOCK(new_inp);
+ SCTP_TCB_LOCK(stcb);
+ atomic_subtract_int(&stcb->asoc.refcnt, 1);
+
+ new_inp->sctp_ep.time_of_secret_change =
+ old_inp->sctp_ep.time_of_secret_change;
+ memcpy(new_inp->sctp_ep.secret_key, old_inp->sctp_ep.secret_key,
+ sizeof(old_inp->sctp_ep.secret_key));
+ new_inp->sctp_ep.current_secret_number =
+ old_inp->sctp_ep.current_secret_number;
+ new_inp->sctp_ep.last_secret_number =
+ old_inp->sctp_ep.last_secret_number;
+ new_inp->sctp_ep.size_of_a_cookie = old_inp->sctp_ep.size_of_a_cookie;
+
+ /* make it so new data pours into the new socket */
+ stcb->sctp_socket = new_inp->sctp_socket;
+ stcb->sctp_ep = new_inp;
+
+ /* Copy the port across */
+ lport = new_inp->sctp_lport = old_inp->sctp_lport;
+ rport = stcb->rport;
+ /* Pull the tcb from the old association */
+ LIST_REMOVE(stcb, sctp_tcbhash);
+ LIST_REMOVE(stcb, sctp_tcblist);
+
+ /* Now insert the new_inp into the TCP connected hash */
+ head = &sctppcbinfo.sctp_tcpephash[SCTP_PCBHASH_ALLADDR((lport + rport),
+ sctppcbinfo.hashtcpmark)];
+
+ LIST_INSERT_HEAD(head, new_inp, sctp_hash);
+ /* It's safe to access */
+ new_inp->sctp_flags &= ~SCTP_PCB_FLAGS_UNBOUND;
+
+ /* Now move the tcb into the endpoint list */
+ LIST_INSERT_HEAD(&new_inp->sctp_asoc_list, stcb, sctp_tcblist);
+ /*
+ * Question, do we even need to worry about the ep-hash since we
+ * only have one connection? Probably not :> so let's get rid of it
+ * and not use up any kernel memory for it.
+ */
+
+ /* Ok. Let's restart timer. */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, new_inp,
+ stcb, net);
+ }
+
+ SCTP_INP_INFO_WUNLOCK();
+ if (new_inp->sctp_tcbhash != NULL) {
+ SCTP_HASH_FREE(new_inp->sctp_tcbhash, new_inp->sctp_hashmark);
+ new_inp->sctp_tcbhash = NULL;
+ }
+ if ((new_inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
+ /* Subset bound, so copy in the laddr list from the old_inp */
+ LIST_FOREACH(oladdr, &old_inp->sctp_addr_list, sctp_nxt_addr) {
+ laddr = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_laddr, struct sctp_laddr);
+ if (laddr == NULL) {
+ /*
+ * Gak, what can we do? This assoc is really
+ * HOSED. We probably should send an abort
+ * here.
+ */
+ SCTPDBG(SCTP_DEBUG_PCB1, "Association hosed in TCP model, out of laddr memory\n");
+ continue;
+ }
+ SCTP_INCR_LADDR_COUNT();
+ bzero(laddr, sizeof(*laddr));
+ (void)SCTP_GETTIME_TIMEVAL(&laddr->start_time);
+ laddr->ifa = oladdr->ifa;
+ atomic_add_int(&laddr->ifa->refcount, 1);
+ LIST_INSERT_HEAD(&new_inp->sctp_addr_list, laddr,
+ sctp_nxt_addr);
+ new_inp->laddr_count++;
+ }
+ }
+ /*
+ * Now any running timers need to be adjusted; since we really don't
+ * care whether they are running or not, just blast the new_inp into
+ * all of them.
+ */
+
+ stcb->asoc.hb_timer.ep = (void *)new_inp;
+ stcb->asoc.dack_timer.ep = (void *)new_inp;
+ stcb->asoc.asconf_timer.ep = (void *)new_inp;
+ stcb->asoc.strreset_timer.ep = (void *)new_inp;
+ stcb->asoc.shut_guard_timer.ep = (void *)new_inp;
+ stcb->asoc.autoclose_timer.ep = (void *)new_inp;
+ stcb->asoc.delayed_event_timer.ep = (void *)new_inp;
+ stcb->asoc.delete_prim_timer.ep = (void *)new_inp;
+ /* now what about the nets? */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ net->pmtu_timer.ep = (void *)new_inp;
+ net->rxt_timer.ep = (void *)new_inp;
+ net->fr_timer.ep = (void *)new_inp;
+ }
+ SCTP_INP_WUNLOCK(new_inp);
+ SCTP_INP_WUNLOCK(old_inp);
+}
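sctp_move_pcb_and_assoc() is what makes the usual listen/accept flow work on a one-to-one SCTP socket: the association established on the listening endpoint is migrated onto the inpcb of the socket that accept() returns. A hedged user-space sketch of the sequence that exercises it (error handling omitted; the function name is illustrative):

#include <sys/socket.h>
#include <netinet/in.h>
#include <stdint.h>
#include <string.h>

int
sctp_accept_one(uint16_t port)
{
	struct sockaddr_in sin;
	int lfd, cfd;

	lfd = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);	/* TCP-style */
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(sin);
	sin.sin_port = htons(port);
	sin.sin_addr.s_addr = INADDR_ANY;
	bind(lfd, (struct sockaddr *)&sin, sizeof(sin));
	listen(lfd, 1);
	/* accept() hands back a new socket; the kernel moves the
	 * already-established association onto its inpcb. */
	cfd = accept(lfd, NULL, NULL);
	return (cfd);
}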
+
+static int
+sctp_isport_inuse(struct sctp_inpcb *inp, uint16_t lport, uint32_t vrf_id)
+{
+ struct sctppcbhead *head;
+ struct sctp_inpcb *t_inp;
+ int fnd;
+
+ head = &sctppcbinfo.sctp_ephash[SCTP_PCBHASH_ALLADDR(lport,
+ sctppcbinfo.hashmark)];
+ LIST_FOREACH(t_inp, head, sctp_hash) {
+ if (t_inp->sctp_lport != lport) {
+ continue;
+ }
+ /* is it in the VRF in question */
+ fnd = 0;
+ if (t_inp->def_vrf_id == vrf_id)
+ fnd = 1;
+ if (!fnd)
+ continue;
+
+ /* This one is in use. */
+ /* check the v6/v4 binding issue */
+ if ((t_inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(t_inp)) {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ /* collision in V6 space */
+ return (1);
+ } else {
+ /* inp is BOUND_V4 no conflict */
+ continue;
+ }
+ } else if (t_inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
+ /* t_inp is bound v4 and v6, conflict always */
+ return (1);
+ } else {
+ /* t_inp is bound only V4 */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
+ SCTP_IPV6_V6ONLY(inp)) {
+ /* no conflict */
+ continue;
+ }
+ /* else fall through to conflict */
+ }
+ return (1);
+ }
+ return (0);
+}
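The v4/v6 collision rules checked above reduce to a small predicate on how the two endpoints are bound. A condensed sketch of the same decision with the flag tests folded into booleans; the names here are illustrative, not kernel identifiers:

/* Does a new endpoint conflict with an existing one on the same port?
 * t_v6     - existing endpoint is bound for IPv6
 * t_v6only - existing endpoint is IPv6-only
 * n_v6     - new endpoint is bound for IPv6
 * n_v6only - new endpoint is IPv6-only
 */
static int
port_conflict(int t_v6, int t_v6only, int n_v6, int n_v6only)
{
	if (t_v6 && t_v6only)
		return (n_v6);		/* collide only in the v6 space */
	if (t_v6)
		return (1);		/* existing is v4+v6: always a conflict */
	/* existing is v4-only: only a v6-only newcomer avoids it */
	return (!(n_v6 && n_v6only));
}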
+
+
+
+/* sctp_ifap is used to bypass normal local address validation checks */
+int
+sctp_inpcb_bind(struct socket *so, struct sockaddr *addr,
+ struct sctp_ifa *sctp_ifap, struct thread *p)
+{
+ /* bind a ep to a socket address */
+ struct sctppcbhead *head;
+ struct sctp_inpcb *inp, *inp_tmp;
+ struct inpcb *ip_inp;
+ int bindall;
+ int prison = 0;
+ uint16_t lport;
+ int error;
+ uint32_t vrf_id;
+
+ lport = 0;
+ error = 0;
+ bindall = 1;
+ inp = (struct sctp_inpcb *)so->so_pcb;
+ ip_inp = (struct inpcb *)so->so_pcb;
+#ifdef SCTP_DEBUG
+ if (addr) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "Bind called port:%d\n",
+ ntohs(((struct sockaddr_in *)addr)->sin_port));
+ SCTPDBG(SCTP_DEBUG_PCB1, "Addr :");
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB1, addr);
+ }
+#endif
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) == 0) {
+ /* already did a bind, subsequent binds NOT allowed ! */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+#ifdef INVARIANTS
+ if (p == NULL)
+ panic("null proc/thread");
+#endif
+ if (p && jailed(p->td_ucred)) {
+ prison = 1;
+ }
+ if (addr != NULL) {
+ if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ /* IPV6_V6ONLY socket? */
+ if (SCTP_IPV6_V6ONLY(ip_inp)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ if (addr->sa_len != sizeof(*sin)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ sin = (struct sockaddr_in *)addr;
+ lport = sin->sin_port;
+ if (prison) {
+ /*
+ * For INADDR_ANY and LOOPBACK the
+ * prison_ip() call will transmute the IP
+ * address to the proper value (i.e. the IP
+ * address owned by the jail).
+ */
+ if (prison_ip(p->td_ucred, 0, &sin->sin_addr.s_addr)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ }
+ if (sin->sin_addr.s_addr != INADDR_ANY) {
+ bindall = 0;
+ }
+ } else if (addr->sa_family == AF_INET6) {
+ /* Only for pure IPv6 Address. (No IPv4 Mapped!) */
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)addr;
+
+ if (addr->sa_len != sizeof(*sin6)) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ lport = sin6->sin6_port;
+ /*
+ * Jail checks for IPv6 should go HERE! i.e. add the
+ * prison_ip() equivalent in this position to
+ * transmute the addresses to the proper jailed one.
+ */
+ if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ bindall = 0;
+ /* KAME hack: embed scopeid */
+ if (sa6_embedscope(sin6, ip6_use_defzone) != 0) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ }
+ /* this must be cleared for ifa_ifwithaddr() */
+ sin6->sin6_scope_id = 0;
+ } else {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EAFNOSUPPORT);
+ return (EAFNOSUPPORT);
+ }
+ }
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_WLOCK(inp);
+ /* Setup a vrf_id to be the default for the non-bind-all case. */
+ vrf_id = inp->def_vrf_id;
+
+ /* increase our count due to the unlock we do */
+ SCTP_INP_INCR_REF(inp);
+ if (lport) {
+ /*
+ * Did the caller specify a port? If so, we must see if an EP
+ * already has this one bound.
+ */
+ /* got to be root to get at low ports */
+ if (ntohs(lport) < IPPORT_RESERVED) {
+ if (p && (error =
+ priv_check(p, PRIV_NETINET_RESERVEDPORT)
+ )) {
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ return (error);
+ }
+ }
+ if (p == NULL) {
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error);
+ return (error);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ if (bindall) {
+ vrf_id = inp->def_vrf_id;
+ inp_tmp = sctp_pcb_findep(addr, 0, 1, vrf_id);
+ if (inp_tmp != NULL) {
+ /*
+ * The lock guy returned; lower the count. Note
+ * that we are not bound, so inp_tmp should
+ * NEVER be inp. And it is this inp
+ * (inp_tmp) that gets the reference bump,
+ * so we must lower it.
+ */
+ SCTP_INP_DECR_REF(inp_tmp);
+ SCTP_INP_DECR_REF(inp);
+ /* unlock info */
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ } else {
+ inp_tmp = sctp_pcb_findep(addr, 0, 1, vrf_id);
+ if (inp_tmp != NULL) {
+ /*
+ * The lock guy returned; lower the count. Note
+ * that we are not bound, so inp_tmp should
+ * NEVER be inp. And it is this inp
+ * (inp_tmp) that gets the reference bump,
+ * so we must lower it.
+ */
+ SCTP_INP_DECR_REF(inp_tmp);
+ SCTP_INP_DECR_REF(inp);
+ /* unlock info */
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ }
+ SCTP_INP_WLOCK(inp);
+ if (bindall) {
+ /* verify that the lport is not already in use by a singleton */
+ if (sctp_isport_inuse(inp, lport, vrf_id)) {
+ /* Sorry someone already has this one bound */
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ }
+ } else {
+ uint16_t first, last, candidate;
+ uint16_t count;
+ int done;
+
+ if (ip_inp->inp_flags & INP_HIGHPORT) {
+ first = ipport_hifirstauto;
+ last = ipport_hilastauto;
+ } else if (ip_inp->inp_flags & INP_LOWPORT) {
+ if (p && (error =
+ priv_check(p, PRIV_NETINET_RESERVEDPORT)
+ )) {
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, error);
+ return (error);
+ }
+ first = ipport_lowfirstauto;
+ last = ipport_lowlastauto;
+ } else {
+ first = ipport_firstauto;
+ last = ipport_lastauto;
+ }
+ if (first > last) {
+ uint16_t temp;
+
+ temp = first;
+ first = last;
+ last = temp;
+ }
+ count = last - first + 1; /* number of candidates */
+ candidate = first + sctp_select_initial_TSN(&inp->sctp_ep) % (count);
+
+ done = 0;
+ while (!done) {
+ if (sctp_isport_inuse(inp, htons(candidate), inp->def_vrf_id) == 0) {
+ done = 1;
+ }
+ if (!done) {
+ if (--count == 0) {
+ SCTP_INP_DECR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRINUSE);
+ return (EADDRINUSE);
+ }
+ if (candidate == last)
+ candidate = first;
+ else
+ candidate = candidate + 1;
+ }
+ }
+ lport = htons(candidate);
+ }
+ SCTP_INP_DECR_REF(inp);
+ if (inp->sctp_flags & (SCTP_PCB_FLAGS_SOCKET_GONE |
+ SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
+ /*
+ * this really should not happen. The guy did a non-blocking
+ * bind and then did a close at the same time.
+ */
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ /* ok, we look clear to give out this port, so let's set up the binding */
+ if (bindall) {
+ /* binding to all addresses, so just set in the proper flags */
+ inp->sctp_flags |= SCTP_PCB_FLAGS_BOUNDALL;
+ /* set the automatic addr changes from kernel flag */
+ if (sctp_auto_asconf == 0) {
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF);
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
+ } else {
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF);
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
+ }
+ /*
+ * set the automatic mobility_base from kernel flag (by
+ * micchie)
+ */
+ if (sctp_mobility_base == 0) {
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_BASE);
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ } else {
+ sctp_mobility_feature_on(inp, SCTP_MOBILITY_BASE);
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ }
+ /*
+ * set the automatic mobility_fasthandoff from kernel flag
+ * (by micchie)
+ */
+ if (sctp_mobility_fasthandoff == 0) {
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_FASTHANDOFF);
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ } else {
+ sctp_mobility_feature_on(inp, SCTP_MOBILITY_FASTHANDOFF);
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ }
+ } else {
+ /*
+ * bind-specific, make sure the flag is off and add a new
+ * address structure to the sctp_addr_list inside the ep
+ * structure.
+ *
+ * We will need to allocate one and insert it at the head. The
+ * socketopt call can just insert new addresses in there as
+ * well. It will also have to do the embedded-scope KAME hack
+ * too (before adding).
+ */
+ struct sctp_ifa *ifa;
+ struct sockaddr_storage store_sa;
+
+ memset(&store_sa, 0, sizeof(store_sa));
+ if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)&store_sa;
+ memcpy(sin, addr, sizeof(struct sockaddr_in));
+ sin->sin_port = 0;
+ } else if (addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&store_sa;
+ memcpy(sin6, addr, sizeof(struct sockaddr_in6));
+ sin6->sin6_port = 0;
+ }
+ /*
+ * first find the interface with the bound address; we need to
+ * zero out the port to find the address (yuck!). Can't do
+ * this earlier since we need the port for sctp_pcb_findep()
+ */
+ if (sctp_ifap != NULL)
+ ifa = sctp_ifap;
+ else {
+ /*
+ * Note: for BSD we always hit here; other OSes will
+ * pass things in via the sctp_ifap argument
+ * (Panda).
+ */
+ ifa = sctp_find_ifa_by_addr((struct sockaddr *)&store_sa,
+ vrf_id, SCTP_ADDR_NOT_LOCKED);
+ }
+ if (ifa == NULL) {
+ /* Can't find an interface with that address */
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EADDRNOTAVAIL);
+ return (EADDRNOTAVAIL);
+ }
+ if (addr->sa_family == AF_INET6) {
+ /* GAK, more FIXME IFA lock? */
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ /* Can't bind a non-existent addr. */
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ }
+ /* we're not bound all */
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_BOUNDALL;
+ /* allow bindx() to send ASCONF's for binding changes */
+ sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_ASCONF);
+ /* clear automatic addr changes from kernel flag */
+ sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
+
+ /* add this address to the endpoint list */
+ error = sctp_insert_laddr(&inp->sctp_addr_list, ifa, 0);
+ if (error != 0) {
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ return (error);
+ }
+ inp->laddr_count++;
+ }
+ /* find the bucket */
+ head = &sctppcbinfo.sctp_ephash[SCTP_PCBHASH_ALLADDR(lport,
+ sctppcbinfo.hashmark)];
+ /* put it in the bucket */
+ LIST_INSERT_HEAD(head, inp, sctp_hash);
+ SCTPDBG(SCTP_DEBUG_PCB1, "Main hash to bind at head:%p, bound port:%d\n",
+ head, ntohs(lport));
+ /* set in the port */
+ inp->sctp_lport = lport;
+
+ /* turn off just the unbound flag */
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_UNBOUND;
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ return (0);
+}
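When no port was supplied, the code above picks a random candidate in [first, last] and walks forward with wrap-around until a free port is found or the whole range has been exhausted. A standalone sketch of that scan; port_in_use() is a stand-in for sctp_isport_inuse() and the other names are illustrative:

#include <stdint.h>

/* Return a free port in [first, last] (host order), or 0 if every
 * candidate is in use. */
static uint16_t
pick_ephemeral(uint16_t first, uint16_t last, uint32_t seed,
    int (*port_in_use)(uint16_t))
{
	uint16_t count, candidate;

	if (first > last) {			/* normalize the range */
		uint16_t tmp = first;

		first = last;
		last = tmp;
	}
	count = last - first + 1;
	candidate = first + (seed % count);	/* random starting point */

	while (count-- > 0) {
		if (!port_in_use(candidate))
			return (candidate);
		candidate = (candidate == last) ? first : candidate + 1;
	}
	return (0);				/* exhausted: EADDRINUSE in the kernel */
}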
+
+
+static void
+sctp_iterator_inp_being_freed(struct sctp_inpcb *inp, struct sctp_inpcb *inp_next)
+{
+ struct sctp_iterator *it;
+
+ /*
+ * We enter with only the ITERATOR_LOCK in place and a write
+ * lock on the inp_info stuff.
+ */
+
+ /*
+ * Go through all iterators, we must do this since it is possible
+ * that some iterator does NOT have the lock, but is waiting for it.
+ * And the one that had the lock has either moved in the last
+ * iteration or we just cleared it above. We need to find all of
+ * those guys. The list of iterators should never be very big
+ * though.
+ */
+ TAILQ_FOREACH(it, &sctppcbinfo.iteratorhead, sctp_nxt_itr) {
+ if (it == inp->inp_starting_point_for_iterator)
+ /* skip this guy, he's special */
+ continue;
+ if (it->inp == inp) {
+ /*
+ * This is tricky and we DON'T lock the iterator.
+ * Reason is he's running but waiting for me since
+ * inp->inp_starting_point_for_iterator has the lock
+ * on me (the guy above we skipped). This tells us
+ * it is not running but waiting for
+ * inp->inp_starting_point_for_iterator to be
+ * released by the guy that does have our INP in a
+ * lock.
+ */
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ it->inp = NULL;
+ it->stcb = NULL;
+ } else {
+ /* set him up to do the next guy not me */
+ it->inp = inp_next;
+ it->stcb = NULL;
+ }
+ }
+ }
+ it = inp->inp_starting_point_for_iterator;
+ if (it) {
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ it->inp = NULL;
+ } else {
+ it->inp = inp_next;
+ }
+ it->stcb = NULL;
+ }
+}
+
+/* release sctp_inpcb unbind the port */
+void
+sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
+{
+ /*
+ * Here we free an endpoint. We must find it (if it is in the Hash
+ * table) and remove it from there. Then we must also find it in the
+ * overall list and remove it from there. After all removals are
+ * complete then any timer has to be stopped. Then start the actual
+ * freeing. a) Any local lists. b) Any associations. c) The hash of
+ * all associations. d) finally the ep itself.
+ */
+ struct sctp_pcb *m;
+ struct sctp_inpcb *inp_save;
+ struct sctp_tcb *asoc, *nasoc;
+ struct sctp_laddr *laddr, *nladdr;
+ struct inpcb *ip_pcb;
+ struct socket *so;
+
+ struct sctp_queued_to_read *sq;
+
+
+ int cnt;
+ sctp_sharedkey_t *shared_key;
+
+
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 0);
+#endif
+ SCTP_ITERATOR_LOCK();
+ so = inp->sctp_socket;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
+ /* been here before.. eeks.. get out of here */
+ SCTP_PRINTF("This conflict in free SHOULD not be happening! from %d, imm %d\n", from, immediate);
+ SCTP_ITERATOR_UNLOCK();
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 1);
+#endif
+ return;
+ }
+ SCTP_ASOC_CREATE_LOCK(inp);
+ SCTP_INP_INFO_WLOCK();
+
+ SCTP_INP_WLOCK(inp);
+ /* First time through we have the socket lock, after that no more. */
+ if (from == SCTP_CALLED_AFTER_CMPSET_OFCLOSE) {
+ /*
+ * Once we are in we can remove the flag; this value of 'from'
+ * is only passed from the actual closing routines that are called
+ * via the sockets layer.
+ */
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_CLOSE_IP;
+ /* socket is gone, so no more wakeups allowed */
+ inp->sctp_flags |= SCTP_PCB_FLAGS_DONT_WAKE;
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEINPUT;
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEOUTPUT;
+ }
+ sctp_timer_stop(SCTP_TIMER_TYPE_NEWCOOKIE, inp, NULL, NULL,
+ SCTP_FROM_SCTP_PCB + SCTP_LOC_1);
+
+ if (inp->control) {
+ sctp_m_freem(inp->control);
+ inp->control = NULL;
+ }
+ if (inp->pkt) {
+ sctp_m_freem(inp->pkt);
+ inp->pkt = NULL;
+ }
+ m = &inp->sctp_ep;
+ ip_pcb = &inp->ip_inp.inp; /* we could just cast the main pointer
+ * here but I will be nice :> (i.e.
+ * ip_pcb = ep;) */
+ if (immediate == SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE) {
+ int cnt_in_sd;
+
+ cnt_in_sd = 0;
+ for ((asoc = LIST_FIRST(&inp->sctp_asoc_list)); asoc != NULL;
+ asoc = nasoc) {
+ SCTP_TCB_LOCK(asoc);
+ nasoc = LIST_NEXT(asoc, sctp_tcblist);
+ if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ /* Skip guys being freed */
+ /* asoc->sctp_socket = NULL; FIXME MT */
+ cnt_in_sd++;
+ SCTP_TCB_UNLOCK(asoc);
+ continue;
+ }
+ if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_COOKIE_WAIT) ||
+ (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_COOKIE_ECHOED)) {
+ /*
+ * If we have data in queue, we don't want
+ * to just free since the app may have done
+ * send()/close or connect()/send()/close. And
+ * it wants the data to get across first.
+ */
+ /* Just abandon things in the front states */
+ if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_NOFORCE,
+ SCTP_FROM_SCTP_PCB + SCTP_LOC_2) == 0) {
+ cnt_in_sd++;
+ }
+ continue;
+ }
+ /* Disconnect the socket please */
+ asoc->sctp_socket = NULL;
+ asoc->asoc.state |= SCTP_STATE_CLOSED_SOCKET;
+ if ((asoc->asoc.size_on_reasm_queue > 0) ||
+ (asoc->asoc.control_pdapi) ||
+ (asoc->asoc.size_on_all_streams > 0) ||
+ (so && (so->so_rcv.sb_cc > 0))
+ ) {
+ /* Left with Data unread */
+ struct mbuf *op_err;
+
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ /* Fill in the user initiated abort */
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(op_err) =
+ sizeof(struct sctp_paramhdr) + sizeof(uint32_t);
+ ph = mtod(op_err,
+ struct sctp_paramhdr *);
+ ph->param_type = htons(
+ SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_3);
+ }
+ asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_3;
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("inpcb_free does an abort");
+#endif
+ sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ if (sctp_free_assoc(inp, asoc,
+ SCTP_PCBFREE_NOFORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_4) == 0) {
+ cnt_in_sd++;
+ }
+ continue;
+ } else if (TAILQ_EMPTY(&asoc->asoc.send_queue) &&
+ TAILQ_EMPTY(&asoc->asoc.sent_queue) &&
+ (asoc->asoc.stream_queue_cnt == 0)
+ ) {
+ if (asoc->asoc.locked_on_sending) {
+ goto abort_anyway;
+ }
+ if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
+ (SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
+ /*
+ * there is nothing queued to send,
+ * so I send shutdown
+ */
+ sctp_send_shutdown(asoc, asoc->asoc.primary_destination);
+ if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(&asoc->asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, asoc->sctp_ep, asoc,
+ asoc->asoc.primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc,
+ asoc->asoc.primary_destination);
+ sctp_chunk_output(inp, asoc, SCTP_OUTPUT_FROM_SHUT_TMR, SCTP_SO_LOCKED);
+ }
+ } else {
+ /* mark into shutdown pending */
+ struct sctp_stream_queue_pending *sp;
+
+ asoc->asoc.state |= SCTP_STATE_SHUTDOWN_PENDING;
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, asoc->sctp_ep, asoc,
+ asoc->asoc.primary_destination);
+ if (asoc->asoc.locked_on_sending) {
+ sp = TAILQ_LAST(&((asoc->asoc.locked_on_sending)->outqueue),
+ sctp_streamhead);
+ if (sp == NULL) {
+ SCTP_PRINTF("Error, sp is NULL, locked on sending is %p strm:%d\n",
+ asoc->asoc.locked_on_sending,
+ asoc->asoc.locked_on_sending->stream_no);
+ } else {
+ if ((sp->length == 0) && (sp->msg_is_complete == 0))
+ asoc->asoc.state |= SCTP_STATE_PARTIAL_MSG_LEFT;
+ }
+ }
+ if (TAILQ_EMPTY(&asoc->asoc.send_queue) &&
+ TAILQ_EMPTY(&asoc->asoc.sent_queue) &&
+ (asoc->asoc.state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
+ struct mbuf *op_err;
+
+ abort_anyway:
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ /*
+ * Fill in the user
+ * initiated abort
+ */
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(op_err) =
+ (sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t));
+ ph = mtod(op_err,
+ struct sctp_paramhdr *);
+ ph->param_type = htons(
+ SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_5);
+ }
+ asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_5;
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("inpcb_free does an abort");
+#endif
+
+ sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ if (sctp_free_assoc(inp, asoc,
+ SCTP_PCBFREE_NOFORCE,
+ SCTP_FROM_SCTP_PCB + SCTP_LOC_6) == 0) {
+ cnt_in_sd++;
+ }
+ continue;
+ } else {
+ sctp_chunk_output(inp, asoc, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
+ }
+ }
+ cnt_in_sd++;
+ SCTP_TCB_UNLOCK(asoc);
+ }
+ /* now is there some left in our SHUTDOWN state? */
+ if (cnt_in_sd) {
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_ITERATOR_UNLOCK();
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 2);
+#endif
+ return;
+ }
+ }
+ inp->sctp_socket = NULL;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) !=
+ SCTP_PCB_FLAGS_UNBOUND) {
+ /*
+ * ok, this guy has been bound. Its port is somewhere in
+ * the sctppcbinfo hash table. Remove it!
+ */
+ LIST_REMOVE(inp, sctp_hash);
+ inp->sctp_flags |= SCTP_PCB_FLAGS_UNBOUND;
+ }
+ /*
+ * If there is a timer running to kill us, forget it, since it may
+ * be contending for the INP lock... which would cause us to die ...
+ */
+ cnt = 0;
+ for ((asoc = LIST_FIRST(&inp->sctp_asoc_list)); asoc != NULL;
+ asoc = nasoc) {
+ nasoc = LIST_NEXT(asoc, sctp_tcblist);
+ if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ cnt++;
+ continue;
+ }
+ /* Free associations that are NOT killing us */
+ SCTP_TCB_LOCK(asoc);
+ if ((SCTP_GET_STATE(&asoc->asoc) != SCTP_STATE_COOKIE_WAIT) &&
+ ((asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) {
+ struct mbuf *op_err;
+ uint32_t *ippp;
+
+ op_err = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (op_err) {
+ /* Fill in the user initiated abort */
+ struct sctp_paramhdr *ph;
+
+ SCTP_BUF_LEN(op_err) = (sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t));
+ ph = mtod(op_err, struct sctp_paramhdr *);
+ ph->param_type = htons(
+ SCTP_CAUSE_USER_INITIATED_ABT);
+ ph->param_length = htons(SCTP_BUF_LEN(op_err));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_PCB + SCTP_LOC_7);
+
+ }
+ asoc->sctp_ep->last_abort_code = SCTP_FROM_SCTP_PCB + SCTP_LOC_7;
+#if defined(SCTP_PANIC_ON_ABORT)
+ panic("inpcb_free does an abort");
+#endif
+ sctp_send_abort_tcb(asoc, op_err, SCTP_SO_LOCKED);
+ SCTP_STAT_INCR_COUNTER32(sctps_aborted);
+ } else if (asoc->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
+ cnt++;
+ SCTP_TCB_UNLOCK(asoc);
+ continue;
+ }
+ if ((SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(&asoc->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ if (sctp_free_assoc(inp, asoc, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_PCB + SCTP_LOC_8) == 0) {
+ cnt++;
+ }
+ }
+ if (cnt) {
+ /* Ok we have someone out there that will kill us */
+ (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_ITERATOR_UNLOCK();
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 3);
+#endif
+ return;
+ }
+ if ((inp->refcount) || (inp->sctp_flags & SCTP_PCB_FLAGS_CLOSE_IP)) {
+ (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer);
+ sctp_timer_start(SCTP_TIMER_TYPE_INPKILL, inp, NULL, NULL);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_ASOC_CREATE_UNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_ITERATOR_UNLOCK();
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 4);
+#endif
+ return;
+ }
+ (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer);
+ inp->sctp_ep.signature_change.type = 0;
+ inp->sctp_flags |= SCTP_PCB_FLAGS_SOCKET_ALLGONE;
+
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 5);
+#endif
+
+ (void)SCTP_OS_TIMER_STOP(&inp->sctp_ep.signature_change.timer);
+ inp->sctp_ep.signature_change.type = SCTP_TIMER_TYPE_NONE;
+ /* Clear the read queue */
+ /* sa_ignore FREED_MEMORY */
+ while ((sq = TAILQ_FIRST(&inp->read_queue)) != NULL) {
+ /* It's only abandoned if it had data left */
+ if (sq->length)
+ SCTP_STAT_INCR(sctps_left_abandon);
+
+ TAILQ_REMOVE(&inp->read_queue, sq, next);
+ sctp_free_remote_addr(sq->whoFrom);
+ if (so)
+ so->so_rcv.sb_cc -= sq->length;
+ if (sq->data) {
+ sctp_m_freem(sq->data);
+ sq->data = NULL;
+ }
+ /*
+ * no need to free the net count, since at this point all
+ * assoc's are gone.
+ */
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_readq, sq);
+ SCTP_DECR_READQ_COUNT();
+ }
+ /* Now the sctp_pcb things */
+ /*
+ * free each asoc if it is not already closed/free. We can't use the
+ * macro here since le_next will get freed as part of the
+ * sctp_free_assoc() call.
+ */
+ cnt = 0;
+ if (so) {
+#ifdef IPSEC
+ ipsec4_delete_pcbpolicy(ip_pcb);
+#endif /* IPSEC */
+
+ /* Unlocks not needed since the socket is gone now */
+ }
+ if (ip_pcb->inp_options) {
+ (void)sctp_m_free(ip_pcb->inp_options);
+ ip_pcb->inp_options = 0;
+ }
+ if (ip_pcb->inp_moptions) {
+ inp_freemoptions(ip_pcb->inp_moptions);
+ ip_pcb->inp_moptions = 0;
+ }
+#ifdef INET6
+ if (ip_pcb->inp_vflag & INP_IPV6) {
+ struct in6pcb *in6p;
+
+ in6p = (struct in6pcb *)inp;
+ ip6_freepcbopts(in6p->in6p_outputopts);
+ }
+#endif /* INET6 */
+ ip_pcb->inp_vflag = 0;
+ /* free up authentication fields */
+ if (inp->sctp_ep.local_auth_chunks != NULL)
+ sctp_free_chunklist(inp->sctp_ep.local_auth_chunks);
+ if (inp->sctp_ep.local_hmacs != NULL)
+ sctp_free_hmaclist(inp->sctp_ep.local_hmacs);
+
+ shared_key = LIST_FIRST(&inp->sctp_ep.shared_keys);
+ while (shared_key) {
+ LIST_REMOVE(shared_key, next);
+ sctp_free_sharedkey(shared_key);
+ /* sa_ignore FREED_MEMORY */
+ shared_key = LIST_FIRST(&inp->sctp_ep.shared_keys);
+ }
+
+ inp_save = LIST_NEXT(inp, sctp_list);
+ LIST_REMOVE(inp, sctp_list);
+
+ /* fix any iterators only after out of the list */
+ sctp_iterator_inp_being_freed(inp, inp_save);
+ /*
+ * if we have an address list the following will free the list of
+ * ifaddr's that are set into this ep. Again macro limitations here,
+ * since the LIST_FOREACH could be a bad idea.
+ */
+ for ((laddr = LIST_FIRST(&inp->sctp_addr_list)); laddr != NULL;
+ laddr = nladdr) {
+ nladdr = LIST_NEXT(laddr, sctp_nxt_addr);
+ sctp_remove_laddr(laddr);
+ }
+
+#ifdef SCTP_TRACK_FREED_ASOCS
+ /* TEMP CODE */
+ for ((asoc = LIST_FIRST(&inp->sctp_asoc_free_list)); asoc != NULL;
+ asoc = nasoc) {
+ nasoc = LIST_NEXT(asoc, sctp_tcblist);
+ LIST_REMOVE(asoc, sctp_tcblist);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_asoc, asoc);
+ SCTP_DECR_ASOC_COUNT();
+ }
+ /* *** END TEMP CODE *** */
+#endif
+ /* Now lets see about freeing the EP hash table. */
+ if (inp->sctp_tcbhash != NULL) {
+ SCTP_HASH_FREE(inp->sctp_tcbhash, inp->sctp_hashmark);
+ inp->sctp_tcbhash = NULL;
+ }
+ /* Now we must put the ep memory back into the zone pool */
+ INP_LOCK_DESTROY(&inp->ip_inp.inp);
+ SCTP_INP_LOCK_DESTROY(inp);
+ SCTP_INP_READ_DESTROY(inp);
+ SCTP_ASOC_CREATE_LOCK_DESTROY(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_ITERATOR_UNLOCK();
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_ep, inp);
+ SCTP_DECR_EP_COUNT();
+}
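From user space, the two paths through sctp_inpcb_free() roughly correspond to a plain close(), which lets pending associations drain and shut down gracefully, versus a close() with zero linger, which aborts them. A hedged sketch of forcing the abortive path with standard socket options only (the helper name is illustrative):

#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>
#include <unistd.h>

/* Close an SCTP socket, aborting any associations instead of
 * shutting them down gracefully. */
void
sctp_abortive_close(int fd)
{
	struct linger lg;

	memset(&lg, 0, sizeof(lg));
	lg.l_onoff = 1;		/* linger enabled... */
	lg.l_linger = 0;	/* ...with zero timeout: abort on close */
	setsockopt(fd, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg));
	close(fd);
}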
+
+
+struct sctp_nets *
+sctp_findnet(struct sctp_tcb *stcb, struct sockaddr *addr)
+{
+ struct sctp_nets *net;
+
+ /* locate the address */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (sctp_cmpaddr(addr, (struct sockaddr *)&net->ro._l_addr))
+ return (net);
+ }
+ return (NULL);
+}
+
+
+int
+sctp_is_address_on_local_host(struct sockaddr *addr, uint32_t vrf_id)
+{
+ struct sctp_ifa *sctp_ifa;
+
+ sctp_ifa = sctp_find_ifa_by_addr(addr, vrf_id, SCTP_ADDR_NOT_LOCKED);
+ if (sctp_ifa) {
+ return (1);
+ } else {
+ return (0);
+ }
+}
+
+/*
+ * adds a remote endpoint address, done with the INIT/INIT-ACK as well as
+ * when an ASCONF arrives that adds it. It will also initialize all the cwnd
+ * stats and such.
+ */
+int
+sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
+ int set_scope, int from)
+{
+ /*
+ * The following is redundant to the same lines in the
+ * sctp_aloc_assoc() but is needed since others call the add
+ * address function
+ */
+ struct sctp_nets *net, *netfirst;
+ int addr_inscope;
+
+ SCTPDBG(SCTP_DEBUG_PCB1, "Adding an address (from:%d) to the peer: ",
+ from);
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB1, newaddr);
+
+ netfirst = sctp_findnet(stcb, newaddr);
+ if (netfirst) {
+ /*
+ * Lie and return ok, we don't want to make the association
+ * go away for this behavior. It will happen in the TCP
+ * model in a connected socket. It does not reach the hash
+ * table until after the association is built so it can't be
+ * found. Mark as reachable, since the initial creation will
+ * have been cleared and the NOT_IN_ASSOC flag will have
+ * been added... and we don't want to end up removing it
+ * back out.
+ */
+ if (netfirst->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ netfirst->dest_state = (SCTP_ADDR_REACHABLE |
+ SCTP_ADDR_UNCONFIRMED);
+ } else {
+ netfirst->dest_state = SCTP_ADDR_REACHABLE;
+ }
+
+ return (0);
+ }
+ addr_inscope = 1;
+ if (newaddr->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)newaddr;
+ if (sin->sin_addr.s_addr == 0) {
+ /* Invalid address */
+ return (-1);
+ }
+ /* zero out the bzero area */
+ memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+
+ /* assure len is set */
+ sin->sin_len = sizeof(struct sockaddr_in);
+ if (set_scope) {
+#ifdef SCTP_DONT_DO_PRIVADDR_SCOPE
+ stcb->ipv4_local_scope = 1;
+#else
+ if (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) {
+ stcb->asoc.ipv4_local_scope = 1;
+ }
+#endif /* SCTP_DONT_DO_PRIVADDR_SCOPE */
+ } else {
+ /* Validate the address is in scope */
+ if ((IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)) &&
+ (stcb->asoc.ipv4_local_scope == 0)) {
+ addr_inscope = 0;
+ }
+ }
+ } else if (newaddr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)newaddr;
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+ /* Invalid address */
+ return (-1);
+ }
+ /* assure len is set */
+ sin6->sin6_len = sizeof(struct sockaddr_in6);
+ if (set_scope) {
+ if (sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id)) {
+ stcb->asoc.loopback_scope = 1;
+ stcb->asoc.local_scope = 0;
+ stcb->asoc.ipv4_local_scope = 1;
+ stcb->asoc.site_scope = 1;
+ } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
+ /*
+ * If the new destination is a LINK_LOCAL we
+ * must have common site scope. Don't set
+ * the local scope since we may not share
+ * all links, only loopback can do this.
+ * Links on the local network would also be
+ * on our private network for v4 too.
+ */
+ stcb->asoc.ipv4_local_scope = 1;
+ stcb->asoc.site_scope = 1;
+ } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
+ /*
+ * If the new destination is SITE_LOCAL then
+ * we must have site scope in common.
+ */
+ stcb->asoc.site_scope = 1;
+ }
+ } else {
+ /* Validate the address is in scope */
+ if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr) &&
+ (stcb->asoc.loopback_scope == 0)) {
+ addr_inscope = 0;
+ } else if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
+ (stcb->asoc.local_scope == 0)) {
+ addr_inscope = 0;
+ } else if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
+ (stcb->asoc.site_scope == 0)) {
+ addr_inscope = 0;
+ }
+ }
+ } else {
+ /* not supported family type */
+ return (-1);
+ }
+ net = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_net, struct sctp_nets);
+ if (net == NULL) {
+ return (-1);
+ }
+ SCTP_INCR_RADDR_COUNT();
+ bzero(net, sizeof(*net));
+ (void)SCTP_GETTIME_TIMEVAL(&net->start_time);
+ memcpy(&net->ro._l_addr, newaddr, newaddr->sa_len);
+ if (newaddr->sa_family == AF_INET) {
+ ((struct sockaddr_in *)&net->ro._l_addr)->sin_port = stcb->rport;
+ } else if (newaddr->sa_family == AF_INET6) {
+ ((struct sockaddr_in6 *)&net->ro._l_addr)->sin6_port = stcb->rport;
+ }
+ net->addr_is_local = sctp_is_address_on_local_host(newaddr, stcb->asoc.vrf_id);
+ if (net->addr_is_local && ((set_scope || (from == SCTP_ADDR_IS_CONFIRMED)))) {
+ stcb->asoc.loopback_scope = 1;
+ stcb->asoc.ipv4_local_scope = 1;
+ stcb->asoc.local_scope = 0;
+ stcb->asoc.site_scope = 1;
+ addr_inscope = 1;
+ }
+ net->failure_threshold = stcb->asoc.def_net_failure;
+ if (addr_inscope == 0) {
+ net->dest_state = (SCTP_ADDR_REACHABLE |
+ SCTP_ADDR_OUT_OF_SCOPE);
+ } else {
+ if (from == SCTP_ADDR_IS_CONFIRMED)
+ /* SCTP_ADDR_IS_CONFIRMED is passed by connect_x */
+ net->dest_state = SCTP_ADDR_REACHABLE;
+ else
+ net->dest_state = SCTP_ADDR_REACHABLE |
+ SCTP_ADDR_UNCONFIRMED;
+ }
+ /*
+ * We set this to 0; the timer code knows that this means it's an
+ * initial value
+ */
+ net->RTO = 0;
+ net->RTO_measured = 0;
+ stcb->asoc.numnets++;
+ *(&net->ref_count) = 1;
+ net->tos_flowlabel = 0;
+#ifdef INET
+ if (newaddr->sa_family == AF_INET)
+ net->tos_flowlabel = stcb->asoc.default_tos;
+#endif
+#ifdef INET6
+ if (newaddr->sa_family == AF_INET6)
+ net->tos_flowlabel = stcb->asoc.default_flowlabel;
+#endif
+ /* Init the timer structure */
+ SCTP_OS_TIMER_INIT(&net->rxt_timer.timer);
+ SCTP_OS_TIMER_INIT(&net->fr_timer.timer);
+ SCTP_OS_TIMER_INIT(&net->pmtu_timer.timer);
+
+ /* Now generate a route for this guy */
+ /* KAME hack: embed scopeid */
+ if (newaddr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ (void)sa6_embedscope(sin6, ip6_use_defzone);
+ sin6->sin6_scope_id = 0;
+ }
+ SCTP_RTALLOC((sctp_route_t *) & net->ro, stcb->asoc.vrf_id);
+
+ if (newaddr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
+ (void)sa6_recoverscope(sin6);
+ }
+ if (SCTP_ROUTE_HAS_VALID_IFN(&net->ro)) {
+ /* Get source address */
+ net->ro._s_addr = sctp_source_address_selection(stcb->sctp_ep,
+ stcb,
+ (sctp_route_t *) & net->ro,
+ net,
+ 0,
+ stcb->asoc.vrf_id);
+ /* Now get the interface MTU */
+ if (net->ro._s_addr && net->ro._s_addr->ifn_p) {
+ net->mtu = SCTP_GATHER_MTU_FROM_INTFC(net->ro._s_addr->ifn_p);
+ } else {
+ net->mtu = 0;
+ }
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("We have found an interface mtu of %d\n", net->mtu);
+#endif
+ if (net->mtu == 0) {
+ /* Huh ?? */
+ net->mtu = SCTP_DEFAULT_MTU;
+ } else {
+ uint32_t rmtu;
+
+ rmtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, net->ro.ro_rt);
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("The route mtu is %d\n", rmtu);
+#endif
+ if (rmtu == 0) {
+ /*
+ * Start things off to match mtu of
+ * interface please.
+ */
+ SCTP_SET_MTU_OF_ROUTE(&net->ro._l_addr.sa,
+ net->ro.ro_rt, net->mtu);
+ } else {
+ /*
+ * we take the route mtu over the interface,
+ * since the route may be leading out the
+ * loopback, or a different interface.
+ */
+ net->mtu = rmtu;
+ }
+ }
+ if (from == SCTP_ALLOC_ASOC) {
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("New assoc sets mtu to :%d\n", net->mtu);
+#endif
+ stcb->asoc.smallest_mtu = net->mtu;
+ }
+ } else {
+ net->mtu = stcb->asoc.smallest_mtu;
+ }
+ if (stcb->asoc.smallest_mtu > net->mtu) {
+#ifdef SCTP_PRINT_FOR_B_AND_M
+ SCTP_PRINTF("new address mtu:%d smaller than smallest:%d\n",
+ net->mtu, stcb->asoc.smallest_mtu);
+#endif
+ stcb->asoc.smallest_mtu = net->mtu;
+ }
+ /* JRS - Use the congestion control given in the CC module */
+ stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
+
+ /*
+ * CMT: CUC algo - set find_pseudo_cumack to TRUE (1) at beginning
+ * of assoc (2005/06/27, iyengar at cis.udel.edu)
+ */
+ net->find_pseudo_cumack = 1;
+ net->find_rtx_pseudo_cumack = 1;
+ net->src_addr_selected = 0;
+ netfirst = TAILQ_FIRST(&stcb->asoc.nets);
+ if (net->ro.ro_rt == NULL) {
+ /* Since we have no route put it at the back */
+ TAILQ_INSERT_TAIL(&stcb->asoc.nets, net, sctp_next);
+ } else if (netfirst == NULL) {
+ /* We are the first one in the pool. */
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next);
+ } else if (netfirst->ro.ro_rt == NULL) {
+ /*
+ * First one has NO route. Place this one ahead of the first
+ * one.
+ */
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next);
+ } else if (net->ro.ro_rt->rt_ifp != netfirst->ro.ro_rt->rt_ifp) {
+ /*
+ * This one has a different interface than the one at the
+ * top of the list. Place it ahead.
+ */
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next);
+ } else {
+ /*
+ * Ok we have the same interface as the first one. Move
+ * forward until we find either a) one with a NULL route...
+ * insert ahead of that b) one with a different ifp.. insert
+ * after that. c) end of the list.. insert at the tail.
+ */
+ struct sctp_nets *netlook;
+
+ do {
+ netlook = TAILQ_NEXT(netfirst, sctp_next);
+ if (netlook == NULL) {
+ /* End of the list */
+ TAILQ_INSERT_TAIL(&stcb->asoc.nets, net, sctp_next);
+ break;
+ } else if (netlook->ro.ro_rt == NULL) {
+ /* next one has NO route */
+ TAILQ_INSERT_BEFORE(netfirst, net, sctp_next);
+ break;
+ } else if (netlook->ro.ro_rt->rt_ifp != net->ro.ro_rt->rt_ifp) {
+ TAILQ_INSERT_AFTER(&stcb->asoc.nets, netlook,
+ net, sctp_next);
+ break;
+ }
+ /* Shift forward */
+ netfirst = netlook;
+ } while (netlook != NULL);
+ }
+
+ /* got to have a primary set */
+ if (stcb->asoc.primary_destination == 0) {
+ stcb->asoc.primary_destination = net;
+ } else if ((stcb->asoc.primary_destination->ro.ro_rt == NULL) &&
+ (net->ro.ro_rt) &&
+ ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0)) {
+ /* No route to current primary adopt new primary */
+ stcb->asoc.primary_destination = net;
+ }
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, stcb->sctp_ep, stcb,
+ net);
+ /* Validate primary is first */
+ net = TAILQ_FIRST(&stcb->asoc.nets);
+ if ((net != stcb->asoc.primary_destination) &&
+ (stcb->asoc.primary_destination)) {
+ /*
+ * The first one on the list is NOT the primary. sctp_cmpaddr()
+ * is much more efficient if the primary is the first on the
+ * list, so make it so.
+ */
+ TAILQ_REMOVE(&stcb->asoc.nets,
+ stcb->asoc.primary_destination, sctp_next);
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets,
+ stcb->asoc.primary_destination, sctp_next);
+ }
+ return (0);
+}
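The MTU handling in sctp_add_remote_addr() boils down to: prefer the route MTU when the route has one, fall back to the interface MTU (or a default when even that is unknown), and keep the association's smallest_mtu as the running minimum across destinations. A compact sketch of that decision under assumed names; SCTP_DEFAULT_MTU_SKETCH and pick_net_mtu() are stand-ins, not kernel identifiers:

#include <stdint.h>

#define SCTP_DEFAULT_MTU_SKETCH	1500	/* stand-in for SCTP_DEFAULT_MTU */

/* Pick the MTU for a new destination and fold it into the
 * association-wide minimum (*smallest). */
static uint32_t
pick_net_mtu(uint32_t if_mtu, uint32_t route_mtu, uint32_t *smallest)
{
	uint32_t mtu = if_mtu;

	if (mtu == 0)
		mtu = SCTP_DEFAULT_MTU_SKETCH;	/* no interface MTU known */
	else if (route_mtu != 0)
		mtu = route_mtu;		/* route MTU wins when present */

	if (*smallest == 0 || mtu < *smallest)
		*smallest = mtu;		/* track the smallest path MTU */
	return (mtu);
}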
+
+
+/*
+ * allocate an association and add it to the endpoint. The caller must be
+ * careful to add all additional addresses right away, once they are known,
+ * or else the assoc may experience a blackout scenario.
+ */
+struct sctp_tcb *
+sctp_aloc_assoc(struct sctp_inpcb *inp, struct sockaddr *firstaddr,
+ int for_a_init, int *error, uint32_t override_tag, uint32_t vrf_id,
+ struct thread *p
+)
+{
+ /* note the p argument is only valid in unbound sockets */
+
+ struct sctp_tcb *stcb;
+ struct sctp_association *asoc;
+ struct sctpasochead *head;
+ uint16_t rport;
+ int err;
+
+ /*
+ * Assumption made here: Caller has done a
+ * sctp_findassociation_ep_addr(ep, addr's); to make sure the
+ * address does not exist already.
+ */
+ if (sctppcbinfo.ipi_count_asoc >= SCTP_MAX_NUM_OF_ASOC) {
+ /* Hit max assoc, sorry no more */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
+ *error = ENOBUFS;
+ return (NULL);
+ }
+ if (firstaddr == NULL) {
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ SCTP_INP_RLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) {
+ /*
+ * If it's in the TCP pool, it's NOT allowed to create an
+ * association. The parent listener needs to call
+ * sctp_aloc_assoc... or the one-to-many socket. If a peeled-off
+ * or connected one does this, it's an error.
+ */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ SCTPDBG(SCTP_DEBUG_PCB3, "Allocate an association for peer:");
+#ifdef SCTP_DEBUG
+ if (firstaddr) {
+ SCTPDBG_ADDR(SCTP_DEBUG_PCB3, firstaddr);
+ SCTPDBG(SCTP_DEBUG_PCB3, "Port:%d\n",
+ ntohs(((struct sockaddr_in *)firstaddr)->sin_port));
+ } else {
+ SCTPDBG(SCTP_DEBUG_PCB3, "None\n");
+ }
+#endif /* SCTP_DEBUG */
+ if (firstaddr->sa_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ sin = (struct sockaddr_in *)firstaddr;
+ if ((sin->sin_port == 0) || (sin->sin_addr.s_addr == 0)) {
+ /* Invalid address */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ rport = sin->sin_port;
+ } else if (firstaddr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *sin6;
+
+ sin6 = (struct sockaddr_in6 *)firstaddr;
+ if ((sin6->sin6_port == 0) ||
+ (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))) {
+ /* Invalid address */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ rport = sin6->sin6_port;
+ } else {
+ /* not supported family type */
+ SCTP_INP_RUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
+ /*
+ * If you have not performed a bind, then we need to do the
+ * ephemeral bind for you.
+ */
+ if ((err = sctp_inpcb_bind(inp->sctp_socket,
+ (struct sockaddr *)NULL,
+ (struct sctp_ifa *)NULL,
+ p
+ ))) {
+ /* bind error, probably perm */
+ *error = err;
+ return (NULL);
+ }
+ }
+ stcb = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_asoc, struct sctp_tcb);
+ if (stcb == NULL) {
+ /* out of memory? */
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOMEM);
+ *error = ENOMEM;
+ return (NULL);
+ }
+ SCTP_INCR_ASOC_COUNT();
+
+ bzero(stcb, sizeof(*stcb));
+ asoc = &stcb->asoc;
+ SCTP_TCB_LOCK_INIT(stcb);
+ SCTP_TCB_SEND_LOCK_INIT(stcb);
+ /* setup back pointer's */
+ stcb->sctp_ep = inp;
+ stcb->sctp_socket = inp->sctp_socket;
+ if ((err = sctp_init_asoc(inp, stcb, for_a_init, override_tag, vrf_id))) {
+ /* failed */
+ SCTP_TCB_LOCK_DESTROY(stcb);
+ SCTP_TCB_SEND_LOCK_DESTROY(stcb);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_asoc, stcb);
+ SCTP_DECR_ASOC_COUNT();
+ *error = err;
+ return (NULL);
+ }
+ /* and the port */
+ stcb->rport = rport;
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_WLOCK(inp);
+ if (inp->sctp_flags & (SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
+ /* inpcb freed while alloc going on */
+ SCTP_TCB_LOCK_DESTROY(stcb);
+ SCTP_TCB_SEND_LOCK_DESTROY(stcb);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_asoc, stcb);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_DECR_ASOC_COUNT();
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ *error = EINVAL;
+ return (NULL);
+ }
+ SCTP_TCB_LOCK(stcb);
+
+ /* now that my_vtag is set, add it to the hash */
+ head = &sctppcbinfo.sctp_asochash[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag,
+ sctppcbinfo.hashasocmark)];
+ /* put it in the bucket in the vtag hash of assoc's for the system */
+ LIST_INSERT_HEAD(head, stcb, sctp_asocs);
+ sctp_delete_from_timewait(stcb->asoc.my_vtag);
+
+ SCTP_INP_INFO_WUNLOCK();
+
+ if ((err = sctp_add_remote_addr(stcb, firstaddr, SCTP_DO_SETSCOPE, SCTP_ALLOC_ASOC))) {
+ /* failure.. memory error? */
+ if (asoc->strmout) {
+ SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
+ asoc->strmout = NULL;
+ }
+ if (asoc->mapping_array) {
+ SCTP_FREE(asoc->mapping_array, SCTP_M_MAP);
+ asoc->mapping_array = NULL;
+ }
+ SCTP_DECR_ASOC_COUNT();
+ SCTP_TCB_LOCK_DESTROY(stcb);
+ SCTP_TCB_SEND_LOCK_DESTROY(stcb);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_asoc, stcb);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS);
+ *error = ENOBUFS;
+ return (NULL);
+ }
+ /* Init all the timers */
+ SCTP_OS_TIMER_INIT(&asoc->hb_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->dack_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->strreset_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->asconf_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->shut_guard_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->autoclose_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->delayed_event_timer.timer);
+ SCTP_OS_TIMER_INIT(&asoc->delete_prim_timer.timer);
+
+ LIST_INSERT_HEAD(&inp->sctp_asoc_list, stcb, sctp_tcblist);
+ /* now file the port under the hash as well */
+ if (inp->sctp_tcbhash != NULL) {
+ head = &inp->sctp_tcbhash[SCTP_PCBHASH_ALLADDR(stcb->rport,
+ inp->sctp_hashmark)];
+ LIST_INSERT_HEAD(head, stcb, sctp_tcbhash);
+ }
+ SCTP_INP_WUNLOCK(inp);
+ SCTPDBG(SCTP_DEBUG_PCB1, "Association %p now allocated\n", stcb);
+ return (stcb);
+}
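
For reference, the vtag hashing used just above: SCTP_PCBHASH_ASOC() maps the association's verification tag into a bucket of sctp_asochash using the hash mark handed back when the table was created. A minimal userland sketch of that bucket selection, assuming (as is conventional for such marks) that the mark is a power-of-two table size minus one:

    #include <stdint.h>
    #include <stdio.h>

    /* hypothetical stand-in for SCTP_PCBHASH_ASOC(vtag, mark) */
    static uint32_t
    pcbhash_asoc(uint32_t vtag, uint32_t hashmark)
    {
            /* hashmark is assumed to be (nbuckets - 1), nbuckets a power of two */
            return (vtag & hashmark);
    }

    int
    main(void)
    {
            uint32_t mark = 1024 - 1;       /* e.g. a 1024-bucket table */

            printf("vtag 0x12345678 -> bucket %u\n", pcbhash_asoc(0x12345678, mark));
            return (0);
    }
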
+
+
+void
+sctp_remove_net(struct sctp_tcb *stcb, struct sctp_nets *net)
+{
+ struct sctp_association *asoc;
+
+ asoc = &stcb->asoc;
+ asoc->numnets--;
+ TAILQ_REMOVE(&asoc->nets, net, sctp_next);
+ if (net == asoc->primary_destination) {
+ /* Reset primary */
+ struct sctp_nets *lnet;
+
+ lnet = TAILQ_FIRST(&asoc->nets);
+ /*
+ * Mobility adaptation: ideally, if the deleted destination is
+ * the primary, it becomes a fast retransmission trigger for
+ * the subsequent SET PRIMARY. (by micchie)
+ */
+ if (sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_BASE) ||
+ sctp_is_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_FASTHANDOFF)) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "remove_net: primary dst is deleting\n");
+ if (asoc->deleted_primary != NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "remove_net: deleted primary may be already stored\n");
+ goto leave;
+ }
+ asoc->deleted_primary = net;
+ atomic_add_int(&net->ref_count, 1);
+ memset(&net->lastsa, 0, sizeof(net->lastsa));
+ memset(&net->lastsv, 0, sizeof(net->lastsv));
+ sctp_mobility_feature_on(stcb->sctp_ep,
+ SCTP_MOBILITY_PRIM_DELETED);
+ sctp_timer_start(SCTP_TIMER_TYPE_PRIM_DELETED,
+ stcb->sctp_ep, stcb, NULL);
+ }
+leave:
+ /* Try to find a confirmed primary */
+ asoc->primary_destination = sctp_find_alternate_net(stcb, lnet, 0);
+ }
+ if (net == asoc->last_data_chunk_from) {
+ /* Reset primary */
+ asoc->last_data_chunk_from = TAILQ_FIRST(&asoc->nets);
+ }
+ if (net == asoc->last_control_chunk_from) {
+ /* Clear net */
+ asoc->last_control_chunk_from = NULL;
+ }
+ sctp_free_remote_addr(net);
+}
+
+/*
+ * remove a remote endpoint address from an association, it will fail if the
+ * address does not exist.
+ */
+int
+sctp_del_remote_addr(struct sctp_tcb *stcb, struct sockaddr *remaddr)
+{
+ /*
+ * Here we need to remove a remote address. This is quite simple: we
+ * first find it in the list of addresses for the association
+ * (asoc->nets) and, if it is there, remove it from that list. Note
+ * we do not allow it to be removed if there are no other addresses.
+ */
+ struct sctp_association *asoc;
+ struct sctp_nets *net, *net_tmp;
+
+ asoc = &stcb->asoc;
+
+ /* locate the address */
+ for (net = TAILQ_FIRST(&asoc->nets); net != NULL; net = net_tmp) {
+ net_tmp = TAILQ_NEXT(net, sctp_next);
+ if (net->ro._l_addr.sa.sa_family != remaddr->sa_family) {
+ continue;
+ }
+ if (sctp_cmpaddr((struct sockaddr *)&net->ro._l_addr,
+ remaddr)) {
+ /* we found the guy */
+ if (asoc->numnets < 2) {
+ /* Must have at LEAST two remote addresses */
+ return (-1);
+ } else {
+ sctp_remove_net(stcb, net);
+ return (0);
+ }
+ }
+ }
+ /* not found. */
+ return (-2);
+}
+
+void
+sctp_delete_from_timewait(uint32_t tag)
+{
+ struct sctpvtaghead *chain;
+ struct sctp_tagblock *twait_block;
+ int found = 0;
+ int i;
+
+ chain = &sctppcbinfo.vtag_timewait[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
+ if (!SCTP_LIST_EMPTY(chain)) {
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if (twait_block->vtag_block[i].v_tag == tag) {
+ twait_block->vtag_block[i].tv_sec_at_expire = 0;
+ twait_block->vtag_block[i].v_tag = 0;
+ found = 1;
+ break;
+ }
+ }
+ if (found)
+ break;
+ }
+ }
+}
+
+int
+sctp_is_in_timewait(uint32_t tag)
+{
+ struct sctpvtaghead *chain;
+ struct sctp_tagblock *twait_block;
+ int found = 0;
+ int i;
+
+ chain = &sctppcbinfo.vtag_timewait[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
+ if (!SCTP_LIST_EMPTY(chain)) {
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if (twait_block->vtag_block[i].v_tag == tag) {
+ found = 1;
+ break;
+ }
+ }
+ if (found)
+ break;
+ }
+ }
+ return (found);
+}
+
+
+void
+sctp_add_vtag_to_timewait(uint32_t tag, uint32_t time)
+{
+ struct sctpvtaghead *chain;
+ struct sctp_tagblock *twait_block;
+ struct timeval now;
+ int set, i;
+
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ chain = &sctppcbinfo.vtag_timewait[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
+ set = 0;
+ if (!SCTP_LIST_EMPTY(chain)) {
+ /* Block(s) present, lets find space, and expire on the fly */
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if ((twait_block->vtag_block[i].v_tag == 0) &&
+ !set) {
+ twait_block->vtag_block[i].tv_sec_at_expire =
+ now.tv_sec + time;
+ twait_block->vtag_block[i].v_tag = tag;
+ set = 1;
+ } else if ((twait_block->vtag_block[i].v_tag) &&
+ ((long)twait_block->vtag_block[i].tv_sec_at_expire < now.tv_sec)) {
+ /* Audit expires this guy */
+ twait_block->vtag_block[i].tv_sec_at_expire = 0;
+ twait_block->vtag_block[i].v_tag = 0;
+ if (set == 0) {
+ /* Reuse it for my new tag */
+ twait_block->vtag_block[i].tv_sec_at_expire = now.tv_sec + time;
+ twait_block->vtag_block[i].v_tag = tag;
+ set = 1;
+ }
+ }
+ }
+ if (set) {
+ /*
+ * We only do up to the block where we can
+ * place our tag for audits
+ */
+ break;
+ }
+ }
+ }
+ /* Need to add a new block to chain */
+ if (!set) {
+ SCTP_MALLOC(twait_block, struct sctp_tagblock *,
+ sizeof(struct sctp_tagblock), SCTP_M_TIMW);
+ if (twait_block == NULL) {
+ return;
+ }
+ memset(twait_block, 0, sizeof(struct sctp_tagblock));
+ LIST_INSERT_HEAD(chain, twait_block, sctp_nxt_tagblock);
+ twait_block->vtag_block[0].tv_sec_at_expire = now.tv_sec + time;
+ twait_block->vtag_block[0].v_tag = tag;
+ }
+}
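
The time-wait table that sctp_add_vtag_to_timewait(), sctp_is_in_timewait() and sctp_delete_from_timewait() manipulate is a hash of chains, each chain holding blocks of fixed-size tag slots that are expired lazily while scanning. A self-contained userland sketch of that bookkeeping, with hypothetical constants and no locking:

    #include <stddef.h>
    #include <stdint.h>
    #include <time.h>

    #define VTAG_HASH_SIZE    32    /* stand-in for SCTP_STACK_VTAG_HASH_SIZE */
    #define ENTRIES_PER_BLOCK 15    /* stand-in for SCTP_NUMBER_IN_VTAG_BLOCK */

    struct vtag_entry {
            uint32_t v_tag;         /* 0 means the slot is free */
            time_t   expire;        /* absolute expiry time */
    };

    struct vtag_block {
            struct vtag_entry slot[ENTRIES_PER_BLOCK];
            struct vtag_block *next;
    };

    /* one chain of blocks per hash bucket */
    static struct vtag_block *timewait[VTAG_HASH_SIZE];

    /* hold 'tag' for 'hold' seconds, expiring stale slots as we scan */
    static int
    add_to_timewait(uint32_t tag, time_t hold)
    {
            struct vtag_block *blk;
            time_t now = time(NULL);
            int i;

            for (blk = timewait[tag % VTAG_HASH_SIZE]; blk != NULL; blk = blk->next) {
                    for (i = 0; i < ENTRIES_PER_BLOCK; i++) {
                            if (blk->slot[i].v_tag != 0 && blk->slot[i].expire < now) {
                                    /* expired entry: free the slot on the fly */
                                    blk->slot[i].v_tag = 0;
                            }
                            if (blk->slot[i].v_tag == 0) {
                                    blk->slot[i].v_tag = tag;
                                    blk->slot[i].expire = now + hold;
                                    return (0);
                            }
                    }
            }
            return (-1);    /* the caller would allocate a fresh block here */
    }
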
+
+
+static void
+sctp_iterator_asoc_being_freed(struct sctp_inpcb *inp, struct sctp_tcb *stcb)
+{
+ struct sctp_iterator *it;
+
+ /*
+ * Unlock the tcb lock; we do this so we avoid a deadlock scenario
+ * where the iterator is waiting on the TCB lock and the TCB lock is
+ * waiting on the iterator lock.
+ */
+ it = stcb->asoc.stcb_starting_point_for_iterator;
+ if (it == NULL) {
+ return;
+ }
+ if (it->inp != stcb->sctp_ep) {
+ /* hmm, focused on the wrong one? */
+ return;
+ }
+ if (it->stcb != stcb) {
+ return;
+ }
+ it->stcb = LIST_NEXT(stcb, sctp_tcblist);
+ if (it->stcb == NULL) {
+ /* done with all asocs in this endpoint */
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ it->inp = NULL;
+ } else {
+ it->inp = LIST_NEXT(inp, sctp_list);
+ }
+ }
+}
+
+
+/*-
+ * Free the association after un-hashing the remote port. This
+ * function ALWAYS returns holding NO LOCK on the stcb. It DOES
+ * expect that the input to this function IS a locked TCB.
+ * It will return 0 if it did NOT destroy the association (instead
+ * it unlocks it). It will return NON-zero if it either destroyed the
+ * association OR the association is already destroyed.
+ */
+int
+sctp_free_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_inpcbfree, int from_location)
+{
+ int i;
+ struct sctp_association *asoc;
+ struct sctp_nets *net, *prev;
+ struct sctp_laddr *laddr;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_asconf_addr *aparam;
+ struct sctp_asconf_ack *aack;
+ struct sctp_stream_reset_list *liste;
+ struct sctp_queued_to_read *sq;
+ struct sctp_stream_queue_pending *sp;
+ sctp_sharedkey_t *shared_key;
+ struct socket *so;
+ int ccnt = 0;
+ int cnt = 0;
+
+ /* first, let's purge the entry from the hash table. */
+
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, stcb, 6);
+#endif
+ if (stcb->asoc.state == 0) {
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 7);
+#endif
+ /* there is no asoc, really TSNH :-0 */
+ return (1);
+ }
+ /* TEMP CODE */
+ if (stcb->freed_from_where == 0) {
+ /* Only record the first place free happened from */
+ stcb->freed_from_where = from_location;
+ }
+ /* TEMP CODE */
+
+ asoc = &stcb->asoc;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE))
+ /* nothing around */
+ so = NULL;
+ else
+ so = inp->sctp_socket;
+
+ /*
+ * We use timer-based freeing if a reader or writer is in the way.
+ * So we first check if we are actually being called from a timer;
+ * if so, we abort early if a reader or writer is still in the way.
+ */
+ if ((stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) &&
+ (from_inpcbfree == SCTP_NORMAL_PROC)) {
+ /*
+ * is it the timer driving us? if so are the reader/writers
+ * gone?
+ */
+ if (stcb->asoc.refcnt) {
+ /* nope, reader or writer in the way */
+ sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL);
+ /* no asoc destroyed */
+ SCTP_TCB_UNLOCK(stcb);
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, stcb, 8);
+#endif
+ return (0);
+ }
+ }
+ /* now clean up any other timers */
+ (void)SCTP_OS_TIMER_STOP(&asoc->hb_timer.timer);
+ asoc->hb_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer);
+ asoc->dack_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer);
+ /*-
+ * For stream reset we don't blast this unless
+ * it is a str-reset timer; it might be the
+ * free-asoc timer, which we DON'T want to
+ * disturb.
+ */
+ if (asoc->strreset_timer.type == SCTP_TIMER_TYPE_STRRESET)
+ asoc->strreset_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer);
+ asoc->asconf_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer);
+ asoc->autoclose_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->shut_guard_timer.timer);
+ asoc->shut_guard_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer);
+ asoc->delayed_event_timer.self = NULL;
+ /* Mobility adaptation */
+ (void)SCTP_OS_TIMER_STOP(&asoc->delete_prim_timer.timer);
+ asoc->delete_prim_timer.self = NULL;
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ (void)SCTP_OS_TIMER_STOP(&net->fr_timer.timer);
+ net->fr_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&net->rxt_timer.timer);
+ net->rxt_timer.self = NULL;
+ (void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer);
+ net->pmtu_timer.self = NULL;
+ }
+ /* Now the read queue needs to be cleaned up (only once) */
+ cnt = 0;
+ if ((stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0) {
+ stcb->asoc.state |= SCTP_STATE_ABOUT_TO_BE_FREED;
+ SCTP_INP_READ_LOCK(inp);
+ TAILQ_FOREACH(sq, &inp->read_queue, next) {
+ if (sq->stcb == stcb) {
+ sq->do_not_ref_stcb = 1;
+ sq->sinfo_cumtsn = stcb->asoc.cumulative_tsn;
+ /*
+ * If there is no end, there never will be
+ * now.
+ */
+ if (sq->end_added == 0) {
+ /* Held for PD-API; clear that. */
+ sq->pdapi_aborted = 1;
+ sq->held_length = 0;
+ if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT) && (so != NULL)) {
+ /*
+ * Need to add a PD-API
+ * aborted indication.
+ * Setting the control_pdapi
+ * assures that it will be
+ * added right after this
+ * msg.
+ */
+ uint32_t strseq;
+
+ stcb->asoc.control_pdapi = sq;
+ strseq = (sq->sinfo_stream << 16) | sq->sinfo_ssn;
+ sctp_notify_partial_delivery_indication(stcb,
+ SCTP_PARTIAL_DELIVERY_ABORTED, 1, strseq);
+ stcb->asoc.control_pdapi = NULL;
+ }
+ }
+ /* Add an end to wake them */
+ sq->end_added = 1;
+ cnt++;
+ }
+ }
+ SCTP_INP_READ_UNLOCK(inp);
+ if (stcb->block_entry) {
+ cnt++;
+ SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_PCB, ECONNRESET);
+ stcb->block_entry->error = ECONNRESET;
+ stcb->block_entry = NULL;
+ }
+ }
+ if (stcb->asoc.refcnt) {
+ /*
+ * reader or writer in the way, we have hopefully given him
+ * something to chew on above.
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_ASOCKILL, inp, stcb, NULL);
+ SCTP_TCB_UNLOCK(stcb);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE))
+ /* nothing around */
+ so = NULL;
+ if (so) {
+ /* Wake any reader/writers */
+ sctp_sorwakeup(inp, so);
+ sctp_sowwakeup(inp, so);
+ }
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, stcb, 9);
+#endif
+ /* no asoc destroyed */
+ return (0);
+ }
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, stcb, 10);
+#endif
+ /*
+ * When I reach here, no others want to kill the assoc yet.. and I
+ * own the lock. Now it's possible an abort comes in when I do the
+ * lock exchange below to grab all the locks to do the final take
+ * out. To prevent this we increment the count, which will start a
+ * timer and blow out above, thus assuring us that we hold exclusive
+ * killing of the asoc. Note that after getting back the TCB lock we
+ * will go ahead and drop the counter back down and stop any
+ * timer a passing stranger may have started :-S
+ */
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+
+ SCTP_TCB_UNLOCK(stcb);
+
+ SCTP_ITERATOR_LOCK();
+ SCTP_INP_INFO_WLOCK();
+ SCTP_INP_WLOCK(inp);
+ SCTP_TCB_LOCK(stcb);
+ }
+ /* Double check the GONE flag */
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE))
+ /* nothing around */
+ so = NULL;
+
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
+ (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
+ /*
+ * For TCP type we need special handling when we are
+ * connected. We also include the peeled-off ones too.
+ */
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_CONNECTED;
+ inp->sctp_flags |= SCTP_PCB_FLAGS_WAS_CONNECTED;
+ if (so) {
+ SOCK_LOCK(so);
+ if (so->so_rcv.sb_cc == 0) {
+ so->so_state &= ~(SS_ISCONNECTING |
+ SS_ISDISCONNECTING |
+ SS_ISCONFIRMING |
+ SS_ISCONNECTED);
+ }
+ SOCK_UNLOCK(so);
+ socantrcvmore(so);
+ sctp_sowwakeup(inp, so);
+ sctp_sorwakeup(inp, so);
+ SCTP_SOWAKEUP(so);
+ }
+ }
+ }
+ /*
+ * Make it invalid too; that way, if it's about to run it will abort
+ * and return.
+ */
+ sctp_iterator_asoc_being_freed(inp, stcb);
+ /* drop the reference we took above */
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ }
+ asoc->state = 0;
+ if (inp->sctp_tcbhash) {
+ LIST_REMOVE(stcb, sctp_tcbhash);
+ }
+ if (stcb->asoc.in_restart_hash) {
+ LIST_REMOVE(stcb, sctp_tcbrestarhash);
+ }
+ /* Now lets remove it from the list of ALL associations in the EP */
+ LIST_REMOVE(stcb, sctp_tcblist);
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ SCTP_INP_INCR_REF(inp);
+ SCTP_INP_WUNLOCK(inp);
+ SCTP_ITERATOR_UNLOCK();
+ }
+ /* pull from vtag hash */
+ LIST_REMOVE(stcb, sctp_asocs);
+ sctp_add_vtag_to_timewait(asoc->my_vtag, SCTP_TIME_WAIT);
+
+ /*
+ * Now re-stop the timers to be sure - this is paranoia at its finest!
+ */
+ (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->hb_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->dack_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->strreset_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->asconf_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->shut_guard_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->autoclose_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&asoc->delayed_event_timer.timer);
+ TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
+ (void)SCTP_OS_TIMER_STOP(&net->fr_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&net->rxt_timer.timer);
+ (void)SCTP_OS_TIMER_STOP(&net->pmtu_timer.timer);
+ }
+
+ asoc->strreset_timer.type = SCTP_TIMER_TYPE_NONE;
+ prev = NULL;
+ /*
+ * The chunk lists and such SHOULD be empty but we check them just
+ * in case.
+ */
+ /* anything on the wheel needs to be removed */
+ for (i = 0; i < asoc->streamoutcnt; i++) {
+ struct sctp_stream_out *outs;
+
+ outs = &asoc->strmout[i];
+ /* now clean up any chunks here */
+ sp = TAILQ_FIRST(&outs->outqueue);
+ while (sp) {
+ TAILQ_REMOVE(&outs->outqueue, sp, next);
+ if (sp->data) {
+ sctp_m_freem(sp->data);
+ sp->data = NULL;
+ sp->tail_mbuf = NULL;
+ }
+ sctp_free_remote_addr(sp->net);
+ sctp_free_spbufspace(stcb, asoc, sp);
+ /* Free the zone stuff */
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_strmoq, sp);
+ SCTP_DECR_STRMOQ_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ sp = TAILQ_FIRST(&outs->outqueue);
+ }
+ }
+
+ /* sa_ignore FREED_MEMORY */
+ while ((liste = TAILQ_FIRST(&asoc->resetHead)) != NULL) {
+ TAILQ_REMOVE(&asoc->resetHead, liste, next_resp);
+ SCTP_FREE(liste, SCTP_M_STRESET);
+ }
+
+ sq = TAILQ_FIRST(&asoc->pending_reply_queue);
+ while (sq) {
+ TAILQ_REMOVE(&asoc->pending_reply_queue, sq, next);
+ if (sq->data) {
+ sctp_m_freem(sq->data);
+ sq->data = NULL;
+ }
+ sctp_free_remote_addr(sq->whoFrom);
+ sq->whoFrom = NULL;
+ sq->stcb = NULL;
+ /* Free the ctl entry */
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_readq, sq);
+ SCTP_DECR_READQ_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ sq = TAILQ_FIRST(&asoc->pending_reply_queue);
+ }
+
+ chk = TAILQ_FIRST(&asoc->free_chunks);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->free_chunks, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ ccnt++;
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_chunk, chk);
+ SCTP_DECR_CHK_COUNT();
+ atomic_subtract_int(&sctppcbinfo.ipi_free_chunks, 1);
+ asoc->free_chunk_cnt--;
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->free_chunks);
+ }
+ /* pending send queue SHOULD be empty */
+ if (!TAILQ_EMPTY(&asoc->send_queue)) {
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ ccnt++;
+ sctp_free_remote_addr(chk->whoTo);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_chunk, chk);
+ SCTP_DECR_CHK_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->send_queue);
+ }
+ }
+/*
+ if(ccnt) {
+ printf("Freed %d from send_queue\n", ccnt);
+ ccnt = 0;
+ }
+*/
+ /* sent queue SHOULD be empty */
+ if (!TAILQ_EMPTY(&asoc->sent_queue)) {
+ chk = TAILQ_FIRST(&asoc->sent_queue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ ccnt++;
+ sctp_free_remote_addr(chk->whoTo);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_chunk, chk);
+ SCTP_DECR_CHK_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->sent_queue);
+ }
+ }
+/*
+ if(ccnt) {
+ printf("Freed %d from sent_queue\n", ccnt);
+ ccnt = 0;
+ }
+*/
+ /* control queue MAY not be empty */
+ if (!TAILQ_EMPTY(&asoc->control_send_queue)) {
+ chk = TAILQ_FIRST(&asoc->control_send_queue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ ccnt++;
+ sctp_free_remote_addr(chk->whoTo);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_chunk, chk);
+ SCTP_DECR_CHK_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->control_send_queue);
+ }
+ }
+/*
+ if(ccnt) {
+ printf("Freed %d from ctrl_queue\n", ccnt);
+ ccnt = 0;
+ }
+*/
+ if (!TAILQ_EMPTY(&asoc->reasmqueue)) {
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ while (chk) {
+ TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_remote_addr(chk->whoTo);
+ ccnt++;
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_chunk, chk);
+ SCTP_DECR_CHK_COUNT();
+ /* sa_ignore FREED_MEMORY */
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ }
+ }
+/*
+ if(ccnt) {
+ printf("Freed %d from reasm_queue\n", ccnt);
+ ccnt = 0;
+ }
+*/
+ if (asoc->mapping_array) {
+ SCTP_FREE(asoc->mapping_array, SCTP_M_MAP);
+ asoc->mapping_array = NULL;
+ }
+ /* the stream outs */
+ if (asoc->strmout) {
+ SCTP_FREE(asoc->strmout, SCTP_M_STRMO);
+ asoc->strmout = NULL;
+ }
+ asoc->streamoutcnt = 0;
+ if (asoc->strmin) {
+ struct sctp_queued_to_read *ctl;
+
+ for (i = 0; i < asoc->streamincnt; i++) {
+ if (!TAILQ_EMPTY(&asoc->strmin[i].inqueue)) {
+ /* We have something on the stream-in queue */
+ ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue);
+ while (ctl) {
+ TAILQ_REMOVE(&asoc->strmin[i].inqueue,
+ ctl, next);
+ sctp_free_remote_addr(ctl->whoFrom);
+ if (ctl->data) {
+ sctp_m_freem(ctl->data);
+ ctl->data = NULL;
+ }
+ /*
+ * We don't free the address here
+ * since all the net's were freed
+ * above.
+ */
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_readq, ctl);
+ SCTP_DECR_READQ_COUNT();
+ ctl = TAILQ_FIRST(&asoc->strmin[i].inqueue);
+ }
+ }
+ }
+ SCTP_FREE(asoc->strmin, SCTP_M_STRMI);
+ asoc->strmin = NULL;
+ }
+ asoc->streamincnt = 0;
+ while (!TAILQ_EMPTY(&asoc->nets)) {
+ /* sa_ignore FREED_MEMORY */
+ net = TAILQ_FIRST(&asoc->nets);
+ /* pull from list */
+ if ((sctppcbinfo.ipi_count_raddr == 0) || (prev == net)) {
+#ifdef INVARIANTS
+ panic("no net's left alloc'ed, or list points to itself");
+#endif
+ break;
+ }
+ prev = net;
+ TAILQ_REMOVE(&asoc->nets, net, sctp_next);
+ sctp_free_remote_addr(net);
+ }
+
+ while (!SCTP_LIST_EMPTY(&asoc->sctp_restricted_addrs)) {
+ /* sa_ignore FREED_MEMORY */
+ laddr = LIST_FIRST(&asoc->sctp_restricted_addrs);
+ sctp_remove_laddr(laddr);
+ }
+
+ /* pending asconf (address) parameters */
+ while (!TAILQ_EMPTY(&asoc->asconf_queue)) {
+ /* sa_ignore FREED_MEMORY */
+ aparam = TAILQ_FIRST(&asoc->asconf_queue);
+ TAILQ_REMOVE(&asoc->asconf_queue, aparam, next);
+ SCTP_FREE(aparam, SCTP_M_ASC_ADDR);
+ }
+ while (!TAILQ_EMPTY(&asoc->asconf_ack_sent)) {
+ /* sa_ignore FREED_MEMORY */
+ aack = TAILQ_FIRST(&asoc->asconf_ack_sent);
+ TAILQ_REMOVE(&asoc->asconf_ack_sent, aack, next);
+ if (aack->data != NULL) {
+ sctp_m_freem(aack->data);
+ }
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_asconf_ack, aack);
+ }
+ /* clean up auth stuff */
+ if (asoc->local_hmacs)
+ sctp_free_hmaclist(asoc->local_hmacs);
+ if (asoc->peer_hmacs)
+ sctp_free_hmaclist(asoc->peer_hmacs);
+
+ if (asoc->local_auth_chunks)
+ sctp_free_chunklist(asoc->local_auth_chunks);
+ if (asoc->peer_auth_chunks)
+ sctp_free_chunklist(asoc->peer_auth_chunks);
+
+ sctp_free_authinfo(&asoc->authinfo);
+
+ shared_key = LIST_FIRST(&asoc->shared_keys);
+ while (shared_key) {
+ LIST_REMOVE(shared_key, next);
+ sctp_free_sharedkey(shared_key);
+ /* sa_ignore FREED_MEMORY */
+ shared_key = LIST_FIRST(&asoc->shared_keys);
+ }
+
+ /* Insert new items here :> */
+
+ /* Get rid of LOCK */
+ SCTP_TCB_LOCK_DESTROY(stcb);
+ SCTP_TCB_SEND_LOCK_DESTROY(stcb);
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ SCTP_INP_INFO_WUNLOCK();
+ SCTP_INP_RLOCK(inp);
+ }
+#ifdef SCTP_TRACK_FREED_ASOCS
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /* now clean up the tasoc itself */
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_asoc, stcb);
+ SCTP_DECR_ASOC_COUNT();
+ } else {
+ LIST_INSERT_HEAD(&inp->sctp_asoc_free_list, stcb, sctp_tcblist);
+ }
+#else
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_asoc, stcb);
+ SCTP_DECR_ASOC_COUNT();
+#endif
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
+ /*
+ * If it's NOT inp_free calling us AND sctp_close
+ * has been called, we call back...
+ */
+ SCTP_INP_RUNLOCK(inp);
+ /*
+ * This will start the kill timer (if we are the
+ * last one) since we still hold a reference. But this
+ * is the only safe way to do this since otherwise
+ * if the socket closes at the same time we are here
+ * we might collide in the cleanup.
+ */
+ sctp_inpcb_free(inp,
+ SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE,
+ SCTP_CALLED_DIRECTLY_NOCMPSET);
+ SCTP_INP_DECR_REF(inp);
+ goto out_of;
+ } else {
+ /* The socket is still open. */
+ SCTP_INP_DECR_REF(inp);
+ }
+ }
+ if (from_inpcbfree == SCTP_NORMAL_PROC) {
+ SCTP_INP_RUNLOCK(inp);
+ }
+out_of:
+ /* destroyed the asoc */
+#ifdef SCTP_LOG_CLOSING
+ sctp_log_closing(inp, NULL, 11);
+#endif
+ return (1);
+}
+
+
+
+/*
+ * determine if a destination is "reachable" based upon the addresses bound
+ * to the current endpoint (e.g. only v4 or v6 currently bound)
+ */
+/*
+ * FIX: if we allow assoc-level bindx(), then this needs to be fixed to use
+ * assoc level v4/v6 flags, as the assoc *may* not have the same address
+ * types bound as its endpoint
+ */
+int
+sctp_destination_is_reachable(struct sctp_tcb *stcb, struct sockaddr *destaddr)
+{
+ struct sctp_inpcb *inp;
+ int answer;
+
+ /*
+ * No locks here: the TCB, in all cases, is already locked and an
+ * assoc is up. There is either an INP lock applied by the caller (in
+ * the asconf case, when deleting an address) or NOT in the HB case;
+ * however, if HB, then the INP refcount is up and the INP will not
+ * be removed (on top of the fact that we have a TCB lock). So we
+ * only want to read the sctp_flags, which is either bound-all or
+ * not.. no protection needed since once an assoc is up you can't be
+ * changing your binding.
+ */
+ inp = stcb->sctp_ep;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* if bound all, destination is not restricted */
+ /*
+ * RRS: Question during lock work: Is this correct? If you
+ * are bound-all you still might need to obey the V4--V6
+ * flags??? IMO this bound-all stuff needs to be removed!
+ */
+ return (1);
+ }
+ /* NOTE: all "scope" checks are done when local addresses are added */
+ if (destaddr->sa_family == AF_INET6) {
+ answer = inp->ip_inp.inp.inp_vflag & INP_IPV6;
+ } else if (destaddr->sa_family == AF_INET) {
+ answer = inp->ip_inp.inp.inp_vflag & INP_IPV4;
+ } else {
+ /* invalid family, so it's unreachable */
+ answer = 0;
+ }
+ return (answer);
+}
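
The non-bound-all branch above reduces to matching the destination's address family against the INP_IPV4/INP_IPV6 bits kept in inp_vflag. A condensed sketch of that check (the flag values below are illustrative, not the kernel's):

    #include <sys/socket.h>
    #include <stdint.h>

    #define MY_INP_IPV4 0x1         /* illustrative stand-ins for INP_IPV4/INP_IPV6 */
    #define MY_INP_IPV6 0x2

    /* non-zero if an endpoint whose vflag is 'vflag' can reach family 'af' */
    static int
    family_reachable(uint8_t vflag, sa_family_t af)
    {
            switch (af) {
            case AF_INET:
                    return ((vflag & MY_INP_IPV4) != 0);
            case AF_INET6:
                    return ((vflag & MY_INP_IPV6) != 0);
            default:
                    return (0);     /* unknown family: treat as unreachable */
            }
    }
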
+
+/*
+ * update the inp_vflags on an endpoint
+ */
+static void
+sctp_update_ep_vflag(struct sctp_inpcb *inp)
+{
+ struct sctp_laddr *laddr;
+
+ /* first clear the flag */
+ inp->ip_inp.inp.inp_vflag = 0;
+ /* set the flag based on addresses on the ep list */
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == NULL) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "%s: NULL ifa\n",
+ __FUNCTION__);
+ continue;
+ }
+ if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) {
+ continue;
+ }
+ if (laddr->ifa->address.sa.sa_family == AF_INET6) {
+ inp->ip_inp.inp.inp_vflag |= INP_IPV6;
+ } else if (laddr->ifa->address.sa.sa_family == AF_INET) {
+ inp->ip_inp.inp.inp_vflag |= INP_IPV4;
+ }
+ }
+}
+
+/*
+ * Add the address to the endpoint local address list There is nothing to be
+ * done if we are bound to all addresses
+ */
+void
+sctp_add_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa, uint32_t action)
+{
+ struct sctp_laddr *laddr;
+ int fnd, error = 0;
+
+ fnd = 0;
+
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* You are already bound to all. You have it already */
+ return;
+ }
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ /* Can't bind a non-usable addr. */
+ return;
+ }
+ }
+ /* first, is it already present? */
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == ifa) {
+ fnd = 1;
+ break;
+ }
+ }
+
+ if (fnd == 0) {
+ /* Not in the ep list */
+ error = sctp_insert_laddr(&inp->sctp_addr_list, ifa, action);
+ if (error != 0)
+ return;
+ inp->laddr_count++;
+ /* update inp_vflag flags */
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ inp->ip_inp.inp.inp_vflag |= INP_IPV6;
+ } else if (ifa->address.sa.sa_family == AF_INET) {
+ inp->ip_inp.inp.inp_vflag |= INP_IPV4;
+ }
+ }
+ return;
+}
+
+
+/*
+ * select a new (hopefully reachable) destination net (should only be used
+ * when we deleted an ep addr that is the only usable source address to reach
+ * the destination net)
+ */
+static void
+sctp_select_primary_destination(struct sctp_tcb *stcb)
+{
+ struct sctp_nets *net;
+
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ /* for now, we'll just pick the first reachable one we find */
+ if (net->dest_state & SCTP_ADDR_UNCONFIRMED)
+ continue;
+ if (sctp_destination_is_reachable(stcb,
+ (struct sockaddr *)&net->ro._l_addr)) {
+ /* found a reachable destination */
+ stcb->asoc.primary_destination = net;
+ }
+ }
+ /* I can't get there from here! ...we're gonna die shortly... */
+}
+
+
+/*
+ * Delete the address from the endpoint local address list There is nothing
+ * to be done if we are bound to all addresses
+ */
+void
+sctp_del_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa)
+{
+ struct sctp_laddr *laddr;
+ int fnd;
+
+ fnd = 0;
+ if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
+ /* You are already bound to all. You have it already */
+ return;
+ }
+ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
+ if (laddr->ifa == ifa) {
+ fnd = 1;
+ break;
+ }
+ }
+ if (fnd && (inp->laddr_count < 2)) {
+ /* can't delete unless there are at LEAST 2 addresses */
+ return;
+ }
+ if (fnd) {
+ /*
+ * clean up any use of this address: go through our
+ * associations and clear any last_used_address that matches
+ * this one; for each assoc, see if a new primary_destination
+ * is needed
+ */
+ struct sctp_tcb *stcb;
+
+ /* clean up "next_addr_touse" */
+ if (inp->next_addr_touse == laddr)
+ /* delete this address */
+ inp->next_addr_touse = NULL;
+
+ /* clean up "last_used_address" */
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ struct sctp_nets *net;
+
+ SCTP_TCB_LOCK(stcb);
+ if (stcb->asoc.last_used_address == laddr)
+ /* delete this address */
+ stcb->asoc.last_used_address = NULL;
+ /*
+ * Now spin through all the nets and purge any ref
+ * to laddr
+ */
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if (net->ro._s_addr &&
+ (net->ro._s_addr->ifa == laddr->ifa)) {
+ /* Yep, purge src address selected */
+ sctp_rtentry_t *rt;
+
+ /* delete this address if cached */
+ rt = net->ro.ro_rt;
+ if (rt != NULL) {
+ RTFREE(rt);
+ net->ro.ro_rt = NULL;
+ }
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ }
+ SCTP_TCB_UNLOCK(stcb);
+ } /* for each tcb */
+ /* remove it from the ep list */
+ sctp_remove_laddr(laddr);
+ inp->laddr_count--;
+ /* update inp_vflag flags */
+ sctp_update_ep_vflag(inp);
+ }
+ return;
+}
+
+/*
+ * Add the address to the TCB local address restricted list.
+ * This is a "pending" address list (eg. addresses waiting for an
+ * ASCONF-ACK response) and cannot be used as a valid source address.
+ */
+void
+sctp_add_local_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa)
+{
+ struct sctp_inpcb *inp;
+ struct sctp_laddr *laddr;
+ struct sctpladdr *list;
+
+ /*
+ * Assumes the TCB is locked.. and possibly the INP. May need to
+ * confirm/fix that if we need it and that is not the case.
+ */
+ list = &stcb->asoc.sctp_restricted_addrs;
+
+ inp = stcb->sctp_ep;
+ if (ifa->address.sa.sa_family == AF_INET6) {
+ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) {
+ /* Can't bind a non-existent addr. */
+ return;
+ }
+ }
+ /* does the address already exist? */
+ LIST_FOREACH(laddr, list, sctp_nxt_addr) {
+ if (laddr->ifa == ifa) {
+ return;
+ }
+ }
+
+ /* add to the list */
+ (void)sctp_insert_laddr(list, ifa, 0);
+ return;
+}
+
+/*
+ * insert an laddr entry with the given ifa for the desired list
+ */
+int
+sctp_insert_laddr(struct sctpladdr *list, struct sctp_ifa *ifa, uint32_t act)
+{
+ struct sctp_laddr *laddr;
+
+ laddr = SCTP_ZONE_GET(sctppcbinfo.ipi_zone_laddr, struct sctp_laddr);
+ if (laddr == NULL) {
+ /* out of memory? */
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, EINVAL);
+ return (EINVAL);
+ }
+ SCTP_INCR_LADDR_COUNT();
+ bzero(laddr, sizeof(*laddr));
+ (void)SCTP_GETTIME_TIMEVAL(&laddr->start_time);
+ laddr->ifa = ifa;
+ laddr->action = act;
+ atomic_add_int(&ifa->refcount, 1);
+ /* insert it */
+ LIST_INSERT_HEAD(list, laddr, sctp_nxt_addr);
+
+ return (0);
+}
+
+/*
+ * Remove an laddr entry from the local address list (on an assoc)
+ */
+void
+sctp_remove_laddr(struct sctp_laddr *laddr)
+{
+
+ /* remove from the list */
+ LIST_REMOVE(laddr, sctp_nxt_addr);
+ sctp_free_ifa(laddr->ifa);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_laddr, laddr);
+ SCTP_DECR_LADDR_COUNT();
+}
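
sctp_insert_laddr() and sctp_remove_laddr() pair a queue(3) LIST insert/remove with a reference count on the underlying ifa, so the address cannot disappear while a list still points at it. A self-contained userland sketch of that pattern (the types and names here are hypothetical):

    #include <sys/queue.h>
    #include <stdlib.h>

    struct my_ifa {
            int refcount;
    };

    struct my_laddr {
            LIST_ENTRY(my_laddr) next;
            struct my_ifa *ifa;
    };

    LIST_HEAD(my_laddr_list, my_laddr);

    static int
    insert_laddr(struct my_laddr_list *list, struct my_ifa *ifa)
    {
            struct my_laddr *l = calloc(1, sizeof(*l));

            if (l == NULL)
                    return (-1);
            l->ifa = ifa;
            ifa->refcount++;                /* the list now holds a reference */
            LIST_INSERT_HEAD(list, l, next);
            return (0);
    }

    static void
    remove_laddr(struct my_laddr *l)
    {
            LIST_REMOVE(l, next);
            l->ifa->refcount--;             /* drop the list's reference */
            free(l);
    }
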
+
+/*
+ * Remove a local address from the TCB local address restricted list
+ */
+void
+sctp_del_local_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa)
+{
+ struct sctp_inpcb *inp;
+ struct sctp_laddr *laddr;
+
+ /*
+ * This is called by asconf work. It is assumed that a) the TCB is
+ * locked and b) the INP is locked. This is true in as much as I can
+ * trace through the entry asconf code where I did these locks.
+ * Again, the ASCONF code is a bit different in that it often does
+ * lock the INP during its work. This must be so, since we don't
+ * want other procs looking up things while what they are looking
+ * up is changing :-D
+ */
+
+ inp = stcb->sctp_ep;
+ /* if subset bound and don't allow ASCONF's, can't delete last */
+ if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) &&
+ sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_ASCONF)) {
+ if (stcb->sctp_ep->laddr_count < 2) {
+ /* can't delete last address */
+ return;
+ }
+ }
+ LIST_FOREACH(laddr, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) {
+ /* remove the address if it exists */
+ if (laddr->ifa == NULL)
+ continue;
+ if (laddr->ifa == ifa) {
+ sctp_remove_laddr(laddr);
+ return;
+ }
+ }
+
+ /* address not found! */
+ return;
+}
+
+static char sctp_pcb_initialized = 0;
+
+/*
+ * Temporarily removed for __APPLE__ until we use the Tiger equivalents
+ */
+/* sysctl */
+static int sctp_max_number_of_assoc = SCTP_MAX_NUM_OF_ASOC;
+static int sctp_scale_up_for_address = SCTP_SCALE_FOR_ADDR;
+
+void
+sctp_pcb_init()
+{
+ /*
+ * SCTP initialization for the PCB structures; should be called by
+ * the sctp_init() function.
+ */
+ int i;
+ struct timeval tv;
+
+ if (sctp_pcb_initialized != 0) {
+ /* error: I was called twice */
+ return;
+ }
+ sctp_pcb_initialized = 1;
+
+ bzero(&sctpstat, sizeof(struct sctpstat));
+#if defined(SCTP_LOCAL_TRACE_BUF)
+ bzero(&sctp_log, sizeof(struct sctp_log));
+#endif
+ (void)SCTP_GETTIME_TIMEVAL(&tv);
+ sctpstat.sctps_discontinuitytime.tv_sec = (uint32_t) tv.tv_sec;
+ sctpstat.sctps_discontinuitytime.tv_usec = (uint32_t) tv.tv_usec;
+ /* init the empty list of (All) Endpoints */
+ LIST_INIT(&sctppcbinfo.listhead);
+
+ /* init the iterator head */
+ TAILQ_INIT(&sctppcbinfo.iteratorhead);
+
+ /* init the hash table of endpoints */
+ TUNABLE_INT_FETCH("net.inet.sctp.tcbhashsize", &sctp_hashtblsize);
+ TUNABLE_INT_FETCH("net.inet.sctp.pcbhashsize", &sctp_pcbtblsize);
+ TUNABLE_INT_FETCH("net.inet.sctp.chunkscale", &sctp_chunkscale);
+ sctppcbinfo.sctp_asochash = SCTP_HASH_INIT((sctp_hashtblsize * 31),
+ &sctppcbinfo.hashasocmark);
+ sctppcbinfo.sctp_ephash = SCTP_HASH_INIT(sctp_hashtblsize,
+ &sctppcbinfo.hashmark);
+ sctppcbinfo.sctp_tcpephash = SCTP_HASH_INIT(sctp_hashtblsize,
+ &sctppcbinfo.hashtcpmark);
+ sctppcbinfo.hashtblsize = sctp_hashtblsize;
+
+ /* init the small hash table we use to track restarted asoc's */
+ sctppcbinfo.sctp_restarthash = SCTP_HASH_INIT(SCTP_STACK_VTAG_HASH_SIZE,
+ &sctppcbinfo.hashrestartmark);
+
+
+ sctppcbinfo.sctp_vrfhash = SCTP_HASH_INIT(SCTP_SIZE_OF_VRF_HASH,
+ &sctppcbinfo.hashvrfmark);
+
+ sctppcbinfo.vrf_ifn_hash = SCTP_HASH_INIT(SCTP_VRF_IFN_HASH_SIZE,
+ &sctppcbinfo.vrf_ifn_hashmark);
+
+ /* init the zones */
+ /*
+ * FIX ME: Should check for NULL returns, but if it does fail we are
+ * doomed to panic anyway... add later maybe.
+ */
+ SCTP_ZONE_INIT(sctppcbinfo.ipi_zone_ep, "sctp_ep",
+ sizeof(struct sctp_inpcb), maxsockets);
+
+ SCTP_ZONE_INIT(sctppcbinfo.ipi_zone_asoc, "sctp_asoc",
+ sizeof(struct sctp_tcb), sctp_max_number_of_assoc);
+
+ SCTP_ZONE_INIT(sctppcbinfo.ipi_zone_laddr, "sctp_laddr",
+ sizeof(struct sctp_laddr),
+ (sctp_max_number_of_assoc * sctp_scale_up_for_address));
+
+ SCTP_ZONE_INIT(sctppcbinfo.ipi_zone_net, "sctp_raddr",
+ sizeof(struct sctp_nets),
+ (sctp_max_number_of_assoc * sctp_scale_up_for_address));
+
+ SCTP_ZONE_INIT(sctppcbinfo.ipi_zone_chunk, "sctp_chunk",
+ sizeof(struct sctp_tmit_chunk),
+ (sctp_max_number_of_assoc * sctp_chunkscale));
+
+ SCTP_ZONE_INIT(sctppcbinfo.ipi_zone_readq, "sctp_readq",
+ sizeof(struct sctp_queued_to_read),
+ (sctp_max_number_of_assoc * sctp_chunkscale));
+
+ SCTP_ZONE_INIT(sctppcbinfo.ipi_zone_strmoq, "sctp_stream_msg_out",
+ sizeof(struct sctp_stream_queue_pending),
+ (sctp_max_number_of_assoc * sctp_chunkscale));
+
+ SCTP_ZONE_INIT(sctppcbinfo.ipi_zone_asconf_ack, "sctp_asconf_ack",
+ sizeof(struct sctp_asconf_ack),
+ (sctp_max_number_of_assoc * sctp_chunkscale));
+
+
+ /* Master Lock INIT for info structure */
+ SCTP_INP_INFO_LOCK_INIT();
+ SCTP_STATLOG_INIT_LOCK();
+ SCTP_ITERATOR_LOCK_INIT();
+
+ SCTP_IPI_COUNT_INIT();
+ SCTP_IPI_ADDR_INIT();
+ SCTP_IPI_ITERATOR_WQ_INIT();
+#ifdef SCTP_PACKET_LOGGING
+ SCTP_IP_PKTLOG_INIT();
+#endif
+ LIST_INIT(&sctppcbinfo.addr_wq);
+
+ /* not sure if we need all the counts */
+ sctppcbinfo.ipi_count_ep = 0;
+ /* assoc/tcb zone info */
+ sctppcbinfo.ipi_count_asoc = 0;
+ /* local addrlist zone info */
+ sctppcbinfo.ipi_count_laddr = 0;
+ /* remote addrlist zone info */
+ sctppcbinfo.ipi_count_raddr = 0;
+ /* chunk info */
+ sctppcbinfo.ipi_count_chunk = 0;
+
+ /* socket queue zone info */
+ sctppcbinfo.ipi_count_readq = 0;
+
+ /* stream out queue count */
+ sctppcbinfo.ipi_count_strmoq = 0;
+
+ sctppcbinfo.ipi_free_strmoq = 0;
+ sctppcbinfo.ipi_free_chunks = 0;
+
+ SCTP_OS_TIMER_INIT(&sctppcbinfo.addr_wq_timer.timer);
+
+ /* Init the TIMEWAIT list */
+ for (i = 0; i < SCTP_STACK_VTAG_HASH_SIZE_A; i++) {
+ LIST_INIT(&sctppcbinfo.vtag_timewait[i]);
+ }
+
+#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
+ sctppcbinfo.iterator_running = 0;
+ sctp_startup_iterator();
+#endif
+
+ /*
+ * INIT the default VRF, which for BSD is the only one; other OS's
+ * may have more. But initially they must start with one and then
+ * add the VRF's as addresses are added.
+ */
+ sctp_init_vrf_list(SCTP_DEFAULT_VRF);
+
+}
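
SCTP_HASH_INIT() used throughout sctp_pcb_init() wraps the kernel's hashinit(9): it derives a power-of-two number of list heads from the requested element count, allocates and initializes them, and hands back a mask of (buckets - 1) for the PCBHASH macros. A rough userland approximation of that helper (the exact rounding policy is an assumption here):

    #include <sys/queue.h>
    #include <stdlib.h>

    struct bucket_entry {
            LIST_ENTRY(bucket_entry) link;
    };

    LIST_HEAD(bucket_head, bucket_entry);

    /* allocate a power-of-two bucket array and return its mask, hashinit-style */
    static struct bucket_head *
    hash_init(int elements, unsigned long *mask)
    {
            unsigned long size = 1, i;
            struct bucket_head *tbl;

            if (elements < 1)
                    elements = 1;
            while ((size << 1) <= (unsigned long)elements)
                    size <<= 1;             /* largest power of two <= elements */
            tbl = calloc(size, sizeof(*tbl));
            if (tbl == NULL)
                    return (NULL);
            for (i = 0; i < size; i++)
                    LIST_INIT(&tbl[i]);
            *mask = size - 1;
            return (tbl);
    }
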
+
+
+int
+sctp_load_addresses_from_init(struct sctp_tcb *stcb, struct mbuf *m,
+ int iphlen, int offset, int limit, struct sctphdr *sh,
+ struct sockaddr *altsa)
+{
+ /*
+ * grub through the INIT pulling addresses and loading them into the
+ * nets structure in the asoc. The from address in the mbuf should
+ * also be loaded (if it is not already). This routine can be called
+ * with either an INIT or an INIT-ACK, as long as m points to the IP
+ * packet and the offset points to the beginning of the parameters.
+ */
+ struct sctp_inpcb *inp, *l_inp;
+ struct sctp_nets *net, *net_tmp;
+ struct ip *iph;
+ struct sctp_paramhdr *phdr, parm_buf;
+ struct sctp_tcb *stcb_tmp;
+ uint16_t ptype, plen;
+ struct sockaddr *sa;
+ struct sockaddr_storage dest_store;
+ struct sockaddr *local_sa = (struct sockaddr *)&dest_store;
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+ uint8_t random_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_random *p_random = NULL;
+ uint16_t random_len = 0;
+ uint8_t hmacs_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_hmac_algo *hmacs = NULL;
+ uint16_t hmacs_len = 0;
+ uint8_t saw_asconf = 0;
+ uint8_t saw_asconf_ack = 0;
+ uint8_t chunks_store[SCTP_PARAM_BUFFER_SIZE];
+ struct sctp_auth_chunk_list *chunks = NULL;
+ uint16_t num_chunks = 0;
+ sctp_key_t *new_key;
+ uint32_t keylen;
+ int got_random = 0, got_hmacs = 0, got_chklist = 0;
+
+ /* First get the destination address setup too. */
+ memset(&sin, 0, sizeof(sin));
+ memset(&sin6, 0, sizeof(sin6));
+
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(sin);
+ sin.sin_port = stcb->rport;
+
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ sin6.sin6_port = stcb->rport;
+ if (altsa == NULL) {
+ iph = mtod(m, struct ip *);
+ if (iph->ip_v == IPVERSION) {
+ /* it's IPv4 */
+ struct sockaddr_in *sin_2;
+
+ sin_2 = (struct sockaddr_in *)(local_sa);
+ memset(sin_2, 0, sizeof(sin));
+ sin_2->sin_family = AF_INET;
+ sin_2->sin_len = sizeof(sin);
+ sin_2->sin_port = sh->dest_port;
+ sin_2->sin_addr.s_addr = iph->ip_dst.s_addr;
+ sin.sin_addr = iph->ip_src;
+ sa = (struct sockaddr *)&sin;
+ } else if (iph->ip_v == (IPV6_VERSION >> 4)) {
+ /* it's IPv6 */
+ struct ip6_hdr *ip6;
+ struct sockaddr_in6 *sin6_2;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ sin6_2 = (struct sockaddr_in6 *)(local_sa);
+ memset(sin6_2, 0, sizeof(sin6));
+ sin6_2->sin6_family = AF_INET6;
+ sin6_2->sin6_len = sizeof(struct sockaddr_in6);
+ sin6_2->sin6_port = sh->dest_port;
+ sin6.sin6_addr = ip6->ip6_src;
+ sa = (struct sockaddr *)&sin6;
+ } else {
+ sa = NULL;
+ }
+ } else {
+ /*
+ * For cookies we use the src address NOT from the packet
+ * but from the original INIT
+ */
+ sa = altsa;
+ }
+ /* Turn off ECN until we get through all params */
+ stcb->asoc.ecn_allowed = 0;
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ /* mark all addresses that we have currently on the list */
+ net->dest_state |= SCTP_ADDR_NOT_IN_ASSOC;
+ }
+ /* does the source address already exist? if so skip it */
+ l_inp = inp = stcb->sctp_ep;
+
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net_tmp, local_sa, stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+
+ if ((stcb_tmp == NULL && inp == stcb->sctp_ep) || inp == NULL) {
+ /* we must add the source address */
+ /* no scope set here since we have a tcb already. */
+ if ((sa->sa_family == AF_INET) &&
+ (stcb->asoc.ipv4_addr_legal)) {
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_2)) {
+ return (-1);
+ }
+ } else if ((sa->sa_family == AF_INET6) &&
+ (stcb->asoc.ipv6_addr_legal)) {
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_3)) {
+ return (-2);
+ }
+ }
+ } else {
+ if (net_tmp != NULL && stcb_tmp == stcb) {
+ net_tmp->dest_state &= ~SCTP_ADDR_NOT_IN_ASSOC;
+ } else if (stcb_tmp != stcb) {
+ /* It belongs to another association? */
+ if (stcb_tmp)
+ SCTP_TCB_UNLOCK(stcb_tmp);
+ return (-3);
+ }
+ }
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-4);
+ }
+ /*
+ * peer must explicitly turn this on. This may have been initialized
+ * to be "on" in order to allow local addr changes while INIT's are
+ * in flight.
+ */
+ stcb->asoc.peer_supports_asconf = 0;
+ /* now we must go through each of the params. */
+ phdr = sctp_get_next_param(m, offset, &parm_buf, sizeof(parm_buf));
+ while (phdr) {
+ ptype = ntohs(phdr->param_type);
+ plen = ntohs(phdr->param_length);
+ /*
+ * printf("ptype => %0x, plen => %d\n", (uint32_t)ptype,
+ * (int)plen);
+ */
+ if (offset + plen > limit) {
+ break;
+ }
+ if (plen == 0) {
+ break;
+ }
+ if (ptype == SCTP_IPV4_ADDRESS) {
+ if (stcb->asoc.ipv4_addr_legal) {
+ struct sctp_ipv4addr_param *p4, p4_buf;
+
+ /* ok get the v4 address and check/add */
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&p4_buf, sizeof(p4_buf));
+ if (plen != sizeof(struct sctp_ipv4addr_param) ||
+ phdr == NULL) {
+ return (-5);
+ }
+ p4 = (struct sctp_ipv4addr_param *)phdr;
+ sin.sin_addr.s_addr = p4->addr;
+ if (IN_MULTICAST(sin.sin_addr.s_addr)) {
+ /* Skip multi-cast addresses */
+ goto next_param;
+ }
+ if ((sin.sin_addr.s_addr == INADDR_BROADCAST) ||
+ (sin.sin_addr.s_addr == INADDR_ANY)) {
+ goto next_param;
+ }
+ sa = (struct sockaddr *)&sin;
+ inp = stcb->sctp_ep;
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net,
+ local_sa, stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+
+ if ((stcb_tmp == NULL && inp == stcb->sctp_ep) ||
+ inp == NULL) {
+ /* we must add the source address */
+ /*
+ * no scope set since we have a tcb
+ * already
+ */
+
+ /*
+ * we must validate the state again
+ * here
+ */
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-7);
+ }
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_4)) {
+ return (-8);
+ }
+ } else if (stcb_tmp == stcb) {
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-10);
+ }
+ if (net != NULL) {
+ /* clear flag */
+ net->dest_state &=
+ ~SCTP_ADDR_NOT_IN_ASSOC;
+ }
+ } else {
+ /*
+ * strange, address is in another
+ * assoc? straighten out locks.
+ */
+ if (stcb_tmp)
+ SCTP_TCB_UNLOCK(stcb_tmp);
+
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-12);
+ }
+ return (-13);
+ }
+ }
+ } else if (ptype == SCTP_IPV6_ADDRESS) {
+ if (stcb->asoc.ipv6_addr_legal) {
+ /* ok get the v6 address and check/add */
+ struct sctp_ipv6addr_param *p6, p6_buf;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&p6_buf, sizeof(p6_buf));
+ if (plen != sizeof(struct sctp_ipv6addr_param) ||
+ phdr == NULL) {
+ return (-14);
+ }
+ p6 = (struct sctp_ipv6addr_param *)phdr;
+ memcpy((caddr_t)&sin6.sin6_addr, p6->addr,
+ sizeof(p6->addr));
+ if (IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) {
+ /* Skip multi-cast addresses */
+ goto next_param;
+ }
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) {
+ /*
+ * Link-local makes no sense without a
+ * scope
+ */
+ goto next_param;
+ }
+ sa = (struct sockaddr *)&sin6;
+ inp = stcb->sctp_ep;
+ atomic_add_int(&stcb->asoc.refcnt, 1);
+ stcb_tmp = sctp_findassociation_ep_addr(&inp, sa, &net,
+ local_sa, stcb);
+ atomic_add_int(&stcb->asoc.refcnt, -1);
+ if (stcb_tmp == NULL && (inp == stcb->sctp_ep ||
+ inp == NULL)) {
+ /*
+ * we must validate the state again
+ * here
+ */
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-16);
+ }
+ /*
+ * we must add the address, no scope
+ * set
+ */
+ if (sctp_add_remote_addr(stcb, sa, SCTP_DONOT_SETSCOPE, SCTP_LOAD_ADDR_5)) {
+ return (-17);
+ }
+ } else if (stcb_tmp == stcb) {
+ /*
+ * we must validate the state again
+ * here
+ */
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-19);
+ }
+ if (net != NULL) {
+ /* clear flag */
+ net->dest_state &=
+ ~SCTP_ADDR_NOT_IN_ASSOC;
+ }
+ } else {
+ /*
+ * strange, address is in another
+ * assoc? straighten out locks.
+ */
+ if (stcb_tmp)
+ SCTP_TCB_UNLOCK(stcb_tmp);
+
+ if (stcb->asoc.state == 0) {
+ /* the assoc was freed? */
+ return (-21);
+ }
+ return (-22);
+ }
+ }
+ } else if (ptype == SCTP_ECN_CAPABLE) {
+ stcb->asoc.ecn_allowed = 1;
+ } else if (ptype == SCTP_ULP_ADAPTATION) {
+ if (stcb->asoc.state != SCTP_STATE_OPEN) {
+ struct sctp_adaptation_layer_indication ai,
+ *aip;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&ai, sizeof(ai));
+ aip = (struct sctp_adaptation_layer_indication *)phdr;
+ if (aip) {
+ stcb->asoc.peers_adaptation = ntohl(aip->indication);
+ stcb->asoc.adaptation_needed = 1;
+ }
+ }
+ } else if (ptype == SCTP_SET_PRIM_ADDR) {
+ struct sctp_asconf_addr_param lstore, *fee;
+ struct sctp_asconf_addrv4_param *fii;
+ int lptype;
+ struct sockaddr *lsa = NULL;
+
+ stcb->asoc.peer_supports_asconf = 1;
+ if (plen > sizeof(lstore)) {
+ return (-23);
+ }
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&lstore, min(plen, sizeof(lstore)));
+ if (phdr == NULL) {
+ return (-24);
+ }
+ fee = (struct sctp_asconf_addr_param *)phdr;
+ lptype = ntohs(fee->addrp.ph.param_type);
+ if (lptype == SCTP_IPV4_ADDRESS) {
+ if (plen !=
+ sizeof(struct sctp_asconf_addrv4_param)) {
+ SCTP_PRINTF("Sizeof setprim in init/init ack not %d but %d - ignored\n",
+ (int)sizeof(struct sctp_asconf_addrv4_param),
+ plen);
+ } else {
+ fii = (struct sctp_asconf_addrv4_param *)fee;
+ sin.sin_addr.s_addr = fii->addrp.addr;
+ lsa = (struct sockaddr *)&sin;
+ }
+ } else if (lptype == SCTP_IPV6_ADDRESS) {
+ if (plen !=
+ sizeof(struct sctp_asconf_addr_param)) {
+ SCTP_PRINTF("Sizeof setprim (v6) in init/init ack not %d but %d - ignored\n",
+ (int)sizeof(struct sctp_asconf_addr_param),
+ plen);
+ } else {
+ memcpy(sin6.sin6_addr.s6_addr,
+ fee->addrp.addr,
+ sizeof(fee->addrp.addr));
+ lsa = (struct sockaddr *)&sin6;
+ }
+ }
+ if (lsa) {
+ (void)sctp_set_primary_addr(stcb, sa, NULL);
+ }
+ } else if (ptype == SCTP_PRSCTP_SUPPORTED) {
+ /* Peer supports pr-sctp */
+ stcb->asoc.peer_supports_prsctp = 1;
+ } else if (ptype == SCTP_SUPPORTED_CHUNK_EXT) {
+ /* A supported extension chunk */
+ struct sctp_supported_chunk_types_param *pr_supported;
+ uint8_t local_store[SCTP_PARAM_BUFFER_SIZE];
+ int num_ent, i;
+
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)&local_store, min(sizeof(local_store), plen));
+ if (phdr == NULL) {
+ return (-25);
+ }
+ stcb->asoc.peer_supports_asconf = 0;
+ stcb->asoc.peer_supports_prsctp = 0;
+ stcb->asoc.peer_supports_pktdrop = 0;
+ stcb->asoc.peer_supports_strreset = 0;
+ stcb->asoc.peer_supports_auth = 0;
+ pr_supported = (struct sctp_supported_chunk_types_param *)phdr;
+ num_ent = plen - sizeof(struct sctp_paramhdr);
+ for (i = 0; i < num_ent; i++) {
+ switch (pr_supported->chunk_types[i]) {
+ case SCTP_ASCONF:
+ case SCTP_ASCONF_ACK:
+ stcb->asoc.peer_supports_asconf = 1;
+ break;
+ case SCTP_FORWARD_CUM_TSN:
+ stcb->asoc.peer_supports_prsctp = 1;
+ break;
+ case SCTP_PACKET_DROPPED:
+ stcb->asoc.peer_supports_pktdrop = 1;
+ break;
+ case SCTP_STREAM_RESET:
+ stcb->asoc.peer_supports_strreset = 1;
+ break;
+ case SCTP_AUTHENTICATION:
+ stcb->asoc.peer_supports_auth = 1;
+ break;
+ default:
+ /* one I have not learned yet */
+ break;
+
+ }
+ }
+ } else if (ptype == SCTP_ECN_NONCE_SUPPORTED) {
+ /* Peer supports ECN-nonce */
+ stcb->asoc.peer_supports_ecn_nonce = 1;
+ stcb->asoc.ecn_nonce_allowed = 1;
+ } else if (ptype == SCTP_RANDOM) {
+ if (plen > sizeof(random_store))
+ break;
+ if (got_random) {
+ /* already processed a RANDOM */
+ goto next_param;
+ }
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)random_store,
+ min(sizeof(random_store), plen));
+ if (phdr == NULL)
+ return (-26);
+ p_random = (struct sctp_auth_random *)phdr;
+ random_len = plen - sizeof(*p_random);
+ /* enforce the random length */
+ if (random_len != SCTP_AUTH_RANDOM_SIZE_REQUIRED) {
+ SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP: invalid RANDOM len\n");
+ return (-27);
+ }
+ got_random = 1;
+ } else if (ptype == SCTP_HMAC_LIST) {
+ int num_hmacs;
+ int i;
+
+ if (plen > sizeof(hmacs_store))
+ break;
+ if (got_hmacs) {
+ /* already processed a HMAC list */
+ goto next_param;
+ }
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)hmacs_store,
+ min(plen, sizeof(hmacs_store)));
+ if (phdr == NULL)
+ return (-28);
+ hmacs = (struct sctp_auth_hmac_algo *)phdr;
+ hmacs_len = plen - sizeof(*hmacs);
+ num_hmacs = hmacs_len / sizeof(hmacs->hmac_ids[0]);
+ /* validate the hmac list */
+ if (sctp_verify_hmac_param(hmacs, num_hmacs)) {
+ return (-29);
+ }
+ if (stcb->asoc.peer_hmacs != NULL)
+ sctp_free_hmaclist(stcb->asoc.peer_hmacs);
+ stcb->asoc.peer_hmacs = sctp_alloc_hmaclist(num_hmacs);
+ if (stcb->asoc.peer_hmacs != NULL) {
+ for (i = 0; i < num_hmacs; i++) {
+ (void)sctp_auth_add_hmacid(stcb->asoc.peer_hmacs,
+ ntohs(hmacs->hmac_ids[i]));
+ }
+ }
+ got_hmacs = 1;
+ } else if (ptype == SCTP_CHUNK_LIST) {
+ int i;
+
+ if (plen > sizeof(chunks_store))
+ break;
+ if (got_chklist) {
+ /* already processed a Chunks list */
+ goto next_param;
+ }
+ phdr = sctp_get_next_param(m, offset,
+ (struct sctp_paramhdr *)chunks_store,
+ min(plen, sizeof(chunks_store)));
+ if (phdr == NULL)
+ return (-30);
+ chunks = (struct sctp_auth_chunk_list *)phdr;
+ num_chunks = plen - sizeof(*chunks);
+ if (stcb->asoc.peer_auth_chunks != NULL)
+ sctp_clear_chunklist(stcb->asoc.peer_auth_chunks);
+ else
+ stcb->asoc.peer_auth_chunks = sctp_alloc_chunklist();
+ for (i = 0; i < num_chunks; i++) {
+ (void)sctp_auth_add_chunk(chunks->chunk_types[i],
+ stcb->asoc.peer_auth_chunks);
+ /* record asconf/asconf-ack if listed */
+ if (chunks->chunk_types[i] == SCTP_ASCONF)
+ saw_asconf = 1;
+ if (chunks->chunk_types[i] == SCTP_ASCONF_ACK)
+ saw_asconf_ack = 1;
+
+ }
+ got_chklist = 1;
+ } else if ((ptype == SCTP_HEARTBEAT_INFO) ||
+ (ptype == SCTP_STATE_COOKIE) ||
+ (ptype == SCTP_UNRECOG_PARAM) ||
+ (ptype == SCTP_COOKIE_PRESERVE) ||
+ (ptype == SCTP_SUPPORTED_ADDRTYPE) ||
+ (ptype == SCTP_ADD_IP_ADDRESS) ||
+ (ptype == SCTP_DEL_IP_ADDRESS) ||
+ (ptype == SCTP_ERROR_CAUSE_IND) ||
+ (ptype == SCTP_SUCCESS_REPORT)) {
+ /* don't care */ ;
+ } else {
+ if ((ptype & 0x8000) == 0x0000) {
+ /*
+ * must stop processing the rest of the
+ * params. Any report bits were handled
+ * with the call to
+ * sctp_arethere_unrecognized_parameters()
+ * when the INIT or INIT-ACK was first seen.
+ */
+ break;
+ }
+ }
+next_param:
+ offset += SCTP_SIZE32(plen);
+ if (offset >= limit) {
+ break;
+ }
+ phdr = sctp_get_next_param(m, offset, &parm_buf,
+ sizeof(parm_buf));
+ }
+ /* Now check to see if we need to purge any addresses */
+ for (net = TAILQ_FIRST(&stcb->asoc.nets); net != NULL; net = net_tmp) {
+ net_tmp = TAILQ_NEXT(net, sctp_next);
+ if ((net->dest_state & SCTP_ADDR_NOT_IN_ASSOC) ==
+ SCTP_ADDR_NOT_IN_ASSOC) {
+ /* This address has been removed from the asoc */
+ /* remove and free it */
+ stcb->asoc.numnets--;
+ TAILQ_REMOVE(&stcb->asoc.nets, net, sctp_next);
+ sctp_free_remote_addr(net);
+ if (net == stcb->asoc.primary_destination) {
+ stcb->asoc.primary_destination = NULL;
+ sctp_select_primary_destination(stcb);
+ }
+ }
+ }
+ /* validate authentication required parameters */
+ if (got_random && got_hmacs) {
+ stcb->asoc.peer_supports_auth = 1;
+ } else {
+ stcb->asoc.peer_supports_auth = 0;
+ }
+ if (!stcb->asoc.peer_supports_auth && got_chklist) {
+ /* peer does not support auth but sent a chunks list? */
+ return (-31);
+ }
+ if (!sctp_asconf_auth_nochk && stcb->asoc.peer_supports_asconf &&
+ !stcb->asoc.peer_supports_auth) {
+ /* peer supports asconf but not auth? */
+ return (-32);
+ } else if ((stcb->asoc.peer_supports_asconf) && (stcb->asoc.peer_supports_auth) &&
+ ((saw_asconf == 0) || (saw_asconf_ack == 0))) {
+ return (-33);
+ }
+ /* concatenate the full random key */
+#ifdef SCTP_AUTH_DRAFT_04
+ keylen = random_len;
+ new_key = sctp_alloc_key(keylen);
+ if (new_key != NULL) {
+ /* copy in the RANDOM */
+ if (p_random != NULL)
+ bcopy(p_random->random_data, new_key->key, random_len);
+ }
+#else
+ keylen = sizeof(*p_random) + random_len + sizeof(*chunks) + num_chunks +
+ sizeof(*hmacs) + hmacs_len;
+ new_key = sctp_alloc_key(keylen);
+ if (new_key != NULL) {
+ /* copy in the RANDOM */
+ if (p_random != NULL) {
+ keylen = sizeof(*p_random) + random_len;
+ bcopy(p_random, new_key->key, keylen);
+ }
+ /* append in the AUTH chunks */
+ if (chunks != NULL) {
+ bcopy(chunks, new_key->key + keylen,
+ sizeof(*chunks) + num_chunks);
+ keylen += sizeof(*chunks) + num_chunks;
+ }
+ /* append in the HMACs */
+ if (hmacs != NULL) {
+ bcopy(hmacs, new_key->key + keylen,
+ sizeof(*hmacs) + hmacs_len);
+ }
+ }
+#endif
+ else {
+ /* failed to get memory for the key */
+ return (-34);
+ }
+ if (stcb->asoc.authinfo.peer_random != NULL)
+ sctp_free_key(stcb->asoc.authinfo.peer_random);
+ stcb->asoc.authinfo.peer_random = new_key;
+#ifdef SCTP_AUTH_DRAFT_04
+ /* don't include the chunks and hmacs for draft -04 */
+ stcb->asoc.authinfo.peer_random->keylen = random_len;
+#endif
+ sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.assoc_keyid);
+ sctp_clear_cachedkeys(stcb, stcb->asoc.authinfo.recv_keyid);
+
+ return (0);
+}
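
The long parameter loop above is a standard SCTP TLV walk: each parameter begins with a 16-bit type and a 16-bit length (which includes the header), and the cursor advances by the length rounded up to a 4-byte boundary, which is what SCTP_SIZE32() does. A minimal sketch of that walk over a flat buffer (no mbufs), with the same stop-on-malformed behaviour:

    #include <arpa/inet.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    struct param_hdr {
            uint16_t type;                  /* network byte order */
            uint16_t length;                /* includes this header, network order */
    };

    #define PAD4(x) (((x) + 3) & ~3u)       /* stand-in for SCTP_SIZE32() */

    /* walk TLV parameters in buf[0..len); returns the number of parameters seen */
    static int
    walk_params(const uint8_t *buf, size_t len)
    {
            size_t off = 0;
            int count = 0;

            while (off + sizeof(struct param_hdr) <= len) {
                    struct param_hdr ph;
                    uint16_t plen;

                    memcpy(&ph, buf + off, sizeof(ph));
                    plen = ntohs(ph.length);
                    if (plen < sizeof(ph) || off + plen > len)
                            break;          /* malformed or truncated parameter */
                    count++;
                    off += PAD4(plen);      /* parameters are 32-bit aligned */
            }
            return (count);
    }
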
+
+int
+sctp_set_primary_addr(struct sctp_tcb *stcb, struct sockaddr *sa,
+ struct sctp_nets *net)
+{
+ /* make sure the requested primary address exists in the assoc */
+ if (net == NULL && sa)
+ net = sctp_findnet(stcb, sa);
+
+ if (net == NULL) {
+ /* didn't find the requested primary address! */
+ return (-1);
+ } else {
+ /* set the primary address */
+ if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
+ /* Must be confirmed, so queue to set */
+ net->dest_state |= SCTP_ADDR_REQ_PRIMARY;
+ return (0);
+ }
+ stcb->asoc.primary_destination = net;
+ net->dest_state &= ~SCTP_ADDR_WAS_PRIMARY;
+ net = TAILQ_FIRST(&stcb->asoc.nets);
+ if (net != stcb->asoc.primary_destination) {
+ /*
+ * first one on the list is NOT the primary.
+ * sctp_cmpaddr() is much more efficient if the
+ * primary is the first on the list, so make it so.
+ */
+ TAILQ_REMOVE(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next);
+ TAILQ_INSERT_HEAD(&stcb->asoc.nets, stcb->asoc.primary_destination, sctp_next);
+ }
+ return (0);
+ }
+}
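
The tail of sctp_set_primary_addr() keeps the primary destination at the head of the asoc->nets TAILQ so that sctp_cmpaddr() scans hit it first; the move is simply a remove followed by an insert at the head. A tiny sketch with a hypothetical element type:

    #include <sys/queue.h>

    struct my_net {
            TAILQ_ENTRY(my_net) link;
    };

    TAILQ_HEAD(my_netlist, my_net);

    /* move 'primary' to the front of the list if it is not there already */
    static void
    move_primary_to_front(struct my_netlist *nets, struct my_net *primary)
    {
            if (TAILQ_FIRST(nets) != primary) {
                    TAILQ_REMOVE(nets, primary, link);
                    TAILQ_INSERT_HEAD(nets, primary, link);
            }
    }
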
+
+int
+sctp_is_vtag_good(struct sctp_inpcb *inp, uint32_t tag, struct timeval *now, int save_in_twait)
+{
+ /*
+ * This function serves two purposes. It will see if a TAG can be
+ * re-used, returning 1 if it is ok to use and 0 if that tag must not
+ * be used. A secondary thing it does is purge out old tags that
+ * can be removed.
+ */
+ struct sctpasochead *head;
+ struct sctpvtaghead *chain;
+ struct sctp_tagblock *twait_block;
+ struct sctp_tcb *stcb;
+ int i;
+
+ SCTP_INP_INFO_WLOCK();
+ chain = &sctppcbinfo.vtag_timewait[(tag % SCTP_STACK_VTAG_HASH_SIZE)];
+ /* First is the vtag in use ? */
+
+ head = &sctppcbinfo.sctp_asochash[SCTP_PCBHASH_ASOC(tag,
+ sctppcbinfo.hashasocmark)];
+ if (head == NULL) {
+ goto check_restart;
+ }
+ LIST_FOREACH(stcb, head, sctp_asocs) {
+
+ if (stcb->asoc.my_vtag == tag) {
+ /*
+ * We should remove this if-statement and always return 0
+ * if we want vtags to be unique across all endpoints. For
+ * now, uniqueness within an endpoint is ok.
+ */
+ if (inp == stcb->sctp_ep) {
+ /* bad tag, in use */
+ SCTP_INP_INFO_WUNLOCK();
+ return (0);
+ }
+ }
+ }
+check_restart:
+ /* Now lets check the restart hash */
+ head = &sctppcbinfo.sctp_restarthash[SCTP_PCBHASH_ASOC(tag,
+ sctppcbinfo.hashrestartmark)];
+ if (head == NULL) {
+ goto check_time_wait;
+ }
+ LIST_FOREACH(stcb, head, sctp_tcbrestarhash) {
+ if (stcb->asoc.assoc_id == tag) {
+ /* candidate */
+ if (inp == stcb->sctp_ep) {
+ /* bad tag, in use */
+ SCTP_INP_INFO_WUNLOCK();
+ return (0);
+ }
+ }
+ }
+check_time_wait:
+ /* Now what about timed wait ? */
+ if (!SCTP_LIST_EMPTY(chain)) {
+ /*
+ * Block(s) are present, lets see if we have this tag in the
+ * list
+ */
+ LIST_FOREACH(twait_block, chain, sctp_nxt_tagblock) {
+ for (i = 0; i < SCTP_NUMBER_IN_VTAG_BLOCK; i++) {
+ if (twait_block->vtag_block[i].v_tag == 0) {
+ /* not used */
+ continue;
+ } else if ((long)twait_block->vtag_block[i].tv_sec_at_expire <
+ now->tv_sec) {
+ /* Audit expires this guy */
+ twait_block->vtag_block[i].tv_sec_at_expire = 0;
+ twait_block->vtag_block[i].v_tag = 0;
+ } else if (twait_block->vtag_block[i].v_tag ==
+ tag) {
+ /* Bad tag, sorry :< */
+ SCTP_INP_INFO_WUNLOCK();
+ return (0);
+ }
+ }
+ }
+ }
+ /*-
+ * Not found, so the tag is ok to use; add it to the time wait hash
+ * as well.  This will prevent two successive cookies from getting
+ * the same tag, or two inits sent quickly on multi-processors.
+ * We only keep the tag for the life of a cookie and when we
+ * add this tag to the assoc hash we need to purge it from
+ * the t-wait hash.
+ */
+ if (save_in_twait)
+ sctp_add_vtag_to_timewait(tag, TICKS_TO_SEC(inp->sctp_ep.def_cookie_life));
+ SCTP_INP_INFO_WUNLOCK();
+ return (1);
+}
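A minimal caller-side sketch of how this check is typically used when picking a fresh verification tag; sctp_select_a_tag() is an assumed helper name here and may differ in the actual sources:

	uint32_t vtag;
	struct timeval now;

	getmicrotime(&now);
	do {
		vtag = sctp_select_a_tag(inp);	/* assumed random-tag helper */
	} while (sctp_is_vtag_good(inp, vtag, &now, 1) == 0);
	/* vtag is now usable for our INIT/INIT-ACK and has been parked in
	 * the time-wait hash for the life of a cookie. */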
+
+
+static sctp_assoc_t reneged_asoc_ids[256];
+static uint8_t reneged_at = 0;
+
+
+static void
+sctp_drain_mbufs(struct sctp_inpcb *inp, struct sctp_tcb *stcb)
+{
+ /*
+ * We must hunt this association for MBUF's past the cumack (i.e.
+ * out of order data that we can renege on).
+ */
+ struct sctp_association *asoc;
+ struct sctp_tmit_chunk *chk, *nchk;
+ uint32_t cumulative_tsn_p1, tsn;
+ struct sctp_queued_to_read *ctl, *nctl;
+ int cnt, strmat, gap;
+
+ /* We look for anything larger than the cum-ack + 1 */
+
+ SCTP_STAT_INCR(sctps_protocol_drain_calls);
+ if (sctp_do_drain == 0) {
+ return;
+ }
+ asoc = &stcb->asoc;
+ if (asoc->cumulative_tsn == asoc->highest_tsn_inside_map) {
+ /* none we can reneg on. */
+ return;
+ }
+ SCTP_STAT_INCR(sctps_protocol_drains_done);
+ cumulative_tsn_p1 = asoc->cumulative_tsn + 1;
+ cnt = 0;
+ /* First look in the re-assembly queue */
+ chk = TAILQ_FIRST(&asoc->reasmqueue);
+ while (chk) {
+ /* Get the next one */
+ nchk = TAILQ_NEXT(chk, sctp_next);
+ if (compare_with_wrap(chk->rec.data.TSN_seq,
+ cumulative_tsn_p1, MAX_TSN)) {
+ /* Yep it is above cum-ack */
+ cnt++;
+ tsn = chk->rec.data.TSN_seq;
+ if (tsn >= asoc->mapping_array_base_tsn) {
+ gap = tsn - asoc->mapping_array_base_tsn;
+ } else {
+ gap = (MAX_TSN - asoc->mapping_array_base_tsn) +
+ tsn + 1;
+ }
+ asoc->size_on_reasm_queue = sctp_sbspace_sub(asoc->size_on_reasm_queue, chk->send_size);
+ sctp_ucount_decr(asoc->cnt_on_reasm_queue);
+ SCTP_UNSET_TSN_PRESENT(asoc->mapping_array, gap);
+ TAILQ_REMOVE(&asoc->reasmqueue, chk, sctp_next);
+ if (chk->data) {
+ sctp_m_freem(chk->data);
+ chk->data = NULL;
+ }
+ sctp_free_a_chunk(stcb, chk);
+ }
+ chk = nchk;
+ }
+ /* Ok that was fun, now we will drain all the inbound streams? */
+ for (strmat = 0; strmat < asoc->streamincnt; strmat++) {
+ ctl = TAILQ_FIRST(&asoc->strmin[strmat].inqueue);
+ while (ctl) {
+ nctl = TAILQ_NEXT(ctl, next);
+ if (compare_with_wrap(ctl->sinfo_tsn,
+ cumulative_tsn_p1, MAX_TSN)) {
+ /* Yep it is above cum-ack */
+ cnt++;
+ tsn = ctl->sinfo_tsn;
+ if (tsn >= asoc->mapping_array_base_tsn) {
+ gap = tsn -
+ asoc->mapping_array_base_tsn;
+ } else {
+ gap = (MAX_TSN -
+ asoc->mapping_array_base_tsn) +
+ tsn + 1;
+ }
+ asoc->size_on_all_streams = sctp_sbspace_sub(asoc->size_on_all_streams, ctl->length);
+ sctp_ucount_decr(asoc->cnt_on_all_streams);
+
+ SCTP_UNSET_TSN_PRESENT(asoc->mapping_array,
+ gap);
+ TAILQ_REMOVE(&asoc->strmin[strmat].inqueue,
+ ctl, next);
+ if (ctl->data) {
+ sctp_m_freem(ctl->data);
+ ctl->data = NULL;
+ }
+ sctp_free_remote_addr(ctl->whoFrom);
+ SCTP_ZONE_FREE(sctppcbinfo.ipi_zone_readq, ctl);
+ SCTP_DECR_READQ_COUNT();
+ }
+ ctl = nctl;
+ }
+ }
+ /*
+ * Question, should we go through the delivery queue? The only
+ * reason things are on here is the app not reading OR a p-d-api up.
+ * An attacker COULD send enough in to initiate the PD-API and then
+ * send a bunch of stuff to other streams... these would wind up on
+ * the delivery queue.. and then we would not get to them. But in
+ * order to do this I then have to back-track and un-deliver
+ * sequence numbers in streams.. el-yucko. I think for now we will
+ * NOT look at the delivery queue and leave it to be something to
+ * consider later. An alternative would be to abort the P-D-API with
+ * a notification and then deliver the data.... Or another method
+ * might be to keep track of how many times the situation occurs and
+ * if we see a possible attack underway just abort the association.
+ */
+#ifdef SCTP_DEBUG
+ if (cnt) {
+ SCTPDBG(SCTP_DEBUG_PCB1, "Freed %d chunks from reneg harvest\n", cnt);
+ }
+#endif
+ if (cnt) {
+ /*
+ * Now do we need to find a new
+ * asoc->highest_tsn_inside_map?
+ */
+ if (asoc->highest_tsn_inside_map >= asoc->mapping_array_base_tsn) {
+ gap = asoc->highest_tsn_inside_map - asoc->mapping_array_base_tsn;
+ } else {
+ gap = (MAX_TSN - asoc->mapping_array_base_tsn) +
+ asoc->highest_tsn_inside_map + 1;
+ }
+ if (gap >= (asoc->mapping_array_size << 3)) {
+ /*
+ * Something bad happened or cum-ack and high were
+ * behind the base, but if so earlier checks should
+ * have found NO data... weird... we will start at
+ * end of mapping array.
+ */
+ SCTP_PRINTF("Gap was larger than array?? %d set to max:%d maparraymax:%x\n",
+ (int)gap,
+ (int)(asoc->mapping_array_size << 3),
+ (int)asoc->highest_tsn_inside_map);
+ gap = asoc->mapping_array_size << 3;
+ }
+ while (gap > 0) {
+ if (SCTP_IS_TSN_PRESENT(asoc->mapping_array, gap)) {
+ /* found the new highest */
+ asoc->highest_tsn_inside_map = asoc->mapping_array_base_tsn + gap;
+ break;
+ }
+ gap--;
+ }
+ if (gap == 0) {
+ /* Nothing left in map */
+ memset(asoc->mapping_array, 0, asoc->mapping_array_size);
+ asoc->mapping_array_base_tsn = asoc->cumulative_tsn + 1;
+ asoc->highest_tsn_inside_map = asoc->cumulative_tsn;
+ }
+ asoc->last_revoke_count = cnt;
+ (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer);
+ /* sa_ignore NO_NULL_CHK */
+ sctp_send_sack(stcb);
+ sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_DRAIN, SCTP_SO_NOT_LOCKED);
+ reneged_asoc_ids[reneged_at] = sctp_get_associd(stcb);
+ reneged_at++;
+ }
+ /*
+ * Another issue: in un-setting the TSN's in the mapping array we
+ * DID NOT adjust the highest_tsn marker. This will cause one of two
+ * things to occur. It may cause us to do extra work in checking for
+ * our mapping array movement. More importantly it may cause us to
+ * SACK every datagram. This may not be a bad thing though, since we
+ * will recover once our cum-ack advances and all the stuff we
+ * dumped has been recovered.
+ */
+}
+
+void
+sctp_drain()
+{
+ /*
+ * We must walk the PCB lists for ALL associations here. The system
+ * is LOW on MBUF's and needs help. This is where reneging will
+ * occur. We really hope this does NOT happen!
+ */
+ struct sctp_inpcb *inp;
+ struct sctp_tcb *stcb;
+
+ SCTP_INP_INFO_RLOCK();
+ LIST_FOREACH(inp, &sctppcbinfo.listhead, sctp_list) {
+ /* For each endpoint */
+ SCTP_INP_RLOCK(inp);
+ LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
+ /* For each association */
+ SCTP_TCB_LOCK(stcb);
+ sctp_drain_mbufs(inp, stcb);
+ SCTP_TCB_UNLOCK(stcb);
+ }
+ SCTP_INP_RUNLOCK(inp);
+ }
+ SCTP_INP_INFO_RUNLOCK();
+}
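sctp_drain() is the kind of routine that gets hung off the protocol switch's pr_drain hook so it runs under mbuf pressure; a hedged sketch of that wiring (the other fields here are placeholders, not the real in_proto.c entry):

struct protosw sctp_seqpacket_protosw = {
	.pr_type =	SOCK_SEQPACKET,
	.pr_protocol =	IPPROTO_SCTP,
	.pr_flags =	PR_WANTRCVD,
	.pr_drain =	sctp_drain,	/* invoked when the system is low on mbufs */
	/* the real entry also sets pr_domain, pr_input, pr_usrreqs, ... */
};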
+
+/*
+ * start a new iterator
+ * iterates through all endpoints and associations based on the pcb_state
+ * flags and asoc_state. "af" (mandatory) is executed for all matching
+ * assocs and "ef" (optional) is executed when the iterator completes.
+ * "inpf" (optional) is executed for each new endpoint as it is being
+ * iterated through. "inpe" (optional) is called once an endpoint has
+ * been iterated through all of its stcbs.
+ */
+int
+sctp_initiate_iterator(inp_func inpf,
+ asoc_func af,
+ inp_func inpe,
+ uint32_t pcb_state,
+ uint32_t pcb_features,
+ uint32_t asoc_state,
+ void *argp,
+ uint32_t argi,
+ end_func ef,
+ struct sctp_inpcb *s_inp,
+ uint8_t chunk_output_off)
+{
+ struct sctp_iterator *it = NULL;
+
+ if (af == NULL) {
+ return (-1);
+ }
+ SCTP_MALLOC(it, struct sctp_iterator *, sizeof(struct sctp_iterator),
+ SCTP_M_ITER);
+ if (it == NULL) {
+ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOMEM);
+ return (ENOMEM);
+ }
+ memset(it, 0, sizeof(*it));
+ it->function_assoc = af;
+ it->function_inp = inpf;
+ if (inpf)
+ it->done_current_ep = 0;
+ else
+ it->done_current_ep = 1;
+ it->function_atend = ef;
+ it->pointer = argp;
+ it->val = argi;
+ it->pcb_flags = pcb_state;
+ it->pcb_features = pcb_features;
+ it->asoc_state = asoc_state;
+ it->function_inp_end = inpe;
+ it->no_chunk_output = chunk_output_off;
+ if (s_inp) {
+ it->inp = s_inp;
+ it->iterator_flags = SCTP_ITERATOR_DO_SINGLE_INP;
+ } else {
+ SCTP_INP_INFO_RLOCK();
+ it->inp = LIST_FIRST(&sctppcbinfo.listhead);
+
+ SCTP_INP_INFO_RUNLOCK();
+ it->iterator_flags = SCTP_ITERATOR_DO_ALL_INP;
+
+ }
+ SCTP_IPI_ITERATOR_WQ_LOCK();
+ if (it->inp) {
+ SCTP_INP_INCR_REF(it->inp);
+ }
+ TAILQ_INSERT_TAIL(&sctppcbinfo.iteratorhead, it, sctp_nxt_itr);
+#if defined(SCTP_USE_THREAD_BASED_ITERATOR)
+ if (sctppcbinfo.iterator_running == 0) {
+ sctp_wakeup_iterator();
+ }
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+#else
+ if (it->inp)
+ SCTP_INP_DECR_REF(it->inp);
+ SCTP_IPI_ITERATOR_WQ_UNLOCK();
+ /* Init the timer */
+ SCTP_OS_TIMER_INIT(&it->tmr.timer);
+ /* add to the list of all iterators */
+ sctp_timer_start(SCTP_TIMER_TYPE_ITERATOR, (struct sctp_inpcb *)it,
+ NULL, NULL);
+#endif
+ /* sa_ignore MEMLEAK {memory is put on the tailq for the iterator} */
+ return (0);
+}
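A hedged usage sketch of the iterator started above; the callback signature and the SCTP_PCB_ANY_FLAGS / SCTP_PCB_ANY_FEATURES / SCTP_ASOC_ANY_STATE constants are assumed from the surrounding SCTP headers:

static int assoc_count;

static void
count_assocs(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr,
    uint32_t val)
{
	int *counter = ptr;

	(*counter)++;			/* runs once per matching association */
}

static void
walk_all_assocs(void)
{
	/* walk every endpoint and every association; no inp/end callbacks */
	(void)sctp_initiate_iterator(NULL, count_assocs, NULL,
	    SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES, SCTP_ASOC_ANY_STATE,
	    &assoc_count, 0, NULL, NULL, 0);
	/* the walk itself is asynchronous, so assoc_count fills in later */
}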
--- /dev/null
+++ sys/netinet/sctp_uio.h
@@ -0,0 +1,1104 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_uio.h,v 1.11 2005/03/06 16:04:18 itojun Exp $ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_uio.h,v 1.29 2007/09/18 15:16:39 rrs Exp $");
+
+#ifndef __sctp_uio_h__
+#define __sctp_uio_h__
+
+
+#if ! defined(_KERNEL)
+#include <stdint.h>
+#endif
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+typedef uint32_t sctp_assoc_t;
+
+/* On/Off setup for subscription to events */
+struct sctp_event_subscribe {
+ uint8_t sctp_data_io_event;
+ uint8_t sctp_association_event;
+ uint8_t sctp_address_event;
+ uint8_t sctp_send_failure_event;
+ uint8_t sctp_peer_error_event;
+ uint8_t sctp_shutdown_event;
+ uint8_t sctp_partial_delivery_event;
+ uint8_t sctp_adaptation_layer_event;
+ uint8_t sctp_authentication_event;
+ uint8_t sctp_stream_reset_events;
+};
+
+/* ancillary data types */
+#define SCTP_INIT 0x0001
+#define SCTP_SNDRCV 0x0002
+#define SCTP_EXTRCV 0x0003
+/*
+ * ancillary data structures
+ */
+struct sctp_initmsg {
+ uint32_t sinit_num_ostreams;
+ uint32_t sinit_max_instreams;
+ uint16_t sinit_max_attempts;
+ uint16_t sinit_max_init_timeo;
+};
+
+/* We add 96 bytes to the size of sctp_sndrcvinfo.
+ * This makes the current structure 128 bytes long,
+ * which is nicely 64-bit aligned but also leaves room
+ * for us to add more fields and keep ABI compatibility.
+ * For example, we already have the sctp_extrcvinfo,
+ * which is 48 bytes, when it is enabled.
+ */
+
+/*
+ * The assoc up needs a verfid;
+ * all sendrcvinfo's need a verfid for SENDING only.
+ */
+
+
+#define SCTP_ALIGN_RESV_PAD 96
+#define SCTP_ALIGN_RESV_PAD_SHORT 80
+
+struct sctp_sndrcvinfo {
+ uint16_t sinfo_stream;
+ uint16_t sinfo_ssn;
+ uint16_t sinfo_flags;
+ uint16_t sinfo_pr_policy;
+ uint32_t sinfo_ppid;
+ uint32_t sinfo_context;
+ uint32_t sinfo_timetolive;
+ uint32_t sinfo_tsn;
+ uint32_t sinfo_cumtsn;
+ sctp_assoc_t sinfo_assoc_id;
+ uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD];
+};
+
+struct sctp_extrcvinfo {
+ uint16_t sinfo_stream;
+ uint16_t sinfo_ssn;
+ uint16_t sinfo_flags;
+ uint16_t sinfo_pr_policy;
+ uint32_t sinfo_ppid;
+ uint32_t sinfo_context;
+ uint32_t sinfo_timetolive;
+ uint32_t sinfo_tsn;
+ uint32_t sinfo_cumtsn;
+ sctp_assoc_t sinfo_assoc_id;
+ uint16_t sreinfo_next_flags;
+ uint16_t sreinfo_next_stream;
+ uint32_t sreinfo_next_aid;
+ uint32_t sreinfo_next_length;
+ uint32_t sreinfo_next_ppid;
+ uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD_SHORT];
+};
+
+#define SCTP_NO_NEXT_MSG 0x0000
+#define SCTP_NEXT_MSG_AVAIL 0x0001
+#define SCTP_NEXT_MSG_ISCOMPLETE 0x0002
+#define SCTP_NEXT_MSG_IS_UNORDERED 0x0004
+#define SCTP_NEXT_MSG_IS_NOTIFICATION 0x0008
+
+struct sctp_snd_all_completes {
+ uint16_t sall_stream;
+ uint16_t sall_flags;
+ uint32_t sall_ppid;
+ uint32_t sall_context;
+ uint32_t sall_num_sent;
+ uint32_t sall_num_failed;
+};
+
+/* Flags that go into the sinfo->sinfo_flags field */
+#define SCTP_EOF 0x0100/* Start shutdown procedures */
+#define SCTP_ABORT 0x0200/* Send an ABORT to peer */
+#define SCTP_UNORDERED 0x0400/* Message is un-ordered */
+#define SCTP_ADDR_OVER 0x0800/* Override the primary-address */
+#define SCTP_SENDALL 0x1000/* Send this on all associations */
+#define SCTP_EOR 0x2000/* end of message signal */
+#define SCTP_PR_POLICY_VALID 0x4000 /* pr sctp policy valid */
+
+#define INVALID_SINFO_FLAG(x) (((x) & 0xffffff00 \
+ & ~(SCTP_EOF | SCTP_ABORT | SCTP_UNORDERED |\
+ SCTP_ADDR_OVER | SCTP_SENDALL | SCTP_EOR)) != 0)
+/* for the endpoint */
+
+/* The lower byte is an enumeration of PR-SCTP policies */
+#define SCTP_PR_SCTP_TTL 0x0001/* Time based PR-SCTP */
+#define SCTP_PR_SCTP_BUF 0x0002/* Buffer based PR-SCTP */
+#define SCTP_PR_SCTP_RTX 0x0003/* Number of retransmissions based PR-SCTP */
+
+#define PR_SCTP_POLICY(x) ((x) & 0xff)
+#define PR_SCTP_ENABLED(x) (PR_SCTP_POLICY(x) != 0)
+#define PR_SCTP_TTL_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_TTL)
+#define PR_SCTP_BUF_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_BUF)
+#define PR_SCTP_RTX_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_RTX)
+#define PR_SCTP_INVALID_POLICY(x) (PR_SCTP_POLICY(x) > SCTP_PR_SCTP_RTX)
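A small user-level fragment showing how the sinfo flags and the PR-SCTP policy byte above fit together (stream number and lifetime are arbitrary; the usual socket/SCTP headers are assumed to be included):

static int
build_pr_sinfo(struct sctp_sndrcvinfo *sinfo)
{
	memset(sinfo, 0, sizeof(*sinfo));
	sinfo->sinfo_stream = 2;			/* arbitrary stream */
	sinfo->sinfo_flags = SCTP_UNORDERED;		/* un-ordered delivery */
	sinfo->sinfo_pr_policy = SCTP_PR_SCTP_TTL;	/* time based PR-SCTP */
	sinfo->sinfo_timetolive = 5000;			/* lifetime for that policy */

	/* the same validity macros defined above can vet the request */
	if (INVALID_SINFO_FLAG(sinfo->sinfo_flags) ||
	    PR_SCTP_INVALID_POLICY(sinfo->sinfo_pr_policy))
		return (-1);
	return (0);
}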
+/* Stat's */
+struct sctp_pcbinfo {
+ uint32_t ep_count;
+ uint32_t asoc_count;
+ uint32_t laddr_count;
+ uint32_t raddr_count;
+ uint32_t chk_count;
+ uint32_t readq_count;
+ uint32_t free_chunks;
+ uint32_t stream_oque;
+};
+
+struct sctp_sockstat {
+ sctp_assoc_t ss_assoc_id;
+ uint32_t ss_total_sndbuf;
+ uint32_t ss_total_recv_buf;
+};
+
+/*
+ * notification event structures
+ */
+
+/*
+ * association change event
+ */
+struct sctp_assoc_change {
+ uint16_t sac_type;
+ uint16_t sac_flags;
+ uint32_t sac_length;
+ uint16_t sac_state;
+ uint16_t sac_error;
+ uint16_t sac_outbound_streams;
+ uint16_t sac_inbound_streams;
+ sctp_assoc_t sac_assoc_id;
+};
+
+/* sac_state values */
+#define SCTP_COMM_UP 0x0001
+#define SCTP_COMM_LOST 0x0002
+#define SCTP_RESTART 0x0003
+#define SCTP_SHUTDOWN_COMP 0x0004
+#define SCTP_CANT_STR_ASSOC 0x0005
+
+
+/*
+ * Address event
+ */
+struct sctp_paddr_change {
+ uint16_t spc_type;
+ uint16_t spc_flags;
+ uint32_t spc_length;
+ struct sockaddr_storage spc_aaddr;
+ uint32_t spc_state;
+ uint32_t spc_error;
+ sctp_assoc_t spc_assoc_id;
+ uint8_t spc_padding[4];
+};
+
+/* paddr state values */
+#define SCTP_ADDR_AVAILABLE 0x0001
+#define SCTP_ADDR_UNREACHABLE 0x0002
+#define SCTP_ADDR_REMOVED 0x0003
+#define SCTP_ADDR_ADDED 0x0004
+#define SCTP_ADDR_MADE_PRIM 0x0005
+#define SCTP_ADDR_CONFIRMED 0x0006
+
+/*
+ * CAUTION: these are user-exposed SCTP addr reachability states and must be
+ * compatible with the SCTP_ADDR states in sctp_constants.h
+ */
+#ifdef SCTP_ACTIVE
+#undef SCTP_ACTIVE
+#endif
+#define SCTP_ACTIVE 0x0001 /* SCTP_ADDR_REACHABLE */
+
+#ifdef SCTP_INACTIVE
+#undef SCTP_INACTIVE
+#endif
+#define SCTP_INACTIVE 0x0002 /* SCTP_ADDR_NOT_REACHABLE */
+
+#ifdef SCTP_UNCONFIRMED
+#undef SCTP_UNCONFIRMED
+#endif
+#define SCTP_UNCONFIRMED 0x0200 /* SCTP_ADDR_UNCONFIRMED */
+
+#ifdef SCTP_NOHEARTBEAT
+#undef SCTP_NOHEARTBEAT
+#endif
+#define SCTP_NOHEARTBEAT 0x0040 /* SCTP_ADDR_NOHB */
+
+
+/* remote error events */
+struct sctp_remote_error {
+ uint16_t sre_type;
+ uint16_t sre_flags;
+ uint32_t sre_length;
+ uint16_t sre_error;
+ sctp_assoc_t sre_assoc_id;
+ uint8_t sre_data[4];
+};
+
+/* data send failure event */
+struct sctp_send_failed {
+ uint16_t ssf_type;
+ uint16_t ssf_flags;
+ uint32_t ssf_length;
+ uint32_t ssf_error;
+ struct sctp_sndrcvinfo ssf_info;
+ sctp_assoc_t ssf_assoc_id;
+ uint8_t ssf_data[0];
+};
+
+/* flag that indicates state of data */
+#define SCTP_DATA_UNSENT 0x0001 /* inqueue never on wire */
+#define SCTP_DATA_SENT 0x0002 /* on wire at failure */
+
+/* shutdown event */
+struct sctp_shutdown_event {
+ uint16_t sse_type;
+ uint16_t sse_flags;
+ uint32_t sse_length;
+ sctp_assoc_t sse_assoc_id;
+};
+
+/* Adaptation layer indication stuff */
+struct sctp_adaptation_event {
+ uint16_t sai_type;
+ uint16_t sai_flags;
+ uint32_t sai_length;
+ uint32_t sai_adaptation_ind;
+ sctp_assoc_t sai_assoc_id;
+};
+
+struct sctp_setadaptation {
+ uint32_t ssb_adaptation_ind;
+};
+
+/* compatible old spelling */
+struct sctp_adaption_event {
+ uint16_t sai_type;
+ uint16_t sai_flags;
+ uint32_t sai_length;
+ uint32_t sai_adaption_ind;
+ sctp_assoc_t sai_assoc_id;
+};
+
+struct sctp_setadaption {
+ uint32_t ssb_adaption_ind;
+};
+
+
+/*
+ * Partial Delivery API event
+ */
+struct sctp_pdapi_event {
+ uint16_t pdapi_type;
+ uint16_t pdapi_flags;
+ uint32_t pdapi_length;
+ uint32_t pdapi_indication;
+ uint16_t pdapi_stream;
+ uint16_t pdapi_seq;
+ sctp_assoc_t pdapi_assoc_id;
+};
+
+/* indication values */
+#define SCTP_PARTIAL_DELIVERY_ABORTED 0x0001
+
+
+/*
+ * authentication key event
+ */
+struct sctp_authkey_event {
+ uint16_t auth_type;
+ uint16_t auth_flags;
+ uint32_t auth_length;
+ uint16_t auth_keynumber;
+ uint16_t auth_altkeynumber;
+ uint32_t auth_indication;
+ sctp_assoc_t auth_assoc_id;
+};
+
+/* indication values */
+#define SCTP_AUTH_NEWKEY 0x0001
+
+
+/*
+ * stream reset event
+ */
+struct sctp_stream_reset_event {
+ uint16_t strreset_type;
+ uint16_t strreset_flags;
+ uint32_t strreset_length;
+ sctp_assoc_t strreset_assoc_id;
+ uint16_t strreset_list[0];
+};
+
+/* flags in strreset_flags field */
+#define SCTP_STRRESET_INBOUND_STR 0x0001
+#define SCTP_STRRESET_OUTBOUND_STR 0x0002
+#define SCTP_STRRESET_ALL_STREAMS 0x0004
+#define SCTP_STRRESET_STREAM_LIST 0x0008
+#define SCTP_STRRESET_FAILED 0x0010
+
+
+/* SCTP notification event */
+struct sctp_tlv {
+ uint16_t sn_type;
+ uint16_t sn_flags;
+ uint32_t sn_length;
+};
+
+union sctp_notification {
+ struct sctp_tlv sn_header;
+ struct sctp_assoc_change sn_assoc_change;
+ struct sctp_paddr_change sn_paddr_change;
+ struct sctp_remote_error sn_remote_error;
+ struct sctp_send_failed sn_send_failed;
+ struct sctp_shutdown_event sn_shutdown_event;
+ struct sctp_adaptation_event sn_adaptation_event;
+ /* compatibility: same as above */
+ struct sctp_adaption_event sn_adaption_event;
+ struct sctp_pdapi_event sn_pdapi_event;
+ struct sctp_authkey_event sn_auth_event;
+ struct sctp_stream_reset_event sn_strreset_event;
+};
+
+/* notification types */
+#define SCTP_ASSOC_CHANGE 0x0001
+#define SCTP_PEER_ADDR_CHANGE 0x0002
+#define SCTP_REMOTE_ERROR 0x0003
+#define SCTP_SEND_FAILED 0x0004
+#define SCTP_SHUTDOWN_EVENT 0x0005
+#define SCTP_ADAPTATION_INDICATION 0x0006
+/* same as above */
+#define SCTP_ADAPTION_INDICATION 0x0006
+#define SCTP_PARTIAL_DELIVERY_EVENT 0x0007
+#define SCTP_AUTHENTICATION_EVENT 0x0008
+#define SCTP_STREAM_RESET_EVENT 0x0009
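An illustrative receive-side dispatch over the notification union and the type codes above (user level; the buffer is assumed to hold one complete notification delivered with MSG_NOTIFICATION set):

static void
handle_notification(const char *buf)
{
	const union sctp_notification *snp;

	snp = (const union sctp_notification *)buf;
	switch (snp->sn_header.sn_type) {
	case SCTP_ASSOC_CHANGE:
		/* sn_assoc_change.sac_state: SCTP_COMM_UP, SCTP_COMM_LOST, ... */
		break;
	case SCTP_PEER_ADDR_CHANGE:
		/* sn_paddr_change.spc_state: SCTP_ADDR_AVAILABLE, ... */
		break;
	case SCTP_SHUTDOWN_EVENT:
		break;
	default:
		break;
	}
}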
+
+
+/*
+ * socket option structs
+ */
+
+struct sctp_paddrparams {
+ struct sockaddr_storage spp_address;
+ sctp_assoc_t spp_assoc_id;
+ uint32_t spp_hbinterval;
+ uint32_t spp_pathmtu;
+ uint32_t spp_flags;
+ uint32_t spp_ipv6_flowlabel;
+ uint16_t spp_pathmaxrxt;
+ uint8_t spp_ipv4_tos;
+};
+
+#define SPP_HB_ENABLE 0x00000001
+#define SPP_HB_DISABLE 0x00000002
+#define SPP_HB_DEMAND 0x00000004
+#define SPP_PMTUD_ENABLE 0x00000008
+#define SPP_PMTUD_DISABLE 0x00000010
+#define SPP_HB_TIME_IS_ZERO 0x00000080
+#define SPP_IPV6_FLOWLABEL 0x00000100
+#define SPP_IPV4_TOS 0x00000200
+
+struct sctp_paddrinfo {
+ struct sockaddr_storage spinfo_address;
+ sctp_assoc_t spinfo_assoc_id;
+ int32_t spinfo_state;
+ uint32_t spinfo_cwnd;
+ uint32_t spinfo_srtt;
+ uint32_t spinfo_rto;
+ uint32_t spinfo_mtu;
+};
+
+struct sctp_rtoinfo {
+ sctp_assoc_t srto_assoc_id;
+ uint32_t srto_initial;
+ uint32_t srto_max;
+ uint32_t srto_min;
+};
+
+struct sctp_assocparams {
+ sctp_assoc_t sasoc_assoc_id;
+ uint32_t sasoc_peer_rwnd;
+ uint32_t sasoc_local_rwnd;
+ uint32_t sasoc_cookie_life;
+ uint16_t sasoc_asocmaxrxt;
+ uint16_t sasoc_number_peer_destinations;
+};
+
+struct sctp_setprim {
+ struct sockaddr_storage ssp_addr;
+ sctp_assoc_t ssp_assoc_id;
+ uint8_t ssp_padding[4];
+};
+
+struct sctp_setpeerprim {
+ struct sockaddr_storage sspp_addr;
+ sctp_assoc_t sspp_assoc_id;
+ uint8_t sspp_padding[4];
+};
+
+struct sctp_getaddresses {
+ sctp_assoc_t sget_assoc_id;
+ /* addr is filled in for N * sockaddr_storage */
+ struct sockaddr addr[1];
+};
+
+struct sctp_setstrm_timeout {
+ sctp_assoc_t ssto_assoc_id;
+ uint32_t ssto_timeout;
+ uint32_t ssto_streamid_start;
+ uint32_t ssto_streamid_end;
+};
+
+struct sctp_status {
+ sctp_assoc_t sstat_assoc_id;
+ int32_t sstat_state;
+ uint32_t sstat_rwnd;
+ uint16_t sstat_unackdata;
+ uint16_t sstat_penddata;
+ uint16_t sstat_instrms;
+ uint16_t sstat_outstrms;
+ uint32_t sstat_fragmentation_point;
+ struct sctp_paddrinfo sstat_primary;
+};
+
+/*
+ * AUTHENTICATION support
+ */
+/* SCTP_AUTH_CHUNK */
+struct sctp_authchunk {
+ uint8_t sauth_chunk;
+};
+
+/* SCTP_AUTH_KEY */
+struct sctp_authkey {
+ sctp_assoc_t sca_assoc_id;
+ uint16_t sca_keynumber;
+ uint8_t sca_key[0];
+};
+
+/* SCTP_HMAC_IDENT */
+struct sctp_hmacalgo {
+ uint16_t shmac_idents[0];
+};
+
+/* AUTH hmac_id */
+#define SCTP_AUTH_HMAC_ID_RSVD 0x0000
+#define SCTP_AUTH_HMAC_ID_SHA1 0x0001 /* default, mandatory */
+#define SCTP_AUTH_HMAC_ID_MD5 0x0002 /* deprecated */
+#define SCTP_AUTH_HMAC_ID_SHA256 0x0003
+#define SCTP_AUTH_HMAC_ID_SHA224 0x0004
+#define SCTP_AUTH_HMAC_ID_SHA384 0x0005
+#define SCTP_AUTH_HMAC_ID_SHA512 0x0006
+
+
+/* SCTP_AUTH_ACTIVE_KEY / SCTP_AUTH_DELETE_KEY */
+struct sctp_authkeyid {
+ sctp_assoc_t scact_assoc_id;
+ uint16_t scact_keynumber;
+};
+
+/* SCTP_PEER_AUTH_CHUNKS / SCTP_LOCAL_AUTH_CHUNKS */
+struct sctp_authchunks {
+ sctp_assoc_t gauth_assoc_id;
+ uint8_t gauth_chunks[0];
+};
+
+struct sctp_assoc_value {
+ sctp_assoc_t assoc_id;
+ uint32_t assoc_value;
+};
+
+struct sctp_assoc_ids {
+ sctp_assoc_t gaids_assoc_id[0];
+};
+
+struct sctp_sack_info {
+ sctp_assoc_t sack_assoc_id;
+ uint32_t sack_delay;
+ uint32_t sack_freq;
+};
+
+struct sctp_cwnd_args {
+ struct sctp_nets *net; /* network to *//* FIXME: LP64 issue */
+ uint32_t cwnd_new_value;/* cwnd in k */
+ uint32_t inflight; /* flightsize in k */
+ uint32_t pseudo_cumack;
+ uint32_t cwnd_augment; /* increment to it */
+ uint8_t meets_pseudo_cumack;
+ uint8_t need_new_pseudo_cumack;
+ uint8_t cnt_in_send;
+ uint8_t cnt_in_str;
+};
+
+struct sctp_blk_args {
+ uint32_t onsb; /* in 1k bytes */
+ uint32_t sndlen; /* len of send being attempted */
+ uint32_t peer_rwnd; /* rwnd of peer */
+ uint16_t send_sent_qcnt;/* chnk cnt */
+ uint16_t stream_qcnt; /* chnk cnt */
+ uint16_t chunks_on_oque;/* chunks out */
+ uint16_t flight_size; /* flight size in k */
+};
+
+/*
+ * Max we can reset in one setting, note this is dictated not by the define
+ * Max we can reset in one setting; note this is dictated not by the define
+ * but by the size of an mbuf cluster, so don't change this define and think
+ * you can specify more. You must do multiple resets if you want to reset
+ * more than SCTP_MAX_EXPLICIT_STR_RESET streams.
+#define SCTP_MAX_EXPLICT_STR_RESET 1000
+
+#define SCTP_RESET_LOCAL_RECV 0x0001
+#define SCTP_RESET_LOCAL_SEND 0x0002
+#define SCTP_RESET_BOTH 0x0003
+#define SCTP_RESET_TSN 0x0004
+
+struct sctp_stream_reset {
+ sctp_assoc_t strrst_assoc_id;
+ uint16_t strrst_flags;
+ uint16_t strrst_num_streams; /* 0 == ALL */
+ uint16_t strrst_list[0];/* list if strrst_num_streams is not 0 */
+};
+
+
+struct sctp_get_nonce_values {
+ sctp_assoc_t gn_assoc_id;
+ uint32_t gn_peers_tag;
+ uint32_t gn_local_tag;
+};
+
+/* Debugging logs */
+struct sctp_str_log {
+ void *stcb; /* FIXME: LP64 issue */
+ uint32_t n_tsn;
+ uint32_t e_tsn;
+ uint16_t n_sseq;
+ uint16_t e_sseq;
+ uint16_t strm;
+};
+
+struct sctp_sb_log {
+ void *stcb; /* FIXME: LP64 issue */
+ uint32_t so_sbcc;
+ uint32_t stcb_sbcc;
+ uint32_t incr;
+};
+
+struct sctp_fr_log {
+ uint32_t largest_tsn;
+ uint32_t largest_new_tsn;
+ uint32_t tsn;
+};
+
+struct sctp_fr_map {
+ uint32_t base;
+ uint32_t cum;
+ uint32_t high;
+};
+
+struct sctp_rwnd_log {
+ uint32_t rwnd;
+ uint32_t send_size;
+ uint32_t overhead;
+ uint32_t new_rwnd;
+};
+
+struct sctp_mbcnt_log {
+ uint32_t total_queue_size;
+ uint32_t size_change;
+ uint32_t total_queue_mb_size;
+ uint32_t mbcnt_change;
+};
+
+struct sctp_sack_log {
+ uint32_t cumack;
+ uint32_t oldcumack;
+ uint32_t tsn;
+ uint16_t numGaps;
+ uint16_t numDups;
+};
+
+struct sctp_lock_log {
+ void *sock; /* FIXME: LP64 issue */
+ void *inp; /* FIXME: LP64 issue */
+ uint8_t tcb_lock;
+ uint8_t inp_lock;
+ uint8_t info_lock;
+ uint8_t sock_lock;
+ uint8_t sockrcvbuf_lock;
+ uint8_t socksndbuf_lock;
+ uint8_t create_lock;
+ uint8_t resv;
+};
+
+struct sctp_rto_log {
+ void *net; /* FIXME: LP64 issue */
+ uint32_t rtt;
+};
+
+struct sctp_nagle_log {
+ void *stcb; /* FIXME: LP64 issue */
+ uint32_t total_flight;
+ uint32_t total_in_queue;
+ uint16_t count_in_queue;
+ uint16_t count_in_flight;
+};
+
+struct sctp_sbwake_log {
+ void *stcb; /* FIXME: LP64 issue */
+ uint16_t send_q;
+ uint16_t sent_q;
+ uint16_t flight;
+ uint16_t wake_cnt;
+ uint8_t stream_qcnt; /* chnk cnt */
+ uint8_t chunks_on_oque; /* chunks out */
+ uint8_t sbflags;
+ uint8_t sctpflags;
+};
+
+struct sctp_misc_info {
+ uint32_t log1;
+ uint32_t log2;
+ uint32_t log3;
+ uint32_t log4;
+};
+
+struct sctp_log_closing {
+ void *inp; /* FIXME: LP64 issue */
+ void *stcb; /* FIXME: LP64 issue */
+ uint32_t sctp_flags;
+ uint16_t state;
+ int16_t loc;
+};
+
+struct sctp_mbuf_log {
+ struct mbuf *mp; /* FIXME: LP64 issue */
+ caddr_t ext;
+ caddr_t data;
+ uint16_t size;
+ uint8_t refcnt;
+ uint8_t mbuf_flags;
+};
+
+struct sctp_cwnd_log {
+ uint64_t time_event;
+ uint8_t from;
+ uint8_t event_type;
+ uint8_t resv[2];
+ union {
+ struct sctp_log_closing close;
+ struct sctp_blk_args blk;
+ struct sctp_cwnd_args cwnd;
+ struct sctp_str_log strlog;
+ struct sctp_fr_log fr;
+ struct sctp_fr_map map;
+ struct sctp_rwnd_log rwnd;
+ struct sctp_mbcnt_log mbcnt;
+ struct sctp_sack_log sack;
+ struct sctp_lock_log lock;
+ struct sctp_rto_log rto;
+ struct sctp_sb_log sb;
+ struct sctp_nagle_log nagle;
+ struct sctp_sbwake_log wake;
+ struct sctp_mbuf_log mb;
+ struct sctp_misc_info misc;
+ } x;
+};
+
+struct sctp_cwnd_log_req {
+ int32_t num_in_log; /* Number in log */
+ int32_t num_ret; /* Number returned */
+ int32_t start_at; /* start at this one */
+ int32_t end_at; /* end at this one */
+ struct sctp_cwnd_log log[0];
+};
+
+struct sctp_timeval {
+ uint32_t tv_sec;
+ uint32_t tv_usec;
+};
+
+struct sctpstat {
+ /* MIB according to RFC 3873 */
+ uint32_t sctps_currestab; /* sctpStats 1 (Gauge32) */
+ uint32_t sctps_activeestab; /* sctpStats 2 (Counter32) */
+ uint32_t sctps_restartestab;
+ uint32_t sctps_collisionestab;
+ uint32_t sctps_passiveestab; /* sctpStats 3 (Counter32) */
+ uint32_t sctps_aborted; /* sctpStats 4 (Counter32) */
+ uint32_t sctps_shutdown;/* sctpStats 5 (Counter32) */
+ uint32_t sctps_outoftheblue; /* sctpStats 6 (Counter32) */
+ uint32_t sctps_checksumerrors; /* sctpStats 7 (Counter32) */
+ uint32_t sctps_outcontrolchunks; /* sctpStats 8 (Counter64) */
+ uint32_t sctps_outorderchunks; /* sctpStats 9 (Counter64) */
+ uint32_t sctps_outunorderchunks; /* sctpStats 10 (Counter64) */
+ uint32_t sctps_incontrolchunks; /* sctpStats 11 (Counter64) */
+ uint32_t sctps_inorderchunks; /* sctpStats 12 (Counter64) */
+ uint32_t sctps_inunorderchunks; /* sctpStats 13 (Counter64) */
+ uint32_t sctps_fragusrmsgs; /* sctpStats 14 (Counter64) */
+ uint32_t sctps_reasmusrmsgs; /* sctpStats 15 (Counter64) */
+ uint32_t sctps_outpackets; /* sctpStats 16 (Counter64) */
+ uint32_t sctps_inpackets; /* sctpStats 17 (Counter64) */
+
+ /* input statistics: */
+ uint32_t sctps_recvpackets; /* total input packets */
+ uint32_t sctps_recvdatagrams; /* total input datagrams */
+ uint32_t sctps_recvpktwithdata; /* total packets that had data */
+ uint32_t sctps_recvsacks; /* total input SACK chunks */
+ uint32_t sctps_recvdata;/* total input DATA chunks */
+ uint32_t sctps_recvdupdata; /* total input duplicate DATA chunks */
+ uint32_t sctps_recvheartbeat; /* total input HB chunks */
+ uint32_t sctps_recvheartbeatack; /* total input HB-ACK chunks */
+ uint32_t sctps_recvecne;/* total input ECNE chunks */
+ uint32_t sctps_recvauth;/* total input AUTH chunks */
+ uint32_t sctps_recvauthmissing; /* total input chunks missing AUTH */
+ uint32_t sctps_recvivalhmacid; /* total number of invalid HMAC ids
+ * received */
+ uint32_t sctps_recvivalkeyid; /* total number of invalid secret ids
+ * received */
+ uint32_t sctps_recvauthfailed; /* total number of auth failed */
+ uint32_t sctps_recvexpress; /* total fast path receives all one
+ * chunk */
+ uint32_t sctps_recvexpressm; /* total fast path multi-part data */
+ /* output statistics: */
+ uint32_t sctps_sendpackets; /* total output packets */
+ uint32_t sctps_sendsacks; /* total output SACKs */
+ uint32_t sctps_senddata;/* total output DATA chunks */
+ uint32_t sctps_sendretransdata; /* total output retransmitted DATA
+ * chunks */
+ uint32_t sctps_sendfastretrans; /* total output fast retransmitted
+ * DATA chunks */
+ uint32_t sctps_sendmultfastretrans; /* total FR's that happened
+ * more than once to same
+ * chunk (u-del multi-fr
+ * algo). */
+ uint32_t sctps_sendheartbeat; /* total output HB chunks */
+ uint32_t sctps_sendecne;/* total output ECNE chunks */
+ uint32_t sctps_sendauth;/* total output AUTH chunks FIXME */
+ uint32_t sctps_senderrors; /* ip_output error counter */
+ /* PCKDROPREP statistics: */
+ uint32_t sctps_pdrpfmbox; /* Packet drop from middle box */
+ uint32_t sctps_pdrpfehos; /* P-drop from end host */
+ uint32_t sctps_pdrpmbda;/* P-drops with data */
+ uint32_t sctps_pdrpmbct;/* P-drops, non-data, non-endhost */
+ uint32_t sctps_pdrpbwrpt; /* P-drop, non-endhost, bandwidth rep
+ * only */
+ uint32_t sctps_pdrpcrupt; /* P-drop, not enough for chunk header */
+ uint32_t sctps_pdrpnedat; /* P-drop, not enough data to confirm */
+ uint32_t sctps_pdrppdbrk; /* P-drop, where process_chunk_drop
+ * said break */
+ uint32_t sctps_pdrptsnnf; /* P-drop, could not find TSN */
+ uint32_t sctps_pdrpdnfnd; /* P-drop, attempt reverse TSN lookup */
+ uint32_t sctps_pdrpdiwnp; /* P-drop, e-host confirms zero-rwnd */
+ uint32_t sctps_pdrpdizrw; /* P-drop, midbox confirms no space */
+ uint32_t sctps_pdrpbadd;/* P-drop, data did not match TSN */
+ uint32_t sctps_pdrpmark;/* P-drop, TSN's marked for Fast Retran */
+ /* timeouts */
+ uint32_t sctps_timoiterator; /* Number of iterator timers that
+ * fired */
+ uint32_t sctps_timodata;/* Number of T3 data time outs */
+ uint32_t sctps_timowindowprobe; /* Number of window probe (T3) timers
+ * that fired */
+ uint32_t sctps_timoinit;/* Number of INIT timers that fired */
+ uint32_t sctps_timosack;/* Number of sack timers that fired */
+ uint32_t sctps_timoshutdown; /* Number of shutdown timers that
+ * fired */
+ uint32_t sctps_timoheartbeat; /* Number of heartbeat timers that
+ * fired */
+ uint32_t sctps_timocookie; /* Number of times a cookie timeout
+ * fired */
+ uint32_t sctps_timosecret; /* Number of times an endpoint changed
+ * its cookie secret */
+ uint32_t sctps_timopathmtu; /* Number of PMTU timers that fired */
+ uint32_t sctps_timoshutdownack; /* Number of shutdown ack timers that
+ * fired */
+ uint32_t sctps_timoshutdownguard; /* Number of shutdown guard
+ * timers that fired */
+ uint32_t sctps_timostrmrst; /* Number of stream reset timers that
+ * fired */
+ uint32_t sctps_timoearlyfr; /* Number of early FR timers that
+ * fired */
+ uint32_t sctps_timoasconf; /* Number of times an asconf timer
+ * fired */
+ uint32_t sctps_timodelprim; /* Number of times a prim_deleted
+ * timer fired */
+ uint32_t sctps_timoautoclose; /* Number of times auto close timer
+ * fired */
+ uint32_t sctps_timoassockill; /* Number of asoc free timers expired */
+ uint32_t sctps_timoinpkill; /* Number of inp free timers expired */
+ /* Early fast retransmission counters */
+ uint32_t sctps_earlyfrstart;
+ uint32_t sctps_earlyfrstop;
+ uint32_t sctps_earlyfrmrkretrans;
+ uint32_t sctps_earlyfrstpout;
+ uint32_t sctps_earlyfrstpidsck1;
+ uint32_t sctps_earlyfrstpidsck2;
+ uint32_t sctps_earlyfrstpidsck3;
+ uint32_t sctps_earlyfrstpidsck4;
+ uint32_t sctps_earlyfrstrid;
+ uint32_t sctps_earlyfrstrout;
+ uint32_t sctps_earlyfrstrtmr;
+ /* others */
+ uint32_t sctps_hdrops; /* packet shorter than header */
+ uint32_t sctps_badsum; /* checksum error */
+ uint32_t sctps_noport; /* no endpoint for port */
+ uint32_t sctps_badvtag; /* bad v-tag */
+ uint32_t sctps_badsid; /* bad SID */
+ uint32_t sctps_nomem; /* no memory */
+ uint32_t sctps_fastretransinrtt; /* number of multiple FR in a
+ * RTT window */
+ uint32_t sctps_markedretrans;
+ uint32_t sctps_naglesent; /* nagle allowed sending */
+ uint32_t sctps_naglequeued; /* nagle doesn't allow sending */
+ uint32_t sctps_maxburstqueued; /* max burst doesn't allow sending */
+ uint32_t sctps_ifnomemqueued; /* look ahead tells us no memory in
+ * interface ring buffer OR we had a
+ * send error and are queuing one
+ * send. */
+ uint32_t sctps_windowprobed; /* total number of window probes sent */
+ uint32_t sctps_lowlevelerr; /* total times an output error causes
+ * us to clamp down on next user send. */
+ uint32_t sctps_lowlevelerrusr; /* total times sctp_senderrors were
+ * caused by a user-invoked send,
+ * not by a sack response */
+ uint32_t sctps_datadropchklmt; /* Number of in data drops due to
+ * chunk limit reached */
+ uint32_t sctps_datadroprwnd; /* Number of in data drops due to rwnd
+ * limit reached */
+ uint32_t sctps_ecnereducedcwnd; /* Number of times a ECN reduced the
+ * cwnd */
+ uint32_t sctps_vtagexpress; /* Used express lookup via vtag */
+ uint32_t sctps_vtagbogus; /* Collision in express lookup. */
+ uint32_t sctps_primary_randry; /* Number of times the sender ran dry
+ * of user data on primary */
+ uint32_t sctps_cmt_randry; /* Same as above, for CMT */
+ uint32_t sctps_slowpath_sack; /* Sacks the slow way */
+ uint32_t sctps_wu_sacks_sent; /* Window Update only sacks sent */
+ uint32_t sctps_sends_with_flags; /* number of sends with
+ * sinfo_flags !=0 */
+ uint32_t sctps_sends_with_unord; /* number of unordered sends */
+ uint32_t sctps_sends_with_eof; /* number of sends with EOF flag set */
+ uint32_t sctps_sends_with_abort; /* number of sends with ABORT
+ * flag set */
+ uint32_t sctps_protocol_drain_calls; /* number of times protocol
+ * drain called */
+ uint32_t sctps_protocol_drains_done; /* number of times we did a
+ * protocol drain */
+ uint32_t sctps_read_peeks; /* Number of times recv was called
+ * with peek */
+ uint32_t sctps_cached_chk; /* Number of cached chunks used */
+ uint32_t sctps_cached_strmoq; /* Number of cached stream oq's used */
+ uint32_t sctps_left_abandon; /* Number of unread messages abandoned
+ * by close */
+ uint32_t sctps_send_burst_avoid; /* Send burst avoidance,
+ * already max burst inflight
+ * to net */
+ uint32_t sctps_send_cwnd_avoid; /* Send cwnd full avoidance, already
+ * max burst inflight to net */
+ uint32_t sctps_fwdtsn_map_over; /* number of map array over-runs via
+ * fwd-tsn's */
+
+ struct sctp_timeval sctps_discontinuitytime; /* sctpStats 18
+ * (TimeStamp) */
+};
+
+#define SCTP_STAT_INCR(_x) SCTP_STAT_INCR_BY(_x,1)
+#define SCTP_STAT_DECR(_x) SCTP_STAT_DECR_BY(_x,1)
+#define SCTP_STAT_INCR_BY(_x,_d) atomic_add_int(&sctpstat._x, _d)
+#define SCTP_STAT_DECR_BY(_x,_d) atomic_subtract_int(&sctpstat._x, _d)
+
+/* The following macros are for handling MIB values, */
+#define SCTP_STAT_INCR_COUNTER32(_x) SCTP_STAT_INCR(_x)
+#define SCTP_STAT_INCR_COUNTER64(_x) SCTP_STAT_INCR(_x)
+#define SCTP_STAT_INCR_GAUGE32(_x) SCTP_STAT_INCR(_x)
+#define SCTP_STAT_DECR_COUNTER32(_x) SCTP_STAT_DECR(_x)
+#define SCTP_STAT_DECR_COUNTER64(_x) SCTP_STAT_DECR(_x)
+#define SCTP_STAT_DECR_GAUGE32(_x) SCTP_STAT_DECR(_x)
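For example, the MIB wrappers above all funnel into the same atomic helper on the global sctpstat:

/*
 * SCTP_STAT_INCR_COUNTER32(sctps_aborted) expands, step by step, to
 *   SCTP_STAT_INCR(sctps_aborted)
 *   SCTP_STAT_INCR_BY(sctps_aborted, 1)
 *   atomic_add_int(&sctpstat.sctps_aborted, 1)
 */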
+
+union sctp_sockstore {
+#if defined(INET) || !defined(_KERNEL)
+ struct sockaddr_in sin;
+#endif
+#if defined(INET6) || !defined(_KERNEL)
+ struct sockaddr_in6 sin6;
+#endif
+ struct sockaddr sa;
+};
+
+struct xsctp_inpcb {
+ uint32_t last;
+ uint32_t flags;
+ uint32_t features;
+ uint32_t total_sends;
+ uint32_t total_recvs;
+ uint32_t total_nospaces;
+ uint32_t fragmentation_point;
+ uint16_t local_port;
+ uint16_t qlen;
+ uint16_t maxqlen;
+};
+
+struct xsctp_tcb {
+ union sctp_sockstore primary_addr; /* sctpAssocEntry 5/6 */
+ uint32_t last;
+ uint32_t heartbeat_interval; /* sctpAssocEntry 7 */
+ uint32_t state; /* sctpAssocEntry 8 */
+ uint32_t in_streams; /* sctpAssocEntry 9 */
+ uint32_t out_streams; /* sctpAssocEntry 10 */
+ uint32_t max_nr_retrans;/* sctpAssocEntry 11 */
+ uint32_t primary_process; /* sctpAssocEntry 12 */
+ uint32_t T1_expireries; /* sctpAssocEntry 13 */
+ uint32_t T2_expireries; /* sctpAssocEntry 14 */
+ uint32_t retransmitted_tsns; /* sctpAssocEntry 15 */
+ uint32_t total_sends;
+ uint32_t total_recvs;
+ uint32_t local_tag;
+ uint32_t remote_tag;
+ uint32_t initial_tsn;
+ uint32_t highest_tsn;
+ uint32_t cumulative_tsn;
+ uint32_t cumulative_tsn_ack;
+ uint32_t mtu;
+ uint32_t refcnt;
+ uint16_t local_port; /* sctpAssocEntry 3 */
+ uint16_t remote_port; /* sctpAssocEntry 4 */
+ struct sctp_timeval start_time; /* sctpAssocEntry 16 */
+ struct sctp_timeval discontinuity_time; /* sctpAssocEntry 17 */
+};
+
+struct xsctp_laddr {
+ union sctp_sockstore address; /* sctpAssocLocalAddrEntry 1/2 */
+ uint32_t last;
+ struct sctp_timeval start_time; /* sctpAssocLocalAddrEntry 3 */
+};
+
+struct xsctp_raddr {
+ union sctp_sockstore address; /* sctpAssocLocalRemEntry 1/2 */
+ uint32_t last;
+ uint32_t rto; /* sctpAssocLocalRemEntry 5 */
+ uint32_t max_path_rtx; /* sctpAssocLocalRemEntry 6 */
+ uint32_t rtx; /* sctpAssocLocalRemEntry 7 */
+ uint32_t error_counter; /* */
+ uint32_t cwnd; /* */
+ uint32_t flight_size; /* */
+ uint32_t mtu; /* */
+ uint8_t active; /* sctpAssocLocalRemEntry 3 */
+ uint8_t confirmed; /* */
+ uint8_t heartbeat_enabled; /* sctpAssocLocalRemEntry 4 */
+ struct sctp_timeval start_time; /* sctpAssocLocalRemEntry 8 */
+};
+
+#define SCTP_MAX_LOGGING_SIZE 30000
+#define SCTP_TRACE_PARAMS 6 /* This number MUST be even */
+
+struct sctp_log_entry {
+ uint64_t timestamp;
+ uint32_t subsys;
+ uint32_t padding;
+ uint32_t params[SCTP_TRACE_PARAMS];
+};
+
+struct sctp_log {
+ struct sctp_log_entry entry[SCTP_MAX_LOGGING_SIZE];
+ uint32_t index;
+ uint32_t padding;
+};
+
+/*
+ * Kernel defined for sctp_send
+ */
+#if defined(_KERNEL)
+int
+sctp_lower_sosend(struct socket *so,
+ struct sockaddr *addr,
+ struct uio *uio,
+ struct mbuf *i_pak,
+ struct mbuf *control,
+ int flags,
+ int use_rcvinfo,
+ struct sctp_sndrcvinfo *srcv
+ ,struct thread *p
+);
+
+int
+sctp_sorecvmsg(struct socket *so,
+ struct uio *uio,
+ struct mbuf **mp,
+ struct sockaddr *from,
+ int fromlen,
+ int *msg_flags,
+ struct sctp_sndrcvinfo *sinfo,
+ int filling_sinfo);
+
+#endif
+
+/*
+ * API system calls
+ */
+#if !(defined(_KERNEL))
+
+__BEGIN_DECLS
+int sctp_peeloff __P((int, sctp_assoc_t));
+int sctp_bindx __P((int, struct sockaddr *, int, int));
+int sctp_connectx __P((int, const struct sockaddr *, int, sctp_assoc_t *));
+int sctp_getaddrlen __P((sa_family_t));
+int sctp_getpaddrs __P((int, sctp_assoc_t, struct sockaddr **));
+void sctp_freepaddrs __P((struct sockaddr *));
+int sctp_getladdrs __P((int, sctp_assoc_t, struct sockaddr **));
+void sctp_freeladdrs __P((struct sockaddr *));
+int sctp_opt_info __P((int, sctp_assoc_t, int, void *, socklen_t *));
+
+ssize_t sctp_sendmsg
+__P((int, const void *, size_t,
+ const struct sockaddr *,
+ socklen_t, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t));
+
+ ssize_t sctp_send __P((int sd, const void *msg, size_t len,
+ const struct sctp_sndrcvinfo *sinfo, int flags));
+
+ ssize_t sctp_sendx __P((int sd, const void *msg, size_t len,
+ struct sockaddr *addrs, int addrcnt,
+ struct sctp_sndrcvinfo *sinfo, int flags));
+
+ ssize_t sctp_sendmsgx __P((int sd, const void *, size_t,
+ struct sockaddr *, int,
+ uint32_t, uint32_t, uint16_t, uint32_t, uint32_t));
+
+ sctp_assoc_t sctp_getassocid __P((int sd, struct sockaddr *sa));
+
+ ssize_t sctp_recvmsg __P((int, void *, size_t, struct sockaddr *,
+ socklen_t *, struct sctp_sndrcvinfo *, int *));
+
+__END_DECLS
+
+#endif /* !_KERNEL */
+#endif /* !__sctp_uio_h__ */
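A short user-space sketch using the declarations above; the socket setup, peer address and payload are illustrative only and error handling is omitted:

#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp_uio.h>	/* normally pulled in via <netinet/sctp.h> */

static ssize_t
send_hello(int sd, const struct sockaddr_in *peer)
{
	/* stream 0, no ppid/flags/context, unlimited lifetime */
	return (sctp_sendmsg(sd, "hello", 5,
	    (const struct sockaddr *)peer, sizeof(*peer),
	    0 /* ppid */, 0 /* flags */, 0 /* stream */,
	    0 /* timetolive */, 0 /* context */));
}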
Index: ip_fastfwd.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_fastfwd.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/ip_fastfwd.c -L sys/netinet/ip_fastfwd.c -u -r1.1.1.1 -r1.2
--- sys/netinet/ip_fastfwd.c
+++ sys/netinet/ip_fastfwd.c
@@ -25,8 +25,6 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/ip_fastfwd.c,v 1.28.2.2 2005/08/29 17:52:53 andre Exp $
*/
/*
@@ -75,6 +73,9 @@
* is being followed here.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_fastfwd.c,v 1.41 2007/10/07 20:44:23 silby Exp $");
+
#include "opt_ipfw.h"
#include "opt_ipstealth.h"
@@ -100,6 +101,7 @@
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
+#include <netinet/ip_options.h>
#include <machine/in_cksum.h>
@@ -150,7 +152,7 @@
* otherwise 0 is returned and the packet should be delivered
* to ip_input for full processing.
*/
-int
+struct mbuf *
ip_fastforward(struct mbuf *m)
{
struct ip *ip;
@@ -170,7 +172,7 @@
* Are we active and forwarding packets?
*/
if (!ipfastforward_active || !ipforwarding)
- return 0;
+ return m;
M_ASSERTVALID(m);
M_ASSERTPKTHDR(m);
@@ -195,7 +197,7 @@
if (m->m_len < sizeof (struct ip) &&
(m = m_pullup(m, sizeof (struct ip))) == NULL) {
ipstat.ips_toosmall++;
- return 1; /* mbuf already free'd */
+ return NULL; /* mbuf already free'd */
}
ip = mtod(m, struct ip *);
@@ -217,9 +219,9 @@
goto drop;
}
if (hlen > m->m_len) {
- if ((m = m_pullup(m, hlen)) == 0) {
+ if ((m = m_pullup(m, hlen)) == NULL) {
ipstat.ips_badhlen++;
- return 1;
+ return NULL; /* mbuf already free'd */
}
ip = mtod(m, struct ip *);
}
@@ -280,7 +282,7 @@
* Is packet dropped by traffic conditioner?
*/
if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
- return 1;
+ goto drop;
#endif
/*
@@ -292,11 +294,11 @@
*/
if (ip->ip_hl != (sizeof(struct ip) >> 2)) {
if (ip_doopts == 1)
- return 0;
+ return m;
else if (ip_doopts == 2) {
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_FILTER_PROHIB,
0, 0);
- return 1;
+ return NULL; /* mbuf already free'd */
}
/* else ignore IP options and continue */
}
@@ -317,15 +319,17 @@
ntohl(ip->ip_dst.s_addr) == (u_long)INADDR_BROADCAST ||
IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+ IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) ||
+ IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) ||
ip->ip_src.s_addr == INADDR_ANY ||
ip->ip_dst.s_addr == INADDR_ANY )
- return 0;
+ return m;
/*
* Is it for a local address on this host?
*/
if (in_localip(ip->ip_dst))
- return 0;
+ return m;
ipstat.ips_total++;
@@ -344,12 +348,12 @@
/*
* Run through list of ipfilter hooks for input packets
*/
- if (inet_pfil_hook.ph_busy_count == -1)
+ if (!PFIL_HOOKED(&inet_pfil_hook))
goto passin;
if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL) ||
m == NULL)
- return 1;
+ goto drop;
M_ASSERTVALID(m);
M_ASSERTPKTHDR(m);
@@ -392,7 +396,7 @@
#endif
if (ip->ip_ttl <= IPTTLDEC) {
icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0);
- return 1;
+ return NULL; /* mbuf already free'd */
}
/*
@@ -413,13 +417,15 @@
* Find route to destination.
*/
if ((dst = ip_findroute(&ro, dest, m)) == NULL)
- return 1; /* icmp unreach already sent */
+ return NULL; /* icmp unreach already sent */
ifp = ro.ro_rt->rt_ifp;
/*
- * Immediately drop blackholed traffic.
+ * Immediately drop blackholed traffic, and directed broadcasts
+ * for either the all-ones or all-zero subnet addresses on
+ * locally attached networks.
*/
- if (ro.ro_rt->rt_flags & RTF_BLACKHOLE)
+ if ((ro.ro_rt->rt_flags & (RTF_BLACKHOLE|RTF_BROADCAST)) != 0)
goto drop;
/*
@@ -429,11 +435,11 @@
/*
* Run through list of hooks for output packets.
*/
- if (inet_pfil_hook.ph_busy_count == -1)
+ if (!PFIL_HOOKED(&inet_pfil_hook))
goto passout;
if (pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, NULL) || m == NULL) {
- goto consumed;
+ goto drop;
}
M_ASSERTVALID(m);
@@ -468,21 +474,21 @@
m->m_flags |= M_FASTFWD_OURS;
if (ro.ro_rt)
RTFREE(ro.ro_rt);
- return 0;
+ return m;
}
/*
* Redo route lookup with new destination address
*/
#ifdef IPFIREWALL_FORWARD
if (fwd_tag) {
- if (!in_localip(ip->ip_src) && !in_localaddr(ip->ip_dst))
- dest.s_addr = ((struct sockaddr_in *)(fwd_tag+1))->sin_addr.s_addr;
+ dest.s_addr = ((struct sockaddr_in *)
+ (fwd_tag + 1))->sin_addr.s_addr;
m_tag_delete(m, fwd_tag);
}
#endif /* IPFIREWALL_FORWARD */
RTFREE(ro.ro_rt);
if ((dst = ip_findroute(&ro, dest, m)) == NULL)
- return 1; /* icmp unreach already sent */
+ return NULL; /* icmp unreach already sent */
ifp = ro.ro_rt->rt_ifp;
}
@@ -495,7 +501,8 @@
* Check if route is dampened (when ARP is unable to resolve)
*/
if ((ro.ro_rt->rt_flags & RTF_REJECT) &&
- ro.ro_rt->rt_rmx.rmx_expire >= time_second) {
+ (ro.ro_rt->rt_rmx.rmx_expire == 0 ||
+ time_uptime < ro.ro_rt->rt_rmx.rmx_expire)) {
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
goto consumed;
}
@@ -596,11 +603,11 @@
}
consumed:
RTFREE(ro.ro_rt);
- return 1;
+ return NULL;
drop:
if (m)
m_freem(m);
if (ro.ro_rt)
RTFREE(ro.ro_rt);
- return 1;
+ return NULL;
}
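The visible API change in this file is that ip_fastforward() now returns the mbuf (or NULL) instead of an int; a hedged sketch of what a caller on the inbound path does with the new convention (the caller shown is illustrative, not part of this diff):

static void
inbound_hook(struct mbuf *m)
{
	m = ip_fastforward(m);
	if (m == NULL)
		return;		/* consumed: forwarded, dropped, or ICMP error sent */
	/* non-NULL: the packet is for us or cannot be fast-forwarded,
	 * so hand it to the normal slow-path input processing. */
	ip_input(m);
}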
--- /dev/null
+++ sys/netinet/tcp_timewait.c
@@ -0,0 +1,649 @@
+/*-
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_timewait.c,v 1.287 2007/10/07 20:44:24 silby Exp $");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_mac.h"
+#include "opt_tcpdebug.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/random.h>
+
+#include <vm/uma.h>
+
+#include <net/route.h>
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#endif
+#include <netinet/in_pcb.h>
+#ifdef INET6
+#include <netinet6/in6_pcb.h>
+#endif
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#ifdef INET6
+#include <netinet6/ip6_var.h>
+#include <netinet6/scope6_var.h>
+#include <netinet6/nd6.h>
+#endif
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#ifdef INET6
+#include <netinet6/tcp6_var.h>
+#endif
+#include <netinet/tcpip.h>
+#ifdef TCPDEBUG
+#include <netinet/tcp_debug.h>
+#endif
+#include <netinet6/ip6protosw.h>
+
+#include <machine/in_cksum.h>
+
+#include <security/mac/mac_framework.h>
+
+static uma_zone_t tcptw_zone;
+static int maxtcptw;
+
+/*
+ * The timed wait queue contains references to each of the TCP sessions
+ * currently in the TIME_WAIT state. The queue pointers, including the
+ * queue pointers in each tcptw structure, are protected using the global
+ * tcbinfo lock, which must be held over queue iteration and modification.
+ */
+static TAILQ_HEAD(, tcptw) twq_2msl;
+
+static void tcp_tw_2msl_reset(struct tcptw *, int);
+static void tcp_tw_2msl_stop(struct tcptw *);
+
+static int
+tcptw_auto_size(void)
+{
+ int halfrange;
+
+ /*
+ * Max out at half the ephemeral port range so that TIME_WAIT
+ * sockets don't tie up too many ephemeral ports.
+ */
+ if (ipport_lastauto > ipport_firstauto)
+ halfrange = (ipport_lastauto - ipport_firstauto) / 2;
+ else
+ halfrange = (ipport_firstauto - ipport_lastauto) / 2;
+ /* Protect against goofy port ranges smaller than 32. */
+ return (imin(imax(halfrange, 32), maxsockets / 5));
+}
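For instance, with the stock ephemeral range 49152-65535 this yields halfrange = (65535 - 49152) / 2 = 8191, so the zone limit becomes imin(imax(8191, 32), maxsockets / 5); the maxsockets / 5 clamp only takes effect on systems where maxsockets is below roughly 41000.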
+
+static int
+sysctl_maxtcptw(SYSCTL_HANDLER_ARGS)
+{
+ int error, new;
+
+ if (maxtcptw == 0)
+ new = tcptw_auto_size();
+ else
+ new = maxtcptw;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr)
+ if (new >= 32) {
+ maxtcptw = new;
+ uma_zone_set_max(tcptw_zone, maxtcptw);
+ }
+ return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxtcptw, CTLTYPE_INT|CTLFLAG_RW,
+ &maxtcptw, 0, sysctl_maxtcptw, "IU",
+ "Maximum number of compressed TCP TIME_WAIT entries");
+
+static int nolocaltimewait = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, nolocaltimewait, CTLFLAG_RW,
+ &nolocaltimewait, 0,
+ "Do not create compressed TCP TIME_WAIT entries for local connections");
+
+void
+tcp_tw_zone_change(void)
+{
+
+ if (maxtcptw == 0)
+ uma_zone_set_max(tcptw_zone, tcptw_auto_size());
+}
+
+void
+tcp_tw_init(void)
+{
+
+ tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ TUNABLE_INT_FETCH("net.inet.tcp.maxtcptw", &maxtcptw);
+ if (maxtcptw == 0)
+ uma_zone_set_max(tcptw_zone, tcptw_auto_size());
+ else
+ uma_zone_set_max(tcptw_zone, maxtcptw);
+ TAILQ_INIT(&twq_2msl);
+}
+
+/*
+ * Move a TCP connection into TIME_WAIT state.
+ * tcbinfo is locked.
+ * inp is locked, and is unlocked before returning.
+ */
+void
+tcp_twstart(struct tcpcb *tp)
+{
+ struct tcptw *tw;
+ struct inpcb *inp = tp->t_inpcb;
+ int acknow;
+ struct socket *so;
+
+ INP_INFO_WLOCK_ASSERT(&tcbinfo); /* tcp_tw_2msl_reset(). */
+ INP_LOCK_ASSERT(inp);
+
+ if (nolocaltimewait && in_localip(inp->inp_faddr)) {
+ tp = tcp_close(tp);
+ if (tp != NULL)
+ INP_UNLOCK(inp);
+ return;
+ }
+
+ tw = uma_zalloc(tcptw_zone, M_NOWAIT);
+ if (tw == NULL) {
+ tw = tcp_tw_2msl_scan(1);
+ if (tw == NULL) {
+ tp = tcp_close(tp);
+ if (tp != NULL)
+ INP_UNLOCK(inp);
+ return;
+ }
+ }
+ tw->tw_inpcb = inp;
+
+ /*
+ * Recover last window size sent.
+ */
+ tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
+
+ /*
+ * Set t_recent if timestamps are used on the connection.
+ */
+ if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
+ (TF_REQ_TSTMP|TF_RCVD_TSTMP)) {
+ tw->t_recent = tp->ts_recent;
+ tw->ts_offset = tp->ts_offset;
+ } else {
+ tw->t_recent = 0;
+ tw->ts_offset = 0;
+ }
+
+ tw->snd_nxt = tp->snd_nxt;
+ tw->rcv_nxt = tp->rcv_nxt;
+ tw->iss = tp->iss;
+ tw->irs = tp->irs;
+ tw->t_starttime = tp->t_starttime;
+ tw->tw_time = 0;
+
+/* XXX
+ * If this code will
+ * be used for fin-wait-2 state also, then we may need
+ * a ts_recent from the last segment.
+ */
+ acknow = tp->t_flags & TF_ACKNOW;
+
+ /*
+ * First, discard tcpcb state, which includes stopping its timers and
+ * freeing it. tcp_discardcb() used to also release the inpcb, but
+ * that work is now done in the caller.
+ *
+ * Note: soisdisconnected() call used to be made in tcp_discardcb(),
+ * and might not be needed here any longer.
+ */
+ tcp_discardcb(tp);
+ so = inp->inp_socket;
+ soisdisconnected(so);
+ tw->tw_cred = crhold(so->so_cred);
+ SOCK_LOCK(so);
+ tw->tw_so_options = so->so_options;
+ SOCK_UNLOCK(so);
+ if (acknow)
+ tcp_twrespond(tw, TH_ACK);
+ inp->inp_ppcb = tw;
+ inp->inp_vflag |= INP_TIMEWAIT;
+ tcp_tw_2msl_reset(tw, 0);
+
+ /*
+ * If the inpcb owns the sole reference to the socket, then we can
+ * detach and free the socket as it is not needed in time wait.
+ */
+ if (inp->inp_vflag & INP_SOCKREF) {
+ KASSERT(so->so_state & SS_PROTOREF,
+ ("tcp_twstart: !SS_PROTOREF"));
+ inp->inp_vflag &= ~INP_SOCKREF;
+ INP_UNLOCK(inp);
+ ACCEPT_LOCK();
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_PROTOREF;
+ sofree(so);
+ } else
+ INP_UNLOCK(inp);
+}
+
+#if 0
+/*
+ * The approximate rate of ISN increase of Microsoft TCP stacks;
+ * the actual rate is slightly higher due to the addition of
+ * random positive increments.
+ *
+ * Most other new OSes use semi-randomized ISN values, so we
+ * do not need to worry about them.
+ */
+#define MS_ISN_BYTES_PER_SECOND 250000
+
+/*
+ * Determine if the ISN we will generate has advanced beyond the last
+ * sequence number used by the previous connection. If so, indicate
+ * that it is safe to recycle this tw socket by returning 1.
+ */
+int
+tcp_twrecycleable(struct tcptw *tw)
+{
+ tcp_seq new_iss = tw->iss;
+ tcp_seq new_irs = tw->irs;
+
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ new_iss += (ticks - tw->t_starttime) * (ISN_BYTES_PER_SECOND / hz);
+ new_irs += (ticks - tw->t_starttime) * (MS_ISN_BYTES_PER_SECOND / hz);
+
+ if (SEQ_GT(new_iss, tw->snd_nxt) && SEQ_GT(new_irs, tw->rcv_nxt))
+ return (1);
+ else
+ return (0);
+}
+#endif
+
+/*
+ * Returns 1 if the TIME_WAIT state was killed and we should start over,
+ * looking for a pcb in the listen state. Returns 0 otherwise.
+ */
+int
+tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th,
+ struct mbuf *m, int tlen)
+{
+ struct tcptw *tw;
+ int thflags;
+ tcp_seq seq;
+#ifdef INET6
+ int isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
+#else
+ const int isipv6 = 0;
+#endif
+
+ /* tcbinfo lock required for tcp_twclose(), tcp_tw_2msl_reset(). */
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ INP_LOCK_ASSERT(inp);
+
+ /*
+ * XXXRW: Time wait state for inpcb has been recycled, but inpcb is
+ * still present. This is undesirable, but temporarily necessary
+ * until we work out how to handle inpcbs whose timewait state has
+ * been removed.
+ */
+ tw = intotw(inp);
+ if (tw == NULL)
+ goto drop;
+
+ thflags = th->th_flags;
+
+ /*
+ * NOTE: for FIN_WAIT_2 (to be added later),
+ * must validate sequence number before accepting RST
+ */
+
+ /*
+ * If the segment contains RST:
+ * Drop the segment - see Stevens, vol. 2, p. 964 and
+ * RFC 1337.
+ */
+ if (thflags & TH_RST)
+ goto drop;
+
+#if 0
+/* PAWS not needed at the moment */
+ /*
+ * RFC 1323 PAWS: If we have a timestamp reply on this segment
+ * and it's less than ts_recent, drop it.
+ */
+ if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
+ TSTMP_LT(to.to_tsval, tp->ts_recent)) {
+ if ((thflags & TH_ACK) == 0)
+ goto drop;
+ goto ack;
+ }
+ /*
+ * ts_recent is never updated because we never accept new segments.
+ */
+#endif
+
+ /*
+ * If a new connection request is received
+ * while in TIME_WAIT, drop the old connection
+ * and start over if the sequence numbers
+ * are above the previous ones.
+ */
+ if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) {
+ tcp_twclose(tw, 0);
+ return (1);
+ }
+
+ /*
+ * Drop the segment if it does not contain an ACK.
+ */
+ if ((thflags & TH_ACK) == 0)
+ goto drop;
+
+ /*
+ * Reset the 2MSL timer if this is a duplicate FIN.
+ */
+ if (thflags & TH_FIN) {
+ seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
+ if (seq + 1 == tw->rcv_nxt)
+ tcp_tw_2msl_reset(tw, 1);
+ }
+
+ /*
+ * Acknowledge the segment if it has data or is not a duplicate ACK.
+ */
+ if (thflags != TH_ACK || tlen != 0 ||
+ th->th_seq != tw->rcv_nxt || th->th_ack != tw->snd_nxt)
+ tcp_twrespond(tw, TH_ACK);
+ goto drop;
+
+ /*
+ * Generate a RST, dropping incoming segment.
+ * Make ACK acceptable to originator of segment.
+ * Don't bother to respond if destination was broadcast/multicast.
+ */
+ if (m->m_flags & (M_BCAST|M_MCAST))
+ goto drop;
+ if (isipv6) {
+#ifdef INET6
+ struct ip6_hdr *ip6;
+
+ /* IPv6 anycast check is done at tcp6_input() */
+ ip6 = mtod(m, struct ip6_hdr *);
+ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
+ IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
+ goto drop;
+#endif
+ } else {
+ struct ip *ip;
+
+ ip = mtod(m, struct ip *);
+ if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+ IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
+ ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
+ in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
+ goto drop;
+ }
+ if (thflags & TH_ACK) {
+ tcp_respond(NULL,
+ mtod(m, void *), th, m, 0, th->th_ack, TH_RST);
+ } else {
+ seq = th->th_seq + (thflags & TH_SYN ? 1 : 0);
+ tcp_respond(NULL,
+ mtod(m, void *), th, m, seq, 0, TH_RST|TH_ACK);
+ }
+ INP_UNLOCK(inp);
+ return (0);
+
+drop:
+ INP_UNLOCK(inp);
+ m_freem(m);
+ return (0);
+}
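
In other words, a caller that finds a pcb in TIME_WAIT hands the whole segment to tcp_twcheck() and only keeps going on a return value of 1, in which case it repeats its pcb lookup hoping to hit a listening socket; on a return of 0 the segment has already been consumed here (dropped, acknowledged, or answered with an RST) and the inpcb has been unlocked.
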
+
+void
+tcp_twclose(struct tcptw *tw, int reuse)
+{
+ struct socket *so;
+ struct inpcb *inp;
+
+ /*
+ * At this point, we are in one of two situations:
+ *
+ * (1) We have no socket, just an inpcb<->tcptw pair. We can free
+ * all state.
+ *
+ * (2) We have a socket -- if we own a reference, release it and
+ * notify the socket layer.
+ */
+ inp = tw->tw_inpcb;
+ KASSERT((inp->inp_vflag & INP_TIMEWAIT), ("tcp_twclose: !timewait"));
+ KASSERT(intotw(inp) == tw, ("tcp_twclose: inp_ppcb != tw"));
+ INP_INFO_WLOCK_ASSERT(&tcbinfo); /* tcp_tw_2msl_stop(). */
+ INP_LOCK_ASSERT(inp);
+
+ tw->tw_inpcb = NULL;
+ tcp_tw_2msl_stop(tw);
+ inp->inp_ppcb = NULL;
+ in_pcbdrop(inp);
+
+ so = inp->inp_socket;
+ if (so != NULL) {
+ /*
+ * If there's a socket, handle two cases: either we own a
+ * strong reference, which we now release, or we don't, in
+ * which case another reference exists (XXXRW: think about
+ * this more) and we don't need to take action.
+ */
+ if (inp->inp_vflag & INP_SOCKREF) {
+ inp->inp_vflag &= ~INP_SOCKREF;
+ INP_UNLOCK(inp);
+ ACCEPT_LOCK();
+ SOCK_LOCK(so);
+ KASSERT(so->so_state & SS_PROTOREF,
+ ("tcp_twclose: INP_SOCKREF && !SS_PROTOREF"));
+ so->so_state &= ~SS_PROTOREF;
+ sofree(so);
+ } else {
+ /*
+ * If we don't own the only reference, the socket and
+ * inpcb need to be left around to be handled by
+ * tcp_usr_detach() later.
+ */
+ INP_UNLOCK(inp);
+ }
+ } else {
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6PROTO)
+ in6_pcbfree(inp);
+ else
+#endif
+ in_pcbfree(inp);
+ }
+ tcpstat.tcps_closed++;
+ crfree(tw->tw_cred);
+ tw->tw_cred = NULL;
+ if (reuse)
+ return;
+ uma_zfree(tcptw_zone, tw);
+}
+
+int
+tcp_twrespond(struct tcptw *tw, int flags)
+{
+ struct inpcb *inp = tw->tw_inpcb;
+ struct tcphdr *th;
+ struct mbuf *m;
+ struct ip *ip = NULL;
+ u_int hdrlen, optlen;
+ int error;
+ struct tcpopt to;
+#ifdef INET6
+ struct ip6_hdr *ip6 = NULL;
+ int isipv6 = inp->inp_inc.inc_isipv6;
+#endif
+
+ INP_LOCK_ASSERT(inp);
+
+ m = m_gethdr(M_DONTWAIT, MT_DATA);
+ if (m == NULL)
+ return (ENOBUFS);
+ m->m_data += max_linkhdr;
+
+#ifdef MAC
+ mac_create_mbuf_from_inpcb(inp, m);
+#endif
+
+#ifdef INET6
+ if (isipv6) {
+ hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+ ip6 = mtod(m, struct ip6_hdr *);
+ th = (struct tcphdr *)(ip6 + 1);
+ tcpip_fillheaders(inp, ip6, th);
+ } else
+#endif
+ {
+ hdrlen = sizeof(struct tcpiphdr);
+ ip = mtod(m, struct ip *);
+ th = (struct tcphdr *)(ip + 1);
+ tcpip_fillheaders(inp, ip, th);
+ }
+ to.to_flags = 0;
+
+ /*
+ * Send a timestamp and echo-reply if both our side and our peer
+ * have sent timestamps in our SYN's and this is not a RST.
+ */
+ if (tw->t_recent && flags == TH_ACK) {
+ to.to_flags |= TOF_TS;
+ to.to_tsval = ticks + tw->ts_offset;
+ to.to_tsecr = tw->t_recent;
+ }
+ optlen = tcp_addoptions(&to, (u_char *)(th + 1));
+
+ m->m_len = hdrlen + optlen;
+ m->m_pkthdr.len = m->m_len;
+
+ KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small"));
+
+ th->th_seq = htonl(tw->snd_nxt);
+ th->th_ack = htonl(tw->rcv_nxt);
+ th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
+ th->th_flags = flags;
+ th->th_win = htons(tw->last_win);
+
+#ifdef INET6
+ if (isipv6) {
+ th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(struct ip6_hdr),
+ sizeof(struct tcphdr) + optlen);
+ ip6->ip6_hlim = in6_selecthlim(inp, NULL);
+ error = ip6_output(m, inp->in6p_outputopts, NULL,
+ (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
+ } else
+#endif
+ {
+ th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
+ m->m_pkthdr.csum_flags = CSUM_TCP;
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
+ ip->ip_len = m->m_pkthdr.len;
+ if (path_mtu_discovery)
+ ip->ip_off |= IP_DF;
+ error = ip_output(m, inp->inp_options, NULL,
+ ((tw->tw_so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
+ NULL, inp);
+ }
+ if (flags & TH_ACK)
+ tcpstat.tcps_sndacks++;
+ else
+ tcpstat.tcps_sndctrl++;
+ tcpstat.tcps_sndtotal++;
+ return (error);
+}
+
+static void
+tcp_tw_2msl_reset(struct tcptw *tw, int rearm)
+{
+
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ INP_LOCK_ASSERT(tw->tw_inpcb);
+ if (rearm)
+ TAILQ_REMOVE(&twq_2msl, tw, tw_2msl);
+ tw->tw_time = ticks + 2 * tcp_msl;
+ TAILQ_INSERT_TAIL(&twq_2msl, tw, tw_2msl);
+}
+
+static void
+tcp_tw_2msl_stop(struct tcptw *tw)
+{
+
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ TAILQ_REMOVE(&twq_2msl, tw, tw_2msl);
+}
+
+struct tcptw *
+tcp_tw_2msl_scan(int reuse)
+{
+ struct tcptw *tw;
+
+ INP_INFO_WLOCK_ASSERT(&tcbinfo);
+ for (;;) {
+ tw = TAILQ_FIRST(&twq_2msl);
+ if (tw == NULL || (!reuse && tw->tw_time > ticks))
+ break;
+ INP_LOCK(tw->tw_inpcb);
+ tcp_twclose(tw, reuse);
+ if (reuse)
+ return (tw);
+ }
+ return (NULL);
+}
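
Note that tcp_tw_2msl_reset() always stamps the entry with ticks + 2 * tcp_msl and (re)inserts it at the tail of twq_2msl, so the queue stays effectively ordered by expiry time; tcp_tw_2msl_scan() therefore only ever has to look at the head, stopping at the first entry that has not yet expired, unless reuse is requested, in which case the oldest entry is reclaimed regardless of its timer.
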
Index: ip_id.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_id.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/ip_id.c -L sys/netinet/ip_id.c -u -r1.2 -r1.3
--- sys/netinet/ip_id.c
+++ sys/netinet/ip_id.c
@@ -1,17 +1,27 @@
+/* $OpenBSD: ip_id.c,v 1.2 1999/08/26 13:37:01 provos Exp $ */
/*-
- * Copyright (c) 2008 Michael J. Silbersack.
+ * Copyright 1998 Niels Provos <provos at citi.umich.edu>
* All rights reserved.
*
+ * Theo de Raadt <deraadt at openbsd.org> came up with the idea of using
+ * such a mathematical system to generate more random (yet non-repeating)
+ * ids to solve the resolver/named problem. But Niels designed the
+ * actual system based on the constraints.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
- * notice unmodified, this list of conditions, and the following
- * disclaimer.
+ * notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Niels Provos.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
@@ -23,189 +33,169 @@
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/ip_id.c,v 1.7 2005/01/07 01:45:44 imp Exp $
*/
-#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
-
-/*
- * IP ID generation is a fascinating topic.
- *
- * In order to avoid ID collisions during packet reassembly, common sense
- * dictates that the period between reuse of IDs be as large as possible.
- * This leads to the classic implementation of a system-wide counter, thereby
- * ensuring that IDs repeat only once every 2^16 packets.
- *
- * Subsequent security researchers have pointed out that using a global
- * counter makes ID values predictable. This predictability allows traffic
- * analysis, idle scanning, and even packet injection in specific cases.
- * These results suggest that IP IDs should be as random as possible.
- *
- * The "searchable queues" algorithm used in this IP ID implementation was
- * proposed by Amit Klein. It is a compromise between the above two
- * viewpoints that has provable behavior that can be tuned to the user's
- * requirements.
- *
- * The basic concept is that we supplement a standard random number generator
- * with a queue of the last L IDs that we have handed out to ensure that all
- * IDs have a period of at least L.
- *
- * To efficiently implement this idea, we keep two data structures: a
- * circular array of IDs of size L and a bitstring of 65536 bits.
- *
- * To start, we ask the RNG for a new ID. A quick index into the bitstring
- * is used to determine if this is a recently used value. The process is
- * repeated until a value is returned that is not in the bitstring.
- *
- * Having found a usable ID, we remove the ID stored at the current position
- * in the queue from the bitstring and replace it with our new ID. Our new
- * ID is then added to the bitstring and the queue pointer is incremented.
- *
- * The lower limit of 512 was chosen because there doesn't seem to be much
- * point to having a smaller value. The upper limit of 32768 was chosen for
- * two reasons. First, every step above 32768 decreases the entropy. Taken
- * to an extreme, 65533 would offer 1 bit of entropy. Second, the number of
- * attempts it takes the algorithm to find an unused ID drastically
- * increases, killing performance. The default value of 8192 was chosen
- * because it provides a good tradeoff between randomness and non-repetition.
- *
- * With L=8192, the queue will use 16K of memory. The bitstring always
- * uses 8K of memory. No memory is allocated until the use of random ids is
- * enabled.
+/*-
+ * seed = random 15bit
+ * n = prime, g0 = generator to n,
+ * j = random so that gcd(j,n-1) == 1
+ * g = g0^j mod n will be a generator again.
+ *
+ * X[0] = random seed.
+ * X[n] = a*X[n-1]+b mod m is a Linear Congruential Generator
+ * with a = 7^(even random) mod m,
+ * b = random with gcd(b,m) == 1
+ * m = 31104 and a maximal period of m-1.
+ *
+ * The transaction id is determined by:
+ * id[n] = seed xor (g^X[n] mod n)
+ *
+ * Effectively the id is restricted to the lower 15 bits, thus yielding two
+ * different cycles by toggling the msb on and off. This avoids reuse issues
+ * caused by reseeding.
*/
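
For illustration only, here is a self-contained userland sketch of the construction described above; the parameters (n = 31, g = 3, the LCG constants and the seeds) are deliberately tiny toy values chosen for this sketch and are not the RU_* constants used by the actual code below:

#include <stdint.h>
#include <stdio.h>

/* Square-and-multiply: gen^exp mod mod (same idea as pmod() below). */
static uint16_t
toy_pmod(uint16_t gen, uint16_t exp, uint16_t mod)
{
        uint32_t s = 1, t = gen;

        while (exp) {
                if (exp & 1)
                        s = (s * t) % mod;
                exp >>= 1;
                t = (t * t) % mod;
        }
        return ((uint16_t)s);
}

int
main(void)
{
        uint16_t n = 31;                /* toy prime */
        uint16_t g = 3;                 /* generator of the group mod 31 */
        uint16_t a = 5, b = 3, m = 16;  /* toy LCG: x[k] = (a*x[k-1]+b) mod m */
        uint16_t x = 7;                 /* "random" LCG seed */
        uint16_t seed = 0x15;           /* "random" xor mask */
        int k;

        for (k = 0; k < 8; k++) {
                x = (a * x + b) % m;
                /* id[k] = seed xor (g^x[k] mod n), as in the comment above. */
                printf("id[%d] = %u\n", k,
                    (unsigned)(seed ^ toy_pmod(g, x, n)));
        }
        return (0);
}

The real ip_randomid() below additionally xors a second secret (ru_seed2) into the exponent, skips a random number of LCG steps per call, and reseeds itself every RU_OUT seconds or after RU_MAX ids.
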
-#include <sys/types.h>
-#include <sys/malloc.h>
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_id.c,v 1.9 2007/10/07 20:44:23 silby Exp $");
+
+#include "opt_pf.h"
#include <sys/param.h>
#include <sys/time.h>
#include <sys/kernel.h>
-#include <sys/libkern.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
#include <sys/random.h>
-#include <sys/systm.h>
-#include <sys/sysctl.h>
-#include <netinet/in.h>
-#include <netinet/ip_var.h>
-#include <sys/bitstring.h>
-
-static MALLOC_DEFINE(M_IPID, "ipid", "randomized ip id state");
-
-static u_int16_t *id_array = NULL;
-static bitstr_t *id_bits = NULL;
-static int array_ptr = 0;
-static int array_size = 8192;
-static int random_id_collisions = 0;
-static int random_id_total = 0;
-static struct mtx ip_id_mtx;
-
-static void ip_initid(void);
-static int sysctl_ip_id_change(SYSCTL_HANDLER_ARGS);
-
-MTX_SYSINIT(ip_id_mtx, &ip_id_mtx, "ip_id_mtx", MTX_DEF);
-
-SYSCTL_DECL(_net_inet_ip);
-SYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id_period, CTLTYPE_INT|CTLFLAG_RW,
- &array_size, 0, sysctl_ip_id_change, "IU", "IP ID Array size");
-SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_collisions, CTLFLAG_RD,
- &random_id_collisions, 0, "Count of IP ID collisions");
-SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_total, CTLFLAG_RD,
- &random_id_total, 0, "Count of IP IDs created");
-static int
-sysctl_ip_id_change(SYSCTL_HANDLER_ARGS)
+#define RU_OUT 180 /* Time after which we will reseed */
+#define RU_MAX 30000 /* Unique cycle, avoid blackjack prediction */
+#define RU_GEN 2 /* Starting generator */
+#define RU_N 32749 /* RU_N-1 = 2*2*3*2729 */
+#define RU_AGEN 7 /* determine ru_a as RU_AGEN^(2*rand) */
+#define RU_M 31104 /* RU_M = 2^7*3^5 - don't change */
+
+#define PFAC_N 3
+const static u_int16_t pfacts[PFAC_N] = {
+ 2,
+ 3,
+ 2729
+};
+
+static u_int16_t ru_x;
+static u_int16_t ru_seed, ru_seed2;
+static u_int16_t ru_a, ru_b;
+static u_int16_t ru_g;
+static u_int16_t ru_counter = 0;
+static u_int16_t ru_msb = 0;
+static long ru_reseed;
+static u_int32_t tmp; /* Storage for unused random */
+
+static u_int16_t pmod(u_int16_t, u_int16_t, u_int16_t);
+static void ip_initid(void);
+u_int16_t ip_randomid(void);
+
+/*
+ * Do a fast modular exponentiation; the returned value will be in the
+ * range 0 - (mod-1).
+ */
+
+static u_int16_t
+pmod(u_int16_t gen, u_int16_t exp, u_int16_t mod)
{
- int error, new;
+ u_int16_t s, t, u;
- new = array_size;
- error = sysctl_handle_int(oidp, &new, 0, req);
- if (error == 0 && req->newptr) {
- if (new >= 512 && new <= 32768) {
- mtx_lock(&ip_id_mtx);
- array_size = new;
- ip_initid();
- mtx_unlock(&ip_id_mtx);
- } else
- error = EINVAL;
- }
- return (error);
+ s = 1;
+ t = gen;
+ u = exp;
+
+ while (u) {
+ if (u & 1)
+ s = (s*t) % mod;
+ u >>= 1;
+ t = (t*t) % mod;
+ }
+ return (s);
}
/*
- * ip_initid() runs with a mutex held and may execute in a network context.
- * As a result, it uses M_NOWAIT. Ideally, we would always do this
- * allocation from the sysctl contact and have it be an invariant that if
- * this random ID allocation mode is selected, the buffers are present. This
- * would also avoid potential network context failures of IP ID generation.
+ * Initializes the seed and chooses a suitable generator. Also toggles the
+ * msb flag. The msb flag is used to generate two distinct cycles of random
+ * numbers, thus avoiding reuse of ids.
+ *
+ * This function is called from ip_randomid() when needed; an application
+ * does not have to worry about it.
*/
static void
ip_initid(void)
{
- mtx_assert(&ip_id_mtx, MA_OWNED);
+ u_int16_t j, i;
+ int noprime = 1;
+ struct timeval time;
+
+ getmicrotime(&time);
+ read_random((void *) &tmp, sizeof(tmp));
+ ru_x = (tmp & 0xFFFF) % RU_M;
+
+ /* 15 bits of random seed. */
+ ru_seed = (tmp >> 16) & 0x7FFF;
+ read_random((void *) &tmp, sizeof(tmp));
+ ru_seed2 = tmp & 0x7FFF;
+
+ read_random((void *) &tmp, sizeof(tmp));
+
+ /* Determine the LCG we use. */
+ ru_b = (tmp & 0xfffe) | 1;
+ ru_a = pmod(RU_AGEN, (tmp >> 16) & 0xfffe, RU_M);
+ while (ru_b % 3 == 0)
+ ru_b += 2;
+
+ read_random((void *) &tmp, sizeof(tmp));
+ j = tmp % RU_N;
+ tmp = tmp >> 16;
- if (id_array != NULL) {
- free(id_array, M_IPID);
- free(id_bits, M_IPID);
- }
- random_id_collisions = 0;
- random_id_total = 0;
- array_ptr = 0;
- id_array = (u_int16_t *) malloc(array_size * sizeof(u_int16_t),
- M_IPID, M_NOWAIT | M_ZERO);
- id_bits = (bitstr_t *) malloc(bitstr_size(65536), M_IPID,
- M_NOWAIT | M_ZERO);
- if (id_array == NULL || id_bits == NULL) {
- /* Neither or both. */
- if (id_array != NULL) {
- free(id_array, M_IPID);
- id_array = NULL;
- }
- if (id_bits != NULL) {
- free(id_bits, M_IPID);
- id_bits = NULL;
- }
+ /*
+ * Do a fast gcd(j,RU_N-1), so we can find a j with gcd(j, RU_N-1) ==
+ * 1, giving a new generator for RU_GEN^j mod RU_N.
+ */
+ while (noprime) {
+ for (i=0; i<PFAC_N; i++)
+ if (j%pfacts[i] == 0)
+ break;
+
+ if (i>=PFAC_N)
+ noprime = 0;
+ else
+ j = (j+1) % RU_N;
}
+
+ ru_g = pmod(RU_GEN,j,RU_N);
+ ru_counter = 0;
+
+ ru_reseed = time.tv_sec + RU_OUT;
+ ru_msb = ru_msb == 0x8000 ? 0 : 0x8000;
}
u_int16_t
ip_randomid(void)
{
- u_int16_t new_id;
+ int i, n;
+ struct timeval time;
- mtx_lock(&ip_id_mtx);
- if (id_array == NULL)
- ip_initid();
+ /* XXX not reentrant */
+ getmicrotime(&time);
+ if (ru_counter >= RU_MAX || time.tv_sec > ru_reseed)
+ ip_initid();
- /*
- * Fail gracefully; return a fixed id if memory allocation failed;
- * ideally we wouldn't do allocation in this context in order to
- * avoid the possibility of this failure mode.
- */
- if (id_array == NULL) {
- mtx_unlock(&ip_id_mtx);
- return (1);
- }
+ if (!tmp)
+ read_random((void *) &tmp, sizeof(tmp));
- /*
- * To avoid a conflict with the zeros that the array is initially
- * filled with, we never hand out an id of zero.
- */
- new_id = 0;
- do {
- if (new_id != 0)
- random_id_collisions++;
- arc4rand(&new_id, sizeof(new_id), 0);
- } while (bit_test(id_bits, new_id) || new_id == 0);
- bit_clear(id_bits, id_array[array_ptr]);
- bit_set(id_bits, new_id);
- id_array[array_ptr] = new_id;
- array_ptr++;
- if (array_ptr == array_size)
- array_ptr = 0;
- random_id_total++;
- mtx_unlock(&ip_id_mtx);
- return (new_id);
-}
+ /* Skip a random number of ids. */
+ n = tmp & 0x3; tmp = tmp >> 2;
+ if (ru_counter + n >= RU_MAX)
+ ip_initid();
+ for (i = 0; i <= n; i++)
+ /* Linear Congruential Generator. */
+ ru_x = (ru_a*ru_x + ru_b) % RU_M;
+
+ ru_counter += i;
+
+ return (ru_seed ^ pmod(ru_g,ru_seed2 ^ ru_x,RU_N)) | ru_msb;
+}
Index: icmp_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/icmp_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/icmp_var.h -L sys/netinet/icmp_var.h -u -r1.1.1.1 -r1.2
--- sys/netinet/icmp_var.h
+++ sys/netinet/icmp_var.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)icmp_var.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/icmp_var.h,v 1.25 2005/01/07 01:45:44 imp Exp $
+ * $FreeBSD: src/sys/netinet/icmp_var.h,v 1.26 2007/07/19 22:34:24 rwatson Exp $
*/
#ifndef _NETINET_ICMP_VAR_H_
@@ -82,7 +82,8 @@
#define BANDLIM_ICMP_TSTAMP 2
#define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */
#define BANDLIM_RST_OPENPORT 4 /* No connection, listener */
-#define BANDLIM_MAX 4
+#define BANDLIM_ICMP6_UNREACH 5
+#define BANDLIM_MAX 5
#endif
#endif
--- /dev/null
+++ sys/netinet/sctp_header.h
@@ -0,0 +1,584 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_header.h,v 1.14 2005/03/06 16:04:17 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_header.h,v 1.6 2007/08/24 00:53:51 rrs Exp $");
+
+#ifndef __sctp_header_h__
+#define __sctp_header_h__
+
+#include <sys/time.h>
+#include <netinet/sctp.h>
+#include <netinet/sctp_constants.h>
+
+
+/*
+ * Parameter structures
+ */
+struct sctp_ipv4addr_param {
+ struct sctp_paramhdr ph;/* type=SCTP_IPV4_PARAM_TYPE, len=8 */
+ uint32_t addr; /* IPV4 address */
+} SCTP_PACKED;
+
+#define SCTP_V6_ADDR_BYTES 16
+
+
+struct sctp_ipv6addr_param {
+ struct sctp_paramhdr ph;/* type=SCTP_IPV6_PARAM_TYPE, len=20 */
+ uint8_t addr[SCTP_V6_ADDR_BYTES]; /* IPV6 address */
+} SCTP_PACKED;
+
+/* Cookie Preservative */
+struct sctp_cookie_perserve_param {
+ struct sctp_paramhdr ph;/* type=SCTP_COOKIE_PRESERVE, len=8 */
+ uint32_t time; /* time in ms to extend cookie */
+};
+
+#define SCTP_ARRAY_MIN_LEN 1
+/* Host Name Address */
+struct sctp_host_name_param {
+ struct sctp_paramhdr ph;/* type=SCTP_HOSTNAME_ADDRESS */
+ char name[SCTP_ARRAY_MIN_LEN]; /* host name */
+} SCTP_PACKED;
+
+/*
+ * This is the maximum padded size of a s-a-p (supported address parameter):
+ * paramhdr (4 bytes) + 3 address types (6 bytes) + 2 bytes of pad = 12
+ */
+#define SCTP_MAX_ADDR_PARAMS_SIZE 12
+/* supported address type */
+struct sctp_supported_addr_param {
+ struct sctp_paramhdr ph;/* type=SCTP_SUPPORTED_ADDRTYPE */
+ uint16_t addr_type[SCTP_ARRAY_MIN_LEN]; /* array of supported address
+ * types */
+} SCTP_PACKED;
+
+/* ECN parameter */
+struct sctp_ecn_supported_param {
+ struct sctp_paramhdr ph;/* type=SCTP_ECN_CAPABLE */
+} SCTP_PACKED;
+
+
+/* heartbeat info parameter */
+struct sctp_heartbeat_info_param {
+ struct sctp_paramhdr ph;
+ uint32_t time_value_1;
+ uint32_t time_value_2;
+ uint32_t random_value1;
+ uint32_t random_value2;
+ uint16_t user_req;
+ uint8_t addr_family;
+ uint8_t addr_len;
+ char address[SCTP_ADDRMAX];
+} SCTP_PACKED;
+
+
+/* draft-ietf-tsvwg-prsctp */
+/* PR-SCTP supported parameter */
+struct sctp_prsctp_supported_param {
+ struct sctp_paramhdr ph;
+} SCTP_PACKED;
+
+
+/* draft-ietf-tsvwg-addip-sctp */
+struct sctp_asconf_paramhdr { /* an ASCONF "parameter" */
+ struct sctp_paramhdr ph;/* a SCTP parameter header */
+ uint32_t correlation_id;/* correlation id for this param */
+} SCTP_PACKED;
+
+struct sctp_asconf_addr_param { /* an ASCONF address parameter */
+ struct sctp_asconf_paramhdr aph; /* asconf "parameter" */
+ struct sctp_ipv6addr_param addrp; /* max storage size */
+} SCTP_PACKED;
+
+struct sctp_asconf_addrv4_param { /* an ASCONF address (v4) parameter */
+ struct sctp_asconf_paramhdr aph; /* asconf "parameter" */
+ struct sctp_ipv4addr_param addrp; /* max storage size */
+} SCTP_PACKED;
+
+#define SCTP_MAX_SUPPORTED_EXT 256
+
+struct sctp_supported_chunk_types_param {
+ struct sctp_paramhdr ph;/* type = 0x8008 len = x */
+ uint8_t chunk_types[0];
+} SCTP_PACKED;
+
+
+/* ECN Nonce: draft-ladha-sctp-ecn-nonce */
+struct sctp_ecn_nonce_supported_param {
+ struct sctp_paramhdr ph;/* type = 0x8001 len = 4 */
+} SCTP_PACKED;
+
+
+/*
+ * Structures for DATA chunks
+ */
+struct sctp_data {
+ uint32_t tsn;
+ uint16_t stream_id;
+ uint16_t stream_sequence;
+ uint32_t protocol_id;
+ /* user data follows */
+} SCTP_PACKED;
+
+struct sctp_data_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_data dp;
+} SCTP_PACKED;
+
+/*
+ * Structures for the control chunks
+ */
+
+/* Initiate (INIT)/Initiate Ack (INIT ACK) */
+struct sctp_init {
+ uint32_t initiate_tag; /* initiate tag */
+ uint32_t a_rwnd; /* a_rwnd */
+ uint16_t num_outbound_streams; /* OS */
+ uint16_t num_inbound_streams; /* MIS */
+ uint32_t initial_tsn; /* I-TSN */
+ /* optional param's follow */
+} SCTP_PACKED;
+
+#define SCTP_IDENTIFICATION_SIZE 16
+#define SCTP_ADDRESS_SIZE 4
+#define SCTP_RESERVE_SPACE 6
+/* state cookie header */
+struct sctp_state_cookie { /* this is our definition... */
+ uint8_t identification[SCTP_IDENTIFICATION_SIZE]; /* id of who we are */
+ struct timeval time_entered; /* the time I built cookie */
+ uint32_t cookie_life; /* life I will award this cookie */
+ uint32_t tie_tag_my_vtag; /* my tag in old association */
+
+ uint32_t tie_tag_peer_vtag; /* peers tag in old association */
+ uint32_t peers_vtag; /* peers tag in INIT (for quick ref) */
+
+ uint32_t my_vtag; /* my tag in INIT-ACK (for quick ref) */
+ uint32_t address[SCTP_ADDRESS_SIZE]; /* 4 ints/128 bits */
+ uint32_t addr_type; /* address type */
+ uint32_t laddress[SCTP_ADDRESS_SIZE]; /* my local from address */
+ uint32_t laddr_type; /* my local from address type */
+ uint32_t scope_id; /* v6 scope id for link-locals */
+
+ uint16_t peerport; /* port address of the peer in the INIT */
+ uint16_t myport; /* my port address used in the INIT */
+ uint8_t ipv4_addr_legal;/* Are V4 addr legal? */
+ uint8_t ipv6_addr_legal;/* Are V6 addr legal? */
+ uint8_t local_scope; /* IPv6 local scope flag */
+ uint8_t site_scope; /* IPv6 site scope flag */
+
+ uint8_t ipv4_scope; /* IPv4 private addr scope */
+ uint8_t loopback_scope; /* loopback scope information */
+ uint8_t reserved[SCTP_RESERVE_SPACE]; /* Align to 64 bits */
+ /*
+ * at the end is tacked on the INIT chunk and the INIT-ACK chunk
+ * (minus the cookie).
+ */
+} SCTP_PACKED;
+
+struct sctp_inv_mandatory_param {
+ uint16_t cause;
+ uint16_t length;
+ uint32_t num_param;
+ uint16_t param;
+ /*
+ * We include this so it can be zeroed, since only a missing cookie
+ * will cause this error.
+ */
+ uint16_t resv;
+} SCTP_PACKED;
+
+struct sctp_unresolv_addr {
+ uint16_t cause;
+ uint16_t length;
+ uint16_t addr_type;
+ uint16_t reserved; /* Only one invalid addr type */
+} SCTP_PACKED;
+
+/* state cookie parameter */
+struct sctp_state_cookie_param {
+ struct sctp_paramhdr ph;
+ struct sctp_state_cookie cookie;
+} SCTP_PACKED;
+
+struct sctp_init_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_init init;
+} SCTP_PACKED;
+
+struct sctp_init_msg {
+ struct sctphdr sh;
+ struct sctp_init_chunk msg;
+} SCTP_PACKED;
+
+/* ... used for both INIT and INIT ACK */
+#define sctp_init_ack sctp_init
+#define sctp_init_ack_chunk sctp_init_chunk
+#define sctp_init_ack_msg sctp_init_msg
+
+
+/* Selective Ack (SACK) */
+struct sctp_gap_ack_block {
+ uint16_t start; /* Gap Ack block start */
+ uint16_t end; /* Gap Ack block end */
+} SCTP_PACKED;
+
+struct sctp_sack {
+ uint32_t cum_tsn_ack; /* cumulative TSN Ack */
+ uint32_t a_rwnd; /* updated a_rwnd of sender */
+ uint16_t num_gap_ack_blks; /* number of Gap Ack blocks */
+ uint16_t num_dup_tsns; /* number of duplicate TSNs */
+ /* struct sctp_gap_ack_block's follow */
+ /* uint32_t duplicate_tsn's follow */
+} SCTP_PACKED;
+
+struct sctp_sack_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_sack sack;
+} SCTP_PACKED;
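+
+/*
+ * Only the fixed part of the SACK is declared above; as the comments note,
+ * the gap blocks and duplicate TSNs follow it in the packet. A purely
+ * illustrative userland sketch of how a receiver might walk that tail
+ * (assuming the structures above plus <stdio.h> and <arpa/inet.h>; the
+ * function name is made up and is not part of this header):
+ *
+ *	static void
+ *	dump_sack(const struct sctp_sack_chunk *sc)
+ *	{
+ *		const struct sctp_gap_ack_block *gap =
+ *		    (const struct sctp_gap_ack_block *)(sc + 1);
+ *		uint16_t ngaps = ntohs(sc->sack.num_gap_ack_blks);
+ *		uint16_t ndups = ntohs(sc->sack.num_dup_tsns);
+ *		const uint32_t *dup = (const uint32_t *)(gap + ngaps);
+ *		uint16_t i;
+ *
+ *		printf("cum tsn ack %u, a_rwnd %u\n",
+ *		    ntohl(sc->sack.cum_tsn_ack), ntohl(sc->sack.a_rwnd));
+ *		for (i = 0; i < ngaps; i++)
+ *			printf("  gap block %u: %u-%u\n", i,
+ *			    ntohs(gap[i].start), ntohs(gap[i].end));
+ *		for (i = 0; i < ndups; i++)
+ *			printf("  duplicate tsn %u\n", ntohl(dup[i]));
+ *	}
+ */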
+
+
+/* Heartbeat Request (HEARTBEAT) */
+struct sctp_heartbeat {
+ struct sctp_heartbeat_info_param hb_info;
+} SCTP_PACKED;
+
+struct sctp_heartbeat_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_heartbeat heartbeat;
+} SCTP_PACKED;
+
+/* ... used for Heartbeat Ack (HEARTBEAT ACK) */
+#define sctp_heartbeat_ack sctp_heartbeat
+#define sctp_heartbeat_ack_chunk sctp_heartbeat_chunk
+
+
+/* Abort Association (ABORT) */
+struct sctp_abort_chunk {
+ struct sctp_chunkhdr ch;
+ /* optional error cause may follow */
+} SCTP_PACKED;
+
+struct sctp_abort_msg {
+ struct sctphdr sh;
+ struct sctp_abort_chunk msg;
+} SCTP_PACKED;
+
+
+/* Shutdown Association (SHUTDOWN) */
+struct sctp_shutdown_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t cumulative_tsn_ack;
+} SCTP_PACKED;
+
+
+/* Shutdown Acknowledgment (SHUTDOWN ACK) */
+struct sctp_shutdown_ack_chunk {
+ struct sctp_chunkhdr ch;
+} SCTP_PACKED;
+
+
+/* Operation Error (ERROR) */
+struct sctp_error_chunk {
+ struct sctp_chunkhdr ch;
+ /* optional error causes follow */
+} SCTP_PACKED;
+
+
+/* Cookie Echo (COOKIE ECHO) */
+struct sctp_cookie_echo_chunk {
+ struct sctp_chunkhdr ch;
+ struct sctp_state_cookie cookie;
+} SCTP_PACKED;
+
+/* Cookie Acknowledgment (COOKIE ACK) */
+struct sctp_cookie_ack_chunk {
+ struct sctp_chunkhdr ch;
+} SCTP_PACKED;
+
+/* Explicit Congestion Notification Echo (ECNE) */
+struct sctp_ecne_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t tsn;
+} SCTP_PACKED;
+
+/* Congestion Window Reduced (CWR) */
+struct sctp_cwr_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t tsn;
+} SCTP_PACKED;
+
+/* Shutdown Complete (SHUTDOWN COMPLETE) */
+struct sctp_shutdown_complete_chunk {
+ struct sctp_chunkhdr ch;
+} SCTP_PACKED;
+
+/* Oper error holding a stale cookie */
+struct sctp_stale_cookie_msg {
+ struct sctp_paramhdr ph;/* really an error cause */
+ uint32_t time_usec;
+} SCTP_PACKED;
+
+struct sctp_adaptation_layer_indication {
+ struct sctp_paramhdr ph;
+ uint32_t indication;
+} SCTP_PACKED;
+
+struct sctp_cookie_while_shutting_down {
+ struct sctphdr sh;
+ struct sctp_chunkhdr ch;
+ struct sctp_paramhdr ph;/* really an error cause */
+} SCTP_PACKED;
+
+struct sctp_shutdown_complete_msg {
+ struct sctphdr sh;
+ struct sctp_shutdown_complete_chunk shut_cmp;
+} SCTP_PACKED;
+
+/*
+ * draft-ietf-tsvwg-addip-sctp
+ */
+/* Address/Stream Configuration Change (ASCONF) */
+struct sctp_asconf_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t serial_number;
+ /* lookup address parameter (mandatory) */
+ /* asconf parameters follow */
+} SCTP_PACKED;
+
+/* Address/Stream Configuration Acknowledge (ASCONF ACK) */
+struct sctp_asconf_ack_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t serial_number;
+ /* asconf parameters follow */
+} SCTP_PACKED;
+
+/* draft-ietf-tsvwg-prsctp */
+/* Forward Cumulative TSN (FORWARD TSN) */
+struct sctp_forward_tsn_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t new_cumulative_tsn;
+ /* stream/sequence pairs (sctp_strseq) follow */
+} SCTP_PACKED;
+
+struct sctp_strseq {
+ uint16_t stream;
+ uint16_t sequence;
+} SCTP_PACKED;
+
+struct sctp_forward_tsn_msg {
+ struct sctphdr sh;
+ struct sctp_forward_tsn_chunk msg;
+} SCTP_PACKED;
+
+/* should be a multiple of 4 - 1 aka 3/7/11 etc. */
+
+#define SCTP_NUM_DB_TO_VERIFY 31
+
+struct sctp_chunk_desc {
+ uint8_t chunk_type;
+ uint8_t data_bytes[SCTP_NUM_DB_TO_VERIFY];
+ uint32_t tsn_ifany;
+} SCTP_PACKED;
+
+
+struct sctp_pktdrop_chunk {
+ struct sctp_chunkhdr ch;
+ uint32_t bottle_bw;
+ uint32_t current_onq;
+ uint16_t trunc_len;
+ uint16_t reserved;
+ uint8_t data[0];
+} SCTP_PACKED;
+
+/**********STREAM RESET STUFF ******************/
+
+struct sctp_stream_reset_out_request {
+ struct sctp_paramhdr ph;
+ uint32_t request_seq; /* monotonically increasing seq no */
+ uint32_t response_seq; /* if a response, the resp seq no */
+ uint32_t send_reset_at_tsn; /* last TSN I assigned outbound */
+ uint16_t list_of_streams[0]; /* if not all list of streams */
+} SCTP_PACKED;
+
+struct sctp_stream_reset_in_request {
+ struct sctp_paramhdr ph;
+ uint32_t request_seq;
+ uint16_t list_of_streams[0]; /* if not all list of streams */
+} SCTP_PACKED;
+
+
+struct sctp_stream_reset_tsn_request {
+ struct sctp_paramhdr ph;
+ uint32_t request_seq;
+} SCTP_PACKED;
+
+struct sctp_stream_reset_response {
+ struct sctp_paramhdr ph;
+ uint32_t response_seq; /* if a response, the resp seq no */
+ uint32_t result;
+} SCTP_PACKED;
+
+struct sctp_stream_reset_response_tsn {
+ struct sctp_paramhdr ph;
+ uint32_t response_seq; /* if a response, the resp seq no */
+ uint32_t result;
+ uint32_t senders_next_tsn;
+ uint32_t receivers_next_tsn;
+} SCTP_PACKED;
+
+
+
+#define SCTP_STREAM_RESET_NOTHING 0x00000000 /* Nothing for me to do */
+#define SCTP_STREAM_RESET_PERFORMED 0x00000001 /* Did it */
+#define SCTP_STREAM_RESET_DENIED 0x00000002 /* refused to do it */
+#define SCTP_STREAM_RESET_ERROR_STR 0x00000003 /* bad Stream no */
+#define SCTP_STREAM_RESET_TRY_LATER 0x00000004 /* collision, try again */
+#define SCTP_STREAM_RESET_BAD_SEQNO 0x00000005 /* bad str-reset seq no */
+
+/*
+ * Convenience structures; note that if you are making a request for
+ * specific streams, then the request will need to be an overlay structure.
+ */
+
+struct sctp_stream_reset_out_req {
+ struct sctp_chunkhdr ch;
+ struct sctp_stream_reset_out_request sr_req;
+} SCTP_PACKED;
+
+struct sctp_stream_reset_in_req {
+ struct sctp_chunkhdr ch;
+ struct sctp_stream_reset_in_request sr_req;
+} SCTP_PACKED;
+
+struct sctp_stream_reset_tsn_req {
+ struct sctp_chunkhdr ch;
+ struct sctp_stream_reset_tsn_request sr_req;
+} SCTP_PACKED;
+
+struct sctp_stream_reset_resp {
+ struct sctp_chunkhdr ch;
+ struct sctp_stream_reset_response sr_resp;
+} SCTP_PACKED;
+
+/* response only valid with a TSN request */
+struct sctp_stream_reset_resp_tsn {
+ struct sctp_chunkhdr ch;
+ struct sctp_stream_reset_response_tsn sr_resp;
+} SCTP_PACKED;
+
+/****************************************************/
+
+/*
+ * Authenticated chunks support draft-ietf-tsvwg-sctp-auth
+ */
+
+/* Should we make the max be 32? */
+#define SCTP_RANDOM_MAX_SIZE 256
+struct sctp_auth_random {
+ struct sctp_paramhdr ph;/* type = 0x8002 */
+ uint8_t random_data[0];
+} SCTP_PACKED;
+
+struct sctp_auth_chunk_list {
+ struct sctp_paramhdr ph;/* type = 0x8003 */
+ uint8_t chunk_types[0];
+} SCTP_PACKED;
+
+struct sctp_auth_hmac_algo {
+ struct sctp_paramhdr ph;/* type = 0x8004 */
+ uint16_t hmac_ids[0];
+} SCTP_PACKED;
+
+struct sctp_auth_chunk {
+ struct sctp_chunkhdr ch;
+ uint16_t shared_key_id;
+ uint16_t hmac_id;
+ uint8_t hmac[0];
+} SCTP_PACKED;
+
+struct sctp_auth_invalid_hmac {
+ struct sctp_paramhdr ph;
+ uint16_t hmac_id;
+ uint16_t padding;
+} SCTP_PACKED;
+
+/*
+ * we pre-reserve enough room for an ECNE or CWR AND a SACK with no missing
+ * pieces. If ECNE is missing we could have a couple of blocks. This way we
+ * optimize so we MOST likely can bundle a SACK/ECN with the smallest size
+ * data chunk I will split into. We could increase throughput slightly by
+ * taking out these two but the 24-sack/8-CWR i.e. 32 bytes I pre-reserve I
+ * feel is worth it for now.
+ */
+#ifndef SCTP_MAX_OVERHEAD
+#ifdef INET6
+#define SCTP_MAX_OVERHEAD (sizeof(struct sctp_data_chunk) + \
+ sizeof(struct sctphdr) + \
+ sizeof(struct sctp_ecne_chunk) + \
+ sizeof(struct sctp_sack_chunk) + \
+ sizeof(struct ip6_hdr))
+
+#define SCTP_MED_OVERHEAD (sizeof(struct sctp_data_chunk) + \
+ sizeof(struct sctphdr) + \
+ sizeof(struct ip6_hdr))
+
+
+#define SCTP_MIN_OVERHEAD (sizeof(struct ip6_hdr) + \
+ sizeof(struct sctphdr))
+
+#else
+#define SCTP_MAX_OVERHEAD (sizeof(struct sctp_data_chunk) + \
+ sizeof(struct sctphdr) + \
+ sizeof(struct sctp_ecne_chunk) + \
+ sizeof(struct sctp_sack_chunk) + \
+ sizeof(struct ip))
+
+#define SCTP_MED_OVERHEAD (sizeof(struct sctp_data_chunk) + \
+ sizeof(struct sctphdr) + \
+ sizeof(struct ip))
+
+
+#define SCTP_MIN_OVERHEAD (sizeof(struct ip) + \
+ sizeof(struct sctphdr))
+
+#endif /* INET6 */
+#endif /* !SCTP_MAX_OVERHEAD */
+
+#define SCTP_MED_V4_OVERHEAD (sizeof(struct sctp_data_chunk) + \
+ sizeof(struct sctphdr) + \
+ sizeof(struct ip))
+
+#define SCTP_MIN_V4_OVERHEAD (sizeof(struct ip) + \
+ sizeof(struct sctphdr))
+
+#endif /* !__sctp_header_h__ */
Index: tcp_debug.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_debug.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/tcp_debug.h -L sys/netinet/tcp_debug.h -u -r1.1.1.1 -r1.2
--- sys/netinet/tcp_debug.h
+++ sys/netinet/tcp_debug.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_debug.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/tcp_debug.h,v 1.15 2005/01/07 01:45:45 imp Exp $
+ * $FreeBSD: src/sys/netinet/tcp_debug.h,v 1.16 2007/03/24 22:15:02 maxim Exp $
*/
#ifndef _NETINET_TCP_DEBUG_H_
@@ -45,10 +45,11 @@
*/
struct tcpiphdr td_ti;
struct {
+#define IP6_HDR_LEN 40 /* sizeof(struct ip6_hdr) */
#if !defined(_KERNEL) && defined(INET6)
struct ip6_hdr ip6;
#else
- u_char ip6buf[40]; /* sizeof(struct ip6_hdr) */
+ u_char ip6buf[IP6_HDR_LEN];
#endif
struct tcphdr th;
} td_ti6;
Index: ip_gre.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_gre.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_gre.c -L sys/netinet/ip_gre.c -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_gre.c
+++ sys/netinet/ip_gre.c
@@ -1,5 +1,4 @@
/* $NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $ */
-/* $FreeBSD: src/sys/netinet/ip_gre.c,v 1.19.2.2 2006/01/27 21:50:10 bz Exp $ */
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -45,6 +44,9 @@
* This currently handles IPPROTO_GRE, IPPROTO_MOBILE
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_gre.c,v 1.25 2007/10/07 20:44:23 silby Exp $");
+
#include "opt_inet.h"
#include "opt_atalk.h"
#include "opt_inet6.h"
@@ -95,7 +97,7 @@
static struct gre_softc *gre_lookup(struct mbuf *, u_int8_t);
-static int gre_input2(struct mbuf *, int, u_char);
+static struct mbuf *gre_input2(struct mbuf *, int, u_char);
/*
* De-encapsulate a packet and feed it back through ip input (this
@@ -106,29 +108,27 @@
void
gre_input(struct mbuf *m, int off)
{
- int ret, proto;
+ int proto;
proto = (mtod(m, struct ip *))->ip_p;
- ret = gre_input2(m, off, proto);
+ m = gre_input2(m, off, proto);
+
/*
- * ret == 0 : packet not processed, meaning that
- * no matching tunnel that is up is found.
- * we inject it to raw ip socket to see if anyone picks it up.
+ * If no matching tunnel that is up is found, we inject
+ * the mbuf into the raw ip socket to see if anyone picks it up.
*/
- if (ret == 0)
+ if (m != NULL)
rip_input(m, off);
}
/*
- * decapsulate.
- * Does the real work and is called from gre_input() (above)
- * returns 0 if packet is not yet processed
- * and 1 if it needs no further processing
- * proto is the protocol number of the "calling" foo_input()
- * routine.
+ * Decapsulate. Does the real work and is called from gre_input()
+ * (above). Returns an mbuf back if packet is not yet processed,
+ * and NULL if it needs no further processing. proto is the protocol
+ * number of the "calling" foo_input() routine.
*/
-static int
+static struct mbuf *
gre_input2(struct mbuf *m ,int hlen, u_char proto)
{
struct greip *gip;
@@ -139,13 +139,13 @@
if ((sc = gre_lookup(m, proto)) == NULL) {
/* No matching tunnel or tunnel is down. */
- return (0);
+ return (m);
}
if (m->m_len < sizeof(*gip)) {
m = m_pullup(m, sizeof(*gip));
if (m == NULL)
- return (ENOBUFS);
+ return (NULL);
}
gip = mtod(m, struct greip *);
@@ -164,7 +164,7 @@
hlen += 4;
/* We don't support routing fields (variable length) */
if (flags & GRE_RP)
- return (0);
+ return (m);
if (flags & GRE_KP)
hlen += 4;
if (flags & GRE_SP)
@@ -191,23 +191,24 @@
af = AF_APPLETALK;
break;
#endif
- default: /* others not yet supported */
- return (0);
+ default:
+ /* Others not yet supported. */
+ return (m);
}
break;
default:
- /* others not yet supported */
- return (0);
+ /* Others not yet supported. */
+ return (m);
}
if (hlen > m->m_pkthdr.len) {
m_freem(m);
- return (EINVAL);
+ return (NULL);
}
/* Unlike NetBSD, in FreeBSD m_adj() adjusts m->m_pkthdr.len as well */
m_adj(m, hlen);
- if (GRE2IFP(sc)->if_bpf) {
+ if (bpf_peers_present(GRE2IFP(sc)->if_bpf)) {
bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m);
}
@@ -215,7 +216,8 @@
netisr_dispatch(isr, m);
- return (1); /* packet is done, no further processing needed */
+ /* Packet is done, no further processing needed. */
+ return (NULL);
}
/*
@@ -289,7 +291,7 @@
ip->ip_sum = 0;
ip->ip_sum = in_cksum(m, (ip->ip_hl << 2));
- if (GRE2IFP(sc)->if_bpf) {
+ if (bpf_peers_present(GRE2IFP(sc)->if_bpf)) {
u_int32_t af = AF_INET;
bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m);
}
@@ -310,9 +312,7 @@
* in_gre.c during destroy.
*/
static struct gre_softc *
-gre_lookup(m, proto)
- struct mbuf *m;
- u_int8_t proto;
+gre_lookup(struct mbuf *m, u_int8_t proto)
{
struct ip *ip = mtod(m, struct ip *);
struct gre_softc *sc;
Index: ip6.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip6.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/ip6.h -L sys/netinet/ip6.h -u -r1.1.1.1 -r1.2
--- sys/netinet/ip6.h
+++ sys/netinet/ip6.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet/ip6.h,v 1.13.2.2 2005/10/09 05:50:43 ume Exp $ */
+/* $FreeBSD: src/sys/netinet/ip6.h,v 1.15 2005/07/20 10:30:52 ume Exp $ */
/* $KAME: ip6.h,v 1.18 2001/03/29 05:34:30 itojun Exp $ */
/*-
Index: in_rmx.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/in_rmx.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/in_rmx.c -L sys/netinet/in_rmx.c -u -r1.1.1.1 -r1.2
--- sys/netinet/in_rmx.c
+++ sys/netinet/in_rmx.c
@@ -25,8 +25,6 @@
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/in_rmx.c,v 1.53 2005/01/07 01:45:44 imp Exp $
*/
/*
@@ -42,6 +40,9 @@
* indefinitely. See in_rtqtimo() below for the exact mechanism.
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/in_rmx.c,v 1.57 2007/10/07 20:44:22 silby Exp $");
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@@ -66,7 +67,7 @@
*/
static struct radix_node *
in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
- struct radix_node *treenodes)
+ struct radix_node *treenodes)
{
struct rtentry *rt = (struct rtentry *)treenodes;
struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt);
@@ -190,7 +191,7 @@
*/
if (rtq_reallyold != 0) {
rt->rt_flags |= RTPRF_OURS;
- rt->rt_rmx.rmx_expire = time_second + rtq_reallyold;
+ rt->rt_rmx.rmx_expire = time_uptime + rtq_reallyold;
} else {
rtexpunge(rt);
}
@@ -220,7 +221,7 @@
if (rt->rt_flags & RTPRF_OURS) {
ap->found++;
- if (ap->draining || rt->rt_rmx.rmx_expire <= time_second) {
+ if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) {
if (rt->rt_refcnt > 0)
panic("rtqkill route really not free");
@@ -235,10 +236,10 @@
}
} else {
if (ap->updating &&
- (rt->rt_rmx.rmx_expire - time_second >
+ (rt->rt_rmx.rmx_expire - time_uptime >
rtq_reallyold)) {
rt->rt_rmx.rmx_expire =
- time_second + rtq_reallyold;
+ time_uptime + rtq_reallyold;
}
ap->nextstop = lmin(ap->nextstop,
rt->rt_rmx.rmx_expire);
@@ -262,7 +263,7 @@
arg.found = arg.killed = 0;
arg.rnh = rnh;
- arg.nextstop = time_second + rtq_timeout;
+ arg.nextstop = time_uptime + rtq_timeout;
arg.draining = arg.updating = 0;
RADIX_NODE_HEAD_LOCK(rnh);
rnh->rnh_walktree(rnh, in_rtqkill, &arg);
@@ -277,14 +278,14 @@
* hard.
*/
if ((arg.found - arg.killed > rtq_toomany) &&
- (time_second - last_adjusted_timeout >= rtq_timeout) &&
+ (time_uptime - last_adjusted_timeout >= rtq_timeout) &&
rtq_reallyold > rtq_minreallyold) {
rtq_reallyold = 2 * rtq_reallyold / 3;
if (rtq_reallyold < rtq_minreallyold) {
rtq_reallyold = rtq_minreallyold;
}
- last_adjusted_timeout = time_second;
+ last_adjusted_timeout = time_uptime;
#ifdef DIAGNOSTIC
log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n",
rtq_reallyold);
@@ -297,7 +298,7 @@
}
atv.tv_usec = 0;
- atv.tv_sec = arg.nextstop - time_second;
+ atv.tv_sec = arg.nextstop - time_uptime;
callout_reset(&rtq_timer, tvtohz(&atv), in_rtqtimo, rock);
}
Index: accf_data.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/accf_data.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/accf_data.c -L sys/netinet/accf_data.c -u -r1.1.1.1 -r1.2
--- sys/netinet/accf_data.c
+++ sys/netinet/accf_data.c
@@ -22,10 +22,11 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/accf_data.c,v 1.10 2004/05/30 20:23:30 phk Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/accf_data.c,v 1.11 2007/10/07 20:44:22 silby Exp $");
+
#define ACCEPT_FILTER_MOD
#include <sys/param.h>
Index: ip_divert.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/ip_divert.c -L sys/netinet/ip_divert.c -u -r1.1.1.1 -r1.2
--- sys/netinet/ip_divert.c
+++ sys/netinet/ip_divert.c
@@ -25,10 +25,11 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/ip_divert.c,v 1.113.2.1 2005/11/16 10:31:22 ru Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_divert.c,v 1.130 2007/10/07 20:44:22 silby Exp $");
+
#if !defined(KLD_MODULE)
#include "opt_inet.h"
#include "opt_ipfw.h"
@@ -45,10 +46,10 @@
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
-#include <sys/mac.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/kernel.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
@@ -61,6 +62,7 @@
#include <vm/uma.h>
#include <net/if.h>
+#include <net/netisr.h>
#include <net/route.h>
#include <netinet/in.h>
@@ -72,6 +74,8 @@
#include <netinet/ip_var.h>
#include <netinet/ip_fw.h>
+#include <security/mac/mac_framework.h>
+
/*
* Divert sockets
*/
@@ -116,22 +120,51 @@
/*
* Initialize divert connection block queue.
*/
+static void
+div_zone_change(void *tag)
+{
+
+ uma_zone_set_max(divcbinfo.ipi_zone, maxsockets);
+}
+
+static int
+div_inpcb_init(void *mem, int size, int flags)
+{
+ struct inpcb *inp = mem;
+
+ INP_LOCK_INIT(inp, "inp", "divinp");
+ return (0);
+}
+
+static void
+div_inpcb_fini(void *mem, int size)
+{
+ struct inpcb *inp = mem;
+
+ INP_LOCK_DESTROY(inp);
+}
+
void
div_init(void)
{
+
INP_INFO_LOCK_INIT(&divcbinfo, "div");
LIST_INIT(&divcb);
- divcbinfo.listhead = &divcb;
+ divcbinfo.ipi_listhead = &divcb;
/*
* XXX We don't use the hash list for divert IP, but it's easier
* to allocate a one entry hash list than it is to check all
* over the place for hashbase == NULL.
*/
- divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask);
- divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask);
+ divcbinfo.ipi_hashbase = hashinit(1, M_PCB, &divcbinfo.ipi_hashmask);
+ divcbinfo.ipi_porthashbase = hashinit(1, M_PCB,
+ &divcbinfo.ipi_porthashmask);
divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ NULL, NULL, div_inpcb_init, div_inpcb_fini, UMA_ALIGN_PTR,
+ UMA_ZONE_NOFREE);
uma_zone_set_max(divcbinfo.ipi_zone, maxsockets);
+ EVENTHANDLER_REGISTER(maxsockets_change, div_zone_change,
+ NULL, EVENTHANDLER_PRI_ANY);
}
/*
@@ -197,8 +230,6 @@
/* Find IP address for receive interface */
TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
- if (ifa->ifa_addr == NULL)
- continue;
if (ifa->ifa_addr->sa_family != AF_INET)
continue;
divsrc.sin_addr =
@@ -270,12 +301,13 @@
* the interface with that address.
*/
static int
-div_output(struct socket *so, struct mbuf *m,
- struct sockaddr_in *sin, struct mbuf *control)
+div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin,
+ struct mbuf *control)
{
struct m_tag *mtag;
struct divert_tag *dt;
int error = 0;
+ struct mbuf *options;
/*
 * An mbuf may not have come from userland, but we pretend
@@ -332,6 +364,8 @@
if (((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options) ||
((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) {
error = EINVAL;
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&divcbinfo);
m_freem(m);
} else {
/* Convert fields to host order for ip_output() */
@@ -344,15 +378,46 @@
#ifdef MAC
mac_create_mbuf_from_inpcb(inp, m);
#endif
- error = ip_output(m,
- inp->inp_options, NULL,
- ((so->so_options & SO_DONTROUTE) ?
- IP_ROUTETOIF : 0) |
- IP_ALLOWBROADCAST | IP_RAWOUTPUT,
- inp->inp_moptions, NULL);
+ /*
+ * Get ready to inject the packet into ip_output().
+ * Just in case socket options were specified on the
+ * divert socket, we duplicate them. This is done
+ * to avoid having to hold the PCB locks over the call
+ * to ip_output(), as doing this results in a number of
+ * lock ordering complexities.
+ *
+ * Note that we set the multicast options argument for
+ * ip_output() to NULL since it should be invariant that
+ * they are not present.
+ */
+ KASSERT(inp->inp_moptions == NULL,
+ ("multicast options set on a divert socket"));
+ options = NULL;
+ /*
+ * XXXCSJP: It is unclear to me whether or not it makes
+ * sense for divert sockets to have options. However,
+ * for now we will duplicate them with the INP locks
+ * held so we can use them in ip_output() without
+ * requiring a reference to the pcb.
+ */
+ if (inp->inp_options != NULL) {
+ options = m_dup(inp->inp_options, M_DONTWAIT);
+ if (options == NULL)
+ error = ENOBUFS;
+ }
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&divcbinfo);
+ if (error == ENOBUFS) {
+ m_freem(m);
+ return (error);
+ }
+ error = ip_output(m, options, NULL,
+ ((so->so_options & SO_DONTROUTE) ?
+ IP_ROUTETOIF : 0) | IP_ALLOWBROADCAST |
+ IP_RAWOUTPUT, NULL, NULL);
+ if (options != NULL)
+ m_freem(options);
}
- INP_UNLOCK(inp);
- INP_INFO_WUNLOCK(&divcbinfo);
} else {
dt->info |= IP_FW_DIVERT_LOOPBACK_FLAG;
if (m->m_pkthdr.rcvif == NULL) {
@@ -377,8 +442,8 @@
mac_create_mbuf_from_socket(so, m);
SOCK_UNLOCK(so);
#endif
- /* Send packet to input processing */
- ip_input(m);
+ /* Send packet to input processing via netisr */
+ netisr_queue(NETISR_IP, m);
}
return error;
@@ -394,28 +459,23 @@
struct inpcb *inp;
int error;
- INP_INFO_WLOCK(&divcbinfo);
inp = sotoinpcb(so);
- if (inp != 0) {
- INP_INFO_WUNLOCK(&divcbinfo);
- return EINVAL;
- }
- if (td && (error = suser(td)) != 0) {
- INP_INFO_WUNLOCK(&divcbinfo);
- return error;
+ KASSERT(inp == NULL, ("div_attach: inp != NULL"));
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NETINET_DIVERT);
+ if (error)
+ return (error);
}
error = soreserve(so, div_sendspace, div_recvspace);
- if (error) {
- INP_INFO_WUNLOCK(&divcbinfo);
+ if (error)
return error;
- }
- error = in_pcballoc(so, &divcbinfo, "divinp");
+ INP_INFO_WLOCK(&divcbinfo);
+ error = in_pcballoc(so, &divcbinfo);
if (error) {
INP_INFO_WUNLOCK(&divcbinfo);
return error;
}
inp = (struct inpcb *)so->so_pcb;
- INP_LOCK(inp);
INP_INFO_WUNLOCK(&divcbinfo);
inp->inp_ip_p = proto;
inp->inp_vflag |= INP_IPV4;
@@ -424,21 +484,18 @@
return 0;
}
-static int
+static void
div_detach(struct socket *so)
{
struct inpcb *inp;
- INP_INFO_WLOCK(&divcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&divcbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("div_detach: inp == NULL"));
+ INP_INFO_WLOCK(&divcbinfo);
INP_LOCK(inp);
in_pcbdetach(inp);
+ in_pcbfree(inp);
INP_INFO_WUNLOCK(&divcbinfo);
- return 0;
}
static int
@@ -447,12 +504,8 @@
struct inpcb *inp;
int error;
- INP_INFO_WLOCK(&divcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&divcbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("div_bind: inp == NULL"));
/* in_pcbbind assumes that nam is a sockaddr_in
* and in_pcbbind requires a valid address. Since divert
* sockets don't we need to make sure the address is
@@ -461,13 +514,12 @@
* and should probably have its own family.
*/
if (nam->sa_family != AF_INET)
- error = EAFNOSUPPORT;
- else {
- ((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY;
- INP_LOCK(inp);
- error = in_pcbbind(inp, nam, td->td_ucred);
- INP_UNLOCK(inp);
- }
+ return EAFNOSUPPORT;
+ ((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY;
+ INP_INFO_WLOCK(&divcbinfo);
+ INP_LOCK(inp);
+ error = in_pcbbind(inp, nam, td->td_ucred);
+ INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&divcbinfo);
return error;
}
@@ -477,14 +529,9 @@
{
struct inpcb *inp;
- INP_INFO_RLOCK(&divcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_RUNLOCK(&divcbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("div_shutdown: inp == NULL"));
INP_LOCK(inp);
- INP_INFO_RUNLOCK(&divcbinfo);
socantsendmore(so);
INP_UNLOCK(inp);
return 0;
@@ -492,7 +539,7 @@
static int
div_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
- struct mbuf *control, struct thread *td)
+ struct mbuf *control, struct thread *td)
{
/* Packet must have a header (but that's about it) */
if (m->m_len < sizeof (struct ip) &&
@@ -566,7 +613,7 @@
return ENOMEM;
INP_INFO_RLOCK(&divcbinfo);
- for (inp = LIST_FIRST(divcbinfo.listhead), i = 0; inp && i < n;
+ for (inp = LIST_FIRST(divcbinfo.ipi_listhead), i = 0; inp && i < n;
inp = LIST_NEXT(inp, inp_list)) {
INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt &&
@@ -580,6 +627,7 @@
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
+ INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt) {
struct xinpcb xi;
bzero(&xi, sizeof(xi));
@@ -588,8 +636,10 @@
bcopy(inp, &xi.xi_inp, sizeof *inp);
if (inp->inp_socket)
sotoxsocket(inp->inp_socket, &xi.xi_socket);
+ INP_UNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
- }
+ } else
+ INP_UNLOCK(inp);
}
if (!error) {
/*
@@ -610,26 +660,6 @@
return error;
}
-/*
- * This is the wrapper function for in_setsockaddr. We just pass down
- * the pcbinfo for in_setpeeraddr to lock.
- */
-static int
-div_sockaddr(struct socket *so, struct sockaddr **nam)
-{
- return (in_setsockaddr(so, nam, &divcbinfo));
-}
-
-/*
- * This is the wrapper function for in_setpeeraddr. We just pass down
- * the pcbinfo for in_setpeeraddr to lock.
- */
-static int
-div_peeraddr(struct socket *so, struct sockaddr **nam)
-{
- return (in_setpeeraddr(so, nam, &divcbinfo));
-}
-
#ifdef SYSCTL_NODE
SYSCTL_NODE(_net_inet, IPPROTO_DIVERT, divert, CTLFLAG_RW, 0, "IPDIVERT");
SYSCTL_PROC(_net_inet_divert, OID_AUTO, pcblist, CTLFLAG_RD, 0, 0,
@@ -641,10 +671,10 @@
.pru_bind = div_bind,
.pru_control = in_control,
.pru_detach = div_detach,
- .pru_peeraddr = div_peeraddr,
+ .pru_peeraddr = in_getpeeraddr,
.pru_send = div_send,
.pru_shutdown = div_shutdown,
- .pru_sockaddr = div_sockaddr,
+ .pru_sockaddr = in_getsockaddr,
.pru_sosetlabel = in_pcbsosetlabel
};
Index: accf_http.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/accf_http.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/accf_http.c -L sys/netinet/accf_http.c -u -r1.1.1.1 -r1.2
--- sys/netinet/accf_http.c
+++ sys/netinet/accf_http.c
@@ -23,10 +23,11 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/accf_http.c,v 1.16 2005/01/07 01:45:44 imp Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/accf_http.c,v 1.17 2007/10/07 20:44:22 silby Exp $");
+
#define ACCEPT_FILTER_MOD
#include <sys/param.h>
--- /dev/null
+++ sys/netinet/sctp_crc32.c
@@ -0,0 +1,712 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_crc32.c,v 1.12 2005/03/06 16:04:17 itojun Exp $ */
+
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_crc32.c,v 1.8 2007/05/08 17:01:10 rrs Exp $");
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_crc32.h>
+
+#ifndef SCTP_USE_ADLER32
+
+
+/**
+ *
+ * Routine Description:
+ *
+ * Computes the CRC32c checksum for the specified buffer using the slicing by 8
+ * algorithm over 64 bit quantities.
+ *
+ * Arguments:
+ *
+ * crc - the initial or running remainder value used in the CRC
+ * computation
+ * p_buf - the packet buffer where crc computations are being performed
+ * length - the length of p_buf in bytes
+ * init_bytes - the number of initial bytes that need to be processed before
+ * aligning p_buf to multiples of 4 bytes
+ *
+ * Return value:
+ *
+ * The computed CRC32c value
+ */
+
+
+/*
+ * Copyright (c) 2004-2006 Intel Corporation - All Rights Reserved
+ *
+ *
+ * This software program is licensed subject to the BSD License, available at
+ * http://www.opensource.org/licenses/bsd-license.html.
+ *
+ * Abstract:
+ *
+ * Tables for software CRC generation
+ */
+
+/*
+ * The following CRC lookup table was generated automagically using the
+ * following model parameters:
+ *
+ * Generator Polynomial        = 0x1EDC6F41
+ * Generator Polynomial Length = 32 bits
+ * Reflected Bits              = TRUE
+ * Table Generation Offset     = 32 bits
+ * Number of Slices            = 8 slices
+ * Slice Lengths               = 8 8 8 8 8 8 8 8
+ * Directory Name              = .\
+ * File Name                   = 8x256_tables.c
+ */
+
+uint32_t sctp_crc_tableil8_o32[256] =
+{
+ 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
+ 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
+ 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
+ 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
+ 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
+ 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
+ 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
+ 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
+ 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
+ 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
+ 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
+ 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
+ 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
+ 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
+ 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
+ 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
+ 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
+ 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
+ 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
+ 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
+ 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
+ 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
+ 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
+ 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
+ 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
+ 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
+ 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
+ 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
+ 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
+ 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
+ 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
+ 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o32
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically using the
+ * following model parameters:
+ *
+ * Generator Polynomial        = 0x1EDC6F41
+ * Generator Polynomial Length = 32 bits
+ * Reflected Bits              = TRUE
+ * Table Generation Offset     = 32 bits
+ * Number of Slices            = 8 slices
+ * Slice Lengths               = 8 8 8 8 8 8 8 8
+ * Directory Name              = .\
+ * File Name                   = 8x256_tables.c
+ */
+
+uint32_t sctp_crc_tableil8_o40[256] =
+{
+ 0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945,
+ 0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD,
+ 0x3FC5F181, 0x2C6769F6, 0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4,
+ 0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C,
+ 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, 0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47,
+ 0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, 0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF,
+ 0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28, 0x298143B1, 0x3A23DBC6,
+ 0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, 0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E,
+ 0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41,
+ 0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9,
+ 0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C, 0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0,
+ 0x5DC6F43D, 0x4E646C4A, 0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78,
+ 0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, 0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43,
+ 0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27, 0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB,
+ 0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, 0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2,
+ 0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, 0x6CC776E3, 0x7F65EE94, 0x4B82460D, 0x5820DE7A,
+ 0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, 0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC,
+ 0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004,
+ 0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, 0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D,
+ 0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059, 0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185,
+ 0x844819FB, 0x97EA818C, 0xA30D2915, 0xB0AFB162, 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE,
+ 0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, 0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306,
+ 0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3, 0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F,
+ 0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, 0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287,
+ 0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, 0x4A5E5D21, 0x59FCC556, 0x6D1B6DCF, 0x7EB9F5B8,
+ 0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, 0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 0xE3AD3600,
+ 0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439,
+ 0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, 0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781,
+ 0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766, 0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA,
+ 0xE64B1C47, 0xF5E98430, 0xC10E2CA9, 0xD2ACB4DE, 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502,
+ 0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B,
+ 0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, 0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o40
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically using the
+ * following model parameters:
+ *
+ * Generator Polynomial        = 0x1EDC6F41
+ * Generator Polynomial Length = 32 bits
+ * Reflected Bits              = TRUE
+ * Table Generation Offset     = 32 bits
+ * Number of Slices            = 8 slices
+ * Slice Lengths               = 8 8 8 8 8 8 8 8
+ * Directory Name              = .\
+ * File Name                   = 8x256_tables.c
+ */
+
+uint32_t sctp_crc_tableil8_o48[256] =
+{
+ 0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469,
+ 0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC,
+ 0x70A27D8A, 0xD5E3EFF4, 0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3,
+ 0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, 0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726,
+ 0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67, 0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D,
+ 0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, 0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8,
+ 0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, 0x0F382284, 0xAA79B0FA, 0x40577089, 0xE516E2F7,
+ 0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, 0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32,
+ 0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0,
+ 0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, 0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75,
+ 0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, 0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A,
+ 0x8F96C396, 0x2AD751E8, 0xC0F9919B, 0x65B803E5, 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF,
+ 0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, 0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4,
+ 0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B, 0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161,
+ 0x56830647, 0xF3C29439, 0x19EC544A, 0xBCADC634, 0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E,
+ 0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, 0xF00C9C98, 0x554D0EE6, 0xBF63CE95, 0x1A225CEB,
+ 0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, 0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A,
+ 0xB3764986, 0x1637DBF8, 0xFC191B8B, 0x595889F5, 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF,
+ 0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, 0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0,
+ 0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F, 0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065,
+ 0x6A638C57, 0xCF221E29, 0x250CDE5A, 0x804D4C24, 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E,
+ 0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, 0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB,
+ 0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE, 0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4,
+ 0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, 0xBC4E6B02, 0x190FF97C, 0xF321390F, 0x5660AB71,
+ 0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, 0xD29C5380, 0x77DDC1FE, 0x9DF3018D, 0x38B293F3,
+ 0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, 0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36,
+ 0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79,
+ 0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, 0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC,
+ 0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD, 0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7,
+ 0x9557324B, 0x3016A035, 0xDA386046, 0x7F79F238, 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622,
+ 0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D,
+ 0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, 0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o48
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically using the
+ * following model parameters:
+ *
+ * Generator Polynomial        = 0x1EDC6F41
+ * Generator Polynomial Length = 32 bits
+ * Reflected Bits              = TRUE
+ * Table Generation Offset     = 32 bits
+ * Number of Slices            = 8 slices
+ * Slice Lengths               = 8 8 8 8 8 8 8 8
+ * Directory Name              = .\
+ * File Name                   = 8x256_tables.c
+ */
+
+uint32_t sctp_crc_tableil8_o56[256] =
+{
+ 0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA,
+ 0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF, 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C,
+ 0xE964B13D, 0x34211B85, 0x560392BC, 0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7,
+ 0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, 0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11,
+ 0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2, 0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41,
+ 0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, 0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7,
+ 0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, 0x45639445, 0x98263EFD, 0xFA04B7C4, 0x27411D7C,
+ 0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, 0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A,
+ 0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D,
+ 0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, 0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB,
+ 0x42C2EEDA, 0x9F874462, 0xFDA5CD5B, 0x20E067E3, 0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610,
+ 0xB4868D3C, 0x69C32784, 0x0BE1AEBD, 0xD6A40405, 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6,
+ 0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, 0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6,
+ 0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3, 0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040,
+ 0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, 0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B,
+ 0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, 0x1881A844, 0xC5C402FC, 0xA7E68BC5, 0x7AA3217D,
+ 0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, 0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5,
+ 0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213,
+ 0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, 0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8,
+ 0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD, 0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E,
+ 0x8585DDB4, 0x58C0770C, 0x3AE2FE35, 0xE7A7548D, 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E,
+ 0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, 0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698,
+ 0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0, 0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443,
+ 0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, 0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5,
+ 0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, 0x8224A72B, 0x5F610D93, 0x3D4384AA, 0xE0062E12,
+ 0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, 0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4,
+ 0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F,
+ 0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, 0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9,
+ 0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A, 0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99,
+ 0xD867E1B5, 0x05224B0D, 0x6700C234, 0xBA45688C, 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F,
+ 0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4,
+ 0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, 0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o56
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically using the
+ * following model parameters:
+ *
+ * Generator Polynomial        = 0x1EDC6F41
+ * Generator Polynomial Length = 32 bits
+ * Reflected Bits              = TRUE
+ * Table Generation Offset     = 32 bits
+ * Number of Slices            = 8 slices
+ * Slice Lengths               = 8 8 8 8 8 8 8 8
+ * Directory Name              = .\
+ * File Name                   = 8x256_tables.c
+ */
+
+uint32_t sctp_crc_tableil8_o64[256] =
+{
+ 0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44,
+ 0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65, 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5,
+ 0x8F2261D3, 0xB7330E7F, 0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97,
+ 0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, 0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406,
+ 0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3, 0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13,
+ 0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, 0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082,
+ 0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, 0x74CF6A34, 0x4CDE0598, 0x04EDB56C, 0x3CFCDAC0,
+ 0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, 0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151,
+ 0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA,
+ 0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, 0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B,
+ 0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89, 0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539,
+ 0x7D1400EC, 0x45056F40, 0x0D36DFB4, 0x3527B018, 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8,
+ 0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, 0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD,
+ 0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C, 0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C,
+ 0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, 0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E,
+ 0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, 0x86F90B0B, 0xBEE864A7, 0xF6DBD453, 0xCECABBFF,
+ 0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, 0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18,
+ 0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089,
+ 0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, 0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB,
+ 0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, 0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A,
+ 0x750A600B, 0x4D1B0FA7, 0x0528BF53, 0x3D39D0FF, 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F,
+ 0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, 0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE,
+ 0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C, 0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C,
+ 0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, 0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 0x9739040D,
+ 0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, 0xB9B60142, 0x81A76EEE, 0xC994DE1A, 0xF185B1B6,
+ 0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, 0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27,
+ 0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065,
+ 0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, 0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4,
+ 0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51, 0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1,
+ 0x873C0134, 0xBF2D6E98, 0xF71EDE6C, 0xCF0FB1C0, 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70,
+ 0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532,
+ 0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013, 0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o64
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically using the
+ * following model parameters:
+ *
+ * Generator Polynomial        = 0x1EDC6F41
+ * Generator Polynomial Length = 32 bits
+ * Reflected Bits              = TRUE
+ * Table Generation Offset     = 32 bits
+ * Number of Slices            = 8 slices
+ * Slice Lengths               = 8 8 8 8 8 8 8 8
+ * Directory Name              = .\
+ * File Name                   = 8x256_tables.c
+ */
+
+uint32_t sctp_crc_tableil8_o72[256] =
+{
+ 0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD,
+ 0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2,
+ 0xC00C303E, 0x2F3C5B27, 0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93,
+ 0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, 0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C,
+ 0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57, 0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20,
+ 0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, 0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F,
+ 0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, 0xF70D11C4, 0x183D7ADD, 0x2C81B107, 0xC3B1DA1E,
+ 0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, 0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201,
+ 0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746,
+ 0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, 0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59,
+ 0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F, 0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778,
+ 0xAE0E73CA, 0x413E18D3, 0x7582D309, 0x9AB2B810, 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67,
+ 0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, 0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB,
+ 0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3, 0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4,
+ 0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, 0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5,
+ 0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, 0x990F5230, 0x763F3929, 0x4283F2F3, 0xADB399EA,
+ 0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, 0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B,
+ 0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364,
+ 0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, 0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45,
+ 0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D, 0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A,
+ 0x99FCA15B, 0x76CCCA42, 0x42700198, 0xAD406A81, 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6,
+ 0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, 0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9,
+ 0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF, 0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8,
+ 0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, 0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7,
+ 0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, 0xA0F9DB4A, 0x4FC9B053, 0x7B757B89, 0x94451090,
+ 0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, 0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F,
+ 0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE,
+ 0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, 0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1,
+ 0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A, 0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D,
+ 0xF7FEE2AF, 0x18CE89B6, 0x2C72426C, 0xC3422975, 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02,
+ 0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623,
+ 0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, 0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o72
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically using the
+ * following model parameters:
+ *
+ * Generator Polynomial        = 0x1EDC6F41
+ * Generator Polynomial Length = 32 bits
+ * Reflected Bits              = TRUE
+ * Table Generation Offset     = 32 bits
+ * Number of Slices            = 8 slices
+ * Slice Lengths               = 8 8 8 8 8 8 8 8
+ * Directory Name              = .\
+ * File Name                   = 8x256_tables.c
+ */
+
+uint32_t sctp_crc_tableil8_o80[256] =
+{
+ 0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089,
+ 0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B, 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA,
+ 0x9C5BFAA6, 0xF458D66E, 0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F,
+ 0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, 0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C,
+ 0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5, 0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334,
+ 0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, 0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67,
+ 0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, 0x04E0BCCA, 0x6CE39002, 0xD4E6E55A, 0xBCE5C992,
+ 0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, 0x4ACD4199, 0x22CE6D51, 0x9ACB1809, 0xF2C834C1,
+ 0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3,
+ 0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, 0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0,
+ 0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884, 0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55,
+ 0xA8C1008F, 0xC0C22C47, 0x78C7591F, 0x10C475D7, 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006,
+ 0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, 0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E,
+ 0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC, 0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D,
+ 0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, 0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8,
+ 0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, 0x307A46E3, 0x58796A2B, 0xE07C1F73, 0x887F33BB,
+ 0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, 0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D,
+ 0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E,
+ 0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, 0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB,
+ 0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, 0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988,
+ 0xC8358D49, 0xA036A181, 0x1833D4D9, 0x7030F811, 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0,
+ 0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, 0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093,
+ 0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7, 0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766,
+ 0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, 0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35,
+ 0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, 0x2A39CC5F, 0x423AE097, 0xFA3F95CF, 0x923CB907,
+ 0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, 0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454,
+ 0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1,
+ 0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, 0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2,
+ 0xB2828A33, 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B, 0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA,
+ 0xFCAF7760, 0x94AC5BA8, 0x2CA92EF0, 0x44AA0238, 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9,
+ 0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, 0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C,
+ 0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, 0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o80
+ */
+
+
+
+/*
+ * The following CRC lookup table was generated automagically using the
+ * following model parameters:
+ *
+ * Generator Polynomial        = 0x1EDC6F41
+ * Generator Polynomial Length = 32 bits
+ * Reflected Bits              = TRUE
+ * Table Generation Offset     = 32 bits
+ * Number of Slices            = 8 slices
+ * Slice Lengths               = 8 8 8 8 8 8 8 8
+ * Directory Name              = .\
+ * File Name                   = 8x256_tables.c
+ */
+
+uint32_t sctp_crc_tableil8_o88[256] =
+{
+ 0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504,
+ 0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3, 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE,
+ 0x847609B4, 0xCD4A7493, 0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0,
+ 0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, 0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A,
+ 0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0, 0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D,
+ 0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, 0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447,
+ 0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, 0xA86BEE40, 0xE1579367, 0x3A13140E, 0x732F6929,
+ 0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, 0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3,
+ 0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36,
+ 0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, 0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC,
+ 0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF, 0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782,
+ 0xDC4DC65C, 0x9571BB7B, 0x4E353C12, 0x07094135, 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358,
+ 0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, 0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF,
+ 0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18, 0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75,
+ 0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, 0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B,
+ 0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, 0xF05021A8, 0xB96C5C8F, 0x6228DBE6, 0x2B14A6C1,
+ 0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, 0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360,
+ 0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA,
+ 0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, 0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4,
+ 0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63, 0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E,
+ 0x3901F3FD, 0x703D8EDA, 0xAB7909B3, 0xE2457494, 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9,
+ 0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, 0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223,
+ 0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20, 0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D,
+ 0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, 0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97,
+ 0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, 0x0F1CDF3B, 0x4620A21C, 0x9D642575, 0xD4585852,
+ 0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, 0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88,
+ 0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6,
+ 0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, 0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C,
+ 0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6, 0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB,
+ 0x613A3C15, 0x28064132, 0xF342C65B, 0xBA7EBB7C, 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911,
+ 0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, 0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F,
+ 0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, 0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5
+};
+
+/*
+ * end of the CRC lookup table crc_tableil8_o88
+ */
+
+static uint32_t
+sctp_crc32c_sb8_64_bit(uint32_t crc,
+ unsigned char *p_buf,
+ uint32_t length,
+ uint32_t init_bytes)
+{
+ uint32_t li;
+ uint32_t term1, term2;
+ uint32_t running_length;
+ uint32_t end_bytes;
+
+ running_length = ((length - init_bytes) / 8) * 8;
+ end_bytes = length - init_bytes - running_length;
+
+ for (li = 0; li < init_bytes; li++)
+ crc = sctp_crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^
+ (crc >> 8);
+ for (li = 0; li < running_length / 8; li++) {
+#if BYTE_ORDER == BIG_ENDIAN
+ crc ^= *p_buf++;
+ crc ^= (*p_buf++) << 8;
+ crc ^= (*p_buf++) << 16;
+ crc ^= (*p_buf++) << 24;
+#else
+ crc ^= *(uint32_t *) p_buf;
+ p_buf += 4;
+#endif
+ term1 = sctp_crc_tableil8_o88[crc & 0x000000FF] ^
+ sctp_crc_tableil8_o80[(crc >> 8) & 0x000000FF];
+ term2 = crc >> 16;
+ crc = term1 ^
+ sctp_crc_tableil8_o72[term2 & 0x000000FF] ^
+ sctp_crc_tableil8_o64[(term2 >> 8) & 0x000000FF];
+
+#if BYTE_ORDER == BIG_ENDIAN
+ crc ^= sctp_crc_tableil8_o56[*p_buf++];
+ crc ^= sctp_crc_tableil8_o48[*p_buf++];
+ crc ^= sctp_crc_tableil8_o40[*p_buf++];
+ crc ^= sctp_crc_tableil8_o32[*p_buf++];
+#else
+ term1 = sctp_crc_tableil8_o56[(*(uint32_t *) p_buf) & 0x000000FF] ^
+ sctp_crc_tableil8_o48[((*(uint32_t *) p_buf) >> 8) & 0x000000FF];
+
+ term2 = (*(uint32_t *) p_buf) >> 16;
+ crc = crc ^
+ term1 ^
+ sctp_crc_tableil8_o40[term2 & 0x000000FF] ^
+ sctp_crc_tableil8_o32[(term2 >> 8) & 0x000000FF];
+ p_buf += 4;
+#endif
+ }
+ for (li = 0; li < end_bytes; li++)
+ crc = sctp_crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^
+ (crc >> 8);
+ return crc;
+}
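
The routine above is a table-driven (slicing-by-8) form of the reflected CRC32c. As a cross-check, not part of this commit, a bit-at-a-time version using the reflected Castagnoli polynomial 0x82F63B78 yields the same results for the same seed; running its inner loop once over the single byte 0x01 with an initial crc of 0 gives 0xF26B8303, i.e. entry 1 of the o32 table:

/* Reference bit-at-a-time CRC32c; reflected polynomial 0x82F63B78. */
static uint32_t
crc32c_bitwise(uint32_t crc, const unsigned char *buf, uint32_t len)
{
	uint32_t i;
	int bit;

	for (i = 0; i < len; i++) {
		crc ^= buf[i];
		for (bit = 0; bit < 8; bit++) {
			if (crc & 1)
				crc = (crc >> 1) ^ 0x82F63B78;
			else
				crc >>= 1;
		}
	}
	return (crc);
}
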
+
+
+/**
+ *
+ * Routine Description:
+ *
+ * Updates the running CRC32c value over the supplied buffer, using the
+ * slicing-by-8 routine above and handling any initial unaligned bytes.
+ *
+ * Arguments:
+ *
+ * crc32c - the running CRC32c value
+ * buffer - the data to be checksummed
+ * length - the length of buffer in bytes
+ *
+ * Return value:
+ *
+ * The updated CRC32c value
+ */
+uint32_t
+update_crc32(uint32_t crc32c,
+ unsigned char *buffer,
+ unsigned int length)
+{
+ uint32_t offset;
+
+ if (length == 0) {
+ return (crc32c);
+ }
+ offset = ((uintptr_t) buffer) & 0x3;
+ return (sctp_crc32c_sb8_64_bit(crc32c, buffer, length, offset));
+}
+
+uint32_t sctp_crc_c[256] = {
+ 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
+ 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
+ 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
+ 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
+ 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
+ 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
+ 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
+ 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
+ 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
+ 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
+ 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
+ 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
+ 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
+ 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
+ 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
+ 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
+ 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
+ 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
+ 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
+ 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
+ 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
+ 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
+ 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
+ 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
+ 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
+ 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
+ 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
+ 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
+ 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
+ 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
+ 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
+ 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
+ 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
+ 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
+ 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
+ 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
+ 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
+ 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
+ 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
+ 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
+ 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
+ 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
+ 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
+ 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
+ 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
+ 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
+ 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
+ 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
+ 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
+ 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
+ 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
+ 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
+ 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
+ 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
+ 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
+ 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
+ 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
+ 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
+ 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
+ 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
+ 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
+ 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
+ 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
+ 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351,
+};
+
+
+#define SCTP_CRC32C(c,d) (c=(c>>8)^sctp_crc_c[(c^(d))&0xFF])
+
+uint32_t
+old_update_crc32(uint32_t crc32c,
+ unsigned char *buffer,
+ unsigned int length)
+{
+ unsigned int i;
+
+ for (i = 0; i < length; i++) {
+ SCTP_CRC32C(crc32c, buffer[i]);
+ }
+ return (crc32c);
+}
+
+
+uint32_t
+sctp_csum_finalize(uint32_t crc32c)
+{
+ uint32_t result;
+
+#if BYTE_ORDER == BIG_ENDIAN
+ uint8_t byte0, byte1, byte2, byte3;
+
+#endif
+ /* Complement the result */
+ result = ~crc32c;
+#if BYTE_ORDER == BIG_ENDIAN
+ /*
+ * For BIG-ENDIAN.. aka Motorola byte order the result is in
+ * little-endian form. So we must manually swap the bytes. Then we
+ * can call htonl() which does nothing...
+ */
+ byte0 = result & 0x000000ff;
+ byte1 = (result >> 8) & 0x000000ff;
+ byte2 = (result >> 16) & 0x000000ff;
+ byte3 = (result >> 24) & 0x000000ff;
+ crc32c = ((byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3);
+#else
+ /*
+ * For INTEL platforms the result comes out in network order. No
+ * htonl is required or the swap above. So we optimize out both the
+ * htonl and the manual swap above.
+ */
+ crc32c = result;
+#endif
+ return (crc32c);
+}
+
+#endif
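
For reference, the intended call pattern follows the usual RFC 3309 conventions: zero the checksum field, seed the CRC with all ones, and let sctp_csum_finalize() apply the final complement and byte-order fixup. A rough sketch, assuming a packet in one flat buffer (the real stack walks mbuf chains, and the helper name here is illustrative only):

/* Illustrative only: checksum an SCTP packet held in one flat buffer. */
static uint32_t
example_sctp_cksum(unsigned char *pkt, unsigned int len)
{
	uint32_t crc = 0xffffffff;	/* initial value; checksum field already zeroed */

	crc = update_crc32(crc, pkt, len);
	return (sctp_csum_finalize(crc));
}
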
Index: tcp_timer.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_timer.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/tcp_timer.h -L sys/netinet/tcp_timer.h -u -r1.2 -r1.3
--- sys/netinet/tcp_timer.h
+++ sys/netinet/tcp_timer.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/tcp_timer.h,v 1.27.2.1 2006/03/01 21:13:29 andre Exp $
+ * $FreeBSD: src/sys/netinet/tcp_timer.h,v 1.39 2007/09/24 05:26:24 silby Exp $
*/
#ifndef _NETINET_TCP_TIMER_H_
@@ -89,6 +89,8 @@
#define TCPTV_INFLIGHT_RTTTHRESH (10*hz/1000) /* below which inflight
disengages, in msec */
+#define TCPTV_FINWAIT2_TIMEOUT (60*hz) /* FIN_WAIT_2 timeout if no receiver */
+
/*
* Minimum retransmit timer is 3 ticks, for algorithmic stability.
* TCPT_RANGESET() will add another TCPTV_CPU_VAR to deal with
@@ -109,7 +111,7 @@
* The prior minimum of 1*hz (1 second) badly breaks throughput on any
 * networks faster than a modem that has minor (e.g. 1%) packet loss.
*/
-#define TCPTV_MIN ( 3 ) /* minimum allowable value */
+#define TCPTV_MIN ( hz/33 ) /* minimum allowable value */
#define TCPTV_CPU_VAR ( hz/5 ) /* cpu variance allowed (200ms) */
#define TCPTV_REXMTMAX ( 64*hz) /* max allowable REXMT value */
@@ -133,11 +135,25 @@
(tv) = (value) + tcp_rexmit_slop; \
if ((u_long)(tv) < (u_long)(tvmin)) \
(tv) = (tvmin); \
- else if ((u_long)(tv) > (u_long)(tvmax)) \
+ if ((u_long)(tv) > (u_long)(tvmax)) \
(tv) = (tvmax); \
} while(0)
#ifdef _KERNEL
+
+struct tcp_timer {
+ struct callout tt_rexmt; /* retransmit timer */
+ struct callout tt_persist; /* retransmit persistence */
+ struct callout tt_keep; /* keepalive */
+ struct callout tt_2msl; /* 2*msl TIME_WAIT timer */
+ struct callout tt_delack; /* delayed ACK timer */
+};
+#define TT_DELACK 0x01
+#define TT_REXMT 0x02
+#define TT_PERSIST 0x04
+#define TT_KEEP 0x08
+#define TT_2MSL 0x10
+
extern int tcp_keepinit; /* time to establish connection */
extern int tcp_keepidle; /* time before keepalive probes begin */
extern int tcp_keepintvl; /* time between keepalive probes */
@@ -150,14 +166,13 @@
extern int tcp_ttl; /* time to live for TCP segs */
extern int tcp_backoff[];
-struct tcptw;
+extern int tcp_finwait2_timeout;
+extern int tcp_fast_finwait2_recycle;
void tcp_timer_init(void);
void tcp_timer_2msl(void *xtp);
struct tcptw *
- tcp_timer_2msl_tw(int _reuse); /* XXX temporary */
-void tcp_timer_2msl_reset(struct tcptw *_tw, int _timeo);
-void tcp_timer_2msl_stop(struct tcptw *_tw);
+ tcp_tw_2msl_scan(int _reuse); /* XXX temporary */
void tcp_timer_keep(void *xtp);
void tcp_timer_persist(void *xtp);
void tcp_timer_rexmt(void *xtp);
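
The struct tcp_timer and TT_* flags above consolidate the per-timer callouts; the tcp_output.c diff below switches every caller from direct callout manipulation to a wrapper pair. The prototypes are not part of this header, so treat the following as assumptions inferred from the call sites:

/* Assumed prototypes, inferred from call sites later in this patch. */
void	tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta);
int	tcp_timer_active(struct tcpcb *tp, int timer_type);

/*
 * Mapping used throughout the rest of the patch; a delta of 0 stops the
 * timer, a non-zero delta (re)arms it for that many ticks:
 *   callout_stop(tp->tt_persist)            -> tcp_timer_activate(tp, TT_PERSIST, 0)
 *   callout_active(tp->tt_rexmt)            -> tcp_timer_active(tp, TT_REXMT)
 *   callout_reset(tp->tt_rexmt, t, fn, tp)  -> tcp_timer_activate(tp, TT_REXMT, t)
 */
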
Index: tcp_output.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_output.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/netinet/tcp_output.c -L sys/netinet/tcp_output.c -u -r1.3 -r1.4
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -27,29 +27,28 @@
* SUCH DAMAGE.
*
* @(#)tcp_output.c 8.4 (Berkeley) 5/24/95
- * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.112 2005/05/21 00:38:29 ps Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/tcp_output.c,v 1.141.2.3 2007/12/05 10:37:17 bz Exp $");
+
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_mac.h"
#include "opt_tcpdebug.h"
-#include "opt_tcp_sack.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/lock.h>
-#include <sys/mac.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
-#include <sys/syslog.h>
#include <net/route.h>
@@ -58,6 +57,7 @@
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
+#include <netinet/ip_options.h>
#ifdef INET6
#include <netinet6/in6_pcb.h>
#include <netinet/ip6.h>
@@ -75,16 +75,13 @@
#endif
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#endif /*IPSEC*/
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
-#define IPSEC
-#endif /*FAST_IPSEC*/
+#endif /*IPSEC*/
#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
#ifdef notyet
extern struct mbuf *m_copypack();
#endif
@@ -102,8 +99,12 @@
&ss_fltsz_local, 1, "Slow start flight size for local networks");
int tcp_do_newreno = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &tcp_do_newreno,
- 0, "Enable NewReno Algorithms");
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW,
+ &tcp_do_newreno, 0, "Enable NewReno Algorithms");
+
+int tcp_do_tso = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW,
+ &tcp_do_tso, 0, "Enable TCP Segmentation Offload");
int tcp_do_autosndbuf = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_RW,
@@ -117,6 +118,7 @@
SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_RW,
&tcp_autosndbuf_max, 0, "Max size of automatic send buffer");
+
/*
* Tcp output routine: figure out what should be sent and send it.
*/
@@ -126,19 +128,20 @@
struct socket *so = tp->t_inpcb->inp_socket;
long len, recwin, sendwin;
int off, flags, error;
-#ifdef TCP_SIGNATURE
- int sigoff = 0;
-#endif
struct mbuf *m;
struct ip *ip = NULL;
struct ipovly *ipov = NULL;
struct tcphdr *th;
u_char opt[TCP_MAXOLEN];
unsigned ipoptlen, optlen, hdrlen;
+#ifdef IPSEC
+ unsigned ipsec_optlen = 0;
+#endif
int idle, sendalot;
- int i, sack_rxmit;
- int sack_bytes_rxmt;
+ int sack_rxmit, sack_bytes_rxmt;
struct sackhole *p;
+ int tso = 0;
+ struct tcpopt to;
#if 0
int maxburst = TCP_MAXBURST;
#endif
@@ -191,7 +194,8 @@
* snd_nxt. There may be SACK information that allows us to avoid
* resending already delivered data. Adjust snd_nxt accordingly.
*/
- if (tp->sack_enable && SEQ_LT(tp->snd_nxt, tp->snd_max))
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ SEQ_LT(tp->snd_nxt, tp->snd_max))
tcp_sack_adjust(tp);
sendalot = 0;
off = tp->snd_nxt - tp->snd_una;
@@ -213,7 +217,7 @@
sack_bytes_rxmt = 0;
len = 0;
p = NULL;
- if (tp->sack_enable && IN_FASTRECOVERY(tp) &&
+ if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp) &&
(p = tcp_sack_output(tp, &sack_bytes_rxmt))) {
long cwin;
@@ -292,7 +296,7 @@
flags &= ~TH_FIN;
sendwin = 1;
} else {
- callout_stop(tp->tt_persist);
+ tcp_timer_activate(tp, TT_PERSIST, 0);
tp->t_rxtshift = 0;
}
}
@@ -350,7 +354,8 @@
* know that foreign host supports TAO, suppress sending segment.
*/
if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
- flags &= ~TH_SYN;
+ if (tp->t_state != TCPS_SYN_RECEIVED)
+ flags &= ~TH_SYN;
off--, len++;
}
@@ -377,10 +382,10 @@
*/
len = 0;
if (sendwin == 0) {
- callout_stop(tp->tt_rexmt);
+ tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rxtshift = 0;
tp->snd_nxt = tp->snd_una;
- if (!callout_active(tp->tt_persist))
+ if (!tcp_timer_active(tp, TT_PERSIST))
tcp_setpersist(tp);
}
}
@@ -410,7 +415,16 @@
* growing of the send buffer before it reaches its allowed
* maximum.
*
- * Optional: Shrink send buffer during idle periods together
+ * It scales directly with slow start or congestion window
+ * and does at most one step per received ACK. This fast
+ * scaling has the drawback of growing the send buffer beyond
+ * what is strictly necessary to make full use of a given
+ * delay*bandwidth product. However, testing has shown this not
+ * to be much of a problem. At worst we are trading wasting
+ * of available bandwidth (the non-use of it) for wasting some
+ * socket buffer memory.
+ *
+ * TODO: Shrink send buffer during idle periods together
* with congestion window. Requires another timer. Has to
* wait for upcoming tcp timer rewrite.
*/
@@ -419,13 +433,6 @@
so->so_snd.sb_cc >= (so->so_snd.sb_hiwat / 8 * 7) &&
so->so_snd.sb_cc < tcp_autosndbuf_max &&
sendwin >= (so->so_snd.sb_cc - (tp->snd_nxt - tp->snd_una))) {
-#if 0
- log(LOG_DEBUG, "%s: inc sockbuf, old %i, new %i, "
- "sb_cc %i, snd_wnd %i, sendwnd %i\n",
- __func__, so->so_snd.sb_hiwat,
- so->so_snd.sb_hiwat + tcp_autosndbuf_inc,
- so->so_snd.sb_cc, (int)tp->snd_wnd, (int)sendwin);
-#endif
if (!sbreserve_locked(&so->so_snd,
min(so->so_snd.sb_hiwat + tcp_autosndbuf_inc,
tcp_autosndbuf_max), so, curthread))
@@ -437,10 +444,41 @@
* Truncate to the maximum segment length or enable TCP Segmentation
* Offloading (if supported by hardware) and ensure that FIN is removed
* if the length no longer contains the last data byte.
+ *
+ * TSO may only be used if we are in a pure bulk sending state. The
+ * presence of TCP-MD5, SACK retransmits, SACK advertisements and
+ * IP options prevent using TSO. With TSO the TCP header is the same
+ * (except for the sequence number) for all generated packets. This
+ * makes it impossible to transmit any options which vary per generated
+ * segment or packet.
+ *
+ * The length of TSO bursts is limited to TCP_MAXWIN. That limit and
+ * removal of FIN (if not already caught here) are handled later after
+ * the exact length of the TCP options is known.
+ */
+#ifdef IPSEC
+ /*
+ * Pre-calculate here as we save another lookup into the darknesses
+ * of IPsec that way and can actually decide if TSO is ok.
*/
+ ipsec_optlen = ipsec_hdrsiz_tcp(tp);
+#endif
if (len > tp->t_maxseg) {
- len = tp->t_maxseg;
- sendalot = 1;
+ if ((tp->t_flags & TF_TSO) && tcp_do_tso &&
+ ((tp->t_flags & TF_SIGNATURE) == 0) &&
+ tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
+ tp->t_inpcb->inp_options == NULL &&
+ tp->t_inpcb->in6p_options == NULL
+#ifdef IPSEC
+ && ipsec_optlen == 0
+#endif
+ ) {
+ tso = 1;
+ } else {
+ len = tp->t_maxseg;
+ sendalot = 1;
+ tso = 0;
+ }
}
if (sack_rxmit) {
if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc))
@@ -456,7 +494,7 @@
* Sender silly window avoidance. We transmit under the following
* conditions when len is non-zero:
*
- * - We have a full segment
+ * - We have a full segment (or more with TSO)
* - This is the last buffer in a write()/send() and we are
* either idle or running NODELAY
* - we've timed out (e.g. persist timer)
@@ -465,7 +503,7 @@
* - we need to retransmit
*/
if (len) {
- if (len == tp->t_maxseg)
+ if (len >= tp->t_maxseg)
goto send;
/*
* NOTE! on localhost connections an 'ack' from the remote
@@ -497,8 +535,11 @@
* max size segments, or at least 50% of the maximum possible
* window, then want to send a window update to peer.
* Skip this if the connection is in T/TCP half-open state.
+ * Don't send pure window updates when the peer has closed
+ * the connection and won't ever send more data.
*/
- if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN)) {
+ if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) &&
+ !TCPS_HAVERCVDFIN(tp->t_state)) {
/*
* "adv" is the amount we can increase the window,
* taking into account that we are limited by
@@ -536,11 +577,11 @@
* after the retransmission timer has been turned off. Make sure
* that the retransmission timer is set.
*/
- if (tp->sack_enable && SEQ_GT(tp->snd_max, tp->snd_una) &&
- !callout_active(tp->tt_rexmt) &&
- !callout_active(tp->tt_persist)) {
- callout_reset(tp->tt_rexmt, tp->t_rxtcur,
- tcp_timer_rexmt, tp);
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ SEQ_GT(tp->snd_max, tp->snd_una) &&
+ !tcp_timer_active(tp, TT_REXMT) &&
+ !tcp_timer_active(tp, TT_PERSIST)) {
+ tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
goto just_return;
}
/*
@@ -551,11 +592,11 @@
* persisting to move a small or zero window
* (re)transmitting and thereby not persisting
*
- * callout_active(tp->tt_persist)
+ * tcp_timer_active(tp, TT_PERSIST)
* is true when we are in persist state.
* (tp->t_flags & TF_FORCEDATA)
* is set when we are called to send a persist packet.
- * callout_active(tp->tt_rexmt)
+ * tcp_timer_active(tp, TT_REXMT)
* is set when we are retransmitting
* The output side is idle when both timers are zero.
*
@@ -565,8 +606,8 @@
* if window is nonzero, transmit what we can,
* otherwise force out a byte.
*/
- if (so->so_snd.sb_cc && !callout_active(tp->tt_rexmt) &&
- !callout_active(tp->tt_persist)) {
+ if (so->so_snd.sb_cc && !tcp_timer_active(tp, TT_REXMT) &&
+ !tcp_timer_active(tp, TT_PERSIST)) {
tp->t_rxtshift = 0;
tcp_setpersist(tp);
}
@@ -595,157 +636,63 @@
else
#endif
hdrlen = sizeof (struct tcpiphdr);
- if (flags & TH_SYN) {
- tp->snd_nxt = tp->iss;
- if ((tp->t_flags & TF_NOOPT) == 0) {
- u_short mss;
-
- opt[0] = TCPOPT_MAXSEG;
- opt[1] = TCPOLEN_MAXSEG;
- mss = htons((u_short) tcp_mssopt(&tp->t_inpcb->inp_inc));
- (void)memcpy(opt + 2, &mss, sizeof(mss));
- optlen = TCPOLEN_MAXSEG;
-
- if ((tp->t_flags & TF_REQ_SCALE) &&
- ((flags & TH_ACK) == 0 ||
- (tp->t_flags & TF_RCVD_SCALE))) {
- *((u_int32_t *)(opt + optlen)) = htonl(
- TCPOPT_NOP << 24 |
- TCPOPT_WINDOW << 16 |
- TCPOLEN_WINDOW << 8 |
- tp->request_r_scale);
- optlen += 4;
- }
- }
- }
/*
- * Send a timestamp and echo-reply if this is a SYN and our side
- * wants to use timestamps (TF_REQ_TSTMP is set) or both our side
- * and our peer have sent timestamps in our SYN's.
- */
- if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
- (flags & TH_RST) == 0 &&
- ((flags & TH_ACK) == 0 ||
- (tp->t_flags & TF_RCVD_TSTMP))) {
- u_int32_t *lp = (u_int32_t *)(opt + optlen);
-
- /* Form timestamp option as shown in appendix A of RFC 1323. */
- *lp++ = htonl(TCPOPT_TSTAMP_HDR);
- *lp++ = htonl(ticks);
- *lp = htonl(tp->ts_recent);
- optlen += TCPOLEN_TSTAMP_APPA;
- }
-
- /* Set receive buffer autosizing timestamp. */
- if (tp->rfbuf_ts == 0 && (so->so_rcv.sb_flags & SB_AUTOSIZE))
- tp->rfbuf_ts = ticks;
-
-#ifdef TCP_SIGNATURE
-#ifdef INET6
- if (!isipv6)
-#endif
- if (tp->t_flags & TF_SIGNATURE) {
- int i;
- u_char *bp;
-
- /* Initialize TCP-MD5 option (RFC2385) */
- bp = (u_char *)opt + optlen;
- *bp++ = TCPOPT_SIGNATURE;
- *bp++ = TCPOLEN_SIGNATURE;
- sigoff = optlen + 2;
- for (i = 0; i < TCP_SIGLEN; i++)
- *bp++ = 0;
- optlen += TCPOLEN_SIGNATURE;
- }
-#endif /* TCP_SIGNATURE */
-
- if (tp->sack_enable && ((tp->t_flags & TF_NOOPT) == 0)) {
- /*
- * Tack on the SACK permitted option *last*.
- * And do padding of options after tacking this on.
- * This is because of MSS, TS, WinScale and Signatures are
- * all present, we have just 2 bytes left for the SACK
- * permitted option, which is just enough.
- */
- /*
- * If this is the first SYN of connection (not a SYN
- * ACK), include SACK permitted option. If this is a
- * SYN ACK, include SACK permitted option if peer has
- * already done so. This is only for active connect,
- * since the syncache takes care of the passive connect.
- */
- if ((flags & TH_SYN) &&
- (!(flags & TH_ACK) || (tp->t_flags & TF_SACK_PERMIT))) {
- u_char *bp;
- bp = (u_char *)opt + optlen;
-
- *bp++ = TCPOPT_SACK_PERMITTED;
- *bp++ = TCPOLEN_SACK_PERMITTED;
- optlen += TCPOLEN_SACK_PERMITTED;
+ * Compute options for segment.
+ * We only have to care about SYN and established connection
+ * segments. Options for SYN-ACK segments are handled in TCP
+ * syncache.
+ */
+ if ((tp->t_flags & TF_NOOPT) == 0) {
+ to.to_flags = 0;
+ /* Maximum segment size. */
+ if (flags & TH_SYN) {
+ tp->snd_nxt = tp->iss;
+ to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
+ to.to_flags |= TOF_MSS;
+ }
+ /* Window scaling. */
+ if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
+ to.to_wscale = tp->request_r_scale;
+ to.to_flags |= TOF_SCALE;
+ }
+ /* Timestamps. */
+ if ((tp->t_flags & TF_RCVD_TSTMP) ||
+ ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
+ to.to_tsval = ticks + tp->ts_offset;
+ to.to_tsecr = tp->ts_recent;
+ to.to_flags |= TOF_TS;
+ /* Set receive buffer autosizing timestamp. */
+ if (tp->rfbuf_ts == 0 &&
+ (so->so_rcv.sb_flags & SB_AUTOSIZE))
+ tp->rfbuf_ts = ticks;
}
-
- /*
- * Send SACKs if necessary. This should be the last
- * option processed. Only as many SACKs are sent as
- * are permitted by the maximum options size.
- *
- * In general, SACK blocks consume 8*n+2 bytes.
- * So a full size SACK blocks option is 34 bytes
- * (to generate 4 SACK blocks). At a minimum,
- * we need 10 bytes (to generate 1 SACK block).
- * If TCP Timestamps (12 bytes) and TCP Signatures
- * (18 bytes) are both present, we'll just have
- * 10 bytes for SACK options 40 - (12 + 18).
- */
- if (TCPS_HAVEESTABLISHED(tp->t_state) &&
- (tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0 &&
- MAX_TCPOPTLEN - optlen - 2 >= TCPOLEN_SACK) {
- int nsack, sackoptlen, padlen;
- u_char *bp = (u_char *)opt + optlen;
- u_int32_t *lp;
-
- nsack = (MAX_TCPOPTLEN - optlen - 2) / TCPOLEN_SACK;
- nsack = min(nsack, tp->rcv_numsacks);
- sackoptlen = (2 + nsack * TCPOLEN_SACK);
-
- /*
- * First we need to pad options so that the
- * SACK blocks can start at a 4-byte boundary
- * (sack option and length are at a 2 byte offset).
- */
- padlen = (MAX_TCPOPTLEN - optlen - sackoptlen) % 4;
- optlen += padlen;
- while (padlen-- > 0)
- *bp++ = TCPOPT_NOP;
-
- tcpstat.tcps_sack_send_blocks++;
- *bp++ = TCPOPT_SACK;
- *bp++ = sackoptlen;
- lp = (u_int32_t *)bp;
- for (i = 0; i < nsack; i++) {
- struct sackblk sack = tp->sackblks[i];
- *lp++ = htonl(sack.start);
- *lp++ = htonl(sack.end);
+ /* Selective ACK's. */
+ if (tp->t_flags & TF_SACK_PERMIT) {
+ if (flags & TH_SYN)
+ to.to_flags |= TOF_SACKPERM;
+ else if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+ (tp->t_flags & TF_SACK_PERMIT) &&
+ tp->rcv_numsacks > 0) {
+ to.to_flags |= TOF_SACK;
+ to.to_nsacks = tp->rcv_numsacks;
+ to.to_sacks = (u_char *)tp->sackblks;
}
- optlen += sackoptlen;
}
- }
-
- /* Pad TCP options to a 4 byte boundary */
- if (optlen < MAX_TCPOPTLEN && (optlen % sizeof(u_int32_t))) {
- int pad = sizeof(u_int32_t) - (optlen % sizeof(u_int32_t));
- u_char *bp = (u_char *)opt + optlen;
+#ifdef TCP_SIGNATURE
+ /* TCP-MD5 (RFC2385). */
+#ifdef INET6
+ if (!isipv6 && (tp->t_flags & TF_SIGNATURE))
+#else
+ if (tp->t_flags & TF_SIGNATURE)
+#endif /* INET6 */
+ to.to_flags |= TOF_SIGNATURE;
+#endif /* TCP_SIGNATURE */
- optlen += pad;
- while (pad) {
- *bp++ = TCPOPT_EOL;
- pad--;
- }
+ /* Processing the options. */
+ hdrlen += optlen = tcp_addoptions(&to, opt);
}
- hdrlen += optlen;
-
#ifdef INET6
if (isipv6)
ipoptlen = ip6_optlen(tp->t_inpcb);
@@ -757,7 +704,7 @@
else
ipoptlen = 0;
#ifdef IPSEC
- ipoptlen += ipsec_hdrsiz_tcp(tp);
+ ipoptlen += ipsec_optlen;
#endif
/*
@@ -765,14 +712,27 @@
* bump the packet length beyond the t_maxopd length.
* Clear the FIN bit because we cut off the tail of
* the segment.
+ *
+ * When doing TSO, limit a burst to TCP_MAXWIN minus the
+ * IP, TCP and Options length to keep ip->ip_len from
+ * overflowing. Prevent the last segment from being
+ * fractional, thus making them all equal sized, and set
+ * the flag to continue sending. TSO is disabled when
+ * IP options or IPSEC are present.
*/
if (len + optlen + ipoptlen > tp->t_maxopd) {
- /*
- * If there is still more to send, don't close the connection.
- */
flags &= ~TH_FIN;
- len = tp->t_maxopd - optlen - ipoptlen;
- sendalot = 1;
+ if (tso) {
+ if (len > TCP_MAXWIN - hdrlen - optlen) {
+ len = TCP_MAXWIN - hdrlen - optlen;
+ len = len - (len % (tp->t_maxopd - optlen));
+ sendalot = 1;
+ } else if (tp->t_flags & TF_NEEDFIN)
+ sendalot = 1;
+ } else {
+ len = tp->t_maxopd - optlen - ipoptlen;
+ sendalot = 1;
+ }
}
/*#ifdef DIAGNOSTIC*/
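The TSO clamp above can be checked with a quick worked example. The helper below is a stand-alone sketch, and every number in it (header length, option length, t_maxopd) is illustrative rather than taken from the patch:

	/* Stand-alone sketch of the TSO burst clamp; all numbers are illustrative. */
	#include <stdio.h>

	int main(void)
	{
		long len = 120000;		/* sendable data queued in the socket buffer */
		long hdrlen = 52;		/* IP + TCP header length (illustrative) */
		long optlen = 12;		/* TCP options length (illustrative) */
		long t_maxopd = 1460;		/* max segment payload incl. options */
		long maxwin = 65535;		/* TCP_MAXWIN */

		if (len > maxwin - hdrlen - optlen) {
			len = maxwin - hdrlen - optlen;		/* 65471: keeps ip_len from overflowing */
			len -= len % (t_maxopd - optlen);	/* 65160: 45 equal 1448-byte segments */
		}
		printf("TSO burst clamped to %ld bytes\n", len);
		return 0;
	}

The second subtraction is what keeps every segment in the burst the same size, so only the final burst of the send can end in a short segment.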
@@ -790,9 +750,12 @@
* the template for sends on this connection.
*/
if (len) {
+ struct mbuf *mb;
+ u_int moff;
+
if ((tp->t_flags & TF_FORCEDATA) && len == 1)
tcpstat.tcps_sndprobe++;
- else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+ else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
tcpstat.tcps_sndrexmitpack++;
tcpstat.tcps_sndrexmitbyte += len;
} else {
@@ -812,7 +775,7 @@
m->m_len += hdrlen;
m->m_data -= hdrlen;
#else
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL) {
SOCKBUF_UNLOCK(&so->so_snd);
error = ENOBUFS;
@@ -831,13 +794,20 @@
#endif
m->m_data += max_linkhdr;
m->m_len = hdrlen;
+
+ /*
+ * Start the m_copy functions from the closest mbuf
+ * to the offset in the socket buffer chain.
+ */
+ mb = sbsndptr(&so->so_snd, off, len, &moff);
+
if (len <= MHLEN - hdrlen - max_linkhdr) {
- m_copydata(so->so_snd.sb_mb, off, (int) len,
+ m_copydata(mb, moff, (int)len,
mtod(m, caddr_t) + hdrlen);
m->m_len += len;
} else {
- m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len);
- if (m->m_next == 0) {
+ m->m_next = m_copy(mb, moff, (int)len);
+ if (m->m_next == NULL) {
SOCKBUF_UNLOCK(&so->so_snd);
(void) m_free(m);
error = ENOBUFS;
@@ -865,7 +835,7 @@
else
tcpstat.tcps_sndwinup++;
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
goto out;
@@ -920,8 +890,8 @@
* (retransmit and persist are mutually exclusive...)
*/
if (sack_rxmit == 0) {
- if (len || (flags & (TH_SYN|TH_FIN))
- || callout_active(tp->tt_persist))
+ if (len || (flags & (TH_SYN|TH_FIN)) ||
+ tcp_timer_active(tp, TT_PERSIST))
th->th_seq = htonl(tp->snd_nxt);
else
th->th_seq = htonl(tp->snd_max);
@@ -947,8 +917,17 @@
recwin = (long)(tp->rcv_adv - tp->rcv_nxt);
if (recwin > (long)TCP_MAXWIN << tp->rcv_scale)
recwin = (long)TCP_MAXWIN << tp->rcv_scale;
- th->th_win = htons((u_short) (recwin >> tp->rcv_scale));
+ /*
+ * According to RFC1323 the window field in a SYN (i.e., a <SYN>
+ * or <SYN,ACK>) segment itself is never scaled. The <SYN,ACK>
+ * case is handled in syncache.
+ */
+ if (flags & TH_SYN)
+ th->th_win = htons((u_short)
+ (min(sbspace(&so->so_rcv), TCP_MAXWIN)));
+ else
+ th->th_win = htons((u_short)(recwin >> tp->rcv_scale));
/*
* Adjust the RXWIN0SENT flag - indicate that we have advertised
@@ -978,9 +957,11 @@
#ifdef INET6
if (!isipv6)
#endif
- if (tp->t_flags & TF_SIGNATURE)
+ if (tp->t_flags & TF_SIGNATURE) {
+ int sigoff = to.to_signature - opt;
tcp_signature_compute(m, sizeof(struct ip), len, optlen,
(u_char *)(th + 1) + sigoff, IPSEC_DIR_OUTBOUND);
+ }
#endif
/*
@@ -1010,11 +991,21 @@
}
/*
+ * Enable TSO and specify the size of the segments.
+ * The TCP pseudo header checksum is always provided.
+ * XXX: Fixme: This is currently not the case for IPv6.
+ */
+ if (tso) {
+ m->m_pkthdr.csum_flags = CSUM_TSO;
+ m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen;
+ }
+
+ /*
* In transmit state, time the transmission and arrange for
* the retransmit. In persist state, just set snd_max.
*/
if ((tp->t_flags & TF_FORCEDATA) == 0 ||
- !callout_active(tp->tt_persist)) {
+ !tcp_timer_active(tp, TT_PERSIST)) {
tcp_seq startseq = tp->snd_nxt;
/*
@@ -1053,15 +1044,14 @@
* of retransmit time.
*/
timer:
- if (!callout_active(tp->tt_rexmt) &&
+ if (!tcp_timer_active(tp, TT_REXMT) &&
((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
(tp->snd_nxt != tp->snd_una))) {
- if (callout_active(tp->tt_persist)) {
- callout_stop(tp->tt_persist);
+ if (tcp_timer_active(tp, TT_PERSIST)) {
+ tcp_timer_activate(tp, TT_PERSIST, 0);
tp->t_rxtshift = 0;
}
- callout_reset(tp->tt_rexmt, tp->t_rxtcur,
- tcp_timer_rexmt, tp);
+ tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
}
} else {
/*
@@ -1151,51 +1141,72 @@
/*
* We know that the packet was lost, so back out the
* sequence number advance, if any.
+ *
+ * If the error is EPERM the packet got blocked by the
+ * local firewall. Normally we should terminate the
+ * connection but the blocking may have been spurious
+ * due to a firewall reconfiguration cycle. So we treat
+ * it like a packet loss and let the retransmit timer and
+ * timeouts do their work over time.
+ * XXX: It is a POLA question whether calling tcp_drop right
+ * away would be the really correct behavior instead.
*/
- if ((tp->t_flags & TF_FORCEDATA) == 0 ||
- !callout_active(tp->tt_persist)) {
- /*
- * No need to check for TH_FIN here because
- * the TF_SENTFIN flag handles that case.
- */
- if ((flags & TH_SYN) == 0) {
- if (sack_rxmit) {
- p->rxmit -= len;
- tp->sackhint.sack_bytes_rexmit -= len;
- KASSERT(tp->sackhint.sack_bytes_rexmit
- >= 0,
- ("sackhint bytes rtx >= 0"));
- } else
- tp->snd_nxt -= len;
- }
+ if (((tp->t_flags & TF_FORCEDATA) == 0 ||
+ !tcp_timer_active(tp, TT_PERSIST)) &&
+ ((flags & TH_SYN) == 0) &&
+ (error != EPERM)) {
+ if (sack_rxmit) {
+ p->rxmit -= len;
+ tp->sackhint.sack_bytes_rexmit -= len;
+ KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
+ ("sackhint bytes rtx >= 0"));
+ } else
+ tp->snd_nxt -= len;
}
-
out:
SOCKBUF_UNLOCK_ASSERT(&so->so_snd); /* Check gotos. */
- if (error == ENOBUFS) {
- if (!callout_active(tp->tt_rexmt) &&
- !callout_active(tp->tt_persist))
- callout_reset(tp->tt_rexmt, tp->t_rxtcur,
- tcp_timer_rexmt, tp);
+ switch (error) {
+ case EPERM:
+ tp->t_softerror = error;
+ return (error);
+ case ENOBUFS:
+ if (!tcp_timer_active(tp, TT_REXMT) &&
+ !tcp_timer_active(tp, TT_PERSIST))
+ tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
tp->snd_cwnd = tp->t_maxseg;
return (0);
- }
- if (error == EMSGSIZE) {
+ case EMSGSIZE:
/*
- * ip_output() will have already fixed the route
- * for us. tcp_mtudisc() will, as its last action,
- * initiate retransmission, so it is important to
- * not do so here.
+ * For some reason the interface we used initially
+ * to send segments changed to another or lowered
+ * its MTU.
+ *
+ * tcp_mtudisc() will find out the new MTU and as
+ * its last action, initiate retransmission, so it
+ * is important to not do so here.
+ *
+ * If TSO was active we either got an interface
+ * without TSO capabilities or TSO was turned off.
+ * Disable it for this connection too and
+ * immediately retry with MSS sized segments generated
+ * by this function.
*/
+ if (tso)
+ tp->t_flags &= ~TF_TSO;
tcp_mtudisc(tp->t_inpcb, 0);
- return 0;
- }
- if ((error == EHOSTUNREACH || error == ENETDOWN)
- && TCPS_HAVERCVDSYN(tp->t_state)) {
- tp->t_softerror = error;
return (0);
+ case EHOSTDOWN:
+ case EHOSTUNREACH:
+ case ENETDOWN:
+ case ENETUNREACH:
+ if (TCPS_HAVERCVDSYN(tp->t_state)) {
+ tp->t_softerror = error;
+ return (0);
+ }
+ /* FALLTHROUGH */
+ default:
+ return (error);
}
- return (error);
}
tcpstat.tcps_sndtotal++;
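The new error switch above sorts ip_output() failures into errors that are absorbed so the timers can recover (ENOBUFS, EMSGSIZE), errors recorded as soft errors (EPERM, the host/network unreachable family once a SYN has been received), and everything else, which is returned to the caller. A rough user-space sketch of that split follows; the enum and helper names are hypothetical, and the real code is more nuanced (EPERM is recorded but still returned, and the unreachable cases only stay soft after a SYN has been received):

	/* Sketch of the soft/hard error split in the new switch; names are hypothetical. */
	#include <errno.h>
	#include <stdio.h>

	enum out_action { SOFT_RETRY, SOFT_RECORD, HARD_FAIL };

	static enum out_action classify(int error)
	{
		switch (error) {
		case EPERM:		/* firewall block: treat like packet loss */
		case EHOSTDOWN:
		case EHOSTUNREACH:
		case ENETDOWN:
		case ENETUNREACH:
			return SOFT_RECORD;
		case ENOBUFS:		/* back off and let the timers retry */
		case EMSGSIZE:		/* MTU changed; path-MTU discovery retransmits */
			return SOFT_RETRY;
		default:
			return HARD_FAIL;
		}
	}

	int main(void)
	{
		printf("EPERM -> %d, ENOBUFS -> %d, EINVAL -> %d\n",
		    classify(EPERM), classify(ENOBUFS), classify(EINVAL));
		return 0;
	}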
@@ -1209,8 +1220,8 @@
tp->rcv_adv = tp->rcv_nxt + recwin;
tp->last_ack_sent = tp->rcv_nxt;
tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
- if (callout_active(tp->tt_delack))
- callout_stop(tp->tt_delack);
+ if (tcp_timer_active(tp, TT_DELACK))
+ tcp_timer_activate(tp, TT_DELACK, 0);
#if 0
/*
* This completely breaks TCP if newreno is turned on. What happens
@@ -1227,20 +1238,159 @@
}
void
-tcp_setpersist(tp)
- register struct tcpcb *tp;
+tcp_setpersist(struct tcpcb *tp)
{
int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
int tt;
- if (callout_active(tp->tt_rexmt))
+ if (tcp_timer_active(tp, TT_REXMT))
panic("tcp_setpersist: retransmit pending");
/*
* Start/restart persistence timer.
*/
TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
TCPTV_PERSMIN, TCPTV_PERSMAX);
- callout_reset(tp->tt_persist, tt, tcp_timer_persist, tp);
+ tcp_timer_activate(tp, TT_PERSIST, tt);
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
tp->t_rxtshift++;
}
+
+/*
+ * Insert TCP options according to the supplied parameters to the place
+ * optp in a consistent way. Can handle unaligned destinations.
+ *
+ * The order of the option processing is crucial for optimal packing and
+ * alignment for the scarce option space.
+ *
+ * The optimal order for a SYN/SYN-ACK segment is:
+ * MSS (4) + NOP (1) + Window scale (3) + SACK permitted (2) +
+ * Timestamp (10) + Signature (18) = 38 bytes out of a maximum of 40.
+ *
+ * The SACK options should be last. SACK blocks consume 8*n+2 bytes.
+ * So a full size SACK blocks option is 34 bytes (with 4 SACK blocks).
+ * At minimum we need 10 bytes (to generate 1 SACK block). If both
+ * TCP Timestamps (12 bytes) and TCP Signatures (18 bytes) are present,
+ * we only have 10 bytes for SACK options (40 - (12 + 18)).
+ */
+int
+tcp_addoptions(struct tcpopt *to, u_char *optp)
+{
+ u_int mask, optlen = 0;
+
+ for (mask = 1; mask < TOF_MAXOPT; mask <<= 1) {
+ if ((to->to_flags & mask) != mask)
+ continue;
+ switch (to->to_flags & mask) {
+ case TOF_MSS:
+ while (optlen % 4) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ optlen += TCPOLEN_MAXSEG;
+ *optp++ = TCPOPT_MAXSEG;
+ *optp++ = TCPOLEN_MAXSEG;
+ to->to_mss = htons(to->to_mss);
+ bcopy((u_char *)&to->to_mss, optp, sizeof(to->to_mss));
+ optp += sizeof(to->to_mss);
+ break;
+ case TOF_SCALE:
+ while (!optlen || optlen % 2 != 1) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ optlen += TCPOLEN_WINDOW;
+ *optp++ = TCPOPT_WINDOW;
+ *optp++ = TCPOLEN_WINDOW;
+ *optp++ = to->to_wscale;
+ break;
+ case TOF_SACKPERM:
+ while (optlen % 2) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ optlen += TCPOLEN_SACK_PERMITTED;
+ *optp++ = TCPOPT_SACK_PERMITTED;
+ *optp++ = TCPOLEN_SACK_PERMITTED;
+ break;
+ case TOF_TS:
+ while (!optlen || optlen % 4 != 2) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ optlen += TCPOLEN_TIMESTAMP;
+ *optp++ = TCPOPT_TIMESTAMP;
+ *optp++ = TCPOLEN_TIMESTAMP;
+ to->to_tsval = htonl(to->to_tsval);
+ to->to_tsecr = htonl(to->to_tsecr);
+ bcopy((u_char *)&to->to_tsval, optp, sizeof(to->to_tsval));
+ optp += sizeof(to->to_tsval);
+ bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr));
+ optp += sizeof(to->to_tsecr);
+ break;
+ case TOF_SIGNATURE:
+ {
+ int siglen = TCPOLEN_SIGNATURE - 2;
+
+ while (!optlen || optlen % 4 != 2) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ if (TCP_MAXOLEN - optlen < TCPOLEN_SIGNATURE)
+ continue;
+ optlen += TCPOLEN_SIGNATURE;
+ *optp++ = TCPOPT_SIGNATURE;
+ *optp++ = TCPOLEN_SIGNATURE;
+ to->to_signature = optp;
+ while (siglen--)
+ *optp++ = 0;
+ break;
+ }
+ case TOF_SACK:
+ {
+ int sackblks = 0;
+ struct sackblk *sack = (struct sackblk *)to->to_sacks;
+ tcp_seq sack_seq;
+
+ while (!optlen || optlen % 4 != 2) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+ if (TCP_MAXOLEN - optlen < 2 + TCPOLEN_SACK)
+ continue;
+ optlen += TCPOLEN_SACKHDR;
+ *optp++ = TCPOPT_SACK;
+ sackblks = min(to->to_nsacks,
+ (TCP_MAXOLEN - optlen) / TCPOLEN_SACK);
+ *optp++ = TCPOLEN_SACKHDR + sackblks * TCPOLEN_SACK;
+ while (sackblks--) {
+ sack_seq = htonl(sack->start);
+ bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
+ optp += sizeof(sack_seq);
+ sack_seq = htonl(sack->end);
+ bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
+ optp += sizeof(sack_seq);
+ optlen += TCPOLEN_SACK;
+ sack++;
+ }
+ tcpstat.tcps_sack_send_blocks++;
+ break;
+ }
+ default:
+ panic("%s: unknown TCP option type", __func__);
+ break;
+ }
+ }
+
+ /* Terminate and pad TCP options to a 4 byte boundary. */
+ if (optlen % 4) {
+ optlen += TCPOLEN_EOL;
+ *optp++ = TCPOPT_EOL;
+ }
+ while (optlen % 4) {
+ optlen += TCPOLEN_NOP;
+ *optp++ = TCPOPT_NOP;
+ }
+
+ KASSERT(optlen <= TCP_MAXOLEN, ("%s: TCP options too long", __func__));
+ return (optlen);
+}
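The loop above relies on one idiom: before each option, emit TCPOPT_NOPs until the running length reaches the offset that option's 32-bit fields want, then finish the block with an EOL and NOP-fill to a 4-byte multiple. A stand-alone sketch of the same discipline is below; the pad_to() helper and the hard-coded values are illustrative, not the kernel's encoder, and it reproduces the 20-byte MSS + NOP + window scale + SACK-permitted + timestamp layout from the header comment (without a signature):

	/* Stand-alone sketch of the NOP-pad / EOL-terminate discipline of tcp_addoptions(). */
	#include <stdio.h>
	#include <string.h>

	#define TCPOPT_EOL		0
	#define TCPOPT_NOP		1
	#define TCPOPT_MAXSEG		2
	#define TCPOPT_WINDOW		3
	#define TCPOPT_SACK_PERMITTED	4
	#define TCPOPT_TIMESTAMP	8
	#define TCP_MAXOLEN		40

	static unsigned pad_to(unsigned char *buf, unsigned len, unsigned mod, unsigned rem)
	{
		while (len % mod != rem)	/* emit NOPs until the next option is aligned */
			buf[len++] = TCPOPT_NOP;
		return len;
	}

	int main(void)
	{
		unsigned char opt[TCP_MAXOLEN];
		unsigned len = 0;

		/* MSS: starts on a 4-byte boundary, 4 bytes total. */
		len = pad_to(opt, len, 4, 0);
		opt[len++] = TCPOPT_MAXSEG; opt[len++] = 4;
		opt[len++] = 0x05; opt[len++] = 0xb4;			/* 1460 */

		/* Window scale: 3 bytes, started at an odd offset. */
		len = pad_to(opt, len, 2, 1);
		opt[len++] = TCPOPT_WINDOW; opt[len++] = 3; opt[len++] = 7;

		/* SACK permitted: 2 bytes, started at an even offset. */
		len = pad_to(opt, len, 2, 0);
		opt[len++] = TCPOPT_SACK_PERMITTED; opt[len++] = 2;

		/* Timestamps: 10 bytes, started at offset 2 (mod 4). */
		len = pad_to(opt, len, 4, 2);
		opt[len++] = TCPOPT_TIMESTAMP; opt[len++] = 10;
		memset(&opt[len], 0, 8); len += 8;

		/* Terminate and pad to a 4-byte multiple, as the function's tail does. */
		if (len % 4)
			opt[len++] = TCPOPT_EOL;
		while (len % 4)
			opt[len++] = TCPOPT_NOP;

		printf("SYN option block: %u bytes\n", len);	/* prints 20 */
		return 0;
	}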
Index: in_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/in_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/in_var.h -L sys/netinet/in_var.h -u -r1.1.1.1 -r1.2
--- sys/netinet/in_var.h
+++ sys/netinet/in_var.h
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)in_var.h 8.2 (Berkeley) 1/9/95
- * $FreeBSD: src/sys/netinet/in_var.h,v 1.53.2.2 2005/08/24 17:30:44 rwatson Exp $
+ * $FreeBSD: src/sys/netinet/in_var.h,v 1.61 2007/06/12 16:24:53 bms Exp $
*/
#ifndef _NETINET_IN_VAR_H_
@@ -94,6 +94,19 @@
#define INADDR_HASH(x) \
(&in_ifaddrhashtbl[INADDR_HASHVAL(x) & in_ifaddrhmask])
+/*
+ * Macro for finding the internet address structure (in_ifaddr)
+ * corresponding to one of our IP addresses (in_addr).
+ */
+#define INADDR_TO_IFADDR(addr, ia) \
+ /* struct in_addr addr; */ \
+ /* struct in_ifaddr *ia; */ \
+do { \
+\
+ LIST_FOREACH(ia, INADDR_HASH((addr).s_addr), ia_hash) \
+ if (IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr) \
+ break; \
+} while (0)
/*
* Macro for finding the interface (ifnet structure) corresponding to one
@@ -105,9 +118,7 @@
{ \
struct in_ifaddr *ia; \
\
- LIST_FOREACH(ia, INADDR_HASH((addr).s_addr), ia_hash) \
- if (IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr) \
- break; \
+ INADDR_TO_IFADDR(addr, ia); \
(ifp) = (ia == NULL) ? NULL : ia->ia_ifp; \
}
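INADDR_TO_IFADDR lifts the hash-bucket walk out of IFP_TO_IA so the lookup can be reused on its own. The pattern, index into a hash table, LIST_FOREACH the bucket, break on the first exact match (leaving the iterator NULL when nothing matches), can be sketched in user space with <sys/queue.h>; the structures below are stand-ins, not the kernel's in_ifaddr:

	/* User-space sketch of the INADDR_TO_IFADDR lookup pattern (stand-in types). */
	#include <sys/queue.h>
	#include <stdint.h>
	#include <stdio.h>

	#define HASHSIZE	8
	#define HASHVAL(a)	((a) % HASHSIZE)

	struct entry {
		uint32_t addr;
		LIST_ENTRY(entry) hash_link;
	};
	LIST_HEAD(bucket, entry);

	static struct bucket table[HASHSIZE];

	int main(void)
	{
		struct entry e1 = { 0x0a000001 }, e2 = { 0x0a000009 }, *ia;
		uint32_t key = 0x0a000009;

		LIST_INSERT_HEAD(&table[HASHVAL(e1.addr)], &e1, hash_link);
		LIST_INSERT_HEAD(&table[HASHVAL(e2.addr)], &e2, hash_link);

		/* The macro's body: walk one bucket, stop at the first exact match. */
		LIST_FOREACH(ia, &table[HASHVAL(key)], hash_link)
			if (ia->addr == key)
				break;

		printf("found: %s\n", ia != NULL ? "yes" : "no");
		return 0;
	}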
@@ -136,6 +147,12 @@
int rti_type; /* type of router which is querier on this interface */
int rti_time; /* # of slow timeouts since last old query */
SLIST_ENTRY(router_info) rti_list;
+#ifdef notyet
+ int rti_timev1; /* IGMPv1 querier present */
+ int rti_timev2; /* IGMPv2 querier present */
+ int rti_timer; /* report to general query */
+ int rti_qrv; /* querier robustness */
+#endif
};
/*
@@ -154,8 +171,45 @@
u_int inm_timer; /* IGMP membership report timer */
u_int inm_state; /* state of the membership */
struct router_info *inm_rti; /* router info*/
+ u_int inm_refcount; /* reference count */
+#ifdef notyet /* IGMPv3 source-specific multicast fields */
+ TAILQ_HEAD(, in_msfentry) inm_msf; /* all active source filters */
+ TAILQ_HEAD(, in_msfentry) inm_msf_record; /* recorded sources */
+ TAILQ_HEAD(, in_msfentry) inm_msf_exclude; /* exclude sources */
+ TAILQ_HEAD(, in_msfentry) inm_msf_include; /* include sources */
+ /* XXX: should this lot go to the router_info structure? */
+ /* XXX: can/should these be callouts? */
+ /* IGMP protocol timers */
+ int32_t inm_ti_curstate; /* current state timer */
+ int32_t inm_ti_statechg; /* state change timer */
+ /* IGMP report timers */
+ uint16_t inm_rpt_statechg; /* state change report timer */
+ uint16_t inm_rpt_toxx; /* fmode change report timer */
+ /* IGMP protocol state */
+ uint16_t inm_fmode; /* filter mode */
+ uint32_t inm_recsrc_count; /* # of recorded sources */
+ uint16_t inm_exclude_sock_count; /* # of exclude-mode sockets */
+ uint16_t inm_gass_count; /* # of g-a-s queries */
+#endif
};
+#ifdef notyet
+/*
+ * Internet multicast source filter list. This list is used to store
+ * IP multicast source addresses for each membership on an interface.
+ * TODO: Allocate these structures using UMA.
+ * TODO: Find an easier way of linking the struct into two lists at once.
+ */
+struct in_msfentry {
+ TAILQ_ENTRY(in_msfentry) isf_link; /* next filter in all-list */
+ TAILQ_ENTRY(in_msfentry) isf_next; /* next filter in queue */
+ struct in_addr isf_addr; /* the address of this source */
+ uint16_t isf_refcount; /* reference count */
+ uint16_t isf_reporttag; /* what to report to the IGMP router */
+ uint16_t isf_rexmit; /* retransmission state/count */
+};
+#endif
+
#ifdef _KERNEL
#ifdef SYSCTL_DECL
@@ -234,15 +288,22 @@
} while(0)
struct route;
+struct ip_moptions;
+
+size_t imo_match_group(struct ip_moptions *, struct ifnet *,
+ struct sockaddr *);
+struct in_msource *imo_match_source(struct ip_moptions *, size_t,
+ struct sockaddr *);
struct in_multi *in_addmulti(struct in_addr *, struct ifnet *);
void in_delmulti(struct in_multi *);
+void in_delmulti_locked(struct in_multi *);
int in_control(struct socket *, u_long, caddr_t, struct ifnet *,
struct thread *);
void in_rtqdrain(void);
void ip_input(struct mbuf *);
int in_ifadown(struct ifaddr *ifa, int);
void in_ifscrub(struct ifnet *, struct in_ifaddr *);
-int ip_fastforward(struct mbuf *);
+struct mbuf *ip_fastforward(struct mbuf *);
#endif /* _KERNEL */
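This hunk also changes the ip_fastforward() prototype from returning int to returning struct mbuf *. The stub below is only a guess at the calling convention that change suggests (NULL meaning the fast path consumed the packet, non-NULL meaning it is handed back for normal input processing); the types and functions are stand-ins, not kernel code:

	/* Hypothetical caller pattern for the new ip_fastforward() prototype. */
	#include <stdio.h>
	#include <stddef.h>

	struct mbuf { int consumed; };

	static struct mbuf *ip_fastforward_stub(struct mbuf *m)
	{
		return m->consumed ? NULL : m;	/* pretend we sometimes forward/drop it */
	}

	int main(void)
	{
		struct mbuf pkt = { 0 };
		struct mbuf *m = ip_fastforward_stub(&pkt);

		if (m == NULL)
			return 0;			/* fast path consumed the packet */
		printf("slow path continues with the packet\n");
		return 0;
	}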
Index: ip_fw2.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_fw2.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/ip_fw2.c -L sys/netinet/ip_fw2.c -u -r1.2 -r1.3
--- sys/netinet/ip_fw2.c
+++ sys/netinet/ip_fw2.c
@@ -21,10 +21,11 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
- * $FreeBSD: src/sys/netinet/ip_fw2.c,v 1.106.2.12 2006/03/09 13:42:44 glebius Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_fw2.c,v 1.175.4.1 2008/01/28 17:44:30 rwatson Exp $");
+
#define DEB(x)
#define DDB(x) x
@@ -34,7 +35,7 @@
#if !defined(KLD_MODULE)
#include "opt_ipfw.h"
-#include "opt_ip6fw.h"
+#include "opt_ipdivert.h"
#include "opt_ipdn.h"
#include "opt_inet.h"
#ifndef INET
@@ -43,16 +44,21 @@
#endif
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_mac.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/condvar.h>
+#include <sys/eventhandler.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/jail.h>
#include <sys/module.h>
+#include <sys/priv.h>
#include <sys/proc.h>
+#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
@@ -61,6 +67,10 @@
#include <net/if.h>
#include <net/radix.h>
#include <net/route.h>
+#include <net/pf_mtag.h>
+
+#define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. */
+
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
@@ -71,21 +81,23 @@
#include <netinet/ip_fw.h>
#include <netinet/ip_divert.h>
#include <netinet/ip_dummynet.h>
+#include <netinet/ip_carp.h>
+#include <netinet/pim.h>
#include <netinet/tcp.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
-
+#include <netinet/sctp.h>
+#ifdef IPFIREWALL_NAT
+#include <netinet/libalias/alias.h>
+#include <netinet/libalias/alias_local.h>
+#endif
#include <netgraph/ng_ipfw.h>
#include <altq/if_altq.h>
-#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#endif
-
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#ifdef INET6
@@ -96,6 +108,8 @@
#include <machine/in_cksum.h> /* XXX for in_cksum */
+#include <security/mac/mac_framework.h>
+
/*
* set_disable contains one bit per set value (0..31).
* If the bit is set, all rules with the corresponding set
@@ -130,55 +144,19 @@
struct ip_fw_chain {
struct ip_fw *rules; /* list of rules */
struct ip_fw *reap; /* list of rules to reap */
+ LIST_HEAD(, cfg_nat) nat; /* list of nat entries */
struct radix_node_head *tables[IPFW_TABLES_MAX];
- struct mtx mtx; /* lock guarding rule list */
- int busy_count; /* busy count for rw locks */
- int want_write;
- struct cv cv;
+ struct rwlock rwmtx;
};
#define IPFW_LOCK_INIT(_chain) \
- mtx_init(&(_chain)->mtx, "IPFW static rules", NULL, \
- MTX_DEF | MTX_RECURSE)
-#define IPFW_LOCK_DESTROY(_chain) mtx_destroy(&(_chain)->mtx)
-#define IPFW_WLOCK_ASSERT(_chain) do { \
- mtx_assert(&(_chain)->mtx, MA_OWNED); \
- NET_ASSERT_GIANT(); \
-} while (0)
-
-static __inline void
-IPFW_RLOCK(struct ip_fw_chain *chain)
-{
- mtx_lock(&chain->mtx);
- chain->busy_count++;
- mtx_unlock(&chain->mtx);
-}
-
-static __inline void
-IPFW_RUNLOCK(struct ip_fw_chain *chain)
-{
- mtx_lock(&chain->mtx);
- chain->busy_count--;
- if (chain->busy_count == 0 && chain->want_write)
- cv_signal(&chain->cv);
- mtx_unlock(&chain->mtx);
-}
-
-static __inline void
-IPFW_WLOCK(struct ip_fw_chain *chain)
-{
- mtx_lock(&chain->mtx);
- chain->want_write++;
- while (chain->busy_count > 0)
- cv_wait(&chain->cv, &chain->mtx);
-}
-
-static __inline void
-IPFW_WUNLOCK(struct ip_fw_chain *chain)
-{
- chain->want_write--;
- cv_signal(&chain->cv);
- mtx_unlock(&chain->mtx);
-}
+ rw_init(&(_chain)->rwmtx, "IPFW static rules")
+#define IPFW_LOCK_DESTROY(_chain) rw_destroy(&(_chain)->rwmtx)
+#define IPFW_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_WLOCKED)
+
+#define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx)
+#define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx)
+#define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx)
+#define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx)
/*
* list of rules for layer 3
@@ -197,11 +175,13 @@
static int fw_debug = 1;
static int autoinc_step = 100; /* bounded to 1..1000 in add_rule() */
+extern int ipfw_chg_hook(SYSCTL_HANDLER_ARGS);
+
#ifdef SYSCTL_NODE
SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, enable,
- CTLFLAG_RW | CTLFLAG_SECURE3,
- &fw_enable, 0, "Enable ipfw");
+SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &fw_enable, 0,
+ ipfw_chg_hook, "I", "Enable ipfw");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_RW,
&autoinc_step, 0, "Rule number autoincrement step");
SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
@@ -327,6 +307,9 @@
#endif /* INET6 */
#endif /* SYSCTL_NODE */
+#ifdef IPFIREWALL_NAT
+MODULE_DEPEND(ipfw, libalias, 1, 1, 1);
+#endif
static int fw_deny_unknown_exthdrs = 1;
@@ -336,6 +319,7 @@
*/
#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl))
#define TCP(p) ((struct tcphdr *)(p))
+#define SCTP(p) ((struct sctphdr *)(p))
#define UDP(p) ((struct udphdr *)(p))
#define ICMP(p) ((struct icmphdr *)(p))
#define ICMP6(p) ((struct icmp6_hdr *)(p))
@@ -496,8 +480,6 @@
/* XXX lock? */
TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) {
- if (ia->ifa_addr == NULL)
- continue;
if (ia->ifa_addr->sa_family != AF_INET)
continue;
if (cmd->p.ip.s_addr == ((struct sockaddr_in *)
@@ -605,10 +587,7 @@
struct in6_addr copia;
TAILQ_FOREACH(mdc, &ifnet, if_link)
- for (mdc2 = mdc->if_addrlist.tqh_first; mdc2;
- mdc2 = mdc2->ifa_list.tqe_next) {
- if (!mdc2->ifa_addr)
- continue;
+ TAILQ_FOREACH(mdc2, &mdc->if_addrlist, ifa_list) {
if (mdc2->ifa_addr->sa_family == AF_INET6) {
fdm = (struct in6_ifaddr *)mdc2;
copia = fdm->ia_addr.sin6_addr;
@@ -672,11 +651,11 @@
hash_packet6(struct ipfw_flow_id *id)
{
u_int32_t i;
- i = (id->dst_ip6.__u6_addr.__u6_addr32[0]) ^
- (id->dst_ip6.__u6_addr.__u6_addr32[1]) ^
- (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^
+ i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^
(id->dst_ip6.__u6_addr.__u6_addr32[3]) ^
- (id->dst_port) ^ (id->src_port) ^ (id->flow_id6);
+ (id->src_ip6.__u6_addr.__u6_addr32[2]) ^
+ (id->src_ip6.__u6_addr.__u6_addr32[3]) ^
+ (id->dst_port) ^ (id->src_port);
return i;
}
@@ -695,11 +674,12 @@
}
static void
-send_reject6(struct ip_fw_args *args, int code, u_short offset, u_int hlen)
+send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6)
{
- if (code == ICMP6_UNREACH_RST && offset == 0 &&
- args->f_id.proto == IPPROTO_TCP) {
- struct ip6_hdr *ip6;
+ struct mbuf *m;
+
+ m = args->m;
+ if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) {
struct tcphdr *tcp;
tcp_seq ack, seq;
int flags;
@@ -707,18 +687,11 @@
struct ip6_hdr ip6;
struct tcphdr th;
} ti;
-
- if (args->m->m_len < (hlen+sizeof(struct tcphdr))) {
- args->m = m_pullup(args->m, hlen+sizeof(struct tcphdr));
- if (args->m == NULL)
- return;
- }
-
- ip6 = mtod(args->m, struct ip6_hdr *);
- tcp = (struct tcphdr *)(mtod(args->m, char *) + hlen);
+ tcp = (struct tcphdr *)((char *)ip6 + hlen);
if ((tcp->th_flags & TH_RST) != 0) {
- m_freem(args->m);
+ m_freem(m);
+ args->m = NULL;
return;
}
@@ -734,14 +707,20 @@
flags = TH_RST;
} else {
ack = ti.th.th_seq;
- if (((args->m)->m_flags & M_PKTHDR) != 0) {
- ack += (args->m)->m_pkthdr.len - hlen
+ if ((m->m_flags & M_PKTHDR) != 0) {
+ /*
+ * total new data to ACK is:
+ * total packet length,
+ * minus the header length,
+ * minus the tcp header length.
+ */
+ ack += m->m_pkthdr.len - hlen
- (ti.th.th_off << 2);
} else if (ip6->ip6_plen) {
- ack += ntohs(ip6->ip6_plen) + sizeof(*ip6)
- - hlen - (ti.th.th_off << 2);
+ ack += ntohs(ip6->ip6_plen) + sizeof(*ip6) -
+ hlen - (ti.th.th_off << 2);
} else {
- m_freem(args->m);
+ m_freem(m);
return;
}
if (tcp->th_flags & TH_SYN)
@@ -750,14 +729,28 @@
flags = TH_RST|TH_ACK;
}
bcopy(&ti, ip6, sizeof(ti));
- tcp_respond(NULL, ip6, (struct tcphdr *)(ip6 + 1),
- args->m, ack, seq, flags);
-
+ /*
+ * m is only used to recycle the mbuf
+ * The data in it is never read so we don't need
+ * to correct the offsets or anything
+ */
+ tcp_respond(NULL, ip6, tcp, m, ack, seq, flags);
} else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */
- icmp6_error(args->m, ICMP6_DST_UNREACH, code, 0);
-
+#if 0
+ /*
+ * Unlike above, the mbufs need to line up with the ip6 hdr,
+ * as the contents are read. We need to m_adj() the
+ * needed amount.
+ * The mbuf will however be thrown away so we can adjust it.
+ * Remember we did an m_pullup on it already so we
+ * can make some assumptions about contiguousness.
+ */
+ if (args->L3offset)
+ m_adj(m, args->L3offset);
+#endif
+ icmp6_error(m, ICMP6_DST_UNREACH, code, 0);
} else
- m_freem(args->m);
+ m_freem(m);
args->m = NULL;
}
@@ -775,7 +768,8 @@
*/
static void
ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
- struct mbuf *m, struct ifnet *oif, u_short offset)
+ struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg,
+ struct ip *ip)
{
struct ether_header *eh = args->eh;
char *action;
@@ -812,6 +806,9 @@
if (cmd->opcode == O_PROB)
cmd += F_LEN(cmd);
+ if (cmd->opcode == O_TAG)
+ cmd += F_LEN(cmd);
+
action = action2;
switch (cmd->opcode) {
case O_DENY:
@@ -865,9 +862,15 @@
case O_FORWARD_IP: {
ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
int len;
+ struct in_addr dummyaddr;
+ if (sa->sa.sin_addr.s_addr == INADDR_ANY)
+ dummyaddr.s_addr = htonl(tablearg);
+ else
+ dummyaddr.s_addr = sa->sa.sin_addr.s_addr;
len = snprintf(SNPARGS(action2, 0), "Forward to %s",
- inet_ntoa(sa->sa.sin_addr));
+ inet_ntoa(dummyaddr));
+
if (sa->sa.sin_port)
snprintf(SNPARGS(action2, len), ":%d",
sa->sa.sin_port);
@@ -881,6 +884,9 @@
snprintf(SNPARGS(action2, 0), "Ngtee %d",
cmd->arg1);
break;
+ case O_NAT:
+ action = "Nat";
+ break;
default:
action = "UNKNOWN";
break;
@@ -896,8 +902,6 @@
struct icmphdr *icmp;
struct tcphdr *tcp;
struct udphdr *udp;
- /* Initialize to make compiler happy. */
- struct ip *ip = NULL;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
struct icmp6_hdr *icmp6;
@@ -905,19 +909,19 @@
src[0] = '\0';
dst[0] = '\0';
#ifdef INET6
- if (args->f_id.addr_type == 6) {
+ if (IS_IP6_FLOW_ID(&(args->f_id))) {
+ char ip6buf[INET6_ADDRSTRLEN];
snprintf(src, sizeof(src), "[%s]",
- ip6_sprintf(&args->f_id.src_ip6));
+ ip6_sprintf(ip6buf, &args->f_id.src_ip6));
snprintf(dst, sizeof(dst), "[%s]",
- ip6_sprintf(&args->f_id.dst_ip6));
+ ip6_sprintf(ip6buf, &args->f_id.dst_ip6));
- ip6 = (struct ip6_hdr *)mtod(m, struct ip6_hdr *);
- tcp = (struct tcphdr *)(mtod(args->m, char *) + hlen);
- udp = (struct udphdr *)(mtod(args->m, char *) + hlen);
+ ip6 = (struct ip6_hdr *)ip;
+ tcp = (struct tcphdr *)(((char *)ip) + hlen);
+ udp = (struct udphdr *)(((char *)ip) + hlen);
} else
#endif
{
- ip = mtod(m, struct ip *);
tcp = L3HDR(struct tcphdr, ip);
udp = L3HDR(struct udphdr, ip);
@@ -961,7 +965,7 @@
break;
#ifdef INET6
case IPPROTO_ICMPV6:
- icmp6 = (struct icmp6_hdr *)(mtod(args->m, char *) + hlen);
+ icmp6 = (struct icmp6_hdr *)(((char *)ip) + hlen);
if (offset == 0)
len = snprintf(SNPARGS(proto, 0),
"ICMPv6:%u.%u ",
@@ -980,7 +984,7 @@
}
#ifdef INET6
- if (args->f_id.addr_type == 6) {
+ if (IS_IP6_FLOW_ID(&(args->f_id))) {
if (offset & (IP6F_OFF_MASK | IP6F_MORE_FRAG))
snprintf(SNPARGS(fragment, 0),
" (frag %08x:%d@%d%s)",
@@ -1096,9 +1100,9 @@
if (ipfw_dyn_v == NULL || dyn_count == 0)
return;
/* do not expire more than once per second, it is useless */
- if (!FORCE && last_remove == time_second)
+ if (!FORCE && last_remove == time_uptime)
return;
- last_remove = time_second;
+ last_remove = time_uptime;
/*
* because O_LIMIT refer to parent rules, during the first pass only
@@ -1130,7 +1134,7 @@
}
} else {
if (!FORCE &&
- !TIME_LEQ( q->expire, time_second ))
+ !TIME_LEQ( q->expire, time_uptime ))
goto next;
}
if (q->dyn_type != O_LIMIT_PARENT || !q->count) {
@@ -1152,7 +1156,7 @@
*/
static ipfw_dyn_rule *
lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction,
- struct tcphdr *tcp)
+ struct tcphdr *tcp)
{
/*
* stateful ipfw extensions.
@@ -1173,7 +1177,7 @@
for (prev=NULL, q = ipfw_dyn_v[i] ; q != NULL ; ) {
if (q->dyn_type == O_LIMIT_PARENT && q->count)
goto next;
- if (TIME_LEQ( q->expire, time_second)) { /* expire entry */
+ if (TIME_LEQ( q->expire, time_uptime)) { /* expire entry */
UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q);
continue;
}
@@ -1235,7 +1239,7 @@
q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8);
switch (q->state) {
case TH_SYN: /* opening */
- q->expire = time_second + dyn_syn_lifetime;
+ q->expire = time_uptime + dyn_syn_lifetime;
break;
case BOTH_SYN: /* move to established */
@@ -1258,13 +1262,13 @@
}
}
}
- q->expire = time_second + dyn_ack_lifetime;
+ q->expire = time_uptime + dyn_ack_lifetime;
break;
case BOTH_SYN | BOTH_FIN: /* both sides closed */
if (dyn_fin_lifetime >= dyn_keepalive_period)
dyn_fin_lifetime = dyn_keepalive_period - 1;
- q->expire = time_second + dyn_fin_lifetime;
+ q->expire = time_uptime + dyn_fin_lifetime;
break;
default:
@@ -1278,14 +1282,14 @@
#endif
if (dyn_rst_lifetime >= dyn_keepalive_period)
dyn_rst_lifetime = dyn_keepalive_period - 1;
- q->expire = time_second + dyn_rst_lifetime;
+ q->expire = time_uptime + dyn_rst_lifetime;
break;
}
} else if (pkt->proto == IPPROTO_UDP) {
- q->expire = time_second + dyn_udp_lifetime;
+ q->expire = time_uptime + dyn_udp_lifetime;
} else {
/* other protocols */
- q->expire = time_second + dyn_short_lifetime;
+ q->expire = time_uptime + dyn_short_lifetime;
}
done:
if (match_direction)
@@ -1295,7 +1299,7 @@
static ipfw_dyn_rule *
lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction,
- struct tcphdr *tcp)
+ struct tcphdr *tcp)
{
ipfw_dyn_rule *q;
@@ -1379,7 +1383,7 @@
}
r->id = *id;
- r->expire = time_second + dyn_syn_lifetime;
+ r->expire = time_uptime + dyn_syn_lifetime;
r->rule = rule;
r->dyn_type = dyn_type;
r->pcnt = r->bcnt = 0;
@@ -1429,7 +1433,7 @@
pkt->dst_ip == q->id.dst_ip)
)
) {
- q->expire = time_second + dyn_short_lifetime;
+ q->expire = time_uptime + dyn_short_lifetime;
DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);)
return q;
}
@@ -1445,62 +1449,76 @@
*/
static int
install_state(struct ip_fw *rule, ipfw_insn_limit *cmd,
- struct ip_fw_args *args)
+ struct ip_fw_args *args, uint32_t tablearg)
{
static int last_log;
-
ipfw_dyn_rule *q;
+ struct in_addr da;
+ char src[48], dst[48];
- DEB(printf("ipfw: install state type %d 0x%08x %u -> 0x%08x %u\n",
- cmd->o.opcode,
+ src[0] = '\0';
+ dst[0] = '\0';
+
+ DEB(
+ printf("ipfw: %s: type %d 0x%08x %u -> 0x%08x %u\n",
+ __func__, cmd->o.opcode,
(args->f_id.src_ip), (args->f_id.src_port),
- (args->f_id.dst_ip), (args->f_id.dst_port) );)
+ (args->f_id.dst_ip), (args->f_id.dst_port));
+ )
IPFW_DYN_LOCK();
q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL);
- if (q != NULL) { /* should never occur */
- if (last_log != time_second) {
- last_log = time_second;
- printf("ipfw: install_state: entry already present, done\n");
+ if (q != NULL) { /* should never occur */
+ if (last_log != time_uptime) {
+ last_log = time_uptime;
+ printf("ipfw: %s: entry already present, done\n",
+ __func__);
}
IPFW_DYN_UNLOCK();
- return 0;
+ return (0);
}
if (dyn_count >= dyn_max)
- /*
- * Run out of slots, try to remove any expired rule.
- */
+ /* Run out of slots, try to remove any expired rule. */
remove_dyn_rule(NULL, (ipfw_dyn_rule *)1);
if (dyn_count >= dyn_max) {
- if (last_log != time_second) {
- last_log = time_second;
- printf("ipfw: install_state: Too many dynamic rules\n");
+ if (last_log != time_uptime) {
+ last_log = time_uptime;
+ printf("ipfw: %s: Too many dynamic rules\n", __func__);
}
IPFW_DYN_UNLOCK();
- return 1; /* cannot install, notify caller */
+ return (1); /* cannot install, notify caller */
}
switch (cmd->o.opcode) {
- case O_KEEP_STATE: /* bidir rule */
+ case O_KEEP_STATE: /* bidir rule */
add_dyn_rule(&args->f_id, O_KEEP_STATE, rule);
break;
- case O_LIMIT: /* limit number of sessions */
- {
- u_int16_t limit_mask = cmd->limit_mask;
+ case O_LIMIT: { /* limit number of sessions */
struct ipfw_flow_id id;
ipfw_dyn_rule *parent;
+ uint32_t conn_limit;
+ uint16_t limit_mask = cmd->limit_mask;
- DEB(printf("ipfw: installing dyn-limit rule %d\n",
- cmd->conn_limit);)
+ conn_limit = (cmd->conn_limit == IP_FW_TABLEARG) ?
+ tablearg : cmd->conn_limit;
+
+ DEB(
+ if (cmd->conn_limit == IP_FW_TABLEARG)
+ printf("ipfw: %s: O_LIMIT rule, conn_limit: %u "
+ "(tablearg)\n", __func__, conn_limit);
+ else
+ printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n",
+ __func__, conn_limit);
+ )
- id.dst_ip = id.src_ip = 0;
- id.dst_port = id.src_port = 0;
+ id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0;
id.proto = args->f_id.proto;
+ id.addr_type = args->f_id.addr_type;
if (IS_IP6_FLOW_ID (&(args->f_id))) {
if (limit_mask & DYN_SRC_ADDR)
@@ -1517,38 +1535,68 @@
id.src_port = args->f_id.src_port;
if (limit_mask & DYN_DST_PORT)
id.dst_port = args->f_id.dst_port;
- parent = lookup_dyn_parent(&id, rule);
- if (parent == NULL) {
- printf("ipfw: add parent failed\n");
+ if ((parent = lookup_dyn_parent(&id, rule)) == NULL) {
+ printf("ipfw: %s: add parent failed\n", __func__);
IPFW_DYN_UNLOCK();
- return 1;
+ return (1);
}
- if (parent->count >= cmd->conn_limit) {
- /*
- * See if we can remove some expired rule.
- */
+
+ if (parent->count >= conn_limit) {
+ /* See if we can remove some expired rule. */
remove_dyn_rule(rule, parent);
- if (parent->count >= cmd->conn_limit) {
- if (fw_verbose && last_log != time_second) {
- last_log = time_second;
+ if (parent->count >= conn_limit) {
+ if (fw_verbose && last_log != time_uptime) {
+ last_log = time_uptime;
+#ifdef INET6
+ /*
+ * XXX IPv6 flows are not
+ * supported yet.
+ */
+ if (IS_IP6_FLOW_ID(&(args->f_id))) {
+ char ip6buf[INET6_ADDRSTRLEN];
+ snprintf(src, sizeof(src),
+ "[%s]", ip6_sprintf(ip6buf,
+ &args->f_id.src_ip6));
+ snprintf(dst, sizeof(dst),
+ "[%s]", ip6_sprintf(ip6buf,
+ &args->f_id.dst_ip6));
+ } else
+#endif
+ {
+ da.s_addr =
+ htonl(args->f_id.src_ip);
+ inet_ntoa_r(da, src);
+ da.s_addr =
+ htonl(args->f_id.dst_ip);
+ inet_ntoa_r(da, dst);
+ }
log(LOG_SECURITY | LOG_DEBUG,
- "drop session, too many entries\n");
+ "ipfw: %d %s %s:%u -> %s:%u, %s\n",
+ parent->rule->rulenum,
+ "drop session",
+ src, (args->f_id.src_port),
+ dst, (args->f_id.dst_port),
+ "too many entries");
}
IPFW_DYN_UNLOCK();
- return 1;
+ return (1);
}
}
add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent);
- }
break;
+ }
default:
- printf("ipfw: unknown dynamic rule type %u\n", cmd->o.opcode);
+ printf("ipfw: %s: unknown dynamic rule type %u\n",
+ __func__, cmd->o.opcode);
IPFW_DYN_UNLOCK();
- return 1;
+ return (1);
}
- lookup_dyn_rule_locked(&args->f_id, NULL, NULL); /* XXX just set lifetime */
+
+ /* XXX just set lifetime */
+ lookup_dyn_rule_locked(&args->f_id, NULL, NULL);
+
IPFW_DYN_UNLOCK();
- return 0;
+ return (0);
}
/*
@@ -1556,18 +1604,31 @@
* When flags & TH_RST, we are sending a RST packet, because of a
* "reset" action matched the packet.
* Otherwise we are sending a keepalive, and flags & TH_
+ * The 'replyto' mbuf is the mbuf being replied to, if any, and is required
+ * so that MAC can label the reply appropriately.
*/
static struct mbuf *
-send_pkt(struct ipfw_flow_id *id, u_int32_t seq, u_int32_t ack, int flags)
+send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq,
+ u_int32_t ack, int flags)
{
struct mbuf *m;
struct ip *ip;
struct tcphdr *tcp;
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == 0)
return (NULL);
m->m_pkthdr.rcvif = (struct ifnet *)0;
+
+#ifdef MAC
+ if (replyto != NULL)
+ mac_create_mbuf_netlayer(replyto, m);
+ else
+ mac_create_mbuf_from_firewall(m);
+#else
+ (void)replyto; /* don't warn about unused arg */
+#endif
+
m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr);
m->m_data += max_linkhdr;
@@ -1636,24 +1697,33 @@
* sends a reject message, consuming the mbuf passed as an argument.
*/
static void
-send_reject(struct ip_fw_args *args, int code, u_short offset, int ip_len)
+send_reject(struct ip_fw_args *args, int code, int ip_len, struct ip *ip)
{
+#if 0
+ /* XXX When ip is not guaranteed to be at mtod() we will
+ * need to account for this.
+ * The mbuf will however be thrown away so we can adjust it.
+ * Remember we did an m_pullup on it already so we
+ * can make some assumptions about contiguousness.
+ */
+ if (args->L3offset)
+ m_adj(m, args->L3offset);
+#endif
if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
/* We need the IP header in host order for icmp_error(). */
if (args->eh != NULL) {
- struct ip *ip = mtod(args->m, struct ip *);
ip->ip_len = ntohs(ip->ip_len);
ip->ip_off = ntohs(ip->ip_off);
}
icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
- } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) {
+ } else if (args->f_id.proto == IPPROTO_TCP) {
struct tcphdr *const tcp =
L3HDR(struct tcphdr, mtod(args->m, struct ip *));
if ( (tcp->th_flags & TH_RST) == 0) {
struct mbuf *m;
- m = send_pkt(&(args->f_id), ntohl(tcp->th_seq),
- ntohl(tcp->th_ack),
+ m = send_pkt(args->m, &(args->f_id),
+ ntohl(tcp->th_seq), ntohl(tcp->th_ack),
tcp->th_flags | TH_RST);
if (m != NULL)
ip_output(m, NULL, NULL, 0, NULL, NULL);
@@ -1691,6 +1761,8 @@
cmd += F_LEN(cmd);
if (cmd->opcode == O_ALTQ)
cmd += F_LEN(cmd);
+ if (cmd->opcode == O_TAG)
+ cmd += F_LEN(cmd);
if ( cmd->opcode == O_SKIPTO )
for (rule = me->next; rule ; rule = rule->next)
if (rule->rulenum >= cmd->arg1)
@@ -1703,7 +1775,7 @@
static int
add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
- uint8_t mlen, uint32_t value)
+ uint8_t mlen, uint32_t value)
{
struct radix_node_head *rnh;
struct table_entry *ent;
@@ -1731,7 +1803,7 @@
static int
del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
- uint8_t mlen)
+ uint8_t mlen)
{
struct radix_node_head *rnh;
struct table_entry *ent;
@@ -1812,7 +1884,7 @@
static int
lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
- uint32_t *val)
+ uint32_t *val)
{
struct radix_node_head *rnh;
struct table_entry *ent;
@@ -1904,11 +1976,10 @@
}
static int
-check_uidgid(ipfw_insn_u32 *insn,
- int proto, struct ifnet *oif,
- struct in_addr dst_ip, u_int16_t dst_port,
- struct in_addr src_ip, u_int16_t src_port,
- struct ip_fw_ugid *ugp, int *lookup, struct inpcb *inp)
+check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
+ struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip,
+ u_int16_t src_port, struct ip_fw_ugid *ugp, int *lookup,
+ struct inpcb *inp)
{
struct inpcbinfo *pi;
int wildcard;
@@ -1939,7 +2010,7 @@
wildcard = 0;
pi = &tcbinfo;
} else if (proto == IPPROTO_UDP) {
- wildcard = 1;
+ wildcard = INPLOOKUP_WILDCARD;
pi = &udbinfo;
} else
return 0;
@@ -1989,6 +2060,185 @@
return match;
}
+#ifdef IPFIREWALL_NAT
+static eventhandler_tag ifaddr_event_tag;
+
+static void
+ifaddr_change(void *arg __unused, struct ifnet *ifp)
+{
+ struct cfg_nat *ptr;
+ struct ifaddr *ifa;
+
+ IPFW_WLOCK(&layer3_chain);
+ /* Check every nat entry... */
+ LIST_FOREACH(ptr, &layer3_chain.nat, _next) {
+ /* ...using nic 'ifp->if_xname' as dynamic alias address. */
+ if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) == 0) {
+ mtx_lock(&ifp->if_addr_mtx);
+ TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
+ if (ifa->ifa_addr == NULL)
+ continue;
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+ ptr->ip = ((struct sockaddr_in *)
+ (ifa->ifa_addr))->sin_addr;
+ LibAliasSetAddress(ptr->lib, ptr->ip);
+ }
+ mtx_unlock(&ifp->if_addr_mtx);
+ }
+ }
+ IPFW_WUNLOCK(&layer3_chain);
+}
+
+static void
+flush_nat_ptrs(const int i)
+{
+ struct ip_fw *rule;
+
+ IPFW_WLOCK_ASSERT(&layer3_chain);
+ for (rule = layer3_chain.rules; rule; rule = rule->next) {
+ ipfw_insn_nat *cmd = (ipfw_insn_nat *)ACTION_PTR(rule);
+ if (cmd->o.opcode != O_NAT)
+ continue;
+ if (cmd->nat != NULL && cmd->nat->id == i)
+ cmd->nat = NULL;
+ }
+}
+
+static struct cfg_nat *
+lookup_nat(const int i)
+{
+ struct cfg_nat *ptr;
+
+ LIST_FOREACH(ptr, &layer3_chain.nat, _next)
+ if (ptr->id == i)
+ return(ptr);
+ return (NULL);
+}
+
+#define HOOK_NAT(b, p) do { \
+ IPFW_WLOCK_ASSERT(&layer3_chain); \
+ LIST_INSERT_HEAD(b, p, _next); \
+} while (0)
+
+#define UNHOOK_NAT(p) do { \
+ IPFW_WLOCK_ASSERT(&layer3_chain); \
+ LIST_REMOVE(p, _next); \
+} while (0)
+
+#define HOOK_REDIR(b, p) do { \
+ LIST_INSERT_HEAD(b, p, _next); \
+} while (0)
+
+#define HOOK_SPOOL(b, p) do { \
+ LIST_INSERT_HEAD(b, p, _next); \
+} while (0)
+
+static void
+del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head)
+{
+ struct cfg_redir *r, *tmp_r;
+ struct cfg_spool *s, *tmp_s;
+ int i, num;
+
+ LIST_FOREACH_SAFE(r, head, _next, tmp_r) {
+ num = 1; /* Number of alias_link to delete. */
+ switch (r->mode) {
+ case REDIR_PORT:
+ num = r->pport_cnt;
+ /* FALLTHROUGH */
+ case REDIR_ADDR:
+ case REDIR_PROTO:
+ /* Delete all libalias redirect entry. */
+ for (i = 0; i < num; i++)
+ LibAliasRedirectDelete(n->lib, r->alink[i]);
+ /* Del spool cfg if any. */
+ LIST_FOREACH_SAFE(s, &r->spool_chain, _next, tmp_s) {
+ LIST_REMOVE(s, _next);
+ free(s, M_IPFW);
+ }
+ free(r->alink, M_IPFW);
+ LIST_REMOVE(r, _next);
+ free(r, M_IPFW);
+ break;
+ default:
+ printf("unknown redirect mode: %u\n", r->mode);
+ /* XXX - panic?!?!? */
+ break;
+ }
+ }
+}
+
+static int
+add_redir_spool_cfg(char *buf, struct cfg_nat *ptr)
+{
+ struct cfg_redir *r, *ser_r;
+ struct cfg_spool *s, *ser_s;
+ int cnt, off, i;
+ char *panic_err;
+
+ for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) {
+ ser_r = (struct cfg_redir *)&buf[off];
+ r = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO);
+ memcpy(r, ser_r, SOF_REDIR);
+ LIST_INIT(&r->spool_chain);
+ off += SOF_REDIR;
+ r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt,
+ M_IPFW, M_WAITOK | M_ZERO);
+ switch (r->mode) {
+ case REDIR_ADDR:
+ r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr,
+ r->paddr);
+ break;
+ case REDIR_PORT:
+ for (i = 0 ; i < r->pport_cnt; i++) {
+ /* If remotePort is all ports, set it to 0. */
+ u_short remotePortCopy = r->rport + i;
+ if (r->rport_cnt == 1 && r->rport == 0)
+ remotePortCopy = 0;
+ r->alink[i] = LibAliasRedirectPort(ptr->lib,
+ r->laddr, htons(r->lport + i), r->raddr,
+ htons(remotePortCopy), r->paddr,
+ htons(r->pport + i), r->proto);
+ if (r->alink[i] == NULL) {
+ r->alink[0] = NULL;
+ break;
+ }
+ }
+ break;
+ case REDIR_PROTO:
+ r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr,
+ r->raddr, r->paddr, r->proto);
+ break;
+ default:
+ printf("unknown redirect mode: %u\n", r->mode);
+ break;
+ }
+ if (r->alink[0] == NULL) {
+ panic_err = "LibAliasRedirect* returned NULL";
+ goto bad;
+ } else /* LSNAT handling. */
+ for (i = 0; i < r->spool_cnt; i++) {
+ ser_s = (struct cfg_spool *)&buf[off];
+ s = malloc(SOF_REDIR, M_IPFW,
+ M_WAITOK | M_ZERO);
+ memcpy(s, ser_s, SOF_SPOOL);
+ LibAliasAddServer(ptr->lib, r->alink[0],
+ s->addr, htons(s->port));
+ off += SOF_SPOOL;
+ /* Hook spool entry. */
+ HOOK_SPOOL(&r->spool_chain, s);
+ }
+ /* And finally hook this redir entry. */
+ HOOK_REDIR(&ptr->redir_chain, r);
+ }
+ return (1);
+bad:
+ /* something really bad happened: panic! */
+ panic("%s\n", panic_err);
+}
+#endif
+
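The IPFIREWALL_NAT block above keeps a list of cfg_nat instances on the chain: an O_NAT instruction caches a pointer to its instance, lookup_nat() resolves the id on first use, and flush_nat_ptrs() clears any cached pointers when an instance is reconfigured so the next packet re-resolves. A small user-space sketch of that cache-then-invalidate pattern, with stand-in types and stub names rather than the real ipfw structures:

	/* Sketch of the lookup-once / flush-on-reconfig pattern used for O_NAT (stand-in types). */
	#include <stdio.h>
	#include <stddef.h>

	struct nat_cfg { int id; };
	struct nat_insn { int arg1; struct nat_cfg *nat; };	/* loosely mirrors ipfw_insn_nat */

	static struct nat_cfg instances[] = { { 1 }, { 2 } };

	static struct nat_cfg *lookup_nat_stub(int id)
	{
		for (size_t i = 0; i < sizeof(instances) / sizeof(instances[0]); i++)
			if (instances[i].id == id)
				return &instances[i];
		return NULL;
	}

	static void flush_nat_ptrs_stub(struct nat_insn *cmd, int id)
	{
		/* On reconfiguration, drop cached pointers so the next packet re-resolves. */
		if (cmd->nat != NULL && cmd->nat->id == id)
			cmd->nat = NULL;
	}

	int main(void)
	{
		struct nat_insn cmd = { 2, NULL };

		if (cmd.nat == NULL)				/* first packet: resolve and cache */
			cmd.nat = lookup_nat_stub(cmd.arg1);
		printf("cached instance id %d\n", cmd.nat ? cmd.nat->id : -1);

		flush_nat_ptrs_stub(&cmd, 2);			/* instance 2 reconfigured */
		printf("after flush: %s\n", cmd.nat ? "still cached" : "re-resolve next time");
		return 0;
	}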
/*
* The main check routine for the firewall.
*
@@ -2000,6 +2250,8 @@
* args->m (in/out) The packet; we set to NULL when/if we nuke it.
* Starts with the IP header.
* args->eh (in) Mac header if present, or NULL for layer3 packet.
+ * args->L3offset Number of bytes bypassed if we came from L2.
+ * e.g. often sizeof(eh) ** NOTYET **
* args->oif Outgoing interface, or NULL if packet is incoming.
* The incoming interface is in the mbuf. (in)
* args->divert_rule (in/out)
@@ -2021,12 +2273,11 @@
* IP_FW_NETGRAPH into netgraph, cookie args->cookie
*
*/
-
int
ipfw_chk(struct ip_fw_args *args)
{
/*
- * Local variables hold state during the processing of a packet.
+ * Local variables holding state during the processing of a packet:
*
* IMPORTANT NOTE: to speed up the processing of rules, there
* are some assumption on the values of the variables, which
@@ -2036,15 +2287,18 @@
*
* args->eh The MAC header. It is non-null for a layer2
* packet, it is NULL for a layer-3 packet.
+ * **notyet**
+ * args->L3offset Offset in the packet to the L3 (IP or equiv.) header.
*
* m | args->m Pointer to the mbuf, as received from the caller.
* It may change if ipfw_chk() does an m_pullup, or if it
* consumes the packet because it calls send_reject().
* XXX This has to change, so that ipfw_chk() never modifies
* or consumes the buffer.
- * ip is simply an alias of the value of m, and it is kept
- * in sync with it (the packet is supposed to start with
- * the ip header).
+ * ip is the beginning of the ip(4 or 6) header.
+ * Calculated by adding the L3offset to the start of data.
+ * (Until we start using L3offset, the packet is
+ * supposed to start with the ip header).
*/
struct mbuf *m = args->m;
struct ip *ip = mtod(m, struct ip *);
@@ -2069,7 +2323,7 @@
/*
* oif | args->oif If NULL, ipfw_chk has been called on the
- * inbound path (ether_input, bdg_forward, ip_input).
+ * inbound path (ether_input, ip_input).
* If non-NULL, ipfw_chk has been called on the outbound path
* (ether_output, ip_output).
*/
@@ -2115,6 +2369,7 @@
struct in_addr src_ip, dst_ip; /* NOTE: network format */
u_int16_t ip_len=0;
int pktlen;
+ u_int16_t etype = 0; /* Host order stored ether type */
/*
* dyn_dir = MATCH_UNKNOWN when rules unchecked,
@@ -2163,14 +2418,20 @@
p = (mtod(m, char *) + (len)); \
} while (0)
+ /*
+ * If we have an ether header, pick up the ether type for later checks.
+ */
+ if (args->eh)
+ etype = ntohs(args->eh->ether_type);
+
/* Identify IP packets and fill up variables. */
if (pktlen >= sizeof(struct ip6_hdr) &&
- (args->eh == NULL || ntohs(args->eh->ether_type)==ETHERTYPE_IPV6) &&
- mtod(m, struct ip *)->ip_v == 6) {
+ (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
is_ipv6 = 1;
args->f_id.addr_type = 6;
hlen = sizeof(struct ip6_hdr);
- proto = mtod(m, struct ip6_hdr *)->ip6_nxt;
+ proto = ip6->ip6_nxt;
/* Search extension headers to find upper layer protocols */
while (ulp == NULL) {
@@ -2187,6 +2448,12 @@
args->f_id.flags = TCP(ulp)->th_flags;
break;
+ case IPPROTO_SCTP:
+ PULLUP_TO(hlen, ulp, struct sctphdr);
+ src_port = SCTP(ulp)->src_port;
+ dst_port = SCTP(ulp)->dest_port;
+ break;
+
case IPPROTO_UDP:
PULLUP_TO(hlen, ulp, struct udphdr);
dst_port = UDP(ulp)->uh_dport;
@@ -2203,7 +2470,14 @@
case IPPROTO_ROUTING: /* RFC 2460 */
PULLUP_TO(hlen, ulp, struct ip6_rthdr);
- if (((struct ip6_rthdr *)ulp)->ip6r_type != 0) {
+ switch (((struct ip6_rthdr *)ulp)->ip6r_type) {
+ case 0:
+ ext_hd |= EXT_RTHDR0;
+ break;
+ case 2:
+ ext_hd |= EXT_RTHDR2;
+ break;
+ default:
printf("IPFW2: IPV6 - Unknown Routing "
"Header type(%d)\n",
((struct ip6_rthdr *)ulp)->ip6r_type);
@@ -2274,24 +2548,48 @@
PULLUP_TO(hlen, ulp, struct ip6_ext);
break;
+ case IPPROTO_PIM:
+ /* XXX PIM header check? */
+ PULLUP_TO(hlen, ulp, struct pim);
+ break;
+
+ case IPPROTO_CARP:
+ PULLUP_TO(hlen, ulp, struct carp_header);
+ if (((struct carp_header *)ulp)->carp_version !=
+ CARP_VERSION)
+ return (IP_FW_DENY);
+ if (((struct carp_header *)ulp)->carp_type !=
+ CARP_ADVERTISEMENT)
+ return (IP_FW_DENY);
+ break;
+
+ case IPPROTO_IPV6: /* RFC 2893 */
+ PULLUP_TO(hlen, ulp, struct ip6_hdr);
+ break;
+
+ case IPPROTO_IPV4: /* RFC 2893 */
+ PULLUP_TO(hlen, ulp, struct ip);
+ break;
+
default:
printf("IPFW2: IPV6 - Unknown Extension "
"Header(%d), ext_hd=%x\n", proto, ext_hd);
if (fw_deny_unknown_exthdrs)
return (IP_FW_DENY);
+ PULLUP_TO(hlen, ulp, struct ip6_ext);
break;
} /*switch */
}
- args->f_id.src_ip6 = mtod(m,struct ip6_hdr *)->ip6_src;
- args->f_id.dst_ip6 = mtod(m,struct ip6_hdr *)->ip6_dst;
+ ip = mtod(m, struct ip *);
+ ip6 = (struct ip6_hdr *)ip;
+ args->f_id.src_ip6 = ip6->ip6_src;
+ args->f_id.dst_ip6 = ip6->ip6_dst;
args->f_id.src_ip = 0;
args->f_id.dst_ip = 0;
- args->f_id.flow_id6 = ntohl(mtod(m, struct ip6_hdr *)->ip6_flow);
+ args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
} else if (pktlen >= sizeof(struct ip) &&
- (args->eh == NULL || ntohs(args->eh->ether_type) == ETHERTYPE_IP) &&
- mtod(m, struct ip *)->ip_v == 4) {
+ (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) {
is_ipv4 = 1;
- ip = mtod(m, struct ip *);
hlen = ip->ip_hl << 2;
args->f_id.addr_type = 4;
@@ -2335,6 +2633,7 @@
}
}
+ ip = mtod(m, struct ip *);
args->f_id.src_ip = ntohl(src_ip.s_addr);
args->f_id.dst_ip = ntohl(dst_ip.s_addr);
}
@@ -2501,15 +2800,14 @@
case O_MAC_TYPE:
if (args->eh != NULL) {
- u_int16_t t =
- ntohs(args->eh->ether_type);
u_int16_t *p =
((ipfw_insn_u16 *)cmd)->ports;
int i;
for (i = cmdlen - 1; !match && i>0;
i--, p += 2)
- match = (t>=p[0] && t<=p[1]);
+ match = (etype >= p[0] &&
+ etype <= p[1]);
}
break;
@@ -2661,12 +2959,12 @@
case O_IPOPT:
match = (is_ipv4 &&
- ipopts_match(mtod(m, struct ip *), cmd) );
+ ipopts_match(ip, cmd) );
break;
case O_IPVER:
match = (is_ipv4 &&
- cmd->arg1 == mtod(m, struct ip *)->ip_v);
+ cmd->arg1 == ip->ip_v);
break;
case O_IPID:
@@ -2680,9 +2978,9 @@
if (cmd->opcode == O_IPLEN)
x = ip_len;
else if (cmd->opcode == O_IPTTL)
- x = mtod(m, struct ip *)->ip_ttl;
+ x = ip->ip_ttl;
else /* must be IPID */
- x = ntohs(mtod(m, struct ip *)->ip_id);
+ x = ntohs(ip->ip_id);
if (cmdlen == 1) {
match = (cmd->arg1 == x);
break;
@@ -2697,12 +2995,12 @@
case O_IPPRECEDENCE:
match = (is_ipv4 &&
- (cmd->arg1 == (mtod(m, struct ip *)->ip_tos & 0xe0)) );
+ (cmd->arg1 == (ip->ip_tos & 0xe0)) );
break;
case O_IPTOS:
match = (is_ipv4 &&
- flags_match(cmd, mtod(m, struct ip *)->ip_tos));
+ flags_match(cmd, ip->ip_tos));
break;
case O_TCPDATALEN:
@@ -2763,37 +3061,34 @@
break;
case O_ALTQ: {
- struct altq_tag *at;
+ struct pf_mtag *at;
ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
match = 1;
- mtag = m_tag_find(m, PACKET_TAG_PF_QID, NULL);
- if (mtag != NULL)
+ at = pf_find_mtag(m);
+ if (at != NULL && at->qid != 0)
break;
- mtag = m_tag_get(PACKET_TAG_PF_QID,
- sizeof(struct altq_tag),
- M_NOWAIT);
- if (mtag == NULL) {
+ at = pf_get_mtag(m);
+ if (at == NULL) {
/*
* Let the packet fall back to the
* default ALTQ.
*/
break;
}
- at = (struct altq_tag *)(mtag+1);
at->qid = altq->qid;
if (is_ipv4)
at->af = AF_INET;
else
at->af = AF_LINK;
at->hdr = ip;
- m_tag_prepend(m, mtag);
break;
}
case O_LOG:
if (fw_verbose)
- ipfw_log(f, hlen, args, m, oif, offset);
+ ipfw_log(f, hlen, args, m,
+ oif, offset, tablearg, ip);
match = 1;
break;
@@ -2847,13 +3142,10 @@
break;
case O_IPSEC:
-#ifdef FAST_IPSEC
+#ifdef IPSEC
match = (m_tag_find(m,
PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL);
#endif
-#ifdef IPSEC
- match = (ipsec_getnhist(m) != 0);
-#endif
/* otherwise no match */
break;
@@ -2870,22 +3162,25 @@
&((ipfw_insn_ip6 *)cmd)->addr6);
break;
case O_IP6_SRC_MASK:
- if (is_ipv6) {
- ipfw_insn_ip6 *te = (ipfw_insn_ip6 *)cmd;
- struct in6_addr p = args->f_id.src_ip6;
-
- APPLY_MASK(&p, &te->mask6);
- match = IN6_ARE_ADDR_EQUAL(&te->addr6, &p);
- }
- break;
-
case O_IP6_DST_MASK:
if (is_ipv6) {
- ipfw_insn_ip6 *te = (ipfw_insn_ip6 *)cmd;
- struct in6_addr p = args->f_id.dst_ip6;
-
- APPLY_MASK(&p, &te->mask6);
- match = IN6_ARE_ADDR_EQUAL(&te->addr6, &p);
+ int i = cmdlen - 1;
+ struct in6_addr p;
+ struct in6_addr *d =
+ &((ipfw_insn_ip6 *)cmd)->addr6;
+
+ for (; !match && i > 0; d += 2,
+ i -= F_INSN_SIZE(struct in6_addr)
+ * 2) {
+ p = (cmd->opcode ==
+ O_IP6_SRC_MASK) ?
+ args->f_id.src_ip6:
+ args->f_id.dst_ip6;
+ APPLY_MASK(&p, &d[1]);
+ match =
+ IN6_ARE_ADDR_EQUAL(&d[0],
+ &p);
+ }
}
break;
@@ -2917,6 +3212,62 @@
match = is_ipv4;
break;
+ case O_TAG: {
+ uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
+ tablearg : cmd->arg1;
+
+ /* Packet is already tagged with this tag? */
+ mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL);
+
+ /* We have `untag' action when F_NOT flag is
+ * present. And we must remove this mtag from
+ * mbuf and reset `match' to zero (`match' will
+ * be inverted later).
+ * Otherwise we should allocate new mtag and
+ * push it into mbuf.
+ */
+ if (cmd->len & F_NOT) { /* `untag' action */
+ if (mtag != NULL)
+ m_tag_delete(m, mtag);
+ } else if (mtag == NULL) {
+ if ((mtag = m_tag_alloc(MTAG_IPFW,
+ tag, 0, M_NOWAIT)) != NULL)
+ m_tag_prepend(m, mtag);
+ }
+ match = (cmd->len & F_NOT) ? 0: 1;
+ break;
+ }
+
+ case O_TAGGED: {
+ uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ?
+ tablearg : cmd->arg1;
+
+ if (cmdlen == 1) {
+ match = m_tag_locate(m, MTAG_IPFW,
+ tag, NULL) != NULL;
+ break;
+ }
+
+ /* we have ranges */
+ for (mtag = m_tag_first(m);
+ mtag != NULL && !match;
+ mtag = m_tag_next(m, mtag)) {
+ uint16_t *p;
+ int i;
+
+ if (mtag->m_tag_cookie != MTAG_IPFW)
+ continue;
+
+ p = ((ipfw_insn_u16 *)cmd)->ports;
+ i = cmdlen - 1;
+ for(; !match && i > 0; i--, p += 2)
+ match =
+ mtag->m_tag_id >= p[0] &&
+ mtag->m_tag_id <= p[1];
+ }
+ break;
+ }
+
/*
* The second set of opcodes represents 'actions',
* i.e. the terminal part of a rule once the packet
@@ -2936,7 +3287,7 @@
* or to the SKIPTO target ('goto again' after
* having set f, cmd and l), respectively.
*
- * O_LOG and O_ALTQ action parameters:
+ * O_TAG, O_LOG and O_ALTQ action parameters:
* perform some action and set match = 1;
*
* O_LIMIT and O_KEEP_STATE: these opcodes are
@@ -2961,7 +3312,7 @@
case O_LIMIT:
case O_KEEP_STATE:
if (install_state(f,
- (ipfw_insn_limit *)cmd, args)) {
+ (ipfw_insn_limit *)cmd, args, tablearg)) {
retval = IP_FW_DENY;
goto done; /* error/limit violation */
}
@@ -3052,7 +3403,7 @@
case O_SKIPTO:
f->pcnt++; /* update stats */
f->bcnt += pktlen;
- f->timestamp = time_second;
+ f->timestamp = time_uptime;
if (cmd->opcode == O_COUNT)
goto next_rule;
/* handle skipto */
@@ -3072,20 +3423,21 @@
is_icmp_query(ICMP(ulp))) &&
!(m->m_flags & (M_BCAST|M_MCAST)) &&
!IN_MULTICAST(ntohl(dst_ip.s_addr))) {
- send_reject(args, cmd->arg1,
- offset,ip_len);
+ send_reject(args, cmd->arg1, ip_len, ip);
m = args->m;
}
/* FALLTHROUGH */
#ifdef INET6
case O_UNREACH6:
if (hlen > 0 && is_ipv6 &&
+ ((offset & IP6F_OFF_MASK) == 0) &&
(proto != IPPROTO_ICMPV6 ||
(is_icmp6_query(args->f_id.flags) == 1)) &&
!(m->m_flags & (M_BCAST|M_MCAST)) &&
!IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) {
- send_reject6(args, cmd->arg1,
- offset, hlen);
+ send_reject6(
+ args, cmd->arg1, hlen,
+ (struct ip6_hdr *)ip);
m = args->m;
}
/* FALLTHROUGH */
@@ -3094,14 +3446,26 @@
retval = IP_FW_DENY;
goto done;
- case O_FORWARD_IP:
+ case O_FORWARD_IP: {
+ struct sockaddr_in *sa;
+ sa = &(((ipfw_insn_sa *)cmd)->sa);
if (args->eh) /* not valid on layer2 pkts */
break;
- if (!q || dyn_dir == MATCH_FORWARD)
- args->next_hop =
- &((ipfw_insn_sa *)cmd)->sa;
+ if (!q || dyn_dir == MATCH_FORWARD) {
+ if (sa->sin_addr.s_addr == INADDR_ANY) {
+ bcopy(sa, &args->hopstore,
+ sizeof(*sa));
+ args->hopstore.sin_addr.s_addr =
+ htonl(tablearg);
+ args->next_hop =
+ &args->hopstore;
+ } else {
+ args->next_hop = sa;
+ }
+ }
retval = IP_FW_PASS;
- goto done;
+ }
+ goto done;
case O_NETGRAPH:
case O_NGTEE:
@@ -3114,6 +3478,179 @@
IP_FW_NETGRAPH : IP_FW_NGTEE;
goto done;
+#ifdef IPFIREWALL_NAT
+ case O_NAT: {
+ struct cfg_nat *t;
+ struct mbuf *mcl;
+ /* XXX - libalias duct tape */
+ int ldt;
+ char *c;
+
+ ldt = 0;
+ args->rule = f; /* Report matching rule. */
+ retval = 0;
+ t = ((ipfw_insn_nat *)cmd)->nat;
+ if (t == NULL) {
+ t = lookup_nat(cmd->arg1);
+ if (t == NULL) {
+ retval = IP_FW_DENY;
+ goto done;
+ } else
+ ((ipfw_insn_nat *)cmd)->nat =
+ t;
+ }
+ if ((mcl = m_megapullup(m, m->m_pkthdr.len)) ==
+ NULL)
+ goto badnat;
+ ip = mtod(mcl, struct ip *);
+ if (args->eh == NULL) {
+ ip->ip_len = htons(ip->ip_len);
+ ip->ip_off = htons(ip->ip_off);
+ }
+
+ /*
+ * XXX - Libalias checksum offload 'duct tape':
+ *
+ * locally generated packets have only the
+ * pseudo-header checksum calculated,
+ * and libalias will break it[1], so
+ * mark them for a later fix. Moreover,
+ * there are cases where libalias
+ * modifies TCP packet data[2]; mark
+ * those for a later fix too.
+ *
+ * [1] libalias was never meant to run
+ * in the kernel, so it has no
+ * knowledge of checksum
+ * offloading, and it expects a packet
+ * with a full internet
+ * checksum. Unfortunately, packets
+ * generated locally will have just the
+ * pseudo-header checksum calculated, and when
+ * libalias tries to adjust the
+ * checksum it will actually corrupt it.
+ *
+ * [2] when libalias modifies TCP data
+ * content, the full TCP checksum has to
+ * be recomputed: the problem is that
+ * libalias doesn't know anything
+ * about checksum offloading. To
+ * work around this, we do no
+ * checksumming in LibAlias, but only
+ * mark the packets in the th_x2 field. If
+ * we receive a marked packet, we
+ * calculate the correct checksum for it,
+ * aware of offloading. Why such a
+ * terrible hack instead of
+ * recalculating the checksum for each
+ * packet? Because the previous
+ * checksum was not checked!
+ * Recalculating checksums for EVERY
+ * packet would hide ALL transmission
+ * errors. Yes, marked packets still
+ * suffer from this problem. But,
+ * sigh, natd(8) has this problem,
+ * too.
+ *
+ * TODO: make libalias mbuf aware (so
+ * it can handle delayed checksum and TSO)
+ */
+
+ if (mcl->m_pkthdr.rcvif == NULL &&
+ mcl->m_pkthdr.csum_flags &
+ CSUM_DELAY_DATA)
+ ldt = 1;
+
+ c = mtod(mcl, char *);
+ if (oif == NULL)
+ retval = LibAliasIn(t->lib, c,
+ MCLBYTES);
+ else
+ retval = LibAliasOut(t->lib, c,
+ MCLBYTES);
+ if (retval != PKT_ALIAS_OK) {
+ /* XXX - should I add some logging? */
+ m_free(mcl);
+ badnat:
+ args->m = NULL;
+ retval = IP_FW_DENY;
+ goto done;
+ }
+ mcl->m_pkthdr.len = mcl->m_len =
+ ntohs(ip->ip_len);
+
+ /*
+ * XXX - libalias checksum offload
+ * 'duct tape' (see above)
+ */
+
+ if ((ip->ip_off & htons(IP_OFFMASK)) == 0 &&
+ ip->ip_p == IPPROTO_TCP) {
+ struct tcphdr *th;
+
+ th = (struct tcphdr *)(ip + 1);
+ if (th->th_x2)
+ ldt = 1;
+ }
+
+ if (ldt) {
+ struct tcphdr *th;
+ struct udphdr *uh;
+ u_short cksum;
+
+ ip->ip_len = ntohs(ip->ip_len);
+ cksum = in_pseudo(
+ ip->ip_src.s_addr,
+ ip->ip_dst.s_addr,
+ htons(ip->ip_p + ip->ip_len -
+ (ip->ip_hl << 2))
+ );
+
+ switch (ip->ip_p) {
+ case IPPROTO_TCP:
+ th = (struct tcphdr *)(ip + 1);
+ /*
+ * Maybe it was set in
+ * libalias...
+ */
+ th->th_x2 = 0;
+ th->th_sum = cksum;
+ mcl->m_pkthdr.csum_data =
+ offsetof(struct tcphdr,
+ th_sum);
+ break;
+ case IPPROTO_UDP:
+ uh = (struct udphdr *)(ip + 1);
+ uh->uh_sum = cksum;
+ mcl->m_pkthdr.csum_data =
+ offsetof(struct udphdr,
+ uh_sum);
+ break;
+ }
+ /*
+ * No hw checksum offloading: do it
+ * ourselves.
+ */
+ if ((mcl->m_pkthdr.csum_flags &
+ CSUM_DELAY_DATA) == 0) {
+ in_delayed_cksum(mcl);
+ mcl->m_pkthdr.csum_flags &=
+ ~CSUM_DELAY_DATA;
+ }
+ ip->ip_len = htons(ip->ip_len);
+ }
+
+ if (args->eh == NULL) {
+ ip->ip_len = ntohs(ip->ip_len);
+ ip->ip_off = ntohs(ip->ip_off);
+ }
+
+ args->m = mcl;
+ retval = IP_FW_NAT;
+ goto done;
+ }
+#endif
+
default:
panic("-- unknown opcode %d\n", cmd->opcode);
} /* end of switch() on opcodes */
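For reference, the range match performed by the new O_TAGGED opcode above boils
down to scanning pairs of 16-bit bounds carried in the instruction body. A
minimal standalone sketch of that loop (names here are local stand-ins, not the
kernel types):

	/* Return non-zero if id falls inside any [lo,hi] pair. */
	#include <stdint.h>
	#include <stdio.h>

	static int
	tag_in_ranges(uint16_t id, const uint16_t *pairs, int npairs)
	{
		int match = 0;

		for (; !match && npairs > 0; npairs--, pairs += 2)
			match = id >= pairs[0] && id <= pairs[1];
		return (match);
	}

	int
	main(void)
	{
		uint16_t ranges[] = { 10, 20, 100, 200 };

		printf("%d\n", tag_in_ranges(150, ranges, 2));	/* prints 1 */
		return (0);
	}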
@@ -3142,7 +3679,7 @@
/* Update statistics */
f->pcnt++;
f->bcnt += pktlen;
- f->timestamp = time_second;
+ f->timestamp = time_uptime;
IPFW_RUNLOCK(chain);
return (retval);
@@ -3168,34 +3705,6 @@
}
/*
- * When pipes/queues are deleted, clear the "pipe_ptr" pointer to a given
- * pipe/queue, or to all of them (match == NULL).
- */
-void
-flush_pipe_ptrs(struct dn_flow_set *match)
-{
- struct ip_fw *rule;
-
- IPFW_WLOCK(&layer3_chain);
- for (rule = layer3_chain.rules; rule; rule = rule->next) {
- ipfw_insn_pipe *cmd = (ipfw_insn_pipe *)ACTION_PTR(rule);
-
- if (cmd->o.opcode != O_PIPE && cmd->o.opcode != O_QUEUE)
- continue;
- /*
- * XXX Use bcmp/bzero to handle pipe_ptr to overcome
- * possible alignment problems on 64-bit architectures.
- * This code is seldom used so we do not worry too
- * much about efficiency.
- */
- if (match == NULL ||
- !bcmp(&cmd->pipe_ptr, &match, sizeof(match)) )
- bzero(&cmd->pipe_ptr, sizeof(cmd->pipe_ptr));
- }
- IPFW_WUNLOCK(&layer3_chain);
-}
-
-/*
* Add a new rule to the list. Copy the rule into a malloc'ed area, then
* possibly create a rule number and add the rule to the list.
* Update the rule_number in the input struct so the caller knows it as well.
@@ -3285,7 +3794,8 @@
* Arguments are not checked, so they better be correct.
*/
static struct ip_fw *
-remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule, struct ip_fw *prev)
+remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule,
+ struct ip_fw *prev)
{
struct ip_fw *n;
int l = RULESIZE(rule);
@@ -3360,6 +3870,7 @@
* 2 move rules with given number to new set
* 3 move rules with given set number to new set
* 4 swap sets with given numbers
+ * 5 delete rules with given number and with given set number
*/
static int
del_entry(struct ip_fw_chain *chain, u_int32_t arg)
@@ -3372,11 +3883,9 @@
cmd = (arg >> 24) & 0xff;
new_set = (arg >> 16) & 0xff;
- if (cmd > 4)
+ if (cmd > 5 || new_set > RESVD_SET)
return EINVAL;
- if (new_set > RESVD_SET)
- return EINVAL;
- if (cmd == 0 || cmd == 2) {
+ if (cmd == 0 || cmd == 2 || cmd == 5) {
if (rulenum >= IPFW_DEFAULT_RULE)
return EINVAL;
} else {
@@ -3440,6 +3949,25 @@
else if (rule->set == new_set)
rule->set = rulenum;
break;
+ case 5: /* delete rules with given number and with given set number.
+ * rulenum - given rule number;
+ * new_set - given set number.
+ */
+ for (; rule->rulenum < rulenum; prev = rule, rule = rule->next)
+ ;
+ if (rule->rulenum != rulenum) {
+ IPFW_WUNLOCK(chain);
+ return (EINVAL);
+ }
+ flush_rule_ptrs(chain);
+ while (rule->rulenum == rulenum) {
+ if (rule->set == new_set)
+ rule = remove_rule(chain, rule, prev);
+ else {
+ prev = rule;
+ rule = rule->next;
+ }
+ }
}
/*
* Look for rules to reclaim. We grab the list before
@@ -3473,23 +4001,39 @@
/**
* Reset some or all counters on firewall rules.
- * @arg frwl is null to clear all entries, or contains a specific
- * rule number.
- * @arg log_only is 1 if we only want to reset logs, zero otherwise.
+ * The argument `arg' is a u_int32_t. The low 16 bits are the rule number,
+ * the next 8 bits are the set number, the top 8 bits are the command:
+ * 0 work with rules from all sets;
+ * 1 work with rules only from the specified set.
+ * The rule number is zero if we want to clear all entries.
+ * log_only is 1 if we only want to reset logs, zero otherwise.
*/
static int
-zero_entry(struct ip_fw_chain *chain, int rulenum, int log_only)
+zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only)
{
struct ip_fw *rule;
char *msg;
+ uint16_t rulenum = arg & 0xffff;
+ uint8_t set = (arg >> 16) & 0xff;
+ uint8_t cmd = (arg >> 24) & 0xff;
+
+ if (cmd > 1)
+ return (EINVAL);
+ if (cmd == 1 && set > RESVD_SET)
+ return (EINVAL);
+
IPFW_WLOCK(chain);
if (rulenum == 0) {
norule_counter = 0;
- for (rule = chain->rules; rule; rule = rule->next)
+ for (rule = chain->rules; rule; rule = rule->next) {
+ /* Skip rules from another set. */
+ if (cmd == 1 && rule->set != set)
+ continue;
clear_counters(rule, log_only);
+ }
msg = log_only ? "ipfw: All logging counts reset.\n" :
- "ipfw: Accounting cleared.\n";
+ "ipfw: Accounting cleared.\n";
} else {
int cleared = 0;
/*
@@ -3499,7 +4043,8 @@
for (rule = chain->rules; rule; rule = rule->next)
if (rule->rulenum == rulenum) {
while (rule && rule->rulenum == rulenum) {
- clear_counters(rule, log_only);
+ if (cmd == 0 || rule->set == set)
+ clear_counters(rule, log_only);
rule = rule->next;
}
cleared = 1;
@@ -3510,7 +4055,7 @@
return (EINVAL);
}
msg = log_only ? "ipfw: Entry %d logging count reset.\n" :
- "ipfw: Entry %d cleared.\n";
+ "ipfw: Entry %d cleared.\n";
}
IPFW_WUNLOCK(chain);
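The 32-bit sockopt argument decoded by zero_entry() above (and by the extended
del_entry()) packs the rule number in the low 16 bits, the set number in the
next 8 bits and the command in the top 8 bits. A minimal userspace sketch of
that layout, assuming nothing beyond <stdint.h>:

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t
	pack_ipfw_arg(uint16_t rulenum, uint8_t set, uint8_t cmd)
	{
		return ((uint32_t)cmd << 24) | ((uint32_t)set << 16) | rulenum;
	}

	int
	main(void)
	{
		/* "zero counters of rule 100, but only in set 3" (cmd 1). */
		uint32_t arg = pack_ipfw_arg(100, 3, 1);

		printf("rulenum=%u set=%u cmd=%u\n",
		    (unsigned)(arg & 0xffff),
		    (unsigned)((arg >> 16) & 0xff),
		    (unsigned)((arg >> 24) & 0xff));
		return (0);
	}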
@@ -3587,6 +4132,7 @@
case O_IP6:
#endif
case O_IP4:
+ case O_TAG:
if (cmdlen != F_INSN_SIZE(ipfw_insn))
goto bad_size;
break;
@@ -3659,6 +4205,7 @@
case O_IPTTL:
case O_IPLEN:
case O_TCPDATALEN:
+ case O_TAGGED:
if (cmdlen < 1 || cmdlen > 31)
goto bad_size;
break;
@@ -3684,7 +4231,7 @@
case O_PIPE:
case O_QUEUE:
- if (cmdlen != F_INSN_SIZE(ipfw_insn_pipe))
+ if (cmdlen != F_INSN_SIZE(ipfw_insn))
goto bad_size;
goto check_action;
@@ -3709,6 +4256,14 @@
return EINVAL;
else
goto check_size;
+ case O_NAT:
+#ifdef IPFIREWALL_NAT
+ if (cmdlen != F_INSN_SIZE(ipfw_insn_nat))
+ goto bad_size;
+ goto check_action;
+#else
+ return EINVAL;
+#endif
case O_FORWARD_MAC: /* XXX not implemented yet */
case O_CHECK_STATE:
case O_COUNT:
@@ -3809,7 +4364,9 @@
char *ep = bp + space;
struct ip_fw *rule;
int i;
+ time_t boot_seconds;
+ boot_seconds = boottime.tv_sec;
/* XXX this can take a long time and locking will block packet flow */
IPFW_RLOCK(chain);
for (rule = chain->rules; rule ; rule = rule->next) {
@@ -3822,8 +4379,15 @@
i = RULESIZE(rule);
if (bp + i <= ep) {
bcopy(rule, bp, i);
+ /*
+ * XXX HACK. Store the disable mask in the "next" pointer
+ * in a wild attempt to keep the ABI the same.
+ * Why do we do this on EVERY rule?
+ */
bcopy(&set_disable, &(((struct ip_fw *)bp)->next_rule),
sizeof(set_disable));
+ if (((struct ip_fw *)bp)->timestamp)
+ ((struct ip_fw *)bp)->timestamp += boot_seconds;
bp += i;
}
}
@@ -3841,6 +4405,14 @@
bcopy(&(p->rule->rulenum), &(dst->rule),
sizeof(p->rule->rulenum));
/*
+ * store set number into high word of
+ * dst->rule pointer.
+ */
+ bcopy(&(p->rule->set),
+ (char *)&dst->rule +
+ sizeof(p->rule->rulenum),
+ sizeof(p->rule->set));
+ /*
* store a non-null value in "next".
* The userland code will interpret a
* NULL here as a marker
@@ -3849,8 +4421,8 @@
bcopy(&dst, &dst->next, sizeof(dst));
last = dst;
dst->expire =
- TIME_LEQ(dst->expire, time_second) ?
- 0 : dst->expire - time_second ;
+ TIME_LEQ(dst->expire, time_uptime) ?
+ 0 : dst->expire - time_uptime ;
bp += sizeof(ipfw_dyn_rule);
}
}
@@ -3869,12 +4441,12 @@
ipfw_ctl(struct sockopt *sopt)
{
#define RULE_MAXSIZE (256*sizeof(u_int32_t))
- int error, rule_num;
+ int error;
size_t size;
struct ip_fw *buf, *rule;
u_int32_t rulenum[2];
- error = suser(sopt->sopt_td);
+ error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW);
if (error)
return (error);
@@ -3884,14 +4456,9 @@
*/
if (sopt->sopt_name == IP_FW_ADD ||
(sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) {
-#if __FreeBSD_version >= 500034
error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
if (error)
return (error);
-#else /* FreeBSD 4.x */
- if (securelevel >= 3)
- return (EPERM);
-#endif
}
error = 0;
@@ -3941,9 +4508,10 @@
IPFW_WLOCK(&layer3_chain);
layer3_chain.reap = NULL;
free_chain(&layer3_chain, 0 /* keep default rule */);
- rule = layer3_chain.reap, layer3_chain.reap = NULL;
+ rule = layer3_chain.reap;
+ layer3_chain.reap = NULL;
IPFW_WUNLOCK(&layer3_chain);
- if (layer3_chain.reap != NULL)
+ if (rule != NULL)
reap_rules(rule);
break;
@@ -3991,15 +4559,15 @@
break;
case IP_FW_ZERO:
- case IP_FW_RESETLOG: /* argument is an int, the rule number */
- rule_num = 0;
+ case IP_FW_RESETLOG: /* argument is a u_int32_t, the rule number */
+ rulenum[0] = 0;
if (sopt->sopt_val != 0) {
- error = sooptcopyin(sopt, &rule_num,
- sizeof(int), sizeof(int));
+ error = sooptcopyin(sopt, rulenum,
+ sizeof(u_int32_t), sizeof(u_int32_t));
if (error)
break;
}
- error = zero_entry(&layer3_chain, rule_num,
+ error = zero_entry(&layer3_chain, rulenum[0],
sopt->sopt_name == IP_FW_RESETLOG);
break;
@@ -4069,10 +4637,6 @@
}
size = sopt->sopt_valsize;
tbl = malloc(size, M_TEMP, M_WAITOK);
- if (tbl == NULL) {
- error = ENOMEM;
- break;
- }
error = sooptcopyin(sopt, tbl, size, sizeof(*tbl));
if (error) {
free(tbl, M_TEMP);
@@ -4092,6 +4656,187 @@
}
break;
+#ifdef IPFIREWALL_NAT
+ case IP_FW_NAT_CFG:
+ {
+ struct cfg_nat *ptr, *ser_n;
+ char *buf;
+
+ buf = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO);
+ error = sooptcopyin(sopt, buf, NAT_BUF_LEN,
+ sizeof(struct cfg_nat));
+ ser_n = (struct cfg_nat *)buf;
+
+ /*
+ * Find/create nat rule.
+ */
+ IPFW_WLOCK(&layer3_chain);
+ ptr = lookup_nat(ser_n->id);
+ if (ptr == NULL) {
+ /* New rule: allocate and init new instance. */
+ ptr = malloc(sizeof(struct cfg_nat),
+ M_IPFW, M_NOWAIT | M_ZERO);
+ if (ptr == NULL) {
+ IPFW_WUNLOCK(&layer3_chain);
+ free(buf, M_IPFW);
+ return (ENOSPC);
+ }
+ ptr->lib = LibAliasInit(NULL);
+ if (ptr->lib == NULL) {
+ IPFW_WUNLOCK(&layer3_chain);
+ free(ptr, M_IPFW);
+ free(buf, M_IPFW);
+ return (EINVAL);
+ }
+ LIST_INIT(&ptr->redir_chain);
+ } else {
+ /* Entry already present: temporarily unhook it. */
+ UNHOOK_NAT(ptr);
+ flush_nat_ptrs(ser_n->id);
+ }
+ IPFW_WUNLOCK(&layer3_chain);
+
+ /*
+ * Basic nat configuration.
+ */
+ ptr->id = ser_n->id;
+ /*
+ * XXX - what if this rule doesn't NAT any IP and just
+ * redirects?
+ * Do we set the alias address to 0.0.0.0?
+ */
+ ptr->ip = ser_n->ip;
+ ptr->redir_cnt = ser_n->redir_cnt;
+ ptr->mode = ser_n->mode;
+ LibAliasSetMode(ptr->lib, ser_n->mode, ser_n->mode);
+ LibAliasSetAddress(ptr->lib, ptr->ip);
+ memcpy(ptr->if_name, ser_n->if_name, IF_NAMESIZE);
+
+ /*
+ * Redir and LSNAT configuration.
+ */
+ /* Delete old cfgs. */
+ del_redir_spool_cfg(ptr, &ptr->redir_chain);
+ /* Add new entries. */
+ add_redir_spool_cfg(&buf[(sizeof(struct cfg_nat))], ptr);
+ free(buf, M_IPFW);
+ IPFW_WLOCK(&layer3_chain);
+ HOOK_NAT(&layer3_chain.nat, ptr);
+ IPFW_WUNLOCK(&layer3_chain);
+ }
+ break;
+
+ case IP_FW_NAT_DEL:
+ {
+ struct cfg_nat *ptr;
+ int i;
+
+ error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
+ IPFW_WLOCK(&layer3_chain);
+ ptr = lookup_nat(i);
+ if (ptr == NULL) {
+ error = EINVAL;
+ IPFW_WUNLOCK(&layer3_chain);
+ break;
+ }
+ UNHOOK_NAT(ptr);
+ flush_nat_ptrs(i);
+ IPFW_WUNLOCK(&layer3_chain);
+ del_redir_spool_cfg(ptr, &ptr->redir_chain);
+ LibAliasUninit(ptr->lib);
+ free(ptr, M_IPFW);
+ }
+ break;
+
+ case IP_FW_NAT_GET_CONFIG:
+ {
+ uint8_t *data;
+ struct cfg_nat *n;
+ struct cfg_redir *r;
+ struct cfg_spool *s;
+ int nat_cnt, off;
+
+ nat_cnt = 0;
+ off = sizeof(nat_cnt);
+
+ data = malloc(NAT_BUF_LEN, M_IPFW, M_WAITOK | M_ZERO);
+ IPFW_RLOCK(&layer3_chain);
+ /* Serialize all the data. */
+ LIST_FOREACH(n, &layer3_chain.nat, _next) {
+ nat_cnt++;
+ if (off + SOF_NAT < NAT_BUF_LEN) {
+ bcopy(n, &data[off], SOF_NAT);
+ off += SOF_NAT;
+ LIST_FOREACH(r, &n->redir_chain, _next) {
+ if (off + SOF_REDIR < NAT_BUF_LEN) {
+ bcopy(r, &data[off],
+ SOF_REDIR);
+ off += SOF_REDIR;
+ LIST_FOREACH(s, &r->spool_chain,
+ _next) {
+ if (off + SOF_SPOOL <
+ NAT_BUF_LEN) {
+ bcopy(s,
+ &data[off],
+ SOF_SPOOL);
+ off +=
+ SOF_SPOOL;
+ } else
+ goto nospace;
+ }
+ } else
+ goto nospace;
+ }
+ } else
+ goto nospace;
+ }
+ bcopy(&nat_cnt, data, sizeof(nat_cnt));
+ IPFW_RUNLOCK(&layer3_chain);
+ error = sooptcopyout(sopt, data, NAT_BUF_LEN);
+ free(data, M_IPFW);
+ break;
+ nospace:
+ IPFW_RUNLOCK(&layer3_chain);
+ printf("serialized data buffer not big enough:"
+ "please increase NAT_BUF_LEN\n");
+ free(data, M_IPFW);
+ }
+ break;
+
+ case IP_FW_NAT_GET_LOG:
+ {
+ uint8_t *data;
+ struct cfg_nat *ptr;
+ int i, size, cnt, sof;
+
+ data = NULL;
+ sof = LIBALIAS_BUF_SIZE;
+ cnt = 0;
+
+ IPFW_RLOCK(&layer3_chain);
+ size = i = 0;
+ LIST_FOREACH(ptr, &layer3_chain.nat, _next) {
+ if (ptr->lib->logDesc == NULL)
+ continue;
+ cnt++;
+ size = cnt * (sof + sizeof(int));
+ data = realloc(data, size, M_IPFW, M_NOWAIT | M_ZERO);
+ if (data == NULL) {
+ IPFW_RUNLOCK(&layer3_chain);
+ return (ENOSPC);
+ }
+ bcopy(&ptr->id, &data[i], sizeof(int));
+ i += sizeof(int);
+ bcopy(ptr->lib->logDesc, &data[i], sof);
+ i += sof;
+ }
+ IPFW_RUNLOCK(&layer3_chain);
+ error = sooptcopyout(sopt, data, size);
+ free(data, M_IPFW);
+ }
+ break;
+#endif
+
default:
printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name);
error = EINVAL;
@@ -4141,17 +4886,17 @@
continue;
if ( (q->state & BOTH_SYN) != BOTH_SYN)
continue;
- if (TIME_LEQ( time_second+dyn_keepalive_interval,
+ if (TIME_LEQ( time_uptime+dyn_keepalive_interval,
q->expire))
continue; /* too early */
- if (TIME_LEQ(q->expire, time_second))
+ if (TIME_LEQ(q->expire, time_uptime))
continue; /* too late, rule expired */
- *mtailp = send_pkt(&(q->id), q->ack_rev - 1,
+ *mtailp = send_pkt(NULL, &(q->id), q->ack_rev - 1,
q->ack_fwd, TH_SYN);
if (*mtailp != NULL)
mtailp = &(*mtailp)->m_nextpkt;
- *mtailp = send_pkt(&(q->id), q->ack_fwd - 1,
+ *mtailp = send_pkt(NULL, &(q->id), q->ack_fwd - 1,
q->ack_rev, 0);
if (*mtailp != NULL)
mtailp = &(*mtailp)->m_nextpkt;
@@ -4177,24 +4922,24 @@
/* Setup IPv6 fw sysctl tree. */
sysctl_ctx_init(&ip6_fw_sysctl_ctx);
ip6_fw_sysctl_tree = SYSCTL_ADD_NODE(&ip6_fw_sysctl_ctx,
- SYSCTL_STATIC_CHILDREN(_net_inet6_ip6), OID_AUTO, "fw",
- CTLFLAG_RW | CTLFLAG_SECURE, 0, "Firewall");
+ SYSCTL_STATIC_CHILDREN(_net_inet6_ip6), OID_AUTO, "fw",
+ CTLFLAG_RW | CTLFLAG_SECURE, 0, "Firewall");
+ SYSCTL_ADD_PROC(&ip6_fw_sysctl_ctx, SYSCTL_CHILDREN(ip6_fw_sysctl_tree),
+ OID_AUTO, "enable", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3,
+ &fw6_enable, 0, ipfw_chg_hook, "I", "Enable ipfw+6");
SYSCTL_ADD_INT(&ip6_fw_sysctl_ctx, SYSCTL_CHILDREN(ip6_fw_sysctl_tree),
- OID_AUTO, "deny_unknown_exthdrs", CTLFLAG_RW | CTLFLAG_SECURE,
- &fw_deny_unknown_exthdrs, 0,
- "Deny packets with unknown IPv6 Extension Headers");
+ OID_AUTO, "deny_unknown_exthdrs", CTLFLAG_RW | CTLFLAG_SECURE,
+ &fw_deny_unknown_exthdrs, 0,
+ "Deny packets with unknown IPv6 Extension Headers");
#endif
layer3_chain.rules = NULL;
- layer3_chain.want_write = 0;
- layer3_chain.busy_count = 0;
- cv_init(&layer3_chain.cv, "Condition variable for IPFW rw locks");
IPFW_LOCK_INIT(&layer3_chain);
- ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule zone",
+ ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule",
sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
IPFW_DYN_LOCK_INIT();
- callout_init(&ipfw_timeout, NET_CALLOUT_MPSAFE);
+ callout_init(&ipfw_timeout, CALLOUT_MPSAFE);
bzero(&default_rule, sizeof default_rule);
@@ -4221,7 +4966,11 @@
}
ip_fw_default_rule = layer3_chain.rules;
- printf("ipfw2 (+ipv6) initialized, divert %s, "
+ printf("ipfw2 "
+#ifdef INET6
+ "(+ipv6) "
+#endif
+ "initialized, divert %s, "
"rule-based forwarding "
#ifdef IPFIREWALL_FORWARD
"enabled, "
@@ -4259,8 +5008,12 @@
}
ip_fw_ctl_ptr = ipfw_ctl;
ip_fw_chk_ptr = ipfw_chk;
- callout_reset(&ipfw_timeout, hz, ipfw_tick, NULL);
-
+ callout_reset(&ipfw_timeout, hz, ipfw_tick, NULL);
+#ifdef IPFIREWALL_NAT
+ LIST_INIT(&layer3_chain.nat);
+ ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_change,
+ NULL, EVENTHANDLER_PRI_ANY);
+#endif
return (0);
}
@@ -4268,12 +5021,24 @@
ipfw_destroy(void)
{
struct ip_fw *reap;
+#ifdef IPFIREWALL_NAT
+ struct cfg_nat *ptr, *ptr_temp;
+#endif
ip_fw_chk_ptr = NULL;
ip_fw_ctl_ptr = NULL;
callout_drain(&ipfw_timeout);
IPFW_WLOCK(&layer3_chain);
flush_tables(&layer3_chain);
+#ifdef IPFIREWALL_NAT
+ LIST_FOREACH_SAFE(ptr, &layer3_chain.nat, _next, ptr_temp) {
+ LIST_REMOVE(ptr, _next);
+ del_redir_spool_cfg(ptr, &ptr->redir_chain);
+ LibAliasUninit(ptr->lib);
+ free(ptr, M_IPFW);
+ }
+ EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_event_tag);
+#endif
layer3_chain.reap = NULL;
free_chain(&layer3_chain, 1 /* kill default rule */);
reap = layer3_chain.reap, layer3_chain.reap = NULL;
@@ -4282,6 +5047,8 @@
reap_rules(reap);
IPFW_DYN_LOCK_DESTROY();
uma_zdestroy(ipfw_dyn_rule_zone);
+ if (ipfw_dyn_v != NULL)
+ free(ipfw_dyn_v, M_IPFW);
IPFW_LOCK_DESTROY(&layer3_chain);
#ifdef INET6
Index: tcp_fsm.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/tcp_fsm.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/tcp_fsm.h -L sys/netinet/tcp_fsm.h -u -r1.1.1.1 -r1.2
--- sys/netinet/tcp_fsm.h
+++ sys/netinet/tcp_fsm.h
@@ -1,6 +1,7 @@
/*-
* Copyright (c) 1982, 1986, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -27,14 +28,15 @@
* SUCH DAMAGE.
*
* @(#)tcp_fsm.h 8.1 (Berkeley) 6/10/93
- * $FreeBSD: src/sys/netinet/tcp_fsm.h,v 1.18 2005/01/07 01:45:45 imp Exp $
+ * $FreeBSD: src/sys/netinet/tcp_fsm.h,v 1.20 2007/07/30 11:06:41 des Exp $
*/
#ifndef _NETINET_TCP_FSM_H_
-#define _NETINET_TCP_FSM_H_
+#define _NETINET_TCP_FSM_H_
/*
* TCP FSM state definitions.
+ *
* Per RFC793, September, 1981.
*/
@@ -75,10 +77,10 @@
#ifdef TCPOUTFLAGS
/*
- * Flags used when sending segments in tcp_output.
- * Basic flags (TH_RST,TH_ACK,TH_SYN,TH_FIN) are totally
- * determined by state, with the proviso that TH_FIN is sent only
- * if all data queued for output is included in the segment.
+ * Flags used when sending segments in tcp_output. Basic flags (TH_RST,
+ * TH_ACK,TH_SYN,TH_FIN) are totally determined by state, with the proviso
+ * that TH_FIN is sent only if all data queued for output is included in the
+ * segment.
*/
static u_char tcp_outflags[TCP_NSTATES] = {
TH_RST|TH_ACK, /* 0, CLOSED */
@@ -100,7 +102,7 @@
#endif
#ifdef TCPSTATES
-const char *tcpstates[] = {
+static char const * const tcpstates[] = {
"CLOSED", "LISTEN", "SYN_SENT", "SYN_RCVD",
"ESTABLISHED", "CLOSE_WAIT", "FIN_WAIT_1", "CLOSING",
"LAST_ACK", "FIN_WAIT_2", "TIME_WAIT",
--- /dev/null
+++ sys/netinet/sctp_sysctl.h
@@ -0,0 +1,464 @@
+/*-
+ * Copyright (c) 2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_sysctl.h,v 1.13 2007/09/15 19:07:42 rrs Exp $");
+
+#ifndef __sctp_sysctl_h__
+#define __sctp_sysctl_h__
+
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_constants.h>
+
+/*
+ * limits for the sysctl variables
+ */
+/* maxdgram: Maximum outgoing SCTP buffer size */
+#define SCTPCTL_MAXDGRAM_DESC "Maximum outgoing SCTP buffer size"
+#define SCTPCTL_MAXDGRAM_MIN 0
+#define SCTPCTL_MAXDGRAM_MAX 0xFFFFFFFF
+#define SCTPCTL_MAXDGRAM_DEFAULT 262144 /* 256k */
+
+/* recvspace: Maximum incoming SCTP buffer size */
+#define SCTPCTL_RECVSPACE_DESC "Maximum incoming SCTP buffer size"
+#define SCTPCTL_RECVSPACE_MIN 0
+#define SCTPCTL_RECVSPACE_MAX 0xFFFFFFFF
+#define SCTPCTL_RECVSPACE_DEFAULT 262144 /* 256k */
+
+/* autoasconf: Enable SCTP Auto-ASCONF */
+#define SCTPCTL_AUTOASCONF_DESC "Enable SCTP Auto-ASCONF"
+#define SCTPCTL_AUTOASCONF_MIN 0
+#define SCTPCTL_AUTOASCONF_MAX 1
+#define SCTPCTL_AUTOASCONF_DEFAULT SCTP_DEFAULT_AUTO_ASCONF
+
+/* ecn_enable: Enable SCTP ECN */
+#define SCTPCTL_ECN_ENABLE_DESC "Enable SCTP ECN"
+#define SCTPCTL_ECN_ENABLE_MIN 0
+#define SCTPCTL_ECN_ENABLE_MAX 1
+#define SCTPCTL_ECN_ENABLE_DEFAULT 1
+
+/* ecn_nonce: Enable SCTP ECN Nonce */
+#define SCTPCTL_ECN_NONCE_DESC "Enable SCTP ECN Nonce"
+#define SCTPCTL_ECN_NONCE_MIN 0
+#define SCTPCTL_ECN_NONCE_MAX 1
+#define SCTPCTL_ECN_NONCE_DEFAULT 0
+
+/* strict_sacks: Enable SCTP Strict SACK checking */
+#define SCTPCTL_STRICT_SACKS_DESC "Enable SCTP Strict SACK checking"
+#define SCTPCTL_STRICT_SACKS_MIN 0
+#define SCTPCTL_STRICT_SACKS_MAX 1
+#define SCTPCTL_STRICT_SACKS_DEFAULT 0
+
+/* loopback_nocsum: Enable NO Csum on packets sent on loopback */
+#define SCTPCTL_LOOPBACK_NOCSUM_DESC "Enable NO Csum on packets sent on loopback"
+#define SCTPCTL_LOOPBACK_NOCSUM_MIN 0
+#define SCTPCTL_LOOPBACK_NOCSUM_MAX 1
+#define SCTPCTL_LOOPBACK_NOCSUM_DEFAULT 1
+
+/* strict_init: Enable strict INIT/INIT-ACK singleton enforcement */
+#define SCTPCTL_STRICT_INIT_DESC "Enable strict INIT/INIT-ACK singleton enforcement"
+#define SCTPCTL_STRICT_INIT_MIN 0
+#define SCTPCTL_STRICT_INIT_MAX 1
+#define SCTPCTL_STRICT_INIT_DEFAULT 1
+
+/* peer_chkoh: Amount to debit peers rwnd per chunk sent */
+#define SCTPCTL_PEER_CHKOH_DESC "Amount to debit peers rwnd per chunk sent"
+#define SCTPCTL_PEER_CHKOH_MIN 0
+#define SCTPCTL_PEER_CHKOH_MAX 0xFFFFFFFF
+#define SCTPCTL_PEER_CHKOH_DEFAULT 256
+
+/* maxburst: Default max burst for sctp endpoints */
+#define SCTPCTL_MAXBURST_DESC "Default max burst for sctp endpoints"
+#define SCTPCTL_MAXBURST_MIN 1
+#define SCTPCTL_MAXBURST_MAX 0xFFFFFFFF
+#define SCTPCTL_MAXBURST_DEFAULT SCTP_DEF_MAX_BURST
+
+/* maxchunks: Default max chunks on queue per asoc */
+#define SCTPCTL_MAXCHUNKS_DESC "Default max chunks on queue per asoc"
+#define SCTPCTL_MAXCHUNKS_MIN 0
+#define SCTPCTL_MAXCHUNKS_MAX 0xFFFFFFFF
+#define SCTPCTL_MAXCHUNKS_DEFAULT SCTP_ASOC_MAX_CHUNKS_ON_QUEUE
+
+/* tcbhashsize: Tuneable for Hash table sizes */
+#define SCTPCTL_TCBHASHSIZE_DESC "Tunable for TCB hash table sizes"
+#define SCTPCTL_TCBHASHSIZE_MIN 1
+#define SCTPCTL_TCBHASHSIZE_MAX 0xFFFFFFFF
+#define SCTPCTL_TCBHASHSIZE_DEFAULT SCTP_TCBHASHSIZE
+
+/* pcbhashsize: Tuneable for PCB Hash table sizes */
+#define SCTPCTL_PCBHASHSIZE_DESC "Tunable for PCB hash table sizes"
+#define SCTPCTL_PCBHASHSIZE_MIN 1
+#define SCTPCTL_PCBHASHSIZE_MAX 0xFFFFFFFF
+#define SCTPCTL_PCBHASHSIZE_DEFAULT SCTP_PCBHASHSIZE
+
+/* min_split_point: Minimum size when splitting a chunk */
+#define SCTPCTL_MIN_SPLIT_POINT_DESC "Minimum size when splitting a chunk"
+#define SCTPCTL_MIN_SPLIT_POINT_MIN 0
+#define SCTPCTL_MIN_SPLIT_POINT_MAX 0xFFFFFFFF
+#define SCTPCTL_MIN_SPLIT_POINT_DEFAULT SCTP_DEFAULT_SPLIT_POINT_MIN
+
+/* chunkscale: Tuneable for Scaling of number of chunks and messages */
+#define SCTPCTL_CHUNKSCALE_DESC "Tuneable for Scaling of number of chunks and messages"
+#define SCTPCTL_CHUNKSCALE_MIN 1
+#define SCTPCTL_CHUNKSCALE_MAX 0xFFFFFFFF
+#define SCTPCTL_CHUNKSCALE_DEFAULT SCTP_CHUNKQUEUE_SCALE
+
+/* delayed_sack_time: Default delayed SACK timer in msec */
+#define SCTPCTL_DELAYED_SACK_TIME_DESC "Default delayed SACK timer in msec"
+#define SCTPCTL_DELAYED_SACK_TIME_MIN 0
+#define SCTPCTL_DELAYED_SACK_TIME_MAX 0xFFFFFFFF
+#define SCTPCTL_DELAYED_SACK_TIME_DEFAULT SCTP_RECV_MSEC
+
+/* sack_freq: Default SACK frequency */
+#define SCTPCTL_SACK_FREQ_DESC "Default SACK frequency"
+#define SCTPCTL_SACK_FREQ_MIN 0
+#define SCTPCTL_SACK_FREQ_MAX 0xFFFFFFFF
+#define SCTPCTL_SACK_FREQ_DEFAULT SCTP_DEFAULT_SACK_FREQ
+
+/* sys_resource: Max number of cached resources in the system */
+#define SCTPCTL_SYS_RESOURCE_DESC "Max number of cached resources in the system"
+#define SCTPCTL_SYS_RESOURCE_MIN 0
+#define SCTPCTL_SYS_RESOURCE_MAX 0xFFFFFFFF
+#define SCTPCTL_SYS_RESOURCE_DEFAULT SCTP_DEF_SYSTEM_RESC_LIMIT
+
+/* asoc_resource: Max number of cached resources in an asoc */
+#define SCTPCTL_ASOC_RESOURCE_DESC "Max number of cached resources in an asoc"
+#define SCTPCTL_ASOC_RESOURCE_MIN 0
+#define SCTPCTL_ASOC_RESOURCE_MAX 0xFFFFFFFF
+#define SCTPCTL_ASOC_RESOURCE_DEFAULT SCTP_DEF_ASOC_RESC_LIMIT
+
+/* heartbeat_interval: Default heartbeat interval in msec */
+#define SCTPCTL_HEARTBEAT_INTERVAL_DESC "Default heartbeat interval in msec"
+#define SCTPCTL_HEARTBEAT_INTERVAL_MIN 0
+#define SCTPCTL_HEARTBEAT_INTERVAL_MAX 0xFFFFFFFF
+#define SCTPCTL_HEARTBEAT_INTERVAL_DEFAULT SCTP_HB_DEFAULT_MSEC
+
+/* pmtu_raise_time: Default PMTU raise timer in sec */
+#define SCTPCTL_PMTU_RAISE_TIME_DESC "Default PMTU raise timer in sec"
+#define SCTPCTL_PMTU_RAISE_TIME_MIN 0
+#define SCTPCTL_PMTU_RAISE_TIME_MAX 0xFFFFFFFF
+#define SCTPCTL_PMTU_RAISE_TIME_DEFAULT SCTP_DEF_PMTU_RAISE_SEC
+
+/* shutdown_guard_time: Default shutdown guard timer in sec */
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_DESC "Default shutdown guard timer in sec"
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_MIN 0
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_MAX 0xFFFFFFFF
+#define SCTPCTL_SHUTDOWN_GUARD_TIME_DEFAULT SCTP_DEF_MAX_SHUTDOWN_SEC
+
+/* secret_lifetime: Default secret lifetime in sec */
+#define SCTPCTL_SECRET_LIFETIME_DESC "Default secret lifetime in sec"
+#define SCTPCTL_SECRET_LIFETIME_MIN 0
+#define SCTPCTL_SECRET_LIFETIME_MAX 0xFFFFFFFF
+#define SCTPCTL_SECRET_LIFETIME_DEFAULT SCTP_DEFAULT_SECRET_LIFE_SEC
+
+/* rto_max: Default maximum retransmission timeout in msec */
+#define SCTPCTL_RTO_MAX_DESC "Default maximum retransmission timeout in msec"
+#define SCTPCTL_RTO_MAX_MIN 0
+#define SCTPCTL_RTO_MAX_MAX 0xFFFFFFFF
+#define SCTPCTL_RTO_MAX_DEFAULT SCTP_RTO_UPPER_BOUND
+
+/* rto_min: Default minimum retransmission timeout in msec */
+#define SCTPCTL_RTO_MIN_DESC "Default minimum retransmission timeout in msec"
+#define SCTPCTL_RTO_MIN_MIN 0
+#define SCTPCTL_RTO_MIN_MAX 0xFFFFFFFF
+#define SCTPCTL_RTO_MIN_DEFAULT SCTP_RTO_LOWER_BOUND
+
+/* rto_initial: Default initial retransmission timeout in msec */
+#define SCTPCTL_RTO_INITIAL_DESC "Default initial retransmission timeout in msec"
+#define SCTPCTL_RTO_INITIAL_MIN 0
+#define SCTPCTL_RTO_INITIAL_MAX 0xFFFFFFFF
+#define SCTPCTL_RTO_INITIAL_DEFAULT SCTP_RTO_INITIAL
+
+/* init_rto_max: Default maximum retransmission timeout during association setup in msec */
+#define SCTPCTL_INIT_RTO_MAX_DESC "Default maximum retransmission timeout during association setup in msec"
+#define SCTPCTL_INIT_RTO_MAX_MIN 0
+#define SCTPCTL_INIT_RTO_MAX_MAX 0xFFFFFFFF
+#define SCTPCTL_INIT_RTO_MAX_DEFAULT SCTP_RTO_UPPER_BOUND
+
+/* valid_cookie_life: Default cookie lifetime in sec */
+#define SCTPCTL_VALID_COOKIE_LIFE_DESC "Default cookie lifetime in sec"
+#define SCTPCTL_VALID_COOKIE_LIFE_MIN 0
+#define SCTPCTL_VALID_COOKIE_LIFE_MAX 0xFFFFFFFF
+#define SCTPCTL_VALID_COOKIE_LIFE_DEFAULT SCTP_DEFAULT_COOKIE_LIFE
+
+/* init_rtx_max: Default maximum number of retransmission for INIT chunks */
+#define SCTPCTL_INIT_RTX_MAX_DESC "Default maximum number of retransmission for INIT chunks"
+#define SCTPCTL_INIT_RTX_MAX_MIN 0
+#define SCTPCTL_INIT_RTX_MAX_MAX 0xFFFFFFFF
+#define SCTPCTL_INIT_RTX_MAX_DEFAULT SCTP_DEF_MAX_INIT
+
+/* assoc_rtx_max: Default maximum number of retransmissions per association */
+#define SCTPCTL_ASSOC_RTX_MAX_DESC "Default maximum number of retransmissions per association"
+#define SCTPCTL_ASSOC_RTX_MAX_MIN 0
+#define SCTPCTL_ASSOC_RTX_MAX_MAX 0xFFFFFFFF
+#define SCTPCTL_ASSOC_RTX_MAX_DEFAULT SCTP_DEF_MAX_SEND
+
+/* path_rtx_max: Default maximum of retransmissions per path */
+#define SCTPCTL_PATH_RTX_MAX_DESC "Default maximum of retransmissions per path"
+#define SCTPCTL_PATH_RTX_MAX_MIN 0
+#define SCTPCTL_PATH_RTX_MAX_MAX 0xFFFFFFFF
+#define SCTPCTL_PATH_RTX_MAX_DEFAULT SCTP_DEF_MAX_PATH_RTX
+
+/* add_more_on_output: When space wise is it worthwhile to try to add more to a socket send buffer */
+#define SCTPCTL_ADD_MORE_ON_OUTPUT_DESC "When space wise is it worthwhile to try to add more to a socket send buffer"
+#define SCTPCTL_ADD_MORE_ON_OUTPUT_MIN 0
+#define SCTPCTL_ADD_MORE_ON_OUTPUT_MAX 0xFFFFFFFF
+#define SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT SCTP_DEFAULT_ADD_MORE
+
+/* outgoing_streams: Default number of outgoing streams */
+#define SCTPCTL_OUTGOING_STREAMS_DESC "Default number of outgoing streams"
+#define SCTPCTL_OUTGOING_STREAMS_MIN 1
+#define SCTPCTL_OUTGOING_STREAMS_MAX 65535
+#define SCTPCTL_OUTGOING_STREAMS_DEFAULT SCTP_OSTREAM_INITIAL
+
+/* cmt_on_off: CMT on/off flag */
+#define SCTPCTL_CMT_ON_OFF_DESC "CMT on/off flag"
+#define SCTPCTL_CMT_ON_OFF_MIN 0
+#define SCTPCTL_CMT_ON_OFF_MAX 1
+#define SCTPCTL_CMT_ON_OFF_DEFAULT 0
+
+/* cmt_use_dac: CMT DAC on/off flag */
+#define SCTPCTL_CMT_USE_DAC_DESC "CMT DAC on/off flag"
+#define SCTPCTL_CMT_USE_DAC_MIN 0
+#define SCTPCTL_CMT_USE_DAC_MAX 1
+#define SCTPCTL_CMT_USE_DAC_DEFAULT 0
+
+/* JRS 5/21/07 - CMT PF type flag */
+#define SCTPCTL_CMT_PF_DESC "CMT PF type flag"
+#define SCTPCTL_CMT_PF_MIN 0
+#define SCTPCTL_CMT_PF_MAX 2
+#define SCTPCTL_CMT_PF_DEFAULT 0
+
+/* cwnd_maxburst: Use a CWND adjusting maxburst */
+#define SCTPCTL_CWND_MAXBURST_DESC "Use a CWND adjusting maxburst"
+#define SCTPCTL_CWND_MAXBURST_MIN 0
+#define SCTPCTL_CWND_MAXBURST_MAX 1
+#define SCTPCTL_CWND_MAXBURST_DEFAULT 1
+
+/* early_fast_retran: Early Fast Retransmit with timer */
+#define SCTPCTL_EARLY_FAST_RETRAN_DESC "Early Fast Retransmit with timer"
+#define SCTPCTL_EARLY_FAST_RETRAN_MIN 0
+#define SCTPCTL_EARLY_FAST_RETRAN_MAX 0xFFFFFFFF
+#define SCTPCTL_EARLY_FAST_RETRAN_DEFAULT 0
+
+/* early_fast_retran_msec: Early Fast Retransmit minimum timer value */
+#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_DESC "Early Fast Retransmit minimum timer value"
+#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_MIN 0
+#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_MAX 0xFFFFFFFF
+#define SCTPCTL_EARLY_FAST_RETRAN_MSEC_DEFAULT SCTP_MINFR_MSEC_TIMER
+
+/* asconf_auth_nochk: Disable SCTP ASCONF AUTH requirement */
+#define SCTPCTL_ASCONF_AUTH_NOCHK_DESC "Disable SCTP ASCONF AUTH requirement"
+#define SCTPCTL_ASCONF_AUTH_NOCHK_MIN 0
+#define SCTPCTL_ASCONF_AUTH_NOCHK_MAX 1
+#define SCTPCTL_ASCONF_AUTH_NOCHK_DEFAULT 0
+
+/* auth_disable: Disable SCTP AUTH function */
+#define SCTPCTL_AUTH_DISABLE_DESC "Disable SCTP AUTH function"
+#define SCTPCTL_AUTH_DISABLE_MIN 0
+#define SCTPCTL_AUTH_DISABLE_MAX 1
+#define SCTPCTL_AUTH_DISABLE_DEFAULT 0
+
+/* nat_friendly: SCTP NAT friendly operation */
+#define SCTPCTL_NAT_FRIENDLY_DESC "SCTP NAT friendly operation"
+#define SCTPCTL_NAT_FRIENDLY_MIN 0
+#define SCTPCTL_NAT_FRIENDLY_MAX 1
+#define SCTPCTL_NAT_FRIENDLY_DEFAULT 1
+
+/* abc_l_var: SCTP ABC max increase per SACK (L) */
+#define SCTPCTL_ABC_L_VAR_DESC "SCTP ABC max increase per SACK (L)"
+#define SCTPCTL_ABC_L_VAR_MIN 0
+#define SCTPCTL_ABC_L_VAR_MAX 0xFFFFFFFF
+#define SCTPCTL_ABC_L_VAR_DEFAULT 1
+
+/* max_chained_mbufs: Default max number of small mbufs on a chain */
+#define SCTPCTL_MAX_CHAINED_MBUFS_DESC "Default max number of small mbufs on a chain"
+#define SCTPCTL_MAX_CHAINED_MBUFS_MIN 0
+#define SCTPCTL_MAX_CHAINED_MBUFS_MAX 0xFFFFFFFF
+#define SCTPCTL_MAX_CHAINED_MBUFS_DEFAULT SCTP_DEFAULT_MBUFS_IN_CHAIN
+
+/* do_sctp_drain: Should SCTP respond to the drain calls */
+#define SCTPCTL_DO_SCTP_DRAIN_DESC "Should SCTP respond to the drain calls"
+#define SCTPCTL_DO_SCTP_DRAIN_MIN 0
+#define SCTPCTL_DO_SCTP_DRAIN_MAX 1
+#define SCTPCTL_DO_SCTP_DRAIN_DEFAULT 1
+
+/* hb_max_burst: Confirmation Heartbeat max burst? */
+#define SCTPCTL_HB_MAX_BURST_DESC "Confirmation Heartbeat max burst"
+#define SCTPCTL_HB_MAX_BURST_MIN 1
+#define SCTPCTL_HB_MAX_BURST_MAX 0xFFFFFFFF
+#define SCTPCTL_HB_MAX_BURST_DEFAULT SCTP_DEF_MAX_BURST
+
+/* abort_at_limit: When one-2-one hits qlimit abort */
+#define SCTPCTL_ABORT_AT_LIMIT_DESC "When one-2-one hits qlimit abort"
+#define SCTPCTL_ABORT_AT_LIMIT_MIN 0
+#define SCTPCTL_ABORT_AT_LIMIT_MAX 1
+#define SCTPCTL_ABORT_AT_LIMIT_DEFAULT 0
+
+/* strict_data_order: Enforce strict data ordering, abort if control inside data */
+#define SCTPCTL_STRICT_DATA_ORDER_DESC "Enforce strict data ordering, abort if control inside data"
+#define SCTPCTL_STRICT_DATA_ORDER_MIN 0
+#define SCTPCTL_STRICT_DATA_ORDER_MAX 1
+#define SCTPCTL_STRICT_DATA_ORDER_DEFAULT 0
+
+/* min_residual: min residual in a data fragment leftover */
+#define SCTPCTL_MIN_RESIDUAL_DESC "Minimum residual data chunk in second part of split"
+#define SCTPCTL_MIN_RESIDUAL_MIN 20
+#define SCTPCTL_MIN_RESIDUAL_MAX 65535
+#define SCTPCTL_MIN_RESIDUAL_DEFAULT 1452
+
+/* max_retran_chunk: max chunk retransmissions */
+#define SCTPCTL_MAX_RETRAN_CHUNK_DESC "Maximum times an unlucky chunk can be retran'd before assoc abort"
+#define SCTPCTL_MAX_RETRAN_CHUNK_MIN 0
+#define SCTPCTL_MAX_RETRAN_CHUNK_MAX 65535
+#define SCTPCTL_MAX_RETRAN_CHUNK_DEFAULT 30
+
+/* sctp_logging: This gives us logging when the options are enabled */
+#define SCTPCTL_LOGGING_LEVEL_DESC "Ltrace/KTR trace logging level"
+#define SCTPCTL_LOGGING_LEVEL_MIN 0
+#define SCTPCTL_LOGGING_LEVEL_MAX 0xffffffff
+#define SCTPCTL_LOGGING_LEVEL_DEFAULT 0
+
+/* JRS - default congestion control module sysctl */
+#define SCTPCTL_DEFAULT_CC_MODULE_DESC "Default congestion control module"
+#define SCTPCTL_DEFAULT_CC_MODULE_MIN 0
+#define SCTPCTL_DEFAULT_CC_MODULE_MAX 2
+#define SCTPCTL_DEFAULT_CC_MODULE_DEFAULT 0
+
+/* RRS - default fragment interleave */
+#define SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DESC "Default fragment interleave level"
+#define SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MIN 0
+#define SCTPCTL_DEFAULT_FRAG_INTERLEAVE_MAX 2
+#define SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DEFAULT 1
+
+/* mobility_base: Enable SCTP mobility support */
+#define SCTPCTL_MOBILITY_BASE_DESC "Enable SCTP base mobility"
+#define SCTPCTL_MOBILITY_BASE_MIN 0
+#define SCTPCTL_MOBILITY_BASE_MAX 1
+#define SCTPCTL_MOBILITY_BASE_DEFAULT SCTP_DEFAULT_MOBILITY_BASE
+
+/* mobility_fasthandoff: Enable SCTP fast handoff support */
+#define SCTPCTL_MOBILITY_FASTHANDOFF_DESC "Enable SCTP fast handoff"
+#define SCTPCTL_MOBILITY_FASTHANDOFF_MIN 0
+#define SCTPCTL_MOBILITY_FASTHANDOFF_MAX 1
+#define SCTPCTL_MOBILITY_FASTHANDOFF_DEFAULT SCTP_DEFAULT_MOBILITY_FASTHANDOFF
+
+#if defined(SCTP_DEBUG)
+/* debug: Configure debug output */
+#define SCTPCTL_DEBUG_DESC "Configure debug output"
+#define SCTPCTL_DEBUG_MIN 0
+#define SCTPCTL_DEBUG_MAX 0xFFFFFFFF
+#define SCTPCTL_DEBUG_DEFAULT 0
+#endif
+
+
+
+#if defined(_KERNEL)
+
+/*
+ * variable definitions
+ */
+extern uint32_t sctp_sendspace;
+extern uint32_t sctp_recvspace;
+extern uint32_t sctp_auto_asconf;
+extern uint32_t sctp_ecn_enable;
+extern uint32_t sctp_ecn_nonce;
+extern uint32_t sctp_strict_sacks;
+extern uint32_t sctp_no_csum_on_loopback;
+extern uint32_t sctp_strict_init;
+extern uint32_t sctp_peer_chunk_oh;
+extern uint32_t sctp_max_burst_default;
+extern uint32_t sctp_max_chunks_on_queue;
+extern uint32_t sctp_hashtblsize;
+extern uint32_t sctp_pcbtblsize;
+extern uint32_t sctp_min_split_point;
+extern uint32_t sctp_chunkscale;
+extern uint32_t sctp_delayed_sack_time_default;
+extern uint32_t sctp_sack_freq_default;
+extern uint32_t sctp_system_free_resc_limit;
+extern uint32_t sctp_asoc_free_resc_limit;
+extern uint32_t sctp_heartbeat_interval_default;
+extern uint32_t sctp_pmtu_raise_time_default;
+extern uint32_t sctp_shutdown_guard_time_default;
+extern uint32_t sctp_secret_lifetime_default;
+extern uint32_t sctp_rto_max_default;
+extern uint32_t sctp_rto_min_default;
+extern uint32_t sctp_rto_initial_default;
+extern uint32_t sctp_init_rto_max_default;
+extern uint32_t sctp_valid_cookie_life_default;
+extern uint32_t sctp_init_rtx_max_default;
+extern uint32_t sctp_assoc_rtx_max_default;
+extern uint32_t sctp_path_rtx_max_default;
+extern uint32_t sctp_add_more_threshold;
+extern uint32_t sctp_nr_outgoing_streams_default;
+extern uint32_t sctp_cmt_on_off;
+extern uint32_t sctp_cmt_use_dac;
+
+/* JRS 5/21/07 - CMT PF type flag variables */
+extern uint32_t sctp_cmt_pf;
+extern uint32_t sctp_use_cwnd_based_maxburst;
+extern uint32_t sctp_early_fr;
+extern uint32_t sctp_early_fr_msec;
+extern uint32_t sctp_asconf_auth_nochk;
+extern uint32_t sctp_auth_disable;
+extern uint32_t sctp_nat_friendly;
+extern uint32_t sctp_L2_abc_variable;
+extern uint32_t sctp_mbuf_threshold_count;
+extern uint32_t sctp_do_drain;
+extern uint32_t sctp_hb_maxburst;
+extern uint32_t sctp_abort_if_one_2_one_hits_limit;
+extern uint32_t sctp_strict_data_order;
+extern uint32_t sctp_min_residual;
+extern uint32_t sctp_max_retran_chunk;
+extern uint32_t sctp_logging_level;
+
+/* JRS - Variable for the default congestion control module */
+extern uint32_t sctp_default_cc_module;
+extern uint32_t sctp_default_frag_interleave;
+extern uint32_t sctp_mobility_base;
+extern uint32_t sctp_mobility_fasthandoff;
+
+#if defined(SCTP_LOCAL_TRACE_BUF)
+extern struct sctp_log sctp_log;
+
+#endif
+#if defined(SCTP_DEBUG)
+extern uint32_t sctp_debug_on;
+
+#endif
+
+extern struct sctpstat sctpstat;
+
+#if defined(SYSCTL_DECL)
+SYSCTL_DECL(_net_inet_sctp);
+#endif
+
+#endif /* _KERNEL */
+#endif /* __sctp_sysctl_h__ */
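The SCTPCTL_*_{MIN,MAX,DEFAULT} triplets above exist so the sysctl handlers can
range-check a requested value and fall back to the default. A minimal userspace
sketch of that check, assuming a stand-in default value rather than the real
SCTP_DEF_MAX_BURST, and a hypothetical helper name:

	#include <stdint.h>
	#include <stdio.h>

	#define SCTPCTL_MAXBURST_MIN     1
	#define SCTPCTL_MAXBURST_MAX     0xFFFFFFFF
	#define SCTPCTL_MAXBURST_DEFAULT 4	/* stand-in for SCTP_DEF_MAX_BURST */

	/* Keep the requested value only if it is inside [min, max]. */
	static uint32_t
	sctp_clamp_tunable(uint64_t req, uint64_t min, uint64_t max, uint32_t def)
	{
		if (req < min || req > max)
			return (def);
		return ((uint32_t)req);
	}

	int
	main(void)
	{
		printf("%u\n", (unsigned)sctp_clamp_tunable(0,
		    SCTPCTL_MAXBURST_MIN, SCTPCTL_MAXBURST_MAX,
		    SCTPCTL_MAXBURST_DEFAULT));	/* 0 is below min: prints 4 */
		return (0);
	}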
--- /dev/null
+++ sys/netinet/sctp_peeloff.h
@@ -0,0 +1,52 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_peeloff.h,v 1.6 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_peeloff.h,v 1.3 2007/05/08 17:01:10 rrs Exp $");
+
+#ifndef __sctp_peeloff_h__
+#define __sctp_peeloff_h__
+
+
+
+
+#if defined(_KERNEL)
+
+int sctp_can_peel_off(struct socket *, sctp_assoc_t);
+int sctp_do_peeloff(struct socket *, struct socket *, sctp_assoc_t);
+struct socket *sctp_get_peeloff(struct socket *, sctp_assoc_t, int *);
+
+
+
+#endif /* _KERNEL */
+
+#endif
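These kernel hooks back the "peel off" operation exposed to userland: given an
association id on a one-to-many SCTP socket, a new one-to-one socket is created
for just that association. A hedged userland-side sketch, assuming the
sctp_peeloff() wrapper declared in <netinet/sctp.h> on a system built with SCTP
support (error handling trimmed):

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <netinet/sctp.h>

	int
	peel_association(int one_to_many_sd, sctp_assoc_t assoc_id)
	{
		/* New one-to-one socket bound to this association only. */
		int peeled = sctp_peeloff(one_to_many_sd, assoc_id);

		return (peeled);	/* -1 on error, per the usual convention */
	}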
--- /dev/null
+++ sys/netinet/sctp_auth.h
@@ -0,0 +1,230 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_auth.h,v 1.5 2007/06/09 13:46:57 rrs Exp $");
+
+#ifndef __SCTP_AUTH_H__
+#define __SCTP_AUTH_H__
+
+
+/* digest lengths */
+#define SCTP_AUTH_DIGEST_LEN_SHA1 20
+#define SCTP_AUTH_DIGEST_LEN_MD5 16
+#define SCTP_AUTH_DIGEST_LEN_SHA224 28
+#define SCTP_AUTH_DIGEST_LEN_SHA256 32
+#define SCTP_AUTH_DIGEST_LEN_SHA384 48
+#define SCTP_AUTH_DIGEST_LEN_SHA512 64
+#define SCTP_AUTH_DIGEST_LEN_MAX 64
+
+/* random sizes */
+#define SCTP_AUTH_RANDOM_SIZE_DEFAULT 32
+#define SCTP_AUTH_RANDOM_SIZE_REQUIRED 32
+#define SCTP_AUTH_RANDOM_SIZE_MAX 256
+
+/* union of all supported HMAC algorithm contexts */
+typedef union sctp_hash_context {
+ SHA1_CTX sha1;
+ MD5_CTX md5;
+#ifdef HAVE_SHA2
+ SHA256_CTX sha256;
+ SHA384_CTX sha384;
+ SHA512_CTX sha512;
+#endif
+} sctp_hash_context_t;
+
+typedef struct sctp_key {
+ uint32_t keylen;
+ uint8_t key[0];
+} sctp_key_t;
+
+typedef struct sctp_shared_key {
+ LIST_ENTRY(sctp_shared_key) next;
+ sctp_key_t *key; /* key text */
+ uint16_t keyid; /* shared key ID */
+} sctp_sharedkey_t;
+
+LIST_HEAD(sctp_keyhead, sctp_shared_key);
+
+/* authentication chunks list */
+typedef struct sctp_auth_chklist {
+ uint8_t chunks[256];
+ uint8_t num_chunks;
+} sctp_auth_chklist_t;
+
+/* hmac algos supported list */
+typedef struct sctp_hmaclist {
+ uint16_t max_algo; /* max algorithms allocated */
+ uint16_t num_algo; /* num algorithms used */
+ uint16_t hmac[0];
+} sctp_hmaclist_t;
+
+/* authentication info */
+typedef struct sctp_authinfo {
+ sctp_key_t *random; /* local random key (concatenated) */
+ uint32_t random_len; /* local random number length for param */
+ sctp_key_t *peer_random;/* peer's random key (concatenated) */
+ uint16_t assoc_keyid; /* current send keyid (cached) */
+ uint16_t recv_keyid; /* last recv keyid (cached) */
+ sctp_key_t *assoc_key; /* cached send key */
+ sctp_key_t *recv_key; /* cached recv key */
+} sctp_authinfo_t;
+
+
+
+/*
+ * Macros
+ */
+#define sctp_auth_is_required_chunk(chunk, list) ((list == NULL) ? (0) : (list->chunks[chunk] != 0))
+
+/*
+ * function prototypes
+ */
+
+/* socket option api functions */
+extern sctp_auth_chklist_t *sctp_alloc_chunklist(void);
+extern void sctp_free_chunklist(sctp_auth_chklist_t * chklist);
+extern void sctp_clear_chunklist(sctp_auth_chklist_t * chklist);
+extern sctp_auth_chklist_t *sctp_copy_chunklist(sctp_auth_chklist_t * chklist);
+extern int sctp_auth_add_chunk(uint8_t chunk, sctp_auth_chklist_t * list);
+extern int sctp_auth_delete_chunk(uint8_t chunk, sctp_auth_chklist_t * list);
+extern size_t sctp_auth_get_chklist_size(const sctp_auth_chklist_t * list);
+extern void sctp_auth_set_default_chunks(sctp_auth_chklist_t * list);
+extern int
+ sctp_serialize_auth_chunks(const sctp_auth_chklist_t * list, uint8_t * ptr);
+extern int sctp_pack_auth_chunks(const sctp_auth_chklist_t * list, uint8_t * ptr);
+extern int
+sctp_unpack_auth_chunks(const uint8_t * ptr, uint8_t num_chunks,
+ sctp_auth_chklist_t * list);
+
+/* key handling */
+extern sctp_key_t *sctp_alloc_key(uint32_t keylen);
+extern void sctp_free_key(sctp_key_t * key);
+extern void sctp_print_key(sctp_key_t * key, const char *str);
+extern void sctp_show_key(sctp_key_t * key, const char *str);
+extern sctp_key_t *sctp_generate_random_key(uint32_t keylen);
+extern sctp_key_t *sctp_set_key(uint8_t * key, uint32_t keylen);
+extern sctp_key_t *
+sctp_compute_hashkey(sctp_key_t * key1, sctp_key_t * key2,
+ sctp_key_t * shared);
+
+/* shared key handling */
+extern sctp_sharedkey_t *sctp_alloc_sharedkey(void);
+extern void sctp_free_sharedkey(sctp_sharedkey_t * skey);
+extern sctp_sharedkey_t *
+ sctp_find_sharedkey(struct sctp_keyhead *shared_keys, uint16_t key_id);
+extern void
+sctp_insert_sharedkey(struct sctp_keyhead *shared_keys,
+ sctp_sharedkey_t * new_skey);
+extern int
+sctp_copy_skeylist(const struct sctp_keyhead *src,
+ struct sctp_keyhead *dest);
+
+/* hmac list handling */
+extern sctp_hmaclist_t *sctp_alloc_hmaclist(uint8_t num_hmacs);
+extern void sctp_free_hmaclist(sctp_hmaclist_t * list);
+extern int sctp_auth_add_hmacid(sctp_hmaclist_t * list, uint16_t hmac_id);
+extern sctp_hmaclist_t *sctp_copy_hmaclist(sctp_hmaclist_t * list);
+extern sctp_hmaclist_t *sctp_default_supported_hmaclist(void);
+extern uint16_t
+sctp_negotiate_hmacid(sctp_hmaclist_t * peer,
+ sctp_hmaclist_t * local);
+extern int sctp_serialize_hmaclist(sctp_hmaclist_t * list, uint8_t * ptr);
+extern int
+sctp_verify_hmac_param(struct sctp_auth_hmac_algo *hmacs,
+ uint32_t num_hmacs);
+
+extern sctp_authinfo_t *sctp_alloc_authinfo(void);
+extern void sctp_free_authinfo(sctp_authinfo_t * authinfo);
+
+/* keyed-HMAC functions */
+extern uint32_t sctp_get_auth_chunk_len(uint16_t hmac_algo);
+extern uint32_t sctp_get_hmac_digest_len(uint16_t hmac_algo);
+extern uint32_t
+sctp_hmac(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ uint8_t * text, uint32_t textlen, uint8_t * digest);
+extern int
+sctp_verify_hmac(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ uint8_t * text, uint32_t textlen, uint8_t * digest,
+ uint32_t digestlen);
+extern uint32_t
+sctp_compute_hmac(uint16_t hmac_algo, sctp_key_t * key,
+ uint8_t * text, uint32_t textlen, uint8_t * digest);
+extern int sctp_auth_is_supported_hmac(sctp_hmaclist_t * list, uint16_t id);
+
+/* mbuf versions */
+extern uint32_t
+sctp_hmac_m(uint16_t hmac_algo, uint8_t * key, uint32_t keylen,
+ struct mbuf *m, uint32_t m_offset, uint8_t * digest, uint32_t trailer);
+extern uint32_t
+sctp_compute_hmac_m(uint16_t hmac_algo, sctp_key_t * key, struct mbuf *m,
+ uint32_t m_offset, uint8_t * digest);
+
+/*
+ * authentication routines
+ */
+extern void sctp_clear_cachedkeys(struct sctp_tcb *stcb, uint16_t keyid);
+extern void sctp_clear_cachedkeys_ep(struct sctp_inpcb *inp, uint16_t keyid);
+extern int sctp_delete_sharedkey(struct sctp_tcb *stcb, uint16_t keyid);
+extern int sctp_delete_sharedkey_ep(struct sctp_inpcb *inp, uint16_t keyid);
+extern int sctp_auth_setactivekey(struct sctp_tcb *stcb, uint16_t keyid);
+extern int sctp_auth_setactivekey_ep(struct sctp_inpcb *inp, uint16_t keyid);
+
+extern void
+sctp_auth_get_cookie_params(struct sctp_tcb *stcb, struct mbuf *m,
+ uint32_t offset, uint32_t length);
+extern void
+sctp_fill_hmac_digest_m(struct mbuf *m, uint32_t auth_offset,
+ struct sctp_auth_chunk *auth,
+ struct sctp_tcb *stcb);
+extern struct mbuf *
+sctp_add_auth_chunk(struct mbuf *m, struct mbuf **m_end,
+ struct sctp_auth_chunk **auth_ret,
+ uint32_t * offset, struct sctp_tcb *stcb,
+ uint8_t chunk);
+extern int
+sctp_handle_auth(struct sctp_tcb *stcb, struct sctp_auth_chunk *ch,
+ struct mbuf *m, uint32_t offset);
+extern void
+sctp_notify_authentication(struct sctp_tcb *stcb,
+ uint32_t indication, uint16_t keyid,
+ uint16_t alt_keyid);
+extern int
+ sctp_validate_init_auth_params(struct mbuf *m, int offset, int limit);
+extern void
+ sctp_initialize_auth_params(struct sctp_inpcb *inp, struct sctp_tcb *stcb);
+
+
+/* test functions */
+extern void sctp_test_hmac_sha1(void);
+extern void sctp_test_hmac_md5(void);
+extern void sctp_test_authkey(void);
+
+#endif /* __SCTP_AUTH_H__ */
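sctp_key_t above uses the classic length-header-plus-flexible-array layout
(keylen followed by key[0]); sctp_alloc_key()/sctp_set_key() fill such a blob
from caller-supplied bytes. A standalone sketch of the same idiom, with local
stand-in names rather than the kernel allocators:

	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>

	struct demo_key {
		uint32_t keylen;
		uint8_t  key[];		/* C99 spelling of key[0] */
	};

	/* Allocate a key blob and copy keylen bytes of key material into it. */
	static struct demo_key *
	demo_set_key(const uint8_t *src, uint32_t keylen)
	{
		struct demo_key *k = malloc(sizeof(*k) + keylen);

		if (k == NULL)
			return (NULL);
		k->keylen = keylen;
		memcpy(k->key, src, keylen);
		return (k);
	}

	int
	main(void)
	{
		uint8_t secret[32] = { 0 };
		struct demo_key *k = demo_set_key(secret, sizeof(secret));

		free(k);
		return (0);
	}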
--- /dev/null
+++ sys/netinet/sctp_cc_functions.h
@@ -0,0 +1,116 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_cc_functions.h,v 1.2 2007/09/10 17:06:25 rrs Exp $");
+
+#ifndef __sctp_cc_functions_h__
+#define __sctp_cc_functions_h__
+
+#if defined(_KERNEL)
+
+void
+sctp_set_initial_cc_param(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc);
+
+void
+sctp_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit);
+
+void
+sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc);
+
+void
+sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit);
+
+void
+sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
+ struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
+ uint32_t * bottle_bw, uint32_t * on_queue);
+
+void
+sctp_cwnd_update_after_output(struct sctp_tcb *stcb,
+ struct sctp_nets *net, int burst_limit);
+
+void
+sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets *net);
+
+/*
+ * HTCP algorithms are directly taken from
+ * R.N.Shorten, D.J.Leith and are work/outcome from
+ * a Cisco-URP grant to enhance HTCP for satellite
+ * communications. We use the BSD License
+ * granted from his source and have modified his
+ * algorithms to fit within the SCTP BSD framework.
+ */
+
+void
+sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
+ struct sctp_association *asoc);
+
+void
+sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb,
+ struct sctp_association *asoc,
+ int accum_moved, int reneged_all, int will_exit);
+
+void
+sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
+ struct sctp_nets *net);
+
+void
+sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb, struct sctp_nets *net);
+
+#endif
+#endif
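
The declarations above fall into two parallel families: the default congestion-control
updates and the HTCP variants. A minimal sketch of how such entry points could be
gathered into a per-association dispatch table follows; the struct layout and field
names here are hypothetical and for illustration only (the real table type is defined
in the SCTP headers, not in this excerpt).

/*
 * Illustrative sketch only: a hypothetical dispatch table over the HTCP
 * entry points declared above.  The struct name and fields are assumptions.
 */
struct sctp_cc_functions_sketch {
	void (*sctp_set_initial_cc_param) (struct sctp_tcb *, struct sctp_nets *);
	void (*sctp_cwnd_update_after_sack) (struct sctp_tcb *,
	    struct sctp_association *, int, int, int);
	void (*sctp_cwnd_update_after_timeout) (struct sctp_tcb *, struct sctp_nets *);
	void (*sctp_cwnd_update_after_ecn_echo) (struct sctp_tcb *, struct sctp_nets *);
};

static const struct sctp_cc_functions_sketch sctp_cc_htcp_sketch = {
	.sctp_set_initial_cc_param = sctp_htcp_set_initial_cc_param,
	.sctp_cwnd_update_after_sack = sctp_htcp_cwnd_update_after_sack,
	.sctp_cwnd_update_after_timeout = sctp_htcp_cwnd_update_after_timeout,
	.sctp_cwnd_update_after_ecn_echo = sctp_htcp_cwnd_update_after_ecn_echo,
};
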
--- /dev/null
+++ sys/netinet/sctp_timer.c
@@ -0,0 +1,1930 @@
+/*-
+ * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * a) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * b) Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ *
+ * c) Neither the name of Cisco Systems, Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* $KAME: sctp_timer.c,v 1.29 2005/03/06 16:04:18 itojun Exp $ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/sctp_timer.c,v 1.32.2.1 2007/10/25 12:27:06 rrs Exp $");
+
+#define _IP_VHL
+#include <netinet/sctp_os.h>
+#include <netinet/sctp_pcb.h>
+#ifdef INET6
+#include <netinet6/sctp6_var.h>
+#endif
+#include <netinet/sctp_var.h>
+#include <netinet/sctp_sysctl.h>
+#include <netinet/sctp_timer.h>
+#include <netinet/sctputil.h>
+#include <netinet/sctp_output.h>
+#include <netinet/sctp_header.h>
+#include <netinet/sctp_indata.h>
+#include <netinet/sctp_asconf.h>
+#include <netinet/sctp_input.h>
+#include <netinet/sctp.h>
+#include <netinet/sctp_uio.h>
+
+
+
+void
+sctp_early_fr_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_tmit_chunk *chk, *tp2;
+ struct timeval now, min_wait, tv;
+ unsigned int cur_rtt, cnt = 0, cnt_resend = 0;
+
+ /* an early FR is occurring. */
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ /* get cur rto in micro-seconds */
+ if (net->lastsa == 0) {
+ /* Hmm no rtt estimate yet? */
+ cur_rtt = stcb->asoc.initial_rto >> 2;
+ } else {
+
+ cur_rtt = ((net->lastsa >> 2) + net->lastsv) >> 1;
+ }
+ if (cur_rtt < sctp_early_fr_msec) {
+ cur_rtt = sctp_early_fr_msec;
+ }
+ cur_rtt *= 1000;
+ tv.tv_sec = cur_rtt / 1000000;
+ tv.tv_usec = cur_rtt % 1000000;
+ min_wait = now;
+ timevalsub(&min_wait, &tv);
+ if (min_wait.tv_sec < 0 || min_wait.tv_usec < 0) {
+ /*
+ * if we hit here, we don't have enough seconds on the clock
+ * to account for the RTO. We just let the lower seconds be
+ * the bounds and don't worry about it. This may mean we
+ * will mark a lot more than we should.
+ */
+ min_wait.tv_sec = min_wait.tv_usec = 0;
+ }
+ chk = TAILQ_LAST(&stcb->asoc.sent_queue, sctpchunk_listhead);
+ for (; chk != NULL; chk = tp2) {
+ tp2 = TAILQ_PREV(chk, sctpchunk_listhead, sctp_next);
+ if (chk->whoTo != net) {
+ continue;
+ }
+ if (chk->sent == SCTP_DATAGRAM_RESEND)
+ cnt_resend++;
+ else if ((chk->sent > SCTP_DATAGRAM_UNSENT) &&
+ (chk->sent < SCTP_DATAGRAM_RESEND)) {
+ /* pending, may need retran */
+ if (chk->sent_rcv_time.tv_sec > min_wait.tv_sec) {
+ /*
+ * we have reached a chunk that was sent
+ * some seconds past our min.. forget it we
+ * will find no more to send.
+ */
+ continue;
+ } else if (chk->sent_rcv_time.tv_sec == min_wait.tv_sec) {
+ /*
+ * we must look at the micro seconds to
+ * know.
+ */
+ if (chk->sent_rcv_time.tv_usec >= min_wait.tv_usec) {
+ /*
+ * ok it was sent after our boundary
+ * time.
+ */
+ continue;
+ }
+ }
+ if (sctp_logging_level & SCTP_EARLYFR_LOGGING_ENABLE) {
+ sctp_log_fr(chk->rec.data.TSN_seq, chk->snd_count,
+ 4, SCTP_FR_MARKED_EARLY);
+ }
+ SCTP_STAT_INCR(sctps_earlyfrmrkretrans);
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ /* double book size since we are doing an early FR */
+ chk->book_size_scale++;
+ cnt += chk->send_size;
+ if ((cnt + net->flight_size) > net->cwnd) {
+ /* Mark all we could possibly resend */
+ break;
+ }
+ }
+ }
+ if (cnt) {
+ /*
+ * JRS - Use the congestion control given in the congestion
+ * control module
+ */
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer(inp, stcb, net);
+ } else if (cnt_resend) {
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
+ }
+ /* Restart it? */
+ if (net->flight_size < net->cwnd) {
+ SCTP_STAT_INCR(sctps_earlyfrstrtmr);
+ sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
+ }
+}
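
A minimal sketch of the marking-boundary arithmetic used in sctp_early_fr_timer()
above, assuming kernel context (timevalsub() from sys/time.h); the helper name is
hypothetical. The RTO in milliseconds becomes a struct timeval, is subtracted from
the current time, and is clamped at zero when the clock has not run long enough to
cover one RTO.

/*
 * Editorial sketch of the min_wait computation above; helper name is hypothetical.
 */
static struct timeval
sctp_mark_boundary_sketch(struct timeval now, unsigned int rto_msec)
{
	struct timeval tv, min_wait;
	unsigned int rto_usec = rto_msec * 1000;	/* msec -> usec */

	tv.tv_sec = rto_usec / 1000000;
	tv.tv_usec = rto_usec % 1000000;
	min_wait = now;
	timevalsub(&min_wait, &tv);
	if (min_wait.tv_sec < 0 || min_wait.tv_usec < 0) {
		/* clock too young to cover an RTO; mark more aggressively */
		min_wait.tv_sec = min_wait.tv_usec = 0;
	}
	return (min_wait);
}
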
+
+void
+sctp_audit_retranmission_queue(struct sctp_association *asoc)
+{
+ struct sctp_tmit_chunk *chk;
+
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Audit invoked on send queue cnt:%d onqueue:%d\n",
+ asoc->sent_queue_retran_cnt,
+ asoc->sent_queue_cnt);
+ asoc->sent_queue_retran_cnt = 0;
+ asoc->sent_queue_cnt = 0;
+ TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
+ if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(asoc->sent_queue_retran_cnt);
+ }
+ asoc->sent_queue_cnt++;
+ }
+ TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
+ if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(asoc->sent_queue_retran_cnt);
+ }
+ }
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Audit completes retran:%d onqueue:%d\n",
+ asoc->sent_queue_retran_cnt,
+ asoc->sent_queue_cnt);
+}
+
+int
+sctp_threshold_management(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, uint16_t threshold)
+{
+ if (net) {
+ net->error_count++;
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Error count for %p now %d thresh:%d\n",
+ net, net->error_count,
+ net->failure_threshold);
+ if (net->error_count > net->failure_threshold) {
+ /* We had a threshold failure */
+ if (net->dest_state & SCTP_ADDR_REACHABLE) {
+ net->dest_state &= ~SCTP_ADDR_REACHABLE;
+ net->dest_state |= SCTP_ADDR_NOT_REACHABLE;
+ net->dest_state &= ~SCTP_ADDR_REQ_PRIMARY;
+ if (net == stcb->asoc.primary_destination) {
+ net->dest_state |= SCTP_ADDR_WAS_PRIMARY;
+ }
+ /*
+ * JRS 5/14/07 - If a destination is
+ * unreachable, the PF bit is turned off.
+ * This allows an unambiguous use of the PF
+ * bit for destinations that are reachable
+ * but potentially failed. If the
+ * destination is set to the unreachable
+ * state, also set the destination to the PF
+ * state.
+ */
+ /*
+ * Add debug message here if destination is
+ * not in PF state.
+ */
+ /* Stop any running T3 timers here? */
+ if (sctp_cmt_on_off && sctp_cmt_pf) {
+ net->dest_state &= ~SCTP_ADDR_PF;
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to unreachable.\n",
+ net);
+ }
+ sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
+ stcb,
+ SCTP_FAILED_THRESHOLD,
+ (void *)net, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ /*********HOLD THIS COMMENT FOR PATCH OF ALTERNATE
+ *********ROUTING CODE
+ */
+ /*********HOLD THIS COMMENT FOR END OF PATCH OF ALTERNATE
+ *********ROUTING CODE
+ */
+ }
+ if (stcb == NULL)
+ return (0);
+
+ if (net) {
+ if ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0) {
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_INCR,
+ stcb->asoc.overall_error_count,
+ (stcb->asoc.overall_error_count + 1),
+ SCTP_FROM_SCTP_TIMER,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count++;
+ }
+ } else {
+ if (sctp_logging_level & SCTP_THRESHOLD_LOGGING) {
+ sctp_misc_ints(SCTP_THRESHOLD_INCR,
+ stcb->asoc.overall_error_count,
+ (stcb->asoc.overall_error_count + 1),
+ SCTP_FROM_SCTP_TIMER,
+ __LINE__);
+ }
+ stcb->asoc.overall_error_count++;
+ }
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Overall error count for %p now %d thresh:%u state:%x\n",
+ &stcb->asoc, stcb->asoc.overall_error_count,
+ (uint32_t) threshold,
+ ((net == NULL) ? (uint32_t) 0 : (uint32_t) net->dest_state));
+ /*
+ * We specifically do not do >= to give the assoc one more chance
+ * before we fail it.
+ */
+ if (stcb->asoc.overall_error_count > threshold) {
+ /* Abort notification sends a ULP notify */
+ struct mbuf *oper;
+
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_1);
+ }
+ inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_1;
+ printf("Aborting association threshold:%d overall error count:%d\n",
+ threshold,
+ stcb->asoc.overall_error_count);
+ sctp_abort_an_association(inp, stcb, SCTP_FAILED_THRESHOLD, oper, SCTP_SO_NOT_LOCKED);
+ return (1);
+ }
+ return (0);
+}
+
+struct sctp_nets *
+sctp_find_alternate_net(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ int mode)
+{
+ /* Find and return an alternate network if possible */
+ struct sctp_nets *alt, *mnet, *min_errors_net = NULL, *max_cwnd_net = NULL;
+ int once;
+
+ /* JRS 5/14/07 - Initialize min_errors to an impossible value. */
+ int min_errors = -1;
+ uint32_t max_cwnd = 0;
+
+ if (stcb->asoc.numnets == 1) {
+ /* No others but net */
+ return (TAILQ_FIRST(&stcb->asoc.nets));
+ }
+ /*
+ * JRS 5/14/07 - If mode is set to 2, use the CMT PF find alternate
+ * net algorithm. This algorithm chooses the active destination (not
+ * in PF state) with the largest cwnd value. If all destinations are
+ * in PF state, unreachable, or unconfirmed, choose the destination
+ * that is in PF state with the lowest error count. In case of a
+ * tie, choose the destination that was most recently active.
+ */
+ if (mode == 2) {
+ TAILQ_FOREACH(mnet, &stcb->asoc.nets, sctp_next) {
+ /*
+ * JRS 5/14/07 - If the destination is unreachable
+ * or unconfirmed, skip it.
+ */
+ if (((mnet->dest_state & SCTP_ADDR_REACHABLE) != SCTP_ADDR_REACHABLE) ||
+ (mnet->dest_state & SCTP_ADDR_UNCONFIRMED)) {
+ continue;
+ }
+ /*
+ * JRS 5/14/07 - If the destination is reachable
+ * but in PF state, compare the error count of the
+ * destination to the minimum error count seen thus
+ * far. Store the destination with the lower error
+ * count. If the error counts are equal, store the
+ * destination that was most recently active.
+ */
+ if (mnet->dest_state & SCTP_ADDR_PF) {
+ /*
+ * JRS 5/14/07 - If the destination under
+ * consideration is the current destination,
+ * work as if the error count is one higher.
+ * The actual error count will not be
+ * incremented until later in the t3
+ * handler.
+ */
+ if (mnet == net) {
+ if (min_errors == -1) {
+ min_errors = mnet->error_count + 1;
+ min_errors_net = mnet;
+ } else if (mnet->error_count + 1 < min_errors) {
+ min_errors = mnet->error_count + 1;
+ min_errors_net = mnet;
+ } else if (mnet->error_count + 1 == min_errors
+ && mnet->last_active > min_errors_net->last_active) {
+ min_errors_net = mnet;
+ min_errors = mnet->error_count + 1;
+ }
+ continue;
+ } else {
+ if (min_errors == -1) {
+ min_errors = mnet->error_count;
+ min_errors_net = mnet;
+ } else if (mnet->error_count < min_errors) {
+ min_errors = mnet->error_count;
+ min_errors_net = mnet;
+ } else if (mnet->error_count == min_errors
+ && mnet->last_active > min_errors_net->last_active) {
+ min_errors_net = mnet;
+ min_errors = mnet->error_count;
+ }
+ continue;
+ }
+ }
+ /*
+ * JRS 5/14/07 - If the destination is reachable and
+ * not in PF state, compare the cwnd of the
+ * destination to the highest cwnd seen thus far.
+ * Store the destination with the higher cwnd value.
+ * If the cwnd values are equal, randomly choose one
+ * of the two destinations.
+ */
+ if (max_cwnd < mnet->cwnd) {
+ max_cwnd_net = mnet;
+ max_cwnd = mnet->cwnd;
+ } else if (max_cwnd == mnet->cwnd) {
+ uint32_t rndval;
+ uint8_t this_random;
+
+ if (stcb->asoc.hb_random_idx > 3) {
+ rndval = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
+ memcpy(stcb->asoc.hb_random_values, &rndval, sizeof(stcb->asoc.hb_random_values));
+ this_random = stcb->asoc.hb_random_values[0];
+ stcb->asoc.hb_random_idx++;
+ stcb->asoc.hb_ect_randombit = 0;
+ } else {
+ this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx];
+ stcb->asoc.hb_random_idx++;
+ stcb->asoc.hb_ect_randombit = 0;
+ }
+ if (this_random % 2 == 1) {
+ max_cwnd_net = mnet;
+ max_cwnd = mnet->cwnd;
+ //Useless ?
+ }
+ }
+ }
+ /*
+ * JRS 5/14/07 - After all destinations have been considered
+ * as alternates, check to see if there was some active
+ * destination (not in PF state). If not, check to see if
+ * there was some PF destination with the minimum number of
+ * errors. If not, return the original destination. If
+ * there is a min_errors_net, remove the PF flag from that
+ * destination, set the cwnd to one or two MTUs, and return
+ * the destination as an alt. If there was some active
+ * destination with a highest cwnd, return the destination
+ * as an alt.
+ */
+ if (max_cwnd_net == NULL) {
+ if (min_errors_net == NULL) {
+ return (net);
+ }
+ min_errors_net->dest_state &= ~SCTP_ADDR_PF;
+ min_errors_net->cwnd = min_errors_net->mtu * sctp_cmt_pf;
+ if (SCTP_OS_TIMER_PENDING(&min_errors_net->rxt_timer.timer)) {
+ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
+ stcb, min_errors_net,
+ SCTP_FROM_SCTP_TIMER + SCTP_LOC_2);
+ }
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to active with %d errors.\n",
+ min_errors_net, min_errors_net->error_count);
+ return (min_errors_net);
+ } else {
+ return (max_cwnd_net);
+ }
+ }
+ /*
+ * JRS 5/14/07 - If mode is set to 1, use the CMT policy for
+ * choosing an alternate net.
+ */
+ else if (mode == 1) {
+ TAILQ_FOREACH(mnet, &stcb->asoc.nets, sctp_next) {
+ if (((mnet->dest_state & SCTP_ADDR_REACHABLE) != SCTP_ADDR_REACHABLE) ||
+ (mnet->dest_state & SCTP_ADDR_UNCONFIRMED)
+ ) {
+ /*
+ * will skip ones that are not-reachable or
+ * unconfirmed
+ */
+ continue;
+ }
+ if (max_cwnd < mnet->cwnd) {
+ max_cwnd_net = mnet;
+ max_cwnd = mnet->cwnd;
+ } else if (max_cwnd == mnet->cwnd) {
+ uint32_t rndval;
+ uint8_t this_random;
+
+ if (stcb->asoc.hb_random_idx > 3) {
+ rndval = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
+ memcpy(stcb->asoc.hb_random_values, &rndval,
+ sizeof(stcb->asoc.hb_random_values));
+ this_random = stcb->asoc.hb_random_values[0];
+ stcb->asoc.hb_random_idx = 0;
+ stcb->asoc.hb_ect_randombit = 0;
+ } else {
+ this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx];
+ stcb->asoc.hb_random_idx++;
+ stcb->asoc.hb_ect_randombit = 0;
+ }
+ if (this_random % 2) {
+ max_cwnd_net = mnet;
+ max_cwnd = mnet->cwnd;
+ }
+ }
+ }
+ if (max_cwnd_net) {
+ return (max_cwnd_net);
+ }
+ }
+ mnet = net;
+ once = 0;
+
+ if (mnet == NULL) {
+ mnet = TAILQ_FIRST(&stcb->asoc.nets);
+ }
+ do {
+ alt = TAILQ_NEXT(mnet, sctp_next);
+ if (alt == NULL) {
+ once++;
+ if (once > 1) {
+ break;
+ }
+ alt = TAILQ_FIRST(&stcb->asoc.nets);
+ }
+ if (alt->ro.ro_rt == NULL) {
+ if (alt->ro._s_addr) {
+ sctp_free_ifa(alt->ro._s_addr);
+ alt->ro._s_addr = NULL;
+ }
+ alt->src_addr_selected = 0;
+ }
+ if (
+ ((alt->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE) &&
+ (alt->ro.ro_rt != NULL) &&
+ /* sa_ignore NO_NULL_CHK */
+ (!(alt->dest_state & SCTP_ADDR_UNCONFIRMED))
+ ) {
+ /* Found a reachable address */
+ break;
+ }
+ mnet = alt;
+ } while (alt != NULL);
+
+ if (alt == NULL) {
+ /* Case where NO in-service network exists (dormant state) */
+ /* we rotate destinations */
+ once = 0;
+ mnet = net;
+ do {
+ alt = TAILQ_NEXT(mnet, sctp_next);
+ if (alt == NULL) {
+ once++;
+ if (once > 1) {
+ break;
+ }
+ alt = TAILQ_FIRST(&stcb->asoc.nets);
+ }
+ /* sa_ignore NO_NULL_CHK */
+ if ((!(alt->dest_state & SCTP_ADDR_UNCONFIRMED)) &&
+ (alt != net)) {
+ /* Found an alternate address */
+ break;
+ }
+ mnet = alt;
+ } while (alt != NULL);
+ }
+ if (alt == NULL) {
+ return (net);
+ }
+ return (alt);
+}
+
+
+
+static void
+sctp_backoff_on_timeout(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ int win_probe,
+ int num_marked)
+{
+ if (net->RTO == 0) {
+ net->RTO = stcb->asoc.minrto;
+ }
+ net->RTO <<= 1;
+ if (net->RTO > stcb->asoc.maxrto) {
+ net->RTO = stcb->asoc.maxrto;
+ }
+ if ((win_probe == 0) && num_marked) {
+ /* We don't apply penalty to window probe scenarios */
+ /* JRS - Use the congestion control given in the CC module */
+ stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout(stcb, net);
+ }
+}
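
sctp_backoff_on_timeout() above applies a plain exponential backoff with clamping
to the RTO. A minimal stand-alone sketch of that rule, with a hypothetical helper
name:

/*
 * Editorial sketch of the RTO backoff above: double on every timeout,
 * start from the association minimum, never exceed the maximum.
 */
static unsigned int
sctp_backoff_rto_sketch(unsigned int rto, unsigned int minrto, unsigned int maxrto)
{
	if (rto == 0)
		rto = minrto;
	rto <<= 1;
	if (rto > maxrto)
		rto = maxrto;
	return (rto);
}
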
+
+static int
+sctp_mark_all_for_resend(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ struct sctp_nets *alt,
+ int window_probe,
+ int *num_marked)
+{
+
+ /*
+ * Mark all chunks (well not all) that were sent to *net for
+ * retransmission. Move them to alt for their destination as well...
+ * We only mark chunks that have been outstanding long enough to
+ * have received feedback.
+ */
+ struct sctp_tmit_chunk *chk, *tp2, *could_be_sent = NULL;
+ struct sctp_nets *lnets;
+ struct timeval now, min_wait, tv;
+ int cur_rtt;
+ int audit_tf, num_mk, fir;
+ unsigned int cnt_mk;
+ uint32_t orig_flight, orig_tf;
+ uint32_t tsnlast, tsnfirst;
+
+
+ /* none in flight now */
+ audit_tf = 0;
+ fir = 0;
+ /*
+ * figure out how long a data chunk must be pending before we can
+ * mark it ..
+ */
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ /* get cur rto in micro-seconds */
+ cur_rtt = (((net->lastsa >> 2) + net->lastsv) >> 1);
+ cur_rtt *= 1000;
+ if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(cur_rtt,
+ stcb->asoc.peers_rwnd,
+ window_probe,
+ SCTP_FR_T3_MARK_TIME);
+ sctp_log_fr(net->flight_size,
+ SCTP_OS_TIMER_PENDING(&net->fr_timer.timer),
+ SCTP_OS_TIMER_ACTIVE(&net->fr_timer.timer),
+ SCTP_FR_CWND_REPORT);
+ sctp_log_fr(net->flight_size, net->cwnd, stcb->asoc.total_flight, SCTP_FR_CWND_REPORT);
+ }
+ tv.tv_sec = cur_rtt / 1000000;
+ tv.tv_usec = cur_rtt % 1000000;
+ min_wait = now;
+ timevalsub(&min_wait, &tv);
+ if (min_wait.tv_sec < 0 || min_wait.tv_usec < 0) {
+ /*
+ * if we hit here, we don't have enough seconds on the clock
+ * to account for the RTO. We just let the lower seconds be
+ * the bounds and don't worry about it. This may mean we
+ * will mark a lot more than we should.
+ */
+ min_wait.tv_sec = min_wait.tv_usec = 0;
+ }
+ if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(cur_rtt, now.tv_sec, now.tv_usec, SCTP_FR_T3_MARK_TIME);
+ sctp_log_fr(0, min_wait.tv_sec, min_wait.tv_usec, SCTP_FR_T3_MARK_TIME);
+ }
+ /*
+ * Our rwnd will be incorrect here since we are not adding back the
+ * cnt * mbuf but we will fix that down below.
+ */
+ orig_flight = net->flight_size;
+ orig_tf = stcb->asoc.total_flight;
+
+ net->fast_retran_ip = 0;
+ /* Now on to each chunk */
+ num_mk = cnt_mk = 0;
+ tsnfirst = tsnlast = 0;
+ chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
+ for (; chk != NULL; chk = tp2) {
+ tp2 = TAILQ_NEXT(chk, sctp_next);
+ if ((compare_with_wrap(stcb->asoc.last_acked_seq,
+ chk->rec.data.TSN_seq,
+ MAX_TSN)) ||
+ (stcb->asoc.last_acked_seq == chk->rec.data.TSN_seq)) {
+ /* Strange case our list got out of order? */
+ SCTP_PRINTF("Our list is out of order?\n");
+ panic("Out of order list");
+ }
+ if ((chk->whoTo == net) && (chk->sent < SCTP_DATAGRAM_ACKED)) {
+ /*
+ * found one to mark: If it is less than
+ * DATAGRAM_ACKED it MUST not be a skipped or marked
+ * TSN but instead one that is either already set
+ * for retransmission OR one that needs
+ * retransmission.
+ */
+
+ /* validate its been outstanding long enough */
+ if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(chk->rec.data.TSN_seq,
+ chk->sent_rcv_time.tv_sec,
+ chk->sent_rcv_time.tv_usec,
+ SCTP_FR_T3_MARK_TIME);
+ }
+ if ((chk->sent_rcv_time.tv_sec > min_wait.tv_sec) && (window_probe == 0)) {
+ /*
+ * we have reached a chunk that was sent
+ * some seconds past our min.. forget it we
+ * will find no more to send.
+ */
+ if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(0,
+ chk->sent_rcv_time.tv_sec,
+ chk->sent_rcv_time.tv_usec,
+ SCTP_FR_T3_STOPPED);
+ }
+ continue;
+ } else if ((chk->sent_rcv_time.tv_sec == min_wait.tv_sec) &&
+ (window_probe == 0)) {
+ /*
+ * we must look at the micro seconds to
+ * know.
+ */
+ if (chk->sent_rcv_time.tv_usec >= min_wait.tv_usec) {
+ /*
+ * ok it was sent after our boundary
+ * time.
+ */
+ if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(0,
+ chk->sent_rcv_time.tv_sec,
+ chk->sent_rcv_time.tv_usec,
+ SCTP_FR_T3_STOPPED);
+ }
+ continue;
+ }
+ }
+ if (PR_SCTP_TTL_ENABLED(chk->flags)) {
+ /* Is it expired? */
+ if ((now.tv_sec > chk->rec.data.timetodrop.tv_sec) ||
+ ((chk->rec.data.timetodrop.tv_sec == now.tv_sec) &&
+ (now.tv_usec > chk->rec.data.timetodrop.tv_usec))) {
+ /* Yes so drop it */
+ if (chk->data) {
+ (void)sctp_release_pr_sctp_chunk(stcb,
+ chk,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ &stcb->asoc.sent_queue, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ continue;
+ }
+ if (PR_SCTP_RTX_ENABLED(chk->flags)) {
+ /* Has it been retransmitted tv_sec times? */
+ if (chk->snd_count > chk->rec.data.timetodrop.tv_sec) {
+ if (chk->data) {
+ (void)sctp_release_pr_sctp_chunk(stcb,
+ chk,
+ (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
+ &stcb->asoc.sent_queue, SCTP_SO_NOT_LOCKED);
+ }
+ }
+ continue;
+ }
+ if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ num_mk++;
+ if (fir == 0) {
+ fir = 1;
+ tsnfirst = chk->rec.data.TSN_seq;
+ }
+ tsnlast = chk->rec.data.TSN_seq;
+ if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(chk->rec.data.TSN_seq, chk->snd_count,
+ 0, SCTP_FR_T3_MARKED);
+ }
+ if (chk->rec.data.chunk_was_revoked) {
+ /* deflate the cwnd */
+ chk->whoTo->cwnd -= chk->book_size;
+ chk->rec.data.chunk_was_revoked = 0;
+ }
+ net->marked_retrans++;
+ stcb->asoc.marked_retrans++;
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_RSND_TO,
+ chk->whoTo->flight_size,
+ chk->book_size,
+ (uintptr_t) chk->whoTo,
+ chk->rec.data.TSN_seq);
+ }
+ sctp_flight_size_decrease(chk);
+ sctp_total_flight_decrease(stcb, chk);
+ stcb->asoc.peers_rwnd += chk->send_size;
+ stcb->asoc.peers_rwnd += sctp_peer_chunk_oh;
+ }
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ SCTP_STAT_INCR(sctps_markedretrans);
+
+ /* reset the TSN for striking and other FR stuff */
+ chk->rec.data.doing_fast_retransmit = 0;
+ /* Clear any time so NO RTT is being done */
+ chk->do_rtt = 0;
+ if (alt != net) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->no_fr_allowed = 1;
+ chk->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ } else {
+ chk->no_fr_allowed = 0;
+ if (TAILQ_EMPTY(&stcb->asoc.send_queue)) {
+ chk->rec.data.fast_retran_tsn = stcb->asoc.sending_seq;
+ } else {
+ chk->rec.data.fast_retran_tsn = (TAILQ_FIRST(&stcb->asoc.send_queue))->rec.data.TSN_seq;
+ }
+ }
+ /*
+ * CMT: Do not allow FRs on retransmitted TSNs.
+ */
+ if (sctp_cmt_on_off == 1) {
+ chk->no_fr_allowed = 1;
+ }
+ } else if (chk->sent == SCTP_DATAGRAM_ACKED) {
+ /* remember highest acked one */
+ could_be_sent = chk;
+ }
+ if (chk->sent == SCTP_DATAGRAM_RESEND) {
+ cnt_mk++;
+ }
+ }
+ if ((orig_flight - net->flight_size) != (orig_tf - stcb->asoc.total_flight)) {
+ /* we did not subtract the same things? */
+ audit_tf = 1;
+ }
+ if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
+ sctp_log_fr(tsnfirst, tsnlast, num_mk, SCTP_FR_T3_TIMEOUT);
+ }
+#ifdef SCTP_DEBUG
+ if (num_mk) {
+ SCTPDBG(SCTP_DEBUG_TIMER1, "LAST TSN marked was %x\n",
+ tsnlast);
+ SCTPDBG(SCTP_DEBUG_TIMER1, "Num marked for retransmission was %d peer-rwd:%ld\n",
+ num_mk, (u_long)stcb->asoc.peers_rwnd);
+ SCTPDBG(SCTP_DEBUG_TIMER1, "LAST TSN marked was %x\n",
+ tsnlast);
+ SCTPDBG(SCTP_DEBUG_TIMER1, "Num marked for retransmission was %d peer-rwd:%d\n",
+ num_mk,
+ (int)stcb->asoc.peers_rwnd);
+ }
+#endif
+ *num_marked = num_mk;
+ if ((stcb->asoc.sent_queue_retran_cnt == 0) && (could_be_sent)) {
+ /* fix it so we retransmit the highest acked anyway */
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ cnt_mk++;
+ could_be_sent->sent = SCTP_DATAGRAM_RESEND;
+ }
+ if (stcb->asoc.sent_queue_retran_cnt != cnt_mk) {
+#ifdef INVARIANTS
+ SCTP_PRINTF("Local Audit says there are %d for retran asoc cnt:%d we marked:%d this time\n",
+ cnt_mk, stcb->asoc.sent_queue_retran_cnt, num_mk);
+#endif
+#ifndef SCTP_AUDITING_ENABLED
+ stcb->asoc.sent_queue_retran_cnt = cnt_mk;
+#endif
+ }
+ /* Now check for a ECN Echo that may be stranded */
+ TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
+ if ((chk->whoTo == net) &&
+ (chk->rec.chunk_id.id == SCTP_ECN_ECHO)) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = alt;
+ if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ }
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ }
+ if (audit_tf) {
+ SCTPDBG(SCTP_DEBUG_TIMER4,
+ "Audit total flight due to negative value net:%p\n",
+ net);
+ stcb->asoc.total_flight = 0;
+ stcb->asoc.total_flight_count = 0;
+ /* Clear all networks flight size */
+ TAILQ_FOREACH(lnets, &stcb->asoc.nets, sctp_next) {
+ lnets->flight_size = 0;
+ SCTPDBG(SCTP_DEBUG_TIMER4,
+ "Net:%p c-f cwnd:%d ssthresh:%d\n",
+ lnets, lnets->cwnd, lnets->ssthresh);
+ }
+ TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
+ if (chk->sent < SCTP_DATAGRAM_RESEND) {
+ if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
+ sctp_misc_ints(SCTP_FLIGHT_LOG_UP,
+ chk->whoTo->flight_size,
+ chk->book_size,
+ (uintptr_t) chk->whoTo,
+ chk->rec.data.TSN_seq);
+ }
+ sctp_flight_size_increase(chk);
+ sctp_total_flight_increase(stcb, chk);
+ }
+ }
+ }
+ /*
+ * Set up the ECN nonce re-sync point. We do this since
+ * retransmissions are NOT set up for ECN. This means that due to
+ * Karn's rule, we don't know the total of the peer's ECN bits.
+ */
+ chk = TAILQ_FIRST(&stcb->asoc.send_queue);
+ if (chk == NULL) {
+ stcb->asoc.nonce_resync_tsn = stcb->asoc.sending_seq;
+ } else {
+ stcb->asoc.nonce_resync_tsn = chk->rec.data.TSN_seq;
+ }
+ stcb->asoc.nonce_wait_for_ecne = 0;
+ stcb->asoc.nonce_sum_check = 0;
+ /* We return 1 if we only have a window probe outstanding */
+ return (0);
+}
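
Within the marking loop above, the PR-SCTP time-to-live test drops a chunk once
the current time has passed its timetodrop stamp, comparing seconds first and
microseconds only on a tie. A minimal sketch of that comparison, with a
hypothetical helper name:

/*
 * Editorial sketch of the PR-SCTP expiry test used in the marking loop above.
 */
static int
sctp_chunk_expired_sketch(const struct timeval *now,
    const struct timeval *timetodrop)
{
	if (now->tv_sec > timetodrop->tv_sec)
		return (1);
	if ((now->tv_sec == timetodrop->tv_sec) &&
	    (now->tv_usec > timetodrop->tv_usec))
		return (1);
	return (0);
}
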
+
+static void
+sctp_move_all_chunks_to_alt(struct sctp_tcb *stcb,
+ struct sctp_nets *net,
+ struct sctp_nets *alt)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_out *outs;
+ struct sctp_tmit_chunk *chk;
+ struct sctp_stream_queue_pending *sp;
+
+ if (net == alt)
+ /* nothing to do */
+ return;
+
+ asoc = &stcb->asoc;
+
+ /*
+ * now through all the streams checking for chunks sent to our bad
+ * network.
+ */
+ TAILQ_FOREACH(outs, &asoc->out_wheel, next_spoke) {
+ /* now clean up any chunks here */
+ TAILQ_FOREACH(sp, &outs->outqueue, next) {
+ if (sp->net == net) {
+ sctp_free_remote_addr(sp->net);
+ sp->net = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ }
+ }
+ /* Now check the pending queue */
+ TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) {
+ if (chk->whoTo == net) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ }
+
+}
+
+int
+sctp_t3rxt_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+ int win_probe, num_mk;
+
+ if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
+ sctp_log_fr(0, 0, 0, SCTP_FR_T3_TIMEOUT);
+ }
+ if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
+ struct sctp_nets *lnet;
+
+ TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
+ if (net == lnet) {
+ sctp_log_cwnd(stcb, lnet, 1, SCTP_CWND_LOG_FROM_T3);
+ } else {
+ sctp_log_cwnd(stcb, lnet, 0, SCTP_CWND_LOG_FROM_T3);
+ }
+ }
+ }
+ /* Find an alternate and mark those for retransmission */
+ if ((stcb->asoc.peers_rwnd == 0) &&
+ (stcb->asoc.total_flight < net->mtu)) {
+ SCTP_STAT_INCR(sctps_timowindowprobe);
+ win_probe = 1;
+ } else {
+ win_probe = 0;
+ }
+
+ /*
+ * JRS 5/14/07 - If CMT PF is on and the destination is not already
+ * in PF state, set the destination to PF state and store the
+ * current time as the time that the destination was last active. In
+ * addition, find an alternate destination with PF-based
+ * find_alt_net().
+ */
+ if (sctp_cmt_on_off && sctp_cmt_pf) {
+ if ((net->dest_state & SCTP_ADDR_PF) != SCTP_ADDR_PF) {
+ net->dest_state |= SCTP_ADDR_PF;
+ net->last_active = sctp_get_tick_count();
+ SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from active to PF.\n",
+ net);
+ }
+ alt = sctp_find_alternate_net(stcb, net, 2);
+ } else if (sctp_cmt_on_off) {
+ /*
+ * CMT: Using RTX_SSTHRESH policy for CMT. If CMT is being
+ * used, then pick dest with largest ssthresh for any
+ * retransmission.
+ */
+ alt = net;
+ alt = sctp_find_alternate_net(stcb, alt, 1);
+ /*
+ * CUCv2: If a different dest is picked for the
+ * retransmission, then new (rtx-)pseudo_cumack needs to be
+ * tracked for orig dest. Let CUCv2 track new (rtx-)
+ * pseudo-cumack always.
+ */
+ net->find_pseudo_cumack = 1;
+ net->find_rtx_pseudo_cumack = 1;
+ } else { /* CMT is OFF */
+ alt = sctp_find_alternate_net(stcb, net, 0);
+ }
+
+ (void)sctp_mark_all_for_resend(stcb, net, alt, win_probe, &num_mk);
+ /* FR Loss recovery just ended with the T3. */
+ stcb->asoc.fast_retran_loss_recovery = 0;
+
+ /* CMT FR loss recovery ended with the T3 */
+ net->fast_retran_loss_recovery = 0;
+
+ /*
+ * setup the sat loss recovery that prevents satellite cwnd advance.
+ */
+ stcb->asoc.sat_t3_loss_recovery = 1;
+ stcb->asoc.sat_t3_recovery_tsn = stcb->asoc.sending_seq;
+
+ /* Backoff the timer and cwnd */
+ sctp_backoff_on_timeout(stcb, net, win_probe, num_mk);
+ if (win_probe == 0) {
+ /* We don't do normal threshold management on window probes */
+ if (sctp_threshold_management(inp, stcb, net,
+ stcb->asoc.max_send_times)) {
+ /* Association was destroyed */
+ return (1);
+ } else {
+ if (net != stcb->asoc.primary_destination) {
+ /* send an immediate HB if our RTO is stale */
+ struct timeval now;
+ unsigned int ms_goneby;
+
+ (void)SCTP_GETTIME_TIMEVAL(&now);
+ if (net->last_sent_time.tv_sec) {
+ ms_goneby = (now.tv_sec - net->last_sent_time.tv_sec) * 1000;
+ } else {
+ ms_goneby = 0;
+ }
+ if ((ms_goneby > net->RTO) || (net->RTO == 0)) {
+ /*
+ * no recent feedback in an RTO or
+ * more, request an RTT update
+ */
+ if (sctp_send_hb(stcb, 1, net) < 0)
+ return 1;
+ }
+ }
+ }
+ } else {
+ /*
+ * For a window probe we don't penalize the nets but only
+ * the association. This may fail it if SACKs are not coming
+ * back. If SACKs are coming with rwnd locked at 0, we will
+ * continue to hold things waiting for rwnd to rise
+ */
+ if (sctp_threshold_management(inp, stcb, NULL,
+ stcb->asoc.max_send_times)) {
+ /* Association was destroyed */
+ return (1);
+ }
+ }
+ if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
+ /* Move all pending over too */
+ sctp_move_all_chunks_to_alt(stcb, net, alt);
+
+ /*
+ * Get the address that failed, to force a new src address
+ * selection and a route allocation.
+ */
+ if (net->ro._s_addr) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ }
+ net->src_addr_selected = 0;
+
+ /* Force a route allocation too */
+ if (net->ro.ro_rt) {
+ RTFREE(net->ro.ro_rt);
+ net->ro.ro_rt = NULL;
+ }
+ /* Was it our primary? */
+ if ((stcb->asoc.primary_destination == net) && (alt != net)) {
+ /*
+ * Yes, note it as such and find an alternate. Note:
+ * this means HB code must use this to re-set the
+ * primary if it goes active AND if someone does a
+ * change-primary then this flag must be cleared
+ * from any net structures.
+ */
+ if (sctp_set_primary_addr(stcb,
+ (struct sockaddr *)NULL,
+ alt) == 0) {
+ net->dest_state |= SCTP_ADDR_WAS_PRIMARY;
+ }
+ }
+ } else if (sctp_cmt_on_off && sctp_cmt_pf && (net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF) {
+ /*
+ * JRS 5/14/07 - If the destination hasn't failed completely
+ * but is in PF state, a PF-heartbeat needs to be sent
+ * manually.
+ */
+ if (sctp_send_hb(stcb, 1, net) < 0)
+ return 1;
+ }
+ /*
+ * Special case for cookie-echo'ed case, we don't do output but must
+ * await the COOKIE-ACK before retransmission
+ */
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ /*
+ * Here we just reset the timer and start again since we
+ * have not established the asoc
+ */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
+ return (0);
+ }
+ if (stcb->asoc.peer_supports_prsctp) {
+ struct sctp_tmit_chunk *lchk;
+
+ lchk = sctp_try_advance_peer_ack_point(stcb, &stcb->asoc);
+ /* C3. See if we need to send a Fwd-TSN */
+ if (compare_with_wrap(stcb->asoc.advanced_peer_ack_point,
+ stcb->asoc.last_acked_seq, MAX_TSN)) {
+ /*
+ * ISSUE with ECN, see FWD-TSN processing for notes
+ * on issues that will occur when the ECN NONCE
+ * stuff is put into SCTP for cross checking.
+ */
+ send_forward_tsn(stcb, &stcb->asoc);
+ if (lchk) {
+ /* Assure a timer is up */
+ sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, lchk->whoTo);
+ }
+ }
+ }
+ if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
+ sctp_log_cwnd(stcb, net, net->cwnd, SCTP_CWND_LOG_FROM_RTX);
+ }
+ return (0);
+}
+
+int
+sctp_t1init_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ /* bump the thresholds */
+ if (stcb->asoc.delayed_connection) {
+ /*
+ * special hook for delayed connection. The library did NOT
+ * complete the rest of its sends.
+ */
+ stcb->asoc.delayed_connection = 0;
+ sctp_send_initiate(inp, stcb, SCTP_SO_NOT_LOCKED);
+ return (0);
+ }
+ if (SCTP_GET_STATE((&stcb->asoc)) != SCTP_STATE_COOKIE_WAIT) {
+ return (0);
+ }
+ if (sctp_threshold_management(inp, stcb, net,
+ stcb->asoc.max_init_times)) {
+ /* Association was destroyed */
+ return (1);
+ }
+ stcb->asoc.dropped_special_cnt = 0;
+ sctp_backoff_on_timeout(stcb, stcb->asoc.primary_destination, 1, 0);
+ if (stcb->asoc.initial_init_rto_max < net->RTO) {
+ net->RTO = stcb->asoc.initial_init_rto_max;
+ }
+ if (stcb->asoc.numnets > 1) {
+ /* If we have more than one addr use it */
+ struct sctp_nets *alt;
+
+ alt = sctp_find_alternate_net(stcb, stcb->asoc.primary_destination, 0);
+ if ((alt != NULL) && (alt != stcb->asoc.primary_destination)) {
+ sctp_move_all_chunks_to_alt(stcb, stcb->asoc.primary_destination, alt);
+ stcb->asoc.primary_destination = alt;
+ }
+ }
+ /* Send out a new init */
+ sctp_send_initiate(inp, stcb, SCTP_SO_NOT_LOCKED);
+ return (0);
+}
+
+/*
+ * For cookie and asconf we actually need to find and mark for resend, then
+ * increment the resend counter (after all the threshold management stuff of
+ * course).
+ */
+int
+sctp_cookie_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+ struct sctp_tmit_chunk *cookie;
+
+ /* first before all else we must find the cookie */
+ TAILQ_FOREACH(cookie, &stcb->asoc.control_send_queue, sctp_next) {
+ if (cookie->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
+ break;
+ }
+ }
+ if (cookie == NULL) {
+ if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
+ /* FOOBAR! */
+ struct mbuf *oper;
+
+ oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
+ 0, M_DONTWAIT, 1, MT_DATA);
+ if (oper) {
+ struct sctp_paramhdr *ph;
+ uint32_t *ippp;
+
+ SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
+ sizeof(uint32_t);
+ ph = mtod(oper, struct sctp_paramhdr *);
+ ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
+ ph->param_length = htons(SCTP_BUF_LEN(oper));
+ ippp = (uint32_t *) (ph + 1);
+ *ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
+ }
+ inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_4;
+ sctp_abort_an_association(inp, stcb, SCTP_INTERNAL_ERROR,
+ oper, SCTP_SO_NOT_LOCKED);
+ } else {
+#ifdef INVARIANTS
+ panic("Cookie timer expires in wrong state?");
+#else
+ SCTP_PRINTF("Strange in state %d not cookie-echoed yet c-e timer expires?\n", SCTP_GET_STATE(&stcb->asoc));
+ return (0);
+#endif
+ }
+ return (0);
+ }
+ /* Ok we found the cookie, threshold management next */
+ if (sctp_threshold_management(inp, stcb, cookie->whoTo,
+ stcb->asoc.max_init_times)) {
+ /* Assoc is over */
+ return (1);
+ }
+ /*
+ * cleared threshold management, now let's back off the address & select
+ * an alternate
+ */
+ stcb->asoc.dropped_special_cnt = 0;
+ sctp_backoff_on_timeout(stcb, cookie->whoTo, 1, 0);
+ alt = sctp_find_alternate_net(stcb, cookie->whoTo, 0);
+ if (alt != cookie->whoTo) {
+ sctp_free_remote_addr(cookie->whoTo);
+ cookie->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ /* Now mark the retran info */
+ if (cookie->sent != SCTP_DATAGRAM_RESEND) {
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ }
+ cookie->sent = SCTP_DATAGRAM_RESEND;
+ /*
+ * Now call the output routine to kick out the cookie again, Note we
+ * don't mark any chunks for retran so that FR will need to kick in
+ * to move these (or a send timer).
+ */
+ return (0);
+}
+
+int
+sctp_strreset_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+ struct sctp_tmit_chunk *strrst = NULL, *chk = NULL;
+
+ if (stcb->asoc.stream_reset_outstanding == 0) {
+ return (0);
+ }
+ /* find the existing STRRESET, we use the seq number we sent out on */
+ (void)sctp_find_stream_reset(stcb, stcb->asoc.str_reset_seq_out, &strrst);
+ if (strrst == NULL) {
+ return (0);
+ }
+ /* do threshold management */
+ if (sctp_threshold_management(inp, stcb, strrst->whoTo,
+ stcb->asoc.max_send_times)) {
+ /* Assoc is over */
+ return (1);
+ }
+ /*
+ * cleared threshold management, now let's back off the address & select
+ * an alternate
+ */
+ sctp_backoff_on_timeout(stcb, strrst->whoTo, 1, 0);
+ alt = sctp_find_alternate_net(stcb, strrst->whoTo, 0);
+ sctp_free_remote_addr(strrst->whoTo);
+ strrst->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+
+ /* See if a ECN Echo is also stranded */
+ TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
+ if ((chk->whoTo == net) &&
+ (chk->rec.chunk_id.id == SCTP_ECN_ECHO)) {
+ sctp_free_remote_addr(chk->whoTo);
+ if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ }
+ chk->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ }
+ if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
+ /*
+ * If the address went un-reachable, we need to move to
+ * alternates for ALL chk's in queue
+ */
+ sctp_move_all_chunks_to_alt(stcb, net, alt);
+ }
+ /* mark the retran info */
+ if (strrst->sent != SCTP_DATAGRAM_RESEND)
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ strrst->sent = SCTP_DATAGRAM_RESEND;
+
+ /* restart the timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, inp, stcb, strrst->whoTo);
+ return (0);
+}
+
+int
+sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+ struct sctp_tmit_chunk *asconf, *chk;
+
+ /* is this a first send, or a retransmission? */
+ if (stcb->asoc.asconf_sent == 0) {
+ /* compose a new ASCONF chunk and send it */
+ sctp_send_asconf(stcb, net, SCTP_ADDR_NOT_LOCKED);
+ } else {
+ /*
+ * Retransmission of the existing ASCONF is needed
+ */
+
+ /* find the existing ASCONF */
+ TAILQ_FOREACH(asconf, &stcb->asoc.control_send_queue,
+ sctp_next) {
+ if (asconf->rec.chunk_id.id == SCTP_ASCONF) {
+ break;
+ }
+ }
+ if (asconf == NULL) {
+ return (0);
+ }
+ /* do threshold management */
+ if (sctp_threshold_management(inp, stcb, asconf->whoTo,
+ stcb->asoc.max_send_times)) {
+ /* Assoc is over */
+ return (1);
+ }
+ if (asconf->snd_count > stcb->asoc.max_send_times) {
+ /*
+ * Something is rotten: our peer is not responding
+ * to ASCONFs but apparently is to other chunks.
+ * i.e. it is not properly handling the chunk type
+ * upper bits. Mark this peer as ASCONF incapable
+ * and cleanup.
+ */
+ SCTPDBG(SCTP_DEBUG_TIMER1, "asconf_timer: Peer has not responded to our repeated ASCONFs\n");
+ sctp_asconf_cleanup(stcb, net);
+ return (0);
+ }
+ /*
+ * cleared threshold management, so now backoff the net and
+ * select an alternate
+ */
+ sctp_backoff_on_timeout(stcb, asconf->whoTo, 1, 0);
+ alt = sctp_find_alternate_net(stcb, asconf->whoTo, 0);
+ sctp_free_remote_addr(asconf->whoTo);
+ asconf->whoTo = alt;
+ atomic_add_int(&alt->ref_count, 1);
+
+ /* See if an ECN Echo is also stranded */
+ TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
+ if ((chk->whoTo == net) &&
+ (chk->rec.chunk_id.id == SCTP_ECN_ECHO)) {
+ sctp_free_remote_addr(chk->whoTo);
+ chk->whoTo = alt;
+ if (chk->sent != SCTP_DATAGRAM_RESEND) {
+ chk->sent = SCTP_DATAGRAM_RESEND;
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ }
+ atomic_add_int(&alt->ref_count, 1);
+ }
+ }
+ if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
+ /*
+ * If the address went un-reachable, we need to move
+ * to the alternate for ALL chunks in queue
+ */
+ sctp_move_all_chunks_to_alt(stcb, net, alt);
+ }
+ /* mark the retran info */
+ if (asconf->sent != SCTP_DATAGRAM_RESEND)
+ sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
+ asconf->sent = SCTP_DATAGRAM_RESEND;
+ }
+ return (0);
+}
+
+/* Mobility adaptation */
+void
+sctp_delete_prim_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ if (stcb->asoc.deleted_primary == NULL) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "delete_prim_timer: deleted_primary is not stored...\n");
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ return;
+ }
+ SCTPDBG(SCTP_DEBUG_ASCONF1, "delete_prim_timer: finished to keep deleted primary ");
+ SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &stcb->asoc.deleted_primary->ro._l_addr.sa);
+ sctp_free_remote_addr(stcb->asoc.deleted_primary);
+ stcb->asoc.deleted_primary = NULL;
+ sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
+ return;
+}
+
+/*
+ * For the shutdown and shutdown-ack, we do not keep one around on the
+ * control queue. This means we must generate a new one and call the general
+ * chunk output routine, AFTER having done threshold management.
+ */
+int
+sctp_shutdown_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+
+ /* first threshold management */
+ if (sctp_threshold_management(inp, stcb, net, stcb->asoc.max_send_times)) {
+ /* Assoc is over */
+ return (1);
+ }
+ /* second select an alternative */
+ alt = sctp_find_alternate_net(stcb, net, 0);
+
+ /* third generate a shutdown into the queue for our net */
+ if (alt) {
+ sctp_send_shutdown(stcb, alt);
+ } else {
+ /*
+ * if alt is NULL, there is no dest to send to??
+ */
+ return (0);
+ }
+ /* fourth restart timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, inp, stcb, alt);
+ return (0);
+}
+
+int
+sctp_shutdownack_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct sctp_nets *alt;
+
+ /* first threshold management */
+ if (sctp_threshold_management(inp, stcb, net, stcb->asoc.max_send_times)) {
+ /* Assoc is over */
+ return (1);
+ }
+ /* second select an alternative */
+ alt = sctp_find_alternate_net(stcb, net, 0);
+
+ /* third generate a shutdown into the queue for our net */
+ sctp_send_shutdown_ack(stcb, alt);
+
+ /* fourth restart timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, inp, stcb, alt);
+ return (0);
+}
+
+static void
+sctp_audit_stream_queues_for_size(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb)
+{
+ struct sctp_stream_out *outs;
+ struct sctp_stream_queue_pending *sp;
+ unsigned int chks_in_queue = 0;
+ int being_filled = 0;
+
+ /*
+ * This function is ONLY called when the send/sent queues are empty.
+ */
+ if ((stcb == NULL) || (inp == NULL))
+ return;
+
+ if (stcb->asoc.sent_queue_retran_cnt) {
+ SCTP_PRINTF("Hmm, sent_queue_retran_cnt is non-zero %d\n",
+ stcb->asoc.sent_queue_retran_cnt);
+ stcb->asoc.sent_queue_retran_cnt = 0;
+ }
+ SCTP_TCB_SEND_LOCK(stcb);
+ if (TAILQ_EMPTY(&stcb->asoc.out_wheel)) {
+ int i, cnt = 0;
+
+ /* Check to see if a spoke fell off the wheel */
+ for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
+ if (!TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
+ sctp_insert_on_wheel(stcb, &stcb->asoc, &stcb->asoc.strmout[i], 1);
+ cnt++;
+ }
+ }
+ if (cnt) {
+ /* yep, we lost a spoke or two */
+ SCTP_PRINTF("Found an additional %d streams NOT on outwheel, corrected\n", cnt);
+ } else {
+ /* no spokes lost, */
+ stcb->asoc.total_output_queue_size = 0;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ return;
+ }
+ SCTP_TCB_SEND_UNLOCK(stcb);
+ /* Check to see if some data queued, if so report it */
+ TAILQ_FOREACH(outs, &stcb->asoc.out_wheel, next_spoke) {
+ if (!TAILQ_EMPTY(&outs->outqueue)) {
+ TAILQ_FOREACH(sp, &outs->outqueue, next) {
+ if (sp->msg_is_complete)
+ being_filled++;
+ chks_in_queue++;
+ }
+ }
+ }
+ if (chks_in_queue != stcb->asoc.stream_queue_cnt) {
+ SCTP_PRINTF("Hmm, stream queue cnt at %d I counted %d in stream out wheel\n",
+ stcb->asoc.stream_queue_cnt, chks_in_queue);
+ }
+ if (chks_in_queue) {
+ /* call the output queue function */
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+ if ((TAILQ_EMPTY(&stcb->asoc.send_queue)) &&
+ (TAILQ_EMPTY(&stcb->asoc.sent_queue))) {
+ /*
+ * Probably should go in and make it go back through
+ * and add fragments allowed
+ */
+ if (being_filled == 0) {
+ SCTP_PRINTF("Still nothing moved %d chunks are stuck\n",
+ chks_in_queue);
+ }
+ }
+ } else {
+ SCTP_PRINTF("Found no chunks on any queue tot:%lu\n",
+ (u_long)stcb->asoc.total_output_queue_size);
+ stcb->asoc.total_output_queue_size = 0;
+ }
+}
+
+int
+sctp_heartbeat_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
+ struct sctp_nets *net, int cnt_of_unconf)
+{
+ int ret;
+
+ if (net) {
+ if (net->hb_responded == 0) {
+ if (net->ro._s_addr) {
+ /*
+ * Invalidate the src address if we did not
+ * get a response last time.
+ */
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ sctp_backoff_on_timeout(stcb, net, 1, 0);
+ }
+ /* Zero PBA, if it needs it */
+ if (net->partial_bytes_acked) {
+ net->partial_bytes_acked = 0;
+ }
+ }
+ if ((stcb->asoc.total_output_queue_size > 0) &&
+ (TAILQ_EMPTY(&stcb->asoc.send_queue)) &&
+ (TAILQ_EMPTY(&stcb->asoc.sent_queue))) {
+ sctp_audit_stream_queues_for_size(inp, stcb);
+ }
+ /* Send a new HB, this will do threshold management, pick a new dest */
+ if (cnt_of_unconf == 0) {
+ if (sctp_send_hb(stcb, 0, NULL) < 0) {
+ return (1);
+ }
+ } else {
+ /*
+ * this will send out extra hb's up to maxburst if there are
+ * any unconfirmed addresses.
+ */
+ uint32_t cnt_sent = 0;
+
+ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ if ((net->dest_state & SCTP_ADDR_UNCONFIRMED) &&
+ (net->dest_state & SCTP_ADDR_REACHABLE)) {
+ cnt_sent++;
+ if (net->hb_responded == 0) {
+ /* Did we respond last time? */
+ if (net->ro._s_addr) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ }
+ }
+ ret = sctp_send_hb(stcb, 1, net);
+ if (ret < 0)
+ return 1;
+ else if (ret == 0) {
+ break;
+ }
+ if (cnt_sent >= sctp_hb_maxburst)
+ break;
+ }
+ }
+ }
+ return (0);
+}
+
+int
+sctp_is_hb_timer_running(struct sctp_tcb *stcb)
+{
+ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.hb_timer.timer)) {
+ /* it's running */
+ return (1);
+ } else {
+ /* nope */
+ return (0);
+ }
+}
+
+int
+sctp_is_sack_timer_running(struct sctp_tcb *stcb)
+{
+ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
+ /* it's running */
+ return (1);
+ } else {
+ /* nope */
+ return (0);
+ }
+}
+
+#define SCTP_NUMBER_OF_MTU_SIZES 18
+static uint32_t mtu_sizes[] = {
+ 68,
+ 296,
+ 508,
+ 512,
+ 544,
+ 576,
+ 1006,
+ 1492,
+ 1500,
+ 1536,
+ 2002,
+ 2048,
+ 4352,
+ 4464,
+ 8166,
+ 17914,
+ 32000,
+ 65535
+};
+
+
+static uint32_t
+sctp_getnext_mtu(struct sctp_inpcb *inp, uint32_t cur_mtu)
+{
+ /* select another MTU that is just bigger than this one */
+ int i;
+
+ for (i = 0; i < SCTP_NUMBER_OF_MTU_SIZES; i++) {
+ if (cur_mtu < mtu_sizes[i]) {
+ /* no max_mtu is bigger than this one */
+ return (mtu_sizes[i]);
+ }
+ }
+ /* here return the highest allowable */
+ return (cur_mtu);
+}
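
sctp_getnext_mtu() above returns the first plateau strictly larger than the
current MTU, or the current value when already at the top of the table (which
makes the path-MTU timer below take its "nothing to do" branch). A short usage
sketch, with a hypothetical wrapper name:

/*
 * Editorial usage sketch: walking the plateau table with sctp_getnext_mtu().
 */
static void
sctp_mtu_walk_sketch(struct sctp_inpcb *inp)
{
	uint32_t mtu = 1492;

	mtu = sctp_getnext_mtu(inp, mtu);	/* -> 1500 */
	mtu = sctp_getnext_mtu(inp, mtu);	/* -> 1536 */
	mtu = sctp_getnext_mtu(inp, 65535);	/* top of table: stays 65535 */
}
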
+
+
+void
+sctp_pathmtu_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ uint32_t next_mtu;
+
+ /* restart the timer in any case */
+ next_mtu = sctp_getnext_mtu(inp, net->mtu);
+ if (next_mtu <= net->mtu) {
+ /* nothing to do */
+ return;
+ } {
+ uint32_t mtu;
+
+ if ((net->src_addr_selected == 0) ||
+ (net->ro._s_addr == NULL) ||
+ (net->ro._s_addr->localifa_flags & SCTP_BEING_DELETED)) {
+ if ((net->ro._s_addr != NULL) && (net->ro._s_addr->localifa_flags & SCTP_BEING_DELETED)) {
+ sctp_free_ifa(net->ro._s_addr);
+ net->ro._s_addr = NULL;
+ net->src_addr_selected = 0;
+ } else if (net->ro._s_addr == NULL) {
+ net->ro._s_addr = sctp_source_address_selection(inp,
+ stcb,
+ (sctp_route_t *) & net->ro,
+ net, 0, stcb->asoc.vrf_id);
+ }
+ if (net->ro._s_addr)
+ net->src_addr_selected = 1;
+ }
+ if (net->ro._s_addr) {
+ mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._s_addr.sa, net->ro.ro_rt);
+ if (mtu > next_mtu) {
+ net->mtu = next_mtu;
+ }
+ }
+ }
+ /* restart the timer */
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
+}
+
+void
+sctp_autoclose_timer(struct sctp_inpcb *inp,
+ struct sctp_tcb *stcb,
+ struct sctp_nets *net)
+{
+ struct timeval tn, *tim_touse;
+ struct sctp_association *asoc;
+ int ticks_gone_by;
+
+ (void)SCTP_GETTIME_TIMEVAL(&tn);
+ if (stcb->asoc.sctp_autoclose_ticks &&
+ sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
+ /* Auto close is on */
+ asoc = &stcb->asoc;
+ /* pick the time to use */
+ if (asoc->time_last_rcvd.tv_sec >
+ asoc->time_last_sent.tv_sec) {
+ tim_touse = &asoc->time_last_rcvd;
+ } else {
+ tim_touse = &asoc->time_last_sent;
+ }
+ /* Now has long enough transpired to autoclose? */
+ ticks_gone_by = SEC_TO_TICKS(tn.tv_sec - tim_touse->tv_sec);
+ if ((ticks_gone_by > 0) &&
+ (ticks_gone_by >= (int)asoc->sctp_autoclose_ticks)) {
+ /*
+ * autoclose time has hit, call the output routine,
+ * which should do nothing just to be SURE we don't
+ * have hanging data. We can then safely check the
+ * queues and know that we are clear to send
+ * shutdown
+ */
+ sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_AUTOCLOSE_TMR, SCTP_SO_NOT_LOCKED);
+ /* Are we clean? */
+ if (TAILQ_EMPTY(&asoc->send_queue) &&
+ TAILQ_EMPTY(&asoc->sent_queue)) {
+ /*
+ * there is nothing queued to send, so I'm
+ * done...
+ */
+ if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
+ /* only send SHUTDOWN 1st time thru */
+ sctp_send_shutdown(stcb, stcb->asoc.primary_destination);
+ if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
+ (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
+ SCTP_STAT_DECR_GAUGE32(sctps_currestab);
+ }
+ SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
+ SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
+ stcb->sctp_ep, stcb,
+ asoc->primary_destination);
+ }
+ }
+ } else {
+ /*
+ * No auto close at this time, reset t-o to check
+ * later
+ */
+ int tmp;
+
+ /* fool the timer startup to use the time left */
+ tmp = asoc->sctp_autoclose_ticks;
+ asoc->sctp_autoclose_ticks -= ticks_gone_by;
+ sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb,
+ net);
+ /* restore the real tick value */
+ asoc->sctp_autoclose_ticks = tmp;
+ }
+ }
+}
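
The autoclose check above measures idle time from the later of the last-received
and last-sent stamps, converts it to ticks, and compares it with the configured
interval. A minimal sketch of that decision, with a hypothetical helper name
(SEC_TO_TICKS as used in the code above):

/*
 * Editorial sketch of the idle test in sctp_autoclose_timer() above.
 */
static int
sctp_autoclose_due_sketch(struct timeval now, struct timeval last_rcvd,
    struct timeval last_sent, unsigned int autoclose_ticks)
{
	time_t last;
	int ticks_gone_by;

	last = (last_rcvd.tv_sec > last_sent.tv_sec) ?
	    last_rcvd.tv_sec : last_sent.tv_sec;
	ticks_gone_by = SEC_TO_TICKS(now.tv_sec - last);
	return ((ticks_gone_by > 0) &&
	    (ticks_gone_by >= (int)autoclose_ticks));
}
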
+
+void
+sctp_iterator_timer(struct sctp_iterator *it)
+{
+ int iteration_count = 0;
+ int inp_skip = 0;
+
+ /*
+ * only one iterator can run at a time. This is the only way we can
+ * cleanly pull ep's from underneath all the running iterators when
+ * an ep is freed.
+ */
+ SCTP_ITERATOR_LOCK();
+ if (it->inp == NULL) {
+ /* iterator is complete */
+done_with_iterator:
+ SCTP_ITERATOR_UNLOCK();
+ SCTP_INP_INFO_WLOCK();
+ TAILQ_REMOVE(&sctppcbinfo.iteratorhead, it, sctp_nxt_itr);
+ /* stopping the callout is not needed, in theory */
+ SCTP_INP_INFO_WUNLOCK();
+ (void)SCTP_OS_TIMER_STOP(&it->tmr.timer);
+ if (it->function_atend != NULL) {
+ (*it->function_atend) (it->pointer, it->val);
+ }
+ SCTP_FREE(it, SCTP_M_ITER);
+ return;
+ }
+select_a_new_ep:
+ SCTP_INP_WLOCK(it->inp);
+ while (((it->pcb_flags) &&
+ ((it->inp->sctp_flags & it->pcb_flags) != it->pcb_flags)) ||
+ ((it->pcb_features) &&
+ ((it->inp->sctp_features & it->pcb_features) != it->pcb_features))) {
+ /* endpoint flags or features don't match, so keep looking */
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ SCTP_INP_WUNLOCK(it->inp);
+ goto done_with_iterator;
+ }
+ SCTP_INP_WUNLOCK(it->inp);
+ it->inp = LIST_NEXT(it->inp, sctp_list);
+ if (it->inp == NULL) {
+ goto done_with_iterator;
+ }
+ SCTP_INP_WLOCK(it->inp);
+ }
+ if ((it->inp->inp_starting_point_for_iterator != NULL) &&
+ (it->inp->inp_starting_point_for_iterator != it)) {
+ SCTP_PRINTF("Iterator collision, waiting for one at %p\n",
+ it->inp);
+ SCTP_INP_WUNLOCK(it->inp);
+ goto start_timer_return;
+ }
+ /* mark the current iterator on the endpoint */
+ it->inp->inp_starting_point_for_iterator = it;
+ SCTP_INP_WUNLOCK(it->inp);
+ SCTP_INP_RLOCK(it->inp);
+ /* now go through each assoc which is in the desired state */
+ if (it->done_current_ep == 0) {
+ if (it->function_inp != NULL)
+ inp_skip = (*it->function_inp) (it->inp, it->pointer, it->val);
+ it->done_current_ep = 1;
+ }
+ if (it->stcb == NULL) {
+ /* run the per instance function */
+ it->stcb = LIST_FIRST(&it->inp->sctp_asoc_list);
+ }
+ SCTP_INP_RUNLOCK(it->inp);
+ if ((inp_skip) || it->stcb == NULL) {
+ if (it->function_inp_end != NULL) {
+ inp_skip = (*it->function_inp_end) (it->inp,
+ it->pointer,
+ it->val);
+ }
+ goto no_stcb;
+ }
+ if ((it->stcb) &&
+ (it->stcb->asoc.stcb_starting_point_for_iterator == it)) {
+ it->stcb->asoc.stcb_starting_point_for_iterator = NULL;
+ }
+ while (it->stcb) {
+ SCTP_TCB_LOCK(it->stcb);
+ if (it->asoc_state && ((it->stcb->asoc.state & it->asoc_state) != it->asoc_state)) {
+ /* not in the right state... keep looking */
+ SCTP_TCB_UNLOCK(it->stcb);
+ goto next_assoc;
+ }
+ /* mark the current iterator on the assoc */
+ it->stcb->asoc.stcb_starting_point_for_iterator = it;
+ /* see if we have limited out the iterator loop */
+ iteration_count++;
+ if (iteration_count > SCTP_ITERATOR_MAX_AT_ONCE) {
+ start_timer_return:
+ /* set a timer to continue this later */
+ if (it->stcb)
+ SCTP_TCB_UNLOCK(it->stcb);
+ sctp_timer_start(SCTP_TIMER_TYPE_ITERATOR,
+ (struct sctp_inpcb *)it, NULL, NULL);
+ SCTP_ITERATOR_UNLOCK();
+ return;
+ }
+ /* run function on this one */
+ (*it->function_assoc) (it->inp, it->stcb, it->pointer, it->val);
+
+ /*
+ * we lie here, it really needs to have its own type but
+ * first I must verify that this won't affect things :-0
+ */
+ if (it->no_chunk_output == 0)
+ sctp_chunk_output(it->inp, it->stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
+
+ SCTP_TCB_UNLOCK(it->stcb);
+next_assoc:
+ it->stcb = LIST_NEXT(it->stcb, sctp_tcblist);
+ if (it->stcb == NULL) {
+ if (it->function_inp_end != NULL) {
+ inp_skip = (*it->function_inp_end) (it->inp,
+ it->pointer,
+ it->val);
+ }
+ }
+ }
+no_stcb:
+ /* done with all assocs on this endpoint, move on to next endpoint */
+ it->done_current_ep = 0;
+ SCTP_INP_WLOCK(it->inp);
+ it->inp->inp_starting_point_for_iterator = NULL;
+ SCTP_INP_WUNLOCK(it->inp);
+ if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
+ it->inp = NULL;
+ } else {
+ SCTP_INP_INFO_RLOCK();
+ it->inp = LIST_NEXT(it->inp, sctp_list);
+ SCTP_INP_INFO_RUNLOCK();
+ }
+ if (it->inp == NULL) {
+ goto done_with_iterator;
+ }
+ goto select_a_new_ep;
+}
Index: ip_mroute.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/ip_mroute.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet/ip_mroute.c -L sys/netinet/ip_mroute.c -u -r1.1.1.2 -r1.2
--- sys/netinet/ip_mroute.c
+++ sys/netinet/ip_mroute.c
@@ -51,24 +51,25 @@
* MROUTING Revision: 3.5
* and PIM-SMv2 and PIM-DM support, advanced API support,
* bandwidth metering and signaling
- *
- * $FreeBSD: src/sys/netinet/ip_mroute.c,v 1.111.2.2 2006/01/31 16:13:22 andre Exp $
*/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/ip_mroute.c,v 1.138 2007/10/07 20:44:23 silby Exp $");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
#include "opt_mac.h"
#include "opt_mrouting.h"
-#ifdef PIM
#define _PIM_VT 1
-#endif
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
+#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
@@ -90,13 +91,20 @@
#include <netinet/ip_encap.h>
#include <netinet/ip_mroute.h>
#include <netinet/ip_var.h>
-#ifdef PIM
+#include <netinet/ip_options.h>
#include <netinet/pim.h>
#include <netinet/pim_var.h>
-#endif
#include <netinet/udp.h>
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_mroute.h>
+#include <netinet6/ip6_var.h>
+#endif
#include <machine/in_cksum.h>
+#include <security/mac/mac_framework.h>
+
/*
* Control debugging code for rsvp and multicast routing code.
* Can only set them with the debugger.
@@ -123,6 +131,10 @@
* to cover not only the specific data structure but also related data
* structures. It may be better to add more fine-grained locking later;
* it's not clear how performance-critical this code is.
+ *
+ * XXX: This module could particularly benefit from being cleaned
+ * up to use the <sys/queue.h> macros.
+ *
*/
static struct mrtstat mrtstat;
@@ -135,13 +147,18 @@
&mfctable, sizeof(mfctable), "S,*mfc[MFCTBLSIZ]",
"Multicast Forwarding Table (struct *mfc[MFCTBLSIZ], netinet/ip_mroute.h)");
+static struct mtx mrouter_mtx;
+#define MROUTER_LOCK() mtx_lock(&mrouter_mtx)
+#define MROUTER_UNLOCK() mtx_unlock(&mrouter_mtx)
+#define MROUTER_LOCK_ASSERT() mtx_assert(&mrouter_mtx, MA_OWNED)
+#define MROUTER_LOCK_INIT() \
+ mtx_init(&mrouter_mtx, "IPv4 multicast forwarding", NULL, MTX_DEF)
+#define MROUTER_LOCK_DESTROY() mtx_destroy(&mrouter_mtx)
+
static struct mtx mfc_mtx;
#define MFC_LOCK() mtx_lock(&mfc_mtx)
#define MFC_UNLOCK() mtx_unlock(&mfc_mtx)
-#define MFC_LOCK_ASSERT() do { \
- mtx_assert(&mfc_mtx, MA_OWNED); \
- NET_ASSERT_GIANT(); \
-} while (0)
+#define MFC_LOCK_ASSERT() mtx_assert(&mfc_mtx, MA_OWNED)
#define MFC_LOCK_INIT() mtx_init(&mfc_mtx, "mroute mfc table", NULL, MTX_DEF)
#define MFC_LOCK_DESTROY() mtx_destroy(&mfc_mtx)
@@ -159,46 +176,14 @@
static u_char nexpire[MFCTBLSIZ];
+static eventhandler_tag if_detach_event_tag = NULL;
+
static struct callout expire_upcalls_ch;
#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */
#define UPCALL_EXPIRE 6 /* number of timeouts */
-/*
- * Define the token bucket filter structures
- * tbftable -> each vif has one of these for storing info
- */
-
-static struct tbf tbftable[MAXVIFS];
-#define TBF_REPROCESS (hz / 100) /* 100x / second */
-
-/*
- * 'Interfaces' associated with decapsulator (so we can tell
- * packets that went through it from ones that get reflected
- * by a broken gateway). These interfaces are never linked into
- * the system ifnet list & no routes point to them. I.e., packets
- * can't be sent this way. They only exist as a placeholder for
- * multicast source verification.
- */
-static struct ifnet multicast_decap_if[MAXVIFS];
-
#define ENCAP_TTL 64
-#define ENCAP_PROTO IPPROTO_IPIP /* 4 */
-
-/* prototype IP hdr for encapsulated packets */
-static struct ip multicast_encap_iphdr = {
-#if BYTE_ORDER == LITTLE_ENDIAN
- sizeof(struct ip) >> 2, IPVERSION,
-#else
- IPVERSION, sizeof(struct ip) >> 2,
-#endif
- 0, /* tos */
- sizeof(struct ip), /* total length */
- 0, /* id */
- 0, /* frag offset */
- ENCAP_TTL, ENCAP_PROTO,
- 0, /* checksum */
-};
/*
* Bandwidth meter variables and constants
@@ -222,12 +207,45 @@
static struct callout bw_upcalls_ch;
#define BW_UPCALLS_PERIOD (hz) /* periodical flush of bw upcalls */
-#ifdef PIM
static struct pimstat pimstat;
+
+SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
SYSCTL_STRUCT(_net_inet_pim, PIMCTL_STATS, stats, CTLFLAG_RD,
&pimstat, pimstat,
"PIM Statistics (struct pimstat, netinet/pim_var.h)");
+static u_long pim_squelch_wholepkt = 0;
+SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW,
+ &pim_squelch_wholepkt, 0,
+ "Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified");
+
+extern struct domain inetdomain;
+struct protosw in_pim_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inetdomain,
+ .pr_protocol = IPPROTO_PIM,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = pim_input,
+ .pr_output = (pr_output_t*)rip_output,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs
+};
+static const struct encaptab *pim_encap_cookie;
+
+#ifdef INET6
+/* ip6_mroute.c glue */
+extern struct in6_protosw in6_pim_protosw;
+static const struct encaptab *pim6_encap_cookie;
+
+extern int X_ip6_mrouter_set(struct socket *, struct sockopt *);
+extern int X_ip6_mrouter_get(struct socket *, struct sockopt *);
+extern int X_ip6_mrouter_done(void);
+extern int X_ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *);
+extern int X_mrt6_ioctl(int, caddr_t);
+#endif
+
+static int pim_encapcheck(const struct mbuf *, int, int, void *);
+
/*
* Note: the PIM Register encapsulation adds the following in front of a
* data packet:
@@ -272,25 +290,11 @@
static struct ifnet multicast_register_if;
static vifi_t reg_vif_num = VIFI_INVALID;
-#endif /* PIM */
/*
* Private variables.
*/
static vifi_t numvifs;
-static const struct encaptab *encap_cookie;
-
-/*
- * one-back cache used by mroute_encapcheck to locate a tunnel's vif
- * given a datagram's src ip address.
- */
-static u_long last_encap_src;
-static struct vif *last_encap_vif;
-
-/*
- * Callout for queue processing.
- */
-static struct callout tbf_reprocess_ch;
static u_long X_ip_mcast_src(int vifi);
static int X_ip_mforward(struct ip *ip, struct ifnet *ifp,
@@ -303,8 +307,10 @@
static int get_sg_cnt(struct sioc_sg_req *);
static int get_vif_cnt(struct sioc_vif_req *);
+static void if_detached_event(void *arg __unused, struct ifnet *);
static int ip_mrouter_init(struct socket *, int);
static int add_vif(struct vifctl *);
+static int del_vif_locked(vifi_t);
static int del_vif(vifi_t);
static int add_mfc(struct mfcctl2 *);
static int del_mfc(struct mfcctl2 *);
@@ -314,15 +320,7 @@
static void expire_upcalls(void *);
static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t);
static void phyint_send(struct ip *, struct vif *, struct mbuf *);
-static void encap_send(struct ip *, struct vif *, struct mbuf *);
-static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long);
-static void tbf_queue(struct vif *, struct mbuf *);
-static void tbf_process_q(struct vif *);
-static void tbf_reprocess_q(void *);
-static int tbf_dq_sel(struct vif *, struct ip *);
-static void tbf_send_packet(struct vif *, struct mbuf *);
-static void tbf_update_tokens(struct vif *);
-static int priority(struct vif *, struct ip *);
+static void send_packet(struct vif *, struct mbuf *);
/*
* Bandwidth monitoring
@@ -340,7 +338,6 @@
static void expire_bw_upcalls_send(void *);
static void expire_bw_meter_process(void *);
-#ifdef PIM
static int pim_register_send(struct ip *, struct vif *,
struct mbuf *, struct mfc *);
static int pim_register_send_rp(struct ip *, struct vif *,
@@ -348,7 +345,6 @@
static int pim_register_send_upcall(struct ip *, struct vif *,
struct mbuf *, struct mfc *);
static struct mbuf *pim_register_prepare(struct ip *, struct mbuf *);
-#endif
/*
* whether or not special PIM assert processing is enabled.
@@ -378,7 +374,6 @@
/*
* Find a route for a given origin IP address and Multicast group address
- * Type of service parameter to be added in the future!!!
* Statistics are updated by the caller if needed
* (mrtstat.mrts_mfc_lookups and mrtstat.mrts_mfc_misses)
*/
@@ -566,7 +561,7 @@
* Typically, only root can create the raw socket in order to execute
* this ioctl method, however the request might be coming from a prison
*/
- error = suser(curthread);
+ error = priv_check(curthread, PRIV_NETINET_MROUTE);
if (error)
return (error);
switch (cmd) {
@@ -639,18 +634,74 @@
pim_assert = 0;
mrt_api_config = 0;
- callout_init(&expire_upcalls_ch, NET_CALLOUT_MPSAFE);
+ callout_init(&expire_upcalls_ch, CALLOUT_MPSAFE);
bw_upcalls_n = 0;
bzero((caddr_t)bw_meter_timers, sizeof(bw_meter_timers));
- callout_init(&bw_upcalls_ch, NET_CALLOUT_MPSAFE);
- callout_init(&bw_meter_ch, NET_CALLOUT_MPSAFE);
-
- callout_init(&tbf_reprocess_ch, NET_CALLOUT_MPSAFE);
+ callout_init(&bw_upcalls_ch, CALLOUT_MPSAFE);
+ callout_init(&bw_meter_ch, CALLOUT_MPSAFE);
}
-static struct mtx mrouter_mtx; /* used to synch init/done work */
+static void
+if_detached_event(void *arg __unused, struct ifnet *ifp)
+{
+ vifi_t vifi;
+ int i;
+ struct mfc *mfc;
+ struct mfc *nmfc;
+ struct mfc **ppmfc; /* Pointer to previous node's next-pointer */
+ struct rtdetq *pq;
+ struct rtdetq *npq;
+
+ MROUTER_LOCK();
+ if (ip_mrouter == NULL) {
+ MROUTER_UNLOCK();
+ return;
+ }
+ /*
+ * Tear down multicast forwarder state associated with this ifnet.
+ * 1. Walk the vif list, matching vifs against this ifnet.
+ * 2. Walk the multicast forwarding cache (mfc) looking for
+ * inner matches with this vif's index.
+ * 3. Free any pending mbufs for this mfc.
+ * 4. Free the associated mfc entry and state associated with this vif.
+ * Be very careful about unlinking from a singly-linked list whose
+ * "head node" is a pointer in a simple array.
+ * 5. Free vif state. This should disable ALLMULTI on the interface.
+ */
+ VIF_LOCK();
+ MFC_LOCK();
+ for (vifi = 0; vifi < numvifs; vifi++) {
+ if (viftable[vifi].v_ifp != ifp)
+ continue;
+ for (i = 0; i < MFCTBLSIZ; i++) {
+ ppmfc = &mfctable[i];
+ for (mfc = mfctable[i]; mfc != NULL; ) {
+ nmfc = mfc->mfc_next;
+ if (mfc->mfc_parent == vifi) {
+ for (pq = mfc->mfc_stall; pq != NULL; ) {
+ npq = pq->next;
+ m_freem(pq->m);
+ free(pq, M_MRTABLE);
+ pq = npq;
+ }
+ free_bw_list(mfc->mfc_bw_meter);
+ free(mfc, M_MRTABLE);
+ *ppmfc = nmfc;
+ } else {
+ ppmfc = &mfc->mfc_next;
+ }
+ mfc = nmfc;
+ }
+ }
+ del_vif_locked(vifi);
+ }
+ MFC_UNLOCK();
+ VIF_UNLOCK();
+
+ MROUTER_UNLOCK();
+}
+
/*
* Enable multicast routing
*/
@@ -667,13 +718,20 @@
if (version != 1)
return ENOPROTOOPT;
- mtx_lock(&mrouter_mtx);
+ MROUTER_LOCK();
if (ip_mrouter != NULL) {
- mtx_unlock(&mrouter_mtx);
+ MROUTER_UNLOCK();
return EADDRINUSE;
}
+ if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
+ if_detached_event, NULL, EVENTHANDLER_PRI_ANY);
+ if (if_detach_event_tag == NULL) {
+ MROUTER_UNLOCK();
+ return (ENOMEM);
+ }
+
callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL);
callout_reset(&bw_upcalls_ch, BW_UPCALLS_PERIOD,
@@ -682,7 +740,7 @@
ip_mrouter = so;
- mtx_unlock(&mrouter_mtx);
+ MROUTER_UNLOCK();
if (mrtdebug)
log(LOG_DEBUG, "ip_mrouter_init\n");
@@ -703,10 +761,10 @@
struct mfc *rt;
struct rtdetq *rte;
- mtx_lock(&mrouter_mtx);
+ MROUTER_LOCK();
if (ip_mrouter == NULL) {
- mtx_unlock(&mrouter_mtx);
+ MROUTER_UNLOCK();
return EINVAL;
}
@@ -717,16 +775,6 @@
mrt_api_config = 0;
VIF_LOCK();
- if (encap_cookie) {
- const struct encaptab *c = encap_cookie;
- encap_cookie = NULL;
- encap_detach(c);
- }
- VIF_UNLOCK();
-
- callout_stop(&tbf_reprocess_ch);
-
- VIF_LOCK();
/*
* For each phyint in use, disable promiscuous reception of all IP
* multicasts.
@@ -743,11 +791,11 @@
if_allmulti(ifp, 0);
}
}
- bzero((caddr_t)tbftable, sizeof(tbftable));
bzero((caddr_t)viftable, sizeof(viftable));
numvifs = 0;
pim_assert = 0;
VIF_UNLOCK();
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
/*
* Free all multicast forwarding cache entries.
@@ -779,16 +827,9 @@
bzero(bw_meter_timers, sizeof(bw_meter_timers));
MFC_UNLOCK();
- /*
- * Reset de-encapsulation cache
- */
- last_encap_src = INADDR_ANY;
- last_encap_vif = NULL;
-#ifdef PIM
reg_vif_num = VIFI_INVALID;
-#endif
- mtx_unlock(&mrouter_mtx);
+ MROUTER_UNLOCK();
if (mrtdebug)
log(LOG_DEBUG, "ip_mrouter_done\n");
@@ -847,90 +888,6 @@
}
/*
- * Decide if a packet is from a tunnelled peer.
- * Return 0 if not, 64 if so. XXX yuck.. 64 ???
- */
-static int
-mroute_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
-{
- struct ip *ip = mtod(m, struct ip *);
- int hlen = ip->ip_hl << 2;
-
- /*
- * don't claim the packet if it's not to a multicast destination or if
- * we don't have an encapsulating tunnel with the source.
- * Note: This code assumes that the remote site IP address
- * uniquely identifies the tunnel (i.e., that this site has
- * at most one tunnel with the remote site).
- */
- if (!IN_MULTICAST(ntohl(((struct ip *)((char *)ip+hlen))->ip_dst.s_addr)))
- return 0;
- if (ip->ip_src.s_addr != last_encap_src) {
- struct vif *vifp = viftable;
- struct vif *vife = vifp + numvifs;
-
- last_encap_src = ip->ip_src.s_addr;
- last_encap_vif = NULL;
- for ( ; vifp < vife; ++vifp)
- if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
- if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) == VIFF_TUNNEL)
- last_encap_vif = vifp;
- break;
- }
- }
- if (last_encap_vif == NULL) {
- last_encap_src = INADDR_ANY;
- return 0;
- }
- return 64;
-}
-
-/*
- * De-encapsulate a packet and feed it back through ip input (this
- * routine is called whenever IP gets a packet that mroute_encap_func()
- * claimed).
- */
-static void
-mroute_encap_input(struct mbuf *m, int off)
-{
- struct ip *ip = mtod(m, struct ip *);
- int hlen = ip->ip_hl << 2;
-
- if (hlen > sizeof(struct ip))
- ip_stripoptions(m, (struct mbuf *) 0);
- m->m_data += sizeof(struct ip);
- m->m_len -= sizeof(struct ip);
- m->m_pkthdr.len -= sizeof(struct ip);
-
- m->m_pkthdr.rcvif = last_encap_vif->v_ifp;
-
- netisr_queue(NETISR_IP, m); /* mbuf is free'd on failure. */
- /*
- * normally we would need a "schednetisr(NETISR_IP)"
- * here but we were called by ip_input and it is going
- * to loop back & try to dequeue the packet we just
- * queued as soon as we return so we avoid the
- * unnecessary software interrrupt.
- *
- * XXX
- * This no longer holds - we may have direct-dispatched the packet,
- * or there may be a queue processing limit.
- */
-}
-
-extern struct domain inetdomain;
-static struct protosw mroute_encap_protosw =
-{
- .pr_type = SOCK_RAW,
- .pr_domain = &inetdomain,
- .pr_protocol = IPPROTO_IPV4,
- .pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = mroute_encap_input,
- .pr_ctloutput = rip_ctloutput,
- .pr_usrreqs = &rip_usrreqs
-};
-
-/*
* Add a vif to the vif table
*/
static int
@@ -941,13 +898,18 @@
struct ifaddr *ifa;
struct ifnet *ifp;
int error;
- struct tbf *v_tbf = tbftable + vifcp->vifc_vifi;
VIF_LOCK();
if (vifcp->vifc_vifi >= MAXVIFS) {
VIF_UNLOCK();
return EINVAL;
}
+ /* rate limiting is no longer supported by this code */
+ if (vifcp->vifc_rate_limit != 0) {
+ log(LOG_ERR, "rate limiting is no longer supported\n");
+ VIF_UNLOCK();
+ return EINVAL;
+ }
if (vifp->v_lcl_addr.s_addr != INADDR_ANY) {
VIF_UNLOCK();
return EADDRINUSE;
@@ -958,7 +920,6 @@
}
/* Find the interface with an address in AF_INET family */
-#ifdef PIM
if (vifcp->vifc_flags & VIFF_REGISTER) {
/*
* XXX: Because VIFF_REGISTER does not really need a valid
@@ -966,9 +927,7 @@
* check its address.
*/
ifp = NULL;
- } else
-#endif
- {
+ } else {
sin.sin_addr = vifcp->vifc_lcl_addr;
ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
if (ifa == NULL) {
@@ -978,43 +937,10 @@
ifp = ifa->ifa_ifp;
}
- if (vifcp->vifc_flags & VIFF_TUNNEL) {
- if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
- /*
- * An encapsulating tunnel is wanted. Tell
- * mroute_encap_input() to start paying attention
- * to encapsulated packets.
- */
- if (encap_cookie == NULL) {
- int i;
-
- encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV4,
- mroute_encapcheck,
- (struct protosw *)&mroute_encap_protosw, NULL);
-
- if (encap_cookie == NULL) {
- printf("ip_mroute: unable to attach encap\n");
- VIF_UNLOCK();
- return EIO; /* XXX */
- }
- for (i = 0; i < MAXVIFS; ++i) {
- if_initname(&multicast_decap_if[i], "mdecap", i);
- }
- }
- /*
- * Set interface to fake encapsulator interface
- */
- ifp = &multicast_decap_if[vifcp->vifc_vifi];
- /*
- * Prepare cached route entry
- */
- bzero(&vifp->v_route, sizeof(vifp->v_route));
- } else {
- log(LOG_ERR, "source routed tunnels not supported\n");
- VIF_UNLOCK();
- return EOPNOTSUPP;
- }
-#ifdef PIM
+ if ((vifcp->vifc_flags & VIFF_TUNNEL) != 0) {
+ log(LOG_ERR, "tunnels are no longer supported\n");
+ VIF_UNLOCK();
+ return EOPNOTSUPP;
} else if (vifcp->vifc_flags & VIFF_REGISTER) {
ifp = &multicast_register_if;
if (mrtdebug)
@@ -1023,10 +949,8 @@
if (reg_vif_num == VIFI_INVALID) {
if_initname(&multicast_register_if, "register_vif", 0);
multicast_register_if.if_flags = IFF_LOOPBACK;
- bzero(&vifp->v_route, sizeof(vifp->v_route));
reg_vif_num = vifcp->vifc_vifi;
}
-#endif
} else { /* Make sure the interface supports multicast */
if ((ifp->if_flags & IFF_MULTICAST) == 0) {
VIF_UNLOCK();
@@ -1041,21 +965,11 @@
}
}
- /* define parameters for the tbf structure */
- vifp->v_tbf = v_tbf;
- GET_TIME(vifp->v_tbf->tbf_last_pkt_t);
- vifp->v_tbf->tbf_n_tok = 0;
- vifp->v_tbf->tbf_q_len = 0;
- vifp->v_tbf->tbf_max_q_len = MAXQSIZE;
- vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL;
-
vifp->v_flags = vifcp->vifc_flags;
vifp->v_threshold = vifcp->vifc_threshold;
vifp->v_lcl_addr = vifcp->vifc_lcl_addr;
vifp->v_rmt_addr = vifcp->vifc_rmt_addr;
vifp->v_ifp = ifp;
- /* scaling up here allows division by 1024 in critical code */
- vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000;
vifp->v_rsvp_on = 0;
vifp->v_rsvpd = NULL;
/* initialize per vif pkt counters */
@@ -1063,6 +977,7 @@
vifp->v_pkt_out = 0;
vifp->v_bytes_in = 0;
vifp->v_bytes_out = 0;
+ bzero(&vifp->v_route, sizeof(vifp->v_route));
/* Adjust numvifs up if the vifi is higher than numvifs */
if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
@@ -1070,13 +985,12 @@
VIF_UNLOCK();
if (mrtdebug)
- log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n",
+ log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x\n",
vifcp->vifc_vifi,
(u_long)ntohl(vifcp->vifc_lcl_addr.s_addr),
(vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
(u_long)ntohl(vifcp->vifc_rmt_addr.s_addr),
- vifcp->vifc_threshold,
- vifcp->vifc_rate_limit);
+ vifcp->vifc_threshold);
return 0;
}
@@ -1085,46 +999,26 @@
* Delete a vif from the vif table
*/
static int
-del_vif(vifi_t vifi)
+del_vif_locked(vifi_t vifi)
{
struct vif *vifp;
- VIF_LOCK();
+ VIF_LOCK_ASSERT();
if (vifi >= numvifs) {
- VIF_UNLOCK();
return EINVAL;
}
vifp = &viftable[vifi];
if (vifp->v_lcl_addr.s_addr == INADDR_ANY) {
- VIF_UNLOCK();
return EADDRNOTAVAIL;
}
if (!(vifp->v_flags & (VIFF_TUNNEL | VIFF_REGISTER)))
if_allmulti(vifp->v_ifp, 0);
- if (vifp == last_encap_vif) {
- last_encap_vif = NULL;
- last_encap_src = INADDR_ANY;
- }
-
- /*
- * Free packets queued at the interface
- */
- while (vifp->v_tbf->tbf_q) {
- struct mbuf *m = vifp->v_tbf->tbf_q;
-
- vifp->v_tbf->tbf_q = m->m_act;
- m_freem(m);
- }
-
-#ifdef PIM
if (vifp->v_flags & VIFF_REGISTER)
reg_vif_num = VIFI_INVALID;
-#endif
- bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
bzero((caddr_t)vifp, sizeof (*vifp));
if (mrtdebug)
@@ -1136,9 +1030,19 @@
break;
numvifs = vifi;
+ return 0;
+}
+
+static int
+del_vif(vifi_t vifi)
+{
+ int cc;
+
+ VIF_LOCK();
+ cc = del_vif_locked(vifi);
VIF_UNLOCK();
- return 0;
+ return cc;
}
/*
@@ -1346,7 +1250,7 @@
}
/*
- * Send a message to mrouted on the multicast routing socket
+ * Send a message to the routing daemon on the multicast routing socket
*/
static int
socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
@@ -1402,8 +1306,8 @@
* Source-route tunnels are no longer supported.
*/
static int last_log;
- if (last_log != time_second) {
- last_log = time_second;
+ if (last_log != time_uptime) {
+ last_log = time_uptime;
log(LOG_ERR,
"ip_mforward: received source-routed packet from %lx\n",
(u_long)ntohl(ip->ip_src.s_addr));
@@ -1414,7 +1318,7 @@
VIF_LOCK();
MFC_LOCK();
if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) {
- if (ip->ip_ttl < 255)
+ if (ip->ip_ttl < MAXTTL)
ip->ip_ttl++; /* compensate for -1 in *_send routines */
if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
struct vif *vifp = viftable + vifi;
@@ -1441,7 +1345,7 @@
* Don't forward a packet with time-to-live of zero or one,
* or a packet destined to a local-only group.
*/
- if (ip->ip_ttl <= 1 || ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) {
+ if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ntohl(ip->ip_dst.s_addr))) {
MFC_UNLOCK();
VIF_UNLOCK();
return 0;
@@ -1690,17 +1594,6 @@
int plen = ip->ip_len;
VIF_LOCK_ASSERT();
-/*
- * Macro to send packet on vif. Since RSVP packets don't get counted on
- * input, they shouldn't get counted on output, so statistics keeping is
- * separate.
- */
-#define MC_SEND(ip,vifp,m) { \
- if ((vifp)->v_flags & VIFF_TUNNEL) \
- encap_send((ip), (vifp), (m)); \
- else \
- phyint_send((ip), (vifp), (m)); \
-}
/*
* If xmt_vif is not -1, send on only the requested vif.
@@ -1708,12 +1601,10 @@
* (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.)
*/
if (xmt_vif < numvifs) {
-#ifdef PIM
if (viftable[xmt_vif].v_flags & VIFF_REGISTER)
- pim_register_send(ip, viftable + xmt_vif, m, rt);
+ pim_register_send(ip, viftable + xmt_vif, m, rt);
else
-#endif
- MC_SEND(ip, viftable + xmt_vif, m);
+ phyint_send(ip, viftable + xmt_vif, m);
return 1;
}
@@ -1740,10 +1631,8 @@
struct timeval now;
u_long delta;
-#ifdef PIM
if (ifp == &multicast_register_if)
pimstat.pims_rcv_registers_wrongiif++;
-#endif
/* Get vifi for the incoming packet */
for (vifi=0; vifi < numvifs && viftable[vifi].v_ifp != ifp; vifi++)
@@ -1811,12 +1700,10 @@
if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) {
viftable[vifi].v_pkt_out++;
viftable[vifi].v_bytes_out += plen;
-#ifdef PIM
if (viftable[vifi].v_flags & VIFF_REGISTER)
pim_register_send(ip, viftable + vifi, m, rt);
else
-#endif
- MC_SEND(ip, viftable+vifi, m);
+ phyint_send(ip, viftable + vifi, m);
}
/*
@@ -1877,251 +1764,25 @@
if (mb_copy == NULL)
return;
- if (vifp->v_rate_limit == 0)
- tbf_send_packet(vifp, mb_copy);
- else
- tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len);
+ send_packet(vifp, mb_copy);
}
static void
-encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
+send_packet(struct vif *vifp, struct mbuf *m)
{
- struct mbuf *mb_copy;
- struct ip *ip_copy;
- int i, len = ip->ip_len;
-
- VIF_LOCK_ASSERT();
-
- /* Take care of delayed checksums */
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
- in_delayed_cksum(m);
- m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
- }
-
- /*
- * copy the old packet & pullup its IP header into the
- * new mbuf so we can modify it. Try to fill the new
- * mbuf since if we don't the ethernet driver will.
- */
- MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER);
- if (mb_copy == NULL)
- return;
-#ifdef MAC
- mac_create_mbuf_multicast_encap(m, vifp->v_ifp, mb_copy);
-#endif
- mb_copy->m_data += max_linkhdr;
- mb_copy->m_len = sizeof(multicast_encap_iphdr);
-
- if ((mb_copy->m_next = m_copypacket(m, M_DONTWAIT)) == NULL) {
- m_freem(mb_copy);
- return;
- }
- i = MHLEN - M_LEADINGSPACE(mb_copy);
- if (i > len)
- i = len;
- mb_copy = m_pullup(mb_copy, i);
- if (mb_copy == NULL)
- return;
- mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr);
-
- /*
- * fill in the encapsulating IP header.
- */
- ip_copy = mtod(mb_copy, struct ip *);
- *ip_copy = multicast_encap_iphdr;
- ip_copy->ip_id = ip_newid();
- ip_copy->ip_len += len;
- ip_copy->ip_src = vifp->v_lcl_addr;
- ip_copy->ip_dst = vifp->v_rmt_addr;
-
- /*
- * turn the encapsulated IP header back into a valid one.
- */
- ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
- --ip->ip_ttl;
- ip->ip_len = htons(ip->ip_len);
- ip->ip_off = htons(ip->ip_off);
- ip->ip_sum = 0;
- mb_copy->m_data += sizeof(multicast_encap_iphdr);
- ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
- mb_copy->m_data -= sizeof(multicast_encap_iphdr);
-
- if (vifp->v_rate_limit == 0)
- tbf_send_packet(vifp, mb_copy);
- else
- tbf_control(vifp, mb_copy, ip, ip_copy->ip_len);
-}
-
-/*
- * Token bucket filter module
- */
-
-static void
-tbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip, u_long p_len)
-{
- struct tbf *t = vifp->v_tbf;
-
- VIF_LOCK_ASSERT();
-
- if (p_len > MAX_BKT_SIZE) { /* drop if packet is too large */
- mrtstat.mrts_pkt2large++;
- m_freem(m);
- return;
- }
-
- tbf_update_tokens(vifp);
-
- if (t->tbf_q_len == 0) { /* queue empty... */
- if (p_len <= t->tbf_n_tok) { /* send packet if enough tokens */
- t->tbf_n_tok -= p_len;
- tbf_send_packet(vifp, m);
- } else { /* no, queue packet and try later */
- tbf_queue(vifp, m);
- callout_reset(&tbf_reprocess_ch, TBF_REPROCESS,
- tbf_reprocess_q, vifp);
- }
- } else if (t->tbf_q_len < t->tbf_max_q_len) {
- /* finite queue length, so queue pkts and process queue */
- tbf_queue(vifp, m);
- tbf_process_q(vifp);
- } else {
- /* queue full, try to dq and queue and process */
- if (!tbf_dq_sel(vifp, ip)) {
- mrtstat.mrts_q_overflow++;
- m_freem(m);
- } else {
- tbf_queue(vifp, m);
- tbf_process_q(vifp);
- }
- }
-}
-
-/*
- * adds a packet to the queue at the interface
- */
-static void
-tbf_queue(struct vif *vifp, struct mbuf *m)
-{
- struct tbf *t = vifp->v_tbf;
-
- VIF_LOCK_ASSERT();
-
- if (t->tbf_t == NULL) /* Queue was empty */
- t->tbf_q = m;
- else /* Insert at tail */
- t->tbf_t->m_act = m;
-
- t->tbf_t = m; /* Set new tail pointer */
-
-#ifdef DIAGNOSTIC
- /* Make sure we didn't get fed a bogus mbuf */
- if (m->m_act)
- panic("tbf_queue: m_act");
-#endif
- m->m_act = NULL;
-
- t->tbf_q_len++;
-}
-
-/*
- * processes the queue at the interface
- */
-static void
-tbf_process_q(struct vif *vifp)
-{
- struct tbf *t = vifp->v_tbf;
-
- VIF_LOCK_ASSERT();
-
- /* loop through the queue at the interface and send as many packets
- * as possible
- */
- while (t->tbf_q_len > 0) {
- struct mbuf *m = t->tbf_q;
- int len = mtod(m, struct ip *)->ip_len;
-
- /* determine if the packet can be sent */
- if (len > t->tbf_n_tok) /* not enough tokens, we are done */
- break;
- /* ok, reduce no of tokens, dequeue and send the packet. */
- t->tbf_n_tok -= len;
-
- t->tbf_q = m->m_act;
- if (--t->tbf_q_len == 0)
- t->tbf_t = NULL;
-
- m->m_act = NULL;
- tbf_send_packet(vifp, m);
- }
-}
-
-static void
-tbf_reprocess_q(void *xvifp)
-{
- struct vif *vifp = xvifp;
-
- if (ip_mrouter == NULL)
- return;
- VIF_LOCK();
- tbf_update_tokens(vifp);
- tbf_process_q(vifp);
- if (vifp->v_tbf->tbf_q_len)
- callout_reset(&tbf_reprocess_ch, TBF_REPROCESS, tbf_reprocess_q, vifp);
- VIF_UNLOCK();
-}
-
-/* function that will selectively discard a member of the queue
- * based on the precedence value and the priority
- */
-static int
-tbf_dq_sel(struct vif *vifp, struct ip *ip)
-{
- u_int p;
- struct mbuf *m, *last;
- struct mbuf **np;
- struct tbf *t = vifp->v_tbf;
-
- VIF_LOCK_ASSERT();
-
- p = priority(vifp, ip);
-
- np = &t->tbf_q;
- last = NULL;
- while ((m = *np) != NULL) {
- if (p > priority(vifp, mtod(m, struct ip *))) {
- *np = m->m_act;
- /* If we're removing the last packet, fix the tail pointer */
- if (m == t->tbf_t)
- t->tbf_t = last;
- m_freem(m);
- /* It's impossible for the queue to be empty, but check anyways. */
- if (--t->tbf_q_len == 0)
- t->tbf_t = NULL;
- mrtstat.mrts_drop_sel++;
- return 1;
- }
- np = &m->m_act;
- last = m;
- }
- return 0;
-}
-
-static void
-tbf_send_packet(struct vif *vifp, struct mbuf *m)
-{
- VIF_LOCK_ASSERT();
-
- if (vifp->v_flags & VIFF_TUNNEL) /* If tunnel options */
- ip_output(m, NULL, &vifp->v_route, IP_FORWARDING, NULL, NULL);
- else {
struct ip_moptions imo;
+ struct in_multi *imm[2];
int error;
- static struct route ro; /* XXX check this */
+
+ VIF_LOCK_ASSERT();
imo.imo_multicast_ifp = vifp->v_ifp;
imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1;
imo.imo_multicast_loop = 1;
imo.imo_multicast_vif = -1;
+ imo.imo_num_memberships = 0;
+ imo.imo_max_memberships = 2;
+ imo.imo_membership = &imm[0];
/*
* Re-entrancy should not be a problem here, because
@@ -2129,84 +1790,13 @@
* should get rejected because they appear to come from
* the loopback interface, thus preventing looping.
*/
- error = ip_output(m, NULL, &ro, IP_FORWARDING, &imo, NULL);
-
- if (mrtdebug & DEBUG_XMIT)
- log(LOG_DEBUG, "phyint_send on vif %d err %d\n",
- (int)(vifp - viftable), error);
- }
-}
-
-/* determine the current time and then
- * the elapsed time (between the last time and time now)
- * in milliseconds & update the no. of tokens in the bucket
- */
-static void
-tbf_update_tokens(struct vif *vifp)
-{
- struct timeval tp;
- u_long tm;
- struct tbf *t = vifp->v_tbf;
-
- VIF_LOCK_ASSERT();
-
- GET_TIME(tp);
-
- TV_DELTA(tp, t->tbf_last_pkt_t, tm);
-
- /*
- * This formula is actually
- * "time in seconds" * "bytes/second".
- *
- * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8)
- *
- * The (1000/1024) was introduced in add_vif to optimize
- * this divide into a shift.
- */
- t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8;
- t->tbf_last_pkt_t = tp;
-
- if (t->tbf_n_tok > MAX_BKT_SIZE)
- t->tbf_n_tok = MAX_BKT_SIZE;
-}
-
-static int
-priority(struct vif *vifp, struct ip *ip)
-{
- int prio = 50; /* the lowest priority -- default case */
-
- /* temporary hack; may add general packet classifier some day */
-
- /*
- * The UDP port space is divided up into four priority ranges:
- * [0, 16384) : unclassified - lowest priority
- * [16384, 32768) : audio - highest priority
- * [32768, 49152) : whiteboard - medium priority
- * [49152, 65536) : video - low priority
- *
- * Everything else gets lowest priority.
- */
- if (ip->ip_p == IPPROTO_UDP) {
- struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
- switch (ntohs(udp->uh_dport) & 0xc000) {
- case 0x4000:
- prio = 70;
- break;
- case 0x8000:
- prio = 60;
- break;
- case 0xc000:
- prio = 55;
- break;
+ error = ip_output(m, NULL, &vifp->v_route, IP_FORWARDING, &imo, NULL);
+ if (mrtdebug & DEBUG_XMIT) {
+ log(LOG_DEBUG, "phyint_send on vif %td err %d\n",
+ vifp - viftable, error);
}
- }
- return prio;
}
-/*
- * End of token bucket filter modifications
- */
-
static int
X_ip_rsvp_vif(struct socket *so, struct sockopt *sopt)
{
@@ -2718,7 +2308,7 @@
* Allocate a new mbuf, initialize it with the header and
* the payload for the pending calls.
*/
- MGETHDR(m, M_DONTWAIT, MT_HEADER);
+ MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL) {
log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n");
return;
@@ -2950,20 +2540,27 @@
* End of bandwidth monitoring code
*/
-#ifdef PIM
/*
* Send the packet up to the user daemon, or eventually do kernel encapsulation
*
*/
static int
-pim_register_send(struct ip *ip, struct vif *vifp,
- struct mbuf *m, struct mfc *rt)
+pim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m,
+ struct mfc *rt)
{
struct mbuf *mb_copy, *mm;
if (mrtdebug & DEBUG_PIM)
log(LOG_DEBUG, "pim_register_send: ");
+ /*
+ * Do not send IGMP_WHOLEPKT notifications to userland, if the
+ * rendezvous point was unspecified, and we were told not to.
+ */
+ if (pim_squelch_wholepkt != 0 && (mrt_api_config & MRT_MFC_RP) &&
+ (rt->mfc_rp.s_addr == INADDR_ANY))
+ return 0;
+
mb_copy = pim_register_prepare(ip, m);
if (mb_copy == NULL)
return ENOBUFS;
@@ -3046,7 +2643,7 @@
*/
static int
pim_register_send_upcall(struct ip *ip, struct vif *vifp,
- struct mbuf *mb_copy, struct mfc *rt)
+ struct mbuf *mb_copy, struct mfc *rt)
{
struct mbuf *mb_first;
int len = ntohs(ip->ip_len);
@@ -3058,7 +2655,7 @@
/*
* Add a new mbuf with an upcall header
*/
- MGETHDR(mb_first, M_DONTWAIT, MT_HEADER);
+ MGETHDR(mb_first, M_DONTWAIT, MT_DATA);
if (mb_first == NULL) {
m_freem(mb_copy);
return ENOBUFS;
@@ -3099,8 +2696,8 @@
* Encapsulate the data packet in PIM Register message and send it to the RP.
*/
static int
-pim_register_send_rp(struct ip *ip, struct vif *vifp,
- struct mbuf *mb_copy, struct mfc *rt)
+pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
+ struct mfc *rt)
{
struct mbuf *mb_first;
struct ip *ip_outer;
@@ -3118,7 +2715,7 @@
/*
* Add a new mbuf with the encapsulating header
*/
- MGETHDR(mb_first, M_DONTWAIT, MT_HEADER);
+ MGETHDR(mb_first, M_DONTWAIT, MT_DATA);
if (mb_first == NULL) {
m_freem(mb_copy);
return ENOBUFS;
@@ -3156,10 +2753,7 @@
pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr));
mb_first->m_data -= sizeof(pim_encap_iphdr);
- if (vifp->v_rate_limit == 0)
- tbf_send_packet(vifp, mb_first);
- else
- tbf_control(vifp, mb_first, ip, ip_outer->ip_len);
+ send_packet(vifp, mb_first);
/* Keep statistics */
pimstat.pims_snd_registers_msgs++;
@@ -3169,6 +2763,24 @@
}
/*
+ * pim_encapcheck() is called by the encap[46]_input() path at runtime to
+ * determine if a packet is for PIM; allowing PIM to be dynamically loaded
+ * into the kernel.
+ */
+static int
+pim_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
+{
+
+#ifdef DIAGNOSTIC
+ KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM"));
+#endif
+ if (proto != IPPROTO_PIM)
+ return 0; /* not for us; reject the datagram. */
+
+ return 64; /* claim the datagram. */
+}
+
+/*
* PIM-SMv2 and PIM-DM messages processing.
* Receives and verifies the PIM control messages, and passes them
* up to the listening socket, using rip_input().
@@ -3408,24 +3020,66 @@
return;
}
-#endif /* PIM */
+/*
+ * XXX: This is common code for dealing with initialization for both
+ * the IPv4 and IPv6 multicast forwarding paths. It could do with cleanup.
+ */
static int
ip_mroute_modevent(module_t mod, int type, void *unused)
{
switch (type) {
case MOD_LOAD:
- mtx_init(&mrouter_mtx, "mrouter initialization", NULL, MTX_DEF);
+ MROUTER_LOCK_INIT();
MFC_LOCK_INIT();
VIF_LOCK_INIT();
ip_mrouter_reset();
+ TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt",
+ &pim_squelch_wholepkt);
+
+ pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM,
+ pim_encapcheck, &in_pim_protosw, NULL);
+ if (pim_encap_cookie == NULL) {
+ printf("ip_mroute: unable to attach pim encap\n");
+ VIF_LOCK_DESTROY();
+ MFC_LOCK_DESTROY();
+ MROUTER_LOCK_DESTROY();
+ return (EINVAL);
+ }
+
+#ifdef INET6
+ pim6_encap_cookie = encap_attach_func(AF_INET6, IPPROTO_PIM,
+ pim_encapcheck, (struct protosw *)&in6_pim_protosw, NULL);
+ if (pim6_encap_cookie == NULL) {
+ printf("ip_mroute: unable to attach pim6 encap\n");
+ if (pim_encap_cookie) {
+ encap_detach(pim_encap_cookie);
+ pim_encap_cookie = NULL;
+ }
+ VIF_LOCK_DESTROY();
+ MFC_LOCK_DESTROY();
+ MROUTER_LOCK_DESTROY();
+ return (EINVAL);
+ }
+#endif
+
ip_mcast_src = X_ip_mcast_src;
ip_mforward = X_ip_mforward;
ip_mrouter_done = X_ip_mrouter_done;
ip_mrouter_get = X_ip_mrouter_get;
ip_mrouter_set = X_ip_mrouter_set;
+
+#ifdef INET6
+ ip6_mforward = X_ip6_mforward;
+ ip6_mrouter_done = X_ip6_mrouter_done;
+ ip6_mrouter_get = X_ip6_mrouter_get;
+ ip6_mrouter_set = X_ip6_mrouter_set;
+ mrt6_ioctl = X_mrt6_ioctl;
+#endif
+
ip_rsvp_force_done = X_ip_rsvp_force_done;
ip_rsvp_vif = X_ip_rsvp_vif;
+
legal_vif_num = X_legal_vif_num;
mrt_ioctl = X_mrt_ioctl;
rsvp_input_p = X_rsvp_input;
@@ -3440,24 +3094,49 @@
* just loaded and then unloaded w/o starting up a user
* process we still need to cleanup.
*/
- if (ip_mrouter)
+ if (ip_mrouter
+#ifdef INET6
+ || ip6_mrouter
+#endif
+ )
return EINVAL;
+#ifdef INET6
+ if (pim6_encap_cookie) {
+ encap_detach(pim6_encap_cookie);
+ pim6_encap_cookie = NULL;
+ }
+ X_ip6_mrouter_done();
+ ip6_mforward = NULL;
+ ip6_mrouter_done = NULL;
+ ip6_mrouter_get = NULL;
+ ip6_mrouter_set = NULL;
+ mrt6_ioctl = NULL;
+#endif
+
+ if (pim_encap_cookie) {
+ encap_detach(pim_encap_cookie);
+ pim_encap_cookie = NULL;
+ }
X_ip_mrouter_done();
ip_mcast_src = NULL;
ip_mforward = NULL;
ip_mrouter_done = NULL;
ip_mrouter_get = NULL;
ip_mrouter_set = NULL;
+
ip_rsvp_force_done = NULL;
ip_rsvp_vif = NULL;
+
legal_vif_num = NULL;
mrt_ioctl = NULL;
rsvp_input_p = NULL;
+
VIF_LOCK_DESTROY();
MFC_LOCK_DESTROY();
- mtx_destroy(&mrouter_mtx);
+ MROUTER_LOCK_DESTROY();
break;
+
default:
return EOPNOTSUPP;
}
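
With the old per-vif tunnel machinery removed, PIM decapsulation now hangs off
a single encap_attach_func() registration made at MOD_LOAD time:
pim_encapcheck() is consulted for candidate datagrams and claims them (return
value 64) only when the protocol is IPPROTO_PIM; anything it rejects falls
through to the normal raw-IP path.  The sketch below models only that
claim/reject convention in plain userland C; the dispatch table, dispatch()
and the handler names are invented for illustration and are not the kernel's
encap framework.

    #include <stdio.h>

    /* Toy model of a checker/handler table; not the in-kernel API. */
    struct encap_entry {
            int     (*check)(int proto);    /* 0 = reject, >0 = match strength */
            void    (*input)(int proto);
    };

    static int
    pim_check(int proto)
    {
            return (proto == 103 /* IPPROTO_PIM */ ? 64 : 0);
    }

    static void
    pim_handler(int proto)
    {
            printf("proto %d claimed, handed to the PIM input routine\n", proto);
    }

    static struct encap_entry table[] = { { pim_check, pim_handler } };

    static void
    dispatch(int proto)
    {
            int i, best = -1, prio = 0;

            for (i = 0; i < (int)(sizeof(table) / sizeof(table[0])); i++) {
                    int p = table[i].check(proto);

                    if (p > prio) {
                            prio = p;
                            best = i;
                    }
            }
            if (best >= 0)
                    table[best].input(proto);
            else
                    printf("proto %d not claimed, falls back to raw IP\n", proto);
    }

    int
    main(void)
    {
            dispatch(103);  /* claimed by the PIM checker */
            dispatch(4);    /* IPIP: nobody claims it */
            return (0);
    }

The real encapcheck additionally receives the mbuf and offset so a checker can
inspect headers; the same pim_encapcheck() is reused for the IPv6 registration
above since it only looks at the protocol number.
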
Index: alias.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias.h -L sys/netinet/libalias/alias.h -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias.h
+++ sys/netinet/libalias/alias.h
@@ -25,10 +25,10 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/netinet/libalias/alias.h,v 1.32 2005/05/05 21:53:17 glebius Exp $
+ * $FreeBSD: src/sys/netinet/libalias/alias.h,v 1.34 2006/12/01 16:27:11 piso Exp $
*/
-/*-
+/*
* Alias.h defines the outside world interfaces for the packet aliasing
* software.
*
@@ -39,12 +39,16 @@
#ifndef _ALIAS_H_
#define _ALIAS_H_
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+
+#define LIBALIAS_BUF_SIZE 128
#ifdef _KERNEL
/*
* The kernel version of libalias does not support these features.
*/
#define NO_FW_PUNCH
-#define NO_LOGGING
#define NO_USE_SOCKETS
#endif
@@ -180,6 +184,13 @@
/* Transparent proxying routines. */
int LibAliasProxyRule(struct libalias *, const char *_cmd);
+/* Module handling API */
+int LibAliasLoadModule(char *);
+int LibAliasUnLoadAllModule(void);
+int LibAliasRefreshModules(void);
+
+/* Mbuf helper function. */
+struct mbuf *m_megapullup(struct mbuf *, int);
/*
* Mode flags and other constants.
@@ -192,9 +203,7 @@
* If PKT_ALIAS_LOG is set, a message will be printed to /var/log/alias.log
* every time a link is created or deleted. This is useful for debugging.
*/
-#ifndef NO_LOGGING
#define PKT_ALIAS_LOG 0x01
-#endif
/*
* If PKT_ALIAS_DENY_INCOMING is set, then incoming connections (e.g. to ftp,
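
The module-handling entry points declared here (LibAliasLoadModule(),
LibAliasUnLoadAllModule(), LibAliasRefreshModules()) are implemented for the
userland build further down in alias.c.  A minimal caller might look like the
sketch below; the module path is illustrative only, and the usual
LibAliasInit() call from the existing public API is assumed.

    #include <stdio.h>

    #include "alias.h"              /* userland build, as in alias.c */

    int
    main(void)
    {
            struct libalias *la;

            /* One instance per NAT; LibAliasInit(NULL) allocates a fresh one. */
            la = LibAliasInit(NULL);
            if (la == NULL) {
                    fprintf(stderr, "LibAliasInit failed\n");
                    return (1);
            }

            /* Hypothetical module path; adjust to wherever the handler lives. */
            if (LibAliasLoadModule("/usr/lib/libalias_ftp.so") != 0)
                    fprintf(stderr, "could not load the ftp handler\n");

            /* ... LibAliasIn()/LibAliasOut() would be called per packet ... */

            LibAliasUnLoadAllModule();
            return (0);
    }
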
Index: alias.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias.c -L sys/netinet/libalias/alias.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias.c
+++ sys/netinet/libalias/alias.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias.c,v 1.53 2005/06/27 22:21:42 phk Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias.c,v 1.58 2006/12/15 12:50:06 piso Exp $");
/*
Alias.c provides supervisory control for the functions of the
@@ -113,9 +113,16 @@
#ifdef _KERNEL
#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
#else
#include <sys/types.h>
+#include <stdlib.h>
#include <stdio.h>
+#include <ctype.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <string.h>
#endif
#include <netinet/in_systm.h>
@@ -128,22 +135,14 @@
#ifdef _KERNEL
#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
#else
+#include <err.h>
#include "alias.h"
#include "alias_local.h"
+#include "alias_mod.h"
#endif
-#define NETBIOS_NS_PORT_NUMBER 137
-#define NETBIOS_DGM_PORT_NUMBER 138
-#define FTP_CONTROL_PORT_NUMBER 21
-#define IRC_CONTROL_PORT_NUMBER_1 6667
-#define IRC_CONTROL_PORT_NUMBER_2 6668
-#define CUSEEME_PORT_NUMBER 7648
-#define RTSP_CONTROL_PORT_NUMBER_1 554
-#define RTSP_CONTROL_PORT_NUMBER_2 7070
-#define TFTP_PORT_NUMBER 69
-#define PPTP_CONTROL_PORT_NUMBER 1723
-
static __inline int
twowords(void *p)
{
@@ -284,6 +283,8 @@
static int
IcmpAliasIn1(struct libalias *la, struct ip *pip)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
/*
De-alias incoming echo and timestamp replies.
Alias incoming echo and timestamp requests.
@@ -327,6 +328,8 @@
static int
IcmpAliasIn2(struct libalias *la, struct ip *pip)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
/*
Alias incoming ICMP error messages containing
IP header and first 64 bits of datagram.
@@ -430,6 +433,7 @@
int iresult;
struct icmp *ic;
+ LIBALIAS_LOCK_ASSERT(la);
/* Return if proxy-only mode is enabled */
if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)
return (PKT_ALIAS_OK);
@@ -469,6 +473,7 @@
struct alias_link *lnk;
struct icmp *ic;
+ LIBALIAS_LOCK_ASSERT(la);
ic = (struct icmp *)ip_next(pip);
/* Save overwritten data for when echo packet returns */
@@ -516,6 +521,7 @@
struct tcphdr *tc;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
ic = (struct icmp *)ip_next(pip);
ip = &ic->icmp_ip;
@@ -609,6 +615,7 @@
int iresult;
struct icmp *ic;
+ LIBALIAS_LOCK_ASSERT(la);
(void)create;
/* Return if proxy-only mode is enabled */
@@ -651,6 +658,7 @@
*/
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
/* Return if proxy-only mode is enabled */
if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)
return (PKT_ALIAS_OK);
@@ -682,6 +690,7 @@
*/
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
(void)create;
/* Return if proxy-only mode is enabled */
@@ -711,6 +720,7 @@
struct udphdr *ud;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
/* Return if proxy-only mode is enabled */
if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)
return (PKT_ALIAS_OK);
@@ -725,24 +735,24 @@
struct in_addr original_address;
u_short alias_port;
int accumulate;
- int r = 0;
+ int r = 0, error;
+ struct alias_data ad = {
+ .lnk = lnk,
+ .oaddr = &original_address,
+ .aaddr = &alias_address,
+ .aport = &alias_port,
+ .sport = &ud->uh_sport,
+ .dport = &ud->uh_dport,
+ .maxpktsize = 0
+ };
alias_address = GetAliasAddress(lnk);
original_address = GetOriginalAddress(lnk);
alias_port = ud->uh_dport;
ud->uh_dport = GetOriginalPort(lnk);
-/* Special processing for IP encoding protocols */
- if (ntohs(ud->uh_dport) == CUSEEME_PORT_NUMBER)
- AliasHandleCUSeeMeIn(la, pip, original_address);
-/* If NETBIOS Datagram, It should be alias address in UDP Data, too */
- else if (ntohs(ud->uh_dport) == NETBIOS_DGM_PORT_NUMBER
- || ntohs(ud->uh_sport) == NETBIOS_DGM_PORT_NUMBER)
- r = AliasHandleUdpNbt(la, pip, lnk, &original_address, ud->uh_dport);
- else if (ntohs(ud->uh_dport) == NETBIOS_NS_PORT_NUMBER
- || ntohs(ud->uh_sport) == NETBIOS_NS_PORT_NUMBER)
- r = AliasHandleUdpNbtNS(la, pip, lnk, &alias_address, &alias_port,
- &original_address, &ud->uh_dport);
+ /* Walk out chain. */
+ error = find_handler(IN, UDP, la, pip, &ad);
/* If UDP checksum is not zero, then adjust since destination port */
/* is being unaliased and destination address is being altered. */
@@ -774,7 +784,9 @@
{
struct udphdr *ud;
struct alias_link *lnk;
+ int error;
+ LIBALIAS_LOCK_ASSERT(la);
/* Return if proxy-only mode is enabled */
if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY)
return (PKT_ALIAS_OK);
@@ -787,29 +799,21 @@
if (lnk != NULL) {
u_short alias_port;
struct in_addr alias_address;
+ struct alias_data ad = {
+ .lnk = lnk,
+ .oaddr = NULL,
+ .aaddr = &alias_address,
+ .aport = &alias_port,
+ .sport = &ud->uh_sport,
+ .dport = &ud->uh_dport,
+ .maxpktsize = 0
+ };
alias_address = GetAliasAddress(lnk);
alias_port = GetAliasPort(lnk);
-/* Special processing for IP encoding protocols */
- if (ntohs(ud->uh_dport) == CUSEEME_PORT_NUMBER)
- AliasHandleCUSeeMeOut(la, pip, lnk);
-/* If NETBIOS Datagram, It should be alias address in UDP Data, too */
- else if (ntohs(ud->uh_dport) == NETBIOS_DGM_PORT_NUMBER
- || ntohs(ud->uh_sport) == NETBIOS_DGM_PORT_NUMBER)
- AliasHandleUdpNbt(la, pip, lnk, &alias_address, alias_port);
- else if (ntohs(ud->uh_dport) == NETBIOS_NS_PORT_NUMBER
- || ntohs(ud->uh_sport) == NETBIOS_NS_PORT_NUMBER)
- AliasHandleUdpNbtNS(la, pip, lnk, &pip->ip_src, &ud->uh_sport,
- &alias_address, &alias_port);
-/*
- * We don't know in advance what TID the TFTP server will choose,
- * so we create a wilcard link (destination port is unspecified)
- * that will match any TID from a given destination.
- */
- else if (ntohs(ud->uh_dport) == TFTP_PORT_NUMBER)
- FindRtspOut(la, pip->ip_src, pip->ip_dst,
- ud->uh_sport, alias_port, IPPROTO_UDP);
+ /* Walk out chain. */
+ error = find_handler(OUT, UDP, la, pip, &ad);
/* If UDP checksum is not zero, adjust since source port is */
/* being aliased and source address is being altered */
@@ -843,6 +847,7 @@
struct tcphdr *tc;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
tc = (struct tcphdr *)ip_next(pip);
lnk = FindUdpTcpIn(la, pip->ip_src, pip->ip_dst,
@@ -855,15 +860,26 @@
struct in_addr proxy_address;
u_short alias_port;
u_short proxy_port;
- int accumulate;
+ int accumulate, error;
+
+ /*
+ * The initialization of many of these fields is a bit below, but
+ * AliasHandlePptpIn seems to need the destination port as it came
+ * within the packet and not the original one; see [*] below.
+ */
+
+ struct alias_data ad = {
+ .lnk = lnk,
+ .oaddr = NULL,
+ .aaddr = NULL,
+ .aport = NULL,
+ .sport = &tc->th_sport,
+ .dport = &tc->th_dport,
+ .maxpktsize = 0
+ };
-/* Special processing for IP encoding protocols */
- if (ntohs(tc->th_dport) == PPTP_CONTROL_PORT_NUMBER
- || ntohs(tc->th_sport) == PPTP_CONTROL_PORT_NUMBER)
- AliasHandlePptpIn(la, pip, lnk);
- else if (la->skinnyPort != 0 && (ntohs(tc->th_dport) == la->skinnyPort
- || ntohs(tc->th_sport) == la->skinnyPort))
- AliasHandleSkinny(la, pip, lnk);
+ /* Walk out chain. */
+ error = find_handler(IN, TCP, la, pip, &ad);
alias_address = GetAliasAddress(lnk);
original_address = GetOriginalAddress(lnk);
@@ -872,6 +888,28 @@
tc->th_dport = GetOriginalPort(lnk);
proxy_port = GetProxyPort(lnk);
+ /*
+ * Look above: if anyone is going to add a find_handler call AFTER
+ * this AliasHandlePptpIn point, please redo alias_data too.
+ * Uncommenting the piece below should be enough.
+ */
+#if 0
+ struct alias_data ad = {
+ .lnk = lnk,
+ .oaddr = &original_address,
+ .aaddr = &alias_address,
+ .aport = &alias_port,
+ .sport = &ud->uh_sport,
+ .dport = &ud->uh_dport,
+ .maxpktsize = 0
+ };
+
+ /* Walk out chain. */
+ error = find_handler(la, pip, &ad);
+ if (error == EHDNOF)
+ printf("Protocol handler not found\n");
+#endif
+
/* Adjust TCP checksum since destination port is being unaliased */
/* and destination port is being altered. */
accumulate = alias_port;
@@ -926,7 +964,7 @@
static int
TcpAliasOut(struct libalias *la, struct ip *pip, int maxpacketsize, int create)
{
- int proxy_type;
+ int proxy_type, error;
u_short dest_port;
u_short proxy_server_port;
struct in_addr dest_address;
@@ -934,6 +972,7 @@
struct tcphdr *tc;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
tc = (struct tcphdr *)ip_next(pip);
if (create)
@@ -973,6 +1012,15 @@
u_short alias_port;
struct in_addr alias_address;
int accumulate;
+ struct alias_data ad = {
+ .lnk = lnk,
+ .oaddr = NULL,
+ .aaddr = &alias_address,
+ .aport = &alias_port,
+ .sport = &tc->th_sport,
+ .dport = &tc->th_dport,
+ .maxpktsize = maxpacketsize
+ };
/* Save original destination address, if this is a proxy packet.
Also modify packet to include destination encoding. This may
@@ -989,25 +1037,9 @@
/* Monitor TCP connection state */
TcpMonitorOut(pip, lnk);
-
-/* Special processing for IP encoding protocols */
- if (ntohs(tc->th_dport) == FTP_CONTROL_PORT_NUMBER
- || ntohs(tc->th_sport) == FTP_CONTROL_PORT_NUMBER)
- AliasHandleFtpOut(la, pip, lnk, maxpacketsize);
- else if (ntohs(tc->th_dport) == IRC_CONTROL_PORT_NUMBER_1
- || ntohs(tc->th_dport) == IRC_CONTROL_PORT_NUMBER_2)
- AliasHandleIrcOut(la, pip, lnk, maxpacketsize);
- else if (ntohs(tc->th_dport) == RTSP_CONTROL_PORT_NUMBER_1
- || ntohs(tc->th_sport) == RTSP_CONTROL_PORT_NUMBER_1
- || ntohs(tc->th_dport) == RTSP_CONTROL_PORT_NUMBER_2
- || ntohs(tc->th_sport) == RTSP_CONTROL_PORT_NUMBER_2)
- AliasHandleRtspOut(la, pip, lnk, maxpacketsize);
- else if (ntohs(tc->th_dport) == PPTP_CONTROL_PORT_NUMBER
- || ntohs(tc->th_sport) == PPTP_CONTROL_PORT_NUMBER)
- AliasHandlePptpOut(la, pip, lnk);
- else if (la->skinnyPort != 0 && (ntohs(tc->th_sport) == la->skinnyPort
- || ntohs(tc->th_dport) == la->skinnyPort))
- AliasHandleSkinny(la, pip, lnk);
+
+ /* Walk out chain. */
+ error = find_handler(OUT, TCP, la, pip, &ad);
/* Adjust TCP checksum since source port is being aliased */
/* and source address is being altered */
@@ -1067,6 +1099,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindFragmentIn2(la, pip->ip_src, pip->ip_dst, pip->ip_id);
if (lnk != NULL) {
struct in_addr original_address;
@@ -1087,6 +1120,7 @@
{
struct in_addr alias_address;
+ LIBALIAS_LOCK_ASSERT(la);
alias_address = FindAliasAddress(la, pip->ip_src);
DifferentialChecksum(&pip->ip_sum,
&alias_address, &pip->ip_src, 2);
@@ -1120,6 +1154,7 @@
struct alias_link *lnk;
struct ip *pip;
+ LIBALIAS_LOCK(la);
pip = (struct ip *)ptr;
lnk = AddFragmentPtrLink(la, pip->ip_src, pip->ip_id);
iresult = PKT_ALIAS_ERROR;
@@ -1127,6 +1162,7 @@
SetFragmentPtr(lnk, ptr);
iresult = PKT_ALIAS_OK;
}
+ LIBALIAS_UNLOCK(la);
return (iresult);
}
@@ -1138,17 +1174,18 @@
char *fptr;
struct ip *pip;
+ LIBALIAS_LOCK(la);
pip = (struct ip *)ptr;
lnk = FindFragmentPtr(la, pip->ip_src, pip->ip_id);
if (lnk != NULL) {
GetFragmentPtr(lnk, &fptr);
SetFragmentPtr(lnk, NULL);
SetExpire(lnk, 0); /* Deletes link */
+ } else
+ fptr = NULL;
- return (fptr);
- } else {
- return (NULL);
- }
+ LIBALIAS_UNLOCK(la);
+ return (fptr);
}
@@ -1163,6 +1200,7 @@
struct ip *pip;
struct ip *fpip;
+ LIBALIAS_LOCK(la);
(void)la;
pip = (struct ip *)ptr;
fpip = (struct ip *)ptr_fragment;
@@ -1170,21 +1208,40 @@
DifferentialChecksum(&fpip->ip_sum,
&pip->ip_dst, &fpip->ip_dst, 2);
fpip->ip_dst = pip->ip_dst;
+ LIBALIAS_UNLOCK(la);
}
+/* Local prototypes */
+static int
+LibAliasOutLocked(struct libalias *la, char *ptr,
+ int maxpacketsize, int create);
+static int
+LibAliasInLocked(struct libalias *la, char *ptr,
+ int maxpacketsize);
int
LibAliasIn(struct libalias *la, char *ptr, int maxpacketsize)
{
+ int res;
+
+ LIBALIAS_LOCK(la);
+ res = LibAliasInLocked(la, ptr, maxpacketsize);
+ LIBALIAS_UNLOCK(la);
+ return (res);
+}
+
+static int
+LibAliasInLocked(struct libalias *la, char *ptr, int maxpacketsize)
+{
struct in_addr alias_addr;
struct ip *pip;
int iresult;
if (la->packetAliasMode & PKT_ALIAS_REVERSE) {
la->packetAliasMode &= ~PKT_ALIAS_REVERSE;
- iresult = LibAliasOut(la, ptr, maxpacketsize);
+ iresult = LibAliasOutLocked(la, ptr, maxpacketsize, 1);
la->packetAliasMode |= PKT_ALIAS_REVERSE;
- return (iresult);
+ goto getout;
}
HouseKeeping(la);
ClearCheckNewLink(la);
@@ -1193,8 +1250,10 @@
/* Defense against mangled packets */
if (ntohs(pip->ip_len) > maxpacketsize
- || (pip->ip_hl << 2) > maxpacketsize)
- return (PKT_ALIAS_IGNORED);
+ || (pip->ip_hl << 2) > maxpacketsize) {
+ iresult = PKT_ALIAS_IGNORED;
+ goto getout;
+ }
iresult = PKT_ALIAS_IGNORED;
if ((ntohs(pip->ip_off) & IP_OFFMASK) == 0) {
@@ -1208,13 +1267,26 @@
case IPPROTO_TCP:
iresult = TcpAliasIn(la, pip);
break;
- case IPPROTO_GRE:
- if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY ||
- AliasHandlePptpGreIn(la, pip) == 0)
+ case IPPROTO_GRE: {
+ int error;
+ struct alias_data ad = {
+ .lnk = NULL,
+ .oaddr = NULL,
+ .aaddr = NULL,
+ .aport = NULL,
+ .sport = NULL,
+ .dport = NULL,
+ .maxpktsize = 0
+ };
+
+ /* Walk out chain. */
+ error = find_handler(IN, IP, la, pip, &ad);
+ if (error == 0)
iresult = PKT_ALIAS_OK;
else
iresult = ProtoAliasIn(la, pip);
- break;
+ }
+ break;
default:
iresult = ProtoAliasIn(la, pip);
break;
@@ -1235,6 +1307,7 @@
iresult = FragmentIn(la, pip);
}
+getout:
return (iresult);
}
@@ -1255,19 +1328,32 @@
#define UNREG_ADDR_C_UPPER 0xc0a8ffff
int
-LibAliasOut(struct libalias *la, char *ptr, /* valid IP packet */
- int maxpacketsize /* How much the packet data may grow (FTP
- * and IRC inline changes) */
-)
+LibAliasOut(struct libalias *la, char *ptr, int maxpacketsize)
{
- return (LibAliasOutTry(la, ptr, maxpacketsize, 1));
+ int res;
+
+ LIBALIAS_LOCK(la);
+ res = LibAliasOutLocked(la, ptr, maxpacketsize, 1);
+ LIBALIAS_UNLOCK(la);
+ return (res);
}
int
-LibAliasOutTry(struct libalias *la, char *ptr, /* valid IP packet */
+LibAliasOutTry(struct libalias *la, char *ptr, int maxpacketsize, int create)
+{
+ int res;
+
+ LIBALIAS_LOCK(la);
+ res = LibAliasOutLocked(la, ptr, maxpacketsize, create);
+ LIBALIAS_UNLOCK(la);
+ return (res);
+}
+
+static int
+LibAliasOutLocked(struct libalias *la, char *ptr, /* valid IP packet */
int maxpacketsize, /* How much the packet data may grow (FTP
* and IRC inline changes) */
- int create /* Create new entries ? */
+ int create /* Create new entries ? */
)
{
int iresult;
@@ -1276,9 +1362,9 @@
if (la->packetAliasMode & PKT_ALIAS_REVERSE) {
la->packetAliasMode &= ~PKT_ALIAS_REVERSE;
- iresult = LibAliasIn(la, ptr, maxpacketsize);
+ iresult = LibAliasInLocked(la, ptr, maxpacketsize);
la->packetAliasMode |= PKT_ALIAS_REVERSE;
- return (iresult);
+ goto getout;
}
HouseKeeping(la);
ClearCheckNewLink(la);
@@ -1286,8 +1372,10 @@
/* Defense against mangled packets */
if (ntohs(pip->ip_len) > maxpacketsize
- || (pip->ip_hl << 2) > maxpacketsize)
- return (PKT_ALIAS_IGNORED);
+ || (pip->ip_hl << 2) > maxpacketsize) {
+ iresult = PKT_ALIAS_IGNORED;
+ goto getout;
+ }
addr_save = GetDefaultAliasAddress(la);
if (la->packetAliasMode & PKT_ALIAS_UNREGISTERED_ONLY) {
@@ -1321,12 +1409,25 @@
case IPPROTO_TCP:
iresult = TcpAliasOut(la, pip, maxpacketsize, create);
break;
- case IPPROTO_GRE:
- if (AliasHandlePptpGreOut(la, pip) == 0)
- iresult = PKT_ALIAS_OK;
- else
- iresult = ProtoAliasOut(la, pip, create);
- break;
+ case IPPROTO_GRE: {
+ int error;
+ struct alias_data ad = {
+ .lnk = NULL,
+ .oaddr = NULL,
+ .aaddr = NULL,
+ .aport = NULL,
+ .sport = NULL,
+ .dport = NULL,
+ .maxpktsize = 0
+ };
+ /* Walk out chain. */
+ error = find_handler(OUT, IP, la, pip, &ad);
+ if (error == 0)
+ iresult = PKT_ALIAS_OK;
+ else
+ iresult = ProtoAliasOut(la, pip, create);
+ }
+ break;
default:
iresult = ProtoAliasOut(la, pip, create);
break;
@@ -1336,6 +1437,7 @@
}
SetDefaultAliasAddress(la, addr_save);
+getout:
return (iresult);
}
@@ -1351,12 +1453,13 @@
struct alias_link *lnk;
int iresult = PKT_ALIAS_IGNORED;
+ LIBALIAS_LOCK(la);
pip = (struct ip *)ptr;
/* Defense against mangled packets */
if (ntohs(pip->ip_len) > maxpacketsize
|| (pip->ip_hl << 2) > maxpacketsize)
- return (iresult);
+ goto getout;
ud = (struct udphdr *)ip_next(pip);
tc = (struct tcphdr *)ip_next(pip);
@@ -1440,6 +1543,140 @@
iresult = PKT_ALIAS_OK;
}
}
+getout:
+ LIBALIAS_UNLOCK(la);
return (iresult);
}
+
+#ifndef _KERNEL
+
+int
+LibAliasRefreshModules(void)
+{
+ char buf[256], conf[] = "/etc/libalias.conf";
+ FILE *fd;
+ int i, len;
+
+ fd = fopen(conf, "r");
+ if (fd == NULL)
+ err(1, "fopen(%s)", conf);
+
+ LibAliasUnLoadAllModule();
+
+ for (;;) {
+ fgets(buf, 256, fd);
+ if (feof(fd))
+ break;
+ len = strlen(buf);
+ if (len > 1) {
+ for (i = 0; i < len; i++)
+ if (!isspace(buf[i]))
+ break;
+ if (buf[i] == '#')
+ continue;
+ buf[len - 1] = '\0';
+ printf("Loading %s\n", buf);
+ LibAliasLoadModule(buf);
+ }
+ }
+ return (0);
+}
+
+int
+LibAliasLoadModule(char *path)
+{
+ struct dll *t;
+ void *handle;
+ struct proto_handler *m;
+ const char *error;
+ moduledata_t *p;
+
+ handle = dlopen (path, RTLD_LAZY);
+ if (!handle) {
+ fprintf(stderr, "%s\n", dlerror());
+ return (EINVAL);
+ }
+
+ p = dlsym(handle, "alias_mod");
+ if ((error = dlerror()) != NULL) {
+ fprintf(stderr, "%s\n", dlerror());
+ return (EINVAL);
+ }
+
+ t = malloc(sizeof(struct dll));
+ if (t == NULL)
+ return (ENOMEM);
+ strncpy(t->name, p->name, DLL_LEN);
+ t->handle = handle;
+ if (attach_dll(t) == EEXIST) {
+ free(t);
+ fprintf(stderr, "dll conflict\n");
+ return (EEXIST);
+ }
+
+ m = dlsym(t->handle, "handlers");
+ if ((error = dlerror()) != NULL) {
+ fprintf(stderr, "%s\n", error);
+ return (EINVAL);
+ }
+
+ LibAliasAttachHandlers(m);
+ return (0);
+}
+
+int
+LibAliasUnLoadAllModule(void)
+{
+ struct dll *t;
+ struct proto_handler *p;
+
+ /* Unload all modules then reload everything. */
+ while ((p = first_handler()) != NULL) {
+ detach_handler(p);
+ }
+ while ((t = walk_dll_chain()) != NULL) {
+ dlclose(t->handle);
+ free(t);
+ }
+ return (1);
+}
+
+#endif
+
+#ifdef _KERNEL
+/*
+ * m_megapullup() - this function is a big hack.
+ * Thankfully, it's only used in ng_nat and ipfw+nat.
+ *
+ * It allocates an mbuf with cluster and copies the whole chain into cluster,
+ * so that it is all contiguous and the whole packet can be accessed via a
+ * plain (char *) pointer. This is required, because libalias doesn't know
+ * how to handle mbuf chains.
+ *
+ * On success, m_megapullup returns an mbuf with cluster containing the input
+ * packet, on failure NULL. In both cases, the input packet is consumed.
+ */
+struct mbuf *
+m_megapullup(struct mbuf *m, int len) {
+ struct mbuf *mcl;
+ caddr_t cp;
+
+ if (len > MCLBYTES)
+ goto bad;
+
+ if ((mcl = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR)) == NULL)
+ goto bad;
+
+ cp = mtod(mcl, caddr_t);
+ m_copydata(m, 0, len, cp);
+ m_move_pkthdr(mcl, m);
+ mcl->m_len = mcl->m_pkthdr.len;
+ m_freem(m);
+
+ return (mcl);
+bad:
+ m_freem(m);
+ return (NULL);
+}
+#endif
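
Because the in-kernel libalias still works on a flat buffer, consumers such as
ng_nat are expected to run the chain through m_megapullup() first and then
hand mtod(m, ...) to LibAliasIn()/LibAliasOut().  The fragment below is a
hedged sketch of that calling pattern, not code from this tree: the enclosing
function name is invented, and the post-NAT length fix-up only mirrors what a
caller would typically do after an inline rewrite grows the packet.

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/mbuf.h>

    #include <netinet/in_systm.h>
    #include <netinet/in.h>
    #include <netinet/ip.h>
    #include <netinet/libalias/alias.h>

    /* Illustrative consumer: flatten the chain, alias it, resync the length. */
    static struct mbuf *
    nat_one_packet(struct libalias *la, struct mbuf *m)
    {
            struct ip *ip;
            int rval;

            /* Collapse into a single cluster; m is consumed on failure. */
            m = m_megapullup(m, m->m_pkthdr.len);
            if (m == NULL)
                    return (NULL);

            ip = mtod(m, struct ip *);
            rval = LibAliasOut(la, (char *)ip, MCLBYTES);   /* room to grow */
            if (rval != PKT_ALIAS_OK &&
                rval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) {
                    m_freem(m);
                    return (NULL);
            }

            /* FTP/IRC inline rewrites may have changed the total length. */
            m->m_pkthdr.len = m->m_len = ntohs(ip->ip_len);
            return (m);
    }
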
Index: alias_proxy.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_proxy.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet/libalias/alias_proxy.c -L sys/netinet/libalias/alias_proxy.c -u -r1.2 -r1.3
--- sys/netinet/libalias/alias_proxy.c
+++ sys/netinet/libalias/alias_proxy.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_proxy.c,v 1.26 2005/06/27 07:36:02 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_proxy.c,v 1.31 2007/04/30 20:26:11 maxim Exp $");
/* file: alias_proxy.c
@@ -58,30 +58,24 @@
#include <sys/param.h>
#include <sys/ctype.h>
#include <sys/libkern.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
#include <sys/limits.h>
#else
#include <sys/types.h>
-#include <sys/socket.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
-#include <string.h>
#include <netdb.h>
-#include <arpa/inet.h>
+#include <string.h>
#endif
-/* BSD IPV4 includes */
-#include <netinet/in_systm.h>
-#include <netinet/in.h>
-#include <netinet/ip.h>
#include <netinet/tcp.h>
#ifdef _KERNEL
#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
#else
+#include <arpa/inet.h>
#include "alias.h" /* Public API functions for libalias */
#include "alias_local.h" /* Functions used by alias*.c */
#endif
@@ -182,8 +176,8 @@
if (l == ULONG_MAX || (l == 0 && endptr == c))
return (0);
- val = (in_addr_t)l;
- /*
+ val = (in_addr_t)l;
+ /*
* If the whole string is invalid, endptr will equal
* c.. this way we can make sure someone hasn't
* gone '.12' or something which would get past
@@ -318,6 +312,8 @@
struct proxy_entry *ptr;
struct proxy_entry *ptr_last;
+ LIBALIAS_LOCK_ASSERT(la);
+
if (la->proxyList == NULL) {
la->proxyList = entry;
entry->last = NULL;
@@ -359,6 +355,7 @@
struct libalias *la;
la = entry->la;
+ LIBALIAS_LOCK_ASSERT(la);
if (entry->last != NULL)
entry->last->next = entry->next;
else
@@ -376,6 +373,7 @@
int err;
struct proxy_entry *ptr;
+ LIBALIAS_LOCK_ASSERT(la);
err = -1;
ptr = la->proxyList;
while (ptr != NULL) {
@@ -433,7 +431,7 @@
{
int dlen;
int hlen;
- u_char *p;
+ char *p;
hlen = (pip->ip_hl + tc->th_off) << 2;
dlen = ntohs(pip->ip_len) - hlen;
@@ -572,6 +570,7 @@
struct in_addr dst_addr;
struct proxy_entry *ptr;
+ LIBALIAS_LOCK_ASSERT(la);
src_addr = pip->ip_src;
dst_addr = pip->ip_dst;
dst_port = ((struct tcphdr *)ip_next(pip))
@@ -612,6 +611,7 @@
int proxy_type)
{
+ LIBALIAS_LOCK_ASSERT(la);
(void)la;
switch (proxy_type) {
@@ -654,7 +654,7 @@
* then 0 is used, and group 0 rules are always checked before any
* others.
*/
- int i, n, len;
+ int i, n, len, ret;
int cmd_len;
int token_count;
int state;
@@ -674,11 +674,15 @@
struct in_addr dst_addr, dst_mask;
struct proxy_entry *proxy_entry;
+ LIBALIAS_LOCK(la);
+ ret = 0;
/* Copy command line into a buffer */
cmd += strspn(cmd, " \t");
cmd_len = strlen(cmd);
- if (cmd_len > (int)(sizeof(buffer) - 1))
- return (-1);
+ if (cmd_len > (int)(sizeof(buffer) - 1)) {
+ ret = -1;
+ goto getout;
+ }
strcpy(buffer, cmd);
/* Convert to lower case */
@@ -736,8 +740,10 @@
state = STATE_READ_SRC;
else if (strcmp(token, "dst") == 0)
state = STATE_READ_DST;
- else
- return (-1);
+ else {
+ ret = -1;
+ goto getout;
+ }
break;
case STATE_READ_TYPE:
@@ -747,8 +753,10 @@
proxy_type = PROXY_TYPE_ENCODE_TCPSTREAM;
else if (strcmp(token, "no_encode") == 0)
proxy_type = PROXY_TYPE_ENCODE_NONE;
- else
- return (-1);
+ else {
+ ret = -1;
+ goto getout;
+ }
state = STATE_READ_KEYWORD;
break;
@@ -769,18 +777,24 @@
if (*p != ':') {
err = IpAddr(token, &server_addr);
- if (err)
- return (-1);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
} else {
*p = ' ';
n = sscanf(token, "%s %s", s, str_server_port);
- if (n != 2)
- return (-1);
+ if (n != 2) {
+ ret = -1;
+ goto getout;
+ }
err = IpAddr(s, &server_addr);
- if (err)
- return (-1);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
}
}
state = STATE_READ_KEYWORD;
@@ -788,8 +802,10 @@
case STATE_READ_RULE:
n = sscanf(token, "%d", &rule_index);
- if (n != 1 || rule_index < 0)
- return (-1);
+ if (n != 1 || rule_index < 0) {
+ ret = -1;
+ goto getout;
+ }
state = STATE_READ_KEYWORD;
break;
@@ -798,16 +814,21 @@
int err;
int rule_to_delete;
- if (token_count != 2)
- return (-1);
+ if (token_count != 2) {
+ ret = -1;
+ goto getout;
+ }
n = sscanf(token, "%d", &rule_to_delete);
- if (n != 1)
- return (-1);
+ if (n != 1) {
+ ret = -1;
+ goto getout;
+ }
err = RuleNumberDelete(la, rule_to_delete);
if (err)
- return (-1);
- return (0);
+ ret = -1;
+ ret = 0;
+ goto getout;
}
case STATE_READ_PROTO:
@@ -815,8 +836,10 @@
proto = IPPROTO_TCP;
else if (strcmp(token, "udp") == 0)
proto = IPPROTO_UDP;
- else
- return (-1);
+ else {
+ ret = -1;
+ goto getout;
+ }
state = STATE_READ_KEYWORD;
break;
@@ -835,24 +858,32 @@
if (*p != '/') {
IpMask(32, &mask);
err = IpAddr(token, &addr);
- if (err)
- return (-1);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
} else {
int nbits;
char s[sizeof(buffer)];
*p = ' ';
n = sscanf(token, "%s %d", s, &nbits);
- if (n != 2)
- return (-1);
+ if (n != 2) {
+ ret = -1;
+ goto getout;
+ }
err = IpAddr(s, &addr);
- if (err)
- return (-1);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
err = IpMask(nbits, &mask);
- if (err)
- return (-1);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
}
if (state == STATE_READ_SRC) {
@@ -867,7 +898,8 @@
break;
default:
- return (-1);
+ ret = -1;
+ goto getout;
break;
}
@@ -893,8 +925,10 @@
int err;
err = IpPort(str_port, proto, &proxy_port);
- if (err)
- return (-1);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
} else {
proxy_port = 0;
}
@@ -903,20 +937,26 @@
int err;
err = IpPort(str_server_port, proto, &server_port);
- if (err)
- return (-1);
+ if (err) {
+ ret = -1;
+ goto getout;
+ }
} else {
server_port = 0;
}
/* Check that at least the server address has been defined */
- if (server_addr.s_addr == 0)
- return (-1);
+ if (server_addr.s_addr == 0) {
+ ret = -1;
+ goto getout;
+ }
/* Add to linked list */
proxy_entry = malloc(sizeof(struct proxy_entry));
- if (proxy_entry == NULL)
- return (-1);
+ if (proxy_entry == NULL) {
+ ret = -1;
+ goto getout;
+ }
proxy_entry->proxy_type = proxy_type;
proxy_entry->rule_index = rule_index;
@@ -931,5 +971,7 @@
RuleAdd(la, proxy_entry);
- return (0);
+getout:
+ LIBALIAS_UNLOCK(la);
+ return (ret);
}
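
The LibAliasProxyRule() changes above follow the pattern this commit applies to the libalias entry points generally: take the new per-instance lock on entry and funnel every early return through a single unlock point. Condensed, with purely illustrative names:

    #include "alias_local.h"        /* LIBALIAS_LOCK()/LIBALIAS_UNLOCK() */

    /*
     * Single-exit locking pattern: every former "return (err)" becomes
     * "ret = err; goto getout;" so the lock is always dropped exactly
     * once.  The function name and argument are placeholders.
     */
    int
    LibAliasSomething(struct libalias *la, int arg)
    {
            int ret = 0;

            LIBALIAS_LOCK(la);
            if (arg < 0) {
                    ret = -1;
                    goto getout;
            }
            /* ... real work, protected by la's mutex ... */
    getout:
            LIBALIAS_UNLOCK(la);
            return (ret);
    }
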
--- /dev/null
+++ sys/netinet/libalias/alias_mod.h
@@ -0,0 +1,157 @@
+/*-
+ * Copyright (c) 2005 Paolo Pisati <piso at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/netinet/libalias/alias_mod.h,v 1.1 2006/09/26 23:26:53 piso Exp $
+ */
+
+/*
+ * Alias_mod.h defines the outside world interfaces for the packet aliasing
+ * modular framework
+ */
+
+#ifndef _ALIAS_MOD_H_
+#define _ALIAS_MOD_H_
+
+#ifdef _KERNEL
+MALLOC_DECLARE(M_ALIAS);
+
+/* Use kernel allocator. */
+#if defined(_SYS_MALLOC_H_)
+#define malloc(x) malloc(x, M_ALIAS, M_NOWAIT|M_ZERO)
+#define calloc(x, n) malloc(x*n)
+#define free(x) free(x, M_ALIAS)
+#endif
+#endif
+
+/* Protocol handlers struct & function. */
+
+/* Packet flow direction. */
+#define IN 1
+#define OUT 2
+
+/* Working protocol. */
+#define IP 1
+#define TCP 2
+#define UDP 4
+
+/*
+ * Data passed to a protocol handler module; it must be filled
+ * right before calling find_handler() to determine which
+ * module is eligible to be called.
+ */
+
+struct alias_data {
+ struct alias_link *lnk;
+ struct in_addr *oaddr; /* Original address. */
+ struct in_addr *aaddr; /* Alias address. */
+ uint16_t *aport; /* Alias port. */
+ uint16_t *sport, *dport; /* Source & destination port */
+ uint16_t maxpktsize; /* Max packet size. */
+};
+
+/*
+ * This structure contains all the information necessary to make
+ * a protocol handler correctly work.
+ */
+
+struct proto_handler {
+ u_int pri; /* Handler priority. */
+ int16_t dir; /* Flow direction. */
+ uint8_t proto; /* Working protocol. */
+ int (*fingerprint)(struct libalias *la, /* Fingerprint * function. */
+ struct ip *pip, struct alias_data *ah);
+ int (*protohandler)(struct libalias *la, /* Aliasing * function. */
+ struct ip *pip, struct alias_data *ah);
+ LIST_ENTRY(proto_handler) entries;
+};
+
+
+/*
+ * Used only in userland when libalias needs to keep track of all
+ * modules loaded. In kernel land (kld mode) we don't need to care
+ * about libalias modules because kld does it for us.
+ */
+
+#define DLL_LEN 32
+struct dll {
+ char name[DLL_LEN]; /* Name of module. */
+ void *handle; /*
+ * Ptr to shared obj obtained through
+ * dlopen() - use this ptr to get access
+ * to any symbols from a loaded module
+ * via dlsym().
+ */
+ SLIST_ENTRY(dll) next;
+};
+
+/* Functions used with protocol handlers. */
+
+void handler_chain_init(void);
+void handler_chain_destroy(void);
+int LibAliasAttachHandlers(struct proto_handler *);
+int LibAliasDetachHandlers(struct proto_handler *);
+int detach_handler(struct proto_handler *);
+int find_handler(int8_t, int8_t, struct libalias *,
+ struct ip *, struct alias_data *);
+struct proto_handler *first_handler(void);
+
+/* Functions used with dll module. */
+
+void dll_chain_init(void);
+void dll_chain_destroy(void);
+int attach_dll(struct dll *);
+void *detach_dll(char *);
+struct dll *walk_dll_chain(void);
+
+/* End of handlers. */
+#define EOH -1
+
+/*
+ * Some defines borrowed from sys/module.h used to compile a kld
+ * in userland as a shared lib.
+ */
+
+#ifndef _KERNEL
+typedef enum modeventtype {
+ MOD_LOAD,
+ MOD_UNLOAD,
+ MOD_SHUTDOWN,
+ MOD_QUIESCE
+} modeventtype_t;
+
+typedef struct module *module_t;
+typedef int (*modeventhand_t)(module_t, int /* modeventtype_t */, void *);
+
+/*
+ * Struct for registering modules statically via SYSINIT.
+ */
+typedef struct moduledata {
+ const char *name; /* module name */
+ modeventhand_t evhand; /* event handler */
+ void *priv; /* extra data */
+} moduledata_t;
+#endif
+
+#endif /* !_ALIAS_MOD_H_ */
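
To make the struct alias_data comment concrete: the library core fills one of these per packet and lets find_handler() walk the chain. The UDP output path in alias.c does essentially the following; the wrapper function and its parameter list are illustrative only:

    #include <netinet/in.h>
    #include <netinet/ip.h>
    #include <netinet/udp.h>
    #include "alias_local.h"
    #include "alias_mod.h"

    /*
     * Illustrative wrapper: build the per-packet view the modules
     * expect and walk the handler chain.  find_handler() returns
     * ENOENT when no module claimed the packet, in which case the
     * caller falls back to plain NAT.
     */
    static int
    dispatch_udp_out(struct libalias *la, struct ip *pip,
        struct alias_link *lnk, struct udphdr *ud,
        struct in_addr *alias_addr, u_short *alias_port)
    {
            struct alias_data ad = {
                    .lnk = lnk,
                    .oaddr = NULL,          /* not needed on output */
                    .aaddr = alias_addr,
                    .aport = alias_port,
                    .sport = &ud->uh_sport,
                    .dport = &ud->uh_dport,
                    .maxpktsize = 0
            };

            return (find_handler(OUT, UDP, la, pip, &ad));
    }
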
Index: alias_nbt.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_nbt.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_nbt.c -L sys/netinet/libalias/alias_nbt.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_nbt.c
+++ sys/netinet/libalias/alias_nbt.c
@@ -29,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_nbt.c,v 1.19 2005/05/06 11:07:49 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_nbt.c,v 1.20 2006/09/26 23:26:53 piso Exp $");
/*
alias_nbt.c performs special processing for NetBios over TCP/IP
@@ -43,27 +43,147 @@
/* Includes */
#ifdef _KERNEL
#include <sys/param.h>
-#include <sys/ctype.h>
-#include <sys/libkern.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
#else
+#include <errno.h>
#include <sys/types.h>
-#include <ctype.h>
#include <stdio.h>
-#include <string.h>
-#include <arpa/inet.h>
#endif
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
-#include <netinet/tcp.h>
#ifdef _KERNEL
-#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
#else
#include "alias_local.h"
+#include "alias_mod.h"
+#endif
+
+#define NETBIOS_NS_PORT_NUMBER 137
+#define NETBIOS_DGM_PORT_NUMBER 138
+
+static int
+AliasHandleUdpNbt(struct libalias *, struct ip *, struct alias_link *,
+ struct in_addr *, u_short);
+
+static int
+AliasHandleUdpNbtNS(struct libalias *, struct ip *, struct alias_link *,
+ struct in_addr *, u_short *, struct in_addr *, u_short *);
+static int
+fingerprint1(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ ah->aaddr == NULL || ah->aport == NULL)
+ return (-1);
+ if (ntohs(*ah->dport) == NETBIOS_DGM_PORT_NUMBER
+ || ntohs(*ah->sport) == NETBIOS_DGM_PORT_NUMBER)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler1(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleUdpNbt(la, pip, ah->lnk, ah->aaddr, *ah->aport);
+ return (0);
+}
+
+static int
+fingerprint2(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ ah->aaddr == NULL || ah->aport == NULL)
+ return (-1);
+ if (ntohs(*ah->dport) == NETBIOS_NS_PORT_NUMBER
+ || ntohs(*ah->sport) == NETBIOS_NS_PORT_NUMBER)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler2in(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleUdpNbtNS(la, pip, ah->lnk, ah->aaddr, ah->aport,
+ ah->oaddr, ah->dport);
+ return (0);
+}
+
+static int
+protohandler2out(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleUdpNbtNS(la, pip, ah->lnk, &pip->ip_src, ah->sport,
+ ah->aaddr, ah->aport);
+ return (0);
+}
+
+/* Kernel module definition. */
+struct proto_handler handlers[] = {
+ {
+ .pri = 130,
+ .dir = IN|OUT,
+ .proto = UDP,
+ .fingerprint = &fingerprint1,
+ .protohandler = &protohandler1
+ },
+ {
+ .pri = 140,
+ .dir = IN,
+ .proto = UDP,
+ .fingerprint = &fingerprint2,
+ .protohandler = &protohandler2in
+ },
+ {
+ .pri = 140,
+ .dir = OUT,
+ .proto = UDP,
+ .fingerprint = &fingerprint2,
+ .protohandler = &protohandler2out
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_nbt", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_nbt, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_nbt, 1);
+MODULE_DEPEND(alias_nbt, libalias, 1, 1, 1);
#endif
typedef struct {
@@ -212,7 +332,7 @@
#define DGM_POSITIVE_RES 0x15
#define DGM_NEGATIVE_RES 0x16
-int
+static int
AliasHandleUdpNbt(
struct libalias *la,
struct ip *pip, /* IP packet to examine/patch */
@@ -640,7 +760,7 @@
return ((u_char *) q);
}
-int
+static int
AliasHandleUdpNbtNS(
struct libalias *la,
struct ip *pip, /* IP packet to examine/patch */
Index: alias_util.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_util.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_util.c -L sys/netinet/libalias/alias_util.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_util.c
+++ sys/netinet/libalias/alias_util.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_util.c,v 1.18 2005/06/27 07:36:02 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_util.c,v 1.20 2006/12/15 12:50:06 piso Exp $");
/*
@@ -45,6 +45,7 @@
#ifdef _KERNEL
#include <sys/param.h>
+#include <sys/proc.h>
#else
#include <sys/types.h>
#include <stdio.h>
@@ -75,6 +76,7 @@
{
int sum, oddbyte;
+ LIBALIAS_LOCK(la);
sum = 0;
while (nbytes > 1) {
sum += *ptr++;
@@ -88,6 +90,7 @@
}
sum = (sum >> 16) + (sum & 0xffff);
sum += (sum >> 16);
+ LIBALIAS_UNLOCK(la);
return (~sum);
}
Index: alias_cuseeme.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_cuseeme.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_cuseeme.c -L sys/netinet/libalias/alias_cuseeme.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_cuseeme.c
+++ sys/netinet/libalias/alias_cuseeme.c
@@ -27,11 +27,14 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_cuseeme.c,v 1.12 2005/05/05 21:55:17 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_cuseeme.c,v 1.13 2006/09/26 23:26:53 piso Exp $");
#ifdef _KERNEL
#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
#else
+#include <errno.h>
#include <sys/types.h>
#include <stdio.h>
#endif
@@ -44,8 +47,100 @@
#ifdef _KERNEL
#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
#else
#include "alias_local.h"
+#include "alias_mod.h"
+#endif
+
+#define CUSEEME_PORT_NUMBER 7648
+
+static void
+AliasHandleCUSeeMeOut(struct libalias *la, struct ip *pip,
+ struct alias_link *lnk);
+
+static void
+AliasHandleCUSeeMeIn(struct libalias *la, struct ip *pip,
+ struct in_addr original_addr);
+
+static int
+fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->oaddr == NULL)
+ return (-1);
+ if (ntohs(*ah->dport) == CUSEEME_PORT_NUMBER)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandlerin(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleCUSeeMeIn(la, pip, *ah->oaddr);
+ return (0);
+}
+
+static int
+protohandlerout(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleCUSeeMeOut(la, pip, ah->lnk);
+ return (0);
+}
+
+/* Kernel module definition. */
+struct proto_handler handlers[] = {
+ {
+ .pri = 120,
+ .dir = OUT,
+ .proto = UDP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandlerout
+ },
+ {
+ .pri = 120,
+ .dir = IN,
+ .proto = UDP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandlerin
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t
+alias_mod = {
+ "alias_cuseeme", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_cuseeme, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_cuseeme, 1);
+MODULE_DEPEND(alias_cuseeme, libalias, 1, 1, 1);
#endif
/* CU-SeeMe Data Header */
@@ -77,7 +172,7 @@
* counts etc */
};
-void
+static void
AliasHandleCUSeeMeOut(struct libalias *la, struct ip *pip, struct alias_link *lnk)
{
struct udphdr *ud = ip_next(pip);
@@ -100,7 +195,7 @@
}
}
-void
+static void
AliasHandleCUSeeMeIn(struct libalias *la, struct ip *pip, struct in_addr original_addr)
{
struct in_addr alias_addr;
Index: libalias.3
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/libalias.3,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/libalias.3 -L sys/netinet/libalias/libalias.3 -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/libalias.3
+++ sys/netinet/libalias/libalias.3
@@ -23,9 +23,9 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.\" $FreeBSD: src/sys/netinet/libalias/libalias.3,v 1.53 2005/01/20 09:17:02 ru Exp $
+.\" $FreeBSD: src/sys/netinet/libalias/libalias.3,v 1.58 2006/10/11 07:11:56 ru Exp $
.\"
-.Dd January 17, 2004
+.Dd October 1, 2006
.Dt LIBALIAS 3
.Os
.Sh NAME
@@ -836,7 +836,7 @@
.Ed
.Pp
.Ft int
-.Fn LibAliasCheckNewLink void
+.Fn LibAliasCheckNewLink "struct libalias *"
.Bd -ragged -offset indent
This function returns a non-zero value when a new aliasing link is created.
In circumstances where incoming traffic is being sequentially sent to
@@ -893,6 +893,9 @@
added support for RTSP/PNA.
.An Ruslan Ermilov Aq ru at FreeBSD.org
added support for PPTP and LSNAT as well as general hacking.
+.An Paolo Pisati Aq piso at FreeBSD.org
+made the library modular, moving support for all
+protocols (except for IP, TCP and UDP) to external modules.
.Sh ACKNOWLEDGMENTS
Listed below, in approximate chronological order, are individuals who
have provided valuable comments and/or debugging assistance.
@@ -1011,8 +1014,445 @@
a unique aliasing link can be established.
In an alternate operating mode, the first choice of an aliasing port is also
random and unrelated to the local port number.
-.Sh BUGS
-PPTP aliasing does not work when more than one internal client
-connects to the same external server at the same time, because
-PPTP requires a single TCP control connection to be established
-between any two IP addresses.
+.Sh MODULAR ARCHITECTURE (AND Xr ipfw 4 Sh SUPPORT)
+One of the latest improvements to
+.Nm
+was to make its support
+for new protocols independent from the rest of the library, giving it
+the ability to load/unload support for new protocols at run-time.
+To achieve this feature, all the code for protocol handling was moved
+to a series of modules outside of the main library.
+These modules are compiled from the same sources but work in
+different ways, depending on whether they are compiled to work inside a kernel
+or as part of the userland library.
+.Ss LIBALIAS MODULES IN KERNEL LAND
+When compiled for the kernel,
+.Nm
+modules are plain KLDs recognizable with the
+.Pa alias_
+prefix.
+.Pp
+To add support for a new protocol, load the corresponding module.
+For example:
+.Pp
+.Dl "kldload alias_ftp"
+.Pp
+When support for a protocol is no longer needed, its module can be unloaded:
+.Pp
+.Dl "kldunload alias_ftp"
+.Ss LIBALIAS MODULES IN USERLAND
+Due to the differences between kernel and userland (no KLD mechanism,
+many different address spaces, etc.), module loading/tracking/unloading has to be
+handled a bit differently in userland.
+.Pp
+When compiled for a userland
+.Nm ,
+all the modules are plain libraries, residing in
+.Pa /usr/lib ,
+and recognizable with the
+.Pa libalias_
+prefix.
+.Pp
+There is a configuration file,
+.Pa /etc/libalias.conf ,
+with the following contents (by default):
+.Bd -literal -offset indent
+/usr/lib/libalias_cuseeme.so
+/usr/lib/libalias_ftp.so
+/usr/lib/libalias_irc.so
+/usr/lib/libalias_nbt.so
+/usr/lib/libalias_pptp.so
+/usr/lib/libalias_skinny.so
+/usr/lib/libalias_smedia.so
+.Ed
+.Pp
+This file contains the paths to the modules that
+.Nm
+will load.
+To load/unload a new module, just add its path to
+.Pa libalias.conf
+and call
+.Fn LibAliasRefreshModules
+from the program.
+In case the application provides a
+.Dv SIGHUP
+signal handler, add a call to
+.Fn LibAliasRefreshModules
+inside the handler, and every time you want to refresh the loaded modules,
+send it the
+.Dv SIGHUP
+signal:
+.Pp
+.Dl "kill -HUP <process_pid>"
+.Ss MODULAR ARCHITECTURE: HOW IT WORKS
+The modular architecture of
+.Nm
+works similarly whether it is running inside the
+kernel or in userland.
+From
+.Pa alias_mod.c :
+.Bd -literal
+/* Protocol and userland module handlers chains. */
+LIST_HEAD(handler_chain, proto_handler) handler_chain ...
+\&...
+SLIST_HEAD(dll_chain, dll) dll_chain ...
+.Ed
+.Pp
+.Va handler_chain
+keeps track of all the loaded protocol handlers, while
+.Va dll_chain
+takes care of the loaded userland modules.
+.Pp
+.Va handler_chain
+is composed of
+.Vt "struct proto_handler"
+entries:
+.Bd -literal
+struct proto_handler {
+ u_int pri;
+ int16_t dir;
+ uint8_t proto;
+ int (*fingerprint)(struct libalias *la,
+ struct ip *pip, struct alias_data *ah);
+ int (*protohandler)(struct libalias *la,
+ struct ip *pip, struct alias_data *ah);
+ LIST_ENTRY(proto_handler) entries;
+};
+.Ed
+.Pp
+where:
+.Bl -inset
+.It Va pri
+is the priority assigned to a protocol handler, lower
+is better.
+.It Va dir
+is the direction of packets: ingoing or outgoing.
+.It Va proto
+says to which protocol this packet belongs: IP, TCP or UDP.
+.It Va fingerprint
+points to the fingerprint function while protohandler points
+to the protocol handler function.
+.El
+.Pp
+The
+.Va fingerprint
+function has the dual role of checking whether the
+incoming packet is recognized and whether it belongs to any category that this
+module can handle.
+.Pp
+The
+.Va protohandler
+function actually manipulates
+the packet to make
+.Nm
+correctly NAT it.
+.Pp
+When a packet enters
+.Nm ,
+if it meets a module hook,
+.Va handler_chain
+is searched to see if there is a handler that matches
+this type of packet (it checks the protocol and direction of the packet); if
+more than one handler is found, it starts with the module with
+the lowest priority number: it calls the
+.Va fingerprint
+function and interprets the result.
+.Pp
+If the result value is equal to 0 then it calls the protocol handler
+of this handler and returns.
+Otherwise, it proceeds to the next eligible module until the
+.Va handler_chain
+is exhausted.
+.Pp
+Inside
+.Nm ,
+the module hook looks like this:
+.Bd -literal -offset indent
+struct alias_data ad = {
+ lnk,
+ &original_address,
+ &alias_address,
+ &alias_port,
+ &ud->uh_sport, /* original source port */
+ &ud->uh_dport, /* original dest port */
+ 256 /* maxpacketsize */
+};
+
+\&...
+
+/* walk out chain */
+err = find_handler(IN, UDP, la, pip, &ad);
+.Ed
+.Pp
+All data useful to a module are gathered together in an
+.Vt alias_data
+structure, then
+.Fn find_handler
+is called.
+The
+.Fn find_handler
+function is responsible for walking the handler
+chain; it receives the following input parameters:
+.Bl -tag -width indent
+.It Fa IN
+direction
+.It Fa UDP
+working protocol
+.It Fa la
+pointer to this instance of libalias
+.It Fa pip
+pointer to a
+.Vt "struct ip"
+.It Fa ad
+pointer to
+.Vt "struct alias_data"
+(see above)
+.El
+.Pp
+In this case,
+.Fn find_handler
+will search only for modules registered for
+supporting INcoming UDP packets.
+.Pp
+As was mentioned earlier,
+.Nm
+in userland is a bit different, because
+module handling also has to be taken care of (avoiding duplicate loading of a
+module, avoiding modules with the same name, etc.), so
+.Va dll_chain
+was introduced.
+.Pp
+.Va dll_chain
+contains a list of all userland
+.Nm
+modules loaded.
+.Pp
+When an application calls
+.Fn LibAliasRefreshModules ,
+.Nm
+first unloads all the loaded modules, then reloads all the modules listed in
+.Pa /etc/libalias.conf :
+for every module loaded, a new entry to
+.Va dll_chain
+is added.
+.Pp
+.Va dll_chain
+is composed of
+.Vt "struct dll"
+entries:
+.Bd -literal
+struct dll {
+ /* name of module */
+ char name[DLL_LEN];
+ /*
+ * ptr to shared obj obtained through
+ * dlopen() - use this ptr to get access
+ * to any symbols from a loaded module
+ * via dlsym()
+ */
+ void *handle;
+ struct dll *next;
+};
+.Ed
+.Bl -inset
+.It Va name
+is the name of the module
+.It Va handle
+is a pointer to the module obtained through
+.Xr dlopen 3
+.El
+Whenever a module is loaded in userland, an entry is added to
+.Va dll_chain ,
+then every protocol handler present in that module
+is resolved and registered in
+.Va handler_chain .
+.Ss HOW TO WRITE A MODULE FOR LIBALIAS
+There is a module (called
+.Pa alias_dummy.[ch] )
+in
+.Nm
+that can be used as a skeleton for future work; here we analyse some parts of that
+module.
+From
+.Pa alias_dummy.c :
+.Bd -literal
+struct proto_handler handlers [] = {{666, IN|OUT, UDP|TCP,
+ &fingerprint, &protohandler}};
+.Ed
+.Pp
+The variable
+.Va handlers
+is the
+.Dq "most important thing"
+in a module
+because it describes the handlers present and lets the outside world use
+it in an opaque way.
+.Pp
+It must ALWAYS be present in every module, and it MUST retain
+the name
+.Va handlers ,
+otherwise attempting to load a module in userland will fail and
+complain about missing symbols: for more information about module
+load/unload, please refer to
+.Fn LibAliasRefreshModules ,
+.Fn LibAliasLoadModule
+and
+.Fn LibAliasUnloadModule
+in
+.Pa alias.c .
+.Pp
+.Va handlers
+contains all the
+.Vt proto_handler
+structures present in a module.
+.Bd -literal
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ attach_handlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ detach_handlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+.Ed
+When running as KLD,
+.Fn mod_handler
+registers/deregisters the module using
+.Fn attach_handlers
+and
+.Fn detach_handlers ,
+respectively.
+.Pp
+Every module must contain at least 2 functions: one fingerprint
+function and a protocol handler function.
+.Bd -literal
+#ifdef _KERNEL
+static
+#endif
+int
+fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+\&...
+}
+
+#ifdef _KERNEL
+static
+#endif
+int
+protohandler(struct libalias *la, struct ip *pip,
+ struct alias_data *ah)
+{
+
+\&...
+}
+.Ed
+and they must accept exactly these input parameters.
+.Ss PATCHING AN APPLICATION FOR USERLAND LIBALIAS MODULES
+To add module support into an application that uses
+.Nm ,
+the following simple steps can be followed.
+.Bl -enum
+.It
+Find the main file of an application
+(let us call it
+.Pa main.c ) .
+.It
+Add this to the header section of
+.Pa main.c ,
+if not already present:
+.Pp
+.Dl "#include <signal.h>"
+.Pp
+and this just after the header section:
+.Pp
+.Dl "static void signal_handler(int);"
+.It
+Add the following line to the init function of an application or,
+if it does not have any init function, put it in
+.Fn main :
+.Pp
+.Dl "signal(SIGHUP, signal_handler);"
+.Pp
+and place the
+.Fn signal_handler
+function somewhere in
+.Pa main.c :
+.Bd -literal -offset indent
+static void
+signal_handler(int sig)
+{
+
+ LibAliasRefreshModules();
+}
+.Ed
+.Pp
+Otherwise, if an application already traps the
+.Dv SIGHUP
+signal, just add a call to
+.Fn LibAliasRefreshModules
+in the signal handler function.
+.El
+For example, to patch
+.Xr natd 8
+to use
+.Nm
+modules, just add the following line to
+.Fn RefreshAddr "int sig __unused" :
+.Pp
+.Dl "LibAliasRefreshModules()"
+.Pp
+recompile and you are done.
+.Ss LOGGING SUPPORT IN KERNEL LAND
+When working as KLD,
+.Nm
+now has logging support that
+writes to a buffer allocated inside
+.Vt "struct libalias"
+(from
+.Pa alias_local.h ) :
+.Bd -literal
+struct libalias {
+ ...
+
+ /* log descriptor */
+#ifdef KERNEL_LOG
+ char *logDesc; /*
+ * ptr to an auto-malloced
+ * memory buffer when libalias
+ * works as kld
+ */
+#else
+ FILE *logDesc; /*
+ * ptr to /var/log/alias.log
+ * when libalias runs as a
+ * userland lib
+ */
+#endif
+
+ ...
+}
+.Ed
+so all applications using
+.Nm
+will be able to handle their
+own logs, if they want, accessing
+.Va logDesc .
+Moreover, every change to a log buffer is automatically added to
+.Xr syslog 3
+with the
+.Dv LOG_SECURITY
+facility and the
+.Dv LOG_INFO
+level.
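
The logging described in that last subsection is only active once the PKT_ALIAS_LOG mode bit is set on the instance, so an application has to opt in first. A minimal sketch using the existing LibAliasSetMode() interface from alias.h:

    #include "alias.h"

    /*
     * Enable per-instance logging: in userland libalias then writes to
     * /var/log/alias.log, in kernel builds to the instance's internal
     * buffer (and to syslog, as described above).
     */
    static void
    enable_alias_log(struct libalias *la)
    {
            LibAliasSetMode(la, PKT_ALIAS_LOG, PKT_ALIAS_LOG);
    }
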
Index: alias_old.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_old.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_old.c -L sys/netinet/libalias/alias_old.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_old.c
+++ sys/netinet/libalias/alias_old.c
@@ -25,10 +25,11 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_old.c,v 1.7 2005/05/05 19:27:32 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_old.c,v 1.8 2006/09/26 23:26:53 piso Exp $");
#ifdef _KERNEL
#include <sys/param.h>
+#include <sys/proc.h>
#else
#include <sys/types.h>
#include <stdlib.h>
--- /dev/null
+++ sys/netinet/libalias/alias_dummy.c
@@ -0,0 +1,153 @@
+/*-
+ * Copyright (c) 2005 Paolo Pisati <piso at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_dummy.c,v 1.1 2006/09/26 23:26:53 piso Exp $");
+
+/*
+ * Alias_dummy is just an empty skeleton used to demonstrate how to write
+ * a module for libalias that will run unaltered in userland or in
+ * kernel land.
+ */
+
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#else
+#include <errno.h>
+#include <sys/types.h>
+#include <stdio.h>
+#endif
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+
+#ifdef _KERNEL
+#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
+#else
+#include "alias_local.h"
+#include "alias_mod.h"
+#endif
+
+static void
+AliasHandleDummy(struct libalias *la, struct ip *ip, struct alias_data *ah);
+
+static int
+fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ /*
+ * Check here all the data that will be used later; if any field
+ * is empty/NULL, return a -1 value.
+ */
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ ah->maxpktsize == 0)
+ return (-1);
+ /*
+ * Fingerprint the incoming packet, if it matches any conditions
+ * return an OK value.
+ */
+ if (ntohs(*ah->dport) == 123
+ || ntohs(*ah->sport) == 456)
+ return (0); /* I know how to handle it. */
+ return (-1); /* I don't recognize this packet. */
+}
+
+/*
+ * Wrap in this general purpose function, the real function used to alias the
+ * packets.
+ */
+
+static int
+protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleDummy(la, pip, ah);
+ return (0);
+}
+
+/*
+ * NOTA BENE: the next variable MUST NOT be renamed in any case if you want
+ * your module to work in userland, because it's used to find and use all
+ * the protocol handlers present in every module.
+ * So WATCH OUT, your module needs this variable and it needs it with
+ * ITS EXACT NAME: handlers.
+ */
+
+struct proto_handler handlers [] = {
+ {
+ .pri = 666,
+ .dir = IN|OUT,
+ .proto = UDP|TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandler
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_dummy", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_dummy, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_dummy, 1);
+MODULE_DEPEND(alias_dummy, libalias, 1, 1, 1);
+#endif
+
+static void
+AliasHandleDummy(struct libalias *la, struct ip *ip, struct alias_data *ah)
+{
+ ; /* Dummy. */
+}
+
Index: alias_skinny.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_skinny.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_skinny.c -L sys/netinet/libalias/alias_skinny.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_skinny.c
+++ sys/netinet/libalias/alias_skinny.c
@@ -27,31 +27,97 @@
*
* Author: Joe Marcus Clarke <marcus at FreeBSD.org>
*
- * $FreeBSD: src/sys/netinet/libalias/alias_skinny.c,v 1.12 2005/06/27 07:36:02 glebius Exp $
+ * $FreeBSD: src/sys/netinet/libalias/alias_skinny.c,v 1.14 2007/04/07 09:52:36 piso Exp $
*/
#ifdef _KERNEL
#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
#else
-#include <sys/types.h>
-#include <sys/socket.h>
+#include <errno.h>
#include <stdio.h>
-#include <string.h>
#include <unistd.h>
-#include <arpa/inet.h>
#endif
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
-#include <netinet/udp.h>
#ifdef _KERNEL
-#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
#else
#include "alias_local.h"
+#include "alias_mod.h"
+#endif
+
+static void
+AliasHandleSkinny(struct libalias *, struct ip *, struct alias_link *);
+
+static int
+fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL)
+ return (-1);
+ if (la->skinnyPort != 0 && (ntohs(*ah->sport) == la->skinnyPort ||
+ ntohs(*ah->dport) == la->skinnyPort))
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleSkinny(la, pip, ah->lnk);
+ return (0);
+}
+
+struct proto_handler handlers[] = {
+ {
+ .pri = 110,
+ .dir = IN|OUT,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandler
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_skinny", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_skinny, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_skinny, 1);
+MODULE_DEPEND(alias_skinny, libalias, 1, 1, 1);
#endif
/*
@@ -233,7 +299,7 @@
return (0);
}
-void
+static void
AliasHandleSkinny(struct libalias *la, struct ip *pip, struct alias_link *lnk)
{
size_t hlen, tlen, dlen;
@@ -243,6 +309,7 @@
size_t orig_len, skinny_hdr_len = sizeof(struct skinny_header);
ConvDirection direction;
+ lip = -1;
tc = (struct tcphdr *)ip_next(pip);
hlen = (pip->ip_hl + tc->th_off) << 2;
tlen = ntohs(pip->ip_len);
@@ -352,6 +419,16 @@
#endif
return;
}
+ if (lip == -1) {
+#ifdef LIBALIAS_DEBUG
+ fprintf(stderr,
+ "PacketAlias/Skinny: received a"
+ " packet,StartMediaTx Message before"
+ " packet,OpnRcvChnAckMsg\n"
+#endif
+ return;
+ }
+
#ifdef LIBALIAS_DEBUG
fprintf(stderr,
"PacketAlias/Skinny: Received start media trans msg\n");
Index: alias_irc.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_irc.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_irc.c -L sys/netinet/libalias/alias_irc.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_irc.c
+++ sys/netinet/libalias/alias_irc.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_irc.c,v 1.21 2005/06/27 07:36:02 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_irc.c,v 1.23 2007/04/04 03:16:59 kan Exp $");
/* Alias_irc.c intercepts packages contain IRC CTCP commands, and
changes DCC commands to export a port on the aliasing host instead
@@ -50,12 +50,14 @@
/* Includes */
#ifdef _KERNEL
#include <sys/param.h>
-#include <sys/libkern.h>
#include <sys/ctype.h>
#include <sys/limits.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
#else
+#include <errno.h>
#include <sys/types.h>
-#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
@@ -69,15 +71,89 @@
#ifdef _KERNEL
#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
#else
#include "alias_local.h"
+#include "alias_mod.h"
#endif
+#define IRC_CONTROL_PORT_NUMBER_1 6667
+#define IRC_CONTROL_PORT_NUMBER_2 6668
+
/* Local defines */
#define DBprintf(a)
+static void
+AliasHandleIrcOut(struct libalias *, struct ip *, struct alias_link *,
+ int maxpacketsize);
+
+static int
+fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->dport == NULL || ah->lnk == NULL ||
+ ah->maxpktsize == 0)
+ return (-1);
+ if (ntohs(*ah->dport) == IRC_CONTROL_PORT_NUMBER_1
+ || ntohs(*ah->dport) == IRC_CONTROL_PORT_NUMBER_2)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleIrcOut(la, pip, ah->lnk, ah->maxpktsize);
+ return (0);
+}
+
+struct proto_handler handlers[] = {
+ {
+ .pri = 90,
+ .dir = OUT,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandler
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_irc", mod_handler, NULL
+};
+
+/* Kernel module definition. */
+#ifdef _KERNEL
+DECLARE_MODULE(alias_irc, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_irc, 1);
+MODULE_DEPEND(alias_irc, libalias, 1, 1, 1);
+#endif
-void
+static void
AliasHandleIrcOut(struct libalias *la,
struct ip *pip, /* IP packet to examine */
struct alias_link *lnk, /* Which link are we on? */
--- /dev/null
+++ sys/netinet/libalias/alias_mod.c
@@ -0,0 +1,284 @@
+/*-
+ * Copyright (c) 2005 Paolo Pisati <piso at FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_mod.c,v 1.3 2006/12/15 12:50:06 piso Exp $");
+
+#ifdef _KERNEL
+#include <sys/libkern.h>
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#else
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <errno.h>
+#endif
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+
+#ifdef _KERNEL
+#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
+#else
+#include "alias_local.h"
+#include "alias_mod.h"
+#endif
+
+/* Protocol and userland module handlers chains. */
+LIST_HEAD(handler_chain, proto_handler) handler_chain = LIST_HEAD_INITIALIZER(foo);
+#ifdef _KERNEL
+struct rwlock handler_rw;
+#endif
+SLIST_HEAD(dll_chain, dll) dll_chain = SLIST_HEAD_INITIALIZER(foo);
+
+#ifdef _KERNEL
+
+#define LIBALIAS_RWLOCK_INIT() \
+ rw_init(&handler_rw, "Libalias_modules_rwlock")
+#define LIBALIAS_RWLOCK_DESTROY() rw_destroy(&handler_rw)
+#define LIBALIAS_WLOCK_ASSERT() \
+ rw_assert(&handler_rw, RA_WLOCKED)
+
+static __inline void
+LIBALIAS_RLOCK(void)
+{
+ rw_rlock(&handler_rw);
+}
+
+static __inline void
+LIBALIAS_RUNLOCK(void)
+{
+ rw_runlock(&handler_rw);
+}
+
+static __inline void
+LIBALIAS_WLOCK(void)
+{
+ rw_wlock(&handler_rw);
+}
+
+static __inline void
+LIBALIAS_WUNLOCK(void)
+{
+ rw_wunlock(&handler_rw);
+}
+
+static void
+_handler_chain_init(void)
+{
+
+ if (!rw_initialized(&handler_rw))
+ LIBALIAS_RWLOCK_INIT();
+}
+
+static void
+_handler_chain_destroy(void)
+{
+
+ if (rw_initialized(&handler_rw))
+ LIBALIAS_RWLOCK_DESTROY();
+}
+
+#else
+#define LIBALIAS_RWLOCK_INIT() ;
+#define LIBALIAS_RWLOCK_DESTROY() ;
+#define LIBALIAS_WLOCK_ASSERT() ;
+#define LIBALIAS_RLOCK() ;
+#define LIBALIAS_RUNLOCK() ;
+#define LIBALIAS_WLOCK() ;
+#define LIBALIAS_WUNLOCK() ;
+#define _handler_chain_init() ;
+#define _handler_chain_destroy() ;
+#endif
+
+void
+handler_chain_init(void)
+{
+ _handler_chain_init();
+}
+
+void
+handler_chain_destroy(void)
+{
+ _handler_chain_destroy();
+}
+
+static int
+_attach_handler(struct proto_handler *p)
+{
+ struct proto_handler *b = NULL;
+
+ LIBALIAS_WLOCK_ASSERT();
+ LIST_FOREACH(b, &handler_chain, entries) {
+ if ((b->pri == p->pri) &&
+ (b->dir == p->dir) &&
+ (b->proto == p->proto))
+ return (EEXIST); /* Priority conflict. */
+ if (b->pri > p->pri) {
+ LIST_INSERT_BEFORE(b, p, entries);
+ return (0);
+ }
+ }
+ /* End of list or found right position, inserts here. */
+ if (b)
+ LIST_INSERT_AFTER(b, p, entries);
+ else
+ LIST_INSERT_HEAD(&handler_chain, p, entries);
+ return (0);
+}
+
+static int
+_detach_handler(struct proto_handler *p)
+{
+ struct proto_handler *b, *b_tmp;
+
+ LIBALIAS_WLOCK_ASSERT();
+ LIST_FOREACH_SAFE(b, &handler_chain, entries, b_tmp) {
+ if (b == p) {
+ LIST_REMOVE(b, entries);
+ return (0);
+ }
+ }
+ return (ENOENT); /* Handler not found. */
+}
+
+int
+LibAliasAttachHandlers(struct proto_handler *_p)
+{
+ int i, error = -1;
+
+ LIBALIAS_WLOCK();
+ for (i=0; 1; i++) {
+ if (*((int *)&_p[i]) == EOH)
+ break;
+ error = _attach_handler(&_p[i]);
+ if (error != 0)
+ break;
+ }
+ LIBALIAS_WUNLOCK();
+ return (error);
+}
+
+int
+LibAliasDetachHandlers(struct proto_handler *_p)
+{
+ int i, error = -1;
+
+ LIBALIAS_WLOCK();
+ for (i=0; 1; i++) {
+ if (*((int *)&_p[i]) == EOH)
+ break;
+ error = _detach_handler(&_p[i]);
+ if (error != 0)
+ break;
+ }
+ LIBALIAS_WUNLOCK();
+ return (error);
+}
+
+int
+detach_handler(struct proto_handler *_p)
+{
+ int error = -1;
+
+ LIBALIAS_WLOCK();
+ error = _detach_handler(_p);
+ LIBALIAS_WUNLOCK();
+ return (error);
+}
+
+int
+find_handler(int8_t dir, int8_t proto, struct libalias *la, struct ip *pip,
+ struct alias_data *ad)
+{
+ struct proto_handler *p;
+ int error = ENOENT;
+
+ LIBALIAS_RLOCK();
+
+ LIST_FOREACH(p, &handler_chain, entries) {
+ if ((p->dir & dir) && (p->proto & proto))
+ if (p->fingerprint(la, pip, ad) == 0) {
+ error = p->protohandler(la, pip, ad);
+ break;
+ }
+ }
+ LIBALIAS_RUNLOCK();
+ return (error);
+}
+
+struct proto_handler *
+first_handler(void)
+{
+
+ return (LIST_FIRST(&handler_chain));
+}
+
+/* Dll manipulation code - this code is not thread safe... */
+
+int
+attach_dll(struct dll *p)
+{
+ struct dll *b;
+
+ SLIST_FOREACH(b, &dll_chain, next) {
+ if (!strncmp(b->name, p->name, DLL_LEN))
+ return (EEXIST); /* Dll name conflict. */
+ }
+ SLIST_INSERT_HEAD(&dll_chain, p, next);
+ return (0);
+}
+
+void *
+detach_dll(char *p)
+{
+ struct dll *b = NULL, *b_tmp;
+ void *error = NULL;
+
+ SLIST_FOREACH_SAFE(b, &dll_chain, next, b_tmp)
+ if (!strncmp(b->name, p, DLL_LEN)) {
+ SLIST_REMOVE(&dll_chain, b, dll, next);
+ error = b;
+ break;
+ }
+ return (error);
+}
+
+struct dll *
+walk_dll_chain(void)
+{
+ struct dll *t;
+
+ t = SLIST_FIRST(&dll_chain);
+ if (t == NULL)
+ return (NULL);
+ SLIST_REMOVE_HEAD(&dll_chain, next);
+ return (t);
+}
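
From the consumer side, the chain code above is fed with arrays like the following: LibAliasAttachHandlers() scans entries until the { EOH } sentinel, _attach_handler() keeps the chain sorted by ascending .pri, and find_handler() calls the first fingerprint that accepts a packet. The fingerprint/handler functions and the array name below are placeholders; note that a loadable userland module must name its array exactly "handlers" so dlsym() can find it:

    #include <netinet/in.h>
    #include <netinet/ip.h>
    #include "alias_local.h"
    #include "alias_mod.h"

    /* Placeholder fingerprint/handler functions, defined elsewhere. */
    static int my_fingerprint(struct libalias *, struct ip *,
        struct alias_data *);
    static int my_protohandler(struct libalias *, struct ip *,
        struct alias_data *);

    static struct proto_handler my_handlers[] = {
            {
                    .pri = 100,             /* tried before higher numbers */
                    .dir = OUT,
                    .proto = TCP,
                    .fingerprint = &my_fingerprint,
                    .protohandler = &my_protohandler
            },
            { EOH }                         /* sentinel ends the scan */
    };

    static int
    my_register(void)
    {
            /* EEXIST if pri/dir/proto clash with an existing entry. */
            return (LibAliasAttachHandlers(my_handlers));
    }

    static int
    my_unregister(void)
    {
            return (LibAliasDetachHandlers(my_handlers));
    }
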
Index: alias_smedia.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_smedia.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_smedia.c -L sys/netinet/libalias/alias_smedia.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_smedia.c
+++ sys/netinet/libalias/alias_smedia.c
@@ -64,7 +64,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_smedia.c,v 1.15 2005/06/27 07:36:02 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_smedia.c,v 1.17 2006/11/07 21:06:48 marcus Exp $");
/*
Alias_smedia.c is meant to contain the aliasing code for streaming media
@@ -100,8 +100,11 @@
#ifdef _KERNEL
#include <sys/param.h>
-#include <sys/libkern.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
#else
+#include <errno.h>
#include <sys/types.h>
#include <stdio.h>
#include <string.h>
@@ -111,13 +114,94 @@
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
-#include <netinet/udp.h>
#ifdef _KERNEL
#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
#else
#include "alias_local.h"
+#include "alias_mod.h"
+#endif
+
+#define RTSP_CONTROL_PORT_NUMBER_1 554
+#define RTSP_CONTROL_PORT_NUMBER_2 7070
+#define TFTP_PORT_NUMBER 69
+
+static void
+AliasHandleRtspOut(struct libalias *, struct ip *, struct alias_link *,
+ int maxpacketsize);
+static int
+fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ah->dport != NULL && ah->aport != NULL && ah->sport != NULL &&
+ ntohs(*ah->dport) == TFTP_PORT_NUMBER)
+ return (0);
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ ah->maxpktsize == 0)
+ return (-1);
+ if (ntohs(*ah->dport) == RTSP_CONTROL_PORT_NUMBER_1
+ || ntohs(*ah->sport) == RTSP_CONTROL_PORT_NUMBER_1
+ || ntohs(*ah->dport) == RTSP_CONTROL_PORT_NUMBER_2
+ || ntohs(*ah->sport) == RTSP_CONTROL_PORT_NUMBER_2)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ntohs(*ah->dport) == TFTP_PORT_NUMBER)
+ FindRtspOut(la, pip->ip_src, pip->ip_dst,
+ *ah->sport, *ah->aport, IPPROTO_UDP);
+ else AliasHandleRtspOut(la, pip, ah->lnk, ah->maxpktsize);
+ return (0);
+}
+
+struct proto_handler handlers[] = {
+ {
+ .pri = 100,
+ .dir = OUT,
+ .proto = TCP|UDP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandler
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_smedia", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_smedia, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_smedia, 1);
+MODULE_DEPEND(alias_smedia, libalias, 1, 1, 1);
#endif
#define RTSP_CONTROL_PORT_NUMBER_1 554
@@ -392,7 +476,7 @@
return (0);
}
-void
+static void
AliasHandleRtspOut(struct libalias *la, struct ip *pip, struct alias_link *lnk, int maxpacketsize)
{
int hlen, tlen, dlen;
Index: alias_local.h
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_local.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_local.h -L sys/netinet/libalias/alias_local.h -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_local.h
+++ sys/netinet/libalias/alias_local.h
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/netinet/libalias/alias_local.h,v 1.32 2005/06/27 07:36:02 glebius Exp $
+ * $FreeBSD: src/sys/netinet/libalias/alias_local.h,v 1.34 2006/12/15 12:50:06 piso Exp $
*/
/*
@@ -46,18 +46,16 @@
#ifndef _ALIAS_LOCAL_H_
#define _ALIAS_LOCAL_H_
-#include <sys/queue.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
-/* Use kernel allocator. */
-#if defined(_KERNEL) && defined(_SYS_MALLOC_H_)
-MALLOC_DECLARE(M_ALIAS);
-#define malloc(x) malloc(x, M_ALIAS, M_NOWAIT|M_ZERO)
-#define calloc(x, n) malloc(x*n)
-#define free(x) free(x, M_ALIAS)
-#endif
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
/* XXX: LibAliasSetTarget() uses this constant. */
-#ifdef _KERNEL
#define INADDR_NONE 0xffffffff
#endif
@@ -116,10 +114,14 @@
int deleteAllLinks; /* If equal to zero, DeleteLink() */
/* will not remove permanent links */
-#ifndef NO_LOGGING
- FILE *monitorFile; /* File descriptor for link */
+
+ /* log descriptor */
+#ifdef _KERNEL
+ char *logDesc;
+#else
+ FILE *logDesc;
#endif
- /* statistics monitoring file */
+ /* statistics monitoring */
int newDefaultLink; /* Indicates if a new aliasing */
/* link has been created after a */
@@ -147,11 +149,31 @@
struct in_addr true_addr; /* in network byte order. */
u_short true_port; /* in host byte order. */
-
+#ifdef _KERNEL
+ /*
+ * avoid races in libalias: every public function has to use it.
+ */
+ struct mtx mutex;
+#endif
};
/* Macros */
+#ifdef _KERNEL
+#define LIBALIAS_LOCK_INIT(l) \
+ mtx_init(&l->mutex, "per-instance libalias mutex", NULL, MTX_DEF)
+#define LIBALIAS_LOCK_ASSERT(l) mtx_assert(&l->mutex, MA_OWNED)
+#define LIBALIAS_LOCK(l) mtx_lock(&l->mutex)
+#define LIBALIAS_UNLOCK(l) mtx_unlock(&l->mutex)
+#define LIBALIAS_LOCK_DESTROY(l) mtx_destroy(&l->mutex)
+#else
+#define LIBALIAS_LOCK_INIT(l)
+#define LIBALIAS_LOCK_ASSERT(l)
+#define LIBALIAS_LOCK(l)
+#define LIBALIAS_UNLOCK(l)
+#define LIBALIAS_LOCK_DESTROY(l)
+#endif
+
/*
* The following macro is used to update an
* internet checksum. "delta" is a 32-bit
@@ -296,43 +318,6 @@
/* Tcp specfic routines */
/* lint -save -library Suppress flexelint warnings */
-/* FTP routines */
-void
-AliasHandleFtpOut(struct libalias *la, struct ip *_pip, struct alias_link *_lnk,
- int _maxpacketsize);
-
-/* IRC routines */
-void
-AliasHandleIrcOut(struct libalias *la, struct ip *_pip, struct alias_link *_lnk,
- int _maxsize);
-
-/* RTSP routines */
-void
-AliasHandleRtspOut(struct libalias *la, struct ip *_pip, struct alias_link *_lnk,
- int _maxpacketsize);
-
-/* PPTP routines */
-void AliasHandlePptpOut(struct libalias *la, struct ip *_pip, struct alias_link *_lnk);
-void AliasHandlePptpIn(struct libalias *la, struct ip *_pip, struct alias_link *_lnk);
-int AliasHandlePptpGreOut(struct libalias *la, struct ip *_pip);
-int AliasHandlePptpGreIn(struct libalias *la, struct ip *_pip);
-
-/* NetBIOS routines */
-int
-AliasHandleUdpNbt(struct libalias *la, struct ip *_pip, struct alias_link *_lnk,
- struct in_addr *_alias_address, u_short _alias_port);
-int
-AliasHandleUdpNbtNS(struct libalias *la, struct ip *_pip, struct alias_link *_lnk,
- struct in_addr *_alias_address, u_short * _alias_port,
- struct in_addr *_original_address, u_short * _original_port);
-
-/* CUSeeMe routines */
-void AliasHandleCUSeeMeOut(struct libalias *la, struct ip *_pip, struct alias_link *_lnk);
-void AliasHandleCUSeeMeIn(struct libalias *la, struct ip *_pip, struct in_addr _original_addr);
-
-/* Skinny routines */
-void AliasHandleSkinny(struct libalias *la, struct ip *_pip, struct alias_link *_lnk);
-
/* Transparent proxy routines */
int
ProxyCheck(struct libalias *la, struct ip *_pip, struct in_addr *_proxy_server_addr,
@@ -373,6 +358,4 @@
}
#endif
-/*lint -restore */
-
#endif /* !_ALIAS_LOCAL_H_ */
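
The new locking macros above are meant to be paired with the instance lifecycle; the constructor/destructor side lives in alias_db.c and is not shown in this excerpt, so the pairing below is an assumption of the sketch. In a userland build all five macros expand to nothing, which is what lets the same sources compile in both environments:

    #include "alias.h"              /* PKT_ALIAS_LOG */
    #include "alias_local.h"        /* struct libalias, LIBALIAS_* macros */

    /* Assumed pairing of lock setup/teardown with the instance lifetime. */
    static void
    lock_lifecycle(struct libalias *la)
    {
            LIBALIAS_LOCK_INIT(la);         /* once, when the instance is created */

            LIBALIAS_LOCK(la);              /* bracket each public operation */
            la->packetAliasMode |= PKT_ALIAS_LOG;   /* example state change */
            LIBALIAS_UNLOCK(la);

            LIBALIAS_LOCK_DESTROY(la);      /* when the instance is torn down */
    }
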
Index: alias_pptp.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_pptp.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_pptp.c -L sys/netinet/libalias/alias_pptp.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_pptp.c
+++ sys/netinet/libalias/alias_pptp.c
@@ -37,7 +37,170 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_pptp.c,v 1.14 2005/05/05 21:55:17 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_pptp.c,v 1.15 2006/09/26 23:26:53 piso Exp $");
+
+/* Includes */
+#ifdef _KERNEL
+#include <sys/param.h>
+#include <sys/limits.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#else
+#include <errno.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <stdio.h>
+#endif
+
+#include <netinet/tcp.h>
+
+#ifdef _KERNEL
+#include <netinet/libalias/alias.h>
+#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
+#else
+#include "alias.h"
+#include "alias_local.h"
+#include "alias_mod.h"
+#endif
+
+#define PPTP_CONTROL_PORT_NUMBER 1723
+
+static void
+AliasHandlePptpOut(struct libalias *, struct ip *, struct alias_link *);
+
+static void
+AliasHandlePptpIn(struct libalias *, struct ip *, struct alias_link *);
+
+static int
+AliasHandlePptpGreOut(struct libalias *, struct ip *);
+
+static int
+AliasHandlePptpGreIn(struct libalias *, struct ip *);
+
+static int
+fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL)
+ return (-1);
+ if (ntohs(*ah->dport) == PPTP_CONTROL_PORT_NUMBER
+ || ntohs(*ah->sport) == PPTP_CONTROL_PORT_NUMBER)
+ return (0);
+ return (-1);
+}
+
+static int
+fingerprintgre(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ return (0);
+}
+
+static int
+protohandlerin(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandlePptpIn(la, pip, ah->lnk);
+ return (0);
+}
+
+static int
+protohandlerout(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandlePptpOut(la, pip, ah->lnk);
+ return (0);
+}
+
+static int
+protohandlergrein(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (la->packetAliasMode & PKT_ALIAS_PROXY_ONLY ||
+ AliasHandlePptpGreIn(la, pip) == 0)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandlergreout(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (AliasHandlePptpGreOut(la, pip) == 0)
+ return (0);
+ return (-1);
+}
+
+/* Kernel module definition. */
+struct proto_handler handlers[] = {
+ {
+ .pri = 200,
+ .dir = IN,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandlerin
+ },
+ {
+ .pri = 210,
+ .dir = OUT,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandlerout
+ },
+/*
+ * WATCH OUT!!! These two handlers NEED a priority of INT_MAX (the highest
+ * possible) because they will ALWAYS process packets, so they must be the
+ * last ones in the chain: see fingerprintgre() above.
+ */
+ {
+ .pri = INT_MAX,
+ .dir = IN,
+ .proto = IP,
+ .fingerprint = &fingerprintgre,
+ .protohandler = &protohandlergrein
+ },
+ {
+ .pri = INT_MAX,
+ .dir = OUT,
+ .proto = IP,
+ .fingerprint = &fingerprintgre,
+ .protohandler = &protohandlergreout
+ },
+ { EOH }
+};
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_pptp", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_pptp, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_pptp, 1);
+MODULE_DEPEND(alias_pptp, libalias, 1, 1, 1);
+#endif
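Note: this is the new alias_mod plug-in shape every libalias helper adopts in
this commit: an array of proto_handler entries ordered by .pri, each with a
fingerprint() that decides whether the packet belongs to the helper and a
protohandler() that performs the rewrite, plus a mod_handler that attaches and
detaches the array on module load/unload.  A conceptual sketch of how such a
chain is consumed (not code from this commit; it assumes the EOH sentinel
terminates the array and ignores the .dir/.proto matching the real dispatcher
also performs):

    static int
    walk_chain(struct libalias *la, struct ip *pip, struct alias_data *ah,
        struct proto_handler *chain)
    {
            struct proto_handler *p;

            /* First matching fingerprint wins; the INT_MAX GRE entries above
             * match everything, so they only run when nothing more specific
             * claimed the packet. */
            for (p = chain; p->pri != EOH; p++)
                    if (p->fingerprint(la, pip, ah) == 0)
                            return (p->protohandler(la, pip, ah));
            return (ENOENT);        /* no helper wanted this packet */
    }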
/*
Alias_pptp.c performs special processing for PPTP sessions under TCP.
@@ -65,26 +228,6 @@
*/
-/* Includes */
-#ifdef _KERNEL
-#include <sys/param.h>
-#else
-#include <sys/types.h>
-#include <stdio.h>
-#endif
-
-#include <netinet/in_systm.h>
-#include <netinet/in.h>
-#include <netinet/ip.h>
-#include <netinet/tcp.h>
-
-#ifdef _KERNEL
-#include <netinet/libalias/alias.h>
-#include <netinet/libalias/alias_local.h>
-#else
-#include "alias_local.h"
-#endif
-
/*
* PPTP definitions
*/
@@ -153,7 +296,7 @@
static PptpCallId AliasVerifyPptp(struct ip *, u_int16_t *);
-void
+static void
AliasHandlePptpOut(struct libalias *la,
struct ip *pip, /* IP packet to examine/patch */
struct alias_link *lnk)
@@ -225,7 +368,7 @@
}
}
-void
+static void
AliasHandlePptpIn(struct libalias *la,
struct ip *pip, /* IP packet to examine/patch */
struct alias_link *lnk)
@@ -328,8 +471,7 @@
return (PptpCallId) (hptr + 1);
}
-
-int
+static int
AliasHandlePptpGreOut(struct libalias *la, struct ip *pip)
{
GreHdr *gr;
@@ -353,8 +495,7 @@
return (0);
}
-
-int
+static int
AliasHandlePptpGreIn(struct libalias *la, struct ip *pip)
{
GreHdr *gr;
Index: alias_db.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_db.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_db.c -L sys/netinet/libalias/alias_db.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_db.c
+++ sys/netinet/libalias/alias_db.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_db.c,v 1.67 2005/05/06 11:07:49 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_db.c,v 1.71 2007/04/07 09:47:39 piso Exp $");
/*
Alias_db.c encapsulates all data structures used for storing
@@ -143,40 +143,32 @@
*/
#ifdef _KERNEL
+#include <machine/stdarg.h>
#include <sys/param.h>
-#else
-#include <sys/types.h>
-#endif
-
-#include <sys/errno.h>
-#include <sys/queue.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-
-#ifdef _KERNEL
-#include <sys/systm.h>
#include <sys/kernel.h>
-#include <sys/malloc.h>
#include <sys/module.h>
-#else
+#include <sys/syslog.h>
+#else
+#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
+#include <sys/errno.h>
+#include <sys/time.h>
#include <unistd.h>
-#include <arpa/inet.h>
#endif
-/* BSD network include files */
-#include <netinet/in_systm.h>
-#include <netinet/in.h>
-#include <netinet/ip.h>
+#include <sys/socket.h>
#include <netinet/tcp.h>
#ifdef _KERNEL
#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
+#include <net/if.h>
#else
#include "alias.h"
#include "alias_local.h"
+#include "alias_mod.h"
#endif
static LIST_HEAD(, libalias) instancehead = LIST_HEAD_INITIALIZER(instancehead);
@@ -358,10 +350,12 @@
switch (type) {
case MOD_LOAD:
error = 0;
+ handler_chain_init();
break;
case MOD_QUIESCE:
case MOD_UNLOAD:
- finishoff();
+ handler_chain_destroy();
+ finishoff();
error = 0;
break;
default:
@@ -409,12 +403,10 @@
#endif
-#ifndef NO_LOGGING
/* Log file control */
static void ShowAliasStats(struct libalias *);
-static void InitPacketAliasLog(struct libalias *);
+static int InitPacketAliasLog(struct libalias *);
static void UninitPacketAliasLog(struct libalias *);
-#endif
static u_int
StartPointIn(struct in_addr alias_addr,
@@ -462,37 +454,56 @@
return (ntohl(y) - ntohl(x));
}
+#ifdef _KERNEL
-#ifndef NO_LOGGING
static void
-ShowAliasStats(struct libalias *la)
+AliasLog(char *str, const char *format, ...)
+{
+ va_list ap;
+
+ va_start(ap, format);
+ vsnprintf(str, LIBALIAS_BUF_SIZE, format, ap);
+ va_end(ap);
+}
+#else
+static void
+AliasLog(FILE *stream, const char *format, ...)
{
-/* Used for debugging */
+ va_list ap;
+
+ va_start(ap, format);
+ vfprintf(stream, format, ap);
+ va_end(ap);
+ fflush(stream);
+}
+#endif
- if (la->monitorFile) {
- fprintf(la->monitorFile,
- "icmp=%d, udp=%d, tcp=%d, pptp=%d, proto=%d, frag_id=%d frag_ptr=%d",
- la->icmpLinkCount,
- la->udpLinkCount,
- la->tcpLinkCount,
- la->pptpLinkCount,
- la->protoLinkCount,
- la->fragmentIdLinkCount,
- la->fragmentPtrLinkCount);
-
- fprintf(la->monitorFile, " / tot=%d (sock=%d)\n",
- la->icmpLinkCount + la->udpLinkCount
- + la->tcpLinkCount
- + la->pptpLinkCount
- + la->protoLinkCount
- + la->fragmentIdLinkCount
- + la->fragmentPtrLinkCount,
- la->sockCount);
+static void
+ShowAliasStats(struct libalias *la)
+{
- fflush(la->monitorFile);
+ LIBALIAS_LOCK_ASSERT(la);
+/* Used for debugging */
+ if (la->logDesc) {
+ int tot = la->icmpLinkCount + la->udpLinkCount +
+ la->tcpLinkCount + la->pptpLinkCount +
+ la->protoLinkCount + la->fragmentIdLinkCount +
+ la->fragmentPtrLinkCount;
+
+ AliasLog(la->logDesc,
+ "icmp=%u, udp=%u, tcp=%u, pptp=%u, proto=%u, frag_id=%u frag_ptr=%u / tot=%u",
+ la->icmpLinkCount,
+ la->udpLinkCount,
+ la->tcpLinkCount,
+ la->pptpLinkCount,
+ la->protoLinkCount,
+ la->fragmentIdLinkCount,
+ la->fragmentPtrLinkCount, tot);
+#ifndef _KERNEL
+ AliasLog(la->logDesc, " (sock=%u)\n", la->sockCount);
+#endif
}
}
-#endif
/* Internal routines for finding, deleting and adding links
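Note: logging now goes through the single la->logDesc field: a malloc()ed
LIBALIAS_BUF_SIZE character buffer filled via vsnprintf() in the kernel, or a
FILE * on /var/log/alias.log written via vfprintf() in userland, with the two
AliasLog() wrappers above hiding the difference.  An illustrative caller (the
helper itself is hypothetical; AliasLog() and the fields are from the diff):

    static void
    LogLinkCounts(struct libalias *la)
    {

            if (la->logDesc == NULL)        /* logging never enabled */
                    return;
            AliasLog(la->logDesc, "tcp=%u, udp=%u",
                la->tcpLinkCount, la->udpLinkCount);
    }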
@@ -565,6 +576,7 @@
u_short port_sys;
u_short port_net;
+ LIBALIAS_LOCK_ASSERT(la);
/*
Description of alias_port_param for GetNewPort(). When
this parameter is zero or positive, it precisely specifies
@@ -665,6 +677,7 @@
int sock;
struct sockaddr_in sock_addr;
+ LIBALIAS_LOCK_ASSERT(la);
if (link_type == LINK_TCP)
sock = socket(AF_INET, SOCK_STREAM, 0);
else if (link_type == LINK_UDP)
@@ -723,6 +736,7 @@
u_short port_sys;
int link_type;
+ LIBALIAS_LOCK_ASSERT(la);
/*
* Get link_type from protocol
*/
@@ -802,6 +816,7 @@
struct alias_link *lnk;
int i, icount;
+ LIBALIAS_LOCK_ASSERT(la);
icount = 0;
for (i = 0; i < LINK_TABLE_OUT_SIZE; i++) {
lnk = LIST_FIRST(&la->linkTableOut[i]);
@@ -825,6 +840,7 @@
int icount;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
icount = 0;
lnk = LIST_FIRST(&la->linkTableOut[la->cleanupIndex++]);
while (lnk != NULL) {
@@ -865,6 +881,7 @@
{
struct libalias *la = lnk->la;
+ LIBALIAS_LOCK_ASSERT(la);
/* Don't do anything if the link is marked permanent */
if (la->deleteAllLinks == 0 && lnk->flags & LINK_PERMANENT)
return;
@@ -929,12 +946,10 @@
/* Free memory */
free(lnk);
-#ifndef NO_LOGGING
/* Write statistics, if logging enabled */
if (la->packetAliasMode & PKT_ALIAS_LOG) {
ShowAliasStats(la);
}
-#endif
}
@@ -951,6 +966,7 @@
u_int start_point; /* zero, equal to alias port */
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = malloc(sizeof(struct alias_link));
if (lnk != NULL) {
/* Basic initialization */
@@ -1072,11 +1088,9 @@
fprintf(stderr, "malloc() call failed.\n");
#endif
}
-#ifndef NO_LOGGING
if (la->packetAliasMode & PKT_ALIAS_LOG) {
ShowAliasStats(la);
}
-#endif
return (lnk);
}
@@ -1094,6 +1108,7 @@
struct alias_link *new_lnk; /* zero, equal to alias port */
struct libalias *la = old_lnk->la;
+ LIBALIAS_LOCK_ASSERT(la);
new_lnk = AddLink(la, src_addr, dst_addr, alias_addr,
src_port, dst_port, alias_port_param,
link_type);
@@ -1119,6 +1134,7 @@
u_int i;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
i = StartPointOut(src_addr, dst_addr, src_port, dst_port, link_type);
LIST_FOREACH(lnk, &la->linkTableOut[i], list_out) {
if (lnk->src_addr.s_addr == src_addr.s_addr
@@ -1166,6 +1182,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = _FindLinkOut(la, src_addr, dst_addr, src_port, dst_port,
link_type, replace_partial_links);
@@ -1202,6 +1219,7 @@
struct alias_link *lnk_unknown_dst_addr;
struct alias_link *lnk_unknown_dst_port;
+ LIBALIAS_LOCK_ASSERT(la);
/* Initialize pointers */
lnk_fully_specified = NULL;
lnk_unknown_all = NULL;
@@ -1305,6 +1323,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = _FindLinkIn(la, dst_addr, alias_addr, dst_port, alias_port,
link_type, replace_partial_links);
@@ -1352,6 +1371,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindLinkIn(la, dst_addr, alias_addr,
NO_DEST_PORT, id_alias,
LINK_ICMP, 0);
@@ -1375,6 +1395,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindLinkOut(la, src_addr, dst_addr,
id, NO_DEST_PORT,
LINK_ICMP, 0);
@@ -1397,6 +1418,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindLinkIn(la, dst_addr, alias_addr,
NO_DEST_PORT, ip_id,
LINK_FRAGMENT_ID, 0);
@@ -1416,6 +1438,8 @@
struct in_addr alias_addr, /* is not found. */
u_short ip_id)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
return FindLinkIn(la, dst_addr, alias_addr,
NO_DEST_PORT, ip_id,
LINK_FRAGMENT_ID, 0);
@@ -1426,6 +1450,8 @@
AddFragmentPtrLink(struct libalias *la, struct in_addr dst_addr,
u_short ip_id)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
return AddLink(la, la->nullAddress, dst_addr, la->nullAddress,
NO_SRC_PORT, NO_DEST_PORT, ip_id,
LINK_FRAGMENT_PTR);
@@ -1436,6 +1462,8 @@
FindFragmentPtr(struct libalias *la, struct in_addr dst_addr,
u_short ip_id)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
return FindLinkIn(la, dst_addr, la->nullAddress,
NO_DEST_PORT, ip_id,
LINK_FRAGMENT_PTR, 0);
@@ -1449,6 +1477,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindLinkIn(la, dst_addr, alias_addr,
NO_DEST_PORT, 0,
proto, 1);
@@ -1472,6 +1501,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindLinkOut(la, src_addr, dst_addr,
NO_SRC_PORT, NO_DEST_PORT,
proto, 1);
@@ -1499,6 +1529,7 @@
int link_type;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
switch (proto) {
case IPPROTO_UDP:
link_type = LINK_UDP;
@@ -1538,6 +1569,7 @@
int link_type;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
switch (proto) {
case IPPROTO_UDP:
link_type = LINK_UDP;
@@ -1572,6 +1604,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = AddLink(la, src_addr, dst_addr, alias_addr,
src_call_id, 0, GET_ALIAS_PORT,
LINK_PPTP);
@@ -1588,6 +1621,7 @@
u_int i;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
i = StartPointOut(src_addr, dst_addr, 0, 0, LINK_PPTP);
LIST_FOREACH(lnk, &la->linkTableOut[i], list_out)
if (lnk->link_type == LINK_PPTP &&
@@ -1608,6 +1642,7 @@
u_int i;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
i = StartPointOut(src_addr, dst_addr, 0, 0, LINK_PPTP);
LIST_FOREACH(lnk, &la->linkTableOut[i], list_out)
if (lnk->link_type == LINK_PPTP &&
@@ -1628,6 +1663,7 @@
u_int i;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
i = StartPointIn(alias_addr, 0, LINK_PPTP);
LIST_FOREACH(lnk, &la->linkTableIn[i], list_in)
if (lnk->link_type == LINK_PPTP &&
@@ -1647,6 +1683,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindLinkIn(la, dst_addr, alias_addr,
0 /* any */ , alias_call_id,
LINK_PPTP, 0);
@@ -1666,6 +1703,7 @@
int link_type;
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
switch (proto) {
case IPPROTO_UDP:
link_type = LINK_UDP;
@@ -1697,6 +1735,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindLinkIn(la, la->nullAddress, alias_addr,
0, 0, LINK_ADDR, 0);
if (lnk == NULL) {
@@ -1729,6 +1768,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK_ASSERT(la);
lnk = FindLinkOut(la, original_addr, la->nullAddress,
0, 0, LINK_ADDR, 0);
if (lnk == NULL) {
@@ -1885,6 +1925,8 @@
struct in_addr
GetDefaultAliasAddress(struct libalias *la)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
return (la->aliasAddress);
}
@@ -1892,6 +1934,8 @@
void
SetDefaultAliasAddress(struct libalias *la, struct in_addr alias_addr)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
la->aliasAddress = alias_addr;
}
@@ -2106,6 +2150,8 @@
void
ClearCheckNewLink(struct libalias *la)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
la->newDefaultLink = 0;
}
@@ -2128,8 +2174,9 @@
{
struct libalias *la = lnk->la;
+ LIBALIAS_LOCK_ASSERT(la);
la->deleteAllLinks = 1;
- lnk = ReLink(lnk, lnk->src_addr, lnk->dst_addr, lnk->alias_addr,
+ ReLink(lnk, lnk->src_addr, lnk->dst_addr, lnk->alias_addr,
lnk->src_port, cid, lnk->alias_port, lnk->link_type);
la->deleteAllLinks = 0;
}
@@ -2160,13 +2207,14 @@
struct timezone tz;
#endif
+ LIBALIAS_LOCK_ASSERT(la);
/*
* Save system time (seconds) in global variable timeStamp for use
* by other functions. This is done so as not to unnecessarily
* waste timeline by making system calls.
*/
#ifdef _KERNEL
- la->timeStamp = time_second;
+ la->timeStamp = time_uptime;
#else
gettimeofday(&tv, &tz);
la->timeStamp = tv.tv_sec;
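Note: in kernel builds the timestamps kept in struct libalias are now taken
from time_uptime (seconds since boot, monotonic) rather than time_second (wall
clock), so expiry and housekeeping intervals derived from them can no longer
be skewed by settimeofday(2) or an NTP step.  An illustrative check under that
assumption (the helper is hypothetical; only la->timeStamp comes from the diff):

    static int
    StaleEntry(struct libalias *la, int last_access, int timeout)
    {
            /* Monotonic stamps keep this difference meaningful even across
             * wall-clock adjustments. */
            return (la->timeStamp - last_access > timeout);
    }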
@@ -2203,30 +2251,44 @@
}
}
-#ifndef NO_LOGGING
/* Init the log file and enable logging */
-static void
+static int
InitPacketAliasLog(struct libalias *la)
{
- if ((~la->packetAliasMode & PKT_ALIAS_LOG)
- && (la->monitorFile = fopen("/var/log/alias.log", "w"))) {
+
+ LIBALIAS_LOCK_ASSERT(la);
+ if (~la->packetAliasMode & PKT_ALIAS_LOG) {
+#ifdef _KERNEL
+ if ((la->logDesc = malloc(LIBALIAS_BUF_SIZE)))
+ ;
+#else
+ if ((la->logDesc = fopen("/var/log/alias.log", "w")))
+ fprintf(la->logDesc, "PacketAlias/InitPacketAliasLog: Packet alias logging enabled.\n");
+#endif
+ else
+ return (ENOMEM); /* log initialization failed */
la->packetAliasMode |= PKT_ALIAS_LOG;
- fprintf(la->monitorFile,
- "PacketAlias/InitPacketAliasLog: Packet alias logging enabled.\n");
}
+
+ return (1);
}
/* Close the log-file and disable logging. */
static void
UninitPacketAliasLog(struct libalias *la)
{
- if (la->monitorFile) {
- fclose(la->monitorFile);
- la->monitorFile = NULL;
+
+ LIBALIAS_LOCK_ASSERT(la);
+ if (la->logDesc) {
+#ifdef _KERNEL
+ free(la->logDesc);
+#else
+ fclose(la->logDesc);
+#endif
+ la->logDesc = NULL;
}
la->packetAliasMode &= ~PKT_ALIAS_LOG;
}
-#endif
/* Outside world interfaces
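Note: InitPacketAliasLog() now reports failure (ENOMEM) instead of returning
void, and under _KERNEL it allocates a LIBALIAS_BUF_SIZE buffer instead of
opening a file; LibAliasSetMode() below propagates the failure as -1.  A
userland usage sketch (a hypothetical natd(8)-style consumer, not part of this
commit):

    struct libalias *la;

    la = LibAliasInit(NULL);
    if (LibAliasSetMode(la, PKT_ALIAS_LOG, PKT_ALIAS_LOG) < 0)
            fprintf(stderr, "could not enable libalias logging\n");
    /* ... aliasing runs; stats lines land in /var/log/alias.log ... */
    LibAliasSetMode(la, 0, PKT_ALIAS_LOG);  /* disable and close the log */
    LibAliasUninit(la);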
@@ -2257,6 +2319,7 @@
int link_type;
struct alias_link *lnk;
+ LIBALIAS_LOCK(la);
switch (proto) {
case IPPROTO_UDP:
link_type = LINK_UDP;
@@ -2269,7 +2332,8 @@
fprintf(stderr, "PacketAliasRedirectPort(): ");
fprintf(stderr, "only TCP and UDP protocols allowed\n");
#endif
- return (NULL);
+ lnk = NULL;
+ goto getout;
}
lnk = AddLink(la, src_addr, dst_addr, alias_addr,
@@ -2286,6 +2350,8 @@
}
#endif
+getout:
+ LIBALIAS_UNLOCK(la);
return (lnk);
}
@@ -2294,7 +2360,9 @@
LibAliasAddServer(struct libalias *la, struct alias_link *lnk, struct in_addr addr, u_short port)
{
struct server *server;
+ int res;
+ LIBALIAS_LOCK(la);
(void)la;
server = malloc(sizeof(struct server));
@@ -2316,9 +2384,12 @@
server->next = head;
}
lnk->server = server;
- return (0);
+ res = 0;
} else
- return (-1);
+ res = -1;
+
+ LIBALIAS_UNLOCK(la);
+ return (res);
}
/* Redirect packets of a given IP protocol from a specific
@@ -2331,6 +2402,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK(la);
lnk = AddLink(la, src_addr, dst_addr, alias_addr,
NO_SRC_PORT, NO_DEST_PORT, 0,
proto);
@@ -2345,6 +2417,7 @@
}
#endif
+ LIBALIAS_UNLOCK(la);
return (lnk);
}
@@ -2355,6 +2428,7 @@
{
struct alias_link *lnk;
+ LIBALIAS_LOCK(la);
lnk = AddLink(la, src_addr, la->nullAddress, alias_addr,
0, 0, 0,
LINK_ADDR);
@@ -2369,6 +2443,7 @@
}
#endif
+ LIBALIAS_UNLOCK(la);
return (lnk);
}
@@ -2377,15 +2452,19 @@
int
LibAliasRedirectDynamic(struct libalias *la, struct alias_link *lnk)
{
+ int res;
+ LIBALIAS_LOCK(la);
(void)la;
if (lnk->flags & LINK_PARTIALLY_SPECIFIED)
- return (-1);
+ res = -1;
else {
lnk->flags &= ~LINK_PERMANENT;
- return (0);
+ res = 0;
}
+ LIBALIAS_UNLOCK(la);
+ return (res);
}
@@ -2395,27 +2474,35 @@
/* This is a dangerous function to put in the API,
because an invalid pointer can crash the program. */
+ LIBALIAS_LOCK(la);
la->deleteAllLinks = 1;
DeleteLink(lnk);
la->deleteAllLinks = 0;
+ LIBALIAS_UNLOCK(la);
}
void
LibAliasSetAddress(struct libalias *la, struct in_addr addr)
{
+
+ LIBALIAS_LOCK(la);
if (la->packetAliasMode & PKT_ALIAS_RESET_ON_ADDR_CHANGE
&& la->aliasAddress.s_addr != addr.s_addr)
CleanupAliasData(la);
la->aliasAddress = addr;
+ LIBALIAS_UNLOCK(la);
}
void
LibAliasSetTarget(struct libalias *la, struct in_addr target_addr)
{
+
+ LIBALIAS_LOCK(la);
la->targetAddress = target_addr;
+ LIBALIAS_UNLOCK(la);
}
static void
@@ -2447,8 +2534,8 @@
LIST_INSERT_HEAD(&instancehead, la, instancelist);
#ifdef _KERNEL
- la->timeStamp = time_second;
- la->lastCleanupTime = time_second;
+ la->timeStamp = time_uptime;
+ la->lastCleanupTime = time_uptime;
#else
gettimeofday(&tv, &tz);
la->timeStamp = tv.tv_sec;
@@ -2460,8 +2547,10 @@
LIST_INIT(&la->linkTableOut[i]);
for (i = 0; i < LINK_TABLE_IN_SIZE; i++)
LIST_INIT(&la->linkTableIn[i]);
-
+ LIBALIAS_LOCK_INIT(la);
+ LIBALIAS_LOCK(la);
} else {
+ LIBALIAS_LOCK(la);
la->deleteAllLinks = 1;
CleanupAliasData(la);
la->deleteAllLinks = 0;
@@ -2489,22 +2578,28 @@
#ifndef NO_FW_PUNCH
la->fireWallFD = -1;
#endif
+#ifndef _KERNEL
+ LibAliasRefreshModules();
+#endif
+ LIBALIAS_UNLOCK(la);
return (la);
}
void
LibAliasUninit(struct libalias *la)
{
+
+ LIBALIAS_LOCK(la);
la->deleteAllLinks = 1;
CleanupAliasData(la);
la->deleteAllLinks = 0;
-#ifndef NO_LOGGING
UninitPacketAliasLog(la);
-#endif
#ifndef NO_FW_PUNCH
UninitPunchFW(la);
#endif
LIST_REMOVE(la, instancelist);
+ LIBALIAS_UNLOCK(la);
+ LIBALIAS_LOCK_DESTROY(la);
free(la);
}
@@ -2517,16 +2612,19 @@
* do a probe for flag values) */
)
{
-#ifndef NO_LOGGING
+ int res = -1;
+
+ LIBALIAS_LOCK(la);
/* Enable logging? */
if (flags & mask & PKT_ALIAS_LOG) {
- InitPacketAliasLog(la); /* Do the enable */
+ /* Do the enable */
+ if (InitPacketAliasLog(la) == ENOMEM)
+ goto getout;
} else
/* _Disable_ logging? */
if (~flags & mask & PKT_ALIAS_LOG) {
UninitPacketAliasLog(la);
}
-#endif
#ifndef NO_FW_PUNCH
/* Start punching holes in the firewall? */
if (flags & mask & PKT_ALIAS_PUNCH_FW) {
@@ -2540,14 +2638,22 @@
/* Other flags can be set/cleared without special action */
la->packetAliasMode = (flags & mask) | (la->packetAliasMode & ~mask);
- return (la->packetAliasMode);
+ res = la->packetAliasMode;
+getout:
+ LIBALIAS_UNLOCK(la);
+ return (res);
}
int
LibAliasCheckNewLink(struct libalias *la)
{
- return (la->newDefaultLink);
+ int res;
+
+ LIBALIAS_LOCK(la);
+ res = la->newDefaultLink;
+ LIBALIAS_UNLOCK(la);
+ return (res);
}
@@ -2653,6 +2759,7 @@
InitPunchFW(struct libalias *la)
{
+ LIBALIAS_LOCK_ASSERT(la);
la->fireWallField = malloc(la->fireWallNumNums);
if (la->fireWallField) {
memset(la->fireWallField, 0, la->fireWallNumNums);
@@ -2667,6 +2774,8 @@
static void
UninitPunchFW(struct libalias *la)
{
+
+ LIBALIAS_LOCK_ASSERT(la);
ClearAllFWHoles(la);
if (la->fireWallFD >= 0)
close(la->fireWallFD);
@@ -2686,6 +2795,7 @@
struct ip_fw rule; /* On-the-fly built rule */
int fwhole; /* Where to punch hole */
+ LIBALIAS_LOCK_ASSERT(la);
la = lnk->la;
/* Don't do anything unless we are asked to */
@@ -2757,9 +2867,9 @@
static void
ClearFWHole(struct alias_link *lnk)
{
-
struct libalias *la;
+ LIBALIAS_LOCK_ASSERT(la);
la = lnk->la;
if (lnk->link_type == LINK_TCP) {
int fwhole = lnk->data.tcp->fwhole; /* Where is the firewall
@@ -2784,6 +2894,7 @@
struct ip_fw rule; /* On-the-fly built rule */
int i;
+ LIBALIAS_LOCK_ASSERT(la);
if (la->fireWallFD < 0)
return;
@@ -2802,14 +2913,20 @@
void
LibAliasSetFWBase(struct libalias *la, unsigned int base, unsigned int num)
{
+
+ LIBALIAS_LOCK(la);
#ifndef NO_FW_PUNCH
la->fireWallBaseNum = base;
la->fireWallNumNums = num;
#endif
+ LIBALIAS_UNLOCK(la);
}
void
LibAliasSetSkinnyPort(struct libalias *la, unsigned int port)
{
+
+ LIBALIAS_LOCK(la);
la->skinnyPort = port;
+ LIBALIAS_UNLOCK(la);
}
Index: alias_ftp.c
===================================================================
RCS file: /home/cvs/src/sys/netinet/libalias/alias_ftp.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet/libalias/alias_ftp.c -L sys/netinet/libalias/alias_ftp.c -u -r1.1.1.1 -r1.2
--- sys/netinet/libalias/alias_ftp.c
+++ sys/netinet/libalias/alias_ftp.c
@@ -25,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_ftp.c,v 1.27 2005/06/27 07:36:02 glebius Exp $");
+__FBSDID("$FreeBSD: src/sys/netinet/libalias/alias_ftp.c,v 1.29 2007/04/04 03:14:15 kan Exp $");
/*
Alias_ftp.c performs special processing for FTP sessions under
@@ -72,10 +72,12 @@
#ifdef _KERNEL
#include <sys/param.h>
#include <sys/ctype.h>
-#include <sys/libkern.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
#else
+#include <errno.h>
#include <sys/types.h>
-#include <ctype.h>
#include <stdio.h>
#include <string.h>
#endif
@@ -88,8 +90,81 @@
#ifdef _KERNEL
#include <netinet/libalias/alias.h>
#include <netinet/libalias/alias_local.h>
+#include <netinet/libalias/alias_mod.h>
#else
#include "alias_local.h"
+#include "alias_mod.h"
+#endif
+
+#define FTP_CONTROL_PORT_NUMBER 21
+
+static void
+AliasHandleFtpOut(struct libalias *, struct ip *, struct alias_link *,
+ int maxpacketsize);
+
+static int
+fingerprint(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ if (ah->dport == NULL || ah->sport == NULL || ah->lnk == NULL ||
+ ah->maxpktsize == 0)
+ return (-1);
+ if (ntohs(*ah->dport) == FTP_CONTROL_PORT_NUMBER
+ || ntohs(*ah->sport) == FTP_CONTROL_PORT_NUMBER)
+ return (0);
+ return (-1);
+}
+
+static int
+protohandler(struct libalias *la, struct ip *pip, struct alias_data *ah)
+{
+
+ AliasHandleFtpOut(la, pip, ah->lnk, ah->maxpktsize);
+ return (0);
+}
+
+struct proto_handler handlers[] = {
+ {
+ .pri = 80,
+ .dir = OUT,
+ .proto = TCP,
+ .fingerprint = &fingerprint,
+ .protohandler = &protohandler
+ },
+ { EOH }
+};
+
+static int
+mod_handler(module_t mod, int type, void *data)
+{
+ int error;
+
+ switch (type) {
+ case MOD_LOAD:
+ error = 0;
+ LibAliasAttachHandlers(handlers);
+ break;
+ case MOD_UNLOAD:
+ error = 0;
+ LibAliasDetachHandlers(handlers);
+ break;
+ default:
+ error = EINVAL;
+ }
+ return (error);
+}
+
+#ifdef _KERNEL
+static
+#endif
+moduledata_t alias_mod = {
+ "alias_ftp", mod_handler, NULL
+};
+
+#ifdef _KERNEL
+DECLARE_MODULE(alias_ftp, alias_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND);
+MODULE_VERSION(alias_ftp, 1);
+MODULE_DEPEND(alias_ftp, libalias, 1, 1, 1);
#endif
#define FTP_CONTROL_PORT_NUMBER 21
@@ -112,7 +187,7 @@
static int ParseFtp229Reply(struct libalias *la, char *, int);
static void NewFtpMessage(struct libalias *la, struct ip *, struct alias_link *, int, int);
-void
+static void
AliasHandleFtpOut(
struct libalias *la,
struct ip *pip, /* IP packet to examine/patch */
Index: mld6_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/mld6_var.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/mld6_var.h -L sys/netinet6/mld6_var.h -u -r1.1.1.2 -r1.2
--- sys/netinet6/mld6_var.h
+++ sys/netinet6/mld6_var.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/mld6_var.h,v 1.6.2.1 2005/12/25 14:03:38 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/mld6_var.h,v 1.7 2005/10/21 16:23:00 suz Exp $ */
/* $KAME: mld6_var.h,v 1.4 2000/03/25 07:23:54 sumikawa Exp $ */
/*-
Index: ip6_mroute.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/ip6_mroute.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/ip6_mroute.h -L sys/netinet6/ip6_mroute.h -u -r1.1.1.1 -r1.2
--- sys/netinet6/ip6_mroute.h
+++ sys/netinet6/ip6_mroute.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/ip6_mroute.h,v 1.6.2.1 2005/10/09 05:21:18 ume Exp $ */
+/* $FreeBSD: src/sys/netinet6/ip6_mroute.h,v 1.12 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: ip6_mroute.h,v 1.19 2001/06/14 06:12:55 suz Exp $ */
/*-
@@ -100,12 +100,9 @@
* Argument structure for MRT6_ADD_IF.
*/
struct mif6ctl {
- mifi_t mif6c_mifi; /* the index of the mif to be added */
- u_char mif6c_flags; /* MIFF_ flags defined below */
+ mifi_t mif6c_mifi; /* the index of the mif to be added */
+ u_char mif6c_flags; /* MIFF_ flags defined below */
u_short mif6c_pifi; /* the index of the physical IF */
-#ifdef notyet
- u_int mif6c_rate_limit; /* max rate */
-#endif
};
#define MIFF_REGISTER 0x1 /* mif represents a register end-point */
@@ -126,16 +123,16 @@
struct mrt6stat {
u_quad_t mrt6s_mfc_lookups; /* # forw. cache hash table hits */
u_quad_t mrt6s_mfc_misses; /* # forw. cache hash table misses */
- u_quad_t mrt6s_upcalls; /* # calls to mrouted */
+ u_quad_t mrt6s_upcalls; /* # calls to multicast routing daemon */
u_quad_t mrt6s_no_route; /* no route for packet's origin */
u_quad_t mrt6s_bad_tunnel; /* malformed tunnel options */
u_quad_t mrt6s_cant_tunnel; /* no room for tunnel options */
u_quad_t mrt6s_wrong_if; /* arrived on wrong interface */
u_quad_t mrt6s_upq_ovflw; /* upcall Q overflow */
- u_quad_t mrt6s_cache_cleanups; /* # entries with no upcalls */
- u_quad_t mrt6s_drop_sel; /* pkts dropped selectively */
- u_quad_t mrt6s_q_overflow; /* pkts dropped - Q overflow */
- u_quad_t mrt6s_pkt2large; /* pkts dropped - size > BKT SIZE */
+ u_quad_t mrt6s_cache_cleanups; /* # entries with no upcalls */
+ u_quad_t mrt6s_drop_sel; /* pkts dropped selectively */
+ u_quad_t mrt6s_q_overflow; /* pkts dropped - Q overflow */
+ u_quad_t mrt6s_pkt2large; /* pkts dropped - size > BKT SIZE */
u_quad_t mrt6s_upq_sockfull; /* upcalls dropped - socket full */
};
@@ -207,13 +204,10 @@
* The kernel's multicast-interface structure.
*/
struct mif6 {
- u_char m6_flags; /* MIFF_ flags defined above */
- u_int m6_rate_limit; /* max rate */
-#ifdef notyet
- struct tbf *m6_tbf; /* token bucket structure at intf. */
-#endif
- struct in6_addr m6_lcl_addr; /* local interface address */
- struct ifnet *m6_ifp; /* pointer to interface */
+ u_char m6_flags; /* MIFF_ flags defined above */
+ u_int m6_rate_limit; /* max rate */
+ struct in6_addr m6_lcl_addr; /* local interface address */
+ struct ifnet *m6_ifp; /* pointer to interface */
u_quad_t m6_pkt_in; /* # pkts in on interface */
u_quad_t m6_pkt_out; /* # pkts out on interface */
u_quad_t m6_bytes_in; /* # bytes in on interface */
@@ -231,13 +225,13 @@
struct mf6c {
struct sockaddr_in6 mf6c_origin; /* IPv6 origin of mcasts */
struct sockaddr_in6 mf6c_mcastgrp; /* multicast group associated*/
- mifi_t mf6c_parent; /* incoming IF */
+ mifi_t mf6c_parent; /* incoming IF */
struct if_set mf6c_ifset; /* set of outgoing IFs */
- u_quad_t mf6c_pkt_cnt; /* pkt count for src-grp */
- u_quad_t mf6c_byte_cnt; /* byte count for src-grp */
- u_quad_t mf6c_wrong_if; /* wrong if for src-grp */
- int mf6c_expire; /* time to clean entry up */
+ u_quad_t mf6c_pkt_cnt; /* pkt count for src-grp */
+ u_quad_t mf6c_byte_cnt; /* byte count for src-grp */
+ u_quad_t mf6c_wrong_if; /* wrong if for src-grp */
+ int mf6c_expire; /* time to clean entry up */
struct timeval mf6c_last_assert; /* last time I sent an assert*/
struct rtdetq *mf6c_stall; /* pkts waiting for route */
struct mf6c *mf6c_next; /* hash table linkage */
@@ -250,8 +244,8 @@
*/
#ifndef _NETINET_IP_MROUTE_H_
struct rtdetq { /* XXX: rtdetq is also defined in ip_mroute.h */
- struct mbuf *m; /* A copy of the packet */
- struct ifnet *ifp; /* Interface pkt came in on */
+ struct mbuf *m; /* A copy of the packet */
+ struct ifnet *ifp; /* Interface pkt came in on */
#ifdef UPCALL_TIMING
struct timeval t; /* Timestamp */
#endif /* UPCALL_TIMING */
@@ -268,10 +262,10 @@
#define MAX_UPQ6 4 /* max. no of pkts in upcall Q */
-int ip6_mrouter_set __P((struct socket *so, struct sockopt *sopt));
-int ip6_mrouter_get __P((struct socket *so, struct sockopt *sopt));
-int ip6_mrouter_done __P((void));
-int mrt6_ioctl __P((int, caddr_t));
+extern int (*ip6_mrouter_set)(struct socket *so, struct sockopt *sopt);
+extern int (*ip6_mrouter_get)(struct socket *so, struct sockopt *sopt);
+extern int (*ip6_mrouter_done)(void);
+extern int (*mrt6_ioctl)(int, caddr_t);
#endif /* _KERNEL */
#endif /* !_NETINET6_IP6_MROUTE_H_ */
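Note: the IPv6 multicast-routing entry points change from plain functions to
function pointers that are only filled in when the multicast routing code is
loaded, so callers have to test the hook before calling through it.  A small
fragment of the resulting convention (it mirrors the mrt6_ioctl check added to
in6_control() in the in6.c diff below):

    int error;

    if (ip6_mrouter_set == NULL)
            error = EOPNOTSUPP;     /* multicast routing module not loaded */
    else
            error = ip6_mrouter_set(so, sopt);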
Index: in6.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/netinet6/in6.c -L sys/netinet6/in6.c -u -r1.3 -r1.4
--- sys/netinet6/in6.c
+++ sys/netinet6/in6.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: /usr/local/www/cvsroot/FreeBSD/src/sys/netinet6/in6.c,v 1.51.2.9 2006/06/17 17:58:33 gnn Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6.c,v 1.73 2007/07/05 16:29:39 delphij Exp $ */
/* $KAME: in6.c,v 1.259 2002/01/21 11:37:50 keiichi Exp $ */
/*-
@@ -71,6 +71,7 @@
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/systm.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/kernel.h>
@@ -97,8 +98,6 @@
#include <netinet6/scope6_var.h>
#include <netinet6/in6_pcb.h>
-#include <net/net_osdep.h>
-
MALLOC_DEFINE(M_IP6MADDR, "in6_multi", "internet multicast address");
/*
@@ -141,6 +140,7 @@
struct sockaddr_in6 all1_sa;
struct rtentry *nrt = NULL;
int e;
+ char ip6buf[INET6_ADDRSTRLEN];
bzero(&all1_sa, sizeof(all1_sa));
all1_sa.sin6_family = AF_INET6;
@@ -159,11 +159,12 @@
(struct sockaddr *)&all1_sa, RTF_UP|RTF_HOST|RTF_LLINFO, &nrt);
if (e != 0) {
/* XXX need more descriptive message */
+
log(LOG_ERR, "in6_ifloop_request: "
"%s operation failed for %s (errno=%d)\n",
cmd == RTM_ADD ? "ADD" : "DELETE",
- ip6_sprintf(&((struct in6_ifaddr *)ifa)->ia_addr.sin6_addr),
- e);
+ ip6_sprintf(ip6buf,
+ &((struct in6_ifaddr *)ifa)->ia_addr.sin6_addr), e);
}
/*
@@ -187,9 +188,9 @@
}
rt_newaddrmsg(cmd, ifa, e, nrt);
- if (cmd == RTM_DELETE) {
- rtfree(nrt);
- } else {
+ if (cmd == RTM_DELETE)
+ RTFREE_LOCKED(nrt);
+ else {
/* the cmd must be RTM_ADD here */
RT_REMREF(nrt);
RT_UNLOCK(nrt);
@@ -215,7 +216,7 @@
need_loop = (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 ||
(rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0);
if (rt)
- rtfree(rt);
+ RTFREE_LOCKED(rt);
if (need_loop)
in6_ifloop_request(RTM_ADD, ifa);
}
@@ -267,7 +268,7 @@
if (rt != NULL) {
if ((rt->rt_flags & RTF_HOST) != 0 &&
(rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) {
- rtfree(rt);
+ RTFREE_LOCKED(rt);
in6_ifloop_request(RTM_DELETE, ifa);
} else
RT_UNLOCK(rt);
@@ -276,9 +277,7 @@
}
int
-in6_mask2len(mask, lim0)
- struct in6_addr *mask;
- u_char *lim0;
+in6_mask2len(struct in6_addr *mask, u_char *lim0)
{
int x = 0, y;
u_char *lim = lim0, *p;
@@ -317,34 +316,29 @@
#define ia62ifa(ia6) (&((ia6)->ia_ifa))
int
-in6_control(so, cmd, data, ifp, td)
- struct socket *so;
- u_long cmd;
- caddr_t data;
- struct ifnet *ifp;
- struct thread *td;
+in6_control(struct socket *so, u_long cmd, caddr_t data,
+ struct ifnet *ifp, struct thread *td)
{
struct in6_ifreq *ifr = (struct in6_ifreq *)data;
struct in6_ifaddr *ia = NULL;
struct in6_aliasreq *ifra = (struct in6_aliasreq *)data;
- int error, privileged;
struct sockaddr_in6 *sa6;
-
- privileged = 0;
- if (td == NULL || !suser(td))
- privileged++;
+ int error;
switch (cmd) {
case SIOCGETSGCNT_IN6:
case SIOCGETMIFCNT_IN6:
- return (mrt6_ioctl(cmd, data));
+ return (mrt6_ioctl ? mrt6_ioctl(cmd, data) : EOPNOTSUPP);
}
switch(cmd) {
case SIOCAADDRCTL_POLICY:
case SIOCDADDRCTL_POLICY:
- if (!privileged)
- return (EPERM);
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NETINET_ADDRCTRL6);
+ if (error)
+ return (error);
+ }
return (in6_src_ioctl(cmd, data));
}
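Note: the single "privileged" flag computed from suser() is replaced by
per-operation priv_check() calls against specific PRIV_NETINET_* privileges,
and a NULL thread pointer (a request originating inside the kernel) skips the
check entirely.  The surrounding hunks repeat this shape; condensed:

    if (td != NULL) {       /* NULL td == trusted, kernel-internal request */
            error = priv_check(td, PRIV_NETINET_ND6);
            if (error)
                    return (error);
    }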
@@ -357,8 +351,11 @@
case SIOCSRTRFLUSH_IN6:
case SIOCSDEFIFACE_IN6:
case SIOCSIFINFO_FLAGS:
- if (!privileged)
- return (EPERM);
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NETINET_ND6);
+ if (error)
+ return (error);
+ }
/* FALLTHROUGH */
case OSIOCGIFINFO_IN6:
case SIOCGIFINFO_IN6:
@@ -385,8 +382,11 @@
switch (cmd) {
case SIOCSSCOPE6:
- if (!privileged)
- return (EPERM);
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NETINET_SCOPE6);
+ if (error)
+ return (error);
+ }
return (scope6_set(ifp,
(struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
case SIOCGSCOPE6:
@@ -400,8 +400,15 @@
switch (cmd) {
case SIOCALIFADDR:
case SIOCDLIFADDR:
- if (!privileged)
- return (EPERM);
+ /*
+ * XXXRW: Is this checked at another layer? What priv to use
+ * here?
+ */
+ if (td != NULL) {
+ error = suser(td);
+ if (error)
+ return (error);
+ }
/* FALLTHROUGH */
case SIOCGLIFADDR:
return in6_lifaddr_ioctl(so, cmd, data, ifp, td);
@@ -490,8 +497,16 @@
if (ifra->ifra_addr.sin6_family != AF_INET6 ||
ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6))
return (EAFNOSUPPORT);
- if (!privileged)
- return (EPERM);
+
+ /*
+ * XXXRW: Is this checked at another layer? What priv to use
+ * here?
+ */
+ if (td != NULL) {
+ error = suser(td);
+ if (error)
+ return (error);
+ }
break;
@@ -510,8 +525,11 @@
{
struct in6_addrlifetime *lt;
- if (!privileged)
- return (EPERM);
+ if (td != NULL) {
+ error = priv_check(td, PRIV_NETINET_ALIFETIME6);
+ if (error)
+ return (error);
+ }
if (ia == NULL)
return (EADDRNOTAVAIL);
/* sanity for overflow - beware unsigned */
@@ -643,7 +661,7 @@
return (error);
if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr))
== NULL) {
- /*
+ /*
* this can happen when the user specify the 0 valid
* lifetime.
*/
@@ -771,11 +789,8 @@
* XXX: should this be performed under splnet()?
*/
int
-in6_update_ifa(ifp, ifra, ia, flags)
- struct ifnet *ifp;
- struct in6_aliasreq *ifra;
- struct in6_ifaddr *ia;
- int flags;
+in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
+ struct in6_ifaddr *ia, int flags)
{
int error = 0, hostIsNew = 0, plen = -1;
struct in6_ifaddr *oia;
@@ -785,6 +800,7 @@
struct in6_multi *in6m_sol;
struct rtentry *rt;
int delay;
+ char ip6buf[INET6_ADDRSTRLEN];
/* Validate parameters */
if (ifp == NULL || ifra == NULL) /* this maybe redundant */
@@ -879,7 +895,7 @@
*/
nd6log((LOG_INFO,
"in6_update_ifa: valid lifetime is 0 for %s\n",
- ip6_sprintf(&ifra->ifra_addr.sin6_addr)));
+ ip6_sprintf(ip6buf, &ifra->ifra_addr.sin6_addr)));
if (ia == NULL)
return (0); /* there's nothing to do */
@@ -901,6 +917,7 @@
if (ia == NULL)
return (ENOBUFS);
bzero((caddr_t)ia, sizeof(*ia));
+ LIST_INIT(&ia->ia6_memberships);
/* Initialize the address and masks, and put time stamp */
IFA_LOCK_INIT(&ia->ia_ifa);
ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
@@ -946,7 +963,7 @@
in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) {
nd6log((LOG_INFO, "in6_update_ifa: the prefix length of an"
" existing (%s) address should not be changed\n",
- ip6_sprintf(&ia->ia_addr.sin6_addr)));
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
error = EINVAL;
goto unlink;
}
@@ -966,7 +983,7 @@
(e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) != 0) {
nd6log((LOG_ERR, "in6_update_ifa: failed to remove "
"a route to the old destination: %s\n",
- ip6_sprintf(&ia->ia_addr.sin6_addr)));
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
/* proceed anyway... */
} else
ia->ia_flags &= ~IFA_ROUTE;
@@ -1058,15 +1075,17 @@
(MAX_RTR_SOLICITATION_DELAY * hz);
}
imm = in6_joingroup(ifp, &llsol, &error, delay);
- if (error != 0) {
+ if (imm == NULL) {
nd6log((LOG_WARNING,
"in6_update_ifa: addmulti failed for "
"%s on %s (errno=%d)\n",
- ip6_sprintf(&llsol), if_name(ifp),
+ ip6_sprintf(ip6buf, &llsol), if_name(ifp),
error));
in6_purgeaddr((struct ifaddr *)ia);
return (error);
}
+ LIST_INSERT_HEAD(&ia->ia6_memberships,
+ imm, i6mm_chain);
in6m_sol = imm->i6mm_maddr;
bzero(&mltmask, sizeof(mltmask));
@@ -1144,10 +1163,11 @@
nd6log((LOG_WARNING,
"in6_update_ifa: addmulti failed for "
"%s on %s (errno=%d)\n",
- ip6_sprintf(&mltaddr.sin6_addr),
+ ip6_sprintf(ip6buf, &mltaddr.sin6_addr),
if_name(ifp), error));
goto cleanup;
}
+ LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
/*
* join node information group address
@@ -1170,9 +1190,12 @@
nd6log((LOG_WARNING, "in6_update_ifa: "
"addmulti failed for %s on %s "
"(errno=%d)\n",
- ip6_sprintf(&mltaddr.sin6_addr),
+ ip6_sprintf(ip6buf, &mltaddr.sin6_addr),
if_name(ifp), error));
/* XXX not very fatal, go on... */
+ } else {
+ LIST_INSERT_HEAD(&ia->ia6_memberships,
+ imm, i6mm_chain);
}
}
#undef hostnamelen
@@ -1231,10 +1254,11 @@
nd6log((LOG_WARNING, "in6_update_ifa: "
"addmulti failed for %s on %s "
"(errno=%d)\n",
- ip6_sprintf(&mltaddr.sin6_addr),
+ ip6_sprintf(ip6buf, &mltaddr.sin6_addr),
if_name(ifp), error));
goto cleanup;
}
+ LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
#undef MLTMASK_LEN
}
@@ -1293,11 +1317,12 @@
}
void
-in6_purgeaddr(ifa)
- struct ifaddr *ifa;
+in6_purgeaddr(struct ifaddr *ifa)
{
struct ifnet *ifp = ifa->ifa_ifp;
struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa;
+ char ip6buf[INET6_ADDRSTRLEN];
+ struct in6_multi_mship *imm;
/* stop DAD processing */
nd6_dad_stop(ifa);
@@ -1314,8 +1339,8 @@
log(LOG_ERR, "in6_purgeaddr: failed to remove "
"a route to the p2p destination: %s on %s, "
"errno=%d\n",
- ip6_sprintf(&ia->ia_addr.sin6_addr), if_name(ifp),
- e);
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
+ if_name(ifp), e);
/* proceed anyway... */
} else
ia->ia_flags &= ~IFA_ROUTE;
@@ -1324,33 +1349,19 @@
/* Remove ownaddr's loopback rtentry, if it exists. */
in6_ifremloop(&(ia->ia_ifa));
- if (ifp->if_flags & IFF_MULTICAST) {
- /*
- * delete solicited multicast addr for deleting host id
- */
- struct in6_multi *in6m;
- struct in6_addr llsol;
- bzero(&llsol, sizeof(struct in6_addr));
- llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
- llsol.s6_addr32[1] = 0;
- llsol.s6_addr32[2] = htonl(1);
- llsol.s6_addr32[3] =
- ia->ia_addr.sin6_addr.s6_addr32[3];
- llsol.s6_addr8[12] = 0xff;
- (void)in6_setscope(&llsol, ifp, NULL); /* XXX proceed anyway */
-
- IN6_LOOKUP_MULTI(llsol, ifp, in6m);
- if (in6m)
- in6_delmulti(in6m);
+ /*
+ * leave from multicast groups we have joined for the interface
+ */
+ while ((imm = ia->ia6_memberships.lh_first) != NULL) {
+ LIST_REMOVE(imm, i6mm_chain);
+ in6_leavegroup(imm);
}
in6_unlink_ifa(ia, ifp);
}
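Note: each in6_ifaddr now remembers its multicast memberships on the new
ia6_memberships list, so tearing an address down is a simple walk calling
in6_leavegroup(), replacing the old hand-rolled solicited-node lookup and
in6_delmulti() call.  The join sites in the earlier in6_update_ifa() hunks all
follow this fragment (error handling trimmed):

    imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0);
    if (imm != NULL)
            LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
    /* ... and in6_purgeaddr() later undoes every recorded membership. */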
static void
-in6_unlink_ifa(ia, ifp)
- struct in6_ifaddr *ia;
- struct ifnet *ifp;
+in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
{
struct in6_ifaddr *oia;
int s = splnet();
@@ -1387,7 +1398,7 @@
/*
* Also, if the address being removed is autoconf'ed, call
* pfxlist_onlink_check() since the release might affect the status of
- * other (detached) addresses.
+ * other (detached) addresses.
*/
if ((oia->ia6_flags & IN6_IFF_AUTOCONF)) {
pfxlist_onlink_check();
@@ -1403,8 +1414,7 @@
}
void
-in6_purgeif(ifp)
- struct ifnet *ifp;
+in6_purgeif(struct ifnet *ifp)
{
struct ifaddr *ifa, *nifa;
@@ -1442,12 +1452,8 @@
* address encoding scheme. (see figure on page 8)
*/
static int
-in6_lifaddr_ioctl(so, cmd, data, ifp, td)
- struct socket *so;
- u_long cmd;
- caddr_t data;
- struct ifnet *ifp;
- struct thread *td;
+in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
+ struct ifnet *ifp, struct thread *td)
{
struct if_laddrreq *iflr = (struct if_laddrreq *)data;
struct ifaddr *ifa;
@@ -1511,7 +1517,7 @@
return EADDRNOTAVAIL;
hostid = IFA_IN6(ifa);
- /* prefixlen must be <= 64. */
+ /* prefixlen must be <= 64. */
if (64 < iflr->prefixlen)
return EINVAL;
prefixlen = iflr->prefixlen;
@@ -1680,11 +1686,8 @@
* and routing table entry.
*/
static int
-in6_ifinit(ifp, ia, sin6, newhost)
- struct ifnet *ifp;
- struct in6_ifaddr *ia;
- struct sockaddr_in6 *sin6;
- int newhost;
+in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia,
+ struct sockaddr_in6 *sin6, int newhost)
{
int error = 0, plen, ifacount = 0;
int s = splimp();
@@ -1696,8 +1699,6 @@
* and to validate the address if necessary.
*/
TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
- if (ifa->ifa_addr == NULL)
- continue; /* just for safety */
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
ifacount++;
@@ -1720,8 +1721,12 @@
/* we could do in(6)_socktrim here, but just omit it at this moment. */
- if (newhost && nd6_need_cache(ifp) != 0) {
- /* set the rtrequest function to create llinfo */
+ if (newhost) {
+ /*
+ * set the rtrequest function to create llinfo. It also
+ * adjust outgoing interface of the route for the local
+ * address when called via in6_ifaddloop() below.
+ */
ia->ia_ifa.ifa_rtrequest = nd6_rtrequest;
}
@@ -1786,11 +1791,8 @@
}
struct in6_multi_mship *
-in6_joingroup(ifp, addr, errorp, delay)
- struct ifnet *ifp;
- struct in6_addr *addr;
- int *errorp;
- int delay;
+in6_joingroup(struct ifnet *ifp, struct in6_addr *addr,
+ int *errorp, int delay)
{
struct in6_multi_mship *imm;
@@ -1809,8 +1811,7 @@
}
int
-in6_leavegroup(imm)
- struct in6_multi_mship *imm;
+in6_leavegroup(struct in6_multi_mship *imm)
{
if (imm->i6mm_maddr)
@@ -1823,15 +1824,11 @@
* Find an IPv6 interface link-local address specific to an interface.
*/
struct in6_ifaddr *
-in6ifa_ifpforlinklocal(ifp, ignoreflags)
- struct ifnet *ifp;
- int ignoreflags;
+in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags)
{
struct ifaddr *ifa;
TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
- if (ifa->ifa_addr == NULL)
- continue; /* just for safety */
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa))) {
@@ -1850,15 +1847,11 @@
* find the internet address corresponding to a given interface and address.
*/
struct in6_ifaddr *
-in6ifa_ifpwithaddr(ifp, addr)
- struct ifnet *ifp;
- struct in6_addr *addr;
+in6ifa_ifpwithaddr(struct ifnet *ifp, struct in6_addr *addr)
{
struct ifaddr *ifa;
TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
- if (ifa->ifa_addr == NULL)
- continue; /* just for safety */
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa)))
@@ -1869,23 +1862,20 @@
}
/*
- * Convert IP6 address to printable (loggable) representation.
+ * Convert IP6 address to printable (loggable) representation. Caller
+ * has to make sure that ip6buf is at least INET6_ADDRSTRLEN long.
*/
static char digits[] = "0123456789abcdef";
-static int ip6round = 0;
char *
-ip6_sprintf(addr)
- const struct in6_addr *addr;
+ip6_sprintf(char *ip6buf, const struct in6_addr *addr)
{
- static char ip6buf[8][48];
int i;
char *cp;
const u_int16_t *a = (const u_int16_t *)addr;
const u_int8_t *d;
- int dcolon = 0;
+ int dcolon = 0, zero = 0;
- ip6round = (ip6round + 1) & 7;
- cp = ip6buf[ip6round];
+ cp = ip6buf;
for (i = 0; i < 8; i++) {
if (dcolon == 1) {
@@ -1911,20 +1901,33 @@
continue;
}
d = (const u_char *)a;
- *cp++ = digits[*d >> 4];
- *cp++ = digits[*d++ & 0xf];
- *cp++ = digits[*d >> 4];
+ /* Try to eliminate leading zeros in printout like in :0001. */
+ zero = 1;
+ *cp = digits[*d >> 4];
+ if (*cp != '0') {
+ zero = 0;
+ cp++;
+ }
+ *cp = digits[*d++ & 0xf];
+ if (zero == 0 || (*cp != '0')) {
+ zero = 0;
+ cp++;
+ }
+ *cp = digits[*d >> 4];
+ if (zero == 0 || (*cp != '0')) {
+ zero = 0;
+ cp++;
+ }
*cp++ = digits[*d & 0xf];
*cp++ = ':';
a++;
}
- *--cp = 0;
- return (ip6buf[ip6round]);
+ *--cp = '\0';
+ return (ip6buf);
}
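Note: ip6_sprintf() no longer cycles through eight static buffers (a scheme
that was both racy and limited to eight live strings); the caller must now
supply storage of at least INET6_ADDRSTRLEN, which is why every logging site
in this commit gains a local ip6buf, and sites that print two addresses in one
line (see the nd6_rtr.c hunks below) need two separate buffers.  Caller-side
sketch (the log message itself is illustrative):

    char ip6buf[INET6_ADDRSTRLEN];

    log(LOG_INFO, "peer %s unreachable\n",
        ip6_sprintf(ip6buf, &ip6->ip6_src));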
int
-in6_localaddr(in6)
- struct in6_addr *in6;
+in6_localaddr(struct in6_addr *in6)
{
struct in6_ifaddr *ia;
@@ -1942,8 +1945,7 @@
}
int
-in6_is_addr_deprecated(sa6)
- struct sockaddr_in6 *sa6;
+in6_is_addr_deprecated(struct sockaddr_in6 *sa6)
{
struct in6_ifaddr *ia;
@@ -1964,8 +1966,7 @@
* hard coding...
*/
int
-in6_matchlen(src, dst)
-struct in6_addr *src, *dst;
+in6_matchlen(struct in6_addr *src, struct in6_addr *dst)
{
int match = 0;
u_char *s = (u_char *)src, *d = (u_char *)dst;
@@ -1985,9 +1986,7 @@
/* XXX: to be scope conscious */
int
-in6_are_prefix_equal(p1, p2, len)
- struct in6_addr *p1, *p2;
- int len;
+in6_are_prefix_equal(struct in6_addr *p1, struct in6_addr *p2, int len)
{
int bytelen, bitlen;
@@ -2012,9 +2011,7 @@
}
void
-in6_prefixlen2mask(maskp, len)
- struct in6_addr *maskp;
- int len;
+in6_prefixlen2mask(struct in6_addr *maskp, int len)
{
u_char maskarray[8] = {0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff};
int bytelen, bitlen, i;
@@ -2040,9 +2037,7 @@
* found, return the first valid address from designated IF.
*/
struct in6_ifaddr *
-in6_ifawithifp(ifp, dst)
- struct ifnet *ifp;
- struct in6_addr *dst;
+in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst)
{
int dst_scope = in6_addrscope(dst), blen = -1, tlen;
struct ifaddr *ifa;
@@ -2122,8 +2117,7 @@
* perform DAD when interface becomes IFF_UP.
*/
void
-in6_if_up(ifp)
- struct ifnet *ifp;
+in6_if_up(struct ifnet *ifp)
{
struct ifaddr *ifa;
struct in6_ifaddr *ia;
@@ -2151,8 +2145,7 @@
}
int
-in6if_do_dad(ifp)
- struct ifnet *ifp;
+in6if_do_dad(struct ifnet *ifp)
{
if ((ifp->if_flags & IFF_LOOPBACK) != 0)
return (0);
@@ -2192,7 +2185,7 @@
* to in6_maxmtu.
*/
void
-in6_setmaxmtu()
+in6_setmaxmtu(void)
{
unsigned long maxmtu = 0;
struct ifnet *ifp;
@@ -2220,8 +2213,7 @@
* consistent, and those really are as of August 2004.
*/
int
-in6_if2idlen(ifp)
- struct ifnet *ifp;
+in6_if2idlen(struct ifnet *ifp)
{
switch (ifp->if_type) {
case IFT_ETHER: /* RFC2464 */
@@ -2272,8 +2264,7 @@
}
void *
-in6_domifattach(ifp)
- struct ifnet *ifp;
+in6_domifattach(struct ifnet *ifp)
{
struct in6_ifextra *ext;
@@ -2295,9 +2286,7 @@
}
void
-in6_domifdetach(ifp, aux)
- struct ifnet *ifp;
- void *aux;
+in6_domifdetach(struct ifnet *ifp, void *aux)
{
struct in6_ifextra *ext = (struct in6_ifextra *)aux;
@@ -2315,6 +2304,7 @@
void
in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6)
{
+
bzero(sin, sizeof(*sin));
sin->sin_len = sizeof(struct sockaddr_in);
sin->sin_family = AF_INET;
Index: nd6_rtr.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/nd6_rtr.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet6/nd6_rtr.c -L sys/netinet6/nd6_rtr.c -u -r1.2 -r1.3
--- sys/netinet6/nd6_rtr.c
+++ sys/netinet6/nd6_rtr.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/nd6_rtr.c,v 1.26.2.5 2006/03/20 16:23:08 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/nd6_rtr.c,v 1.36 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $ */
/*-
@@ -60,8 +60,6 @@
#include <netinet/icmp6.h>
#include <netinet6/scope6_var.h>
-#include <net/net_osdep.h>
-
#define SDL(s) ((struct sockaddr_dl *)s)
static int rtpref __P((struct nd_defrouter *));
@@ -116,9 +114,7 @@
* Based on RFC 2461
*/
void
-nd6_rs_input(m, off, icmp6len)
- struct mbuf *m;
- int off, icmp6len;
+nd6_rs_input(struct mbuf *m, int off, int icmp6len)
{
struct ifnet *ifp = m->m_pkthdr.rcvif;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
@@ -127,6 +123,7 @@
char *lladdr = NULL;
int lladdrlen = 0;
union nd_opts ndopts;
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
/* If I'm not a router, ignore it. */
if (ip6_accept_rtadv != 0 || ip6_forwarding != 1)
@@ -136,8 +133,8 @@
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
"nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n",
- ip6->ip6_hlim, ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst), if_name(ifp)));
+ ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
goto bad;
}
@@ -177,7 +174,7 @@
nd6log((LOG_INFO,
"nd6_rs_input: lladdrlen mismatch for %s "
"(if %d, RS packet %d)\n",
- ip6_sprintf(&saddr6),
+ ip6_sprintf(ip6bufs, &saddr6),
ifp->if_addrlen, lladdrlen - 2));
goto bad;
}
@@ -201,9 +198,7 @@
* TODO: ND_RA_FLAG_{OTHER,MANAGED} processing
*/
void
-nd6_ra_input(m, off, icmp6len)
- struct mbuf *m;
- int off, icmp6len;
+nd6_ra_input(struct mbuf *m, int off, int icmp6len)
{
struct ifnet *ifp = m->m_pkthdr.rcvif;
struct nd_ifinfo *ndi = ND_IFINFO(ifp);
@@ -213,6 +208,7 @@
int mcast = 0;
union nd_opts ndopts;
struct nd_defrouter *dr;
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
/*
* We only accept RAs only when
@@ -227,15 +223,15 @@
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
"nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n",
- ip6->ip6_hlim, ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst), if_name(ifp)));
+ ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
goto bad;
}
if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) {
nd6log((LOG_ERR,
"nd6_ra_input: src %s is not link-local\n",
- ip6_sprintf(&saddr6)));
+ ip6_sprintf(ip6bufs, &saddr6)));
goto bad;
}
@@ -327,7 +323,8 @@
nd6log((LOG_INFO,
"nd6_ra_input: invalid prefix "
"%s, ignored\n",
- ip6_sprintf(&pi->nd_opt_pi_prefix)));
+ ip6_sprintf(ip6bufs,
+ &pi->nd_opt_pi_prefix)));
continue;
}
@@ -361,7 +358,7 @@
if (mtu < IPV6_MMTU) {
nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option "
"mtu=%lu sent from %s, ignoring\n",
- mtu, ip6_sprintf(&ip6->ip6_src)));
+ mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src)));
goto skip;
}
@@ -378,7 +375,7 @@
nd6log((LOG_INFO, "nd6_ra_input: bogus mtu "
"mtu=%lu sent from %s; "
"exceeds maxmtu %lu, ignoring\n",
- mtu, ip6_sprintf(&ip6->ip6_src), maxmtu));
+ mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu));
}
}
@@ -399,7 +396,7 @@
if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
nd6log((LOG_INFO,
"nd6_ra_input: lladdrlen mismatch for %s "
- "(if %d, RA packet %d)\n", ip6_sprintf(&saddr6),
+ "(if %d, RA packet %d)\n", ip6_sprintf(ip6bufs, &saddr6),
ifp->if_addrlen, lladdrlen - 2));
goto bad;
}
@@ -430,9 +427,7 @@
/* tell the change to user processes watching the routing socket. */
static void
-nd6_rtmsg(cmd, rt)
- int cmd;
- struct rtentry *rt;
+nd6_rtmsg(int cmd, struct rtentry *rt)
{
struct rt_addrinfo info;
@@ -450,8 +445,7 @@
}
void
-defrouter_addreq(new)
- struct nd_defrouter *new;
+defrouter_addreq(struct nd_defrouter *new)
{
struct sockaddr_in6 def, mask, gate;
struct rtentry *newrt = NULL;
@@ -484,9 +478,7 @@
}
struct nd_defrouter *
-defrouter_lookup(addr, ifp)
- struct in6_addr *addr;
- struct ifnet *ifp;
+defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
{
struct nd_defrouter *dr;
@@ -505,8 +497,7 @@
* not be called from anywhere else.
*/
static void
-defrouter_delreq(dr)
- struct nd_defrouter *dr;
+defrouter_delreq(struct nd_defrouter *dr)
{
struct sockaddr_in6 def, mask, gate;
struct rtentry *oldrt = NULL;
@@ -535,7 +526,7 @@
* remove all default routes from default router list
*/
void
-defrouter_reset()
+defrouter_reset(void)
{
struct nd_defrouter *dr;
@@ -550,8 +541,7 @@
}
void
-defrtrlist_del(dr)
- struct nd_defrouter *dr;
+defrtrlist_del(struct nd_defrouter *dr)
{
struct nd_defrouter *deldr = NULL;
struct nd_prefix *pr;
@@ -612,7 +602,7 @@
* complicated and the possibility of introducing bugs.
*/
void
-defrouter_select()
+defrouter_select(void)
{
int s = splnet();
struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL;
@@ -728,8 +718,7 @@
}
static struct nd_defrouter *
-defrtrlist_update(new)
- struct nd_defrouter *new;
+defrtrlist_update(struct nd_defrouter *new)
{
struct nd_defrouter *dr, *n;
int s = splnet();
@@ -814,9 +803,7 @@
}
static struct nd_pfxrouter *
-pfxrtr_lookup(pr, dr)
- struct nd_prefix *pr;
- struct nd_defrouter *dr;
+pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr)
{
struct nd_pfxrouter *search;
@@ -829,9 +816,7 @@
}
static void
-pfxrtr_add(pr, dr)
- struct nd_prefix *pr;
- struct nd_defrouter *dr;
+pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
{
struct nd_pfxrouter *new;
@@ -847,16 +832,14 @@
}
static void
-pfxrtr_del(pfr)
- struct nd_pfxrouter *pfr;
+pfxrtr_del(struct nd_pfxrouter *pfr)
{
LIST_REMOVE(pfr, pfr_entry);
free(pfr, M_IP6NDP);
}
struct nd_prefix *
-nd6_prefix_lookup(key)
- struct nd_prefixctl *key;
+nd6_prefix_lookup(struct nd_prefixctl *key)
{
struct nd_prefix *search;
@@ -873,14 +856,13 @@
}
int
-nd6_prelist_add(pr, dr, newp)
- struct nd_prefixctl *pr;
- struct nd_prefix **newp;
- struct nd_defrouter *dr;
+nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
+ struct nd_prefix **newp)
{
struct nd_prefix *new = NULL;
int error = 0;
int i, s;
+ char ip6buf[INET6_ADDRSTRLEN];
new = (struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
if (new == NULL)
@@ -920,7 +902,7 @@
if ((e = nd6_prefix_onlink(new)) != 0) {
nd6log((LOG_ERR, "nd6_prelist_add: failed to make "
"the prefix %s/%d on-link on %s (errno=%d)\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+ ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
/* proceed anyway. XXX: is it correct? */
}
@@ -933,11 +915,11 @@
}
void
-prelist_remove(pr)
- struct nd_prefix *pr;
+prelist_remove(struct nd_prefix *pr)
{
struct nd_pfxrouter *pfr, *next;
int e, s;
+ char ip6buf[INET6_ADDRSTRLEN];
/* make sure to invalidate the prefix until it is really freed. */
pr->ndpr_vltime = 0;
@@ -953,7 +935,7 @@
(e = nd6_prefix_offlink(pr)) != 0) {
nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink "
"on %s, errno=%d\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+ ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
/* what should we do? */
}
@@ -979,12 +961,13 @@
pfxlist_onlink_check();
}
+/*
+ * dr - may be NULL
+ */
+
static int
-prelist_update(new, dr, m, mcast)
- struct nd_prefixctl *new;
- struct nd_defrouter *dr; /* may be NULL */
- struct mbuf *m;
- int mcast;
+prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
+ struct mbuf *m, int mcast)
{
struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL;
struct ifaddr *ifa;
@@ -995,6 +978,7 @@
int newprefix = 0;
int auth;
struct in6_addrlifetime lt6_tmp;
+ char ip6buf[INET6_ADDRSTRLEN];
auth = 0;
if (m) {
@@ -1039,7 +1023,8 @@
"prelist_update: failed to make "
"the prefix %s/%d on-link on %s "
"(errno=%d)\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+ ip6_sprintf(ip6buf,
+ &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
/* proceed anyway. XXX: is it correct? */
}
@@ -1062,7 +1047,7 @@
nd6log((LOG_NOTICE, "prelist_update: "
"nd6_prelist_add failed for %s/%d on %s "
"errno=%d, returnpr=%p\n",
- ip6_sprintf(&new->ndpr_prefix.sin6_addr),
+ ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
new->ndpr_plen, if_name(new->ndpr_ifp),
error, newpr));
goto end; /* we should just give up in this case. */
@@ -1104,7 +1089,7 @@
goto end;
}
- /*
+ /*
* 5.5.3 (d). If the prefix advertised is not equal to the prefix of
* an address configured by stateless autoconfiguration already in the
* list of addresses associated with the interface, and the Valid
@@ -1197,14 +1182,14 @@
in6_init_address_ltimes(pr, <6_tmp);
- /*
+ /*
* We need to treat lifetimes for temporary addresses
* differently, according to
* draft-ietf-ipv6-privacy-addrs-v2-01.txt 3.3 (1);
* we only update the lifetimes when they are in the maximum
* intervals.
- */
- if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
+ */
+ if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
u_int32_t maxvltime, maxpltime;
if (ip6_temp_valid_lifetime >
@@ -1320,8 +1305,7 @@
* XXX: lengthy function name...
*/
static struct nd_pfxrouter *
-find_pfxlist_reachable_router(pr)
- struct nd_prefix *pr;
+find_pfxlist_reachable_router(struct nd_prefix *pr)
{
struct nd_pfxrouter *pfxrtr;
struct rtentry *rt;
@@ -1388,15 +1372,15 @@
}
}
if (pr != NULL || (TAILQ_FIRST(&nd_defrouter) && pfxrtr == NULL)) {
- /*
+ /*
* There is at least one prefix that has a reachable router,
* or at least a router which probably does not advertise
* any prefixes. The latter would be the case when we move
* to a new link where we have a router that does not provide
* prefixes and we configure an address by hand.
- * Detach prefixes which have no reachable advertising
- * router, and attach other prefixes.
- */
+ * Detach prefixes which have no reachable advertising
+ * router, and attach other prefixes.
+ */
for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
/* XXX: a link-local prefix should never be detached */
if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
@@ -1440,6 +1424,7 @@
*/
for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
int e;
+ char ip6buf[INET6_ADDRSTRLEN];
if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
continue;
@@ -1453,8 +1438,9 @@
nd6log((LOG_ERR,
"pfxlist_onlink_check: failed to "
"make %s/%d offlink, errno=%d\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
- pr->ndpr_plen, e));
+ ip6_sprintf(ip6buf,
+ &pr->ndpr_prefix.sin6_addr),
+ pr->ndpr_plen, e));
}
}
if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
@@ -1464,8 +1450,9 @@
nd6log((LOG_ERR,
"pfxlist_onlink_check: failed to "
"make %s/%d onlink, errno=%d\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
- pr->ndpr_plen, e));
+ ip6_sprintf(ip6buf,
+ &pr->ndpr_prefix.sin6_addr),
+ pr->ndpr_plen, e));
}
}
}
@@ -1529,8 +1516,7 @@
}
int
-nd6_prefix_onlink(pr)
- struct nd_prefix *pr;
+nd6_prefix_onlink(struct nd_prefix *pr)
{
struct ifaddr *ifa;
struct ifnet *ifp = pr->ndpr_ifp;
@@ -1539,12 +1525,14 @@
u_long rtflags;
int error = 0;
struct rtentry *rt = NULL;
+ char ip6buf[INET6_ADDRSTRLEN];
/* sanity check */
if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
nd6log((LOG_ERR,
"nd6_prefix_onlink: %s/%d is already on-link\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen));
+ ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
+ pr->ndpr_plen));
return (EEXIST);
}
@@ -1592,7 +1580,7 @@
nd6log((LOG_NOTICE,
"nd6_prefix_onlink: failed to find any ifaddr"
" to add route for a prefix(%s/%d) on %s\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+ ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(ifp)));
return (0);
}
@@ -1621,13 +1609,14 @@
nd6_rtmsg(RTM_ADD, rt);
pr->ndpr_stateflags |= NDPRF_ONLINK;
} else {
+ char ip6bufg[INET6_ADDRSTRLEN], ip6bufm[INET6_ADDRSTRLEN];
nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add route for a"
" prefix (%s/%d) on %s, gw=%s, mask=%s, flags=%lx "
"errno = %d\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr),
+ ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(ifp),
- ip6_sprintf(&((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr),
- ip6_sprintf(&mask6.sin6_addr), rtflags, error));
+ ip6_sprintf(ip6bufg, &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr),
+ ip6_sprintf(ip6bufm, &mask6.sin6_addr), rtflags, error));
}
if (rt != NULL) {
@@ -1640,20 +1629,21 @@
}
int
-nd6_prefix_offlink(pr)
- struct nd_prefix *pr;
+nd6_prefix_offlink(struct nd_prefix *pr)
{
int error = 0;
struct ifnet *ifp = pr->ndpr_ifp;
struct nd_prefix *opr;
struct sockaddr_in6 sa6, mask6;
struct rtentry *rt = NULL;
+ char ip6buf[INET6_ADDRSTRLEN];
/* sanity check */
if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
nd6log((LOG_ERR,
"nd6_prefix_offlink: %s/%d is already off-link\n",
- ip6_sprintf(&pr->ndpr_prefix.sin6_addr), pr->ndpr_plen));
+ ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
+ pr->ndpr_plen));
return (EEXIST);
}
@@ -1706,7 +1696,8 @@
"nd6_prefix_offlink: failed to "
"recover a prefix %s/%d from %s "
"to %s (errno = %d)\n",
- ip6_sprintf(&opr->ndpr_prefix.sin6_addr),
+ ip6_sprintf(ip6buf,
+ &opr->ndpr_prefix.sin6_addr),
opr->ndpr_plen, if_name(ifp),
if_name(opr->ndpr_ifp), e));
}
@@ -1717,8 +1708,8 @@
nd6log((LOG_ERR,
"nd6_prefix_offlink: failed to delete route: "
"%s/%d on %s (errno = %d)\n",
- ip6_sprintf(&sa6.sin6_addr), pr->ndpr_plen, if_name(ifp),
- error));
+ ip6_sprintf(ip6buf, &sa6.sin6_addr), pr->ndpr_plen,
+ if_name(ifp), error));
}
if (rt != NULL) {
@@ -1729,9 +1720,7 @@
}
static struct in6_ifaddr *
-in6_ifadd(pr, mcast)
- struct nd_prefixctl *pr;
- int mcast;
+in6_ifadd(struct nd_prefixctl *pr, int mcast)
{
struct ifnet *ifp = pr->ndpr_ifp;
struct ifaddr *ifa;
@@ -1741,6 +1730,7 @@
struct in6_addr mask;
int prefixlen = pr->ndpr_plen;
int updateflags;
+ char ip6buf[INET6_ADDRSTRLEN];
in6_prefixlen2mask(&mask, prefixlen);
@@ -1820,7 +1810,7 @@
ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */
- /*
+ /*
* Make sure that we do not have this address already. This should
* usually not happen, but we can still see this case, e.g., if we
* have manually configured the exact address to be configured.
@@ -1828,7 +1818,7 @@
if (in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr) != NULL) {
/* this should be rare enough to make an explicit log */
log(LOG_INFO, "in6_ifadd: %s is already configured\n",
- ip6_sprintf(&ifra.ifra_addr.sin6_addr));
+ ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
return (NULL);
}
@@ -1844,8 +1834,8 @@
if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
nd6log((LOG_ERR,
"in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
- ip6_sprintf(&ifra.ifra_addr.sin6_addr), if_name(ifp),
- error));
+ ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
+ if_name(ifp), error));
return (NULL); /* ifaddr must not have been allocated. */
}
@@ -1854,10 +1844,11 @@
return (ia); /* this is always non-NULL */
}
+/*
+ * ia0 - corresponding public address
+ */
int
-in6_tmpifadd(ia0, forcegen, delay)
- const struct in6_ifaddr *ia0; /* corresponding public address */
- int forcegen, delay;
+in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
{
struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
struct in6_ifaddr *newia, *ia;
@@ -1891,7 +1882,7 @@
ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
(randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));
- /*
+ /*
* in6_get_tmpifid() quite likely provided a unique interface ID.
* However, we may still have a chance to see collision, because
* there may be a time lag between generation of the ID and generation
@@ -1900,7 +1891,7 @@
for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
&ifra.ifra_addr.sin6_addr)) {
- if (trylimit-- == 0) {
+ if (trylimit-- == 0) {
/*
* Give up. Something strange should have
* happened.
@@ -2024,9 +2015,7 @@
* it shouldn't be called when acting as a router.
*/
void
-rt6_flush(gateway, ifp)
- struct in6_addr *gateway;
- struct ifnet *ifp;
+rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
{
struct radix_node_head *rnh = rt_tables[AF_INET6];
int s = splnet();
@@ -2044,9 +2033,7 @@
}
static int
-rt6_deleteroute(rn, arg)
- struct radix_node *rn;
- void *arg;
+rt6_deleteroute(struct radix_node *rn, void *arg)
{
#define SIN6(s) ((struct sockaddr_in6 *)s)
struct rtentry *rt = (struct rtentry *)rn;
@@ -2080,8 +2067,7 @@
}
int
-nd6_setdefaultiface(ifindex)
- int ifindex;
+nd6_setdefaultiface(int ifindex)
{
int error = 0;
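
Aside from the address-formatting change, nearly every hunk in this file converts old-style K&R function definitions to ANSI prototypes; the bodies are untouched, only the parameter declarations move inside the parentheses, which lets the compiler type-check arguments at every call site. A trivial illustration (the function itself is invented for the example):

    /* Old K&R definition, the style being removed: */
    static int
    sum_kr(a, b)
        int a;
        int b;
    {
        return (a + b);
    }

    /* Equivalent ANSI prototype-style definition, the style being added: */
    static int
    sum_ansi(int a, int b)
    {
        return (a + b);
    }
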
Index: ip6_forward.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/ip6_forward.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/ip6_forward.c -L sys/netinet6/ip6_forward.c -u -r1.1.1.1 -r1.2
--- sys/netinet6/ip6_forward.c
+++ sys/netinet6/ip6_forward.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/ip6_forward.c,v 1.28.2.2 2005/11/04 20:26:15 ume Exp $ */
+/* $FreeBSD: src/sys/netinet6/ip6_forward.c,v 1.40 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: ip6_forward.c,v 1.69 2001/05/17 03:48:30 itojun Exp $ */
/*-
@@ -30,7 +30,6 @@
* SUCH DAMAGE.
*/
-#include "opt_ip6fw.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
@@ -67,23 +66,10 @@
#include <netinet/in_pcb.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#ifdef INET6
-#include <netinet6/ipsec6.h>
-#endif
-#include <netkey/key.h>
-#endif /* IPSEC */
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
#include <netipsec/key.h>
-#define IPSEC
-#endif /* FAST_IPSEC */
-
-#include <netinet6/ip6_fw.h>
-
-#include <net/net_osdep.h>
+#endif /* IPSEC */
#include <netinet6/ip6protosw.h>
@@ -101,11 +87,8 @@
* protocol deal with that.
*
*/
-
void
-ip6_forward(m, srcrt)
- struct mbuf *m;
- int srcrt;
+ip6_forward(struct mbuf *m, int srcrt)
{
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
struct sockaddr_in6 *dst = NULL;
@@ -119,6 +102,9 @@
struct secpolicy *sp = NULL;
int ipsecrt = 0;
#endif
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+
+ GIANT_REQUIRED; /* XXX bz: ip6_forward_rt */
#ifdef IPSEC
/*
@@ -129,9 +115,7 @@
* before forwarding packet actually.
*/
if (ipsec6_in_reject(m, NULL)) {
-#if !defined(FAST_IPSEC)
ipsec6stat.in_polvio++;
-#endif
m_freem(m);
return;
}
@@ -153,8 +137,8 @@
log(LOG_DEBUG,
"cannot forward "
"from %s to %s nxt %d received on %s\n",
- ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst),
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst),
ip6->ip6_nxt,
if_name(m->m_pkthdr.rcvif));
}
@@ -190,7 +174,7 @@
#ifdef IPSEC
/* get a security policy for this packet */
- sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
+ sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
IP_FORWARDING, &error);
if (sp == NULL) {
ipsec6stat.out_inval++;
@@ -216,7 +200,7 @@
*/
ipsec6stat.out_polvio++;
ip6stat.ip6s_cantforward++;
- key_freesp(sp);
+ KEY_FREESP(&sp);
if (mcopy) {
#if 0
/* XXX: what icmp ? */
@@ -230,7 +214,7 @@
case IPSEC_POLICY_BYPASS:
case IPSEC_POLICY_NONE:
/* no need to do IPsec. */
- key_freesp(sp);
+ KEY_FREESP(&sp);
goto skip_ipsec;
case IPSEC_POLICY_IPSEC:
@@ -238,7 +222,7 @@
/* XXX should be panic ? */
printf("ip6_forward: No IPsec request specified.\n");
ip6stat.ip6s_cantforward++;
- key_freesp(sp);
+ KEY_FREESP(&sp);
if (mcopy) {
#if 0
/* XXX: what icmp ? */
@@ -256,7 +240,7 @@
default:
/* should be panic ?? */
printf("ip6_forward: Invalid policy found. %d\n", sp->policy);
- key_freesp(sp);
+ KEY_FREESP(&sp);
goto skip_ipsec;
}
@@ -303,7 +287,7 @@
error = ipsec6_output_tunnel(&state, sp, 0);
m = state.m;
- key_freesp(sp);
+ KEY_FREESP(&sp);
if (error) {
/* mbuf is already reclaimed in ipsec6_output_tunnel. */
@@ -331,9 +315,18 @@
}
m_freem(m);
return;
+ } else {
+ /*
+ * In the FAST IPSec case we have already
+ * re-injected the packet and it has been freed
+ * by the ipsec_done() function. So, just clean
+ * up after ourselves.
+ */
+ m = NULL;
+ goto freecopy;
}
- if (ip6 != mtod(m, struct ip6_hdr *)) {
+ if ((m != NULL) && (ip6 != mtod(m, struct ip6_hdr *)) ){
/*
* now tunnel mode headers are added. we are originating
* packet instead of forwarding the packet.
@@ -392,7 +385,7 @@
dst->sin6_family = AF_INET6;
dst->sin6_addr = ip6->ip6_dst;
- rtalloc((struct route *)&ip6_forward_rt);
+ rtalloc((struct route *)&ip6_forward_rt);
if (ip6_forward_rt.ro_rt == 0) {
ip6stat.ip6s_noroute++;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_noroute);
@@ -446,8 +439,8 @@
log(LOG_DEBUG,
"cannot forward "
"src %s, dst %s, nxt %d, rcvif %s, outif %s\n",
- ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst),
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst),
ip6->ip6_nxt,
if_name(m->m_pkthdr.rcvif), if_name(rt->rt_ifp));
}
@@ -483,7 +476,7 @@
struct secpolicy *sp;
int ipsecerror;
size_t ipsechdrsiz;
-#endif
+#endif /* IPSEC */
mtu = IN6_LINKMTU(rt->rt_ifp);
#ifdef IPSEC
@@ -494,7 +487,7 @@
* case, as we have the outgoing interface for
* encapsulated packet as "rt->rt_ifp".
*/
- sp = ipsec6_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND,
+ sp = ipsec_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND,
IP_FORWARDING, &ipsecerror);
if (sp) {
ipsechdrsiz = ipsec6_hdrsiz(mcopy,
@@ -509,7 +502,7 @@
*/
if (mtu < IPV6_MMTU)
mtu = IPV6_MMTU;
-#endif
+#endif /* IPSEC */
icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu);
}
m_freem(m);
@@ -528,10 +521,10 @@
* Also, don't send redirect if forwarding using a route
* modified by a redirect.
*/
- if (rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt &&
+ if (ip6_sendredirects && rt->rt_ifp == m->m_pkthdr.rcvif && !srcrt &&
#ifdef IPSEC
!ipsecrt &&
-#endif
+#endif /* IPSEC */
(rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0) {
if ((rt->rt_ifp->if_flags & IFF_POINTOPOINT) != 0) {
/*
@@ -553,20 +546,6 @@
}
/*
- * Check with the firewall...
- */
- if (ip6_fw_enable && ip6_fw_chk_ptr) {
- u_short port = 0;
- /* If ipfw says divert, we have to just drop packet */
- if ((*ip6_fw_chk_ptr)(&ip6, rt->rt_ifp, &port, &m)) {
- m_freem(m);
- goto freecopy;
- }
- if (!m)
- goto freecopy;
- }
-
- /*
* Fake scoped addresses. Note that even link-local source or
* destinaion can appear, if the originating node just sends the
* packet to us (without address resolution for the destination).
@@ -592,8 +571,8 @@
{
printf("ip6_forward: outgoing interface is loopback. "
"src %s, dst %s, nxt %d, rcvif %s, outif %s\n",
- ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst),
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst),
ip6->ip6_nxt, if_name(m->m_pkthdr.rcvif),
if_name(rt->rt_ifp));
}
@@ -611,7 +590,7 @@
in6_clearscope(&ip6->ip6_dst);
/* Jump over all PFIL processing if hooks are not active. */
- if (inet6_pfil_hook.ph_busy_count == -1)
+ if (!PFIL_HOOKED(&inet6_pfil_hook))
goto pass;
/* Run through list of hooks for output packets. */
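
Both ip6_forward() above and ip6_input() below now ask whether any packet-filter hooks are registered via the PFIL_HOOKED() macro instead of inspecting the pfil_head's ph_busy_count field directly. The surrounding call pattern is unchanged; a condensed sketch of it as it reads after the change, based on the PFIL_IN path shown in the ip6_input.c hunks further down (kernel code, shown out of context):

        /* Skip filtering entirely when no pfil(9) hooks are registered. */
        if (!PFIL_HOOKED(&inet6_pfil_hook))
            goto passin;

        /* Registered hooks may modify, drop, or consume the mbuf. */
        if (pfil_run_hooks(&inet6_pfil_hook, &m, m->m_pkthdr.rcvif,
            PFIL_IN, NULL))
            return;            /* a hook rejected the packet */
        if (m == NULL)
            return;            /* a hook consumed the packet */
    passin:
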
Index: in6_cksum.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_cksum.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/in6_cksum.c -L sys/netinet6/in6_cksum.c -u -r1.1.1.1 -r1.2
--- sys/netinet6/in6_cksum.c
+++ sys/netinet6/in6_cksum.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_cksum.c,v 1.10.2.1 2005/11/04 20:26:15 ume Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_cksum.c,v 1.16 2007/07/05 16:23:47 delphij Exp $ */
/* $KAME: in6_cksum.c,v 1.10 2000/12/03 00:53:59 itojun Exp $ */
/*-
@@ -68,8 +68,6 @@
#include <netinet/ip6.h>
#include <netinet6/scope6_var.h>
-#include <net/net_osdep.h>
-
/*
* Checksum routine for Internet Protocol family headers (Portable Version).
*
@@ -86,12 +84,8 @@
* len is a total length of a transport segment.
* (e.g. TCP header + TCP payload)
*/
-
int
-in6_cksum(m, nxt, off, len)
- struct mbuf *m;
- u_int8_t nxt;
- u_int32_t off, len;
+in6_cksum(struct mbuf *m, u_int8_t nxt, u_int32_t off, u_int32_t len)
{
u_int16_t *w;
int sum = 0;
@@ -105,7 +99,7 @@
u_int32_t ph_len;
u_int8_t ph_zero[3];
u_int8_t ph_nxt;
- } ph __packed;
+ } __packed ph;
} uph;
union {
u_int8_t c[2];
@@ -156,7 +150,7 @@
/*
* Secondly calculate a summary of the first mbuf excluding offset.
*/
- while (m != NULL && off > 0) {
+ while (off > 0) {
if (m->m_len <= off)
off -= m->m_len;
else
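
in6_cksum() computes the RFC 2460 upper-layer checksum: a 16-bit one's-complement sum over a pseudo-header (source address, destination address, the 32-bit upper-layer payload length, three zero bytes and the next-header value) followed by the transport header and payload, finally complemented. The kernel routine walks an mbuf chain and the union shown above is that pseudo-header; the following is a simplified, self-contained sketch over a contiguous buffer that performs the same arithmetic:

    #include <netinet/in.h>
    #include <stdint.h>
    #include <string.h>

    /* Fold a 32-bit one's-complement accumulator back to 16 bits. */
    static uint16_t
    fold(uint32_t sum)
    {
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);
        return ((uint16_t)sum);
    }

    /* Add "len" bytes to the running sum, big-endian 16 bits at a time. */
    static uint32_t
    sum16(const void *p, size_t len, uint32_t sum)
    {
        const uint8_t *b = p;

        while (len > 1) {
            sum += (uint16_t)((b[0] << 8) | b[1]);
            b += 2;
            len -= 2;
        }
        if (len)                            /* odd trailing byte */
            sum += (uint16_t)(b[0] << 8);
        return (sum);
    }

    /* Simplified analogue of in6_cksum() for a contiguous payload. */
    uint16_t
    in6_cksum_flat(const struct in6_addr *src, const struct in6_addr *dst,
        uint8_t nxt, const void *payload, uint32_t len)
    {
        uint8_t ph[40];
        uint32_t sum;

        memcpy(&ph[0], src, 16);            /* pseudo-header: source */
        memcpy(&ph[16], dst, 16);           /* pseudo-header: destination */
        ph[32] = len >> 24; ph[33] = len >> 16;     /* upper-layer length */
        ph[34] = len >> 8;  ph[35] = len;
        ph[36] = ph[37] = ph[38] = 0;       /* three zero bytes */
        ph[39] = nxt;                       /* next header */

        sum = sum16(ph, sizeof(ph), 0);
        sum = sum16(payload, len, sum);
        return (~fold(sum) & 0xffff);       /* value in host byte order */
    }
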
Index: ip6_input.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/ip6_input.c -L sys/netinet6/ip6_input.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/ip6_input.c
+++ sys/netinet6/ip6_input.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/ip6_input.c,v 1.81.2.4 2006/01/31 16:36:11 ume Exp $ */
+/* $FreeBSD: src/sys/netinet6/ip6_input.c,v 1.95 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $ */
/*-
@@ -61,7 +61,6 @@
* @(#)ip_input.c 8.2 (Berkeley) 1/4/94
*/
-#include "opt_ip6fw.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
@@ -103,24 +102,13 @@
#include <netinet6/nd6.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#ifdef INET6
-#include <netinet6/ipsec6.h>
-#endif
-#endif
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
+#include <netinet6/ip6_ipsec.h>
#include <netipsec/ipsec6.h>
-#define IPSEC
-#endif /* FAST_IPSEC */
-
-#include <netinet6/ip6_fw.h>
+#endif /* IPSEC */
#include <netinet6/ip6protosw.h>
-#include <net/net_osdep.h>
-
extern struct domain inet6domain;
u_char ip6_protox[IPPROTO_MAX];
@@ -138,11 +126,6 @@
struct pfil_head inet6_pfil_hook;
-/* firewall hooks */
-ip6_fw_chk_t *ip6_fw_chk_ptr;
-ip6_fw_ctl_t *ip6_fw_ctl_ptr;
-int ip6_fw_enable = 1;
-
struct ip6stat ip6stat;
static void ip6_init2 __P((void *));
@@ -157,7 +140,7 @@
* All protocols not implemented in kernel go to raw IP6 protocol handler.
*/
void
-ip6_init()
+ip6_init(void)
{
struct ip6protosw *pr;
int i;
@@ -204,8 +187,7 @@
}
static void
-ip6_init2(dummy)
- void *dummy;
+ip6_init2(void *dummy)
{
/* nd6_timer_init */
@@ -227,8 +209,7 @@
extern struct route_in6 ip6_forward_rt;
void
-ip6_input(m)
- struct mbuf *m;
+ip6_input(struct mbuf *m)
{
struct ip6_hdr *ip6;
int off = sizeof(struct ip6_hdr), nest;
@@ -240,16 +221,18 @@
int srcrt = 0;
GIANT_REQUIRED; /* XXX for now */
+
#ifdef IPSEC
/*
* should the inner packet be considered authentic?
* see comment in ah4_input().
+ * NB: m cannot be NULL when passed to the input routine
*/
- if (m) {
- m->m_flags &= ~M_AUTHIPHDR;
- m->m_flags &= ~M_AUTHIPDGM;
- }
-#endif
+
+ m->m_flags &= ~M_AUTHIPHDR;
+ m->m_flags &= ~M_AUTHIPDGM;
+
+#endif /* IPSEC */
/*
* make sure we don't have onion peering information into m_tag.
@@ -414,7 +397,7 @@
odst = ip6->ip6_dst;
/* Jump over all PFIL processing if hooks are not active. */
- if (inet6_pfil_hook.ph_busy_count == -1)
+ if (!PFIL_HOOKED(&inet6_pfil_hook))
goto passin;
if (pfil_run_hooks(&inet6_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL))
@@ -426,21 +409,6 @@
passin:
/*
- * Check with the firewall...
- */
- if (ip6_fw_enable && ip6_fw_chk_ptr) {
- u_short port = 0;
- /* If ipfw says divert, we have to just drop packet */
- /* use port as a dummy argument */
- if ((*ip6_fw_chk_ptr)(&ip6, NULL, &port, &m)) {
- m_freem(m);
- m = NULL;
- }
- if (!m)
- return;
- }
-
- /*
* Disambiguate address scope zones (if there is ambiguity).
* We first make sure that the original source or destination address
* is not in our internal form for scoped addresses. Such addresses
@@ -464,7 +432,7 @@
* Multicast check
*/
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
- struct in6_multi *in6m = 0;
+ struct in6_multi *in6m = 0;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast);
/*
@@ -573,11 +541,13 @@
ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
goto hbhcheck;
} else {
+ char ip6bufs[INET6_ADDRSTRLEN];
+ char ip6bufd[INET6_ADDRSTRLEN];
/* address is not ready, so discard the packet. */
nd6log((LOG_INFO,
"ip6_input: packet to an unready address %s->%s\n",
- ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst)));
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst)));
goto bad;
}
@@ -680,11 +650,25 @@
nxt = hbh->ip6h_nxt;
/*
- * accept the packet if a router alert option is included
- * and we act as an IPv6 router.
+ * If we are acting as a router and the packet contains a
+ * router alert option, see if we know the option value.
+ * Currently, we only support the option value for MLD, in which
+ * case we should pass the packet to the multicast routing
+ * daemon.
*/
- if (rtalert != ~0 && ip6_forwarding)
- ours = 1;
+ if (rtalert != ~0 && ip6_forwarding) {
+ switch (rtalert) {
+ case IP6OPT_RTALERT_MLD:
+ ours = 1;
+ break;
+ default:
+ /*
+ * RFC2711 requires unrecognized values must be
+ * silently ignored.
+ */
+ break;
+ }
+ }
} else
nxt = ip6->ip6_nxt;
@@ -719,7 +703,8 @@
* ip6_mforward() returns a non-zero value, the packet
* must be discarded, else it may be accepted below.
*/
- if (ip6_mrouter && ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
+ if (ip6_mrouter && ip6_mforward &&
+ ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
ip6stat.ip6s_cantforward++;
m_freem(m);
return;
@@ -780,12 +765,9 @@
* note that we do not visit this with protocols with pcb layer
* code - like udp/tcp/raw ip.
*/
- if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0 &&
- ipsec6_in_reject(m, NULL)) {
- ipsec6stat.in_polvio++;
+ if (ip6_ipsec_input(m, nxt))
goto bad;
- }
-#endif
+#endif /* IPSEC */
nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
}
return;
@@ -798,9 +780,7 @@
* XXX backward compatibility wrapper
*/
static struct ip6aux *
-ip6_setdstifaddr(m, ia6)
- struct mbuf *m;
- struct in6_ifaddr *ia6;
+ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6)
{
struct ip6aux *ip6a;
@@ -811,8 +791,7 @@
}
struct in6_ifaddr *
-ip6_getdstifaddr(m)
- struct mbuf *m;
+ip6_getdstifaddr(struct mbuf *m)
{
struct ip6aux *ip6a;
@@ -826,13 +805,12 @@
/*
* Hop-by-Hop options header processing. If a valid jumbo payload option is
* included, the real payload length will be stored in plenp.
+ *
+ * rtalertp - XXX: should be stored more smart way
*/
static int
-ip6_hopopts_input(plenp, rtalertp, mp, offp)
- u_int32_t *plenp;
- u_int32_t *rtalertp; /* XXX: should be stored more smart way */
- struct mbuf **mp;
- int *offp;
+ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp,
+ struct mbuf **mp, int *offp)
{
struct mbuf *m = *mp;
int off = *offp, hbhlen;
@@ -886,12 +864,8 @@
* opthead + hbhlen is located in continuous memory region.
*/
int
-ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp)
- struct mbuf *m;
- u_int8_t *opthead;
- int hbhlen;
- u_int32_t *rtalertp;
- u_int32_t *plenp;
+ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
+ u_int32_t *rtalertp, u_int32_t *plenp)
{
struct ip6_hdr *ip6;
int optlen = 0;
@@ -1023,10 +997,7 @@
* is not continuous in order to return an ICMPv6 error.
*/
int
-ip6_unknown_opt(optp, m, off)
- u_int8_t *optp;
- struct mbuf *m;
- int off;
+ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off)
{
struct ip6_hdr *ip6;
@@ -1067,9 +1038,7 @@
* very first mbuf on the mbuf chain.
*/
void
-ip6_savecontrol(in6p, m, mp)
- struct inpcb *in6p;
- struct mbuf *m, **mp;
+ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
{
#define IS2292(x, y) ((in6p->in6p_flags & IN6P_RFC2292) ? (x) : (y))
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
@@ -1273,7 +1242,7 @@
default:
/*
- * other cases have been filtered in the above.
+ * other cases have been filtered in the above.
* none will visit this case. here we supply
* the code just in case (nxt overwritten or
* other cases).
@@ -1302,10 +1271,7 @@
}
void
-ip6_notify_pmtu(in6p, dst, mtu)
- struct inpcb *in6p;
- struct sockaddr_in6 *dst;
- u_int32_t *mtu;
+ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu)
{
struct socket *so;
struct mbuf *m_mtu;
@@ -1347,10 +1313,7 @@
* contains the result, or NULL on error.
*/
static struct mbuf *
-ip6_pullexthdr(m, off, nxt)
- struct mbuf *m;
- size_t off;
- int nxt;
+ip6_pullexthdr(struct mbuf *m, size_t off, int nxt)
{
struct ip6_ext ip6e;
size_t elen;
@@ -1410,9 +1373,7 @@
* we develop `neater' mechanism to process extension headers.
*/
char *
-ip6_get_prevhdr(m, off)
- struct mbuf *m;
- int off;
+ip6_get_prevhdr(struct mbuf *m, int off)
{
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
@@ -1451,11 +1412,7 @@
* get next header offset. m will be retained.
*/
int
-ip6_nexthdr(m, off, proto, nxtp)
- struct mbuf *m;
- int off;
- int proto;
- int *nxtp;
+ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp)
{
struct ip6_hdr ip6;
struct ip6_ext ip6e;
@@ -1530,11 +1487,7 @@
* get offset for the last header in the chain. m will be kept untainted.
*/
int
-ip6_lasthdr(m, off, proto, nxtp)
- struct mbuf *m;
- int off;
- int proto;
- int *nxtp;
+ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp)
{
int newoff;
int nxt;
@@ -1558,8 +1511,7 @@
}
struct ip6aux *
-ip6_addaux(m)
- struct mbuf *m;
+ip6_addaux(struct mbuf *m)
{
struct m_tag *mtag;
@@ -1576,8 +1528,7 @@
}
struct ip6aux *
-ip6_findaux(m)
- struct mbuf *m;
+ip6_findaux(struct mbuf *m)
{
struct m_tag *mtag;
@@ -1586,8 +1537,7 @@
}
void
-ip6_delaux(m)
- struct mbuf *m;
+ip6_delaux(struct mbuf *m)
{
struct m_tag *mtag;
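
The open-coded IPsec policy check that ip6_input() used to carry (reject the packet and bump ipsec6stat.in_polvio when the last header's protocol requires it) is now delegated to ip6_ipsec_input() in the newly added netinet6/ip6_ipsec.c. Judging purely from the removed lines, a helper of roughly this shape would preserve the old behaviour; this is a reconstruction for illustration, not the actual ip6_ipsec.c code:

    /*
     * Hypothetical reconstruction of the check formerly inlined in
     * ip6_input(); returns non-zero when the packet must be dropped.
     */
    static int
    ip6_ipsec_input_sketch(struct mbuf *m, int nxt)
    {

        if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0 &&
            ipsec6_in_reject(m, NULL)) {
            ipsec6stat.in_polvio++;
            return (1);
        }
        return (0);
    }
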
Index: ip6_id.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/ip6_id.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/ip6_id.c -L sys/netinet6/ip6_id.c -u -r1.1.1.1 -r1.2
--- sys/netinet6/ip6_id.c
+++ sys/netinet6/ip6_id.c
@@ -1,6 +1,6 @@
/* $KAME: ip6_id.c,v 1.13 2003/09/16 09:11:19 itojun Exp $ */
/* $OpenBSD: ip_id.c,v 1.6 2002/03/15 18:19:52 millert Exp $ */
-/* $FreeBSD: src/sys/netinet6/ip6_id.c,v 1.7 2005/01/07 02:30:34 imp Exp $ */
+/* $FreeBSD: src/sys/netinet6/ip6_id.c,v 1.8 2007/07/05 16:23:47 delphij Exp $ */
/*-
* Copyright (C) 2003 WIDE Project.
@@ -152,7 +152,6 @@
* Do a fast modular exponation, returned value will be in the range
* of 0 - (mod-1)
*/
-
static u_int32_t
pmod(u_int32_t gen, u_int32_t expo, u_int32_t mod)
{
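
The pmod() routine whose comment appears above is the usual square-and-multiply ("fast") modular exponentiation used by the randomized ID generator in this file. A self-contained rendering of that algorithm, using a 64-bit intermediate so the 32-bit multiplications cannot overflow:

    #include <stdint.h>

    /* Compute (gen ^ expo) mod "mod" in O(log expo) multiplications. */
    static uint32_t
    pmod_sketch(uint32_t gen, uint32_t expo, uint32_t mod)
    {
        uint64_t s = 1 % mod;       /* running result */
        uint64_t t = gen % mod;     /* repeatedly squared base */

        while (expo != 0) {
            if (expo & 1)           /* low bit of the exponent set */
                s = (s * t) % mod;
            t = (t * t) % mod;      /* square for the next bit */
            expo >>= 1;
        }
        return ((uint32_t)s);
    }
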
Index: in6_gif.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_gif.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/in6_gif.c -L sys/netinet6/in6_gif.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/in6_gif.c
+++ sys/netinet6/in6_gif.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_gif.c,v 1.22.2.2 2006/01/31 15:56:47 glebius Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_gif.c,v 1.29 2007/07/05 16:29:39 delphij Exp $ */
/* $KAME: in6_gif.c,v 1.49 2001/05/14 14:02:17 itojun Exp $ */
/*-
@@ -68,8 +68,6 @@
#include <net/if_gif.h>
-#include <net/net_osdep.h>
-
static int gif_validate6(const struct ip6_hdr *, struct gif_softc *,
struct ifnet *);
@@ -83,10 +81,9 @@
};
int
-in6_gif_output(ifp, family, m)
- struct ifnet *ifp;
- int family; /* family of the packet to be encapsulate. */
- struct mbuf *m;
+in6_gif_output(struct ifnet *ifp,
+ int family, /* family of the packet to be encapsulate */
+ struct mbuf *m)
{
struct gif_softc *sc = ifp->if_softc;
struct sockaddr_in6 *dst = (struct sockaddr_in6 *)&sc->gif_ro6.ro_dst;
@@ -139,16 +136,16 @@
}
#endif
case AF_LINK:
- proto = IPPROTO_ETHERIP;
- eiphdr.eip_ver = ETHERIP_VERSION & ETHERIP_VER_VERS_MASK;
- eiphdr.eip_pad = 0;
- /* prepend Ethernet-in-IP header */
- M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT);
- if (m && m->m_len < sizeof(struct etherip_header))
- m = m_pullup(m, sizeof(struct etherip_header));
- if (m == NULL)
- return ENOBUFS;
- bcopy(&eiphdr, mtod(m, struct etherip_header *),
+ proto = IPPROTO_ETHERIP;
+ eiphdr.eip_ver = ETHERIP_VERSION & ETHERIP_VER_VERS_MASK;
+ eiphdr.eip_pad = 0;
+ /* prepend Ethernet-in-IP header */
+ M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT);
+ if (m && m->m_len < sizeof(struct etherip_header))
+ m = m_pullup(m, sizeof(struct etherip_header));
+ if (m == NULL)
+ return ENOBUFS;
+ bcopy(&eiphdr, mtod(m, struct etherip_header *),
sizeof(struct etherip_header));
break;
@@ -245,9 +242,7 @@
}
int
-in6_gif_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp, proto;
+in6_gif_input(struct mbuf **mp, int *offp, int proto)
{
struct mbuf *m = *mp;
struct ifnet *gifp = NULL;
@@ -318,9 +313,9 @@
break;
}
#endif
- case IPPROTO_ETHERIP:
- af = AF_LINK;
- break;
+ case IPPROTO_ETHERIP:
+ af = AF_LINK;
+ break;
default:
ip6stat.ip6s_nogif++;
@@ -336,10 +331,8 @@
* validate outer address.
*/
static int
-gif_validate6(ip6, sc, ifp)
- const struct ip6_hdr *ip6;
- struct gif_softc *sc;
- struct ifnet *ifp;
+gif_validate6(const struct ip6_hdr *ip6, struct gif_softc *sc,
+ struct ifnet *ifp)
{
struct sockaddr_in6 *src, *dst;
@@ -371,9 +364,10 @@
rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL);
if (!rt || rt->rt_ifp != ifp) {
#if 0
+ char ip6buf[INET6_ADDRSTRLEN];
log(LOG_WARNING, "%s: packet from %s dropped "
"due to ingress filter\n", if_name(GIF2IFP(sc)),
- ip6_sprintf(&sin6.sin6_addr));
+ ip6_sprintf(ip6buf, &sin6.sin6_addr));
#endif
if (rt)
rtfree(rt);
@@ -391,11 +385,7 @@
* sanity check for arg should have been done in the caller.
*/
int
-gif_encapcheck6(m, off, proto, arg)
- const struct mbuf *m;
- int off;
- int proto;
- void *arg;
+gif_encapcheck6(const struct mbuf *m, int off, int proto, void *arg)
{
struct ip6_hdr ip6;
struct gif_softc *sc;
@@ -412,19 +402,17 @@
}
int
-in6_gif_attach(sc)
- struct gif_softc *sc;
+in6_gif_attach(struct gif_softc *sc)
{
sc->encap_cookie6 = encap_attach_func(AF_INET6, -1, gif_encapcheck,
- (struct protosw *)&in6_gif_protosw, sc);
+ (void *)&in6_gif_protosw, sc);
if (sc->encap_cookie6 == NULL)
return EEXIST;
return 0;
}
int
-in6_gif_detach(sc)
- struct gif_softc *sc;
+in6_gif_detach(struct gif_softc *sc)
{
int error;
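
The AF_LINK case above is only re-indented, but it shows the standard mbuf idiom for prepending an encapsulation header (here the EtherIP header gif(4) places in front of bridged frames): M_PREPEND() grows the chain, m_pullup() makes the header contiguous when the leading mbuf is too short, and only then is the header copied in. The same lines again, condensed and commented:

        proto = IPPROTO_ETHERIP;
        eiphdr.eip_ver = ETHERIP_VERSION & ETHERIP_VER_VERS_MASK;
        eiphdr.eip_pad = 0;

        /* Grow the chain by one EtherIP header; may fail, leaving m NULL. */
        M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT);

        /* Make sure the new header is contiguous before writing into it. */
        if (m && m->m_len < sizeof(struct etherip_header))
            m = m_pullup(m, sizeof(struct etherip_header));
        if (m == NULL)
            return ENOBUFS;

        bcopy(&eiphdr, mtod(m, struct etherip_header *),
            sizeof(struct etherip_header));
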
Index: in6_ifattach.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_ifattach.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/in6_ifattach.c -L sys/netinet6/in6_ifattach.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/in6_ifattach.c
+++ sys/netinet6/in6_ifattach.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_ifattach.c,v 1.26.2.5 2005/12/25 14:03:37 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_ifattach.c,v 1.39 2007/07/05 16:23:47 delphij Exp $ */
/* $KAME: in6_ifattach.c,v 1.118 2001/05/24 07:44:00 itojun Exp $ */
/*-
@@ -58,8 +58,6 @@
#include <netinet6/nd6.h>
#include <netinet6/scope6_var.h>
-#include <net/net_osdep.h>
-
unsigned long in6_maxmtu = 0;
#ifdef IP6_AUTO_LINKLOCAL
@@ -78,6 +76,7 @@
static int get_ifid __P((struct ifnet *, struct ifnet *, struct in6_addr *));
static int in6_ifattach_linklocal __P((struct ifnet *, struct ifnet *));
static int in6_ifattach_loopback __P((struct ifnet *));
+static void in6_purgemaddrs __P((struct ifnet *));
#define EUI64_GBIT 0x01
#define EUI64_UBIT 0x02
@@ -96,11 +95,11 @@
* The goal here is to get an interface identifier that is
* (1) random enough and (2) does not change across reboot.
* We currently use MD5(hostname) for it.
+ *
+ * in6 - upper 64bits are preserved
*/
static int
-get_rand_ifid(ifp, in6)
- struct ifnet *ifp;
- struct in6_addr *in6; /* upper 64bits are preserved */
+get_rand_ifid(struct ifnet *ifp, struct in6_addr *in6)
{
MD5_CTX ctxt;
u_int8_t digest[16];
@@ -132,15 +131,13 @@
}
static int
-generate_tmp_ifid(seed0, seed1, ret)
- u_int8_t *seed0, *ret;
- const u_int8_t *seed1;
+generate_tmp_ifid(u_int8_t *seed0, const u_int8_t *seed1, u_int8_t *ret)
{
MD5_CTX ctxt;
u_int8_t seed[16], digest[16], nullbuf[8];
u_int32_t val32;
- /* If there's no hisotry, start with a random seed. */
+ /* If there's no history, start with a random seed. */
bzero(nullbuf, sizeof(nullbuf));
if (bcmp(nullbuf, seed0, sizeof(nullbuf)) == 0) {
int i;
@@ -215,11 +212,11 @@
/*
* Get interface identifier for the specified interface.
* XXX assumes single sockaddr_dl (AF_LINK address) per an interface
+ *
+ * in6 - upper 64bits are preserved
*/
int
-in6_get_hw_ifid(ifp, in6)
- struct ifnet *ifp;
- struct in6_addr *in6; /* upper 64bits are preserved */
+in6_get_hw_ifid(struct ifnet *ifp, struct in6_addr *in6)
{
struct ifaddr *ifa;
struct sockaddr_dl *sdl;
@@ -348,12 +345,12 @@
* Get interface identifier for the specified interface. If it is not
* available on ifp0, borrow interface identifier from other information
* sources.
+ *
+ * altifp - secondary EUI64 source
*/
static int
-get_ifid(ifp0, altifp, in6)
- struct ifnet *ifp0;
- struct ifnet *altifp; /* secondary EUI64 source */
- struct in6_addr *in6;
+get_ifid(struct ifnet *ifp0, struct ifnet *altifp,
+ struct in6_addr *in6)
{
struct ifnet *ifp;
@@ -412,10 +409,11 @@
return 0;
}
+/*
+ * altifp - secondary EUI64 source
+ */
static int
-in6_ifattach_linklocal(ifp, altifp)
- struct ifnet *ifp;
- struct ifnet *altifp; /* secondary EUI64 source */
+in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp)
{
struct in6_ifaddr *ia;
struct in6_aliasreq ifra;
@@ -527,9 +525,11 @@
return 0;
}
+/*
+ * ifp - must be IFT_LOOP
+ */
static int
-in6_ifattach_loopback(ifp)
- struct ifnet *ifp; /* must be IFT_LOOP */
+in6_ifattach_loopback(struct ifnet *ifp)
{
struct in6_aliasreq ifra;
int error;
@@ -589,11 +589,8 @@
* when ifp == NULL, the caller is responsible for filling scopeid.
*/
int
-in6_nigroup(ifp, name, namelen, in6)
- struct ifnet *ifp;
- const char *name;
- int namelen;
- struct in6_addr *in6;
+in6_nigroup(struct ifnet *ifp, const char *name, int namelen,
+ struct in6_addr *in6)
{
const char *p;
u_char *q;
@@ -639,11 +636,11 @@
* XXX multiple loopback interface needs more care. for instance,
* nodelocal address needs to be configured onto only one of them.
* XXX multiple link-local address case
+ *
+ * altifp - secondary EUI64 source
*/
void
-in6_ifattach(ifp, altifp)
- struct ifnet *ifp;
- struct ifnet *altifp; /* secondary EUI64 source */
+in6_ifattach(struct ifnet *ifp, struct ifnet *altifp)
{
struct in6_ifaddr *ia;
struct in6_addr in6;
@@ -725,16 +722,14 @@
* from the ifnet list in bsdi.
*/
void
-in6_ifdetach(ifp)
- struct ifnet *ifp;
+in6_ifdetach(struct ifnet *ifp)
{
struct in6_ifaddr *ia, *oia;
struct ifaddr *ifa, *next;
struct rtentry *rt;
short rtflags;
struct sockaddr_in6 sin6;
- struct in6_multi *in6m;
- struct in6_multi *in6m_next;
+ struct in6_multi_mship *imm;
/* remove neighbor management table */
nd6_purge(ifp);
@@ -758,6 +753,14 @@
ia = (struct in6_ifaddr *)ifa;
+ /*
+ * leave from multicast groups we have joined for the interface
+ */
+ while ((imm = ia->ia6_memberships.lh_first) != NULL) {
+ LIST_REMOVE(imm, i6mm_chain);
+ in6_leavegroup(imm);
+ }
+
/* remove from the routing table */
if ((ia->ia_flags & IFA_ROUTE) &&
(rt = rtalloc1((struct sockaddr *)&ia->ia_addr, 0, 0UL))) {
@@ -792,20 +795,10 @@
IFAFREE(&oia->ia_ifa);
}
+ in6_pcbpurgeif0(&udbinfo, ifp);
+ in6_pcbpurgeif0(&ripcbinfo, ifp);
/* leave from all multicast groups joined */
-
- if (udbinfo.listhead != NULL)
- in6_pcbpurgeif0(LIST_FIRST(udbinfo.listhead), ifp);
- if (ripcbinfo.listhead != NULL)
- in6_pcbpurgeif0(LIST_FIRST(ripcbinfo.listhead), ifp);
-
- for (in6m = LIST_FIRST(&in6_multihead); in6m; in6m = in6m_next) {
- in6m_next = LIST_NEXT(in6m, in6m_entry);
- if (in6m->in6m_ifp != ifp)
- continue;
- in6_delmulti(in6m);
- in6m = NULL;
- }
+ in6_purgemaddrs(ifp);
/*
* remove neighbor management table. we call it twice just to make
@@ -839,11 +832,8 @@
}
int
-in6_get_tmpifid(ifp, retbuf, baseid, generate)
- struct ifnet *ifp;
- u_int8_t *retbuf;
- const u_int8_t *baseid;
- int generate;
+in6_get_tmpifid(struct ifnet *ifp, u_int8_t *retbuf,
+ const u_int8_t *baseid, int generate)
{
u_int8_t nullbuf[8];
struct nd_ifinfo *ndi = ND_IFINFO(ifp);
@@ -867,8 +857,7 @@
}
void
-in6_tmpaddrtimer(ignored_arg)
- void *ignored_arg;
+in6_tmpaddrtimer(void *ignored_arg)
{
struct nd_ifinfo *ndi;
u_int8_t nullbuf[8];
@@ -894,3 +883,21 @@
splx(s);
}
+
+static void
+in6_purgemaddrs(struct ifnet *ifp)
+{
+ struct in6_multi *in6m;
+ struct in6_multi *oin6m;
+
+#ifdef DIAGNOSTIC
+ printf("%s: purging ifp %p\n", __func__, ifp);
+#endif
+
+ IFF_LOCKGIANT(ifp);
+ LIST_FOREACH_SAFE(in6m, &in6_multihead, in6m_entry, oin6m) {
+ if (in6m->in6m_ifp == ifp)
+ in6_delmulti(in6m);
+ }
+ IFF_UNLOCKGIANT(ifp);
+}
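
The new in6_purgemaddrs() above relies on LIST_FOREACH_SAFE, which saves the successor pointer before each iteration so in6_delmulti() may unlink and free the current entry without derailing the traversal. A small self-contained demonstration of the same idiom with the BSD <sys/queue.h> macros (the element type is invented for the example):

    #include <sys/queue.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct item {
        int              value;
        LIST_ENTRY(item) link;
    };

    static LIST_HEAD(, item) head = LIST_HEAD_INITIALIZER(head);

    int
    main(void)
    {
        struct item *it, *tmp;
        int i;

        for (i = 0; i < 5; i++) {
            it = malloc(sizeof(*it));
            it->value = i;
            LIST_INSERT_HEAD(&head, it, link);
        }

        /*
         * "tmp" caches the next element before the body runs, so it is
         * safe to remove and free "it" here; a plain LIST_FOREACH would
         * read a freed entry to find its successor.
         */
        LIST_FOREACH_SAFE(it, &head, link, tmp) {
            if (it->value & 1) {
                LIST_REMOVE(it, link);
                free(it);
            }
        }

        LIST_FOREACH(it, &head, link)
            printf("%d\n", it->value);
        return (0);
    }
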
Index: in6_pcb.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_pcb.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/in6_pcb.c -L sys/netinet6/in6_pcb.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/in6_pcb.c
+++ sys/netinet6/in6_pcb.c
@@ -1,6 +1,6 @@
-/* $FreeBSD: src/sys/netinet6/in6_pcb.c,v 1.62.2.2 2005/12/25 12:44:12 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_pcb.c,v 1.84.2.1 2007/12/21 14:25:43 rwatson Exp $ */
/* $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $ */
-
+
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
@@ -65,6 +65,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_mac.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -77,6 +78,7 @@
#include <sys/sockio.h>
#include <sys/errno.h>
#include <sys/time.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/jail.h>
@@ -99,30 +101,18 @@
#include <netinet6/scope6_var.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#ifdef INET6
-#include <netinet6/ipsec6.h>
-#endif
-#include <netinet6/ah.h>
-#ifdef INET6
-#include <netinet6/ah6.h>
-#endif
-#include <netkey/key.h>
-#endif /* IPSEC */
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
#include <netipsec/key.h>
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
+
+#include <security/mac/mac_framework.h>
struct in6_addr zeroin6_addr;
int
-in6_pcbbind(inp, nam, cred)
- register struct inpcb *inp;
- struct sockaddr *nam;
- struct ucred *cred;
+in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
+ struct ucred *cred)
{
struct socket *so = inp->inp_socket;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL;
@@ -138,7 +128,7 @@
if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
return (EINVAL);
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
- wild = 1;
+ wild = INPLOOKUP_WILDCARD;
if (nam) {
int error;
@@ -188,11 +178,14 @@
struct inpcb *t;
/* GROSS */
- if (ntohs(lport) < IPV6PORT_RESERVED &&
- suser_cred(cred, SUSER_ALLOWJAIL))
+ if (ntohs(lport) <= ipport_reservedhigh &&
+ ntohs(lport) >= ipport_reservedlow &&
+ priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT,
+ 0))
return (EACCES);
- if (so->so_cred->cr_uid != 0 &&
- !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
+ if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) &&
+ priv_check_cred(so->so_cred,
+ PRIV_NETINET_REUSEPORT, 0) != 0) {
t = in6_pcblookup_local(pcbinfo,
&sin6->sin6_addr, lport,
INPLOOKUP_WILDCARD);
@@ -201,8 +194,8 @@
(so->so_type != SOCK_STREAM ||
IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
(!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
- !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
- (t->inp_socket->so_options & SO_REUSEPORT)
+ !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
+ (t->inp_socket->so_options & SO_REUSEPORT)
== 0) && (so->so_cred->cr_uid !=
t->inp_socket->so_cred->cr_uid))
return (EADDRINUSE);
@@ -228,7 +221,7 @@
t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr,
lport, wild);
if (t && (reuseport & ((t->inp_vflag & INP_TIMEWAIT) ?
- intotw(t)->tw_so_options :
+ intotw(t)->tw_so_options :
t->inp_socket->so_options)) == 0)
return (EADDRINUSE);
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
@@ -239,17 +232,17 @@
t = in_pcblookup_local(pcbinfo, sin.sin_addr,
lport, wild);
if (t && t->inp_vflag & INP_TIMEWAIT) {
- if ((reuseport &
+ if ((reuseport &
intotw(t)->tw_so_options) == 0 &&
(ntohl(t->inp_laddr.s_addr) !=
- INADDR_ANY || ((inp->inp_vflag &
- INP_IPV6PROTO) ==
+ INADDR_ANY || ((inp->inp_vflag &
+ INP_IPV6PROTO) ==
(t->inp_vflag & INP_IPV6PROTO))))
return (EADDRINUSE);
}
- else if (t &&
- (reuseport & t->inp_socket->so_options)
- == 0 && (ntohl(t->inp_laddr.s_addr) !=
+ else if (t &&
+ (reuseport & t->inp_socket->so_options)
+ == 0 && (ntohl(t->inp_laddr.s_addr) !=
INADDR_ANY || INP_SOCKAF(so) ==
INP_SOCKAF(t->inp_socket)))
return (EADDRINUSE);
@@ -284,18 +277,18 @@
* a bit of a kludge, but cleaning up the internal interfaces would
* have forced minor changes in every protocol).
*/
-
int
-in6_pcbladdr(inp, nam, plocal_addr6)
- register struct inpcb *inp;
- struct sockaddr *nam;
- struct in6_addr **plocal_addr6;
+in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam,
+ struct in6_addr **plocal_addr6)
{
register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
int error = 0;
struct ifnet *ifp = NULL;
int scope_ambiguous = 0;
+ INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
+ INP_LOCK_ASSERT(inp);
+
if (nam->sa_len != sizeof (*sin6))
return (EINVAL);
if (sin6->sin6_family != AF_INET6)
@@ -308,9 +301,6 @@
if ((error = sa6_embedscope(sin6, ip6_use_defzone)) != 0)
return(error);
- INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
- INP_LOCK_ASSERT(inp);
-
if (in6_ifaddr) {
/*
* If the destination address is UNSPECIFIED addr,
@@ -355,10 +345,8 @@
* then pick one.
*/
int
-in6_pcbconnect(inp, nam, cred)
- register struct inpcb *inp;
- struct sockaddr *nam;
- struct ucred *cred;
+in6_pcbconnect(register struct inpcb *inp, struct sockaddr *nam,
+ struct ucred *cred)
{
struct in6_addr *addr6;
register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
@@ -398,16 +386,12 @@
(htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
in_pcbrehash(inp);
-#ifdef IPSEC
- if (inp->inp_socket->so_type == SOCK_STREAM)
- ipsec_pcbconn(inp->inp_sp);
-#endif
+
return (0);
}
void
-in6_pcbdisconnect(inp)
- struct inpcb *inp;
+in6_pcbdisconnect(struct inpcb *inp)
{
INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
@@ -418,52 +402,49 @@
/* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK;
in_pcbrehash(inp);
-#ifdef IPSEC
- ipsec_pcbdisconn(inp->inp_sp);
-#endif
- if (inp->inp_socket->so_state & SS_NOFDREF)
- in6_pcbdetach(inp);
}
void
-in6_pcbdetach(inp)
- struct inpcb *inp;
+in6_pcbdetach(struct inpcb *inp)
+{
+
+ KASSERT(inp->inp_socket != NULL, ("in6_pcbdetach: inp_socket == NULL"));
+ inp->inp_socket->so_pcb = NULL;
+ inp->inp_socket = NULL;
+}
+
+void
+in6_pcbfree(struct inpcb *inp)
{
- struct socket *so = inp->inp_socket;
struct inpcbinfo *ipi = inp->inp_pcbinfo;
+ KASSERT(inp->inp_socket == NULL, ("in6_pcbfree: inp_socket != NULL"));
INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
INP_LOCK_ASSERT(inp);
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
if (inp->in6p_sp != NULL)
ipsec6_delete_pcbpolicy(inp);
#endif /* IPSEC */
inp->inp_gencnt = ++ipi->ipi_gencnt;
in_pcbremlists(inp);
-
- if (so) {
- ACCEPT_LOCK();
- SOCK_LOCK(so);
- so->so_pcb = NULL;
- sotryfree(so);
- }
-
- ip6_freepcbopts(inp->in6p_outputopts);
- ip6_freemoptions(inp->in6p_moptions);
+ ip6_freepcbopts(inp->in6p_outputopts);
+ ip6_freemoptions(inp->in6p_moptions);
/* Check and free IPv4 related resources in case of mapped addr */
if (inp->inp_options)
(void)m_free(inp->inp_options);
- ip_freemoptions(inp->inp_moptions);
+ if (inp->inp_moptions != NULL)
+ inp_freemoptions(inp->inp_moptions);
inp->inp_vflag = 0;
- INP_LOCK_DESTROY(inp);
+#ifdef MAC
+ mac_destroy_inpcb(inp);
+#endif
+ INP_UNLOCK(inp);
uma_zfree(ipi->ipi_zone, inp);
}
struct sockaddr *
-in6_sockaddr(port, addr_p)
- in_port_t port;
- struct in6_addr *addr_p;
+in6_sockaddr(in_port_t port, struct in6_addr *addr_p)
{
struct sockaddr_in6 *sin6;
@@ -479,9 +460,7 @@
}
struct sockaddr *
-in6_v4mapsin6_sockaddr(port, addr_p)
- in_port_t port;
- struct in_addr *addr_p;
+in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p)
{
struct sockaddr_in sin;
struct sockaddr_in6 *sin6_p;
@@ -499,59 +478,39 @@
return (struct sockaddr *)sin6_p;
}
-/*
- * The calling convention of in6_setsockaddr() and in6_setpeeraddr() was
- * modified to match the pru_sockaddr() and pru_peeraddr() entry points
- * in struct pr_usrreqs, so that protocols can just reference then directly
- * without the need for a wrapper function. The socket must have a valid
- * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
- * except through a kernel programming error, so it is acceptable to panic
- * (or in this case trap) if the PCB is invalid. (Actually, we don't trap
- * because there actually /is/ a programming error somewhere... XXX)
- */
int
-in6_setsockaddr(so, nam)
- struct socket *so;
- struct sockaddr **nam;
+in6_getsockaddr(struct socket *so, struct sockaddr **nam)
{
- int s;
register struct inpcb *inp;
struct in6_addr addr;
in_port_t port;
- s = splnet();
inp = sotoinpcb(so);
- if (!inp) {
- splx(s);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL"));
+
+ INP_LOCK(inp);
port = inp->inp_lport;
addr = inp->in6p_laddr;
- splx(s);
+ INP_UNLOCK(inp);
*nam = in6_sockaddr(port, &addr);
return 0;
}
int
-in6_setpeeraddr(so, nam)
- struct socket *so;
- struct sockaddr **nam;
+in6_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
- int s;
struct inpcb *inp;
struct in6_addr addr;
in_port_t port;
- s = splnet();
inp = sotoinpcb(so);
- if (!inp) {
- splx(s);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("in6_getpeeraddr: inp == NULL"));
+
+ INP_LOCK(inp);
port = inp->inp_fport;
addr = inp->in6p_faddr;
- splx(s);
+ INP_UNLOCK(inp);
*nam = in6_sockaddr(port, &addr);
return 0;
@@ -560,18 +519,19 @@
int
in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam)
{
- struct inpcb *inp = sotoinpcb(so);
+ struct inpcb *inp;
int error;
- if (inp == NULL)
- return EINVAL;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("in6_mapped_sockaddr: inp == NULL"));
+
if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) {
- error = in_setsockaddr(so, nam, &tcbinfo);
+ error = in_getsockaddr(so, nam);
if (error == 0)
in6_sin_2_v4mapsin6_in_sock(nam);
} else {
- /* scope issues will be handled in in6_setsockaddr(). */
- error = in6_setsockaddr(so, nam);
+ /* scope issues will be handled in in6_getsockaddr(). */
+ error = in6_getsockaddr(so, nam);
}
return error;
@@ -580,18 +540,19 @@
int
in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam)
{
- struct inpcb *inp = sotoinpcb(so);
+ struct inpcb *inp;
int error;
- if (inp == NULL)
- return EINVAL;
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("in6_mapped_peeraddr: inp == NULL"));
+
if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) {
- error = in_setpeeraddr(so, nam, &tcbinfo);
+ error = in_getpeeraddr(so, nam);
if (error == 0)
in6_sin_2_v4mapsin6_in_sock(nam);
} else
- /* scope issues will be handled in in6_setpeeraddr(). */
- error = in6_setpeeraddr(so, nam);
+ /* scope issues will be handled in in6_getpeeraddr(). */
+ error = in6_getpeeraddr(so, nam);
return error;
}
@@ -604,25 +565,19 @@
* cmds that are uninteresting (e.g., no error in the map).
* Call the protocol specific routine (if any) to report
* any errors for each matching socket.
- *
- * Must be called at splnet.
*/
void
-in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, cmdarg, notify)
- struct inpcbinfo *pcbinfo;
- struct sockaddr *dst;
- const struct sockaddr *src;
- u_int fport_arg, lport_arg;
- int cmd;
- void *cmdarg;
- struct inpcb *(*notify) __P((struct inpcb *, int));
+in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst,
+ u_int fport_arg, const struct sockaddr *src, u_int lport_arg,
+ int cmd, void *cmdarg,
+ struct inpcb *(*notify) __P((struct inpcb *, int)))
{
struct inpcbhead *head;
struct inpcb *inp, *ninp;
struct sockaddr_in6 sa6_src, *sa6_dst;
u_short fport = fport_arg, lport = lport_arg;
u_int32_t flowinfo;
- int errno, s;
+ int errno;
if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6)
return;
@@ -654,14 +609,13 @@
notify = in6_rtchange;
}
errno = inet6ctlerrmap[cmd];
- s = splnet();
- head = pcbinfo->listhead;
+ head = pcbinfo->ipi_listhead;
INP_INFO_WLOCK(pcbinfo);
- for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
+ for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
INP_LOCK(inp);
- ninp = LIST_NEXT(inp, inp_list);
+ ninp = LIST_NEXT(inp, inp_list);
- if ((inp->inp_vflag & INP_IPV6) == 0) {
+ if ((inp->inp_vflag & INP_IPV6) == 0) {
INP_UNLOCK(inp);
continue;
}
@@ -715,31 +669,29 @@
INP_UNLOCK(inp);
}
INP_INFO_WUNLOCK(pcbinfo);
- splx(s);
}
/*
* Lookup a PCB based on the local address and port.
*/
struct inpcb *
-in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
- struct inpcbinfo *pcbinfo;
- struct in6_addr *laddr;
- u_int lport_arg;
- int wild_okay;
+in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
+ u_int lport_arg, int wild_okay)
{
register struct inpcb *inp;
int matchwild = 3, wildcard;
u_short lport = lport_arg;
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+
if (!wild_okay) {
struct inpcbhead *head;
/*
* Look for an unconnected (wildcard foreign addr) PCB that
* matches the local address and port we're looking for.
*/
- head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
- pcbinfo->hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -766,8 +718,8 @@
* First see if this local port is in use by looking on the
* port hash list.
*/
- porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
- pcbinfo->porthashmask)];
+ porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
+ pcbinfo->ipi_porthashmask)];
LIST_FOREACH(phd, porthash, phd_hash) {
if (phd->phd_port == lport)
break;
@@ -808,15 +760,15 @@
}
void
-in6_pcbpurgeif0(head, ifp)
- struct in6pcb *head;
- struct ifnet *ifp;
+in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
{
struct in6pcb *in6p;
struct ip6_moptions *im6o;
struct in6_multi_mship *imm, *nimm;
- for (in6p = head; in6p != NULL; in6p = LIST_NEXT(in6p, inp_list)) {
+ INP_INFO_RLOCK(pcbinfo);
+ LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
+ INP_LOCK(in6p);
im6o = in6p->in6p_moptions;
if ((in6p->inp_vflag & INP_IPV6) &&
im6o) {
@@ -843,7 +795,9 @@
}
}
}
+ INP_UNLOCK(in6p);
}
+ INP_INFO_RUNLOCK(pcbinfo);
}
/*
@@ -853,9 +807,9 @@
* (by a redirect), time to try a default gateway again.
*/
void
-in6_losing(in6p)
- struct inpcb *in6p;
+in6_losing(struct inpcb *in6p)
{
+
/*
* We don't store route pointers in the routing table anymore
*/
@@ -867,9 +821,7 @@
* and allocate a (hopefully) better one.
*/
struct inpcb *
-in6_rtchange(inp, errno)
- struct inpcb *inp;
- int errno;
+in6_rtchange(struct inpcb *inp, int errno)
{
/*
* We don't store route pointers in the routing table anymore
@@ -881,18 +833,17 @@
* Lookup PCB in hash list.
*/
struct inpcb *
-in6_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp)
- struct inpcbinfo *pcbinfo;
- struct in6_addr *faddr, *laddr;
- u_int fport_arg, lport_arg;
- int wildcard;
- struct ifnet *ifp;
+in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
+ u_int fport_arg, struct in6_addr *laddr, u_int lport_arg,
+ int wildcard, struct ifnet *ifp)
{
struct inpcbhead *head;
register struct inpcb *inp;
u_short fport = fport_arg, lport = lport_arg;
int faith;
+ INP_INFO_RLOCK_ASSERT(pcbinfo);
+
if (faithprefix_p != NULL)
faith = (*faithprefix_p)(laddr);
else
@@ -901,9 +852,9 @@
/*
* First look for an exact match.
*/
- head = &pcbinfo->hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */,
- lport, fport,
- pcbinfo->hashmask)];
+ head = &pcbinfo->ipi_hashbase[
+ INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport,
+ pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -920,8 +871,8 @@
if (wildcard) {
struct inpcb *local_wild = NULL;
- head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
- pcbinfo->hashmask)];
+ head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_hashmask)];
LIST_FOREACH(inp, head, inp_hash) {
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
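
The in6_setsockaddr()/in6_setpeeraddr() pair becomes in6_getsockaddr()/in6_getpeeraddr() and, like the rest of this file, trades splnet() for inpcb locking: a missing PCB is now treated as a programming error (KASSERT) rather than an EINVAL return, and the address and port are snapshotted under INP_LOCK(). Condensed from the hunk above, the converted routine reads:

    int
    in6_getsockaddr(struct socket *so, struct sockaddr **nam)
    {
        struct inpcb *inp;
        struct in6_addr addr;
        in_port_t port;

        inp = sotoinpcb(so);
        KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL"));

        INP_LOCK(inp);              /* copy out under the PCB mutex */
        port = inp->inp_lport;
        addr = inp->in6p_laddr;
        INP_UNLOCK(inp);

        *nam = in6_sockaddr(port, &addr);
        return 0;
    }
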
Index: udp6_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/udp6_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/udp6_var.h -L sys/netinet6/udp6_var.h -u -r1.1.1.1 -r1.2
--- sys/netinet6/udp6_var.h
+++ sys/netinet6/udp6_var.h
@@ -26,12 +26,13 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/netinet6/udp6_var.h,v 1.7 2005/01/07 02:30:35 imp Exp $
+ * $FreeBSD: src/sys/netinet6/udp6_var.h,v 1.9 2007/07/23 07:58:58 rwatson Exp $
*/
/*-
* Copyright (c) 1982, 1986, 1989, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -66,13 +67,10 @@
#ifdef _KERNEL
SYSCTL_DECL(_net_inet6_udp6);
-extern struct pr_usrreqs udp6_usrreqs;
+extern struct pr_usrreqs udp6_usrreqs;
-void udp6_ctlinput __P((int, struct sockaddr *, void *));
-int udp6_input __P((struct mbuf **, int *, int));
-int udp6_output __P((struct inpcb *inp, struct mbuf *m,
- struct sockaddr *addr, struct mbuf *control,
- struct thread *td));
+void udp6_ctlinput(int, struct sockaddr *, void *);
+int udp6_input(struct mbuf **, int *, int);
#endif
#endif /*_NETINET6_UDP6_VAR_H_*/
Index: scope6_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/scope6_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/scope6_var.h -L sys/netinet6/scope6_var.h -u -r1.1.1.1 -r1.2
--- sys/netinet6/scope6_var.h
+++ sys/netinet6/scope6_var.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/scope6_var.h,v 1.4.2.1 2005/11/04 20:26:15 ume Exp $ */
+/* $FreeBSD: src/sys/netinet6/scope6_var.h,v 1.5 2005/07/25 12:31:42 ume Exp $ */
/* $KAME: scope6_var.h,v 1.4 2000/05/18 15:03:27 jinmei Exp $ */
/*-
Index: raw_ip6.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/raw_ip6.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/raw_ip6.c -L sys/netinet6/raw_ip6.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/raw_ip6.c
+++ sys/netinet6/raw_ip6.c
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/netinet6/raw_ip6.c,v 1.50.2.7 2005/12/26 00:59:12 suz Exp $
+ * $FreeBSD: src/sys/netinet6/raw_ip6.c,v 1.73 2007/07/05 16:29:40 delphij Exp $
*/
/*-
@@ -68,13 +68,13 @@
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sx.h>
-#include <sys/systm.h>
#include <sys/syslog.h>
#include <net/if.h>
@@ -96,14 +96,9 @@
#include <netinet6/scope6_var.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#include <netinet6/ipsec6.h>
-#endif /*IPSEC*/
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
#include <machine/stdarg.h>
@@ -122,14 +117,22 @@
struct rip6stat rip6stat;
/*
+ * Hooks for multicast forwarding.
+ */
+struct socket *ip6_mrouter = NULL;
+int (*ip6_mrouter_set)(struct socket *, struct sockopt *);
+int (*ip6_mrouter_get)(struct socket *, struct sockopt *);
+int (*ip6_mrouter_done)(void);
+int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *);
+int (*mrt6_ioctl)(int, caddr_t);
+
+/*
* Setup generic address and protocol structures
* for raw_input routine, then pass them along with
* mbuf chain.
*/
int
-rip6_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp, proto;
+rip6_input(struct mbuf **mp, int *offp, int proto)
{
struct mbuf *m = *mp;
register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
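
raw_ip6.c now owns the multicast-forwarding entry points as nullable function pointers that the IPv6 multicast routing code fills in when it attaches, and every caller tests the pointer before dispatching, as the MRT6 setsockopt/getsockopt cases later in this file show (ip6_mrouter_set ? ... : EOPNOTSUPP). A minimal self-contained illustration of that optional-hook pattern; the names are invented for the example:

    #include <errno.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Left NULL until an optional module registers its implementation. */
    static int (*mrouter_set_hook)(int optname) = NULL;

    static int
    module_mrouter_set(int optname)
    {
        printf("mrouter set, option %d\n", optname);
        return (0);
    }

    static int
    ctloutput(int optname)
    {
        /* Dispatch through the hook only when something registered it. */
        return (mrouter_set_hook ? mrouter_set_hook(optname) : EOPNOTSUPP);
    }

    int
    main(void)
    {
        printf("before attach: %d\n", ctloutput(100));  /* EOPNOTSUPP */
        mrouter_set_hook = module_mrouter_set;          /* "module" loads */
        printf("after attach:  %d\n", ctloutput(100));  /* 0 */
        return (0);
    }
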
@@ -176,18 +179,16 @@
if (last) {
struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
/*
* Check AH/ESP integrity.
*/
if (n && ipsec6_in_reject(n, last)) {
m_freem(n);
-#ifdef IPSEC
ipsec6stat.in_polvio++;
-#endif /*IPSEC*/
/* do not inject data into pcb */
} else
-#endif /*IPSEC || FAST_IPSEC*/
+#endif /* IPSEC */
if (n) {
if (last->in6p_flags & IN6P_CONTROLOPTS ||
last->in6p_socket->so_options & SO_TIMESTAMP)
@@ -209,20 +210,18 @@
}
last = in6p;
}
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
/*
* Check AH/ESP integrity.
*/
if (last && ipsec6_in_reject(m, last)) {
m_freem(m);
-#ifdef IPSEC
ipsec6stat.in_polvio++;
-#endif /*IPSEC*/
ip6stat.ip6s_delivered--;
/* do not inject data into pcb */
INP_UNLOCK(last);
} else
-#endif /*IPSEC || FAST_IPSEC*/
+#endif /* IPSEC */
if (last) {
if (last->in6p_flags & IN6P_CONTROLOPTS ||
last->in6p_socket->so_options & SO_TIMESTAMP)
@@ -257,10 +256,7 @@
}
void
-rip6_ctlinput(cmd, sa, d)
- int cmd;
- struct sockaddr *sa;
- void *d;
+rip6_ctlinput(int cmd, struct sockaddr *sa, void *d)
{
struct ip6_hdr *ip6;
struct mbuf *m;
@@ -342,7 +338,7 @@
INP_LOCK(in6p);
priv = 0;
- if (so->so_cred->cr_uid == 0)
+ if (suser_cred(so->so_cred, 0) == 0)
priv = 1;
dst = &dstsock->sin6_addr;
if (control) {
@@ -480,9 +476,7 @@
* Raw IPv6 socket option processing.
*/
int
-rip6_ctloutput(so, sopt)
- struct socket *so;
- struct sockopt *sopt;
+rip6_ctloutput(struct socket *so, struct sockopt *sopt)
{
int error;
@@ -507,7 +501,8 @@
case MRT6_ADD_MFC:
case MRT6_DEL_MFC:
case MRT6_PIM:
- error = ip6_mrouter_get(so, sopt);
+ error = ip6_mrouter_get ? ip6_mrouter_get(so, sopt) :
+ EOPNOTSUPP;
break;
case IPV6_CHECKSUM:
error = ip6_raw_ctloutput(so, sopt);
@@ -527,7 +522,8 @@
case MRT6_ADD_MFC:
case MRT6_DEL_MFC:
case MRT6_PIM:
- error = ip6_mrouter_set(so, sopt);
+ error = ip6_mrouter_set ? ip6_mrouter_set(so, sopt) :
+ EOPNOTSUPP;
break;
case IPV6_CHECKSUM:
error = ip6_raw_ctloutput(so, sopt);
@@ -547,39 +543,27 @@
{
struct inpcb *inp;
struct icmp6_filter *filter;
- int error, s;
+ int error;
- INP_INFO_WLOCK(&ripcbinfo);
inp = sotoinpcb(so);
- if (inp) {
- INP_INFO_WUNLOCK(&ripcbinfo);
- panic("rip6_attach");
- }
- if (td && (error = suser(td)) != 0) {
- INP_INFO_WUNLOCK(&ripcbinfo);
+ KASSERT(inp == NULL, ("rip6_attach: inp != NULL"));
+ if (td && (error = suser(td)) != 0)
return error;
- }
error = soreserve(so, rip_sendspace, rip_recvspace);
- if (error) {
- INP_INFO_WUNLOCK(&ripcbinfo);
+ if (error)
return error;
- }
MALLOC(filter, struct icmp6_filter *,
sizeof(struct icmp6_filter), M_PCB, M_NOWAIT);
- if (filter == NULL) {
- INP_INFO_WUNLOCK(&ripcbinfo);
+ if (filter == NULL)
return ENOMEM;
- }
- s = splnet();
- error = in_pcballoc(so, &ripcbinfo, "raw6inp");
- splx(s);
+ INP_INFO_WLOCK(&ripcbinfo);
+ error = in_pcballoc(so, &ripcbinfo);
if (error) {
INP_INFO_WUNLOCK(&ripcbinfo);
FREE(filter, M_PCB);
return error;
}
inp = (struct inpcb *)so->so_pcb;
- INP_LOCK(inp);
INP_INFO_WUNLOCK(&ripcbinfo);
inp->inp_vflag |= INP_IPV6;
inp->in6p_ip6_nxt = (long)proto;
@@ -591,35 +575,49 @@
return 0;
}
-static int
+static void
rip6_detach(struct socket *so)
{
struct inpcb *inp;
- INP_INFO_WLOCK(&ripcbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&ripcbinfo);
- panic("rip6_detach");
- }
- /* xxx: RSVP */
- if (so == ip6_mrouter)
+ KASSERT(inp != NULL, ("rip6_detach: inp == NULL"));
+
+ if (so == ip6_mrouter && ip6_mrouter_done)
ip6_mrouter_done();
+ /* xxx: RSVP */
+ INP_INFO_WLOCK(&ripcbinfo);
+ INP_LOCK(inp);
if (inp->in6p_icmp6filt) {
FREE(inp->in6p_icmp6filt, M_PCB);
inp->in6p_icmp6filt = NULL;
}
- INP_LOCK(inp);
in6_pcbdetach(inp);
+ in6_pcbfree(inp);
INP_INFO_WUNLOCK(&ripcbinfo);
- return 0;
}
-static int
+/* XXXRW: This can't ever be called. */
+static void
rip6_abort(struct socket *so)
{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip6_abort: inp == NULL"));
+
+ soisdisconnected(so);
+}
+
+static void
+rip6_close(struct socket *so)
+{
+ struct inpcb *inp;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip6_close: inp == NULL"));
+
soisdisconnected(so);
- return rip6_detach(so);
}
static int
@@ -630,7 +628,8 @@
if ((so->so_state & SS_ISCONNECTED) == 0)
return ENOTCONN;
inp->in6p_faddr = in6addr_any;
- return rip6_abort(so);
+ rip6_abort(so);
+ return (0);
}
static int
@@ -641,6 +640,7 @@
struct ifaddr *ia = NULL;
int error = 0;
+ KASSERT(inp != NULL, ("rip6_bind: inp == NULL"));
if (nam->sa_len != sizeof(*addr))
return EINVAL;
if (TAILQ_EMPTY(&ifnet) || addr->sin6_family != AF_INET6)
@@ -674,6 +674,7 @@
struct ifnet *ifp = NULL;
int error = 0, scope_ambiguous = 0;
+ KASSERT(inp != NULL, ("rip6_connect: inp == NULL"));
if (nam->sa_len != sizeof(*addr))
return EINVAL;
if (TAILQ_EMPTY(&ifnet))
@@ -726,10 +727,9 @@
{
struct inpcb *inp;
- INP_INFO_RLOCK(&ripcbinfo);
inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("rip6_shutdown: inp == NULL"));
INP_LOCK(inp);
- INP_INFO_RUNLOCK(&ripcbinfo);
socantsendmore(so);
INP_UNLOCK(inp);
return 0;
@@ -737,13 +737,14 @@
static int
rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
- struct mbuf *control, struct thread *td)
+ struct mbuf *control, struct thread *td)
{
struct inpcb *inp = sotoinpcb(so);
struct sockaddr_in6 tmp;
struct sockaddr_in6 *dst;
int ret;
+ KASSERT(inp != NULL, ("rip6_send: inp == NULL"));
INP_INFO_WLOCK(&ripcbinfo);
/* always copy sockaddr to avoid overwrites */
/* Unlocked read. */
@@ -802,8 +803,9 @@
.pru_control = in6_control,
.pru_detach = rip6_detach,
.pru_disconnect = rip6_disconnect,
- .pru_peeraddr = in6_setpeeraddr,
+ .pru_peeraddr = in6_getpeeraddr,
.pru_send = rip6_send,
.pru_shutdown = rip6_shutdown,
- .pru_sockaddr = in6_setsockaddr,
+ .pru_sockaddr = in6_getsockaddr,
+ .pru_close = rip6_close,
};
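
The raw_ip6.c hunks above replace direct calls into the IPv6 multicast routing code with nullable hook pointers (ip6_mrouter_set, ip6_mrouter_get, ip6_mrouter_done, ip6_mforward, mrt6_ioctl), so the MRT6_* socket options fail cleanly with EOPNOTSUPP whenever no multicast routing code has registered itself. The sketch below is a minimal user-space illustration of that pattern only; the names (mrouter_set_hook, module_mrouter_set, mrt6_setsockopt) and the void * arguments are placeholders, not the kernel's types.

    #include <errno.h>
    #include <stdio.h>

    /* Hook left NULL until the optional multicast-routing code loads. */
    static int (*mrouter_set_hook)(void *so, void *sopt);

    /* What the optional code would install at load time. */
    static int
    module_mrouter_set(void *so, void *sopt)
    {
        (void)so; (void)sopt;
        return (0);
    }

    /* Call site: guard the pointer and fail with EOPNOTSUPP when the
     * hook is absent, as the MRT6_* cases in rip6_ctloutput() now do. */
    static int
    mrt6_setsockopt(void *so, void *sopt)
    {
        return (mrouter_set_hook != NULL ?
            mrouter_set_hook(so, sopt) : EOPNOTSUPP);
    }

    int
    main(void)
    {
        printf("before load: %d\n", mrt6_setsockopt(NULL, NULL));
        mrouter_set_hook = module_mrouter_set;   /* module registers itself */
        printf("after load:  %d\n", mrt6_setsockopt(NULL, NULL));
        return (0);
    }

The point of the indirection is that the multicast routing code can later be built as an optional component and simply assign the pointers when it initializes.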
Index: in6_proto.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_proto.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet6/in6_proto.c -L sys/netinet6/in6_proto.c -u -r1.2 -r1.3
--- sys/netinet6/in6_proto.c
+++ sys/netinet6/in6_proto.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_proto.c,v 1.32.2.4 2005/11/16 10:31:23 ru Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_proto.c,v 1.46 2007/07/05 16:29:39 delphij Exp $ */
/* $KAME: in6_proto.c,v 1.91 2001/05/27 13:28:35 itojun Exp $ */
/*-
@@ -66,6 +66,7 @@
#include "opt_ipsec.h"
#include "opt_ipstealth.h"
#include "opt_carp.h"
+#include "opt_sctp.h"
#include <sys/param.h>
#include <sys/socket.h>
@@ -102,44 +103,25 @@
#include <netinet6/pim6_var.h>
#include <netinet6/nd6.h>
-#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#ifdef INET6
-#include <netinet6/ipsec6.h>
-#endif
-#include <netinet6/ah.h>
-#ifdef INET6
-#include <netinet6/ah6.h>
-#endif
-#ifdef IPSEC_ESP
-#include <netinet6/esp.h>
-#ifdef INET6
-#include <netinet6/esp6.h>
-#endif
-#endif
-#include <netinet6/ipcomp.h>
-#ifdef INET6
-#include <netinet6/ipcomp6.h>
-#endif
-#endif /* IPSEC */
-
#ifdef DEV_CARP
#include <netinet/ip_carp.h>
#endif
-#ifdef FAST_IPSEC
+#ifdef SCTP
+#include <netinet/in_pcb.h>
+#include <netinet/sctp_pcb.h>
+#include <netinet/sctp.h>
+#include <netinet/sctp_var.h>
+#include <netinet6/sctp6_var.h>
+#endif /* SCTP */
+
+#ifdef IPSEC
+#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
-#define IPSEC
-#define IPSEC_ESP
-#define ah6_input ipsec6_common_input
-#define esp6_input ipsec6_common_input
-#define ipcomp6_input ipsec6_common_input
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
#include <netinet6/ip6protosw.h>
-#include <net/net_osdep.h>
-
/*
* TCP/IP protocol family: IP6, ICMP6, UDP, TCP.
*/
@@ -186,6 +168,42 @@
.pr_drain = tcp_drain,
.pr_usrreqs = &tcp6_usrreqs,
},
+#ifdef SCTP
+{
+ .pr_type = SOCK_DGRAM,
+ .pr_domain = &inet6domain,
+ .pr_protocol = IPPROTO_SCTP,
+ .pr_flags = PR_WANTRCVD,
+ .pr_input = sctp6_input,
+ .pr_ctlinput = sctp6_ctlinput,
+ .pr_ctloutput = sctp_ctloutput,
+ .pr_drain = sctp_drain,
+ .pr_usrreqs = &sctp6_usrreqs
+},
+{
+ .pr_type = SOCK_SEQPACKET,
+ .pr_domain = &inet6domain,
+ .pr_protocol = IPPROTO_SCTP,
+ .pr_flags = PR_WANTRCVD,
+ .pr_input = sctp6_input,
+ .pr_ctlinput = sctp6_ctlinput,
+ .pr_ctloutput = sctp_ctloutput,
+ .pr_drain = sctp_drain,
+ .pr_usrreqs = &sctp6_usrreqs
+},
+
+{
+ .pr_type = SOCK_STREAM,
+ .pr_domain = &inet6domain,
+ .pr_protocol = IPPROTO_SCTP,
+ .pr_flags = PR_WANTRCVD,
+ .pr_input = sctp6_input,
+ .pr_ctlinput = sctp6_ctlinput,
+ .pr_ctloutput = sctp_ctloutput,
+ .pr_drain = sctp_drain,
+ .pr_usrreqs = &sctp6_usrreqs
+},
+#endif /* SCTP */
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
@@ -240,26 +258,24 @@
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_AH,
.pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = ah6_input,
+ .pr_input = ipsec6_common_input,
.pr_usrreqs = &nousrreqs,
},
-#ifdef IPSEC_ESP
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_ESP,
.pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = esp6_input,
+ .pr_input = ipsec6_common_input,
.pr_ctlinput = esp6_ctlinput,
.pr_usrreqs = &nousrreqs,
},
-#endif
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_IPCOMP,
.pr_flags = PR_ATOMIC|PR_ADDR,
- .pr_input = ipcomp6_input,
+ .pr_input = ipsec6_common_input,
.pr_usrreqs = &nousrreqs,
},
#endif /* IPSEC */
@@ -292,7 +308,7 @@
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_PIM,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
- .pr_input = pim6_input,
+ .pr_input = encap6_input,
.pr_output = rip6_output,
.pr_ctloutput = rip6_ctloutput,
.pr_usrreqs = &rip6_usrreqs
@@ -361,7 +377,7 @@
int ip6_maxfragpackets; /* initialized in frag6.c:frag6_init() */
int ip6_maxfrags; /* initialized in frag6.c:frag6_init() */
int ip6_log_interval = 5;
-int ip6_hdrnestlimit = 50; /* appropriate? */
+int ip6_hdrnestlimit = 15; /* How many header options will we process? */
int ip6_dad_count = 1; /* DupAddrDetectionTransmits */
int ip6_auto_flowlabel = 1;
int ip6_gif_hlim = 0;
@@ -376,8 +392,6 @@
#ifdef IPSTEALTH
int ip6stealth = 0;
#endif
-int ip6_rthdr0_allowed = 0; /* Disallow use of routing header 0 */
- /* by default. */
/* icmp6 */
/*
@@ -402,7 +416,8 @@
int icmp6_rediraccept = 1; /* accept and process redirects */
int icmp6_redirtimeout = 10 * 60; /* 10 minutes */
int icmp6errppslim = 100; /* 100pps */
-int icmp6_nodeinfo = 3; /* enable/disable NI response */
+/* control how to respond to NI queries */
+int icmp6_nodeinfo = (ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK);
/* UDP on IP6 parameters */
int udp6_sendspace = 9216; /* really max datagram size */
@@ -420,6 +435,9 @@
SYSCTL_NODE(_net_inet6, IPPROTO_ICMPV6, icmp6, CTLFLAG_RW, 0, "ICMP6");
SYSCTL_NODE(_net_inet6, IPPROTO_UDP, udp6, CTLFLAG_RW, 0, "UDP6");
SYSCTL_NODE(_net_inet6, IPPROTO_TCP, tcp6, CTLFLAG_RW, 0, "TCP6");
+#ifdef SCTP
+SYSCTL_NODE(_net_inet6, IPPROTO_SCTP, sctp6, CTLFLAG_RW, 0, "SCTP6");
+#endif
#ifdef IPSEC
SYSCTL_NODE(_net_inet6, IPPROTO_ESP, ipsec6, CTLFLAG_RW, 0, "IPSEC6");
#endif /* IPSEC */
@@ -463,7 +481,7 @@
}
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING,
- forwarding, CTLFLAG_RW, &ip6_forwarding, 0, "");
+ forwarding, CTLFLAG_RW, &ip6_forwarding, 0, "");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS,
redirect, CTLFLAG_RW, &ip6_sendredirects, 0, "");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM,
@@ -504,6 +522,7 @@
sysctl_ip6_tempvltime, "I", "");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_V6ONLY,
v6only, CTLFLAG_RW, &ip6_v6only, 0, "");
+TUNABLE_INT("net.inet6.ip6.auto_linklocal", &ip6_auto_linklocal);
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL,
auto_linklocal, CTLFLAG_RW, &ip6_auto_linklocal, 0, "");
SYSCTL_STRUCT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, CTLFLAG_RD,
@@ -515,14 +534,11 @@
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS,
maxfrags, CTLFLAG_RW, &ip6_maxfrags, 0, "");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU,
- mcast_pmtu, CTLFLAG_RW, &ip6_mcast_pmtu, 0, "");
+ mcast_pmtu, CTLFLAG_RW, &ip6_mcast_pmtu, 0, "");
#ifdef IPSTEALTH
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_STEALTH, stealth, CTLFLAG_RW,
&ip6stealth, 0, "");
#endif
-SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTHDR0_ALLOWED,
- rthdr0_allowed, CTLFLAG_RW, &ip6_rthdr0_allowed, 0, "");
-
/* net.inet6.icmp6 */
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT,
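
The in6_proto.c hunks register SCTP three times in the IPv6 protocol switch, once per socket type it supports (datagram, seqpacket, stream), each entry pointing at the same input and ctloutput handlers. Below is a cut-down, user-space sketch of how such a table is keyed and searched; struct sw_entry and pffindtype_sketch are made-up names, and the sketch assumes IPPROTO_SCTP is provided by <netinet/in.h>.

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Just enough fields to show why one protocol appears several times:
     * the table is keyed by (socket type, protocol number). */
    struct sw_entry {
        int         pr_type;        /* SOCK_DGRAM, SOCK_STREAM, ... */
        int         pr_protocol;    /* IPPROTO_* number */
        const char *pr_name;
    };

    static const struct sw_entry inet6sw_sketch[] = {
        { SOCK_DGRAM,     IPPROTO_SCTP, "sctp6 (dgram)" },
        { SOCK_SEQPACKET, IPPROTO_SCTP, "sctp6 (seqpacket)" },
        { SOCK_STREAM,    IPPROTO_SCTP, "sctp6 (stream)" },
    };

    /* The lookup a socket(AF_INET6, type, proto) call conceptually does. */
    static const struct sw_entry *
    pffindtype_sketch(int type, int proto)
    {
        size_t i;

        for (i = 0; i < sizeof(inet6sw_sketch) / sizeof(inet6sw_sketch[0]); i++)
            if (inet6sw_sketch[i].pr_type == type &&
                inet6sw_sketch[i].pr_protocol == proto)
                return (&inet6sw_sketch[i]);
        return (NULL);
    }

    int
    main(void)
    {
        const struct sw_entry *sw = pffindtype_sketch(SOCK_STREAM, IPPROTO_SCTP);

        printf("%s\n", sw != NULL ? sw->pr_name : "unsupported");
        return (0);
    }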
Index: dest6.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/dest6.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/dest6.c -L sys/netinet6/dest6.c -u -r1.1.1.1 -r1.2
--- sys/netinet6/dest6.c
+++ sys/netinet6/dest6.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/dest6.c,v 1.10 2005/01/07 02:30:34 imp Exp $ */
+/* $FreeBSD: src/sys/netinet6/dest6.c,v 1.11 2007/07/05 16:23:46 delphij Exp $ */
/* $KAME: dest6.c,v 1.59 2003/07/11 13:21:16 t-momose Exp $ */
/*-
@@ -57,9 +57,7 @@
* Destination options header processing.
*/
int
-dest6_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp, proto;
+dest6_input(struct mbuf **mp, int *offp, int proto)
{
struct mbuf *m = *mp;
int off = *offp, dstoptlen, optlen;
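
dest6.c, like most files in this patch, only converts old-style (K&R) function definitions to ANSI C prototypes. The toy example below shows the two forms side by side; the practical difference is that with the prototype form the compiler type-checks arguments at every call site. K&R definitions still compile under C89/C99, so the change is purely a modernization.

    #include <stdio.h>

    /* Old style, as removed throughout this patch: parameter types are
     * declared after the parameter list, so callers get no checking. */
    static int
    add_old(a, b)
        int a, b;
    {
        return (a + b);
    }

    /* New style: a full prototype, checked at every call site. */
    static int
    add_new(int a, int b)
    {
        return (a + b);
    }

    int
    main(void)
    {
        printf("%d %d\n", add_old(1, 2), add_new(3, 4));
        return (0);
    }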
Index: ip6_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/ip6_var.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/ip6_var.h -L sys/netinet6/ip6_var.h -u -r1.1.1.2 -r1.2
--- sys/netinet6/ip6_var.h
+++ sys/netinet6/ip6_var.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/ip6_var.h,v 1.30.2.6 2005/12/25 14:03:38 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/ip6_var.h,v 1.39 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: ip6_var.h,v 1.62 2001/05/03 14:51:48 itojun Exp $ */
/*-
@@ -69,16 +69,13 @@
* being reassembled is attached to one of these structures.
*/
struct ip6q {
- u_int32_t ip6q_head;
- u_int16_t ip6q_len;
- u_int8_t ip6q_nxt; /* ip6f_nxt in first fragment */
- u_int8_t ip6q_hlim;
struct ip6asfrag *ip6q_down;
struct ip6asfrag *ip6q_up;
u_int32_t ip6q_ident;
- u_int8_t ip6q_arrive;
+ u_int8_t ip6q_nxt;
+ u_int8_t ip6q_ecn;
u_int8_t ip6q_ttl;
- struct in6_addr ip6q_src, ip6q_dst;
+ struct in6_addr ip6q_src, ip6q_dst;
struct ip6q *ip6q_next;
struct ip6q *ip6q_prev;
int ip6q_unfrglen; /* len of unfragmentable part */
@@ -89,11 +86,6 @@
};
struct ip6asfrag {
- u_int32_t ip6af_head;
- u_int16_t ip6af_len;
- u_int8_t ip6af_nxt;
- u_int8_t ip6af_hlim;
- /* must not override the above members during reassembling */
struct ip6asfrag *ip6af_down;
struct ip6asfrag *ip6af_up;
struct mbuf *ip6af_m;
@@ -298,7 +290,7 @@
extern int ip6_mcast_pmtu; /* enable pMTU discovery for multicast? */
extern int ip6_v6only;
-extern struct socket *ip6_mrouter; /* multicast routing daemon */
+extern struct socket *ip6_mrouter; /* multicast routing daemon */
extern int ip6_sendredirects; /* send IP redirects when forwarding? */
extern int ip6_maxfragpackets; /* Maximum packets in reassembly queue */
extern int ip6_maxfrags; /* Maximum fragments in reassembly queue */
@@ -353,7 +345,9 @@
struct ip6aux *ip6_findaux __P((struct mbuf *));
void ip6_delaux __P((struct mbuf *));
-int ip6_mforward __P((struct ip6_hdr *, struct ifnet *, struct mbuf *));
+extern int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *,
+ struct mbuf *);
+
int ip6_process_hopopts __P((struct mbuf *, u_int8_t *, int, u_int32_t *,
u_int32_t *));
void ip6_savecontrol __P((struct inpcb *, struct mbuf *, struct mbuf **));
Index: in6_rmx.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_rmx.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/in6_rmx.c -L sys/netinet6/in6_rmx.c -u -r1.1.1.1 -r1.2
--- sys/netinet6/in6_rmx.c
+++ sys/netinet6/in6_rmx.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_rmx.c,v 1.14 2005/01/07 02:30:34 imp Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_rmx.c,v 1.18 2007/07/05 16:29:39 delphij Exp $ */
/* $KAME: in6_rmx.c,v 1.11 2001/07/26 06:53:16 jinmei Exp $ */
/*-
@@ -110,7 +110,7 @@
*/
static struct radix_node *
in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
- struct radix_node *treenodes)
+ struct radix_node *treenodes)
{
struct rtentry *rt = (struct rtentry *)treenodes;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt);
@@ -259,7 +259,7 @@
*/
if (rtq_reallyold != 0) {
rt->rt_flags |= RTPRF_OURS;
- rt->rt_rmx.rmx_expire = time_second + rtq_reallyold;
+ rt->rt_rmx.rmx_expire = time_uptime + rtq_reallyold;
} else {
rtexpunge(rt);
}
@@ -290,7 +290,7 @@
if (rt->rt_flags & RTPRF_OURS) {
ap->found++;
- if (ap->draining || rt->rt_rmx.rmx_expire <= time_second) {
+ if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) {
if (rt->rt_refcnt > 0)
panic("rtqkill route really not free");
@@ -305,9 +305,9 @@
}
} else {
if (ap->updating
- && (rt->rt_rmx.rmx_expire - time_second
+ && (rt->rt_rmx.rmx_expire - time_uptime
> rtq_reallyold)) {
- rt->rt_rmx.rmx_expire = time_second
+ rt->rt_rmx.rmx_expire = time_uptime
+ rtq_reallyold;
}
ap->nextstop = lmin(ap->nextstop,
@@ -332,7 +332,7 @@
arg.found = arg.killed = 0;
arg.rnh = rnh;
- arg.nextstop = time_second + rtq_timeout;
+ arg.nextstop = time_uptime + rtq_timeout;
arg.draining = arg.updating = 0;
RADIX_NODE_HEAD_LOCK(rnh);
rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
@@ -347,14 +347,14 @@
* hard.
*/
if ((arg.found - arg.killed > rtq_toomany)
- && (time_second - last_adjusted_timeout >= rtq_timeout)
+ && (time_uptime - last_adjusted_timeout >= rtq_timeout)
&& rtq_reallyold > rtq_minreallyold) {
rtq_reallyold = 2*rtq_reallyold / 3;
if (rtq_reallyold < rtq_minreallyold) {
rtq_reallyold = rtq_minreallyold;
}
- last_adjusted_timeout = time_second;
+ last_adjusted_timeout = time_uptime;
#ifdef DIAGNOSTIC
log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold to %d",
rtq_reallyold);
@@ -367,7 +367,7 @@
}
atv.tv_usec = 0;
- atv.tv_sec = arg.nextstop - time_second;
+ atv.tv_sec = arg.nextstop - time_uptime;
callout_reset(&rtq_timer, tvtohz(&atv), in6_rtqtimo, rock);
}
@@ -391,7 +391,7 @@
panic("rt == NULL in in6_mtuexpire");
if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
- if (rt->rt_rmx.rmx_expire <= time_second) {
+ if (rt->rt_rmx.rmx_expire <= time_uptime) {
rt->rt_flags |= RTF_PROBEMTU;
} else {
ap->nextstop = lmin(ap->nextstop,
@@ -412,23 +412,24 @@
struct timeval atv;
arg.rnh = rnh;
- arg.nextstop = time_second + MTUTIMO_DEFAULT;
+ arg.nextstop = time_uptime + MTUTIMO_DEFAULT;
RADIX_NODE_HEAD_LOCK(rnh);
rnh->rnh_walktree(rnh, in6_mtuexpire, &arg);
RADIX_NODE_HEAD_UNLOCK(rnh);
atv.tv_usec = 0;
- atv.tv_sec = arg.nextstop - time_second;
+ atv.tv_sec = arg.nextstop - time_uptime;
if (atv.tv_sec < 0) {
printf("invalid mtu expiration time on routing table\n");
- arg.nextstop = 30; /* last resort */
+ arg.nextstop = time_uptime + 30; /* last resort */
+ atv.tv_sec = 30;
}
callout_reset(&rtq_mtutimer, tvtohz(&atv), in6_mtutimo, rock);
}
#if 0
void
-in6_rtqdrain()
+in6_rtqdrain(void)
{
struct radix_node_head *rnh = rt_tables[AF_INET6];
struct rtqk_arg arg;
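
The in6_rmx.c hunks switch route expiry bookkeeping from time_second (wall clock) to time_uptime (monotonic since boot), so stepping the system date no longer expires cloned routes early or keeps them around too long. Below is a user-space analogue of the same idea built on CLOCK_MONOTONIC; expire_wallclock and expire_monotonic are illustrative names only.

    #include <stdio.h>
    #include <time.h>

    /* Wall clock: jumps whenever the date is stepped, so an absolute
     * expiry computed from it can fire early or never. */
    static time_t
    expire_wallclock(time_t lifetime)
    {
        return (time(NULL) + lifetime);
    }

    /* Monotonic clock: only moves forward, which is the property the
     * time_second -> time_uptime conversion above relies on. */
    static time_t
    expire_monotonic(time_t lifetime)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (ts.tv_sec + lifetime);
    }

    int
    main(void)
    {
        printf("%ld %ld\n", (long)expire_wallclock(600),
            (long)expire_monotonic(600));
        return (0);
    }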
Index: route6.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/route6.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet6/route6.c -L sys/netinet6/route6.c -u -r1.2 -r1.3
--- sys/netinet6/route6.c
+++ sys/netinet6/route6.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/route6.c,v 1.11.2.1 2005/11/04 20:26:15 ume Exp $ */
+/* $FreeBSD: src/sys/netinet6/route6.c,v 1.14 2007/07/05 16:23:48 delphij Exp $ */
/* $KAME: route6.c,v 1.24 2001/03/14 03:07:05 itojun Exp $ */
/*-
@@ -49,15 +49,18 @@
#include <netinet/icmp6.h>
-extern int ip6_rthdr0_allowed;
-
+#if 0
static int ip6_rthdr0 __P((struct mbuf *, struct ip6_hdr *,
struct ip6_rthdr0 *));
+#endif /* Disable route header processing. */
+
+/*
+ * proto - is unused
+ */
+
int
-route6_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp, proto; /* proto is unused */
+route6_input(struct mbuf **mp, int *offp, int proto)
{
struct ip6_hdr *ip6;
struct mbuf *m = *mp;
@@ -89,9 +92,8 @@
#endif
switch (rh->ip6r_type) {
+#if 0
case IPV6_RTHDR_TYPE_0:
- if (!ip6_rthdr0_allowed)
- return (IPPROTO_DONE);
rhlen = (rh->ip6r_len + 1) << 3;
#ifndef PULLDOWN_TEST
/*
@@ -118,6 +120,7 @@
if (ip6_rthdr0(m, ip6, (struct ip6_rthdr0 *)rh))
return (IPPROTO_DONE);
break;
+#endif /* Disable route header 0 */
default:
/* unknown routing type */
if (rh->ip6r_segleft == 0) {
@@ -140,11 +143,9 @@
* RFC2292 backward compatibility warning: no support for strict/loose bitmap,
* as it was dropped between RFC1883 and RFC2460.
*/
+#if 0
static int
-ip6_rthdr0(m, ip6, rh0)
- struct mbuf *m;
- struct ip6_hdr *ip6;
- struct ip6_rthdr0 *rh0;
+ip6_rthdr0(struct mbuf *m, struct ip6_hdr *ip6, struct ip6_rthdr0 *rh0)
{
int addrs, index;
struct in6_addr *nextaddr, tmpaddr;
@@ -237,3 +238,4 @@
m_freem(m);
return (-1);
}
+#endif
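
route6.c compiles out Type 0 routing header processing entirely, so an RH0 now takes the same path as any unknown routing type. As far as the surrounding code (and RFC 2460's rule for unrecognized routing types) indicates, that means the header is skipped when segments-left is zero and answered with an ICMPv6 Parameter Problem otherwise. A simplified sketch of that decision, with made-up enum and function names:

    #include <stdio.h>

    enum rh_action { RH_SKIP_HEADER, RH_SEND_PARAMPROB };

    /* With the RH0 case compiled out, every routing header is treated as
     * an unknown type: ignored when no segments remain, rejected with a
     * Parameter Problem when some do. */
    static enum rh_action
    routing_header_action(int segments_left)
    {
        return (segments_left == 0 ? RH_SKIP_HEADER : RH_SEND_PARAMPROB);
    }

    int
    main(void)
    {
        printf("%d %d\n", routing_header_action(0), routing_header_action(3));
        return (0);
    }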
Index: nd6_nbr.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/nd6_nbr.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/nd6_nbr.c -L sys/netinet6/nd6_nbr.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/nd6_nbr.c
+++ sys/netinet6/nd6_nbr.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/nd6_nbr.c,v 1.29.2.8 2005/12/25 14:03:38 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/nd6_nbr.c,v 1.47 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: nd6_nbr.c,v 1.86 2002/01/21 02:33:04 jinmei Exp $ */
/*-
@@ -68,8 +68,6 @@
#include <netinet/ip_carp.h>
#endif
-#include <net/net_osdep.h>
-
#define SDL(s) ((struct sockaddr_dl *)s)
struct dadq;
@@ -91,9 +89,7 @@
* Based on RFC 2462 (duplicate address detection)
*/
void
-nd6_ns_input(m, off, icmp6len)
- struct mbuf *m;
- int off, icmp6len;
+nd6_ns_input(struct mbuf *m, int off, int icmp6len)
{
struct ifnet *ifp = m->m_pkthdr.rcvif;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
@@ -109,6 +105,7 @@
int tlladdr;
union nd_opts ndopts;
struct sockaddr_dl *proxydl = NULL;
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, off, icmp6len,);
@@ -128,8 +125,8 @@
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
"nd6_ns_input: invalid hlim (%d) from %s to %s on %s\n",
- ip6->ip6_hlim, ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst), if_name(ifp)));
+ ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
goto bad;
}
@@ -210,7 +207,7 @@
struct sockaddr_in6 tsin6;
int need_proxy;
- bzero(&tsin6, sizeof tsin6);
+ bzero(&tsin6, sizeof tsin6);
tsin6.sin6_len = sizeof(struct sockaddr_in6);
tsin6.sin6_family = AF_INET6;
tsin6.sin6_addr = taddr6;
@@ -249,14 +246,14 @@
if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
nd6log((LOG_INFO, "nd6_ns_input: lladdrlen mismatch for %s "
"(if %d, NS packet %d)\n",
- ip6_sprintf(&taddr6),
+ ip6_sprintf(ip6bufs, &taddr6),
ifp->if_addrlen, lladdrlen - 2));
goto bad;
}
if (IN6_ARE_ADDR_EQUAL(&myaddr6, &saddr6)) {
nd6log((LOG_INFO, "nd6_ns_input: duplicate IP6 address %s\n",
- ip6_sprintf(&saddr6)));
+ ip6_sprintf(ip6bufs, &saddr6)));
goto freeit;
}
@@ -319,9 +316,12 @@
return;
bad:
- nd6log((LOG_ERR, "nd6_ns_input: src=%s\n", ip6_sprintf(&saddr6)));
- nd6log((LOG_ERR, "nd6_ns_input: dst=%s\n", ip6_sprintf(&daddr6)));
- nd6log((LOG_ERR, "nd6_ns_input: tgt=%s\n", ip6_sprintf(&taddr6)));
+ nd6log((LOG_ERR, "nd6_ns_input: src=%s\n",
+ ip6_sprintf(ip6bufs, &saddr6)));
+ nd6log((LOG_ERR, "nd6_ns_input: dst=%s\n",
+ ip6_sprintf(ip6bufs, &daddr6)));
+ nd6log((LOG_ERR, "nd6_ns_input: tgt=%s\n",
+ ip6_sprintf(ip6bufs, &taddr6)));
icmp6stat.icp6s_badns++;
m_freem(m);
}
@@ -334,13 +334,13 @@
*
* Based on RFC 2461
* Based on RFC 2462 (duplicate address detection)
+ *
+ * ln - for source address determination
+ * dad - duplicate address detection
*/
void
-nd6_ns_output(ifp, daddr6, taddr6, ln, dad)
- struct ifnet *ifp;
- const struct in6_addr *daddr6, *taddr6;
- struct llinfo_nd6 *ln; /* for source address determination */
- int dad; /* duplicate address detection */
+nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
+ const struct in6_addr *taddr6, struct llinfo_nd6 *ln, int dad)
{
struct mbuf *m;
struct ip6_hdr *ip6;
@@ -456,10 +456,12 @@
src = in6_selectsrc(&dst_sa, NULL,
NULL, &ro, NULL, NULL, &error);
if (src == NULL) {
+ char ip6buf[INET6_ADDRSTRLEN];
nd6log((LOG_DEBUG,
"nd6_ns_output: source can't be "
"determined: dst=%s, error=%d\n",
- ip6_sprintf(&dst_sa.sin6_addr), error));
+ ip6_sprintf(ip6buf, &dst_sa.sin6_addr),
+ error));
goto bad;
}
}
@@ -543,9 +545,7 @@
* - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD)
*/
void
-nd6_na_input(m, off, icmp6len)
- struct mbuf *m;
- int off, icmp6len;
+nd6_na_input(struct mbuf *m, int off, int icmp6len)
{
struct ifnet *ifp = m->m_pkthdr.rcvif;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
@@ -563,12 +563,13 @@
struct rtentry *rt;
struct sockaddr_dl *sdl;
union nd_opts ndopts;
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
"nd6_na_input: invalid hlim (%d) from %s to %s on %s\n",
- ip6->ip6_hlim, ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst), if_name(ifp)));
+ ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
goto bad;
}
@@ -595,7 +596,7 @@
if (IN6_IS_ADDR_MULTICAST(&taddr6)) {
nd6log((LOG_ERR,
"nd6_na_input: invalid target address %s\n",
- ip6_sprintf(&taddr6)));
+ ip6_sprintf(ip6bufs, &taddr6)));
goto bad;
}
if (IN6_IS_ADDR_MULTICAST(&daddr6))
@@ -640,13 +641,13 @@
if (ifa) {
log(LOG_ERR,
"nd6_na_input: duplicate IP6 address %s\n",
- ip6_sprintf(&taddr6));
+ ip6_sprintf(ip6bufs, &taddr6));
goto freeit;
}
if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
nd6log((LOG_INFO, "nd6_na_input: lladdrlen mismatch for %s "
- "(if %d, NA packet %d)\n", ip6_sprintf(&taddr6),
+ "(if %d, NA packet %d)\n", ip6_sprintf(ip6bufs, &taddr6),
ifp->if_addrlen, lladdrlen - 2));
goto bad;
}
@@ -813,21 +814,20 @@
if (ln->ln_hold) {
struct mbuf *m_hold, *m_hold_next;
- for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold_next) {
- struct mbuf *mpkt = NULL;
-
+ /*
+ * reset the ln_hold in advance, to explicitly
+ * prevent a ln_hold lookup in nd6_output()
+ * (wouldn't happen, though...)
+ */
+ for (m_hold = ln->ln_hold;
+ m_hold; m_hold = m_hold_next) {
m_hold_next = m_hold->m_nextpkt;
- mpkt = m_copym(m_hold, 0, M_COPYALL, M_DONTWAIT);
- if (mpkt == NULL) {
- m_freem(m_hold);
- break;
- }
- mpkt->m_nextpkt = NULL;
+ m_hold->m_nextpkt = NULL;
/*
* we assume ifp is not a loopback here, so just set
* the 2nd argument as the 1st one.
*/
- nd6_output(ifp, ifp, mpkt,
+ nd6_output(ifp, ifp, m_hold,
(struct sockaddr_in6 *)rt_key(rt), rt);
}
ln->ln_hold = NULL;
@@ -850,14 +850,14 @@
* the following items are not implemented yet:
* - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD)
* - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD)
+ *
+ * tlladdr - 1 if include target link-layer address
+ * sdl0 - sockaddr_dl (= proxy NA) or NULL
*/
void
-nd6_na_output(ifp, daddr6_0, taddr6, flags, tlladdr, sdl0)
- struct ifnet *ifp;
- const struct in6_addr *daddr6_0, *taddr6;
- u_long flags;
- int tlladdr; /* 1 if include target link-layer address */
- struct sockaddr *sdl0; /* sockaddr_dl (= proxy NA) or NULL */
+nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0,
+ const struct in6_addr *taddr6, u_long flags, int tlladdr,
+ struct sockaddr *sdl0)
{
struct mbuf *m;
struct ip6_hdr *ip6;
@@ -938,9 +938,10 @@
bcopy(&dst_sa, &ro.ro_dst, sizeof(dst_sa));
src = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, NULL, &error);
if (src == NULL) {
+ char ip6buf[INET6_ADDRSTRLEN];
nd6log((LOG_DEBUG, "nd6_na_output: source can't be "
"determined: dst=%s, error=%d\n",
- ip6_sprintf(&dst_sa.sin6_addr), error));
+ ip6_sprintf(ip6buf, &dst_sa.sin6_addr), error));
goto bad;
}
ip6->ip6_src = *src;
@@ -1020,8 +1021,7 @@
}
caddr_t
-nd6_ifptomac(ifp)
- struct ifnet *ifp;
+nd6_ifptomac(struct ifnet *ifp)
{
switch (ifp->if_type) {
case IFT_ARCNET:
@@ -1061,8 +1061,7 @@
static int dad_init = 0;
static struct dadq *
-nd6_dad_find(ifa)
- struct ifaddr *ifa;
+nd6_dad_find(struct ifaddr *ifa)
{
struct dadq *dp;
@@ -1074,9 +1073,7 @@
}
static void
-nd6_dad_starttimer(dp, ticks)
- struct dadq *dp;
- int ticks;
+nd6_dad_starttimer(struct dadq *dp, int ticks)
{
callout_reset(&dp->dad_timer_ch, ticks,
@@ -1084,8 +1081,7 @@
}
static void
-nd6_dad_stoptimer(dp)
- struct dadq *dp;
+nd6_dad_stoptimer(struct dadq *dp)
{
callout_stop(&dp->dad_timer_ch);
@@ -1095,12 +1091,11 @@
* Start Duplicate Address Detection (DAD) for specified interface address.
*/
void
-nd6_dad_start(ifa, delay)
- struct ifaddr *ifa;
- int delay;
+nd6_dad_start(struct ifaddr *ifa, int delay)
{
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
struct dadq *dp;
+ char ip6buf[INET6_ADDRSTRLEN];
if (!dad_init) {
TAILQ_INIT(&dadq);
@@ -1117,7 +1112,7 @@
log(LOG_DEBUG,
"nd6_dad_start: called with non-tentative address "
"%s(%s)\n",
- ip6_sprintf(&ia->ia_addr.sin6_addr),
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
return;
}
@@ -1143,7 +1138,7 @@
if (dp == NULL) {
log(LOG_ERR, "nd6_dad_start: memory allocation failed for "
"%s(%s)\n",
- ip6_sprintf(&ia->ia_addr.sin6_addr),
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
return;
}
@@ -1152,7 +1147,7 @@
TAILQ_INSERT_TAIL(&dadq, (struct dadq *)dp, dad_list);
nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp),
- ip6_sprintf(&ia->ia_addr.sin6_addr)));
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
/*
* Send NS packet for DAD, ip6_dad_count times.
@@ -1178,8 +1173,7 @@
* terminate DAD unconditionally. used for address removals.
*/
void
-nd6_dad_stop(ifa)
- struct ifaddr *ifa;
+nd6_dad_stop(struct ifaddr *ifa)
{
struct dadq *dp;
@@ -1200,12 +1194,12 @@
}
static void
-nd6_dad_timer(ifa)
- struct ifaddr *ifa;
+nd6_dad_timer(struct ifaddr *ifa)
{
int s;
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
struct dadq *dp;
+ char ip6buf[INET6_ADDRSTRLEN];
s = splnet(); /* XXX */
@@ -1222,14 +1216,14 @@
if (ia->ia6_flags & IN6_IFF_DUPLICATED) {
log(LOG_ERR, "nd6_dad_timer: called with duplicated address "
"%s(%s)\n",
- ip6_sprintf(&ia->ia_addr.sin6_addr),
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
goto done;
}
if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) {
log(LOG_ERR, "nd6_dad_timer: called with non-tentative address "
"%s(%s)\n",
- ip6_sprintf(&ia->ia_addr.sin6_addr),
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
goto done;
}
@@ -1290,7 +1284,7 @@
nd6log((LOG_DEBUG,
"%s: DAD complete for %s - no duplicates found\n",
if_name(ifa->ifa_ifp),
- ip6_sprintf(&ia->ia_addr.sin6_addr)));
+ ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
TAILQ_REMOVE(&dadq, (struct dadq *)dp, dad_list);
free(dp, M_IP6NDP);
@@ -1304,12 +1298,12 @@
}
void
-nd6_dad_duplicated(ifa)
- struct ifaddr *ifa;
+nd6_dad_duplicated(struct ifaddr *ifa)
{
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
struct ifnet *ifp;
struct dadq *dp;
+ char ip6buf[INET6_ADDRSTRLEN];
dp = nd6_dad_find(ifa);
if (dp == NULL) {
@@ -1319,7 +1313,7 @@
log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: "
"NS in/out=%d/%d, NA in=%d\n",
- if_name(ifa->ifa_ifp), ip6_sprintf(&ia->ia_addr.sin6_addr),
+ if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_na_icount);
ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
@@ -1330,7 +1324,7 @@
ifp = ifa->ifa_ifp;
log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n",
- if_name(ifp), ip6_sprintf(&ia->ia_addr.sin6_addr));
+ if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr));
log(LOG_ERR, "%s: manual intervention required\n",
if_name(ifp));
@@ -1375,9 +1369,7 @@
}
static void
-nd6_dad_ns_output(dp, ifa)
- struct dadq *dp;
- struct ifaddr *ifa;
+nd6_dad_ns_output(struct dadq *dp, struct ifaddr *ifa)
{
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
struct ifnet *ifp = ifa->ifa_ifp;
@@ -1395,8 +1387,7 @@
}
static void
-nd6_dad_ns_input(ifa)
- struct ifaddr *ifa;
+nd6_dad_ns_input(struct ifaddr *ifa)
{
struct in6_ifaddr *ia;
struct ifnet *ifp;
@@ -1415,9 +1406,10 @@
/* Quickhack - completely ignore DAD NS packets */
if (dad_ignore_ns) {
+ char ip6buf[INET6_ADDRSTRLEN];
nd6log((LOG_INFO,
"nd6_dad_ns_input: ignoring DAD NS packet for "
- "address %s(%s)\n", ip6_sprintf(taddr6),
+ "address %s(%s)\n", ip6_sprintf(ip6buf, taddr6),
if_name(ifa->ifa_ifp)));
return;
}
@@ -1445,8 +1437,7 @@
}
static void
-nd6_dad_na_input(ifa)
- struct ifaddr *ifa;
+nd6_dad_na_input(struct ifaddr *ifa)
{
struct dadq *dp;
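
A large part of the nd6_nbr.c diff is mechanical fallout from ip6_sprintf() growing a caller-supplied buffer argument (char ip6buf[INET6_ADDRSTRLEN]) instead of returning a pointer to static storage, which removes a re-entrancy hazard in the logging paths. The user-space sketch below shows the same convention built on inet_ntop(); addr_to_str is a hypothetical helper, not a kernel function.

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <stdio.h>

    /* Caller-supplied buffer, mirroring the new ip6_sprintf(ip6buf, addr):
     * each caller owns its storage, so nested or concurrent calls can no
     * longer clobber one another's result. */
    static const char *
    addr_to_str(char *dst, const struct in6_addr *addr)
    {
        return (inet_ntop(AF_INET6, addr, dst, INET6_ADDRSTRLEN));
    }

    int
    main(void)
    {
        char a[INET6_ADDRSTRLEN], b[INET6_ADDRSTRLEN];
        struct in6_addr src = in6addr_loopback, dst = in6addr_any;

        printf("%s -> %s\n", addr_to_str(a, &src), addr_to_str(b, &dst));
        return (0);
    }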
Index: mld6.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/mld6.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet6/mld6.c -L sys/netinet6/mld6.c -u -r1.2 -r1.3
--- sys/netinet6/mld6.c
+++ sys/netinet6/mld6.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/mld6.c,v 1.19.2.6 2006/03/11 10:36:23 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/mld6.c,v 1.31 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: mld6.c,v 1.27 2001/04/04 05:17:30 itojun Exp $ */
/*-
@@ -89,8 +89,6 @@
#include <netinet/icmp6.h>
#include <netinet6/mld6_var.h>
-#include <net/net_osdep.h>
-
/*
* Protocol constants
*/
@@ -112,7 +110,7 @@
static u_long mld_timerresid(struct in6_multi *);
void
-mld6_init()
+mld6_init(void)
{
static u_int8_t hbh_buf[8];
struct ip6_hbh *hbh = (struct ip6_hbh *)hbh_buf;
@@ -133,8 +131,7 @@
}
static void
-mld_starttimer(in6m)
- struct in6_multi *in6m;
+mld_starttimer(struct in6_multi *in6m)
{
struct timeval now;
@@ -153,8 +150,7 @@
}
static void
-mld_stoptimer(in6m)
- struct in6_multi *in6m;
+mld_stoptimer(struct in6_multi *in6m)
{
if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
return;
@@ -164,8 +160,7 @@
}
static void
-mld_timeo(in6m)
- struct in6_multi *in6m;
+mld_timeo(struct in6_multi *in6m)
{
int s = splnet();
@@ -186,8 +181,7 @@
}
static u_long
-mld_timerresid(in6m)
- struct in6_multi *in6m;
+mld_timerresid(struct in6_multi *in6m)
{
struct timeval now, diff;
@@ -211,8 +205,7 @@
}
void
-mld6_start_listening(in6m)
- struct in6_multi *in6m;
+mld6_start_listening(struct in6_multi *in6m)
{
struct in6_addr all_in6;
int s = splnet();
@@ -247,8 +240,7 @@
}
void
-mld6_stop_listening(in6m)
- struct in6_multi *in6m;
+mld6_stop_listening(struct in6_multi *in6m)
{
struct in6_addr allnode, allrouter;
@@ -271,9 +263,7 @@
}
void
-mld6_input(m, off)
- struct mbuf *m;
- int off;
+mld6_input(struct mbuf *m, int off)
{
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
struct mld_hdr *mldh;
@@ -298,10 +288,11 @@
/* source address validation */
ip6 = mtod(m, struct ip6_hdr *); /* in case mpullup */
if (!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) {
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
log(LOG_ERR,
"mld6_input: src %s is not link-local (grp=%s)\n",
- ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&mldh->mld_addr));
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufg, &mldh->mld_addr));
/*
* spec (RFC2710) does not explicitly
* specify to discard the packet from a non link-local
@@ -327,7 +318,7 @@
*
* In Non-Listener state, we simply don't have a membership record.
* In Delaying Listener state, our timer is running (in6m->in6m_timer)
- * In Idle Listener state, our timer is not running
+ * In Idle Listener state, our timer is not running
* (in6m->in6m_timer==IN6M_TIMER_UNDEF)
*
* The flag is in6m->in6m_state, it is set to MLD_OTHERLISTENER if
@@ -442,10 +433,7 @@
}
static void
-mld6_sendpkt(in6m, type, dst)
- struct in6_multi *in6m;
- int type;
- const struct in6_addr *dst;
+mld6_sendpkt(struct in6_multi *in6m, int type, const struct in6_addr *dst)
{
struct mbuf *mh, *md;
struct mld_hdr *mldh;
@@ -544,108 +532,117 @@
* Add source addresses to the list also, if upstream router is MLDv2 capable
* and the number of source is not 0.
*/
-struct in6_multi *
-in6_addmulti(maddr6, ifp, errorp, delay)
- struct in6_addr *maddr6;
- struct ifnet *ifp;
- int *errorp, delay;
+struct in6_multi *
+in6_addmulti(struct in6_addr *maddr6, struct ifnet *ifp,
+ int *errorp, int delay)
{
struct in6_multi *in6m;
- struct ifmultiaddr *ifma;
- struct sockaddr_in6 sa6;
- int s = splnet();
*errorp = 0;
+ in6m = NULL;
- /*
- * Call generic routine to add membership or increment
- * refcount. It wants addresses in the form of a sockaddr,
- * so we build one here (being careful to zero the unused bytes).
- */
- bzero(&sa6, sizeof(sa6));
- sa6.sin6_family = AF_INET6;
- sa6.sin6_len = sizeof(struct sockaddr_in6);
- sa6.sin6_addr = *maddr6;
- *errorp = if_addmulti(ifp, (struct sockaddr *)&sa6, &ifma);
- if (*errorp) {
- splx(s);
- return 0;
- }
+ IFF_LOCKGIANT(ifp);
+ /*IN6_MULTI_LOCK();*/
- /*
- * If ifma->ifma_protospec is null, then if_addmulti() created
- * a new record. Otherwise, we are done.
- */
- if (ifma->ifma_protospec != NULL) {
- splx(s);
- return ifma->ifma_protospec;
- }
-
- /* XXX - if_addmulti uses M_WAITOK. Can this really be called
- at interrupt time? If so, need to fix if_addmulti. XXX */
- in6m = (struct in6_multi *)malloc(sizeof(*in6m), M_IP6MADDR, M_NOWAIT);
- if (in6m == NULL) {
- splx(s);
- return (NULL);
- }
-
- bzero(in6m, sizeof *in6m);
- in6m->in6m_addr = *maddr6;
- in6m->in6m_ifp = ifp;
- in6m->in6m_refcount = 1;
- in6m->in6m_ifma = ifma;
- ifma->ifma_protospec = in6m;
- in6m->in6m_timer_ch = malloc(sizeof(*in6m->in6m_timer_ch), M_IP6MADDR,
- M_NOWAIT);
- if (in6m->in6m_timer_ch == NULL) {
- free(in6m, M_IP6MADDR);
- splx(s);
- return (NULL);
- }
- LIST_INSERT_HEAD(&in6_multihead, in6m, in6m_entry);
+ IN6_LOOKUP_MULTI(*maddr6, ifp, in6m);
+ if (in6m != NULL) {
+ /*
+ * If we already joined this group, just bump the
+ * refcount and return it.
+ */
+ KASSERT(in6m->in6m_refcount >= 1,
+ ("%s: bad refcount %d", __func__, in6m->in6m_refcount));
+ ++in6m->in6m_refcount;
+ } else do {
+ struct in6_multi *nin6m;
+ struct ifmultiaddr *ifma;
+ struct sockaddr_in6 sa6;
+
+ bzero(&sa6, sizeof(sa6));
+ sa6.sin6_family = AF_INET6;
+ sa6.sin6_len = sizeof(struct sockaddr_in6);
+ sa6.sin6_addr = *maddr6;
- callout_init(in6m->in6m_timer_ch, 0);
- in6m->in6m_timer = delay;
- if (in6m->in6m_timer > 0) {
- in6m->in6m_state = MLD_REPORTPENDING;
- mld_starttimer(in6m);
+ *errorp = if_addmulti(ifp, (struct sockaddr *)&sa6, &ifma);
+ if (*errorp)
+ break;
- splx(s);
- return (in6m);
- }
+ /*
+ * If ifma->ifma_protospec is null, then if_addmulti() created
+ * a new record. Otherwise, bump refcount, and we are done.
+ */
+ if (ifma->ifma_protospec != NULL) {
+ in6m = ifma->ifma_protospec;
+ ++in6m->in6m_refcount;
+ break;
+ }
+
+ nin6m = malloc(sizeof(*nin6m), M_IP6MADDR, M_NOWAIT | M_ZERO);
+ if (nin6m == NULL) {
+ if_delmulti_ifma(ifma);
+ break;
+ }
+
+ nin6m->in6m_addr = *maddr6;
+ nin6m->in6m_ifp = ifp;
+ nin6m->in6m_refcount = 1;
+ nin6m->in6m_ifma = ifma;
+ ifma->ifma_protospec = nin6m;
+
+ nin6m->in6m_timer_ch = malloc(sizeof(*nin6m->in6m_timer_ch),
+ M_IP6MADDR, M_NOWAIT);
+ if (nin6m->in6m_timer_ch == NULL) {
+ free(nin6m, M_IP6MADDR);
+ if_delmulti_ifma(ifma);
+ break;
+ }
+
+ LIST_INSERT_HEAD(&in6_multihead, nin6m, in6m_entry);
+
+ callout_init(nin6m->in6m_timer_ch, 0);
+ nin6m->in6m_timer = delay;
+ if (nin6m->in6m_timer > 0) {
+ nin6m->in6m_state = MLD_REPORTPENDING;
+ mld_starttimer(nin6m);
+ }
+
+ mld6_start_listening(nin6m);
+
+ in6m = nin6m;
+
+ } while (0);
+
+ /*IN6_MULTI_UNLOCK();*/
+ IFF_UNLOCKGIANT(ifp);
- /*
- * Let MLD6 know that we have joined a new IPv6 multicast
- * group.
- */
- mld6_start_listening(in6m);
- splx(s);
return (in6m);
}
/*
* Delete a multicast address record.
+ *
+ * TODO: Locking, as per netinet.
*/
void
-in6_delmulti(in6m)
- struct in6_multi *in6m;
+in6_delmulti(struct in6_multi *in6m)
{
- struct ifmultiaddr *ifma = in6m->in6m_ifma;
- int s = splnet();
+ struct ifmultiaddr *ifma;
- if (ifma->ifma_refcount == 1) {
- /*
- * No remaining claims to this record; let MLD6 know
- * that we are leaving the multicast group.
- */
+ KASSERT(in6m->in6m_refcount >= 1, ("%s: freeing freed in6m", __func__));
+
+ if (--in6m->in6m_refcount == 0) {
mld_stoptimer(in6m);
mld6_stop_listening(in6m);
+
+ ifma = in6m->in6m_ifma;
+ KASSERT(ifma->ifma_protospec == in6m,
+ ("%s: ifma_protospec != in6m", __func__));
ifma->ifma_protospec = NULL;
+
LIST_REMOVE(in6m, in6m_entry);
free(in6m->in6m_timer_ch, M_IP6MADDR);
free(in6m, M_IP6MADDR);
+
+ if_delmulti_ifma(ifma);
}
- /* XXX - should be separate API for when we have an ifma? */
- if_delmulti(ifma->ifma_ifp, ifma->ifma_addr);
- splx(s);
}
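
The mld6.c rewrite of in6_addmulti()/in6_delmulti() is essentially a move to explicit reference counting: an existing in6_multi just gets its in6m_refcount bumped, a new one is allocated zeroed, and teardown happens only when the last reference is dropped. Below is a stripped-down sketch of that join/leave pattern in plain C; struct membership, join_group and leave_group are invented names, and all locking and interface bookkeeping is omitted.

    #include <stdlib.h>

    struct membership {
        int refcount;
        /* ... group address, interface, timers ... */
    };

    static struct membership *
    join_group(struct membership **slot)
    {
        if (*slot != NULL) {                  /* already joined: take a ref */
            (*slot)->refcount++;
            return (*slot);
        }
        *slot = calloc(1, sizeof(**slot));    /* M_NOWAIT | M_ZERO analogue */
        if (*slot == NULL)
            return (NULL);
        (*slot)->refcount = 1;
        return (*slot);
    }

    static void
    leave_group(struct membership **slot)
    {
        if (--(*slot)->refcount == 0) {       /* last reference: tear down */
            free(*slot);
            *slot = NULL;
        }
    }

    int
    main(void)
    {
        struct membership *group = NULL;

        join_group(&group);     /* first join allocates */
        join_group(&group);     /* second join only bumps the refcount */
        leave_group(&group);    /* still one reference left */
        leave_group(&group);    /* last leave frees the record */
        return (0);
    }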
Index: nd6.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/nd6.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/netinet6/nd6.c -L sys/netinet6/nd6.c -u -r1.3 -r1.4
--- sys/netinet6/nd6.c
+++ sys/netinet6/nd6.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: /usr/local/www/cvsroot/FreeBSD/src/sys/netinet6/nd6.c,v 1.48.2.13 2006/06/17 17:58:33 gnn Exp $ */
+/* $FreeBSD: src/sys/netinet6/nd6.c,v 1.83.2.1 2007/10/30 18:03:50 jhb Exp $ */
/* $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $ */
/*-
@@ -37,7 +37,6 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
-#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
@@ -69,7 +68,7 @@
#include <sys/limits.h>
-#include <net/net_osdep.h>
+#include <security/mac/mac_framework.h>
#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
@@ -121,7 +120,7 @@
extern struct callout in6_tmpaddrtimer_ch;
void
-nd6_init()
+nd6_init(void)
{
static int nd6_init_done = 0;
int i;
@@ -148,8 +147,7 @@
}
struct nd_ifinfo *
-nd6_ifattach(ifp)
- struct ifnet *ifp;
+nd6_ifattach(struct ifnet *ifp)
{
struct nd_ifinfo *nd;
@@ -176,8 +174,7 @@
}
void
-nd6_ifdetach(nd)
- struct nd_ifinfo *nd;
+nd6_ifdetach(struct nd_ifinfo *nd)
{
free(nd, M_IP6NDP);
@@ -188,8 +185,7 @@
* changes, which means we might have to adjust the ND level MTU.
*/
void
-nd6_setmtu(ifp)
- struct ifnet *ifp;
+nd6_setmtu(struct ifnet *ifp)
{
nd6_setmtu0(ifp, ND_IFINFO(ifp));
@@ -197,9 +193,7 @@
/* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */
void
-nd6_setmtu0(ifp, ndi)
- struct ifnet *ifp;
- struct nd_ifinfo *ndi;
+nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi)
{
u_int32_t omaxmtu;
@@ -239,10 +233,7 @@
}
void
-nd6_option_init(opt, icmp6len, ndopts)
- void *opt;
- int icmp6len;
- union nd_opts *ndopts;
+nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
{
bzero(ndopts, sizeof(*ndopts));
@@ -260,8 +251,7 @@
* Take one ND option.
*/
struct nd_opt_hdr *
-nd6_option(ndopts)
- union nd_opts *ndopts;
+nd6_option(union nd_opts *ndopts)
{
struct nd_opt_hdr *nd_opt;
int olen;
@@ -312,8 +302,7 @@
* multiple options of the same type.
*/
int
-nd6_options(ndopts)
- union nd_opts *ndopts;
+nd6_options(union nd_opts *ndopts)
{
struct nd_opt_hdr *nd_opt;
int i = 0;
@@ -392,9 +381,7 @@
* ND6 timer routine to handle ND6 entries
*/
void
-nd6_llinfo_settimer(ln, tick)
- struct llinfo_nd6 *ln;
- long tick;
+nd6_llinfo_settimer(struct llinfo_nd6 *ln, long tick)
{
if (tick < 0) {
ln->ln_expire = 0;
@@ -415,8 +402,7 @@
}
static void
-nd6_llinfo_timer(arg)
- void *arg;
+nd6_llinfo_timer(void *arg)
{
struct llinfo_nd6 *ln;
struct rtentry *rt;
@@ -475,7 +461,7 @@
ln->ln_hold = m0;
clear_llinfo_pqueue(ln);
}
- if (rt)
+ if (rt && rt->rt_llinfo)
(void)nd6_free(rt, 0);
ln = NULL;
}
@@ -490,7 +476,8 @@
case ND6_LLINFO_STALE:
/* Garbage Collection(RFC 2461 5.3) */
if (!ND6_LLINFO_PERMANENT(ln)) {
- (void)nd6_free(rt, 1);
+ if (rt && rt->rt_llinfo)
+ (void)nd6_free(rt, 1);
ln = NULL;
}
break;
@@ -520,13 +507,14 @@
* specified as the destination of a p2p interface
* (see in6_ifinit()). We should not free the entry
* since this is sort of a "static" entry generated
- * via interface address configuration.
+ * via interface address configuration.
*/
ln->ln_asked = 0;
ln->ln_expire = 0; /* make it permanent */
ln->ln_state = ND6_LLINFO_STALE;
} else {
- (void)nd6_free(rt, 0);
+ if (rt && rt->rt_llinfo)
+ (void)nd6_free(rt, 0);
ln = NULL;
}
break;
@@ -538,8 +526,7 @@
* ND6 timer routine to expire default route list and prefix list
*/
void
-nd6_timer(ignored_arg)
- void *ignored_arg;
+nd6_timer(void *ignored_arg)
{
int s;
struct nd_defrouter *dr;
@@ -662,9 +649,11 @@
splx(s);
}
+/*
+ * ia6 - deprecated/invalidated temporary address
+ */
static int
-regen_tmpaddr(ia6)
- struct in6_ifaddr *ia6; /* deprecated/invalidated temporary address */
+regen_tmpaddr(struct in6_ifaddr *ia6)
{
struct ifaddr *ifa;
struct ifnet *ifp;
@@ -730,8 +719,7 @@
* ifp goes away.
*/
void
-nd6_purge(ifp)
- struct ifnet *ifp;
+nd6_purge(struct ifnet *ifp)
{
struct llinfo_nd6 *ln, *nln;
struct nd_defrouter *dr, *ndr;
@@ -818,13 +806,11 @@
}
struct rtentry *
-nd6_lookup(addr6, create, ifp)
- struct in6_addr *addr6;
- int create;
- struct ifnet *ifp;
+nd6_lookup(struct in6_addr *addr6, int create, struct ifnet *ifp)
{
struct rtentry *rt;
struct sockaddr_in6 sin6;
+ char ip6buf[INET6_ADDRSTRLEN];
bzero(&sin6, sizeof(sin6));
sin6.sin6_len = sizeof(struct sockaddr_in6);
@@ -872,7 +858,7 @@
log(LOG_ERR,
"nd6_lookup: failed to add route for a "
"neighbor(%s), errno=%d\n",
- ip6_sprintf(addr6), e);
+ ip6_sprintf(ip6buf, addr6), e);
}
if (rt == NULL)
return (NULL);
@@ -909,7 +895,7 @@
if (create) {
nd6log((LOG_DEBUG,
"nd6_lookup: failed to lookup %s (if = %s)\n",
- ip6_sprintf(addr6),
+ ip6_sprintf(ip6buf, addr6),
ifp ? if_name(ifp) : "unspec"));
}
RT_UNLOCK(rt);
@@ -925,9 +911,7 @@
* to not reenter the routing code from within itself.
*/
static int
-nd6_is_new_addr_neighbor(addr, ifp)
- struct sockaddr_in6 *addr;
- struct ifnet *ifp;
+nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
{
struct nd_prefix *pr;
struct ifaddr *dstaddr;
@@ -1001,9 +985,7 @@
* XXX: should take care of the destination of a p2p link?
*/
int
-nd6_is_addr_neighbor(addr, ifp)
- struct sockaddr_in6 *addr;
- struct ifnet *ifp;
+nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
{
if (nd6_is_new_addr_neighbor(addr, ifp))
@@ -1026,9 +1008,7 @@
* that the change is safe.
*/
static struct llinfo_nd6 *
-nd6_free(rt, gc)
- struct rtentry *rt;
- int gc;
+nd6_free(struct rtentry *rt, int gc)
{
struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo, *next;
struct in6_addr in6 = ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr;
@@ -1137,10 +1117,7 @@
* XXX cost-effective methods?
*/
void
-nd6_nud_hint(rt, dst6, force)
- struct rtentry *rt;
- struct in6_addr *dst6;
- int force;
+nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force)
{
struct llinfo_nd6 *ln;
@@ -1184,11 +1161,11 @@
}
}
+/*
+ * info - XXX unused
+ */
void
-nd6_rtrequest(req, rt, info)
- int req;
- struct rtentry *rt;
- struct rt_addrinfo *info; /* xxx unused */
+nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
{
struct sockaddr *gate = rt->rt_gateway;
struct llinfo_nd6 *ln = (struct llinfo_nd6 *)rt->rt_llinfo;
@@ -1311,6 +1288,7 @@
nd6_inuse++;
nd6_allocated++;
bzero(ln, sizeof(*ln));
+ RT_ADDREF(rt);
ln->ln_rt = rt;
callout_init(&ln->ln_timer_ch, 0);
@@ -1387,9 +1365,11 @@
break;
if (in6_addmulti(&llsol, ifp,
&error, 0) == NULL) {
+ char ip6buf[INET6_ADDRSTRLEN];
nd6log((LOG_ERR, "%s: failed to join "
"%s (errno=%d)\n", if_name(ifp),
- ip6_sprintf(&llsol), error));
+ ip6_sprintf(ip6buf, &llsol),
+ error));
}
}
}
@@ -1421,6 +1401,7 @@
ln->ln_prev->ln_next = ln->ln_next;
ln->ln_prev = NULL;
nd6_llinfo_settimer(ln, -1);
+ RT_REMREF(rt);
rt->rt_llinfo = 0;
rt->rt_flags &= ~RTF_LLINFO;
clear_llinfo_pqueue(ln);
@@ -1429,10 +1410,7 @@
}
int
-nd6_ioctl(cmd, data, ifp)
- u_long cmd;
- caddr_t data;
- struct ifnet *ifp;
+nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
{
struct in6_drlist *drl = (struct in6_drlist *)data;
struct in6_oprlist *oprl = (struct in6_oprlist *)data;
@@ -1664,15 +1642,13 @@
/*
* Create neighbor cache entry and cache link-layer address,
* on reception of inbound ND6 packets. (RS/RA/NS/redirect)
+ *
+ * type - ICMP6 type
+ * code - type dependent information
*/
struct rtentry *
-nd6_cache_lladdr(ifp, from, lladdr, lladdrlen, type, code)
- struct ifnet *ifp;
- struct in6_addr *from;
- char *lladdr;
- int lladdrlen;
- int type; /* ICMP6 type */
- int code; /* type dependent information */
+nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
+ int lladdrlen, int type, int code)
{
struct rtentry *rt = NULL;
struct llinfo_nd6 *ln = NULL;
@@ -1790,28 +1766,26 @@
if (ln->ln_hold) {
struct mbuf *m_hold, *m_hold_next;
- for (m_hold = ln->ln_hold; m_hold;
- m_hold = m_hold_next) {
- struct mbuf *mpkt = NULL;
+ /*
+ * reset the ln_hold in advance, to explicitly
+ * prevent a ln_hold lookup in nd6_output()
+ * (wouldn't happen, though...)
+ */
+ for (m_hold = ln->ln_hold, ln->ln_hold = NULL;
+ m_hold; m_hold = m_hold_next) {
m_hold_next = m_hold->m_nextpkt;
- mpkt = m_copym(m_hold, 0, M_COPYALL, M_DONTWAIT);
- if (mpkt == NULL) {
- m_freem(m_hold);
- break;
- }
- mpkt->m_nextpkt = NULL;
+ m_hold->m_nextpkt = NULL;
/*
* we assume ifp is not a p2p here, so
* just set the 2nd argument as the
* 1st one.
*/
- nd6_output(ifp, ifp, mpkt,
+ nd6_output(ifp, ifp, m_hold,
(struct sockaddr_in6 *)rt_key(rt),
rt);
}
- ln->ln_hold = NULL;
}
} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
/* probe right away */
@@ -1843,7 +1817,7 @@
* 0 n y -- (3) c s s
* 0 y y n (4) c s s
* 0 y y y (5) c s s
- * 1 -- n -- (6) c c c s
+ * 1 -- n -- (6) c c c s
* 1 -- y -- (7) c c s c s
*
* (c=clear s=set)
@@ -1906,8 +1880,7 @@
}
static void
-nd6_slowtimo(ignored_arg)
- void *ignored_arg;
+nd6_slowtimo(void *ignored_arg)
{
struct nd_ifinfo *nd6if;
struct ifnet *ifp;
@@ -1934,12 +1907,8 @@
#define senderr(e) { error = (e); goto bad;}
int
-nd6_output(ifp, origifp, m0, dst, rt0)
- struct ifnet *ifp;
- struct ifnet *origifp;
- struct mbuf *m0;
- struct sockaddr_in6 *dst;
- struct rtentry *rt0;
+nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
+ struct sockaddr_in6 *dst, struct rtentry *rt0)
{
struct mbuf *m = m0;
struct rtentry *rt = rt0;
@@ -1956,13 +1925,16 @@
/*
* next hop determination. This routine is derived from ether_output.
*/
+ /* NB: the locking here is tortuous... */
+ if (rt != NULL)
+ RT_LOCK(rt);
again:
- if (rt) {
+ if (rt != NULL) {
if ((rt->rt_flags & RTF_UP) == 0) {
+ RT_UNLOCK(rt);
rt0 = rt = rtalloc1((struct sockaddr *)dst, 1, 0UL);
if (rt != NULL) {
RT_REMREF(rt);
- RT_UNLOCK(rt);
if (rt->rt_ifp != ifp)
/*
* XXX maybe we should update ifp too,
@@ -1987,6 +1959,7 @@
*/
if (!nd6_is_addr_neighbor(gw6, ifp) ||
in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) {
+ RT_UNLOCK(rt);
/*
* We allow this kind of tricky route only
* when the outgoing interface is p2p.
@@ -1998,18 +1971,34 @@
goto sendpkt;
}
- if (rt->rt_gwroute == 0)
+ if (rt->rt_gwroute == NULL)
goto lookup;
- if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) {
- RT_LOCK(rt);
- rtfree(rt); rt = rt0;
+ rt = rt->rt_gwroute;
+ RT_LOCK(rt); /* NB: gwroute */
+ if ((rt->rt_flags & RTF_UP) == 0) {
+ RTFREE_LOCKED(rt); /* unlock gwroute */
+ rt = rt0;
+ rt0->rt_gwroute = NULL;
lookup:
- rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1, 0UL);
- if ((rt = rt->rt_gwroute) == 0)
+ RT_UNLOCK(rt0);
+ rt = rtalloc1(rt->rt_gateway, 1, 0UL);
+ if (rt == rt0) {
+ RT_REMREF(rt0);
+ RT_UNLOCK(rt0);
senderr(EHOSTUNREACH);
- RT_UNLOCK(rt);
+ }
+ RT_LOCK(rt0);
+ if (rt0->rt_gwroute != NULL)
+ RTFREE(rt0->rt_gwroute);
+ rt0->rt_gwroute = rt;
+ if (rt == NULL) {
+ RT_UNLOCK(rt0);
+ senderr(EHOSTUNREACH);
+ }
}
+ RT_UNLOCK(rt0);
}
+ RT_UNLOCK(rt);
}
/*
@@ -2035,10 +2024,11 @@
if (ln == NULL || rt == NULL) {
if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
+ char ip6buf[INET6_ADDRSTRLEN];
log(LOG_DEBUG,
"nd6_output: can't allocate llinfo for %s "
"(ln=%p, rt=%p)\n",
- ip6_sprintf(&dst->sin6_addr), ln, rt);
+ ip6_sprintf(ip6buf, &dst->sin6_addr), ln, rt);
senderr(EIO); /* XXX: good error? */
}
@@ -2123,11 +2113,6 @@
goto bad;
}
-#ifdef IPSEC
- /* clean ipsec history once it goes out of the node */
- ipsec_delaux(m);
-#endif
-
#ifdef MAC
mac_create_mbuf_linklayer(ifp, m);
#endif
@@ -2145,8 +2130,7 @@
#undef senderr
int
-nd6_need_cache(ifp)
- struct ifnet *ifp;
+nd6_need_cache(struct ifnet *ifp)
{
/*
* XXX: we currently do not make neighbor cache on any interface
@@ -2173,6 +2157,7 @@
case IFT_PPP:
case IFT_TUNNEL:
case IFT_BRIDGE:
+ case IFT_PROPVIRTUAL:
return (1);
default:
return (0);
@@ -2180,12 +2165,8 @@
}
int
-nd6_storelladdr(ifp, rt0, m, dst, desten)
- struct ifnet *ifp;
- struct rtentry *rt0;
- struct mbuf *m;
- struct sockaddr *dst;
- u_char *desten;
+nd6_storelladdr(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
+ struct sockaddr *dst, u_char *desten)
{
struct sockaddr_dl *sdl;
struct rtentry *rt;
@@ -2255,9 +2236,8 @@
return (0);
}
-static void
-clear_llinfo_pqueue(ln)
- struct llinfo_nd6 *ln;
+static void
+clear_llinfo_pqueue(struct llinfo_nd6 *ln)
{
struct mbuf *m_hold, *m_hold_next;
@@ -2287,7 +2267,7 @@
nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
{
int error;
- char buf[1024];
+ char buf[1024] __aligned(4);
struct in6_defrouter *d, *de;
struct nd_defrouter *dr;
@@ -2305,7 +2285,9 @@
d->rtaddr.sin6_family = AF_INET6;
d->rtaddr.sin6_len = sizeof(d->rtaddr);
d->rtaddr.sin6_addr = dr->rtaddr;
- sa6_recoverscope(&d->rtaddr);
+ error = sa6_recoverscope(&d->rtaddr);
+ if (error != 0)
+ return (error);
d->flags = dr->flags;
d->rtlifetime = dr->rtlifetime;
d->expire = dr->expire;
@@ -2325,9 +2307,10 @@
nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
{
int error;
- char buf[1024];
+ char buf[1024] __aligned(4);
struct in6_prefix *p, *pe;
struct nd_prefix *pr;
+ char ip6buf[INET6_ADDRSTRLEN];
if (req->newptr)
return EPERM;
@@ -2350,7 +2333,7 @@
if (sa6_recoverscope(&p->prefix)) {
log(LOG_ERR,
"scope error in prefix list (%s)\n",
- ip6_sprintf(&p->prefix.sin6_addr));
+ ip6_sprintf(ip6buf, &p->prefix.sin6_addr));
/* XXX: press on... */
}
p->raflags = pr->ndpr_raf;
@@ -2393,7 +2376,8 @@
log(LOG_ERR,
"scope error in "
"prefix list (%s)\n",
- ip6_sprintf(&pfr->router->rtaddr));
+ ip6_sprintf(ip6buf,
+ &pfr->router->rtaddr));
}
advrtrs++;
}
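
Among the nd6.c changes, the RT_ADDREF()/RT_REMREF() additions pair a reference acquisition with storing the rtentry pointer in the neighbor-cache llinfo, and a release with clearing it, so the route cannot go away while ND6 still points at it. The sketch below shows only that ownership discipline; struct route_entry, llinfo_attach and llinfo_detach are illustrative stand-ins, and the real code of course does this under the routing locks.

    #include <stddef.h>

    struct route_entry {
        int   refcnt;
        void *llinfo;           /* back-pointer owned by the ND code */
    };

    /* Stashing a long-lived pointer to the route takes a reference ... */
    static void
    llinfo_attach(struct route_entry *rt, void *ln)
    {
        rt->refcnt++;           /* RT_ADDREF analogue */
        rt->llinfo = ln;
    }

    /* ... and clearing it gives the reference back, so the route can only
     * be freed once no neighbor-cache entry still references it. */
    static void
    llinfo_detach(struct route_entry *rt)
    {
        rt->llinfo = NULL;
        rt->refcnt--;           /* RT_REMREF analogue */
    }

    int
    main(void)
    {
        struct route_entry rt = { 1, NULL };  /* one ref held by the table */
        int ln;

        llinfo_attach(&rt, &ln);
        llinfo_detach(&rt);
        return (rt.refcnt == 1 ? 0 : 1);
    }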
Index: in6_pcb.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_pcb.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/in6_pcb.h -L sys/netinet6/in6_pcb.h -u -r1.1.1.1 -r1.2
--- sys/netinet6/in6_pcb.h
+++ sys/netinet6/in6_pcb.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_pcb.h,v 1.16 2005/01/07 02:30:34 imp Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_pcb.h,v 1.19 2007/05/11 10:20:50 rwatson Exp $ */
/* $KAME: in6_pcb.h,v 1.13 2001/02/06 09:16:53 itojun Exp $ */
/*-
@@ -70,12 +70,13 @@
#define sin6tosa(sin6) ((struct sockaddr *)(sin6))
#define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa))
-void in6_pcbpurgeif0 __P((struct in6pcb *, struct ifnet *));
+void in6_pcbpurgeif0 __P((struct inpcbinfo *, struct ifnet *));
void in6_losing __P((struct inpcb *));
int in6_pcbbind __P((struct inpcb *, struct sockaddr *, struct ucred *));
int in6_pcbconnect __P((struct inpcb *, struct sockaddr *, struct ucred *));
void in6_pcbdetach __P((struct inpcb *));
void in6_pcbdisconnect __P((struct inpcb *));
+void in6_pcbfree __P((struct inpcb *));
int in6_pcbladdr __P((struct inpcb *, struct sockaddr *,
struct in6_addr **));
struct inpcb *
@@ -94,8 +95,8 @@
in6_sockaddr __P((in_port_t port, struct in6_addr *addr_p));
struct sockaddr *
in6_v4mapsin6_sockaddr __P((in_port_t port, struct in_addr *addr_p));
-int in6_setpeeraddr __P((struct socket *so, struct sockaddr **nam));
-int in6_setsockaddr __P((struct socket *so, struct sockaddr **nam));
+int in6_getpeeraddr __P((struct socket *so, struct sockaddr **nam));
+int in6_getsockaddr __P((struct socket *so, struct sockaddr **nam));
int in6_mapped_sockaddr __P((struct socket *so, struct sockaddr **nam));
int in6_mapped_peeraddr __P((struct socket *so, struct sockaddr **nam));
int in6_selecthlim __P((struct in6pcb *, struct ifnet *));
Index: ip6_mroute.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/ip6_mroute.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -L sys/netinet6/ip6_mroute.c -L sys/netinet6/ip6_mroute.c -u -r1.2 -r1.3
--- sys/netinet6/ip6_mroute.c
+++ sys/netinet6/ip6_mroute.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/ip6_mroute.c,v 1.29.2.7.2.1 2006/04/20 16:05:17 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/ip6_mroute.c,v 1.46 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: ip6_mroute.c,v 1.58 2001/12/18 02:36:31 itojun Exp $ */
/*-
@@ -96,6 +96,7 @@
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/sx.h>
+#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/time.h>
@@ -114,13 +115,13 @@
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_mroute.h>
+#include <netinet6/ip6protosw.h>
#include <netinet6/pim6.h>
#include <netinet6/pim6_var.h>
-#include <net/net_osdep.h>
-
static MALLOC_DEFINE(M_MRTABLE6, "mf6c", "multicast forwarding cache entry");
+/* XXX: this is a very common idiom; move to <sys/mbuf.h> ? */
#define M_HASCL(m) ((m)->m_flags & M_EXT)
static int ip6_mdq __P((struct mbuf *, struct ifnet *, struct mf6c *));
@@ -132,23 +133,49 @@
static int register_send __P((struct ip6_hdr *, struct mif6 *,
struct mbuf *));
-/*
- * Globals. All but ip6_mrouter, ip6_mrtproto and mrt6stat could be static,
- * except for netstat or debugging purposes.
- */
-struct socket *ip6_mrouter = NULL;
-int ip6_mrouter_ver = 0;
-int ip6_mrtproto = IPPROTO_PIM; /* for netstat only */
-struct mrt6stat mrt6stat;
+extern struct domain inet6domain;
-#define NO_RTE_FOUND 0x1
+/* XXX: referenced from ip_mroute.c for dynamically loading this code. */
+struct ip6protosw in6_pim_protosw = {
+ .pr_type = SOCK_RAW,
+ .pr_domain = &inet6domain,
+ .pr_protocol = IPPROTO_PIM,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = pim6_input,
+ .pr_output = rip6_output,
+ .pr_ctloutput = rip6_ctloutput,
+ .pr_usrreqs = &rip6_usrreqs
+};
+
+static int ip6_mrouter_ver = 0;
+
+SYSCTL_DECL(_net_inet6);
+SYSCTL_DECL(_net_inet6_ip6);
+SYSCTL_NODE(_net_inet6, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
+
+static struct mrt6stat mrt6stat;
+SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RW,
+ &mrt6stat, mrt6stat,
+ "Multicast Routing Statistics (struct mrt6stat, netinet6/ip6_mroute.h)");
+
+#define NO_RTE_FOUND 0x1
#define RTE_FOUND 0x2
-struct mf6c *mf6ctable[MF6CTBLSIZ];
-u_char n6expire[MF6CTBLSIZ];
+static struct mf6c *mf6ctable[MF6CTBLSIZ];
+SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mf6ctable, CTLFLAG_RD,
+ &mf6ctable, sizeof(mf6ctable), "S,*mf6ctable[MF6CTBLSIZ]",
+ "Multicast Forwarding Table (struct *mf6ctable[MF6CTBLSIZ], "
+ "netinet6/ip6_mroute.h)");
+
+static u_char n6expire[MF6CTBLSIZ];
+
static struct mif6 mif6table[MAXMIFS];
+SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mif6table, CTLFLAG_RD,
+ &mif6table, sizeof(mif6table), "S,vif[MAXMIFS]",
+ "Multicast Interfaces (struct mif[MAXMIFS], netinet6/ip6_mroute.h)");
+
#ifdef MRT6DEBUG
-u_int mrt6debug = 0; /* debug level */
+static u_int mrt6debug = 0; /* debug level */
#define DEBUG_MFC 0x02
#define DEBUG_FORWARD 0x04
#define DEBUG_EXPIRE 0x08
@@ -173,9 +200,9 @@
* by a broken gateway). Different from IPv4 register_if,
* these interfaces are linked into the system ifnet list,
* because per-interface IPv6 statistics are maintained in
- * ifp->if_afdata. But it does not have any routes point
+ * ifp->if_afdata. But it does not have any routes point
* to them. I.e., packets can't be sent this way. They
- * only exist as a placeholder for multicast source
+ * only exist as a placeholder for multicast source
* verification.
*/
static struct ifnet *multicast_register_if6;
@@ -189,6 +216,10 @@
static mifi_t reg_mif_num = (mifi_t)-1;
static struct pim6stat pim6stat;
+SYSCTL_STRUCT(_net_inet6_pim, PIM6CTL_STATS, stats, CTLFLAG_RD,
+ &pim6stat, pim6stat,
+ "PIM Statistics (struct pim6stat, netinet6/pim_var.h)");
+
static int pim6;
/*
@@ -201,9 +232,7 @@
/*
* Find a route for a given origin IPv6 address and Multicast group address.
- * Quality of service parameter to be added in the future!!!
*/
-
#define MF6CFIND(o, g, rt) do { \
struct mf6c *_rt = mf6ctable[MF6CHASH(o,g)]; \
rt = NULL; \
@@ -225,6 +254,7 @@
/*
* Macros to compute elapsed time efficiently
* Borrowed from Van Jacobson's scheduling code
+ * XXX: replace with timersub() ?
*/
#define TV_DELTA(a, b, delta) do { \
int xxs; \
@@ -244,12 +274,13 @@
} \
} while (/*CONSTCOND*/ 0)
+/* XXX: replace with timercmp(a, b, <) ? */
#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
(a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
#ifdef UPCALL_TIMING
#define UPCALL_MAX 50
-u_long upcall_data[UPCALL_MAX + 1];
+static u_long upcall_data[UPCALL_MAX + 1];
static void collate();
#endif /* UPCALL_TIMING */
@@ -263,13 +294,17 @@
static struct callout expire_upcalls_ch;
+int X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m);
+int X_ip6_mrouter_done(void);
+int X_ip6_mrouter_set(struct socket *so, struct sockopt *sopt);
+int X_ip6_mrouter_get(struct socket *so, struct sockopt *sopt);
+int X_mrt6_ioctl(int cmd, caddr_t data);
+
/*
* Handle MRT setsockopt commands to modify the multicast routing tables.
*/
int
-ip6_mrouter_set(so, sopt)
- struct socket *so;
- struct sockopt *sopt;
+X_ip6_mrouter_set(struct socket *so, struct sockopt *sopt)
{
int error = 0;
int optval;
@@ -292,7 +327,7 @@
error = ip6_mrouter_init(so, optval, sopt->sopt_name);
break;
case MRT6_DONE:
- error = ip6_mrouter_done();
+ error = X_ip6_mrouter_done();
break;
case MRT6_ADD_MIF:
error = sooptcopyin(sopt, &mifc, sizeof(mifc), sizeof(mifc));
@@ -337,9 +372,7 @@
* Handle MRT getsockopt commands
*/
int
-ip6_mrouter_get(so, sopt)
- struct socket *so;
- struct sockopt *sopt;
+X_ip6_mrouter_get(struct socket *so, struct sockopt *sopt)
{
int error = 0;
@@ -358,9 +391,7 @@
* Handle ioctl commands to obtain information from the cache
*/
int
-mrt6_ioctl(cmd, data)
- int cmd;
- caddr_t data;
+X_mrt6_ioctl(int cmd, caddr_t data)
{
switch (cmd) {
case SIOCGETSGCNT_IN6:
@@ -376,8 +407,7 @@
* returns the packet, byte, rpf-failure count for the source group provided
*/
static int
-get_sg_cnt(req)
- struct sioc_sg_req6 *req;
+get_sg_cnt(struct sioc_sg_req6 *req)
{
struct mf6c *rt;
int s;
@@ -402,8 +432,7 @@
* returns the input and output packet and byte counts on the mif provided
*/
static int
-get_mif6_cnt(req)
- struct sioc_mif_req6 *req;
+get_mif6_cnt(struct sioc_mif_req6 *req)
{
mifi_t mifi = req->mifi;
@@ -419,8 +448,7 @@
}
static int
-set_pim6(i)
- int *i;
+set_pim6(int *i)
{
if ((*i != 1) && (*i != 0))
return (EINVAL);
@@ -434,10 +462,7 @@
* Enable multicast routing
*/
static int
-ip6_mrouter_init(so, v, cmd)
- struct socket *so;
- int v;
- int cmd;
+ip6_mrouter_init(struct socket *so, int v, int cmd)
{
#ifdef MRT6DEBUG
if (mrt6debug)
@@ -480,7 +505,7 @@
* Disable multicast routing
*/
int
-ip6_mrouter_done()
+X_ip6_mrouter_done(void)
{
mifi_t mifi;
int i;
@@ -513,10 +538,6 @@
}
}
}
-#ifdef notyet
- bzero((caddr_t)qtable, sizeof(qtable));
- bzero((caddr_t)tbftable, sizeof(tbftable));
-#endif
bzero((caddr_t)mif6table, sizeof(mif6table));
nummifs = 0;
@@ -576,15 +597,11 @@
* Add a mif to the mif table
*/
static int
-add_m6if(mifcp)
- struct mif6ctl *mifcp;
+add_m6if(struct mif6ctl *mifcp)
{
struct mif6 *mifp;
struct ifnet *ifp;
int error, s;
-#ifdef notyet
- struct tbf *m_tbf = tbftable + mifcp->mif6c_mifi;
-#endif
if (mifcp->mif6c_mifi >= MAXMIFS)
return (EINVAL);
@@ -604,8 +621,8 @@
if_attach(ifp);
multicast_register_if6 = ifp;
reg_mif_num = mifcp->mif6c_mifi;
- /*
- * it is impossible to guess the ifindex of the
+ /*
+ * it is impossible to guess the ifindex of the
* register interface. So mif6c_pifi is automatically
* calculated.
*/
@@ -630,10 +647,7 @@
s = splnet();
mifp->m6_flags = mifcp->mif6c_flags;
mifp->m6_ifp = ifp;
-#ifdef notyet
- /* scaling up here allows division by 1024 in critical code */
- mifp->m6_rate_limit = mifcp->mif6c_rate_limit * 1024 / 1000;
-#endif
+
/* initialize per mif pkt counters */
mifp->m6_pkt_in = 0;
mifp->m6_pkt_out = 0;
@@ -660,8 +674,7 @@
* Delete a mif from the mif table
*/
static int
-del_m6if(mifip)
- mifi_t *mifip;
+del_m6if(mifi_t *mifip)
{
struct mif6 *mifp = mif6table + *mifip;
mifi_t mifi;
@@ -693,10 +706,6 @@
}
}
-#ifdef notyet
- bzero((caddr_t)qtable[*mifip], sizeof(qtable[*mifip]));
- bzero((caddr_t)mifp->m6_tbf, sizeof(*(mifp->m6_tbf)));
-#endif
bzero((caddr_t)mifp, sizeof(*mifp));
/* Adjust nummifs down */
@@ -719,14 +728,14 @@
* Add an mfc entry
*/
static int
-add_m6fc(mfccp)
- struct mf6cctl *mfccp;
+add_m6fc(struct mf6cctl *mfccp)
{
struct mf6c *rt;
u_long hash;
struct rtdetq *rte;
u_short nstl;
int s;
+ char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
MF6CFIND(mfccp->mf6cc_origin.sin6_addr,
mfccp->mf6cc_mcastgrp.sin6_addr, rt);
@@ -734,12 +743,13 @@
/* If an entry already exists, just update the fields */
if (rt) {
#ifdef MRT6DEBUG
- if (mrt6debug & DEBUG_MFC)
- log(LOG_DEBUG,
- "add_m6fc no upcall h %d o %s g %s p %x\n",
- ip6_sprintf(&mfccp->mf6cc_origin.sin6_addr),
- ip6_sprintf(&mfccp->mf6cc_mcastgrp.sin6_addr),
- mfccp->mf6cc_parent);
+ if (mrt6debug & DEBUG_MFC) {
+ log(LOG_DEBUG,
+ "add_m6fc no upcall h %d o %s g %s p %x\n",
+ ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
+ ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
+ mfccp->mf6cc_parent);
+ }
#endif
s = splnet();
@@ -766,16 +776,20 @@
log(LOG_ERR,
"add_m6fc: %s o %s g %s p %x dbx %p\n",
"multiple kernel entries",
- ip6_sprintf(&mfccp->mf6cc_origin.sin6_addr),
- ip6_sprintf(&mfccp->mf6cc_mcastgrp.sin6_addr),
+ ip6_sprintf(ip6bufo,
+ &mfccp->mf6cc_origin.sin6_addr),
+ ip6_sprintf(ip6bufg,
+ &mfccp->mf6cc_mcastgrp.sin6_addr),
mfccp->mf6cc_parent, rt->mf6c_stall);
#ifdef MRT6DEBUG
if (mrt6debug & DEBUG_MFC)
log(LOG_DEBUG,
"add_m6fc o %s g %s p %x dbg %x\n",
- ip6_sprintf(&mfccp->mf6cc_origin.sin6_addr),
- ip6_sprintf(&mfccp->mf6cc_mcastgrp.sin6_addr),
+ ip6_sprintf(ip6bufo,
+ &mfccp->mf6cc_origin.sin6_addr),
+ ip6_sprintf(ip6bufg,
+ &mfccp->mf6cc_mcastgrp.sin6_addr),
mfccp->mf6cc_parent, rt->mf6c_stall);
#endif
@@ -812,12 +826,12 @@
if (nstl == 0) {
#ifdef MRT6DEBUG
if (mrt6debug & DEBUG_MFC)
- log(LOG_DEBUG,
- "add_mfc no upcall h %d o %s g %s p %x\n",
- hash,
- ip6_sprintf(&mfccp->mf6cc_origin.sin6_addr),
- ip6_sprintf(&mfccp->mf6cc_mcastgrp.sin6_addr),
- mfccp->mf6cc_parent);
+ log(LOG_DEBUG,
+ "add_mfc no upcall h %d o %s g %s p %x\n",
+ hash,
+ ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
+ ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr),
+ mfccp->mf6cc_parent);
#endif
for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) {
@@ -876,8 +890,7 @@
* collect delay statistics on the upcalls
*/
static void
-collate(t)
- struct timeval *t;
+collate(struct timeval *t)
{
u_long d;
struct timeval tp;
@@ -902,14 +915,13 @@
* Delete an mfc entry
*/
static int
-del_m6fc(mfccp)
- struct mf6cctl *mfccp;
+del_m6fc(struct mf6cctl *mfccp)
{
- struct sockaddr_in6 origin;
- struct sockaddr_in6 mcastgrp;
- struct mf6c *rt;
- struct mf6c **nptr;
- u_long hash;
+ struct sockaddr_in6 origin;
+ struct sockaddr_in6 mcastgrp;
+ struct mf6c *rt;
+ struct mf6c **nptr;
+ u_long hash;
int s;
origin = mfccp->mf6cc_origin;
@@ -917,10 +929,12 @@
hash = MF6CHASH(origin.sin6_addr, mcastgrp.sin6_addr);
#ifdef MRT6DEBUG
- if (mrt6debug & DEBUG_MFC)
+ if (mrt6debug & DEBUG_MFC) {
+ char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];
log(LOG_DEBUG,"del_m6fc orig %s mcastgrp %s\n",
- ip6_sprintf(&origin.sin6_addr),
- ip6_sprintf(&mcastgrp.sin6_addr));
+ ip6_sprintf(ip6bufo, &origin.sin6_addr),
+ ip6_sprintf(ip6bufg, &mcastgrp.sin6_addr));
+ }
#endif
s = splnet();
@@ -950,11 +964,9 @@
}
static int
-socket_send(s, mm, src)
- struct socket *s;
- struct mbuf *mm;
- struct sockaddr_in6 *src;
+socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in6 *src)
{
+
if (s) {
if (sbappendaddr(&s->so_rcv,
(struct sockaddr *)src,
@@ -985,23 +997,21 @@
* that if this function is called from somewhere else in the originating
* context in the future.
*/
-
int
-ip6_mforward(ip6, ifp, m)
- struct ip6_hdr *ip6;
- struct ifnet *ifp;
- struct mbuf *m;
+X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
{
struct mf6c *rt;
struct mif6 *mifp;
struct mbuf *mm;
int s;
mifi_t mifi;
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
#ifdef MRT6DEBUG
if (mrt6debug & DEBUG_FORWARD)
log(LOG_DEBUG, "ip6_mforward: src %s, dst %s, ifindex %d\n",
- ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst),
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst),
ifp->if_index);
#endif
@@ -1028,8 +1038,8 @@
log(LOG_DEBUG,
"cannot forward "
"from %s to %s nxt %d received on %s\n",
- ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst),
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst),
ip6->ip6_nxt,
if_name(m->m_pkthdr.rcvif));
}
@@ -1067,8 +1077,8 @@
#ifdef MRT6DEBUG
if (mrt6debug & (DEBUG_FORWARD | DEBUG_MFC))
log(LOG_DEBUG, "ip6_mforward: no rte s %s g %s\n",
- ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst));
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst));
#endif
/*
@@ -1253,8 +1263,7 @@
* Call from the Slow Timeout mechanism, every half second.
*/
static void
-expire_upcalls(unused)
- void *unused;
+expire_upcalls(void *unused)
{
struct rtdetq *rte;
struct mf6c *mfc, **nptr;
@@ -1277,10 +1286,13 @@
mfc->mf6c_expire != 0 &&
--mfc->mf6c_expire == 0) {
#ifdef MRT6DEBUG
- if (mrt6debug & DEBUG_EXPIRE)
+ if (mrt6debug & DEBUG_EXPIRE) {
+ char ip6bufo[INET6_ADDRSTRLEN];
+ char ip6bufg[INET6_ADDRSTRLEN];
log(LOG_DEBUG, "expire_upcalls: expiring (%s %s)\n",
- ip6_sprintf(&mfc->mf6c_origin.sin6_addr),
- ip6_sprintf(&mfc->mf6c_mcastgrp.sin6_addr));
+ ip6_sprintf(ip6bufo, &mfc->mf6c_origin.sin6_addr),
+ ip6_sprintf(ip6bufg, &mfc->mf6c_mcastgrp.sin6_addr));
+ }
#endif
/*
* drop all the packets
@@ -1311,10 +1323,7 @@
* Packet forwarding routine once entry in the cache is made
*/
static int
-ip6_mdq(m, ifp, rt)
- struct mbuf *m;
- struct ifnet *ifp;
- struct mf6c *rt;
+ip6_mdq(struct mbuf *m, struct ifnet *ifp, struct mf6c *rt)
{
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
mifi_t mifi, iif;
@@ -1492,10 +1501,7 @@
}
static void
-phyint_send(ip6, mifp, m)
- struct ip6_hdr *ip6;
- struct mif6 *mifp;
- struct mbuf *m;
+phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
{
struct mbuf *mb_copy;
struct ifnet *ifp = mifp->m6_ifp;
@@ -1590,14 +1596,17 @@
icmp6_error(mb_copy, ICMP6_PACKET_TOO_BIG, 0, linkmtu);
else {
#ifdef MRT6DEBUG
- if (mrt6debug & DEBUG_XMIT)
+ if (mrt6debug & DEBUG_XMIT) {
+ char ip6bufs[INET6_ADDRSTRLEN];
+ char ip6bufd[INET6_ADDRSTRLEN];
log(LOG_DEBUG,
"phyint_send: packet too big on %s o %s "
"g %s size %d(discarded)\n",
if_name(ifp),
- ip6_sprintf(&ip6->ip6_src),
- ip6_sprintf(&ip6->ip6_dst),
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst),
mb_copy->m_pkthdr.len);
+ }
#endif /* MRT6DEBUG */
m_freem(mb_copy); /* simply discard the packet */
}
@@ -1607,10 +1616,7 @@
}
static int
-register_send(ip6, mif, m)
- struct ip6_hdr *ip6;
- struct mif6 *mif;
- struct mbuf *m;
+register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m)
{
struct mbuf *mm;
int i, len = m->m_pkthdr.len;
@@ -1618,9 +1624,12 @@
struct mrt6msg *im6;
#ifdef MRT6DEBUG
- if (mrt6debug)
+ if (mrt6debug) {
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
log(LOG_DEBUG, "** IPv6 register_send **\n src %s dst %s\n",
- ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst));
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst));
+ }
#endif
++pim6stat.pim6s_snd_registers;
@@ -1679,9 +1688,7 @@
* is stripped off, and the inner packet is passed to register_mforward.
*/
int
-pim6_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp, proto;
+pim6_input(struct mbuf **mp, int *offp, int proto)
{
struct pim *pim; /* pointer to a pim struct */
struct ip6_hdr *ip6;
@@ -1788,6 +1795,9 @@
struct ip6_hdr *eip6;
u_int32_t *reghdr;
int rc;
+#ifdef MRT6DEBUG
+ char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+#endif
++pim6stat.pim6s_rcv_registers;
@@ -1817,7 +1827,7 @@
log(LOG_ERR,
"pim6_input: register packet size too "
"small %d from %s\n",
- pimlen, ip6_sprintf(&ip6->ip6_src));
+ pimlen, ip6_sprintf(ip6bufs, &ip6->ip6_src));
#endif
m_freem(m);
return (IPPROTO_DONE);
@@ -1829,8 +1839,8 @@
log(LOG_DEBUG,
"pim6_input[register], eip6: %s -> %s, "
"eip6 plen %d\n",
- ip6_sprintf(&eip6->ip6_src),
- ip6_sprintf(&eip6->ip6_dst),
+ ip6_sprintf(ip6bufs, &eip6->ip6_src),
+ ip6_sprintf(ip6bufd, &eip6->ip6_dst),
ntohs(eip6->ip6_plen));
#endif
@@ -1854,7 +1864,7 @@
log(LOG_DEBUG,
"pim6_input: inner packet of register "
"is not multicast %s\n",
- ip6_sprintf(&eip6->ip6_dst));
+ ip6_sprintf(ip6bufd, &eip6->ip6_dst));
#endif
m_freem(m);
return (IPPROTO_DONE);
@@ -1883,8 +1893,8 @@
log(LOG_DEBUG,
"pim6_input: forwarding decapsulated register: "
"src %s, dst %s, mif %d\n",
- ip6_sprintf(&eip6->ip6_src),
- ip6_sprintf(&eip6->ip6_dst),
+ ip6_sprintf(ip6bufs, &eip6->ip6_src),
+ ip6_sprintf(ip6bufd, &eip6->ip6_dst),
reg_mif_num);
}
#endif
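
[Editor's note on the ip6_mroute.c hunks above: most of the churn comes from two upstream changes. First, the multicast-routing entry points were renamed with an X_ prefix so the code can be resolved indirectly when loaded as a module. Second, ip6_sprintf() now takes a caller-supplied buffer instead of returning a pointer to static storage, which is why every log() call grows one or two INET6_ADDRSTRLEN arrays. A minimal userland sketch of the new calling convention follows; it wraps inet_ntop() purely for illustration and is not kernel code.]

	/*
	 * Sketch only: demonstrates the caller-provided-buffer convention
	 * that the new ip6_sprintf() uses.  addr_sprintf() is a stand-in
	 * built on inet_ntop(); the names are illustrative.
	 */
	#include <stdio.h>
	#include <arpa/inet.h>
	#include <netinet/in.h>

	static char *
	addr_sprintf(char *buf, const struct in6_addr *a)
	{
		/* The caller owns the storage, so two results can be live
		 * at once; the old static-buffer variant allowed only one. */
		return ((char *)inet_ntop(AF_INET6, a, buf, INET6_ADDRSTRLEN));
	}

	int
	main(void)
	{
		struct in6_addr src = IN6ADDR_LOOPBACK_INIT;
		struct in6_addr dst = IN6ADDR_ANY_INIT;
		char bufs[INET6_ADDRSTRLEN], bufd[INET6_ADDRSTRLEN];

		/* With static storage the second call would have clobbered
		 * the first result before printf() ever ran. */
		printf("src %s dst %s\n",
		    addr_sprintf(bufs, &src), addr_sprintf(bufd, &dst));
		return (0);
	}
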
Index: in6_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_var.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/in6_var.h -L sys/netinet6/in6_var.h -u -r1.1.1.2 -r1.2
--- sys/netinet6/in6_var.h
+++ sys/netinet6/in6_var.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_var.h,v 1.21.2.6 2005/12/25 14:03:37 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_var.h,v 1.31 2007/06/02 08:02:36 jinmei Exp $ */
/* $KAME: in6_var.h,v 1.56 2001/03/29 05:34:31 itojun Exp $ */
/*-
@@ -115,6 +115,9 @@
/* back pointer to the ND prefix (for autoconfigured addresses only) */
struct nd_prefix *ia6_ndpr;
+
+ /* multicast addresses joined from the kernel */
+ LIST_HEAD(, in6_multi_mship) ia6_memberships;
};
/* control structure to manage address selection policy */
@@ -493,9 +496,7 @@
/* struct in6_ifaddr *ia; */ \
do { \
struct ifaddr *ifa; \
- for (ifa = (ifp)->if_addrlist.tqh_first; ifa; ifa = ifa->ifa_list.tqe_next) { \
- if (!ifa->ifa_addr) \
- continue; \
+ TAILQ_FOREACH(ifa, &(ifp)->if_addrlist, ifa_list) { \
if (ifa->ifa_addr->sa_family == AF_INET6) \
break; \
} \
@@ -577,14 +578,14 @@
/* struct in6_multi *in6m; */ \
do { \
if (((in6m) = (step).i_in6m) != NULL) \
- (step).i_in6m = (step).i_in6m->in6m_entry.le_next; \
+ (step).i_in6m = LIST_NEXT((step).i_in6m, in6m_entry); \
} while(0)
#define IN6_FIRST_MULTI(step, in6m) \
/* struct in6_multistep step; */ \
/* struct in6_multi *in6m */ \
do { \
- (step).i_in6m = in6_multihead.lh_first; \
+ (step).i_in6m = LIST_FIRST(&in6_multihead); \
IN6_NEXT_MULTI((step), (in6m)); \
} while(0)
@@ -610,7 +611,7 @@
void in6_purgemkludge __P((struct ifnet *));
struct in6_ifaddr *in6ifa_ifpforlinklocal __P((struct ifnet *, int));
struct in6_ifaddr *in6ifa_ifpwithaddr __P((struct ifnet *, struct in6_addr *));
-char *ip6_sprintf __P((const struct in6_addr *));
+char *ip6_sprintf __P((char *, const struct in6_addr *));
int in6_addr2zoneid __P((struct ifnet *, struct in6_addr *, u_int32_t *));
int in6_matchlen __P((struct in6_addr *, struct in6_addr *));
int in6_are_prefix_equal __P((struct in6_addr *, struct in6_addr *, int));
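
[Editor's note on the in6_var.h hunks above: besides the new ia6_memberships list and the ip6_sprintf() prototype change, the macros now use the queue(3) accessors (TAILQ_FOREACH, LIST_FIRST, LIST_NEXT) instead of touching tqh_first/tqe_next and lh_first/le_next directly. A self-contained sketch of the same idiom, using a throwaway structure rather than the real ifaddr or in6_multi lists:]

	/* Sketch: the queue(3) idiom the header now relies on; 'node' and
	 * the list below are placeholders, not kernel structures. */
	#include <stdio.h>
	#include <sys/queue.h>

	struct node {
		int			value;
		TAILQ_ENTRY(node)	link;	/* replaces a hand-rolled tqe_next walk */
	};
	TAILQ_HEAD(nodelist, node);

	int
	main(void)
	{
		struct nodelist head = TAILQ_HEAD_INITIALIZER(head);
		struct node a = { .value = 1 }, b = { .value = 2 }, *np;

		TAILQ_INSERT_TAIL(&head, &a, link);
		TAILQ_INSERT_TAIL(&head, &b, link);

		/* Equivalent to walking head.tqh_first / link.tqe_next by hand,
		 * but independent of the queue's internal field names. */
		TAILQ_FOREACH(np, &head, link)
			printf("%d\n", np->value);
		return (0);
	}
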
Index: nd6.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/nd6.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/nd6.h -L sys/netinet6/nd6.h -u -r1.1.1.2 -r1.2
--- sys/netinet6/nd6.h
+++ sys/netinet6/nd6.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/nd6.h,v 1.19.2.2 2005/12/25 14:03:38 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/nd6.h,v 1.21 2005/10/21 16:23:00 suz Exp $ */
/* $KAME: nd6.h,v 1.76 2001/12/18 02:10:31 itojun Exp $ */
/*-
Index: ip6_output.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/ip6_output.c -L sys/netinet6/ip6_output.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/ip6_output.c
+++ sys/netinet6/ip6_output.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/ip6_output.c,v 1.90.2.10 2006/02/14 21:38:46 rwatson Exp $ */
+/* $FreeBSD: src/sys/netinet6/ip6_output.c,v 1.109 2007/07/05 16:29:40 delphij Exp $ */
/* $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */
/*-
@@ -61,7 +61,6 @@
* @(#)ip_output.c 8.3 (Berkeley) 1/21/94
*/
-#include "opt_ip6fw.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
@@ -71,10 +70,10 @@
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/errno.h>
+#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
-#include <sys/systm.h>
#include <sys/kernel.h>
#include <net/if.h>
@@ -93,22 +92,11 @@
#include <netinet6/nd6.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#ifdef INET6
-#include <netinet6/ipsec6.h>
-#endif
-#include <netkey/key.h>
-#endif /* IPSEC */
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
#include <netipsec/key.h>
-#endif /* FAST_IPSEC */
-
-#include <netinet6/ip6_fw.h>
-
-#include <net/net_osdep.h>
+#include <netinet6/ip6_ipsec.h>
+#endif /* IPSEC */
#include <netinet6/ip6protosw.h>
#include <netinet6/scope6_var.h>
@@ -144,6 +132,42 @@
/*
+ * Make an extension header from option data. hp is the source, and
+ * mp is the destination.
+ */
+#define MAKE_EXTHDR(hp, mp) \
+ do { \
+ if (hp) { \
+ struct ip6_ext *eh = (struct ip6_ext *)(hp); \
+ error = ip6_copyexthdr((mp), (caddr_t)(hp), \
+ ((eh)->ip6e_len + 1) << 3); \
+ if (error) \
+ goto freehdrs; \
+ } \
+ } while (/*CONSTCOND*/ 0)
+
+/*
+ * Form a chain of extension headers.
+ * m is the extension header mbuf
+ * mp is the previous mbuf in the chain
+ * p is the next header
+ * i is the type of option.
+ */
+#define MAKE_CHAIN(m, mp, p, i)\
+ do {\
+ if (m) {\
+ if (!hdrsplit) \
+ panic("assumption failed: hdr not split"); \
+ *mtod((m), u_char *) = *(p);\
+ *(p) = (i);\
+ p = mtod((m), u_char *);\
+ (m)->m_next = (mp)->m_next;\
+ (mp)->m_next = (m);\
+ (mp) = (m);\
+ }\
+ } while (/*CONSTCOND*/ 0)
+
+/*
* IP6 output. The packet in mbuf chain m contains a skeletal IP6
* header (with pri, len, nxt, hlim, src, dst).
* This function may modify ver and hlim only.
@@ -153,20 +177,18 @@
* type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
* nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one,
* which is rt_rmx.rmx_mtu.
+ *
+ * ifpp - XXX: just for statistics
*/
int
-ip6_output(m0, opt, ro, flags, im6o, ifpp, inp)
- struct mbuf *m0;
- struct ip6_pktopts *opt;
- struct route_in6 *ro;
- int flags;
- struct ip6_moptions *im6o;
- struct ifnet **ifpp; /* XXX: just for statistics */
- struct inpcb *inp;
+ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
+ struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
+ struct ifnet **ifpp, struct inpcb *inp)
{
struct ip6_hdr *ip6, *mhip6;
struct ifnet *ifp, *origifp;
struct mbuf *m = m0;
+ struct mbuf *mprev = NULL;
int hlen, tlen, len, off;
struct route_in6 ip6route;
struct rtentry *rt = NULL;
@@ -183,24 +205,21 @@
struct route_in6 *ro_pmtu = NULL;
int hdrsplit = 0;
int needipsec = 0;
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
+ struct ipsec_output_state state;
+ struct ip6_rthdr *rh = NULL;
int needipsectun = 0;
+ int segleft_org = 0;
struct secpolicy *sp = NULL;
-#endif /*IPSEC || FAST_IPSEC*/
+#endif /* IPSEC */
ip6 = mtod(m, struct ip6_hdr *);
- finaldst = ip6->ip6_dst;
+ if (ip6 == NULL) {
+ printf ("ip6 is NULL");
+ goto bad;
+ }
-#define MAKE_EXTHDR(hp, mp) \
- do { \
- if (hp) { \
- struct ip6_ext *eh = (struct ip6_ext *)(hp); \
- error = ip6_copyexthdr((mp), (caddr_t)(hp), \
- ((eh)->ip6e_len + 1) << 3); \
- if (error) \
- goto freehdrs; \
- } \
- } while (/*CONSTCOND*/ 0)
+ finaldst = ip6->ip6_dst;
bzero(&exthdrs, sizeof(exthdrs));
@@ -211,7 +230,7 @@
if (opt->ip6po_rthdr) {
/*
* Destination options header(1st part)
- * This only makes sence with a routing header.
+ * This only makes sense with a routing header.
* See Section 9.2 of RFC 3542.
* Disabling this part just for MIP6 convenience is
* a bad idea. We need to think carefully about a
@@ -227,104 +246,39 @@
MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
}
+ /*
+ * IPSec checking which handles several cases.
+ * FAST IPSEC: We re-injected the packet.
+ */
#ifdef IPSEC
- /* get a security policy for this packet */
- if (inp == NULL)
- sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
- else
- sp = ipsec6_getpolicybypcb(m, IPSEC_DIR_OUTBOUND, inp, &error);
-
- if (sp == NULL) {
- ipsec6stat.out_inval++;
- goto freehdrs;
- }
-
- error = 0;
-
- /* check policy */
- switch (sp->policy) {
- case IPSEC_POLICY_DISCARD:
- /*
- * This packet is just discarded.
- */
- ipsec6stat.out_polvio++;
+ switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp))
+ {
+ case 1: /* Bad packet */
goto freehdrs;
-
- case IPSEC_POLICY_BYPASS:
- case IPSEC_POLICY_NONE:
- /* no need to do IPsec. */
- needipsec = 0;
- break;
-
- case IPSEC_POLICY_IPSEC:
- if (sp->req == NULL) {
- /* acquire a policy */
- error = key_spdacquire(sp);
- goto freehdrs;
- }
+ case -1: /* Do IPSec */
needipsec = 1;
- break;
-
- case IPSEC_POLICY_ENTRUST:
+ case 0: /* No IPSec */
default:
- printf("ip6_output: Invalid policy found. %d\n", sp->policy);
- }
-#endif /* IPSEC */
-#ifdef FAST_IPSEC
- /* get a security policy for this packet */
- if (inp == NULL)
- sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
- else
- sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error);
-
- if (sp == NULL) {
- newipsecstat.ips_out_inval++;
- goto freehdrs;
- }
-
- error = 0;
-
- /* check policy */
- switch (sp->policy) {
- case IPSEC_POLICY_DISCARD:
- /*
- * This packet is just discarded.
- */
- newipsecstat.ips_out_polvio++;
- goto freehdrs;
-
- case IPSEC_POLICY_BYPASS:
- case IPSEC_POLICY_NONE:
- /* no need to do IPsec. */
- needipsec = 0;
break;
-
- case IPSEC_POLICY_IPSEC:
- if (sp->req == NULL) {
- /* acquire a policy */
- error = key_spdacquire(sp);
- goto freehdrs;
- }
- needipsec = 1;
- break;
-
- case IPSEC_POLICY_ENTRUST:
- default:
- printf("ip6_output: Invalid policy found. %d\n", sp->policy);
}
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
/*
* Calculate the total length of the extension header chain.
* Keep the length of the unfragmentable part for fragmentation.
*/
optlen = 0;
- if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len;
- if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len;
- if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len;
+ if (exthdrs.ip6e_hbh)
+ optlen += exthdrs.ip6e_hbh->m_len;
+ if (exthdrs.ip6e_dest1)
+ optlen += exthdrs.ip6e_dest1->m_len;
+ if (exthdrs.ip6e_rthdr)
+ optlen += exthdrs.ip6e_rthdr->m_len;
unfragpartlen = optlen + sizeof(struct ip6_hdr);
+
/* NOTE: we don't add AH/ESP length here. do that later. */
- if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len;
+ if (exthdrs.ip6e_dest2)
+ optlen += exthdrs.ip6e_dest2->m_len;
/*
* If we need IPsec, or there is at least one extension header,
@@ -374,106 +328,94 @@
* during the header composing process, "m" points to IPv6 header.
* "mprev" points to an extension header prior to esp.
*/
- {
- u_char *nexthdrp = &ip6->ip6_nxt;
- struct mbuf *mprev = m;
+ u_char *nexthdrp = &ip6->ip6_nxt;
+ mprev = m;
- /*
- * we treat dest2 specially. this makes IPsec processing
- * much easier. the goal here is to make mprev point the
- * mbuf prior to dest2.
- *
- * result: IPv6 dest2 payload
- * m and mprev will point to IPv6 header.
- */
- if (exthdrs.ip6e_dest2) {
- if (!hdrsplit)
- panic("assumption failed: hdr not split");
- exthdrs.ip6e_dest2->m_next = m->m_next;
- m->m_next = exthdrs.ip6e_dest2;
- *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
- ip6->ip6_nxt = IPPROTO_DSTOPTS;
- }
+ /*
+ * we treat dest2 specially. this makes IPsec processing
+ * much easier. the goal here is to make mprev point the
+ * mbuf prior to dest2.
+ *
+ * result: IPv6 dest2 payload
+ * m and mprev will point to IPv6 header.
+ */
+ if (exthdrs.ip6e_dest2) {
+ if (!hdrsplit)
+ panic("assumption failed: hdr not split");
+ exthdrs.ip6e_dest2->m_next = m->m_next;
+ m->m_next = exthdrs.ip6e_dest2;
+ *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
+ ip6->ip6_nxt = IPPROTO_DSTOPTS;
+ }
-#define MAKE_CHAIN(m, mp, p, i)\
- do {\
- if (m) {\
- if (!hdrsplit) \
- panic("assumption failed: hdr not split"); \
- *mtod((m), u_char *) = *(p);\
- *(p) = (i);\
- p = mtod((m), u_char *);\
- (m)->m_next = (mp)->m_next;\
- (mp)->m_next = (m);\
- (mp) = (m);\
- }\
- } while (/*CONSTCOND*/ 0)
- /*
- * result: IPv6 hbh dest1 rthdr dest2 payload
- * m will point to IPv6 header. mprev will point to the
- * extension header prior to dest2 (rthdr in the above case).
- */
- MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
- MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
- IPPROTO_DSTOPTS);
- MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
- IPPROTO_ROUTING);
-
-#if defined(IPSEC) || defined(FAST_IPSEC)
- if (!needipsec)
- goto skip_ipsec2;
-
- /*
- * pointers after IPsec headers are not valid any more.
- * other pointers need a great care too.
- * (IPsec routines should not mangle mbufs prior to AH/ESP)
- */
- exthdrs.ip6e_dest2 = NULL;
+ /*
+ * result: IPv6 hbh dest1 rthdr dest2 payload
+ * m will point to IPv6 header. mprev will point to the
+ * extension header prior to dest2 (rthdr in the above case).
+ */
+ MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
+ MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
+ IPPROTO_DSTOPTS);
+ MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
+ IPPROTO_ROUTING);
- {
- struct ip6_rthdr *rh = NULL;
- int segleft_org = 0;
- struct ipsec_output_state state;
+#ifdef IPSEC
+ if (!needipsec)
+ goto skip_ipsec2;
- if (exthdrs.ip6e_rthdr) {
- rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
- segleft_org = rh->ip6r_segleft;
- rh->ip6r_segleft = 0;
- }
+ /*
+ * pointers after IPsec headers are not valid any more.
+ * other pointers need a great care too.
+ * (IPsec routines should not mangle mbufs prior to AH/ESP)
+ */
+ exthdrs.ip6e_dest2 = NULL;
- bzero(&state, sizeof(state));
- state.m = m;
- error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
- &needipsectun);
- m = state.m;
- if (error) {
- /* mbuf is already reclaimed in ipsec6_output_trans. */
- m = NULL;
- switch (error) {
- case EHOSTUNREACH:
- case ENETUNREACH:
- case EMSGSIZE:
- case ENOBUFS:
- case ENOMEM:
- break;
- default:
- printf("ip6_output (ipsec): error code %d\n", error);
- /* FALLTHROUGH */
- case ENOENT:
- /* don't show these error codes to the user */
- error = 0;
- break;
- }
- goto bad;
- }
- if (exthdrs.ip6e_rthdr) {
- /* ah6_output doesn't modify mbuf chain */
- rh->ip6r_segleft = segleft_org;
+ if (exthdrs.ip6e_rthdr) {
+ rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
+ segleft_org = rh->ip6r_segleft;
+ rh->ip6r_segleft = 0;
+ }
+
+ bzero(&state, sizeof(state));
+ state.m = m;
+ error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
+ &needipsectun);
+ m = state.m;
+ if (error) {
+ /* mbuf is already reclaimed in ipsec6_output_trans. */
+ m = NULL;
+ switch (error) {
+ case EHOSTUNREACH:
+ case ENETUNREACH:
+ case EMSGSIZE:
+ case ENOBUFS:
+ case ENOMEM:
+ break;
+ default:
+ printf("ip6_output (ipsec): error code %d\n", error);
+ /* FALLTHROUGH */
+ case ENOENT:
+ /* don't show these error codes to the user */
+ error = 0;
+ break;
}
- }
-skip_ipsec2:;
-#endif
+ goto bad;
+ } else if (!needipsectun) {
+ /*
+ * In the FAST IPSec case we have already
+ * re-injected the packet and it has been freed
+ * by the ipsec_done() function. So, just clean
+ * up after ourselves.
+ */
+ m = NULL;
+ goto done;
+ }
+ if (exthdrs.ip6e_rthdr) {
+ /* ah6_output doesn't modify mbuf chain */
+ rh->ip6r_segleft = segleft_org;
}
+skip_ipsec2:;
+#endif /* IPSEC */
/*
* If there is a routing header, replace the destination address field
@@ -551,7 +493,7 @@
dst = (struct sockaddr_in6 *)&ro->ro_dst;
again:
- /*
+ /*
* if specified, try to fill in the traffic class field.
* do not override if a non-zero value is already set.
* we check the diffserv field and the ecn field separately.
@@ -577,7 +519,10 @@
ip6->ip6_hlim = ip6_defmcasthlim;
}
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
+ /*
+ * We may re-inject packets into the stack here.
+ */
if (needipsec && needipsectun) {
struct ipsec_output_state state;
@@ -622,6 +567,15 @@
break;
}
goto bad;
+ } else {
+ /*
+ * In the FAST IPSec case we have already
+ * re-injected the packet and it has been freed
+ * by the ipsec_done() function. So, just clean
+ * up after ourselves.
+ */
+ m = NULL;
+ goto done;
}
exthdrs.ip6e_ip6 = m;
@@ -839,23 +793,6 @@
in6_clearscope(&ip6->ip6_dst);
/*
- * Check with the firewall...
- */
- if (ip6_fw_enable && ip6_fw_chk_ptr) {
- u_short port = 0;
- m->m_pkthdr.rcvif = NULL; /* XXX */
- /* If ipfw says divert, we have to just drop packet */
- if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) {
- m_freem(m);
- goto done;
- }
- if (!m) {
- error = EACCES;
- goto done;
- }
- }
-
- /*
* If the outgoing packet contains a hop-by-hop options header,
* it must be examined and processed even by the source node.
* (RFC 2460, section 4.)
@@ -889,7 +826,7 @@
}
/* Jump over all PFIL processing if hooks are not active. */
- if (inet6_pfil_hook.ph_busy_count == -1)
+ if (!PFIL_HOOKED(&inet6_pfil_hook))
goto passout;
odst = ip6->ip6_dst;
@@ -987,10 +924,6 @@
ia6->ia_ifa.if_opackets++;
ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
}
-#ifdef IPSEC
- /* clean ipsec history once it goes out of the node */
- ipsec_delaux(m);
-#endif
error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
goto done;
}
@@ -1013,10 +946,7 @@
struct ip6_frag *ip6f;
u_int32_t id = htonl(ip6_randomid());
u_char nextproto;
-#if 0
- struct ip6ctlparam ip6cp;
- u_int32_t mtu32;
-#endif
+
int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
/*
@@ -1028,25 +958,6 @@
if (mtu > IPV6_MAXPACKET)
mtu = IPV6_MAXPACKET;
-#if 0
- /*
- * It is believed this code is a leftover from the
- * development of the IPV6_RECVPATHMTU sockopt and
- * associated work to implement RFC3542.
- * It's not entirely clear what the intent of the API
- * is at this point, so disable this code for now.
- * The IPV6_RECVPATHMTU sockopt and/or IPV6_DONTFRAG
- * will send notifications if the application requests.
- */
-
- /* Notify a proper path MTU to applications. */
- mtu32 = (u_int32_t)mtu;
- bzero(&ip6cp, sizeof(ip6cp));
- ip6cp.ip6c_cmdarg = (void *)&mtu32;
- pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
- (void *)&ip6cp);
-#endif
-
len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
if (len < 8) {
error = EMSGSIZE;
@@ -1147,15 +1058,11 @@
m0 = m->m_nextpkt;
m->m_nextpkt = 0;
if (error == 0) {
- /* Record statistics for this interface address. */
- if (ia) {
- ia->ia_ifa.if_opackets++;
- ia->ia_ifa.if_obytes += m->m_pkthdr.len;
- }
-#ifdef IPSEC
- /* clean ipsec history once it goes out of the node */
- ipsec_delaux(m);
-#endif
+ /* Record statistics for this interface address. */
+ if (ia) {
+ ia->ia_ifa.if_opackets++;
+ ia->ia_ifa.if_obytes += m->m_pkthdr.len;
+ }
error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
} else
m_freem(m);
@@ -1171,15 +1078,6 @@
RTFREE(ro_pmtu->ro_rt);
}
-#ifdef IPSEC
- if (sp != NULL)
- key_freesp(sp);
-#endif /* IPSEC */
-#ifdef FAST_IPSEC
- if (sp != NULL)
- KEY_FREESP(&sp);
-#endif /* FAST_IPSEC */
-
return (error);
freehdrs:
@@ -1189,15 +1087,13 @@
m_freem(exthdrs.ip6e_dest2);
/* FALLTHROUGH */
bad:
- m_freem(m);
+ if (m)
+ m_freem(m);
goto done;
}
static int
-ip6_copyexthdr(mp, hdr, hlen)
- struct mbuf **mp;
- caddr_t hdr;
- int hlen;
+ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
{
struct mbuf *m;
@@ -1227,9 +1123,7 @@
* Insert jumbo payload option.
*/
static int
-ip6_insert_jumboopt(exthdrs, plen)
- struct ip6_exthdrs *exthdrs;
- u_int32_t plen;
+ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
{
struct mbuf *mopt;
u_char *optbuf;
@@ -1324,10 +1218,8 @@
* Insert fragment header and copy unfragmentable header portions.
*/
static int
-ip6_insertfraghdr(m0, m, hlen, frghdrp)
- struct mbuf *m0, *m;
- int hlen;
- struct ip6_frag **frghdrp;
+ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
+ struct ip6_frag **frghdrp)
{
struct mbuf *n, *mlast;
@@ -1367,12 +1259,9 @@
}
static int
-ip6_getpmtu(ro_pmtu, ro, ifp, dst, mtup, alwaysfragp)
- struct route_in6 *ro_pmtu, *ro;
- struct ifnet *ifp;
- struct in6_addr *dst;
- u_long *mtup;
- int *alwaysfragp;
+ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
+ struct ifnet *ifp, struct in6_addr *dst, u_long *mtup,
+ int *alwaysfragp)
{
u_int32_t mtu = 0;
int alwaysfrag = 0;
@@ -1453,9 +1342,7 @@
* IP6 socket option processing.
*/
int
-ip6_ctloutput(so, sopt)
- struct socket *so;
- struct sockopt *sopt;
+ip6_ctloutput(struct socket *so, struct sockopt *sopt)
{
int privileged, optdatalen, uproto;
void *optdata;
@@ -1869,7 +1756,7 @@
}
break;
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
case IPV6_IPSEC_POLICY:
{
caddr_t req = NULL;
@@ -1889,28 +1776,7 @@
m_freem(m);
}
break;
-#endif /* KAME IPSEC */
-
- case IPV6_FW_ADD:
- case IPV6_FW_DEL:
- case IPV6_FW_FLUSH:
- case IPV6_FW_ZERO:
- {
- struct mbuf *m;
- struct mbuf **mp = &m;
-
- if (ip6_fw_ctl_ptr == NULL)
- return EINVAL;
- /* XXX */
- if ((error = soopt_getm(sopt, &m)) != 0)
- break;
- /* XXX */
- if ((error = soopt_mcopyin(sopt, m)) != 0)
- break;
- error = (*ip6_fw_ctl_ptr)(optname, mp);
- m = *mp;
- }
- break;
+#endif /* IPSEC */
default:
error = ENOPROTOOPT;
@@ -2107,7 +1973,7 @@
}
break;
-#if defined(IPSEC) || defined(FAST_IPSEC)
+#ifdef IPSEC
case IPV6_IPSEC_POLICY:
{
caddr_t req = NULL;
@@ -2136,24 +2002,7 @@
m_freem(m);
break;
}
-#endif /* KAME IPSEC */
-
- case IPV6_FW_GET:
- {
- struct mbuf *m;
- struct mbuf **mp = &m;
-
- if (ip6_fw_ctl_ptr == NULL)
- {
- return EINVAL;
- }
- error = (*ip6_fw_ctl_ptr)(optname, mp);
- if (error == 0)
- error = soopt_mcopyout(sopt, m); /* XXX */
- if (error == 0 && m)
- m_freem(m);
- }
- break;
+#endif /* IPSEC */
default:
error = ENOPROTOOPT;
@@ -2168,9 +2017,7 @@
}
int
-ip6_raw_ctloutput(so, sopt)
- struct socket *so;
- struct sockopt *sopt;
+ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
{
int error = 0, optval, optlen;
const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
@@ -2248,11 +2095,8 @@
* specifying behavior of outgoing packets.
*/
static int
-ip6_pcbopts(pktopt, m, so, sopt)
- struct ip6_pktopts **pktopt;
- struct mbuf *m;
- struct socket *so;
- struct sockopt *sopt;
+ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m,
+ struct socket *so, struct sockopt *sopt)
{
struct ip6_pktopts *opt = *pktopt;
int error = 0;
@@ -2299,8 +2143,7 @@
* the struct.
*/
void
-ip6_initpktopts(opt)
- struct ip6_pktopts *opt;
+ip6_initpktopts(struct ip6_pktopts *opt)
{
bzero(opt, sizeof(*opt));
@@ -2311,11 +2154,8 @@
}
static int
-ip6_pcbopt(optname, buf, len, pktopt, priv, uproto)
- int optname, len, priv;
- u_char *buf;
- struct ip6_pktopts **pktopt;
- int uproto;
+ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
+ int priv, int uproto)
{
struct ip6_pktopts *opt;
@@ -2330,10 +2170,7 @@
}
static int
-ip6_getpcbopt(pktopt, optname, sopt)
- struct ip6_pktopts *pktopt;
- struct sockopt *sopt;
- int optname;
+ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
{
void *optdata = NULL;
int optdatalen = 0;
@@ -2431,9 +2268,7 @@
}
void
-ip6_clearpktopts(pktopt, optname)
- struct ip6_pktopts *pktopt;
- int optname;
+ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
{
if (pktopt == NULL)
return;
@@ -2494,9 +2329,7 @@
} while (/*CONSTCOND*/ 0)
static int
-copypktopts(dst, src, canwait)
- struct ip6_pktopts *dst, *src;
- int canwait;
+copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
{
if (dst == NULL || src == NULL) {
printf("ip6_clearpktopts: invalid argument\n");
@@ -2509,7 +2342,7 @@
if (src->ip6po_pktinfo) {
dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
M_IP6OPT, canwait);
- if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
+ if (dst->ip6po_pktinfo == NULL)
goto bad;
*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
}
@@ -2539,15 +2372,13 @@
#undef PKTOPT_EXTHDRCPY
struct ip6_pktopts *
-ip6_copypktopts(src, canwait)
- struct ip6_pktopts *src;
- int canwait;
+ip6_copypktopts(struct ip6_pktopts *src, int canwait)
{
int error;
struct ip6_pktopts *dst;
dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
- if (dst == NULL && canwait == M_NOWAIT)
+ if (dst == NULL)
return (NULL);
ip6_initpktopts(dst);
@@ -2560,8 +2391,7 @@
}
void
-ip6_freepcbopts(pktopt)
- struct ip6_pktopts *pktopt;
+ip6_freepcbopts(struct ip6_pktopts *pktopt)
{
if (pktopt == NULL)
return;
@@ -2575,10 +2405,7 @@
* Set the IP6 multicast options in response to user setsockopt().
*/
static int
-ip6_setmoptions(optname, im6op, m)
- int optname;
- struct ip6_moptions **im6op;
- struct mbuf *m;
+ip6_setmoptions(int optname, struct ip6_moptions **im6op, struct mbuf *m)
{
int error = 0;
u_int loop, ifindex;
@@ -2880,10 +2707,7 @@
* Return the IP6 multicast options in response to user getsockopt().
*/
static int
-ip6_getmoptions(optname, im6o, mp)
- int optname;
- struct ip6_moptions *im6o;
- struct mbuf **mp;
+ip6_getmoptions(int optname, struct ip6_moptions *im6o, struct mbuf **mp)
{
u_int *hlim, *loop, *ifindex;
@@ -2927,8 +2751,7 @@
* Discard the IP6 multicast options.
*/
void
-ip6_freemoptions(im6o)
- struct ip6_moptions *im6o;
+ip6_freemoptions(struct ip6_moptions *im6o)
{
struct in6_multi_mship *imm;
@@ -2948,10 +2771,8 @@
* Set IPv6 outgoing packet options based on advanced API.
*/
int
-ip6_setpktopts(control, opt, stickyopt, priv, uproto)
- struct mbuf *control;
- struct ip6_pktopts *opt, *stickyopt;
- int priv, uproto;
+ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
+ struct ip6_pktopts *stickyopt, int priv, int uproto)
{
struct cmsghdr *cm = 0;
@@ -3014,10 +2835,8 @@
* "sticky=1, cmsg=1": RFC2292 socket option
*/
static int
-ip6_setpktopt(optname, buf, len, opt, priv, sticky, cmsg, uproto)
- int optname, len, priv, sticky, cmsg, uproto;
- u_char *buf;
- struct ip6_pktopts *opt;
+ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
+ int priv, int sticky, int cmsg, int uproto)
{
int minmtupolicy, preftemp;
@@ -3400,10 +3219,7 @@
* pointer that might NOT be &loif -- easier than replicating that code here.
*/
void
-ip6_mloopback(ifp, m, dst)
- struct ifnet *ifp;
- struct mbuf *m;
- struct sockaddr_in6 *dst;
+ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
{
struct mbuf *copym;
struct ip6_hdr *ip6;
@@ -3446,9 +3262,7 @@
* Chop IPv6 header off from the payload.
*/
static int
-ip6_splithdr(m, exthdrs)
- struct mbuf *m;
- struct ip6_exthdrs *exthdrs;
+ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
{
struct mbuf *mh;
struct ip6_hdr *ip6;
@@ -3477,8 +3291,7 @@
* Compute IPv6 extension header length.
*/
int
-ip6_optlen(in6p)
- struct in6pcb *in6p;
+ip6_optlen(struct in6pcb *in6p)
{
int len;
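
[Editor's note on the ip6_output.c hunks above: the separate KAME IPSEC and FAST_IPSEC code paths are folded into one, with the policy lookup moved into the new ip6_ipsec_output() helper whose return value selects the action (1 drop the packet, -1 apply IPsec, 0 pass through), and the MAKE_EXTHDR/MAKE_CHAIN macros are hoisted to file scope. One detail worth calling out is the length arithmetic MAKE_EXTHDR hands to ip6_copyexthdr(): the ip6_ext length field counts 8-byte units excluding the first, hence (ip6e_len + 1) << 3. A small illustrative sketch, not taken from the kernel:]

	/* Sketch: sizing an IPv6 extension header the way MAKE_EXTHDR does. */
	#include <stdio.h>
	#include <netinet/in.h>
	#include <netinet/ip6.h>	/* struct ip6_ext */

	int
	main(void)
	{
		struct ip6_ext eh;

		eh.ip6e_nxt = 59;	/* "no next header", illustrative value */
		eh.ip6e_len = 1;	/* one additional 8-byte unit */

		/* 8 bytes for the first unit plus ip6e_len further units. */
		printf("extension header occupies %d bytes\n",
		    (eh.ip6e_len + 1) << 3);
		return (0);
	}
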
Index: in6_ifattach.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_ifattach.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/in6_ifattach.h -L sys/netinet6/in6_ifattach.h -u -r1.1.1.2 -r1.2
--- sys/netinet6/in6_ifattach.h
+++ sys/netinet6/in6_ifattach.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_ifattach.h,v 1.5.2.2 2005/12/25 14:03:37 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_ifattach.h,v 1.7 2005/10/21 16:23:00 suz Exp $ */
/* $KAME: in6_ifattach.h,v 1.14 2001/02/08 12:48:39 jinmei Exp $ */
/*-
Index: pim6_var.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/pim6_var.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/pim6_var.h -L sys/netinet6/pim6_var.h -u -r1.1.1.1 -r1.2
--- sys/netinet6/pim6_var.h
+++ sys/netinet6/pim6_var.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/pim6_var.h,v 1.4 2005/01/07 02:30:35 imp Exp $ */
+/* $FreeBSD: src/sys/netinet6/pim6_var.h,v 1.5 2005/08/10 07:10:02 obrien Exp $ */
/* $KAME: pim6_var.h,v 1.8 2000/06/06 08:07:43 jinmei Exp $ */
/*-
@@ -52,8 +52,6 @@
};
#if (defined(KERNEL)) || (defined(_KERNEL))
-extern struct pim6stat pim6stat;
-
int pim6_input __P((struct mbuf **, int*, int));
#endif /* KERNEL */
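
[Editor's note on the pim6_var.h hunk above: the pim6stat extern is gone because the counters are now published through the sysctl tree instead (see the SYSCTL_NODE/SYSCTL_STRUCT additions in ip6_mroute.c earlier in this mail). A hedged userland sketch of reading them; the MIB path net.inet6.pim.stats is inferred from those additions and should be verified on the target system, and error handling is minimal:]

	/* Sketch: fetching the PIM6 counters via sysctl rather than the
	 * removed pim6stat extern. */
	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <netinet6/pim6_var.h>

	int
	main(void)
	{
		struct pim6stat ps;
		size_t len = sizeof(ps);

		if (sysctlbyname("net.inet6.pim.stats", &ps, &len, NULL, 0) == -1) {
			perror("sysctlbyname");
			return (1);
		}
		printf("%llu register message(s) received\n",
		    (unsigned long long)ps.pim6s_rcv_registers);
		return (0);
	}
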
Index: udp6_usrreq.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/udp6_usrreq.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/udp6_usrreq.c -L sys/netinet6/udp6_usrreq.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/udp6_usrreq.c
+++ sys/netinet6/udp6_usrreq.c
@@ -1,5 +1,5 @@
-/* $FreeBSD: src/sys/netinet6/udp6_usrreq.c,v 1.54.2.2 2006/02/09 02:29:06 ume Exp $ */
/* $KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $ */
+/* $KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $ */
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -31,8 +31,9 @@
*/
/*-
- * Copyright (c) 1982, 1986, 1989, 1993
- * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
+ * The Regents of the University of California.
+ * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -58,24 +59,25 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * @(#)udp_var.h 8.1 (Berkeley) 6/10/93
+ * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
+ * $FreeBSD: src/sys/netinet6/udp6_usrreq.c,v 1.81 2007/09/08 08:18:24 rwatson Exp $
*/
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_mac.h"
#include <sys/param.h>
-#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
-#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
@@ -90,7 +92,9 @@
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
#include <netinet/ip6.h>
+#include <netinet/icmp_var.h>
#include <netinet/icmp6.h>
#include <netinet/ip_var.h>
#include <netinet/udp.h>
@@ -102,44 +106,79 @@
#include <netinet6/scope6_var.h>
#ifdef IPSEC
-#include <netinet6/ipsec.h>
-#include <netinet6/ipsec6.h>
-#endif /* IPSEC */
-
-#ifdef FAST_IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
-#endif /* FAST_IPSEC */
+#endif /* IPSEC */
+
+#include <security/mac/mac_framework.h>
/*
- * UDP protocol inplementation.
+ * UDP protocol implementation.
* Per RFC 768, August, 1980.
*/
-extern struct protosw inetsw[];
-static int udp6_detach __P((struct socket *so));
+extern struct protosw inetsw[];
+static void udp6_detach(struct socket *so);
+
+static void
+udp6_append(struct inpcb *inp, struct mbuf *n, int off,
+ struct sockaddr_in6 *fromsa)
+{
+ struct socket *so;
+ struct mbuf *opts;
+
+ INP_LOCK_ASSERT(inp);
+
+#ifdef IPSEC
+ /* Check AH/ESP integrity. */
+ if (ipsec6_in_reject(n, inp)) {
+ m_freem(n);
+ ipsec6stat.in_polvio++;
+ return;
+ }
+#endif /* IPSEC */
+#ifdef MAC
+ if (mac_check_inpcb_deliver(inp, n) != 0) {
+ m_freem(n);
+ return;
+ }
+#endif
+ opts = NULL;
+ if (inp->in6p_flags & IN6P_CONTROLOPTS ||
+ inp->inp_socket->so_options & SO_TIMESTAMP)
+ ip6_savecontrol(inp, n, &opts);
+ m_adj(n, off + sizeof(struct udphdr));
+
+ so = inp->inp_socket;
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)fromsa, n,
+ opts) == 0) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ m_freem(n);
+ if (opts)
+ m_freem(opts);
+ udpstat.udps_fullsock++;
+ } else
+ sorwakeup_locked(so);
+}
int
-udp6_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp, proto;
-{
- struct mbuf *m = *mp, *opts;
- register struct ip6_hdr *ip6;
- register struct udphdr *uh;
- register struct inpcb *in6p;
+udp6_input(struct mbuf **mp, int *offp, int proto)
+{
+ struct mbuf *m = *mp;
+ struct ip6_hdr *ip6;
+ struct udphdr *uh;
+ struct inpcb *inp;
int off = *offp;
int plen, ulen;
struct sockaddr_in6 fromsa;
- opts = NULL;
-
ip6 = mtod(m, struct ip6_hdr *);
if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
/* XXX send icmp6 host/port unreach? */
m_freem(m);
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
}
#ifndef PULLDOWN_TEST
@@ -149,17 +188,23 @@
#else
IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(*uh));
if (!uh)
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
#endif
udpstat.udps_ipackets++;
+ /*
+ * Destination port of 0 is illegal, based on RFC768.
+ */
+ if (uh->uh_dport == 0)
+ goto badunlocked;
+
plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6);
ulen = ntohs((u_short)uh->uh_ulen);
if (plen != ulen) {
udpstat.udps_badlen++;
- goto bad;
+ goto badunlocked;
}
/*
@@ -167,229 +212,151 @@
*/
if (uh->uh_sum == 0) {
udpstat.udps_nosum++;
- goto bad;
+ goto badunlocked;
}
if (in6_cksum(m, IPPROTO_UDP, off, ulen) != 0) {
udpstat.udps_badsum++;
- goto bad;
+ goto badunlocked;
}
- if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
- struct inpcb *last;
+ /*
+ * Construct sockaddr format source address.
+ */
+ init_sin6(&fromsa, m);
+ fromsa.sin6_port = uh->uh_sport;
- /*
- * Deliver a multicast datagram to all sockets
- * for which the local and remote addresses and ports match
- * those of the incoming datagram. This allows more than
- * one process to receive multicasts on the same port.
- * (This really ought to be done for unicast datagrams as
- * well, but that would cause problems with existing
- * applications that open both address-specific sockets and
- * a wildcard socket listening to the same port -- they would
- * end up receiving duplicates of every unicast datagram.
- * Those applications open the multiple sockets to overcome an
- * inadequacy of the UDP socket interface, but for backwards
- * compatibility we avoid the problem here rather than
- * fixing the interface. Maybe 4.5BSD will remedy this?)
- */
+ INP_INFO_RLOCK(&udbinfo);
+ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
+ struct inpcb *last;
/*
- * In a case that laddr should be set to the link-local
+ * In the event that laddr should be set to the link-local
* address (this happens in RIPng), the multicast address
- * specified in the received packet does not match with
- * laddr. To cure this situation, the matching is relaxed
- * if the receiving interface is the same as one specified
- * in the socket and if the destination multicast address
- * matches one of the multicast groups specified in the socket.
- */
-
- /*
- * Construct sockaddr format source address.
- */
- init_sin6(&fromsa, m);
- fromsa.sin6_port = uh->uh_sport;
- /*
- * KAME note: traditionally we dropped udpiphdr from mbuf here.
- * We need udphdr for IPsec processing so we do that later.
+ * specified in the received packet will not match laddr. To
+ * handle this situation, matching is relaxed if the
+ * receiving interface is the same as one specified in the
+ * socket and if the destination multicast address matches
+ * one of the multicast groups specified in the socket.
*/
/*
- * Locate pcb(s) for datagram.
- * (Algorithm copied from raw_intr().)
+ * KAME note: traditionally we dropped udpiphdr from mbuf
+ * here. We need udphdr for IPsec processing so we do that
+ * later.
*/
last = NULL;
- LIST_FOREACH(in6p, &udb, inp_list) {
- if ((in6p->inp_vflag & INP_IPV6) == 0)
+ LIST_FOREACH(inp, &udb, inp_list) {
+ if ((inp->inp_vflag & INP_IPV6) == 0)
+ continue;
+ if (inp->in6p_lport != uh->uh_dport)
continue;
- if (in6p->in6p_lport != uh->uh_dport)
+ /*
+ * XXX: Do not check source port of incoming datagram
+ * unless inp_connect() has been called to bind the
+ * fport part of the 4-tuple; the source could be
+ * trying to talk to us with an ephemeral port.
+ */
+ if (inp->inp_fport != 0 &&
+ inp->inp_fport != uh->uh_sport)
continue;
- if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
- if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr,
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
+ if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
&ip6->ip6_dst))
continue;
}
- if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) {
- if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr,
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
+ if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
&ip6->ip6_src) ||
- in6p->in6p_fport != uh->uh_sport)
+ inp->in6p_fport != uh->uh_sport)
continue;
}
if (last != NULL) {
struct mbuf *n;
-#if defined(IPSEC) || defined(FAST_IPSEC)
- /*
- * Check AH/ESP integrity.
- */
- if (ipsec6_in_reject(m, last)) {
-#ifdef IPSEC
- ipsec6stat.in_polvio++;
-#endif /* IPSEC */
- /* do not inject data into pcb */
- } else
-#endif /*IPSEC || FAST_IPSEC*/
if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
- /*
- * KAME NOTE: do not
- * m_copy(m, offset, ...) above.
- * sbappendaddr() expects M_PKTHDR,
- * and m_copy() will copy M_PKTHDR
- * only if offset is 0.
- */
- if (last->in6p_flags & IN6P_CONTROLOPTS
- || last->in6p_socket->so_options & SO_TIMESTAMP)
- ip6_savecontrol(last, n, &opts);
-
- m_adj(n, off + sizeof(struct udphdr));
- if (sbappendaddr(&last->in6p_socket->so_rcv,
- (struct sockaddr *)&fromsa,
- n, opts) == 0) {
- m_freem(n);
- if (opts)
- m_freem(opts);
- udpstat.udps_fullsock++;
- } else
- sorwakeup(last->in6p_socket);
- opts = NULL;
+ INP_LOCK(last);
+ udp6_append(last, n, off, &fromsa);
+ INP_UNLOCK(last);
}
}
- last = in6p;
+ last = inp;
/*
* Don't look for additional matches if this one does
* not have either the SO_REUSEPORT or SO_REUSEADDR
- * socket options set. This heuristic avoids searching
- * through all pcbs in the common case of a non-shared
- * port. It assumes that an application will never
- * clear these options after setting them.
+ * socket options set. This heuristic avoids
+ * searching through all pcbs in the common case of a
+ * non-shared port. It assumes that an application
+ * will never clear these options after setting them.
*/
- if ((last->in6p_socket->so_options &
+ if ((last->inp_socket->so_options &
(SO_REUSEPORT|SO_REUSEADDR)) == 0)
break;
}
if (last == NULL) {
/*
- * No matching pcb found; discard datagram.
- * (No need to send an ICMP Port Unreachable
- * for a broadcast or multicast datgram.)
+ * No matching pcb found; discard datagram. (No need
+ * to send an ICMP Port Unreachable for a broadcast
+ * or multicast datgram.)
*/
udpstat.udps_noport++;
udpstat.udps_noportmcast++;
- goto bad;
- }
-#if defined(IPSEC) || defined(FAST_IPSEC)
- /*
- * Check AH/ESP integrity.
- */
- if (ipsec6_in_reject(m, last)) {
-#ifdef IPSEC
- ipsec6stat.in_polvio++;
-#endif /* IPSEC */
- goto bad;
- }
-#endif /*IPSEC || FAST_IPSEC*/
- if (last->in6p_flags & IN6P_CONTROLOPTS
- || last->in6p_socket->so_options & SO_TIMESTAMP)
- ip6_savecontrol(last, m, &opts);
-
- m_adj(m, off + sizeof(struct udphdr));
- if (sbappendaddr(&last->in6p_socket->so_rcv,
- (struct sockaddr *)&fromsa,
- m, opts) == 0) {
- udpstat.udps_fullsock++;
- goto bad;
+ goto badheadlocked;
}
- sorwakeup(last->in6p_socket);
- return IPPROTO_DONE;
+ INP_LOCK(last);
+ udp6_append(last, m, off, &fromsa);
+ INP_UNLOCK(last);
+ INP_INFO_RUNLOCK(&udbinfo);
+ return (IPPROTO_DONE);
}
/*
* Locate pcb for datagram.
*/
- in6p = in6_pcblookup_hash(&udbinfo, &ip6->ip6_src, uh->uh_sport,
- &ip6->ip6_dst, uh->uh_dport, 1,
- m->m_pkthdr.rcvif);
- if (in6p == 0) {
- if (log_in_vain) {
- char buf[INET6_ADDRSTRLEN];
+ inp = in6_pcblookup_hash(&udbinfo, &ip6->ip6_src, uh->uh_sport,
+ &ip6->ip6_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif);
+ if (inp == NULL) {
+ if (udp_log_in_vain) {
+ char ip6bufs[INET6_ADDRSTRLEN];
+ char ip6bufd[INET6_ADDRSTRLEN];
- strcpy(buf, ip6_sprintf(&ip6->ip6_dst));
log(LOG_INFO,
"Connection attempt to UDP [%s]:%d from [%s]:%d\n",
- buf, ntohs(uh->uh_dport),
- ip6_sprintf(&ip6->ip6_src), ntohs(uh->uh_sport));
+ ip6_sprintf(ip6bufd, &ip6->ip6_dst),
+ ntohs(uh->uh_dport),
+ ip6_sprintf(ip6bufs, &ip6->ip6_src),
+ ntohs(uh->uh_sport));
}
udpstat.udps_noport++;
if (m->m_flags & M_MCAST) {
printf("UDP6: M_MCAST is set in a unicast packet.\n");
udpstat.udps_noportmcast++;
- goto bad;
+ goto badheadlocked;
}
+ INP_INFO_RUNLOCK(&udbinfo);
+ if (udp_blackhole)
+ goto badunlocked;
+ if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0)
+ goto badunlocked;
icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0);
- return IPPROTO_DONE;
- }
-#if defined(IPSEC) || defined(FAST_IPSEC)
- /*
- * Check AH/ESP integrity.
- */
- if (ipsec6_in_reject(m, in6p)) {
-#ifdef IPSEC
- ipsec6stat.in_polvio++;
-#endif /* IPSEC */
- goto bad;
+ return (IPPROTO_DONE);
}
-#endif /*IPSEC || FAST_IPSEC*/
+ INP_LOCK(inp);
+ udp6_append(inp, m, off, &fromsa);
+ INP_UNLOCK(inp);
+ INP_INFO_RUNLOCK(&udbinfo);
+ return (IPPROTO_DONE);
- /*
- * Construct sockaddr format source address.
- * Stuff source address and datagram in user buffer.
- */
- init_sin6(&fromsa, m);
- fromsa.sin6_port = uh->uh_sport;
- if (in6p->in6p_flags & IN6P_CONTROLOPTS
- || in6p->in6p_socket->so_options & SO_TIMESTAMP)
- ip6_savecontrol(in6p, m, &opts);
- m_adj(m, off + sizeof(struct udphdr));
- if (sbappendaddr(&in6p->in6p_socket->so_rcv,
- (struct sockaddr *)&fromsa, m, opts) == 0) {
- udpstat.udps_fullsock++;
- goto bad;
- }
- sorwakeup(in6p->in6p_socket);
- return IPPROTO_DONE;
-bad:
+badheadlocked:
+ INP_INFO_RUNLOCK(&udbinfo);
+badunlocked:
if (m)
m_freem(m);
- if (opts)
- m_freem(opts);
- return IPPROTO_DONE;
+ return (IPPROTO_DONE);
}
void
-udp6_ctlinput(cmd, sa, d)
- int cmd;
- struct sockaddr *sa;
- void *d;
+udp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
{
struct udphdr uh;
struct ip6_hdr *ip6;
@@ -438,7 +405,7 @@
* M and OFF are valid.
*/
- /* check if we can safely examine src and dst ports */
+ /* Check if we can safely examine src and dst ports. */
if (m->m_pkthdr.len < off + sizeof(*uhp))
return;
@@ -446,12 +413,11 @@
m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh);
(void) in6_pcbnotify(&udbinfo, sa, uh.uh_dport,
- (struct sockaddr *)ip6cp->ip6c_src,
- uh.uh_sport, cmd, cmdarg, notify);
+ (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd,
+ cmdarg, notify);
} else
(void) in6_pcbnotify(&udbinfo, sa, 0,
- (const struct sockaddr *)sa6_src,
- 0, cmd, cmdarg, notify);
+ (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify);
}
static int
@@ -460,9 +426,9 @@
struct xucred xuc;
struct sockaddr_in6 addrs[2];
struct inpcb *inp;
- int error, s;
+ int error;
- error = suser(req->td);
+ error = priv_check(req->td, PRIV_NETINET_GETCRED);
if (error)
return (error);
@@ -477,76 +443,305 @@
(error = sa6_embedscope(&addrs[1], ip6_use_defzone)) != 0) {
return (error);
}
- s = splnet();
+ INP_INFO_RLOCK(&udbinfo);
inp = in6_pcblookup_hash(&udbinfo, &addrs[1].sin6_addr,
- addrs[1].sin6_port,
- &addrs[0].sin6_addr, addrs[0].sin6_port,
- 1, NULL);
- if (!inp || !inp->inp_socket) {
+ addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, 1,
+ NULL);
+ if (inp == NULL) {
+ INP_INFO_RUNLOCK(&udbinfo);
+ return (ENOENT);
+ }
+ INP_LOCK(inp);
+ if (inp->inp_socket == NULL) {
error = ENOENT;
goto out;
}
+ error = cr_canseesocket(req->td->td_ucred, inp->inp_socket);
+ if (error)
+ goto out;
cru2x(inp->inp_socket->so_cred, &xuc);
- error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
out:
- splx(s);
+ INP_UNLOCK(inp);
+ INP_INFO_RUNLOCK(&udbinfo);
+ if (error == 0)
+ error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
return (error);
}
-SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW,
- 0, 0,
- udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection");
+SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0,
+ 0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection");
static int
+udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
+ struct mbuf *control, struct thread *td)
+{
+ u_int32_t ulen = m->m_pkthdr.len;
+ u_int32_t plen = sizeof(struct udphdr) + ulen;
+ struct ip6_hdr *ip6;
+ struct udphdr *udp6;
+ struct in6_addr *laddr, *faddr;
+ struct sockaddr_in6 *sin6 = NULL;
+ struct ifnet *oifp = NULL;
+ int scope_ambiguous = 0;
+ u_short fport;
+ int error = 0;
+ struct ip6_pktopts *optp, opt;
+ int priv;
+ int af = AF_INET6, hlen = sizeof(struct ip6_hdr);
+ int flags;
+ struct sockaddr_in6 tmp;
+
+ INP_LOCK_ASSERT(inp);
+
+ priv = 0;
+ if (td && !suser(td))
+ priv = 1;
+
+ if (addr6) {
+ /* addr6 has been validated in udp6_send(). */
+ sin6 = (struct sockaddr_in6 *)addr6;
+
+ /* protect *sin6 from overwrites */
+ tmp = *sin6;
+ sin6 = &tmp;
+
+ /*
+	 * The application should provide a proper zone ID or the use of
+	 * default zone IDs should be enabled.  Unfortunately, some
+	 * applications do not behave as they should, so we need a
+ * workaround. Even if an appropriate ID is not determined,
+ * we'll see if we can determine the outgoing interface. If we
+ * can, determine the zone ID based on the interface below.
+ */
+ if (sin6->sin6_scope_id == 0 && !ip6_use_defzone)
+ scope_ambiguous = 1;
+ if ((error = sa6_embedscope(sin6, ip6_use_defzone)) != 0)
+ return (error);
+ }
+
+ if (control) {
+ if ((error = ip6_setpktopts(control, &opt,
+ inp->in6p_outputopts, priv, IPPROTO_UDP)) != 0)
+ goto release;
+ optp = &opt;
+ } else
+ optp = inp->in6p_outputopts;
+
+ if (sin6) {
+ faddr = &sin6->sin6_addr;
+
+ /*
+ * IPv4 version of udp_output calls in_pcbconnect in this case,
+ * which needs splnet and affects performance.
+ * Since we saw no essential reason for calling in_pcbconnect,
+ * we get rid of such kind of logic, and call in6_selectsrc
+ * and in6_pcbsetport in order to fill in the local address
+ * and the local port.
+ */
+ if (sin6->sin6_port == 0) {
+ error = EADDRNOTAVAIL;
+ goto release;
+ }
+
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
+ /* how about ::ffff:0.0.0.0 case? */
+ error = EISCONN;
+ goto release;
+ }
+
+ fport = sin6->sin6_port; /* allow 0 port */
+
+ if (IN6_IS_ADDR_V4MAPPED(faddr)) {
+ if ((inp->in6p_flags & IN6P_IPV6_V6ONLY)) {
+ /*
+ * I believe we should explicitly discard the
+ * packet when mapped addresses are disabled,
+ * rather than send the packet as an IPv6 one.
+ * If we chose the latter approach, the packet
+ * might be sent out on the wire based on the
+ * default route, the situation which we'd
+ * probably want to avoid.
+ * (20010421 jinmei at kame.net)
+ */
+ error = EINVAL;
+ goto release;
+ }
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
+ !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) {
+ /*
+ * when remote addr is an IPv4-mapped address,
+ * local addr should not be an IPv6 address,
+ * since you cannot determine how to map IPv6
+ * source address to IPv4.
+ */
+ error = EINVAL;
+ goto release;
+ }
+
+ af = AF_INET;
+ }
+
+ if (!IN6_IS_ADDR_V4MAPPED(faddr)) {
+ laddr = in6_selectsrc(sin6, optp, inp->in6p_moptions,
+ NULL, &inp->in6p_laddr, &oifp, &error);
+ if (oifp && scope_ambiguous &&
+ (error = in6_setscope(&sin6->sin6_addr,
+ oifp, NULL))) {
+ goto release;
+ }
+ } else
+ laddr = &inp->in6p_laddr; /* XXX */
+ if (laddr == NULL) {
+ if (error == 0)
+ error = EADDRNOTAVAIL;
+ goto release;
+ }
+ if (inp->in6p_lport == 0 &&
+ (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0)
+ goto release;
+ } else {
+ if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
+ error = ENOTCONN;
+ goto release;
+ }
+ if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) {
+ if ((inp->in6p_flags & IN6P_IPV6_V6ONLY)) {
+ /*
+ * XXX: this case would happen when the
+ * application sets the V6ONLY flag after
+ * connecting the foreign address.
+ * Such applications should be fixed,
+ * so we bark here.
+ */
+ log(LOG_INFO, "udp6_output: IPV6_V6ONLY "
+ "option was set for a connected socket\n");
+ error = EINVAL;
+ goto release;
+ } else
+ af = AF_INET;
+ }
+ laddr = &inp->in6p_laddr;
+ faddr = &inp->in6p_faddr;
+ fport = inp->in6p_fport;
+ }
+
+ if (af == AF_INET)
+ hlen = sizeof(struct ip);
+
+ /*
+ * Calculate data length and get a mbuf
+ * for UDP and IP6 headers.
+ */
+ M_PREPEND(m, hlen + sizeof(struct udphdr), M_DONTWAIT);
+ if (m == 0) {
+ error = ENOBUFS;
+ goto release;
+ }
+
+ /*
+ * Stuff checksum and output datagram.
+ */
+ udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen);
+ udp6->uh_sport = inp->in6p_lport; /* lport is always set in the PCB */
+ udp6->uh_dport = fport;
+ if (plen <= 0xffff)
+ udp6->uh_ulen = htons((u_short)plen);
+ else
+ udp6->uh_ulen = 0;
+ udp6->uh_sum = 0;
+
+ switch (af) {
+ case AF_INET6:
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6->ip6_flow = inp->in6p_flowinfo & IPV6_FLOWINFO_MASK;
+ ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
+ ip6->ip6_vfc |= IPV6_VERSION;
+#if 0 /* ip6_plen will be filled in ip6_output. */
+ ip6->ip6_plen = htons((u_short)plen);
+#endif
+ ip6->ip6_nxt = IPPROTO_UDP;
+ ip6->ip6_hlim = in6_selecthlim(inp, NULL);
+ ip6->ip6_src = *laddr;
+ ip6->ip6_dst = *faddr;
+
+ if ((udp6->uh_sum = in6_cksum(m, IPPROTO_UDP,
+ sizeof(struct ip6_hdr), plen)) == 0) {
+ udp6->uh_sum = 0xffff;
+ }
+
+ flags = 0;
+
+ udpstat.udps_opackets++;
+ error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions,
+ NULL, inp);
+ break;
+ case AF_INET:
+ error = EAFNOSUPPORT;
+ goto release;
+ }
+ goto releaseopt;
+
+release:
+ m_freem(m);
+
+releaseopt:
+ if (control) {
+ ip6_clearpktopts(&opt, -1);
+ m_freem(control);
+ }
+ return (error);
+}
+
+static void
udp6_abort(struct socket *so)
{
struct inpcb *inp;
- int s;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL; /* ??? possible? panic instead? */
+ KASSERT(inp != NULL, ("udp6_abort: inp == NULL"));
+
+#ifdef INET
+ if (inp->inp_vflag & INP_IPV4) {
+ struct pr_usrreqs *pru;
+
+ pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
+ (*pru->pru_abort)(so);
+ return;
}
- soisdisconnected(so);
- s = splnet();
+#endif
+
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
- in6_pcbdetach(inp);
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
+ in6_pcbdisconnect(inp);
+ inp->in6p_laddr = in6addr_any;
+ soisdisconnected(so);
+ }
+ INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- splx(s);
- return 0;
}
static int
udp6_attach(struct socket *so, int proto, struct thread *td)
{
struct inpcb *inp;
- int s, error;
+ int error;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp != 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp == NULL, ("udp6_attach: inp != NULL"));
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
error = soreserve(so, udp_sendspace, udp_recvspace);
- if (error) {
- INP_INFO_WUNLOCK(&udbinfo);
- return error;
- }
+ if (error)
+ return (error);
}
- s = splnet();
- error = in_pcballoc(so, &udbinfo, "udp6inp");
- splx(s);
+ INP_INFO_WLOCK(&udbinfo);
+ error = in_pcballoc(so, &udbinfo);
if (error) {
INP_INFO_WUNLOCK(&udbinfo);
- return error;
+ return (error);
}
inp = (struct inpcb *)so->so_pcb;
- INP_LOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
inp->inp_vflag |= INP_IPV6;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
@@ -561,23 +756,20 @@
*/
inp->inp_ip_ttl = ip_defttl;
INP_UNLOCK(inp);
- return 0;
+ return (0);
}
static int
udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
struct inpcb *inp;
- int s, error;
+ int error;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
- INP_LOCK(inp);
+ KASSERT(inp != NULL, ("udp6_bind: inp == NULL"));
+ INP_INFO_WLOCK(&udbinfo);
+ INP_LOCK(inp);
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
@@ -593,36 +785,58 @@
in6_sin6_2_sin(&sin, sin6_p);
inp->inp_vflag |= INP_IPV4;
inp->inp_vflag &= ~INP_IPV6;
- s = splnet();
error = in_pcbbind(inp, (struct sockaddr *)&sin,
td->td_ucred);
goto out;
}
}
- s = splnet();
error = in6_pcbbind(inp, nam, td->td_ucred);
out:
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- splx(s);
- return error;
+ return (error);
}
-static int
-udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
+static void
+udp6_close(struct socket *so)
{
struct inpcb *inp;
- int s, error;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
+ KASSERT(inp != NULL, ("udp6_close: inp == NULL"));
+
+#ifdef INET
+ if (inp->inp_vflag & INP_IPV4) {
+ struct pr_usrreqs *pru;
+
+ pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
+ (*pru->pru_disconnect)(so);
+ return;
}
+#endif
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
+ in6_pcbdisconnect(inp);
+ inp->in6p_laddr = in6addr_any;
+ soisdisconnected(so);
+ }
+ INP_UNLOCK(inp);
+ INP_INFO_WUNLOCK(&udbinfo);
+}
+
+static int
+udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+ struct inpcb *inp;
+ int error;
+
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("udp6_connect: inp == NULL"));
+ INP_INFO_WLOCK(&udbinfo);
+ INP_LOCK(inp);
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
struct sockaddr_in6 *sin6_p;
@@ -630,13 +844,13 @@
if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) {
struct sockaddr_in sin;
- if (inp->inp_faddr.s_addr != INADDR_ANY)
- return EISCONN;
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ error = EISCONN;
+ goto out;
+ }
in6_sin6_2_sin(&sin, sin6_p);
- s = splnet();
error = in_pcbconnect(inp, (struct sockaddr *)&sin,
td->td_ucred);
- splx(s);
if (error == 0) {
inp->inp_vflag |= INP_IPV4;
inp->inp_vflag &= ~INP_IPV6;
@@ -649,9 +863,7 @@
error = EISCONN;
goto out;
}
- s = splnet();
error = in6_pcbconnect(inp, nam, td->td_ucred);
- splx(s);
if (error == 0) {
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
/* should be non mapped addr */
@@ -663,41 +875,34 @@
out:
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return error;
+ return (error);
}
-static int
+static void
udp6_detach(struct socket *so)
{
struct inpcb *inp;
- int s;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("udp6_detach: inp == NULL"));
+
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
- s = splnet();
in6_pcbdetach(inp);
- splx(s);
+ in6_pcbfree(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return 0;
}
static int
udp6_disconnect(struct socket *so)
{
struct inpcb *inp;
- int error, s;
+ int error;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- return EINVAL;
- }
+ KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL"));
+
+ INP_INFO_WLOCK(&udbinfo);
INP_LOCK(inp);
#ifdef INET
@@ -715,36 +920,30 @@
goto out;
}
- s = splnet();
in6_pcbdisconnect(inp);
inp->in6p_laddr = in6addr_any;
- splx(s);
/* XXXRW: so_state locking? */
so->so_state &= ~SS_ISCONNECTED; /* XXX */
out:
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return 0;
+ return (0);
}
static int
-udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
- struct mbuf *control, struct thread *td)
+udp6_send(struct socket *so, int flags, struct mbuf *m,
+ struct sockaddr *addr, struct mbuf *control, struct thread *td)
{
struct inpcb *inp;
int error = 0;
- INP_INFO_WLOCK(&udbinfo);
inp = sotoinpcb(so);
- if (inp == 0) {
- INP_INFO_WUNLOCK(&udbinfo);
- m_freem(m);
- return EINVAL;
- }
- INP_LOCK(inp);
+ KASSERT(inp != NULL, ("udp6_send: inp == NULL"));
+ INP_INFO_WLOCK(&udbinfo);
+ INP_LOCK(inp);
if (addr) {
- if (addr->sa_len != sizeof(struct sockaddr_in6)) {
+ if (addr->sa_len != sizeof(struct sockaddr_in6)) {
error = EINVAL;
goto bad;
}
@@ -764,7 +963,7 @@
else {
sin6 = (struct sockaddr_in6 *)addr;
hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)
- ? 1 : 0;
+ ? 1 : 0;
}
if (hasv4addr) {
struct pr_usrreqs *pru;
@@ -772,11 +971,10 @@
if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
!IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) {
/*
- * when remote addr is IPv4-mapped
- * address, local addr should not be
- * an IPv6 address; since you cannot
- * determine how to map IPv6 source
- * address to IPv4.
+ * When remote addr is IPv4-mapped address,
+ * local addr should not be an IPv6 address;
+ * since you cannot determine how to map IPv6
+ * source address to IPv4.
*/
error = EINVAL;
goto out;
@@ -785,20 +983,22 @@
in6_sin6_2_sin_in_sock(addr);
pru = inetsw[ip_protox[IPPROTO_UDP]].pr_usrreqs;
error = ((*pru->pru_send)(so, flags, m, addr, control,
- td));
+ td));
/* addr will just be freed in sendit(). */
goto out;
}
}
#endif
-
+#ifdef MAC
+ mac_create_mbuf_from_inpcb(inp, m);
+#endif
error = udp6_output(inp, m, addr, control, td);
out:
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
- return error;
+ return (error);
- bad:
+bad:
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
m_freem(m);
@@ -817,5 +1017,6 @@
.pru_send = udp6_send,
.pru_shutdown = udp_shutdown,
.pru_sockaddr = in6_mapped_sockaddr,
- .pru_sosetlabel = in_pcbsosetlabel
+ .pru_sosetlabel = in_pcbsosetlabel,
+ .pru_close = udp6_close
};
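For reference, the checksum handling in the new udp6_output() above implements the rule that UDP over IPv6 may not be transmitted with a zero checksum: after in6_cksum() runs over the pseudo-header and payload, a result of 0 is stored as 0xffff. Below is a minimal userland sketch of that remapping; csum16() is a hypothetical stand-in for the kernel's in6_cksum() and does not cover the pseudo-header.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Stand-in for the kernel's in6_cksum(): a plain 16-bit one's-complement
 * sum over a buffer (the real function also covers the IPv6 pseudo-header).
 */
static uint16_t
csum16(const uint8_t *buf, size_t len)
{
	uint32_t sum = 0;

	while (len > 1) {
		sum += ((uint32_t)buf[0] << 8) | buf[1];
		buf += 2;
		len -= 2;
	}
	if (len > 0)
		sum += (uint32_t)buf[0] << 8;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)~sum);
}

/*
 * UDP over IPv6 may not carry a zero checksum, so a computed value of 0
 * is remapped to 0xffff, the same adjustment udp6_output() makes above.
 */
static uint16_t
udp6_final_cksum(const uint8_t *pkt, size_t len)
{
	uint16_t sum = csum16(pkt, len);

	return (sum == 0 ? 0xffff : sum);
}

int
main(void)
{
	uint8_t payload[] = { 0x12, 0x34, 0x56, 0x78 };

	printf("checksum: 0x%04x\n",
	    udp6_final_cksum(payload, sizeof(payload)));
	return (0);
}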
Index: in6.h
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/netinet6/in6.h -L sys/netinet6/in6.h -u -r1.3 -r1.4
--- sys/netinet6/in6.h
+++ sys/netinet6/in6.h
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6.h,v 1.36.2.6 2006/03/22 06:32:54 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6.h,v 1.51 2007/07/19 09:16:40 bz Exp $ */
/* $KAME: in6.h,v 1.89 2001/05/27 13:28:35 itojun Exp $ */
/*-
@@ -77,31 +77,9 @@
#define __KAME_VERSION "FreeBSD"
/*
- * Local port number conventions:
- *
- * Ports < IPPORT_RESERVED are reserved for privileged processes (e.g. root),
- * unless a kernel is compiled with IPNOPRIVPORTS defined.
- *
- * When a user does a bind(2) or connect(2) with a port number of zero,
- * a non-conflicting local port address is chosen.
- *
- * The default range is IPPORT_ANONMIN to IPPORT_ANONMAX, although
- * that is settable by sysctl(3); net.inet.ip.anonportmin and
- * net.inet.ip.anonportmax respectively.
- *
- * A user may set the IPPROTO_IP option IP_PORTRANGE to change this
- * default assignment range.
- *
- * The value IP_PORTRANGE_DEFAULT causes the default behavior.
- *
- * The value IP_PORTRANGE_HIGH is the same as IP_PORTRANGE_DEFAULT,
- * and exists only for FreeBSD compatibility purposes.
- *
- * The value IP_PORTRANGE_LOW changes the range to the "low" are
- * that is (by convention) restricted to privileged processes.
- * This convention is based on "vouchsafe" principles only.
- * It is only secure if you trust the remote host to restrict these ports.
- * The range is IPPORT_RESERVEDMIN to IPPORT_RESERVEDMAX.
+ * IPv6 port allocation rules should mirror the IPv4 rules and are controlled
+ * by the net.inet.ip.portrange sysctl tree.  The following defines exist
+ * for compatibility with userland applications that need them.
*/
#if __BSD_VISIBLE
#define IPV6PORT_RESERVED 1024
@@ -339,7 +317,7 @@
(IN6_IS_ADDR_MULTICAST(a) && \
(IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_LINKLOCAL))
#define IN6_IS_ADDR_MC_SITELOCAL(a) \
- (IN6_IS_ADDR_MULTICAST(a) && \
+ (IN6_IS_ADDR_MULTICAST(a) && \
(IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_SITELOCAL))
#define IN6_IS_ADDR_MC_ORGLOCAL(a) \
(IN6_IS_ADDR_MULTICAST(a) && \
@@ -355,7 +333,7 @@
(IN6_IS_ADDR_MULTICAST(a) && \
(__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_LINKLOCAL))
#define IN6_IS_ADDR_MC_SITELOCAL(a) \
- (IN6_IS_ADDR_MULTICAST(a) && \
+ (IN6_IS_ADDR_MULTICAST(a) && \
(__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_SITELOCAL))
#define IN6_IS_ADDR_MC_ORGLOCAL(a) \
(IN6_IS_ADDR_MULTICAST(a) && \
@@ -372,6 +350,10 @@
#define IN6_IS_SCOPE_LINKLOCAL(a) \
((IN6_IS_ADDR_LINKLOCAL(a)) || \
(IN6_IS_ADDR_MC_LINKLOCAL(a)))
+#define IN6_IS_SCOPE_EMBED(a) \
+ ((IN6_IS_ADDR_LINKLOCAL(a)) || \
+ (IN6_IS_ADDR_MC_LINKLOCAL(a)) || \
+ (IN6_IS_ADDR_MC_INTFACELOCAL(a)))
#define IFA6_IS_DEPRECATED(a) \
((a)->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME && \
@@ -433,7 +415,8 @@
#if 1 /* IPSEC */
#define IPV6_IPSEC_POLICY 28 /* struct; get/set security policy */
-#endif
+#endif /* IPSEC */
+
#define IPV6_FAITH 29 /* bool; accept FAITH'ed connections */
#if 1 /* IPV6FIREWALL */
@@ -489,6 +472,14 @@
* the source address.
*/
+/*
+ * The following option is private; do not use it from user applications.
+ * It is deliberately defined to the same value as IP_MSFILTER.
+ */
+#define IPV6_MSFILTER 74 /* struct __msfilterreq;
+ * set/get multicast source filter list.
+ */
+
/* to define items, should talk with KAME guys first, for *BSD compatibility */
#define IPV6_RTHDR_LOOSE 0 /* this hop need not be a neighbor. XXX old spec */
@@ -509,6 +500,18 @@
unsigned int ipv6mr_interface;
};
+#ifdef notyet
+/*
+ * Argument structure for IPV6_ADD_SOURCE_MEMBERSHIP,
+ * IPV6_DROP_SOURCE_MEMBERSHIP, IPV6_BLOCK_SOURCE, and IPV6_UNBLOCK_SOURCE.
+ */
+struct ipv6_mreq_source {
+ struct in6_addr ipv6mr_multiaddr;
+ struct in6_addr ipv6mr_sourceaddr;
+ uint32_t ipv6mr_interface;
+};
+#endif
+
/*
* IPV6_PKTINFO: Packet information(RFC2292 sec 5)
*/
@@ -596,8 +599,7 @@
/* New entries should be added here from current IPV6CTL_MAXID value. */
/* to define items, should talk with KAME guys first, for *BSD compatibility */
#define IPV6CTL_STEALTH 45
-#define IPV6CTL_RTHDR0_ALLOWED 46
-#define IPV6CTL_MAXID 47
+#define IPV6CTL_MAXID 46
#endif /* __BSD_VISIBLE */
/*
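The in6.h hunk above adds IN6_IS_SCOPE_EMBED, which appears to group the address classes whose KAME-style zone IDs get embedded in the address itself (link-local unicast plus link-local and interface-local multicast). IN6_IS_SCOPE_EMBED is kernel-only, but the classification macros it builds on are standard; a small illustrative userland check using them (the address string is just an example):

#include <stdio.h>
#include <arpa/inet.h>
#include <netinet/in.h>

int
main(void)
{
	struct in6_addr a;
	const char *s = "fe80::1";	/* example link-local address */

	if (inet_pton(AF_INET6, s, &a) != 1)
		return (1);
	/* The standard macros underlying the kernel-only scope test. */
	printf("%s: linklocal=%d mc_linklocal=%d\n", s,
	    IN6_IS_ADDR_LINKLOCAL(&a) != 0,
	    IN6_IS_ADDR_MC_LINKLOCAL(&a) != 0);
	return (0);
}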
Index: scope6.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/scope6.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/scope6.c -L sys/netinet6/scope6.c -u -r1.1.1.1 -r1.2
--- sys/netinet6/scope6.c
+++ sys/netinet6/scope6.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/scope6.c,v 1.12.2.1 2005/11/04 20:26:15 ume Exp $ */
+/* $FreeBSD: src/sys/netinet6/scope6.c,v 1.17 2007/07/05 16:23:48 delphij Exp $ */
/* $KAME: scope6.c,v 1.10 2000/07/24 13:29:31 itojun Exp $ */
/*-
@@ -67,7 +67,7 @@
(((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->scope6_id)
void
-scope6_init()
+scope6_init(void)
{
SCOPE6_LOCK_INIT();
@@ -75,8 +75,7 @@
}
struct scope6_id *
-scope6_ifattach(ifp)
- struct ifnet *ifp;
+scope6_ifattach(struct ifnet *ifp)
{
struct scope6_id *sid;
@@ -99,17 +98,14 @@
}
void
-scope6_ifdetach(sid)
- struct scope6_id *sid;
+scope6_ifdetach(struct scope6_id *sid)
{
free(sid, M_IFADDR);
}
int
-scope6_set(ifp, idlist)
- struct ifnet *ifp;
- struct scope6_id *idlist;
+scope6_set(struct ifnet *ifp, struct scope6_id *idlist)
{
int i;
int error = 0;
@@ -176,9 +172,7 @@
}
int
-scope6_get(ifp, idlist)
- struct ifnet *ifp;
- struct scope6_id *idlist;
+scope6_get(struct ifnet *ifp, struct scope6_id *idlist)
{
/* We only need to lock the interface's afdata for SID() to work. */
IF_AFDATA_LOCK(ifp);
@@ -202,8 +196,7 @@
* Get a scope of the address. Node-local, link-local, site-local or global.
*/
int
-in6_addrscope(addr)
- struct in6_addr *addr;
+in6_addrscope(struct in6_addr *addr)
{
int scope;
@@ -261,9 +254,12 @@
return IPV6_ADDR_SCOPE_GLOBAL;
}
+/*
+ * ifp - note that this might be NULL
+ */
+
void
-scope6_setdefault(ifp)
- struct ifnet *ifp; /* note that this might be NULL */
+scope6_setdefault(struct ifnet *ifp)
{
/*
* Currently, this function just sets the default "interfaces"
@@ -285,8 +281,7 @@
}
int
-scope6_get_default(idlist)
- struct scope6_id *idlist;
+scope6_get_default(struct scope6_id *idlist)
{
SCOPE6_LOCK();
@@ -297,8 +292,7 @@
}
u_int32_t
-scope6_addr2default(addr)
- struct in6_addr *addr;
+scope6_addr2default(struct in6_addr *addr)
{
u_int32_t id;
@@ -328,9 +322,7 @@
* address.
*/
int
-sa6_embedscope(sin6, defaultok)
- struct sockaddr_in6 *sin6;
- int defaultok;
+sa6_embedscope(struct sockaddr_in6 *sin6, int defaultok)
{
struct ifnet *ifp;
u_int32_t zoneid;
@@ -366,15 +358,15 @@
* generate standard sockaddr_in6 from embedded form.
*/
int
-sa6_recoverscope(sin6)
- struct sockaddr_in6 *sin6;
+sa6_recoverscope(struct sockaddr_in6 *sin6)
{
+ char ip6buf[INET6_ADDRSTRLEN];
u_int32_t zoneid;
if (sin6->sin6_scope_id != 0) {
log(LOG_NOTICE,
"sa6_recoverscope: assumption failure (non 0 ID): %s%%%d\n",
- ip6_sprintf(&sin6->sin6_addr), sin6->sin6_scope_id);
+ ip6_sprintf(ip6buf, &sin6->sin6_addr), sin6->sin6_scope_id);
/* XXX: proceed anyway... */
}
if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) ||
@@ -401,12 +393,11 @@
* Determine the appropriate scope zone ID for in6 and ifp. If ret_id is
* non NULL, it is set to the zone ID. If the zone ID needs to be embedded
* in the in6_addr structure, in6 will be modified.
+ *
+ * ret_id - unnecessary?
*/
int
-in6_setscope(in6, ifp, ret_id)
- struct in6_addr *in6;
- struct ifnet *ifp;
- u_int32_t *ret_id; /* unnecessary? */
+in6_setscope(struct in6_addr *in6, struct ifnet *ifp, u_int32_t *ret_id)
{
int scope;
u_int32_t zoneid = 0;
@@ -480,8 +471,7 @@
* is intact; return non 0 if the address is modified.
*/
int
-in6_clearscope(in6)
- struct in6_addr *in6;
+in6_clearscope(struct in6_addr *in6)
{
int modified = 0;
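One recurring change in the scope6.c hunks above (and elsewhere in this commit) is that ip6_sprintf() now takes a caller-supplied buffer of INET6_ADDRSTRLEN bytes rather than returning a pointer into shared static storage as the old one-argument form did. The userland analogue of that pattern is inet_ntop(); a minimal sketch, with an arbitrary example address:

#include <stdio.h>
#include <arpa/inet.h>
#include <netinet/in.h>

int
main(void)
{
	struct in6_addr a;
	char buf[INET6_ADDRSTRLEN];	/* caller-supplied, like ip6buf[] */

	if (inet_pton(AF_INET6, "2001:db8::1", &a) != 1)
		return (1);
	/* Format into the caller's buffer; no shared static state. */
	if (inet_ntop(AF_INET6, &a, buf, sizeof(buf)) == NULL)
		return (1);
	printf("formatted: %s\n", buf);
	return (0);
}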
Index: in6_src.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/in6_src.c,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -L sys/netinet6/in6_src.c -L sys/netinet6/in6_src.c -u -r1.1.1.2 -r1.2
--- sys/netinet6/in6_src.c
+++ sys/netinet6/in6_src.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/in6_src.c,v 1.30.2.4 2005/12/25 14:03:37 suz Exp $ */
+/* $FreeBSD: src/sys/netinet6/in6_src.c,v 1.46 2007/07/05 16:29:39 delphij Exp $ */
/* $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $ */
/*-
@@ -66,8 +66,10 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -93,8 +95,6 @@
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
-#include <net/net_osdep.h>
-
static struct mtx addrsel_lock;
#define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF)
#define ADDRSEL_LOCK() mtx_lock(&addrsel_lock)
@@ -147,24 +147,19 @@
sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
ip6stat.ip6s_sources_rule[(r)]++; \
/* printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \
- goto next; /* XXX: we can't use 'continue' here */ \
+ goto next; /* XXX: we can't use 'continue' here */ \
} while(0)
#define BREAK(r) do { \
if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
ip6stat.ip6s_sources_rule[(r)]++; \
- goto out; /* XXX: we can't use 'break' here */ \
+ goto out; /* XXX: we can't use 'break' here */ \
} while(0)
struct in6_addr *
-in6_selectsrc(dstsock, opts, mopts, ro, laddr, ifpp, errorp)
- struct sockaddr_in6 *dstsock;
- struct ip6_pktopts *opts;
- struct ip6_moptions *mopts;
- struct route_in6 *ro;
- struct in6_addr *laddr;
- struct ifnet **ifpp;
- int *errorp;
+in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
+ struct ip6_moptions *mopts, struct route_in6 *ro,
+ struct in6_addr *laddr, struct ifnet **ifpp, int *errorp)
{
struct in6_addr dst;
struct ifnet *ifp = NULL;
@@ -430,16 +425,14 @@
return (&ia->ia_addr.sin6_addr);
}
+/*
+ * clone - meaningful only for bsdi and freebsd
+ */
static int
-selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone, norouteok)
- struct sockaddr_in6 *dstsock;
- struct ip6_pktopts *opts;
- struct ip6_moptions *mopts;
- struct route_in6 *ro;
- struct ifnet **retifp;
- struct rtentry **retrt;
- int clone; /* meaningful only for bsdi and freebsd. */
- int norouteok;
+selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
+ struct ip6_moptions *mopts, struct route_in6 *ro,
+ struct ifnet **retifp, struct rtentry **retrt, int clone,
+ int norouteok)
{
int error = 0;
struct ifnet *ifp = NULL;
@@ -447,16 +440,17 @@
struct sockaddr_in6 *sin6_next;
struct in6_pktinfo *pi = NULL;
struct in6_addr *dst = &dstsock->sin6_addr;
-
#if 0
+ char ip6buf[INET6_ADDRSTRLEN];
+
if (dstsock->sin6_addr.s6_addr32[0] == 0 &&
dstsock->sin6_addr.s6_addr32[1] == 0 &&
!IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) {
printf("in6_selectroute: strange destination %s\n",
- ip6_sprintf(&dstsock->sin6_addr));
+ ip6_sprintf(ip6buf, &dstsock->sin6_addr));
} else {
printf("in6_selectroute: destination = %s%%%d\n",
- ip6_sprintf(&dstsock->sin6_addr),
+ ip6_sprintf(ip6buf, &dstsock->sin6_addr),
dstsock->sin6_scope_id); /* for debug */
}
#endif
@@ -632,12 +626,8 @@
}
static int
-in6_selectif(dstsock, opts, mopts, ro, retifp)
- struct sockaddr_in6 *dstsock;
- struct ip6_pktopts *opts;
- struct ip6_moptions *mopts;
- struct route_in6 *ro;
- struct ifnet **retifp;
+in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
+ struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp)
{
int error;
struct route_in6 sro;
@@ -650,7 +640,7 @@
if ((error = selectroute(dstsock, opts, mopts, ro, retifp,
&rt, 0, 1)) != 0) {
- if (rt && rt == sro.ro_rt)
+ if (ro == &sro && rt && rt == sro.ro_rt)
RTFREE(rt);
return (error);
}
@@ -675,7 +665,7 @@
if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
- if (rt && rt == sro.ro_rt)
+ if (ro == &sro && rt && rt == sro.ro_rt)
RTFREE(rt);
return (flags);
}
@@ -690,21 +680,20 @@
if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp)
*retifp = rt->rt_ifa->ifa_ifp;
- if (rt && rt == sro.ro_rt)
+ if (ro == &sro && rt && rt == sro.ro_rt)
RTFREE(rt);
return (0);
}
+/*
+ * clone - meaningful only for bsdi and freebsd
+ */
int
-in6_selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone)
- struct sockaddr_in6 *dstsock;
- struct ip6_pktopts *opts;
- struct ip6_moptions *mopts;
- struct route_in6 *ro;
- struct ifnet **retifp;
- struct rtentry **retrt;
- int clone; /* meaningful only for bsdi and freebsd. */
+in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
+ struct ip6_moptions *mopts, struct route_in6 *ro,
+ struct ifnet **retifp, struct rtentry **retrt, int clone)
{
+
return (selectroute(dstsock, opts, mopts, ro, retifp,
retrt, clone, 0));
}
@@ -717,10 +706,9 @@
* 3. The system default hoplimit.
*/
int
-in6_selecthlim(in6p, ifp)
- struct in6pcb *in6p;
- struct ifnet *ifp;
+in6_selecthlim(struct in6pcb *in6p, struct ifnet *ifp)
{
+
if (in6p && in6p->in6p_hops >= 0)
return (in6p->in6p_hops);
else if (ifp)
@@ -750,16 +738,16 @@
* share this function by all *bsd*...
*/
int
-in6_pcbsetport(laddr, inp, cred)
- struct in6_addr *laddr;
- struct inpcb *inp;
- struct ucred *cred;
+in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred)
{
struct socket *so = inp->inp_socket;
u_int16_t lport = 0, first, last, *lastport;
int count, error = 0, wild = 0;
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+ INP_LOCK_ASSERT(inp);
+
/* XXX: this is redundant when called from in6_pcbbind */
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
wild = INPLOOKUP_WILDCARD;
@@ -769,17 +757,18 @@
if (inp->inp_flags & INP_HIGHPORT) {
first = ipport_hifirstauto; /* sysctl */
last = ipport_hilastauto;
- lastport = &pcbinfo->lasthi;
+ lastport = &pcbinfo->ipi_lasthi;
} else if (inp->inp_flags & INP_LOWPORT) {
- if ((error = suser_cred(cred, 0)))
+ error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
+ if (error)
return error;
first = ipport_lowfirstauto; /* 1023 */
last = ipport_lowlastauto; /* 600 */
- lastport = &pcbinfo->lastlow;
+ lastport = &pcbinfo->ipi_lastlow;
} else {
first = ipport_firstauto; /* sysctl */
last = ipport_lastauto;
- lastport = &pcbinfo->lastport;
+ lastport = &pcbinfo->ipi_lastport;
}
/*
* Simple check to ensure all ports are not used up causing
@@ -843,7 +832,7 @@
}
void
-addrsel_policy_init()
+addrsel_policy_init(void)
{
ADDRSEL_LOCK_INIT();
ADDRSEL_SXLOCK_INIT();
@@ -856,8 +845,7 @@
}
static struct in6_addrpolicy *
-lookup_addrsel_policy(key)
- struct sockaddr_in6 *key;
+lookup_addrsel_policy(struct sockaddr_in6 *key)
{
struct in6_addrpolicy *match = NULL;
@@ -900,9 +888,7 @@
}
int
-in6_src_ioctl(cmd, data)
- u_long cmd;
- caddr_t data;
+in6_src_ioctl(u_long cmd, caddr_t data)
{
int i;
struct in6_addrpolicy ent0;
@@ -950,14 +936,14 @@
struct addrsel_policyhead addrsel_policytab;
static void
-init_policy_queue()
+init_policy_queue(void)
{
+
TAILQ_INIT(&addrsel_policytab);
}
static int
-add_addrsel_policyent(newpolicy)
- struct in6_addrpolicy *newpolicy;
+add_addrsel_policyent(struct in6_addrpolicy *newpolicy)
{
struct addrsel_policyent *new, *pol;
@@ -992,8 +978,7 @@
}
static int
-delete_addrsel_policyent(key)
- struct in6_addrpolicy *key;
+delete_addrsel_policyent(struct in6_addrpolicy *key)
{
struct addrsel_policyent *pol;
@@ -1023,9 +1008,8 @@
}
static int
-walk_addrsel_policy(callback, w)
- int (*callback) __P((struct in6_addrpolicy *, void *));
- void *w;
+walk_addrsel_policy(int (*callback) __P((struct in6_addrpolicy *, void *)),
+ void *w)
{
struct addrsel_policyent *pol;
int error = 0;
@@ -1042,9 +1026,7 @@
}
static int
-dump_addrsel_policyent(pol, arg)
- struct in6_addrpolicy *pol;
- void *arg;
+dump_addrsel_policyent(struct in6_addrpolicy *pol, void *arg)
{
int error = 0;
struct walkarg *w = arg;
@@ -1055,8 +1037,7 @@
}
static struct in6_addrpolicy *
-match_addrsel_policy(key)
- struct sockaddr_in6 *key;
+match_addrsel_policy(struct sockaddr_in6 *key)
{
struct addrsel_policyent *pent;
struct in6_addrpolicy *bestpol = NULL, *pol;
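in6_pcbsetport() above picks an ephemeral port by walking one of the sysctl-controlled ranges (low, default, or high) from a per-pcbinfo cursor and giving up after one full pass. A simplified sketch of that walk follows; port_in_use() is a hypothetical predicate standing in for the kernel's pcb hash lookup, and the range values are only examples.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the kernel's local-port lookup. */
static int
port_in_use(uint16_t port)
{
	return (port == 49153);		/* pretend one port is taken */
}

/*
 * Walk [first, last] starting just past *cursor, wrapping once; return
 * the chosen port, or -1 if every port in the range is in use.
 */
static int
pick_port(uint16_t first, uint16_t last, uint16_t *cursor)
{
	unsigned int count = (unsigned int)(last - first) + 1;
	uint16_t port = *cursor;

	while (count-- > 0) {
		if (port < first || port >= last)
			port = first;	/* wrap back to the range start */
		else
			port++;
		if (!port_in_use(port)) {
			*cursor = port;	/* remember where we stopped */
			return (port);
		}
	}
	return (-1);
}

int
main(void)
{
	uint16_t cursor = 49152;	/* e.g. the range's first port */
	int port = pick_port(49152, 65535, &cursor);

	printf("selected port: %d\n", port);
	return (port < 0);
}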
Index: frag6.c
===================================================================
RCS file: /home/cvs/src/sys/netinet6/frag6.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -L sys/netinet6/frag6.c -L sys/netinet6/frag6.c -u -r1.1.1.1 -r1.2
--- sys/netinet6/frag6.c
+++ sys/netinet6/frag6.c
@@ -1,4 +1,4 @@
-/* $FreeBSD: src/sys/netinet6/frag6.c,v 1.25 2005/01/07 02:30:34 imp Exp $ */
+/* $FreeBSD: src/sys/netinet6/frag6.c,v 1.33 2007/07/05 16:29:39 delphij Exp $ */
/* $KAME: frag6.c,v 1.33 2002/01/07 11:34:48 kjc Exp $ */
/*-
@@ -53,8 +53,6 @@
#include <netinet/in_systm.h> /* for ECN definitions */
#include <netinet/ip.h> /* for ECN definitions */
-#include <net/net_osdep.h>
-
/*
* Define it to get a correct behavior on per-interface statistics.
* You will need to perform an extra routing table lookup, per fragment,
@@ -87,12 +85,22 @@
/*
* Initialise reassembly queue and fragment identifier.
*/
+static void
+frag6_change(void *tag)
+{
+
+ ip6_maxfragpackets = nmbclusters / 4;
+ ip6_maxfrags = nmbclusters / 4;
+}
+
void
-frag6_init()
+frag6_init(void)
{
ip6_maxfragpackets = nmbclusters / 4;
ip6_maxfrags = nmbclusters / 4;
+ EVENTHANDLER_REGISTER(nmbclusters_change,
+ frag6_change, NULL, EVENTHANDLER_PRI_ANY);
IP6Q_LOCK_INIT();
@@ -132,9 +140,7 @@
* Fragment input
*/
int
-frag6_input(mp, offp, proto)
- struct mbuf **mp;
- int *offp, proto;
+frag6_input(struct mbuf **mp, int *offp, int proto)
{
struct mbuf *m = *mp, *t;
struct ip6_hdr *ip6;
@@ -149,6 +155,9 @@
int fragoff, frgpartlen; /* must be larger than u_int16_t */
struct ifnet *dstifp;
u_int8_t ecn, ecn0;
+#if 0
+ char ip6buf[INET6_ADDRSTRLEN];
+#endif
ip6 = mtod(m, struct ip6_hdr *);
#ifndef PULLDOWN_TEST
@@ -248,10 +257,11 @@
q6->ip6q_nxtp = (u_char *)nxtp;
#endif
q6->ip6q_ident = ip6f->ip6f_ident;
- q6->ip6q_arrive = 0; /* Is it used anywhere? */
- q6->ip6q_ttl = IPV6_FRAGTTL;
+ q6->ip6q_ttl = IPV6_FRAGTTL;
q6->ip6q_src = ip6->ip6_src;
q6->ip6q_dst = ip6->ip6_dst;
+ q6->ip6q_ecn =
+ (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */
q6->ip6q_nfrag = 0;
@@ -332,10 +342,6 @@
if (ip6af == NULL)
goto dropfrag;
bzero(ip6af, sizeof(*ip6af));
- ip6af->ip6af_head = ip6->ip6_flow;
- ip6af->ip6af_len = ip6->ip6_plen;
- ip6af->ip6af_nxt = ip6->ip6_nxt;
- ip6af->ip6af_hlim = ip6->ip6_hlim;
ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
ip6af->ip6af_off = fragoff;
ip6af->ip6af_frglen = frgpartlen;
@@ -353,14 +359,14 @@
* drop if CE and not-ECT are mixed for the same packet.
*/
ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
- ecn0 = (ntohl(q6->ip6q_down->ip6af_head) >> 20) & IPTOS_ECN_MASK;
+ ecn0 = q6->ip6q_ecn;
if (ecn == IPTOS_ECN_CE) {
if (ecn0 == IPTOS_ECN_NOTECT) {
free(ip6af, M_FTABLE);
goto dropfrag;
}
if (ecn0 != IPTOS_ECN_CE)
- q6->ip6q_down->ip6af_head |= htonl(IPTOS_ECN_CE << 20);
+ q6->ip6q_ecn = IPTOS_ECN_CE;
}
if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
free(ip6af, M_FTABLE);
@@ -417,6 +423,9 @@
* existing fragments from a security point of view.
* We don't know which fragment is the bad guy - here we trust
* fragment that came in earlier, with no real reason.
+ *
+ * Note: due to changes after disabling this part, mbuf passed to
+ * m_adj() below now does not meet the requirement.
*/
if (af6->ip6af_up != (struct ip6asfrag *)q6) {
i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
@@ -425,7 +434,7 @@
#if 0 /* suppress the noisy log */
log(LOG_ERR, "%d bytes of a fragment from %s "
"overlaps the previous fragment\n",
- i, ip6_sprintf(&q6->ip6q_src));
+ i, ip6_sprintf(ip6buf, &q6->ip6q_src));
#endif
free(ip6af, M_FTABLE);
goto dropfrag;
@@ -437,7 +446,7 @@
#if 0 /* suppress the noisy log */
log(LOG_ERR, "%d bytes of a fragment from %s "
"overlaps the succeeding fragment",
- i, ip6_sprintf(&q6->ip6q_src));
+ i, ip6_sprintf(ip6buf, &q6->ip6q_src));
#endif
free(ip6af, M_FTABLE);
goto dropfrag;
@@ -499,19 +508,18 @@
free(ip6af, M_FTABLE);
ip6 = mtod(m, struct ip6_hdr *);
ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
- ip6->ip6_src = q6->ip6q_src;
- ip6->ip6_dst = q6->ip6q_dst;
+ if (q6->ip6q_ecn == IPTOS_ECN_CE)
+ ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
nxt = q6->ip6q_nxt;
#ifdef notyet
*q6->ip6q_nxtp = (u_char)(nxt & 0xff);
#endif
- /*
- * Delete frag6 header with as a few cost as possible.
- */
- if (offset < m->m_len) {
+ /* Delete frag6 header */
+ if (m->m_len >= offset + sizeof(struct ip6_frag)) {
+ /* This is the only possible case with !PULLDOWN_TEST */
ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
- offset);
+ offset);
m->m_data += sizeof(struct ip6_frag);
m->m_len -= sizeof(struct ip6_frag);
} else {
@@ -573,8 +581,7 @@
* associated datagrams.
*/
void
-frag6_freef(q6)
- struct ip6q *q6;
+frag6_freef(struct ip6q *q6)
{
struct ip6asfrag *af6, *down6;
@@ -618,8 +625,7 @@
* Like insque, but pointers in middle of structure.
*/
void
-frag6_enq(af6, up6)
- struct ip6asfrag *af6, *up6;
+frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6)
{
IP6Q_LOCK_ASSERT();
@@ -634,8 +640,7 @@
* To frag6_enq as remque is to insque.
*/
void
-frag6_deq(af6)
- struct ip6asfrag *af6;
+frag6_deq(struct ip6asfrag *af6)
{
IP6Q_LOCK_ASSERT();
@@ -645,8 +650,7 @@
}
void
-frag6_insque(new, old)
- struct ip6q *new, *old;
+frag6_insque(struct ip6q *new, struct ip6q *old)
{
IP6Q_LOCK_ASSERT();
@@ -658,8 +662,7 @@
}
void
-frag6_remque(p6)
- struct ip6q *p6;
+frag6_remque(struct ip6q *p6)
{
IP6Q_LOCK_ASSERT();
@@ -674,10 +677,13 @@
* queue, discard it.
*/
void
-frag6_slowtimo()
+frag6_slowtimo(void)
{
struct ip6q *q6;
- int s = splnet();
+
+#if 0
+ GIANT_REQUIRED; /* XXX bz: ip6_forward_rt */
+#endif
IP6Q_LOCK();
q6 = ip6q.ip6q_next;
@@ -719,15 +725,13 @@
ipsrcchk_rt.ro_rt = 0;
}
#endif
-
- splx(s);
}
/*
* Drain off all datagram fragments.
*/
void
-frag6_drain()
+frag6_drain(void)
{
if (IP6Q_TRYLOCK() == 0)
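The ECN handling added to frag6_input() above follows the usual rule for combining ECN codepoints across fragments: a CE fragment cannot be merged into a queue that started as not-ECT (the fragment is dropped), otherwise CE is propagated to the reassembled packet, and a not-ECT fragment is rejected once the queue already carries ECT or CE. A small sketch of that merge rule outside the kernel, using local ECN_* constants rather than the IPTOS_ECN_* macros:

#include <stdio.h>

#define ECN_NOTECT	0x00
#define ECN_ECT1	0x01
#define ECN_ECT0	0x02
#define ECN_CE		0x03

/*
 * Merge a fragment's ECN codepoint into the reassembly queue's codepoint,
 * mirroring the checks shown in the frag6_input() hunks.  Returns 0 on
 * success and -1 when the fragment must be dropped.
 */
static int
ecn_merge(int *queue_ecn, int frag_ecn)
{
	if (frag_ecn == ECN_CE) {
		if (*queue_ecn == ECN_NOTECT)
			return (-1);		/* CE into not-ECT: drop */
		if (*queue_ecn != ECN_CE)
			*queue_ecn = ECN_CE;	/* propagate the CE mark */
		return (0);
	}
	if (frag_ecn == ECN_NOTECT && *queue_ecn != ECN_NOTECT)
		return (-1);			/* not-ECT into ECT/CE: drop */
	return (0);
}

int
main(void)
{
	int q = ECN_ECT0;

	printf("merge CE: %d (queue now %d)\n", ecn_merge(&q, ECN_CE), q);
	printf("merge not-ECT: %d\n", ecn_merge(&q, ECN_NOTECT));
	return (0);
}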