[Midnightbsd-cvs] src [9116] trunk/sys/netinet: rollback to pre toecore
laffer1 at midnightbsd.org
laffer1 at midnightbsd.org
Sat Oct 1 20:29:44 EDT 2016
Revision: 9116
http://svnweb.midnightbsd.org/src/?rev=9116
Author: laffer1
Date: 2016-10-01 20:29:44 -0400 (Sat, 01 Oct 2016)
Log Message:
-----------
rollback to pre toecore
Modified Paths:
--------------
trunk/sys/netinet/tcp_offload.c
trunk/sys/netinet/tcp_offload.h
trunk/sys/netinet/tcp_syncache.c
trunk/sys/netinet/tcp_syncache.h
Modified: trunk/sys/netinet/tcp_offload.c
===================================================================
--- trunk/sys/netinet/tcp_offload.c 2016-10-02 00:13:34 UTC (rev 9115)
+++ trunk/sys/netinet/tcp_offload.c 2016-10-02 00:29:44 UTC (rev 9116)
@@ -1,176 +1,145 @@
/*-
- * Copyright (c) 2012 Chelsio Communications, Inc.
+ * Copyright (c) 2007, Chelsio Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
+ * modification, are permitted provided that the following conditions are met:
*
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Neither the name of the Chelsio Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/netinet/tcp_offload.c 252555 2013-07-03 09:25:29Z np $");
+__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_offload.c 196019 2009-08-01 19:26:27Z rwatson $");
-#include "opt_inet.h"
-
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
-#include <sys/sockopt.h>
+
#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
#include <net/route.h>
+#include <net/vnet.h>
+
#include <netinet/in.h>
+#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_offload.h>
-#define TCPOUTFLAGS
-#include <netinet/tcp_fsm.h>
-#include <netinet/toecore.h>
+#include <netinet/toedev.h>
-int registered_toedevs;
+uint32_t toedev_registration_count;
-/*
- * Provide an opportunity for a TOE driver to offload.
- */
int
tcp_offload_connect(struct socket *so, struct sockaddr *nam)
{
struct ifnet *ifp;
- struct toedev *tod;
+ struct toedev *tdev;
struct rtentry *rt;
- int error = EOPNOTSUPP;
+ int error;
- INP_WLOCK_ASSERT(sotoinpcb(so));
- KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6,
- ("%s: called with sa_family %d", __func__, nam->sa_family));
-
- if (registered_toedevs == 0)
- return (error);
-
- rt = rtalloc1(nam, 0, 0);
- if (rt)
+ if (toedev_registration_count == 0)
+ return (EINVAL);
+
+ /*
+ * Look up the route used for the connection to
+ * determine if it uses an interface capable of
+ * offloading the connection.
+ */
+ rt = rtalloc1(nam, 0 /*report*/, 0 /*ignflags*/);
+ if (rt)
RT_UNLOCK(rt);
- else
+ else
return (EHOSTUNREACH);
ifp = rt->rt_ifp;
-
- if (nam->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4))
- goto done;
- if (nam->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6))
- goto done;
-
- tod = TOEDEV(ifp);
- if (tod != NULL)
- error = tod->tod_connect(tod, so, rt, nam);
-done:
+ if ((ifp->if_capenable & IFCAP_TOE) == 0) {
+ error = EINVAL;
+ goto fail;
+ }
+
+ tdev = TOEDEV(ifp);
+ if (tdev == NULL) {
+ error = EPERM;
+ goto fail;
+ }
+
+ if (tdev->tod_can_offload(tdev, so) == 0) {
+ error = EPERM;
+ goto fail;
+ }
+
+ return (tdev->tod_connect(tdev, so, rt, nam));
+fail:
RTFREE(rt);
return (error);
}
-void
-tcp_offload_listen_start(struct tcpcb *tp)
-{
- INP_WLOCK_ASSERT(tp->t_inpcb);
+/*
+ * This file contains code as a short-term staging area before it is moved in
+ * to sys/netinet/tcp_offload.c
+ */
- EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
-}
-
void
-tcp_offload_listen_stop(struct tcpcb *tp)
+tcp_offload_twstart(struct tcpcb *tp)
{
- INP_WLOCK_ASSERT(tp->t_inpcb);
-
- EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
+ INP_INFO_WLOCK(&V_tcbinfo);
+ INP_WLOCK(tp->t_inpcb);
+ tcp_twstart(tp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
}
-void
-tcp_offload_input(struct tcpcb *tp, struct mbuf *m)
+struct tcpcb *
+tcp_offload_close(struct tcpcb *tp)
{
- struct toedev *tod = tp->tod;
- KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
- INP_WLOCK_ASSERT(tp->t_inpcb);
+ INP_INFO_WLOCK(&V_tcbinfo);
+ INP_WLOCK(tp->t_inpcb);
+ tp = tcp_close(tp);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (tp)
+ INP_WUNLOCK(tp->t_inpcb);
- tod->tod_input(tod, tp, m);
+ return (tp);
}
-int
-tcp_offload_output(struct tcpcb *tp)
+struct tcpcb *
+tcp_offload_drop(struct tcpcb *tp, int error)
{
- struct toedev *tod = tp->tod;
- int error, flags;
- KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
- INP_WLOCK_ASSERT(tp->t_inpcb);
+ INP_INFO_WLOCK(&V_tcbinfo);
+ INP_WLOCK(tp->t_inpcb);
+ tp = tcp_drop(tp, error);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (tp)
+ INP_WUNLOCK(tp->t_inpcb);
- flags = tcp_outflags[tp->t_state];
-
- if (flags & TH_RST) {
- /* XXX: avoid repeated calls like we do for FIN */
- error = tod->tod_send_rst(tod, tp);
- } else if ((flags & TH_FIN || tp->t_flags & TF_NEEDFIN) &&
- (tp->t_flags & TF_SENTFIN) == 0) {
- error = tod->tod_send_fin(tod, tp);
- if (error == 0)
- tp->t_flags |= TF_SENTFIN;
- } else
- error = tod->tod_output(tod, tp);
-
- return (error);
+ return (tp);
}
-void
-tcp_offload_rcvd(struct tcpcb *tp)
-{
- struct toedev *tod = tp->tod;
-
- KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
- INP_WLOCK_ASSERT(tp->t_inpcb);
-
- tod->tod_rcvd(tod, tp);
-}
-
-void
-tcp_offload_ctloutput(struct tcpcb *tp, int sopt_dir, int sopt_name)
-{
- struct toedev *tod = tp->tod;
-
- KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
- INP_WLOCK_ASSERT(tp->t_inpcb);
-
- tod->tod_ctloutput(tod, tp, sopt_dir, sopt_name);
-}
-
-void
-tcp_offload_detach(struct tcpcb *tp)
-{
- struct toedev *tod = tp->tod;
-
- KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
- INP_WLOCK_ASSERT(tp->t_inpcb);
-
- tod->tod_pcb_detach(tod, tp);
-}
Modified: trunk/sys/netinet/tcp_offload.h
===================================================================
--- trunk/sys/netinet/tcp_offload.h 2016-10-02 00:13:34 UTC (rev 9115)
+++ trunk/sys/netinet/tcp_offload.h 2016-10-02 00:29:44 UTC (rev 9116)
@@ -24,7 +24,7 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
- * $MidnightBSD$
+ * $FreeBSD: stable/9/sys/netinet/tcp_offload.h 218909 2011-02-21 09:01:34Z brucec $
*/
#ifndef _NETINET_TCP_OFFLOAD_H_
@@ -34,15 +34,321 @@
#error "no user-serviceable parts inside"
#endif
-extern int registered_toedevs;
+/*
+ * A driver publishes that it provides offload services
+ * by setting IFCAP_TOE in the ifnet. The offload connect
+ * will bypass any further work if the interface that a
+ * connection would use does not support TCP offload.
+ *
+ * The TOE API assumes that the tcp offload engine can offload the
+ * entire connection from set up to teardown, with some provision
+ * being made to allow the software stack to handle time wait. If
+ * the device does not meet these criteria, it is the driver's responsibility
+ * to overload the functions that it needs to in tcp_usrreqs and make
+ * its own calls to tcp_output if it needs to do so.
+ *
+ * There is currently no provision for the device advertising the congestion
+ * control algorithms it supports as there is currently no API for querying
+ * an operating system for the protocols that it has loaded. This is a desirable
+ * future extension.
+ *
+ *
+ *
+ * It is assumed that individuals deploying TOE will want connections
+ * to be offloaded without software changes so all connections on an
+ * interface providing TOE are offloaded unless the SO_NO_OFFLOAD
+ * flag is set on the socket.
+ *
+ *
+ * The toe_usrreqs structure constitutes the TOE driver's
+ * interface to the TCP stack for functionality that doesn't
+ * interact directly with userspace. If one wants to provide
+ * (optional) functionality to do zero-copy to/from
+ * userspace one still needs to override soreceive/sosend
+ * with functions that fault in and pin the user buffers.
+ *
+ * + tu_send
+ * - tells the driver that new data may have been added to the
+ * socket's send buffer - the driver should not fail if the
+ * buffer is in fact unchanged
+ * - the driver is responsible for providing credits (bytes in the send window)
+ * back to the socket by calling sbdrop() as segments are acknowledged.
+ * - The driver expects the inpcb lock to be held - the driver is expected
+ * not to drop the lock. Hence the driver is not allowed to acquire the
+ * pcbinfo lock during this call.
+ *
+ * + tu_rcvd
+ * - returns credits to the driver and triggers window updates
+ * to the peer (a credit as used here is a byte in the peer's receive window)
+ * - the driver is expected to determine how many bytes have been
+ * consumed and credit that back to the card so that it can grow
+ * the window again by maintaining its own state between invocations.
+ * - In principle this could be used to shrink the window as well as
+ * grow the window, although it is not used for that now.
+ * - this function needs to correctly handle being called any number of
+ * times without any bytes being consumed from the receive buffer.
+ * - The driver expects the inpcb lock to be held - the driver is expected
+ * not to drop the lock. Hence the driver is not allowed to acquire the
+ * pcbinfo lock during this call.
+ *
+ * + tu_disconnect
+ * - tells the driver to send FIN to peer
+ * - driver is expected to send the remaining data and then do a clean half close
+ * - disconnect implies at least half-close so only send, reset, and detach
+ * are legal
+ * - the driver is expected to handle transition through the shutdown
+ * state machine and allow the stack to support SO_LINGER.
+ * - The driver expects the inpcb lock to be held - the driver is expected
+ * not to drop the lock. Hence the driver is not allowed to acquire the
+ * pcbinfo lock during this call.
+ *
+ * + tu_reset
+ * - closes the connection and sends a RST to peer
+ * - driver is expected to trigger an RST and detach the toepcb
+ * - no further calls are legal after reset
+ * - The driver expects the inpcb lock to be held - the driver is expected
+ * not to drop the lock. Hence the driver is not allowed to acquire the
+ * pcbinfo lock during this call.
+ *
+ * The following fields in the tcpcb are expected to be referenced by the driver:
+ * + iss
+ * + rcv_nxt
+ * + rcv_wnd
+ * + snd_isn
+ * + snd_max
+ * + snd_nxt
+ * + snd_una
+ * + t_flags
+ * + t_inpcb
+ * + t_maxseg
+ * + t_toe
+ *
+ * The following fields in the inpcb are expected to be referenced by the driver:
+ * + inp_lport
+ * + inp_fport
+ * + inp_laddr
+ * + inp_faddr
+ * + inp_socket
+ * + inp_ip_tos
+ *
+ * The following fields in the socket are expected to be referenced by the
+ * driver:
+ * + so_comp
+ * + so_error
+ * + so_linger
+ * + so_options
+ * + so_rcv
+ * + so_snd
+ * + so_state
+ * + so_timeo
+ *
+ * These functions all return 0 on success and can return the following errors
+ * as appropriate:
+ * + EPERM:
+ * + ENOBUFS: memory allocation failed
+ * + EMSGSIZE: MTU changed during the call
+ * + EHOSTDOWN:
+ * + EHOSTUNREACH:
+ * + ENETDOWN:
+ * + ENETUNREACH: the peer is no longer reachable
+ *
+ * + tu_detach
+ * - tells driver that the socket is going away so disconnect
+ * the toepcb and free appropriate resources
+ * - allows the driver to cleanly handle the case of connection state
+ * outliving the socket
+ * - no further calls are legal after detach
+ * - the driver is expected to provide its own synchronization between
+ * detach and receiving new data.
+ *
+ * + tu_syncache_event
+ * - even if it is not actually needed, the driver is expected to
+ * call syncache_add for the initial SYN and then syncache_expand
+ * for the SYN,ACK
+ * - tells driver that a connection either has not been added or has
+ * been dropped from the syncache
+ * - the driver is expected to maintain state that lives outside the
+ * software stack so the syncache needs to be able to notify the
+ * toe driver that the software stack is not going to create a connection
+ * for a received SYN
+ * - The driver is responsible for any synchronization required between
+ * the syncache dropping an entry and the driver processing the SYN,ACK.
+ *
+ */
+struct toe_usrreqs {
+ int (*tu_send)(struct tcpcb *tp);
+ int (*tu_rcvd)(struct tcpcb *tp);
+ int (*tu_disconnect)(struct tcpcb *tp);
+ int (*tu_reset)(struct tcpcb *tp);
+ void (*tu_detach)(struct tcpcb *tp);
+ void (*tu_syncache_event)(int event, void *toep);
+};
-int tcp_offload_connect(struct socket *, struct sockaddr *);
-void tcp_offload_listen_start(struct tcpcb *);
-void tcp_offload_listen_stop(struct tcpcb *);
-void tcp_offload_input(struct tcpcb *, struct mbuf *);
-int tcp_offload_output(struct tcpcb *);
-void tcp_offload_rcvd(struct tcpcb *);
-void tcp_offload_ctloutput(struct tcpcb *, int, int);
-void tcp_offload_detach(struct tcpcb *);
+/*
+ * Proxy for struct tcpopt between TOE drivers and TCP functions.
+ */
+struct toeopt {
+ u_int64_t to_flags; /* see tcpopt in tcp_var.h */
+ u_int16_t to_mss; /* maximum segment size */
+ u_int8_t to_wscale; /* window scaling */
+ u_int8_t _pad1; /* explicit pad for 64bit alignment */
+ u_int32_t _pad2; /* explicit pad for 64bit alignment */
+ u_int64_t _pad3[4]; /* TBD */
+};
+
+#define TOE_SC_ENTRY_PRESENT 1 /* 4-tuple already present */
+#define TOE_SC_DROP 2 /* connection was timed out */
+
+/*
+ * Because listen is a one-to-many relationship (a socket can be listening
+ * on all interfaces on a machine some of which may be using different TCP
+ * offload devices), listen uses a publish/subscribe mechanism. The TCP
+ * offload driver registers a listen notification function with the stack.
+ * When a listen socket is created all TCP offload devices are notified
+ * so that they can do the appropriate set up to offload connections on the
+ * port to which the socket is bound. When the listen socket is closed,
+ * the offload devices are notified so that they will stop listening on that
+ * port and free any associated resources as well as sending RSTs on any
+ * connections in the SYN_RCVD state.
+ *
+ */
+
+typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
+typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
+
+EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
+EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
+
+/*
+ * Check if the socket can be offloaded by the following steps:
+ * - determine the egress interface
+ * - check that the interface has TOE capability and that TOE is enabled
+ * - check if the device has resources to offload the connection
+ */
+int tcp_offload_connect(struct socket *so, struct sockaddr *nam);
+
+/*
+ * The tcp_output_* routines are wrappers around the toe_usrreqs calls
+ * which trigger packet transmission. In the non-offloaded case they
+ * translate to tcp_output. The tcp_offload_* routines notify TOE
+ * of specific events. In the non-offloaded case they are no-ops.
+ *
+ * Listen is a special case because it is a 1 to many relationship
+ * and there can be more than one offload driver in the system.
+ */
+
+/*
+ * Connection is offloaded
+ */
+#define tp_offload(tp) ((tp)->t_flags & TF_TOE)
+
+/*
+ * hackish way of allowing this file to also be included by TOE
+ * which needs to be kept ignorant of socket implementation details
+ */
+#ifdef _SYS_SOCKETVAR_H_
+/*
+ * The socket has not been marked as "do not offload"
+ */
+#define SO_OFFLOADABLE(so) ((so->so_options & SO_NO_OFFLOAD) == 0)
+
+static __inline int
+tcp_output_connect(struct socket *so, struct sockaddr *nam)
+{
+ struct tcpcb *tp = sototcpcb(so);
+ int error;
+
+ /*
+ * If offload has been disabled for this socket or the
+ * connection cannot be offloaded just call tcp_output
+ * to start the TCP state machine.
+ */
+#ifndef TCP_OFFLOAD_DISABLE
+ if (!SO_OFFLOADABLE(so) || (error = tcp_offload_connect(so, nam)) != 0)
+#endif
+ error = tcp_output(tp);
+ return (error);
+}
+
+static __inline int
+tcp_output_send(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (tp_offload(tp))
+ return (tp->t_tu->tu_send(tp));
#endif
+ return (tcp_output(tp));
+}
+
+static __inline int
+tcp_output_rcvd(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (tp_offload(tp))
+ return (tp->t_tu->tu_rcvd(tp));
+#endif
+ return (tcp_output(tp));
+}
+
+static __inline int
+tcp_output_disconnect(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (tp_offload(tp))
+ return (tp->t_tu->tu_disconnect(tp));
+#endif
+ return (tcp_output(tp));
+}
+
+static __inline int
+tcp_output_reset(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (tp_offload(tp))
+ return (tp->t_tu->tu_reset(tp));
+#endif
+ return (tcp_output(tp));
+}
+
+static __inline void
+tcp_offload_detach(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (tp_offload(tp))
+ tp->t_tu->tu_detach(tp);
+#endif
+}
+
+static __inline void
+tcp_offload_listen_open(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+ if (SO_OFFLOADABLE(tp->t_inpcb->inp_socket))
+ EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
+#endif
+}
+
+static __inline void
+tcp_offload_listen_close(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+ EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
+#endif
+}
+#undef SO_OFFLOADABLE
+#endif /* _SYS_SOCKETVAR_H_ */
+#undef tp_offload
+
+void tcp_offload_twstart(struct tcpcb *tp);
+struct tcpcb *tcp_offload_close(struct tcpcb *tp);
+struct tcpcb *tcp_offload_drop(struct tcpcb *tp, int error);
+
+#endif /* _NETINET_TCP_OFFLOAD_H_ */
Modified: trunk/sys/netinet/tcp_syncache.c
===================================================================
--- trunk/sys/netinet/tcp_syncache.c 2016-10-02 00:13:34 UTC (rev 9115)
+++ trunk/sys/netinet/tcp_syncache.c 2016-10-02 00:29:44 UTC (rev 9116)
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_syncache.c 248085 2013-03-09 02:36:32Z marius $");
#include "opt_inet.h"
#include "opt_inet6.h"
@@ -81,12 +81,10 @@
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
+#include <netinet/tcp_offload.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
-#ifdef TCP_OFFLOAD
-#include <netinet/toecore.h>
-#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -112,8 +110,10 @@
&VNET_NAME(tcp_syncookiesonly), 0,
"Use only TCP SYN cookies");
-#ifdef TCP_OFFLOAD
-#define ADDED_BY_TOE(sc) ((sc)->sc_tod != NULL)
+#ifdef TCP_OFFLOAD_DISABLE
+#define TOEPCB_ISSET(sc) (0)
+#else
+#define TOEPCB_ISSET(sc) ((sc)->sc_toepcb != NULL)
#endif
static void syncache_drop(struct syncache *, struct syncache_head *);
@@ -332,14 +332,6 @@
TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
sch->sch_length++;
-#ifdef TCP_OFFLOAD
- if (ADDED_BY_TOE(sc)) {
- struct toedev *tod = sc->sc_tod;
-
- tod->tod_syncache_added(tod, sc->sc_todctx);
- }
-#endif
-
/* Reinitialize the bucket row's timer. */
if (sch->sch_length == 1)
sch->sch_nextc = ticks + INT_MAX;
@@ -364,14 +356,10 @@
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
-#ifdef TCP_OFFLOAD
- if (ADDED_BY_TOE(sc)) {
- struct toedev *tod = sc->sc_tod;
-
- tod->tod_syncache_removed(tod, sc->sc_todctx);
- }
-#endif
-
+#ifndef TCP_OFFLOAD_DISABLE
+ if (sc->sc_tu)
+ sc->sc_tu->tu_syncache_event(TOE_SC_DROP, sc->sc_toepcb);
+#endif
syncache_free(sc);
V_tcp_syncache.cache_count--;
}
@@ -858,19 +846,7 @@
if (sc->sc_rxmits > 1)
tp->snd_cwnd = tp->t_maxseg;
-#ifdef TCP_OFFLOAD
/*
- * Allow a TOE driver to install its hooks. Note that we hold the
- * pcbinfo lock too and that prevents tcp_usr_accept from accepting a
- * new connection before the TOE driver has done its thing.
- */
- if (ADDED_BY_TOE(sc)) {
- struct toedev *tod = sc->sc_tod;
-
- tod->tod_offload_socket(tod, sc->sc_todctx, so);
- }
-#endif
- /*
* Copy and activate timers.
*/
tp->t_keepinit = sototcpcb(lso)->t_keepinit;
@@ -950,13 +926,6 @@
/* Pull out the entry to unlock the bucket row. */
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
-#ifdef TCP_OFFLOAD
- if (ADDED_BY_TOE(sc)) {
- struct toedev *tod = sc->sc_tod;
-
- tod->tod_syncache_removed(tod, sc->sc_todctx);
- }
-#endif
V_tcp_syncache.cache_count--;
SCH_UNLOCK(sch);
}
@@ -965,7 +934,7 @@
* Segment validation:
* ACK must match our initial sequence number + 1 (the SYN|ACK).
*/
- if (th->th_ack != sc->sc_iss + 1) {
+ if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
"rejected\n", s, __func__, th->th_ack, sc->sc_iss);
@@ -976,8 +945,9 @@
* The SEQ must fall in the window starting at the received
* initial receive sequence number + 1 (the SYN).
*/
- if (SEQ_LEQ(th->th_seq, sc->sc_irs) ||
- SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
+ if ((SEQ_LEQ(th->th_seq, sc->sc_irs) ||
+ SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) &&
+ !TOEPCB_ISSET(sc)) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
"rejected\n", s, __func__, th->th_seq, sc->sc_irs);
@@ -994,7 +964,8 @@
* If timestamps were negotiated the reflected timestamp
* must be equal to what we actually sent in the SYN|ACK.
*/
- if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts) {
+ if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts &&
+ !TOEPCB_ISSET(sc)) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
"segment rejected\n",
@@ -1022,6 +993,25 @@
return (0);
}
+int
+tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
+ struct tcphdr *th, struct socket **lsop, struct mbuf *m)
+{
+ struct tcpopt to;
+ int rc;
+
+ bzero(&to, sizeof(struct tcpopt));
+ to.to_mss = toeo->to_mss;
+ to.to_wscale = toeo->to_wscale;
+ to.to_flags = toeo->to_flags;
+
+ INP_INFO_WLOCK(&V_tcbinfo);
+ rc = syncache_expand(inc, &to, th, lsop, m);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
+
+ return (rc);
+}
+
/*
* Given a LISTEN socket and an inbound SYN request, add
* this to the syn cache, and send back a segment:
@@ -1037,8 +1027,8 @@
*/
static void
_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
- void *todctx)
+ struct inpcb *inp, struct socket **lsop, struct mbuf *m,
+ struct toe_usrreqs *tu, void *toepcb)
{
struct tcpcb *tp;
struct socket *so;
@@ -1124,6 +1114,11 @@
sc = syncache_lookup(inc, &sch); /* returns locked entry */
SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
+#ifndef TCP_OFFLOAD_DISABLE
+ if (sc->sc_tu)
+ sc->sc_tu->tu_syncache_event(TOE_SC_ENTRY_PRESENT,
+ sc->sc_toepcb);
+#endif
TCPSTAT_INC(tcps_sc_dupsyn);
if (ipopts) {
/*
@@ -1156,7 +1151,7 @@
s, __func__);
free(s, M_TCPLOG);
}
- if (syncache_respond(sc) == 0) {
+ if (!TOEPCB_ISSET(sc) && syncache_respond(sc) == 0) {
sc->sc_rxmits = 0;
syncache_timeout(sc, sch, 1);
TCPSTAT_INC(tcps_sndacks);
@@ -1207,9 +1202,9 @@
sc->sc_ip_tos = ip_tos;
sc->sc_ip_ttl = ip_ttl;
}
-#ifdef TCP_OFFLOAD
- sc->sc_tod = tod;
- sc->sc_todctx = todctx;
+#ifndef TCP_OFFLOAD_DISABLE
+ sc->sc_tu = tu;
+ sc->sc_toepcb = toepcb;
#endif
sc->sc_irs = th->th_seq;
sc->sc_iss = arc4random();
@@ -1304,7 +1299,7 @@
/*
* Do a standard 3-way handshake.
*/
- if (syncache_respond(sc) == 0) {
+ if (TOEPCB_ISSET(sc) || syncache_respond(sc) == 0) {
if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs)
syncache_free(sc);
else if (sc != &scs)
@@ -1485,15 +1480,6 @@
th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
IPPROTO_TCP, 0);
ip6->ip6_hlim = in6_selecthlim(NULL, NULL);
-#ifdef TCP_OFFLOAD
- if (ADDED_BY_TOE(sc)) {
- struct toedev *tod = sc->sc_tod;
-
- error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
-
- return (error);
- }
-#endif
error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
}
#endif
@@ -1505,15 +1491,6 @@
m->m_pkthdr.csum_flags = CSUM_TCP;
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(tlen + optlen - hlen + IPPROTO_TCP));
-#ifdef TCP_OFFLOAD
- if (ADDED_BY_TOE(sc)) {
- struct toedev *tod = sc->sc_tod;
-
- error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
-
- return (error);
- }
-#endif
error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
}
#endif
@@ -1528,12 +1505,23 @@
}
void
-tcp_offload_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct inpcb *inp, struct socket **lsop, void *tod, void *todctx)
+tcp_offload_syncache_add(struct in_conninfo *inc, struct toeopt *toeo,
+ struct tcphdr *th, struct inpcb *inp, struct socket **lsop,
+ struct toe_usrreqs *tu, void *toepcb)
{
+ struct tcpopt to;
- _syncache_add(inc, to, th, inp, lsop, NULL, tod, todctx);
+ bzero(&to, sizeof(struct tcpopt));
+ to.to_mss = toeo->to_mss;
+ to.to_wscale = toeo->to_wscale;
+ to.to_flags = toeo->to_flags;
+
+ INP_INFO_WLOCK(&V_tcbinfo);
+ INP_WLOCK(inp);
+
+ _syncache_add(inc, &to, th, inp, lsop, NULL, tu, toepcb);
}
+
/*
* The purpose of SYN cookies is to avoid keeping track of all SYN's we
* receive and to be able to handle SYN floods from bogus source addresses
Modified: trunk/sys/netinet/tcp_syncache.h
===================================================================
--- trunk/sys/netinet/tcp_syncache.h 2016-10-02 00:13:34 UTC (rev 9115)
+++ trunk/sys/netinet/tcp_syncache.h 2016-10-02 00:29:44 UTC (rev 9116)
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*
* @(#)tcp_var.h 8.4 (Berkeley) 5/24/95
- * $MidnightBSD$
+ * $FreeBSD: stable/9/sys/netinet/tcp_syncache.h 224151 2011-07-17 21:15:20Z bz $
*/
#ifndef _NETINET_TCP_SYNCACHE_H_
@@ -34,6 +34,8 @@
#define _NETINET_TCP_SYNCACHE_H_
#ifdef _KERNEL
+struct toeopt;
+
void syncache_init(void);
#ifdef VIMAGE
void syncache_destroy(void);
@@ -41,10 +43,14 @@
void syncache_unreach(struct in_conninfo *, struct tcphdr *);
int syncache_expand(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct socket **, struct mbuf *);
+int tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
+ struct tcphdr *th, struct socket **lsop, struct mbuf *m);
void syncache_add(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *);
-void tcp_offload_syncache_add(struct in_conninfo *, struct tcpopt *,
- struct tcphdr *, struct inpcb *, struct socket **, void *, void *);
+void tcp_offload_syncache_add(struct in_conninfo *, struct toeopt *,
+ struct tcphdr *, struct inpcb *, struct socket **,
+ struct toe_usrreqs *tu, void *toepcb);
+
void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
void syncache_badack(struct in_conninfo *);
int syncache_pcbcount(void);
@@ -69,10 +75,10 @@
u_int8_t sc_requested_s_scale:4,
sc_requested_r_scale:4;
u_int16_t sc_flags;
-#if defined(TCP_OFFLOAD) || !defined(TCP_OFFLOAD_DISABLE)
- struct toedev *sc_tod; /* entry added by this TOE */
- void *sc_todctx; /* TOE driver context */
-#endif
+#ifndef TCP_OFFLOAD_DISABLE
+ struct toe_usrreqs *sc_tu; /* TOE operations */
+ void *sc_toepcb; /* TOE protocol block */
+#endif
struct label *sc_label; /* MAC label reference */
struct ucred *sc_cred; /* cred cache for jail checks */
More information about the Midnightbsd-cvs
mailing list