[Midnightbsd-cvs] src [9116] trunk/sys/netinet: rollback to pre toecore

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Sat Oct 1 20:29:44 EDT 2016


Revision: 9116
          http://svnweb.midnightbsd.org/src/?rev=9116
Author:   laffer1
Date:     2016-10-01 20:29:44 -0400 (Sat, 01 Oct 2016)
Log Message:
-----------
rollback to pre toecore

Modified Paths:
--------------
    trunk/sys/netinet/tcp_offload.c
    trunk/sys/netinet/tcp_offload.h
    trunk/sys/netinet/tcp_syncache.c
    trunk/sys/netinet/tcp_syncache.h

Modified: trunk/sys/netinet/tcp_offload.c
===================================================================
--- trunk/sys/netinet/tcp_offload.c	2016-10-02 00:13:34 UTC (rev 9115)
+++ trunk/sys/netinet/tcp_offload.c	2016-10-02 00:29:44 UTC (rev 9116)
@@ -1,176 +1,145 @@
 /*-
- * Copyright (c) 2012 Chelsio Communications, Inc.
+ * Copyright (c) 2007, Chelsio Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
+ * modification, are permitted provided that the following conditions are met:
  *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Neither the name of the Chelsio Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: release/9.2.0/sys/netinet/tcp_offload.c 252555 2013-07-03 09:25:29Z np $");
+__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_offload.c 196019 2009-08-01 19:26:27Z rwatson $");
 
-#include "opt_inet.h"
-
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
-#include <sys/sockopt.h>
+
 #include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
 #include <net/route.h>
+#include <net/vnet.h>
+
 #include <netinet/in.h>
+#include <netinet/in_systm.h>
 #include <netinet/in_pcb.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_offload.h>
-#define	TCPOUTFLAGS
-#include <netinet/tcp_fsm.h>
-#include <netinet/toecore.h>
+#include <netinet/toedev.h>
 
-int registered_toedevs;
+uint32_t toedev_registration_count;
 
-/*
- * Provide an opportunity for a TOE driver to offload.
- */
 int
 tcp_offload_connect(struct socket *so, struct sockaddr *nam)
 {
 	struct ifnet *ifp;
-	struct toedev *tod;
+	struct toedev *tdev;
 	struct rtentry *rt;
-	int error = EOPNOTSUPP;
+	int error;
 
-	INP_WLOCK_ASSERT(sotoinpcb(so));
-	KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6,
-	    ("%s: called with sa_family %d", __func__, nam->sa_family));
-
-	if (registered_toedevs == 0)
-		return (error);
-
-	rt = rtalloc1(nam, 0, 0);
-	if (rt)
+	if (toedev_registration_count == 0)
+		return (EINVAL);
+	
+	/*
+	 * Look up the route used for the connection to 
+	 * determine if it uses an interface capable of
+	 * offloading the connection.
+	 */
+	rt = rtalloc1(nam, 0 /*report*/, 0 /*ignflags*/);
+	if (rt) 
 		RT_UNLOCK(rt);
-	else
+	else 
 		return (EHOSTUNREACH);
 
 	ifp = rt->rt_ifp;
-
-	if (nam->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4))
-		goto done;
-	if (nam->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6))
-		goto done;
-
-	tod = TOEDEV(ifp);
-	if (tod != NULL)
-		error = tod->tod_connect(tod, so, rt, nam);
-done:
+	if ((ifp->if_capenable & IFCAP_TOE) == 0) {
+		error = EINVAL;
+		goto fail;
+	}
+	
+	tdev = TOEDEV(ifp);
+	if (tdev == NULL) {
+		error = EPERM;
+		goto fail;
+	}
+	
+	if (tdev->tod_can_offload(tdev, so) == 0) {
+		error = EPERM;
+		goto fail;
+	}
+	
+	return (tdev->tod_connect(tdev, so, rt, nam));
+fail:
 	RTFREE(rt);
 	return (error);
 }
 
-void
-tcp_offload_listen_start(struct tcpcb *tp)
-{
 
-	INP_WLOCK_ASSERT(tp->t_inpcb);
+/*
+ * This file contains code as a short-term staging area before it is moved in 
+ * to sys/netinet/tcp_offload.c
+ */
 
-	EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
-}
-
 void
-tcp_offload_listen_stop(struct tcpcb *tp)
+tcp_offload_twstart(struct tcpcb *tp)
 {
 
-	INP_WLOCK_ASSERT(tp->t_inpcb);
-
-	EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
+	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_WLOCK(tp->t_inpcb);
+	tcp_twstart(tp);
+	INP_INFO_WUNLOCK(&V_tcbinfo);
 }
 
-void
-tcp_offload_input(struct tcpcb *tp, struct mbuf *m)
+struct tcpcb *
+tcp_offload_close(struct tcpcb *tp)
 {
-	struct toedev *tod = tp->tod;
 
-	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
-	INP_WLOCK_ASSERT(tp->t_inpcb);
+	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_WLOCK(tp->t_inpcb);
+	tp = tcp_close(tp);
+	INP_INFO_WUNLOCK(&V_tcbinfo);
+	if (tp)
+		INP_WUNLOCK(tp->t_inpcb);
 
-	tod->tod_input(tod, tp, m);
+	return (tp);
 }
 
-int
-tcp_offload_output(struct tcpcb *tp)
+struct tcpcb *
+tcp_offload_drop(struct tcpcb *tp, int error)
 {
-	struct toedev *tod = tp->tod;
-	int error, flags;
 
-	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
-	INP_WLOCK_ASSERT(tp->t_inpcb);
+	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_WLOCK(tp->t_inpcb);
+	tp = tcp_drop(tp, error);
+	INP_INFO_WUNLOCK(&V_tcbinfo);
+	if (tp)
+		INP_WUNLOCK(tp->t_inpcb);
 
-	flags = tcp_outflags[tp->t_state];
-
-	if (flags & TH_RST) {
-		/* XXX: avoid repeated calls like we do for FIN */
-		error = tod->tod_send_rst(tod, tp);
-	} else if ((flags & TH_FIN || tp->t_flags & TF_NEEDFIN) &&
-	    (tp->t_flags & TF_SENTFIN) == 0) {
-		error = tod->tod_send_fin(tod, tp);
-		if (error == 0)
-			tp->t_flags |= TF_SENTFIN;
-	} else
-		error = tod->tod_output(tod, tp);
-
-	return (error);
+	return (tp);
 }
 
-void
-tcp_offload_rcvd(struct tcpcb *tp)
-{
-	struct toedev *tod = tp->tod;
-
-	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
-	INP_WLOCK_ASSERT(tp->t_inpcb);
-
-	tod->tod_rcvd(tod, tp);
-}
-
-void
-tcp_offload_ctloutput(struct tcpcb *tp, int sopt_dir, int sopt_name)
-{
-	struct toedev *tod = tp->tod;
-
-	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
-	INP_WLOCK_ASSERT(tp->t_inpcb);
-
-	tod->tod_ctloutput(tod, tp, sopt_dir, sopt_name);
-}
-
-void
-tcp_offload_detach(struct tcpcb *tp)
-{
-	struct toedev *tod = tp->tod;
-
-	KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
-	INP_WLOCK_ASSERT(tp->t_inpcb);
-
-	tod->tod_pcb_detach(tod, tp);
-}

Modified: trunk/sys/netinet/tcp_offload.h
===================================================================
--- trunk/sys/netinet/tcp_offload.h	2016-10-02 00:13:34 UTC (rev 9115)
+++ trunk/sys/netinet/tcp_offload.h	2016-10-02 00:29:44 UTC (rev 9116)
@@ -24,7 +24,7 @@
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
- * $MidnightBSD$
+ * $FreeBSD: stable/9/sys/netinet/tcp_offload.h 218909 2011-02-21 09:01:34Z brucec $
  */
 
 #ifndef _NETINET_TCP_OFFLOAD_H_
@@ -34,15 +34,321 @@
 #error "no user-serviceable parts inside"
 #endif
 
-extern int registered_toedevs;
+/*
+ * A driver publishes that it provides offload services
+ * by setting IFCAP_TOE in the ifnet. The offload connect
+ * will bypass any further work if the interface that a
+ * connection would use does not support TCP offload.
+ *
+ * The TOE API assumes that the tcp offload engine can offload the
+ * entire connection from set up to teardown, with some provision
+ * being made to allow the software stack to handle time wait. If
+ * the device does not meet these criteria, it is the driver's responsibility
+ * to overload the functions that it needs to in tcp_usrreqs and make
+ * its own calls to tcp_output if it needs to do so.
+ *
+ * There is currently no provision for the device advertising the congestion
+ * control algorithms it supports as there is currently no API for querying 
+ * an operating system for the protocols that it has loaded. This is a desirable
+ * future extension.
+ *
+ *
+ *
+ * It is assumed that individuals deploying TOE will want connections
+ * to be offloaded without software changes so all connections on an
+ * interface providing TOE are offloaded unless the SO_NO_OFFLOAD 
+ * flag is set on the socket.
+ *
+ *
+ * The toe_usrreqs structure constitutes the TOE driver's 
+ * interface to the TCP stack for functionality that doesn't
+ * interact directly with userspace. If one wants to provide
+ * (optional) functionality to do zero-copy to/from
+ * userspace one still needs to override soreceive/sosend 
+ * with functions that fault in and pin the user buffers.
+ *
+ * + tu_send
+ *   - tells the driver that new data may have been added to the 
+ *     socket's send buffer - the driver should not fail if the
+ *     buffer is in fact unchanged
+ *   - the driver is responsible for providing credits (bytes in the send window)
+ *     back to the socket by calling sbdrop() as segments are acknowledged.
+ *   - The driver expects the inpcb lock to be held - the driver is expected
+ *     not to drop the lock. Hence the driver is not allowed to acquire the
+ *     pcbinfo lock during this call.
+ *
+ * + tu_rcvd
+ *   - returns credits to the driver and triggers window updates
+ *     to the peer (a credit as used here is a byte in the peer's receive window)
+ *   - the driver is expected to determine how many bytes have been 
+ *     consumed and credit that back to the card so that it can grow
+ *     the window again by maintaining its own state between invocations.
+ *   - In principle this could be used to shrink the window as well as
+ *     grow the window, although it is not used for that now.
+ *   - this function needs to correctly handle being called any number of
+ *     times without any bytes being consumed from the receive buffer.
+ *   - The driver expects the inpcb lock to be held - the driver is expected
+ *     not to drop the lock. Hence the driver is not allowed to acquire the
+ *     pcbinfo lock during this call.
+ *
+ * + tu_disconnect
+ *   - tells the driver to send FIN to peer
+ *   - driver is expected to send the remaining data and then do a clean half close
+ *   - disconnect implies at least half-close so only send, reset, and detach
+ *     are legal
+ *   - the driver is expected to handle transition through the shutdown
+ *     state machine and allow the stack to support SO_LINGER.
+ *   - The driver expects the inpcb lock to be held - the driver is expected
+ *     not to drop the lock. Hence the driver is not allowed to acquire the
+ *     pcbinfo lock during this call.
+ *
+ * + tu_reset
+ *   - closes the connection and sends a RST to peer
+ *   - driver is expected to trigger an RST and detach the toepcb
+ *   - no further calls are legal after reset
+ *   - The driver expects the inpcb lock to be held - the driver is expected
+ *     not to drop the lock. Hence the driver is not allowed to acquire the
+ *     pcbinfo lock during this call.
+ *
+ *   The following fields in the tcpcb are expected to be referenced by the driver:
+ *	+ iss
+ *	+ rcv_nxt
+ *	+ rcv_wnd
+ *	+ snd_isn
+ *	+ snd_max
+ *	+ snd_nxt
+ *	+ snd_una
+ *	+ t_flags
+ *	+ t_inpcb
+ *	+ t_maxseg
+ *	+ t_toe
+ *
+ *   The following fields in the inpcb are expected to be referenced by the driver:
+ *	+ inp_lport
+ *	+ inp_fport
+ *	+ inp_laddr
+ *	+ inp_faddr
+ *	+ inp_socket
+ *	+ inp_ip_tos
+ *
+ *   The following fields in the socket are expected to be referenced by the
+ *   driver:
+ *	+ so_comp
+ *	+ so_error
+ *	+ so_linger
+ *	+ so_options
+ *	+ so_rcv
+ *	+ so_snd
+ *	+ so_state
+ *	+ so_timeo
+ *
+ *   These functions all return 0 on success and can return the following errors
+ *   as appropriate:
+ *	+ EPERM:
+ *	+ ENOBUFS: memory allocation failed
+ *	+ EMSGSIZE: MTU changed during the call
+ *	+ EHOSTDOWN:
+ *	+ EHOSTUNREACH:
+ *	+ ENETDOWN:
+ *	+ ENETUNREACH: the peer is no longer reachable
+ *
+ * + tu_detach
+ *   - tells driver that the socket is going away so disconnect
+ *     the toepcb and free appropriate resources
+ *   - allows the driver to cleanly handle the case of connection state
+ *     outliving the socket
+ *   - no further calls are legal after detach
+ *   - the driver is expected to provide its own synchronization between
+ *     detach and receiving new data.
+ * 
+ * + tu_syncache_event
+ *   - even if it is not actually needed, the driver is expected to
+ *     call syncache_add for the initial SYN and then syncache_expand
+ *     for the SYN,ACK
+ *   - tells driver that a connection either has not been added or has 
+ *     been dropped from the syncache
+ *   - the driver is expected to maintain state that lives outside the 
+ *     software stack so the syncache needs to be able to notify the
+ *     toe driver that the software stack is not going to create a connection
+ *     for a received SYN
+ *   - The driver is responsible for any synchronization required between
+ *     the syncache dropping an entry and the driver processing the SYN,ACK.
+ * 
+ */
+struct toe_usrreqs {
+	int (*tu_send)(struct tcpcb *tp);
+	int (*tu_rcvd)(struct tcpcb *tp);
+	int (*tu_disconnect)(struct tcpcb *tp);
+	int (*tu_reset)(struct tcpcb *tp);
+	void (*tu_detach)(struct tcpcb *tp);
+	void (*tu_syncache_event)(int event, void *toep);
+};
 
-int  tcp_offload_connect(struct socket *, struct sockaddr *);
-void tcp_offload_listen_start(struct tcpcb *);
-void tcp_offload_listen_stop(struct tcpcb *);
-void tcp_offload_input(struct tcpcb *, struct mbuf *);
-int  tcp_offload_output(struct tcpcb *);
-void tcp_offload_rcvd(struct tcpcb *);
-void tcp_offload_ctloutput(struct tcpcb *, int, int);
-void tcp_offload_detach(struct tcpcb *);
+/*
+ * Proxy for struct tcpopt between TOE drivers and TCP functions.
+ */
+struct toeopt {
+	u_int64_t	to_flags;	/* see tcpopt in tcp_var.h */
+	u_int16_t	to_mss;		/* maximum segment size */
+	u_int8_t	to_wscale;	/* window scaling */
 
+	u_int8_t	_pad1;		/* explicit pad for 64bit alignment */
+	u_int32_t	_pad2;		/* explicit pad for 64bit alignment */
+	u_int64_t	_pad3[4];	/* TBD */
+};
+
+#define	TOE_SC_ENTRY_PRESENT		1	/* 4-tuple already present */
+#define	TOE_SC_DROP			2	/* connection was timed out */
+
+/*
+ * Because listen is a one-to-many relationship (a socket can be listening 
+ * on all interfaces on a machine some of which may be using different TCP
+ * offload devices), listen uses a publish/subscribe mechanism. The TCP
+ * offload driver registers a listen notification function with the stack.
+ * When a listen socket is created all TCP offload devices are notified
+ * so that they can do the appropriate set up to offload connections on the
+ * port to which the socket is bound. When the listen socket is closed,
+ * the offload devices are notified so that they will stop listening on that
+ * port and free any associated resources as well as sending RSTs on any
+ * connections in the SYN_RCVD state.
+ *
+ */
+
+typedef	void	(*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
+typedef	void	(*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
+
+EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
+EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
+
+/*
+ * Check if the socket can be offloaded by the following steps:
+ * - determine the egress interface
+ * - check that the interface is TOE-capable and that TOE is enabled
+ * - check if the device has resources to offload the connection
+ */
+int	tcp_offload_connect(struct socket *so, struct sockaddr *nam);
+
+/*
+ * The tcp_output_* routines are wrappers around the toe_usrreqs calls
+ * which trigger packet transmission. In the non-offloaded case they
+ * translate to tcp_output. The tcp_offload_* routines notify TOE
+ * of specific events. In the non-offloaded case they are no-ops.
+ *
+ * Listen is a special case because it is a 1 to many relationship
+ * and there can be more than one offload driver in the system.
+ */
+
+/*
+ * Connection is offloaded
+ */
+#define	tp_offload(tp)		((tp)->t_flags & TF_TOE)
+
+/*
+ * hackish way of allowing this file to also be included by TOE
+ * which needs to be kept ignorant of socket implementation details
+ */
+#ifdef _SYS_SOCKETVAR_H_
+/*
+ * The socket has not been marked as "do not offload"
+ */
+#define	SO_OFFLOADABLE(so)	((so->so_options & SO_NO_OFFLOAD) == 0)
+
+static __inline int
+tcp_output_connect(struct socket *so, struct sockaddr *nam)
+{
+	struct tcpcb *tp = sototcpcb(so);
+	int error;
+
+	/*
+	 * If offload has been disabled for this socket or the 
+	 * connection cannot be offloaded just call tcp_output
+	 * to start the TCP state machine.
+	 */
+#ifndef TCP_OFFLOAD_DISABLE	
+	if (!SO_OFFLOADABLE(so) || (error = tcp_offload_connect(so, nam)) != 0)
+#endif		
+		error = tcp_output(tp);
+	return (error);
+}
+
+static __inline int
+tcp_output_send(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+	if (tp_offload(tp))
+		return (tp->t_tu->tu_send(tp));
 #endif
+	return (tcp_output(tp));
+}
+
+static __inline int
+tcp_output_rcvd(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+	if (tp_offload(tp))
+		return (tp->t_tu->tu_rcvd(tp));
+#endif
+	return (tcp_output(tp));
+}
+
+static __inline int
+tcp_output_disconnect(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+	if (tp_offload(tp))
+		return (tp->t_tu->tu_disconnect(tp));
+#endif
+	return (tcp_output(tp));
+}
+
+static __inline int
+tcp_output_reset(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+	if (tp_offload(tp))
+		return (tp->t_tu->tu_reset(tp));
+#endif
+	return (tcp_output(tp));
+}
+
+static __inline void
+tcp_offload_detach(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+	if (tp_offload(tp))
+		tp->t_tu->tu_detach(tp);
+#endif	
+}
+
+static __inline void
+tcp_offload_listen_open(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+	if (SO_OFFLOADABLE(tp->t_inpcb->inp_socket))
+		EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
+#endif	
+}
+
+static __inline void
+tcp_offload_listen_close(struct tcpcb *tp)
+{
+
+#ifndef TCP_OFFLOAD_DISABLE
+	EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
+#endif	
+}
+#undef SO_OFFLOADABLE
+#endif /* _SYS_SOCKETVAR_H_ */
+#undef tp_offload
+
+void tcp_offload_twstart(struct tcpcb *tp);
+struct tcpcb *tcp_offload_close(struct tcpcb *tp);
+struct tcpcb *tcp_offload_drop(struct tcpcb *tp, int error);
+
+#endif /* _NETINET_TCP_OFFLOAD_H_ */

Modified: trunk/sys/netinet/tcp_syncache.c
===================================================================
--- trunk/sys/netinet/tcp_syncache.c	2016-10-02 00:13:34 UTC (rev 9115)
+++ trunk/sys/netinet/tcp_syncache.c	2016-10-02 00:29:44 UTC (rev 9116)
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__MBSDID("$MidnightBSD$");
+__FBSDID("$FreeBSD: stable/9/sys/netinet/tcp_syncache.c 248085 2013-03-09 02:36:32Z marius $");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
@@ -81,12 +81,10 @@
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
+#include <netinet/tcp_offload.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
-#ifdef TCP_OFFLOAD
-#include <netinet/toecore.h>
-#endif
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
@@ -112,8 +110,10 @@
     &VNET_NAME(tcp_syncookiesonly), 0,
     "Use only TCP SYN cookies");
 
-#ifdef TCP_OFFLOAD
-#define ADDED_BY_TOE(sc) ((sc)->sc_tod != NULL)
+#ifdef TCP_OFFLOAD_DISABLE
+#define TOEPCB_ISSET(sc) (0)
+#else
+#define TOEPCB_ISSET(sc) ((sc)->sc_toepcb != NULL)
 #endif
 
 static void	 syncache_drop(struct syncache *, struct syncache_head *);
@@ -332,14 +332,6 @@
 	TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
 	sch->sch_length++;
 
-#ifdef TCP_OFFLOAD
-	if (ADDED_BY_TOE(sc)) {
-		struct toedev *tod = sc->sc_tod;
-
-		tod->tod_syncache_added(tod, sc->sc_todctx);
-	}
-#endif
-
 	/* Reinitialize the bucket row's timer. */
 	if (sch->sch_length == 1)
 		sch->sch_nextc = ticks + INT_MAX;
@@ -364,14 +356,10 @@
 	TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
 	sch->sch_length--;
 
-#ifdef TCP_OFFLOAD
-	if (ADDED_BY_TOE(sc)) {
-		struct toedev *tod = sc->sc_tod;
-
-		tod->tod_syncache_removed(tod, sc->sc_todctx);
-	}
-#endif
-
+#ifndef TCP_OFFLOAD_DISABLE
+	if (sc->sc_tu)
+		sc->sc_tu->tu_syncache_event(TOE_SC_DROP, sc->sc_toepcb);
+#endif		    
 	syncache_free(sc);
 	V_tcp_syncache.cache_count--;
 }
@@ -858,19 +846,7 @@
 	if (sc->sc_rxmits > 1)
 		tp->snd_cwnd = tp->t_maxseg;
 
-#ifdef TCP_OFFLOAD
 	/*
-	 * Allow a TOE driver to install its hooks.  Note that we hold the
-	 * pcbinfo lock too and that prevents tcp_usr_accept from accepting a
-	 * new connection before the TOE driver has done its thing.
-	 */
-	if (ADDED_BY_TOE(sc)) {
-		struct toedev *tod = sc->sc_tod;
-
-		tod->tod_offload_socket(tod, sc->sc_todctx, so);
-	}
-#endif
-	/*
 	 * Copy and activate timers.
 	 */
 	tp->t_keepinit = sototcpcb(lso)->t_keepinit;
@@ -950,13 +926,6 @@
 		/* Pull out the entry to unlock the bucket row. */
 		TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
 		sch->sch_length--;
-#ifdef TCP_OFFLOAD
-		if (ADDED_BY_TOE(sc)) {
-			struct toedev *tod = sc->sc_tod;
-
-			tod->tod_syncache_removed(tod, sc->sc_todctx);
-		}
-#endif
 		V_tcp_syncache.cache_count--;
 		SCH_UNLOCK(sch);
 	}
@@ -965,7 +934,7 @@
 	 * Segment validation:
 	 * ACK must match our initial sequence number + 1 (the SYN|ACK).
 	 */
-	if (th->th_ack != sc->sc_iss + 1) {
+	if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
 			    "rejected\n", s, __func__, th->th_ack, sc->sc_iss);
@@ -976,8 +945,9 @@
 	 * The SEQ must fall in the window starting at the received
 	 * initial receive sequence number + 1 (the SYN).
 	 */
-	if (SEQ_LEQ(th->th_seq, sc->sc_irs) ||
-	    SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
+	if ((SEQ_LEQ(th->th_seq, sc->sc_irs) ||
+	    SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) &&
+	    !TOEPCB_ISSET(sc)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
 			    "rejected\n", s, __func__, th->th_seq, sc->sc_irs);
@@ -994,7 +964,8 @@
 	 * If timestamps were negotiated the reflected timestamp
 	 * must be equal to what we actually sent in the SYN|ACK.
 	 */
-	if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts) {
+	if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts &&
+	    !TOEPCB_ISSET(sc)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
 			    "segment rejected\n",
@@ -1022,6 +993,25 @@
 	return (0);
 }
 
+int
+tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
+    struct tcphdr *th, struct socket **lsop, struct mbuf *m)
+{
+	struct tcpopt to;
+	int rc;
+
+	bzero(&to, sizeof(struct tcpopt));
+	to.to_mss = toeo->to_mss;
+	to.to_wscale = toeo->to_wscale;
+	to.to_flags = toeo->to_flags;
+	
+	INP_INFO_WLOCK(&V_tcbinfo);
+	rc = syncache_expand(inc, &to, th, lsop, m);
+	INP_INFO_WUNLOCK(&V_tcbinfo);
+
+	return (rc);
+}
+
 /*
  * Given a LISTEN socket and an inbound SYN request, add
  * this to the syn cache, and send back a segment:
@@ -1037,8 +1027,8 @@
  */
 static void
 _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
-    struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
-    void *todctx)
+    struct inpcb *inp, struct socket **lsop, struct mbuf *m,
+    struct toe_usrreqs *tu, void *toepcb)
 {
 	struct tcpcb *tp;
 	struct socket *so;
@@ -1124,6 +1114,11 @@
 	sc = syncache_lookup(inc, &sch);	/* returns locked entry */
 	SCH_LOCK_ASSERT(sch);
 	if (sc != NULL) {
+#ifndef TCP_OFFLOAD_DISABLE
+		if (sc->sc_tu)
+			sc->sc_tu->tu_syncache_event(TOE_SC_ENTRY_PRESENT,
+			    sc->sc_toepcb);
+#endif		    
 		TCPSTAT_INC(tcps_sc_dupsyn);
 		if (ipopts) {
 			/*
@@ -1156,7 +1151,7 @@
 			    s, __func__);
 			free(s, M_TCPLOG);
 		}
-		if (syncache_respond(sc) == 0) {
+		if (!TOEPCB_ISSET(sc) && syncache_respond(sc) == 0) {
 			sc->sc_rxmits = 0;
 			syncache_timeout(sc, sch, 1);
 			TCPSTAT_INC(tcps_sndacks);
@@ -1207,9 +1202,9 @@
 		sc->sc_ip_tos = ip_tos;
 		sc->sc_ip_ttl = ip_ttl;
 	}
-#ifdef TCP_OFFLOAD
-	sc->sc_tod = tod;
-	sc->sc_todctx = todctx;
+#ifndef TCP_OFFLOAD_DISABLE	
+	sc->sc_tu = tu;
+	sc->sc_toepcb = toepcb;
 #endif
 	sc->sc_irs = th->th_seq;
 	sc->sc_iss = arc4random();
@@ -1304,7 +1299,7 @@
 	/*
 	 * Do a standard 3-way handshake.
 	 */
-	if (syncache_respond(sc) == 0) {
+	if (TOEPCB_ISSET(sc) || syncache_respond(sc) == 0) {
 		if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs)
 			syncache_free(sc);
 		else if (sc != &scs)
@@ -1485,15 +1480,6 @@
 		th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
 		    IPPROTO_TCP, 0);
 		ip6->ip6_hlim = in6_selecthlim(NULL, NULL);
-#ifdef TCP_OFFLOAD
-		if (ADDED_BY_TOE(sc)) {
-			struct toedev *tod = sc->sc_tod;
-
-			error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
-
-			return (error);
-		}
-#endif
 		error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
 	}
 #endif
@@ -1505,15 +1491,6 @@
 		m->m_pkthdr.csum_flags = CSUM_TCP;
 		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 		    htons(tlen + optlen - hlen + IPPROTO_TCP));
-#ifdef TCP_OFFLOAD
-		if (ADDED_BY_TOE(sc)) {
-			struct toedev *tod = sc->sc_tod;
-
-			error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
-
-			return (error);
-		}
-#endif
 		error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
 	}
 #endif
@@ -1528,12 +1505,23 @@
 }
 
 void
-tcp_offload_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
-    struct inpcb *inp, struct socket **lsop, void *tod, void *todctx)
+tcp_offload_syncache_add(struct in_conninfo *inc, struct toeopt *toeo,
+    struct tcphdr *th, struct inpcb *inp, struct socket **lsop,
+    struct toe_usrreqs *tu, void *toepcb)
 {
+	struct tcpopt to;
 
-	_syncache_add(inc, to, th, inp, lsop, NULL, tod, todctx);
+	bzero(&to, sizeof(struct tcpopt));
+	to.to_mss = toeo->to_mss;
+	to.to_wscale = toeo->to_wscale;
+	to.to_flags = toeo->to_flags;
+
+	INP_INFO_WLOCK(&V_tcbinfo);
+	INP_WLOCK(inp);
+
+	_syncache_add(inc, &to, th, inp, lsop, NULL, tu, toepcb);
 }
+
 /*
  * The purpose of SYN cookies is to avoid keeping track of all SYN's we
  * receive and to be able to handle SYN floods from bogus source addresses

Modified: trunk/sys/netinet/tcp_syncache.h
===================================================================
--- trunk/sys/netinet/tcp_syncache.h	2016-10-02 00:13:34 UTC (rev 9115)
+++ trunk/sys/netinet/tcp_syncache.h	2016-10-02 00:29:44 UTC (rev 9116)
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)tcp_var.h	8.4 (Berkeley) 5/24/95
- * $MidnightBSD$
+ * $FreeBSD: stable/9/sys/netinet/tcp_syncache.h 224151 2011-07-17 21:15:20Z bz $
  */
 
 #ifndef _NETINET_TCP_SYNCACHE_H_
@@ -34,6 +34,8 @@
 #define _NETINET_TCP_SYNCACHE_H_
 #ifdef _KERNEL
 
+struct toeopt;
+
 void	 syncache_init(void);
 #ifdef VIMAGE
 void	syncache_destroy(void);
@@ -41,10 +43,14 @@
 void	 syncache_unreach(struct in_conninfo *, struct tcphdr *);
 int	 syncache_expand(struct in_conninfo *, struct tcpopt *,
 	     struct tcphdr *, struct socket **, struct mbuf *);
+int	 tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
+             struct tcphdr *th, struct socket **lsop, struct mbuf *m);
 void	 syncache_add(struct in_conninfo *, struct tcpopt *,
 	     struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *);
-void	 tcp_offload_syncache_add(struct in_conninfo *, struct tcpopt *,
-	     struct tcphdr *, struct inpcb *, struct socket **, void *, void *);
+void	 tcp_offload_syncache_add(struct in_conninfo *, struct toeopt *,
+             struct tcphdr *, struct inpcb *, struct socket **,
+             struct toe_usrreqs *tu, void *toepcb);
+
 void	 syncache_chkrst(struct in_conninfo *, struct tcphdr *);
 void	 syncache_badack(struct in_conninfo *);
 int	 syncache_pcbcount(void);
@@ -69,10 +75,10 @@
 	u_int8_t	sc_requested_s_scale:4,
 			sc_requested_r_scale:4;
 	u_int16_t	sc_flags;
-#if defined(TCP_OFFLOAD) || !defined(TCP_OFFLOAD_DISABLE)
-	struct toedev	*sc_tod;		/* entry added by this TOE */
-	void		*sc_todctx;		/* TOE driver context */
-#endif
+#ifndef TCP_OFFLOAD_DISABLE
+	struct toe_usrreqs *sc_tu;		/* TOE operations */
+	void		*sc_toepcb;		/* TOE protocol block */
+#endif			
 	struct label	*sc_label;		/* MAC label reference */
 	struct ucred	*sc_cred;		/* cred cache for jail checks */
 



More information about the Midnightbsd-cvs mailing list