[Midnightbsd-cvs] src [8018] trunk/sys: Update cxgbe
laffer1 at midnightbsd.org
laffer1 at midnightbsd.org
Thu Sep 15 16:11:39 EDT 2016
Revision: 8018
http://svnweb.midnightbsd.org/src/?rev=8018
Author: laffer1
Date: 2016-09-15 16:11:39 -0400 (Thu, 15 Sep 2016)
Log Message:
-----------
Update cxgbe
Convert some fixed params to tunables
if_iqdrops should include frames truncated within the chip.
Assume INET/INET6 and TCP_OFFLOAD when the driver is built out of tree.
Fix some buffer sizes.
Modified Paths:
--------------
trunk/sys/dev/cxgb/ulp/tom/cxgb_listen.c
trunk/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h
trunk/sys/dev/cxgbe/adapter.h
trunk/sys/dev/cxgbe/common/t4_hw.h
trunk/sys/dev/cxgbe/common/t4_msg.h
trunk/sys/dev/cxgbe/firmware/t4fw_cfg.txt
trunk/sys/dev/cxgbe/offload.h
trunk/sys/dev/cxgbe/t4_main.c
trunk/sys/dev/cxgbe/t4_sge.c
trunk/sys/dev/cxgbe/tom/t4_connect.c
trunk/sys/dev/cxgbe/tom/t4_cpl_io.c
trunk/sys/dev/cxgbe/tom/t4_listen.c
trunk/sys/dev/cxgbe/tom/t4_tom.c
trunk/sys/dev/cxgbe/tom/t4_tom.h
trunk/sys/modules/cxgbe/if_cxgbe/Makefile
trunk/sys/modules/cxgbe/tom/Makefile
Modified: trunk/sys/dev/cxgb/ulp/tom/cxgb_listen.c
===================================================================
--- trunk/sys/dev/cxgb/ulp/tom/cxgb_listen.c 2016-09-15 09:05:39 UTC (rev 8017)
+++ trunk/sys/dev/cxgb/ulp/tom/cxgb_listen.c 2016-09-15 20:11:39 UTC (rev 8018)
@@ -41,6 +41,7 @@
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
+#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
@@ -759,6 +760,15 @@
goto reset;
}
+ if (__predict_false(!(synqe->flags & TP_SYNQE_EXPANDED))) {
+ struct inpcb *new_inp = sotoinpcb(so);
+
+ INP_WLOCK(new_inp);
+ tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
+ t3_offload_socket(tod, synqe, so);
+ INP_WUNLOCK(new_inp);
+ }
+
/* Remove the synq entry and release its reference on the lctx */
TAILQ_REMOVE(&lctx->synq, synqe, link);
inp = release_lctx(td, lctx);
@@ -1136,5 +1146,6 @@
offload_socket(so, toep);
make_established(so, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
update_tid(td, toep, synqe->tid);
+ synqe->flags |= TP_SYNQE_EXPANDED;
}
#endif
Modified: trunk/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h
===================================================================
--- trunk/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h 2016-09-15 09:05:39 UTC (rev 8017)
+++ trunk/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h 2016-09-15 20:11:39 UTC (rev 8018)
@@ -44,6 +44,7 @@
#define TP_IS_A_SYNQ_ENTRY (1 << 9)
#define TP_ABORT_RPL_SENT (1 << 10)
#define TP_SEND_FIN (1 << 11)
+#define TP_SYNQE_EXPANDED (1 << 12)
struct toepcb {
TAILQ_ENTRY(toepcb) link; /* toep_list */
Modified: trunk/sys/dev/cxgbe/adapter.h
===================================================================
--- trunk/sys/dev/cxgbe/adapter.h 2016-09-15 09:05:39 UTC (rev 8017)
+++ trunk/sys/dev/cxgbe/adapter.h 2016-09-15 20:11:39 UTC (rev 8018)
@@ -135,6 +135,7 @@
#else
FL_BUF_SIZES = 3, /* cluster, jumbo9k, jumbo16k */
#endif
+ OFLD_BUF_SIZE = MJUM16BYTES, /* size of fl buffer for TOE rxq */
CTRL_EQ_QSIZE = 128,
@@ -143,6 +144,12 @@
TX_WR_FLITS = SGE_MAX_WR_LEN / 8
};
+#ifdef T4_PKT_TIMESTAMP
+#define RX_COPY_THRESHOLD (MINCLSIZE - 8)
+#else
+#define RX_COPY_THRESHOLD MINCLSIZE
+#endif
+
enum {
/* adapter intr_type */
INTR_INTX = (1 << 0),
@@ -510,6 +517,7 @@
typedef int (*cpl_handler_t)(struct sge_iq *, const struct rss_header *,
struct mbuf *);
typedef int (*an_handler_t)(struct sge_iq *, const struct rsp_ctrl *);
+typedef int (*fw_msg_handler_t)(struct adapter *, const __be64 *);
struct adapter {
SLIST_ENTRY(adapter) link;
@@ -582,7 +590,8 @@
struct callout sfl_callout;
an_handler_t an_handler __aligned(CACHE_LINE_SIZE);
- cpl_handler_t cpl_handler[256];
+ fw_msg_handler_t fw_msg_handler[4]; /* NUM_FW6_TYPES */
+ cpl_handler_t cpl_handler[0xef]; /* NUM_CPL_CMDS */
};
#define ADAPTER_LOCK(sc) mtx_lock(&(sc)->sc_lock)
@@ -741,6 +750,8 @@
void t4_iterate(void (*)(struct adapter *, void *), void *);
int t4_register_cpl_handler(struct adapter *, int, cpl_handler_t);
int t4_register_an_handler(struct adapter *, an_handler_t);
+int t4_register_fw_msg_handler(struct adapter *, int, fw_msg_handler_t);
+int t4_filter_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *);
/* t4_sge.c */
void t4_sge_modload(void);
Modified: trunk/sys/dev/cxgbe/common/t4_hw.h
===================================================================
--- trunk/sys/dev/cxgbe/common/t4_hw.h 2016-09-15 09:05:39 UTC (rev 8017)
+++ trunk/sys/dev/cxgbe/common/t4_hw.h 2016-09-15 20:11:39 UTC (rev 8018)
@@ -161,10 +161,12 @@
#define S_PPOD_TAG 6
#define M_PPOD_TAG 0xFFFFFF
#define V_PPOD_TAG(x) ((x) << S_PPOD_TAG)
+#define G_PPOD_TAG(x) (((x) >> S_PPOD_TAG) & M_PPOD_TAG)
#define S_PPOD_PGSZ 30
#define M_PPOD_PGSZ 0x3
#define V_PPOD_PGSZ(x) ((x) << S_PPOD_PGSZ)
+#define G_PPOD_PGSZ(x) (((x) >> S_PPOD_PGSZ) & M_PPOD_PGSZ)
#define S_PPOD_TID 32
#define M_PPOD_TID 0xFFFFFF
Modified: trunk/sys/dev/cxgbe/common/t4_msg.h
===================================================================
--- trunk/sys/dev/cxgbe/common/t4_msg.h 2016-09-15 09:05:39 UTC (rev 8017)
+++ trunk/sys/dev/cxgbe/common/t4_msg.h 2016-09-15 20:11:39 UTC (rev 8018)
@@ -792,6 +792,14 @@
__be64 val;
};
+struct cpl_set_tcb_field_core {
+ union opcode_tid ot;
+ __be16 reply_ctrl;
+ __be16 word_cookie;
+ __be64 mask;
+ __be64 val;
+};
+
/* cpl_set_tcb_field.word_cookie fields */
#define S_WORD 0
#define M_WORD 0x1F
@@ -1376,6 +1384,11 @@
__be32 credit_dack;
};
+struct cpl_rx_data_ack_core {
+ union opcode_tid ot;
+ __be32 credit_dack;
+};
+
/* cpl_rx_data_ack.ack_seq fields */
#define S_RX_CREDITS 0
#define M_RX_CREDITS 0x3FFFFFF
@@ -2281,6 +2294,8 @@
FW6_TYPE_WR_RPL = 1,
FW6_TYPE_CQE = 2,
FW6_TYPE_OFLD_CONNECTION_WR_RPL = 3,
+
+ NUM_FW6_TYPES
};
struct cpl_fw6_msg_ofld_connection_wr_rpl {
Modified: trunk/sys/dev/cxgbe/firmware/t4fw_cfg.txt
===================================================================
--- trunk/sys/dev/cxgbe/firmware/t4fw_cfg.txt 2016-09-15 09:05:39 UTC (rev 8017)
+++ trunk/sys/dev/cxgbe/firmware/t4fw_cfg.txt 2016-09-15 20:11:39 UTC (rev 8018)
@@ -20,7 +20,7 @@
filterMode = fragmentation, mpshittype, protocol, vlan, port, fcoe
# TP rx and tx payload memory (% of the total EDRAM + DDR3).
- tp_pmrx = 40
+ tp_pmrx = 38
tp_pmtx = 60
tp_pmrx_pagesize = 64K
tp_pmtx_pagesize = 64K
@@ -67,7 +67,8 @@
# driver will mask off features it won't use
protocol = ofld
- tp_l2t = 100
+ tp_l2t = 4096
+ tp_ddp = 2
# TCAM has 8K cells; each region must start at a multiple of 128 cell.
# Each entry in these categories takes 4 cells each. nhash will use the
@@ -136,7 +137,7 @@
[fini]
version = 0x1
- checksum = 0xdb5813f9
+ checksum = 0x162df193
#
# $MidnightBSD$
#
Modified: trunk/sys/dev/cxgbe/offload.h
===================================================================
--- trunk/sys/dev/cxgbe/offload.h 2016-09-15 09:05:39 UTC (rev 8017)
+++ trunk/sys/dev/cxgbe/offload.h 2016-09-15 20:11:39 UTC (rev 8018)
@@ -31,13 +31,16 @@
#ifndef __T4_OFFLOAD_H__
#define __T4_OFFLOAD_H__
-#define INIT_ULPTX_WR(w, wrlen, atomic, tid) do { \
- (w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \
- (w)->wr.wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \
+#define INIT_ULPTX_WRH(w, wrlen, atomic, tid) do { \
+ (w)->wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \
+ (w)->wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \
V_FW_WR_FLOWID(tid)); \
- (w)->wr.wr_lo = cpu_to_be64(0); \
+ (w)->wr_lo = cpu_to_be64(0); \
} while (0)
+#define INIT_ULPTX_WR(w, wrlen, atomic, tid) \
+ INIT_ULPTX_WRH(&((w)->wr), wrlen, atomic, tid)
+
#define INIT_TP_WR(w, tid) do { \
(w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | \
V_FW_WR_IMMDLEN(sizeof(*w) - sizeof(w->wr))); \
Modified: trunk/sys/dev/cxgbe/t4_main.c
===================================================================
--- trunk/sys/dev/cxgbe/t4_main.c 2016-09-15 09:05:39 UTC (rev 8017)
+++ trunk/sys/dev/cxgbe/t4_main.c 2016-09-15 20:11:39 UTC (rev 8018)
@@ -306,6 +306,7 @@
static int cpl_not_handled(struct sge_iq *, const struct rss_header *,
struct mbuf *);
static int an_not_handled(struct sge_iq *, const struct rsp_ctrl *);
+static int fw_msg_not_handled(struct adapter *, const __be64 *);
static int t4_sysctls(struct adapter *);
static int cxgbe_sysctls(struct port_info *);
static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
@@ -345,8 +346,6 @@
static void clear_filter(struct filter_entry *);
static int set_filter_wr(struct adapter *, int);
static int del_filter_wr(struct adapter *, int);
-static int filter_rpl(struct sge_iq *, const struct rss_header *,
- struct mbuf *);
static int get_sge_context(struct adapter *, struct t4_sge_context *);
static int read_card_mem(struct adapter *, struct t4_mem_range *);
#ifdef TCP_OFFLOAD
@@ -381,6 +380,10 @@
CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
#endif
+/* No easy way to include t4_msg.h before adapter.h so we check this way */
+CTASSERT(ARRAY_SIZE(((struct adapter *)0)->cpl_handler) == NUM_CPL_CMDS);
+CTASSERT(ARRAY_SIZE(((struct adapter *)0)->fw_msg_handler) == NUM_FW6_TYPES);
+
static int
t4_probe(device_t dev)
{
@@ -458,7 +461,9 @@
sc->an_handler = an_not_handled;
for (i = 0; i < ARRAY_SIZE(sc->cpl_handler); i++)
sc->cpl_handler[i] = cpl_not_handled;
- t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, filter_rpl);
+ for (i = 0; i < ARRAY_SIZE(sc->fw_msg_handler); i++)
+ sc->fw_msg_handler[i] = fw_msg_not_handled;
+ t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl);
/* Prepare the adapter for operation */
rc = -t4_prep_adapter(sc);
@@ -510,18 +515,24 @@
goto done; /* error message displayed already */
if (sc->flags & MASTER_PF) {
+ uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
/* final tweaks to some settings */
t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd,
sc->params.b_wnd);
- t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
+ /* 4K, 16K, 64K, 256K DDP "page sizes" */
+ t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, V_HPZ0(0) | V_HPZ1(2) |
+ V_HPZ2(4) | V_HPZ3(6));
+ t4_set_reg_field(sc, A_ULP_RX_CTL, F_TDDPTAGTCB, F_TDDPTAGTCB);
t4_set_reg_field(sc, A_TP_PARA_REG3, F_TUNNELCNGDROP0 |
- F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 | F_TUNNELCNGDROP3, 0);
+ F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 | F_TUNNELCNGDROP3,
+ F_TUNNELCNGDROP0 | F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 |
+ F_TUNNELCNGDROP3);
t4_set_reg_field(sc, A_TP_PARA_REG5,
V_INDICATESIZE(M_INDICATESIZE) |
F_REARMDDPOFFSET | F_RESETDDPOFFSET,
- V_INDICATESIZE(M_INDICATESIZE) |
+ V_INDICATESIZE(indsz) |
F_REARMDDPOFFSET | F_RESETDDPOFFSET);
} else {
/*
@@ -2942,7 +2953,8 @@
ifp->if_omcasts = s->tx_mcast_frames - s->tx_pause;
ifp->if_imcasts = s->rx_mcast_frames - s->rx_pause;
ifp->if_iqdrops = s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
- s->rx_ovflow3;
+ s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
+ s->rx_trunc3;
drops = s->tx_drop;
for_each_txq(pi, i, txq)
@@ -2977,7 +2989,7 @@
panic("%s: opcode 0x%02x on iq %p with payload %p",
__func__, rss->opcode, iq, m);
#else
- log(LOG_ERR, "%s: opcode 0x%02x on iq %p with payload %p",
+ log(LOG_ERR, "%s: opcode 0x%02x on iq %p with payload %p\n",
__func__, rss->opcode, iq, m);
m_freem(m);
#endif
@@ -3006,7 +3018,7 @@
#ifdef INVARIANTS
panic("%s: async notification on iq %p (ctrl %p)", __func__, iq, ctrl);
#else
- log(LOG_ERR, "%s: async notification on iq %p (ctrl %p)",
+ log(LOG_ERR, "%s: async notification on iq %p (ctrl %p)\n",
__func__, iq, ctrl);
#endif
return (EDOOFUS);
@@ -3025,6 +3037,35 @@
}
static int
+fw_msg_not_handled(struct adapter *sc, const __be64 *rpl)
+{
+ __be64 *r = __DECONST(__be64 *, rpl);
+ struct cpl_fw6_msg *cpl = member2struct(cpl_fw6_msg, data, r);
+
+#ifdef INVARIANTS
+ panic("%s: fw_msg type %d", __func__, cpl->type);
+#else
+ log(LOG_ERR, "%s: fw_msg type %d\n", __func__, cpl->type);
+#endif
+ return (EDOOFUS);
+}
+
+int
+t4_register_fw_msg_handler(struct adapter *sc, int type, fw_msg_handler_t h)
+{
+ uintptr_t *loc, new;
+
+ if (type >= ARRAY_SIZE(sc->fw_msg_handler))
+ return (EINVAL);
+
+ new = h ? (uintptr_t)h : (uintptr_t)fw_msg_not_handled;
+ loc = (uintptr_t *) &sc->fw_msg_handler[type];
+ atomic_store_rel_ptr(loc, new);
+
+ return (0);
+}
+
+static int
t4_sysctls(struct adapter *sc)
{
struct sysctl_ctx_list *ctx;
@@ -3191,10 +3232,13 @@
sc->tt.ddp = 0;
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW,
&sc->tt.ddp, 0, "DDP allowed");
- sc->tt.indsz = M_INDICATESIZE;
+
+ sc->tt.indsz = G_INDICATESIZE(t4_read_reg(sc, A_TP_PARA_REG5));
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "indsz", CTLFLAG_RW,
&sc->tt.indsz, 0, "DDP max indicate size allowed");
- sc->tt.ddp_thres = 3*4096;
+
+ sc->tt.ddp_thres =
+ G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2));
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp_thres", CTLFLAG_RW,
&sc->tt.ddp_thres, 0, "DDP threshold");
}
@@ -4961,8 +5005,8 @@
return (0);
}
-static int
-filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
+int
+t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
struct adapter *sc = iq->adapter;
const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1);
Modified: trunk/sys/dev/cxgbe/t4_sge.c
===================================================================
--- trunk/sys/dev/cxgbe/t4_sge.c 2016-09-15 09:05:39 UTC (rev 8017)
+++ trunk/sys/dev/cxgbe/t4_sge.c 2016-09-15 20:11:39 UTC (rev 8018)
@@ -68,13 +68,38 @@
#define FL_BUF_TYPE(x) (fl_buf_info[x].type)
#define FL_BUF_ZONE(x) (fl_buf_info[x].zone)
-enum {
- FL_PKTSHIFT = 2
-};
+/*
+ * Ethernet frames are DMA'd at this byte offset into the freelist buffer.
+ * 0-7 are valid values.
+ */
+static int fl_pktshift = 2;
+TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift);
-static int fl_pad = CACHE_LINE_SIZE;
-static int spg_len = 64;
+/*
+ * Pad ethernet payload up to this boundary.
+ * -1: driver should figure out a good value.
+ * Any power of 2, from 32 to 4096 (both inclusive) is a valid value.
+ */
+static int fl_pad = -1;
+TUNABLE_INT("hw.cxgbe.fl_pad", &fl_pad);
+/*
+ * Status page length.
+ * -1: driver should figure out a good value.
+ * 64 or 128 are the only other valid values.
+ */
+static int spg_len = -1;
+TUNABLE_INT("hw.cxgbe.spg_len", &spg_len);
+
+/*
+ * Congestion drops.
+ * -1: no congestion feedback (not recommended).
+ * 0: backpressure the channel instead of dropping packets right away.
+ * 1: no backpressure, drop packets for the congested queue immediately.
+ */
+static int cong_drop = 0;
+TUNABLE_INT("hw.cxgbe.cong_drop", &cong_drop);
+
/* Used to track coalesced tx work request */
struct txpkts {
uint64_t *flitp; /* ptr to flit where next pkt should start */
@@ -160,7 +185,7 @@
static __be64 get_flit(bus_dma_segment_t *, int, int);
static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
struct mbuf *);
-static int handle_fw_rpl(struct sge_iq *, const struct rss_header *,
+static int handle_fw_msg(struct sge_iq *, const struct rss_header *,
struct mbuf *);
static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
@@ -170,7 +195,8 @@
#endif
/*
- * Called on MOD_LOAD and fills up fl_buf_info[].
+ * Called on MOD_LOAD. Fills up fl_buf_info[] and validates/calculates the SGE
+ * tunables.
*/
void
t4_sge_modload(void)
@@ -191,10 +217,49 @@
FL_BUF_ZONE(i) = m_getzone(bufsize[i]);
}
+ if (fl_pktshift < 0 || fl_pktshift > 7) {
+ printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
+ " using 2 instead.\n", fl_pktshift);
+ fl_pktshift = 2;
+ }
+
+ if (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad)) {
+ int pad;
+
#if defined(__i386__) || defined(__amd64__)
- fl_pad = max(cpu_clflush_line_size, 32);
- spg_len = cpu_clflush_line_size > 64 ? 128 : 64;
+ pad = max(cpu_clflush_line_size, 32);
+#else
+ pad = max(CACHE_LINE_SIZE, 32);
#endif
+ pad = min(pad, 4096);
+
+ if (fl_pad != -1) {
+ printf("Invalid hw.cxgbe.fl_pad value (%d),"
+ " using %d instead.\n", fl_pad, pad);
+ }
+ fl_pad = pad;
+ }
+
+ if (spg_len != 64 && spg_len != 128) {
+ int len;
+
+#if defined(__i386__) || defined(__amd64__)
+ len = cpu_clflush_line_size > 64 ? 128 : 64;
+#else
+ len = 64;
+#endif
+ if (spg_len != -1) {
+ printf("Invalid hw.cxgbe.spg_len value (%d),"
+ " using %d instead.\n", spg_len, len);
+ }
+ spg_len = len;
+ }
+
+ if (cong_drop < -1 || cong_drop > 1) {
+ printf("Invalid hw.cxgbe.cong_drop value (%d),"
+ " using 0 instead.\n", cong_drop);
+ cong_drop = 0;
+ }
}
/**
@@ -215,7 +280,7 @@
ctrl_mask = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE |
V_INGPADBOUNDARY(M_INGPADBOUNDARY) |
F_EGRSTATUSPAGESIZE;
- ctrl_val = V_PKTSHIFT(FL_PKTSHIFT) | F_RXPKTCPLMODE |
+ ctrl_val = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
V_INGPADBOUNDARY(ilog2(fl_pad) - 5) |
V_EGRSTATUSPAGESIZE(spg_len == 128);
@@ -296,11 +361,13 @@
sc->sge.timer_val[4] = G_TIMERVALUE4(v) / core_ticks_per_usec(sc);
sc->sge.timer_val[5] = G_TIMERVALUE5(v) / core_ticks_per_usec(sc);
- t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_rpl);
- t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_rpl);
+ t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_msg);
+ t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_msg);
t4_register_cpl_handler(sc, CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
t4_register_cpl_handler(sc, CPL_RX_PKT, t4_eth_rx);
+ t4_register_fw_msg_handler(sc, FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
+
return (rc);
}
@@ -477,6 +544,18 @@
return (iq);
}
+static inline int
+mtu_to_bufsize(int mtu)
+{
+ int bufsize;
+
+ /* large enough for a frame even when VLAN extraction is disabled */
+ bufsize = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + mtu;
+ bufsize = roundup(bufsize + fl_pktshift, fl_pad);
+
+ return (bufsize);
+}
+
int
t4_setup_port_queues(struct port_info *pi)
{
@@ -493,6 +572,7 @@
struct adapter *sc = pi->adapter;
struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
+ int bufsize = mtu_to_bufsize(pi->ifp->if_mtu);
oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD,
NULL, "rx queues");
@@ -522,7 +602,7 @@
snprintf(name, sizeof(name), "%s rxq%d-fl",
device_get_nameunit(pi->dev), i);
- init_fl(&rxq->fl, pi->qsize_rxq / 8, pi->ifp->if_mtu, name);
+ init_fl(&rxq->fl, pi->qsize_rxq / 8, bufsize, name);
if (sc->flags & INTR_DIRECT
#ifdef TCP_OFFLOAD
@@ -547,7 +627,7 @@
snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
device_get_nameunit(pi->dev), i);
- init_fl(&ofld_rxq->fl, pi->qsize_rxq / 8, MJUM16BYTES, name);
+ init_fl(&ofld_rxq->fl, pi->qsize_rxq / 8, OFLD_BUF_SIZE, name);
if (sc->flags & INTR_DIRECT ||
(sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) {
@@ -942,13 +1022,6 @@
return (0);
}
-
-#ifdef T4_PKT_TIMESTAMP
-#define RX_COPY_THRESHOLD (MINCLSIZE - 8)
-#else
-#define RX_COPY_THRESHOLD MINCLSIZE
-#endif
-
static struct mbuf *
get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
int *fl_bufs_used)
@@ -1050,9 +1123,9 @@
KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__,
rss->opcode));
- m0->m_pkthdr.len -= FL_PKTSHIFT;
- m0->m_len -= FL_PKTSHIFT;
- m0->m_data += FL_PKTSHIFT;
+ m0->m_pkthdr.len -= fl_pktshift;
+ m0->m_len -= fl_pktshift;
+ m0->m_data += fl_pktshift;
m0->m_pkthdr.rcvif = ifp;
m0->m_flags |= M_FLOWID;
@@ -1386,11 +1459,8 @@
struct port_info *pi = ifp->if_softc;
struct sge_rxq *rxq;
struct sge_fl *fl;
- int i, bufsize;
+ int i, bufsize = mtu_to_bufsize(ifp->if_mtu);
- /* large enough for a frame even when VLAN extraction is disabled */
- bufsize = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ifp->if_mtu;
- bufsize = roundup(bufsize + FL_PKTSHIFT, fl_pad);
for_each_rxq(pi, i, rxq) {
fl = &rxq->fl;
@@ -1793,6 +1863,18 @@
return free_wrq(sc, &sc->sge.mgmtq);
}
+static inline int
+tnl_cong(struct port_info *pi)
+{
+
+ if (cong_drop == -1)
+ return (-1);
+ else if (cong_drop == 1)
+ return (0);
+ else
+ return (1 << pi->tx_chan);
+}
+
static int
alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx,
struct sysctl_oid *oid)
@@ -1801,7 +1883,7 @@
struct sysctl_oid_list *children;
char name[16];
- rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, 1 << pi->tx_chan);
+ rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, tnl_cong(pi));
if (rc != 0)
return (rc);
@@ -3433,17 +3515,15 @@
}
static int
-handle_fw_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
+handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
+ struct adapter *sc = iq->adapter;
const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);
KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
rss->opcode));
- if (cpl->type == FW6_TYPE_CMD_RPL)
- t4_handle_fw_rpl(iq->adapter, cpl->data);
-
- return (0);
+ return (sc->fw_msg_handler[cpl->type](sc, &cpl->data[0]));
}
static int
Modified: trunk/sys/dev/cxgbe/tom/t4_connect.c
===================================================================
--- trunk/sys/dev/cxgbe/tom/t4_connect.c 2016-09-15 09:05:39 UTC (rev 8017)
+++ trunk/sys/dev/cxgbe/tom/t4_connect.c 2016-09-15 20:11:39 UTC (rev 8018)
@@ -247,10 +247,14 @@
opt2 |= F_RX_COALESCE_VALID | V_RX_COALESCE(M_RX_COALESCE);
opt2 |= F_RSS_QUEUE_VALID | V_RSS_QUEUE(toep->ofld_rxq->iq.abs_id);
+#ifdef USE_DDP_RX_FLOW_CONTROL
+ if (toep->ulp_mode == ULP_MODE_TCPDDP)
+ opt2 |= F_RX_FC_VALID | F_RX_FC_DDP;
+#endif
+
return (htobe32(opt2));
}
-
void
t4_init_connect_cpl_handlers(struct adapter *sc)
{
@@ -320,7 +324,10 @@
toep->tid = atid;
toep->l2te = e;
- toep->ulp_mode = ULP_MODE_NONE;
+ if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0)
+ set_tcpddp_ulp_mode(toep);
+ else
+ toep->ulp_mode = ULP_MODE_NONE;
SOCKBUF_LOCK(&so->so_rcv);
/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
toep->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
@@ -354,7 +361,7 @@
rc = t4_l2t_send(sc, wr, e);
if (rc == 0) {
- toepcb_set_flag(toep, TPF_CPL_PENDING);
+ toep->flags |= TPF_CPL_PENDING;
return (0);
}
Modified: trunk/sys/dev/cxgbe/tom/t4_cpl_io.c
===================================================================
--- trunk/sys/dev/cxgbe/tom/t4_cpl_io.c 2016-09-15 09:05:39 UTC (rev 8017)
+++ trunk/sys/dev/cxgbe/tom/t4_cpl_io.c 2016-09-15 20:11:39 UTC (rev 8018)
@@ -53,6 +53,7 @@
#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
+#include "common/t4_tcb.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"
@@ -80,7 +81,7 @@
unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN;
struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
- KASSERT(!toepcb_flag(toep, TPF_FLOWC_WR_SENT),
+ KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT),
("%s: flowc for tid %u sent already", __func__, toep->tid));
CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid);
@@ -130,7 +131,7 @@
toep->txsd_pidx = 0;
toep->txsd_avail--;
- toepcb_set_flag(toep, TPF_FLOWC_WR_SENT);
+ toep->flags |= TPF_FLOWC_WR_SENT;
t4_wrq_tx(sc, wr);
}
@@ -150,15 +151,15 @@
inp->inp_flags & INP_DROPPED ? "inp dropped" :
tcpstates[tp->t_state],
toep->flags, inp->inp_flags,
- toepcb_flag(toep, TPF_ABORT_SHUTDOWN) ?
+ toep->flags & TPF_ABORT_SHUTDOWN ?
" (abort already in progress)" : "");
- if (toepcb_flag(toep, TPF_ABORT_SHUTDOWN))
+ if (toep->flags & TPF_ABORT_SHUTDOWN)
return; /* abort already in progress */
- toepcb_set_flag(toep, TPF_ABORT_SHUTDOWN);
+ toep->flags |= TPF_ABORT_SHUTDOWN;
- KASSERT(toepcb_flag(toep, TPF_FLOWC_WR_SENT),
+ KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
("%s: flowc_wr not sent for tid %d.", __func__, tid));
wr = alloc_wrqe(sizeof(*req), toep->ofld_txq);
@@ -173,7 +174,7 @@
req->rsvd0 = htobe32(snd_nxt);
else
req->rsvd0 = htobe32(tp->snd_nxt);
- req->rsvd1 = !toepcb_flag(toep, TPF_TX_DATA_SENT);
+ req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT);
req->cmd = CPL_ABORT_SEND_RST;
/*
@@ -299,12 +300,14 @@
}
static int
-send_rx_credits(struct adapter *sc, struct toepcb *toep, uint32_t credits)
+send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
{
struct wrqe *wr;
struct cpl_rx_data_ack *req;
uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
+ KASSERT(credits >= 0, ("%s: %d credits", __func__, credits));
+
wr = alloc_wrqe(sizeof(*req), toep->ctrlq);
if (wr == NULL)
return (0);
@@ -323,25 +326,28 @@
struct adapter *sc = tod->tod_softc;
struct inpcb *inp = tp->t_inpcb;
struct socket *so = inp->inp_socket;
- struct sockbuf *so_rcv = &so->so_rcv;
+ struct sockbuf *sb = &so->so_rcv;
struct toepcb *toep = tp->t_toe;
- int must_send;
+ int credits;
INP_WLOCK_ASSERT(inp);
- SOCKBUF_LOCK(so_rcv);
- KASSERT(toep->enqueued >= so_rcv->sb_cc,
- ("%s: so_rcv->sb_cc > enqueued", __func__));
- toep->rx_credits += toep->enqueued - so_rcv->sb_cc;
- toep->enqueued = so_rcv->sb_cc;
- SOCKBUF_UNLOCK(so_rcv);
+ SOCKBUF_LOCK(sb);
+ KASSERT(toep->sb_cc >= sb->sb_cc,
+ ("%s: sb %p has more data (%d) than last time (%d).",
+ __func__, sb, sb->sb_cc, toep->sb_cc));
+ toep->rx_credits += toep->sb_cc - sb->sb_cc;
+ toep->sb_cc = sb->sb_cc;
+ credits = toep->rx_credits;
+ SOCKBUF_UNLOCK(sb);
- must_send = toep->rx_credits + 16384 >= tp->rcv_wnd;
- if (must_send || toep->rx_credits >= 15 * 1024) {
- int credits;
+ if (credits > 0 &&
+ (credits + 16384 >= tp->rcv_wnd || credits >= 15 * 1024)) {
- credits = send_rx_credits(sc, toep, toep->rx_credits);
+ credits = send_rx_credits(sc, toep, credits);
+ SOCKBUF_LOCK(sb);
toep->rx_credits -= credits;
+ SOCKBUF_UNLOCK(sb);
tp->rcv_wnd += credits;
tp->rcv_adv += credits;
}
@@ -358,12 +364,12 @@
unsigned int tid = toep->tid;
CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid,
- toepcb_flag(toep, TPF_FIN_SENT) ? ", IGNORED" : "");
+ toep->flags & TPF_FIN_SENT ? ", IGNORED" : "");
- if (toepcb_flag(toep, TPF_FIN_SENT))
+ if (toep->flags & TPF_FIN_SENT)
return (0);
- KASSERT(toepcb_flag(toep, TPF_FLOWC_WR_SENT),
+ KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
("%s: flowc_wr not sent for tid %u.", __func__, tid));
wr = alloc_wrqe(sizeof(*req), toep->ofld_txq);
@@ -381,8 +387,8 @@
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
req->rsvd = 0;
- toepcb_set_flag(toep, TPF_FIN_SENT);
- toepcb_clr_flag(toep, TPF_SEND_FIN);
+ toep->flags |= TPF_FIN_SENT;
+ toep->flags &= ~TPF_SEND_FIN;
t4_l2t_send(sc, wr, toep->l2te);
return (0);
@@ -534,10 +540,11 @@
struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
INP_WLOCK_ASSERT(inp);
- KASSERT(toepcb_flag(toep, TPF_FLOWC_WR_SENT),
+ KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
- if (toep->ulp_mode != ULP_MODE_NONE)
+ if (__predict_false(toep->ulp_mode != ULP_MODE_NONE &&
+ toep->ulp_mode != ULP_MODE_TCPDDP))
CXGBE_UNIMPLEMENTED("ulp_mode");
/*
@@ -544,7 +551,7 @@
* This function doesn't resume by itself. Someone else must clear the
* flag and call this function.
*/
- if (__predict_false(toepcb_flag(toep, TPF_TX_SUSPENDED)))
+ if (__predict_false(toep->flags & TPF_TX_SUSPENDED))
return;
do {
@@ -570,7 +577,7 @@
plen -= m->m_len;
if (plen == 0) {
/* Too few credits */
- toepcb_set_flag(toep, TPF_TX_SUSPENDED);
+ toep->flags |= TPF_TX_SUSPENDED;
SOCKBUF_UNLOCK(sb);
return;
}
@@ -613,7 +620,7 @@
break;
}
- if (__predict_false(toepcb_flag(toep, TPF_FIN_SENT)))
+ if (__predict_false(toep->flags & TPF_FIN_SENT))
panic("%s: excess tx.", __func__);
if (plen <= max_imm) {
@@ -624,7 +631,7 @@
toep->ofld_txq);
if (wr == NULL) {
/* XXX: how will we recover from this? */
- toepcb_set_flag(toep, TPF_TX_SUSPENDED);
+ toep->flags |= TPF_TX_SUSPENDED;
return;
}
txwr = wrtod(wr);
@@ -642,7 +649,7 @@
wr = alloc_wrqe(roundup(wr_len, 16), toep->ofld_txq);
if (wr == NULL) {
/* XXX: how will we recover from this? */
- toepcb_set_flag(toep, TPF_TX_SUSPENDED);
+ toep->flags |= TPF_TX_SUSPENDED;
return;
}
txwr = wrtod(wr);
@@ -671,7 +678,7 @@
sb->sb_sndptr = sb_sndptr;
SOCKBUF_UNLOCK(sb);
- toepcb_set_flag(toep, TPF_TX_DATA_SENT);
+ toep->flags |= TPF_TX_DATA_SENT;
KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
txsd->plen = plen;
@@ -687,7 +694,7 @@
} while (m != NULL);
/* Send a FIN if requested, but only if there's no more data to send */
- if (m == NULL && toepcb_flag(toep, TPF_SEND_FIN))
+ if (m == NULL && toep->flags & TPF_SEND_FIN)
close_conn(sc, toep);
}
@@ -724,7 +731,7 @@
("%s: inp %p dropped.", __func__, inp));
KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
- toepcb_set_flag(toep, TPF_SEND_FIN);
+ toep->flags |= TPF_SEND_FIN;
t4_push_frames(sc, toep);
return (0);
@@ -745,7 +752,7 @@
KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
/* hmmmm */
- KASSERT(toepcb_flag(toep, TPF_FLOWC_WR_SENT),
+ KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
("%s: flowc for tid %u [%s] not sent already",
__func__, toep->tid, tcpstates[tp->t_state]));
@@ -765,7 +772,8 @@
struct toepcb *toep = lookup_tid(sc, tid);
struct inpcb *inp = toep->inp;
struct tcpcb *tp = NULL;
- struct socket *so = NULL;
+ struct socket *so;
+ struct sockbuf *sb;
#ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif
@@ -782,13 +790,38 @@
CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__,
tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp);
- if (toepcb_flag(toep, TPF_ABORT_SHUTDOWN))
+ if (toep->flags & TPF_ABORT_SHUTDOWN)
goto done;
+ tp->rcv_nxt++; /* FIN */
+
so = inp->inp_socket;
+ sb = &so->so_rcv;
+ SOCKBUF_LOCK(sb);
+ if (__predict_false(toep->ddp_flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) {
+ m = m_get(M_NOWAIT, MT_DATA);
+ if (m == NULL)
+ CXGBE_UNIMPLEMENTED("mbuf alloc failure");
- socantrcvmore(so);
- tp->rcv_nxt++; /* FIN */
+ m->m_len = be32toh(cpl->rcv_nxt) - tp->rcv_nxt;
+ m->m_flags |= M_DDP; /* Data is already where it should be */
+ m->m_data = "nothing to see here";
+ tp->rcv_nxt = be32toh(cpl->rcv_nxt);
+
+ toep->ddp_flags &= ~(DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE);
+
+ KASSERT(toep->sb_cc >= sb->sb_cc,
+ ("%s: sb %p has more data (%d) than last time (%d).",
+ __func__, sb, sb->sb_cc, toep->sb_cc));
+ toep->rx_credits += toep->sb_cc - sb->sb_cc;
+#ifdef USE_DDP_RX_FLOW_CONTROL
+ toep->rx_credits -= m->m_len; /* adjust for F_RX_FC_DDP */
+#endif
+ sbappendstream_locked(sb, m);
+ toep->sb_cc = sb->sb_cc;
+ }
+ socantrcvmore_locked(so); /* unlocks the sockbuf */
+
KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt),
("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt,
be32toh(cpl->rcv_nxt)));
@@ -855,7 +888,7 @@
CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x",
__func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags);
- if (toepcb_flag(toep, TPF_ABORT_SHUTDOWN))
+ if (toep->flags & TPF_ABORT_SHUTDOWN)
goto done;
so = inp->inp_socket;
@@ -953,7 +986,7 @@
("%s: unexpected opcode 0x%x", __func__, opcode));
KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
- if (toepcb_flag(toep, TPF_SYNQE))
+ if (toep->flags & TPF_SYNQE)
return (do_abort_req_synqe(iq, rss, m));
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
@@ -974,8 +1007,8 @@
CTR6(KTR_CXGBE,
"%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d",
- __func__, tid, tcpstates[tp->t_state], toep->flags, inp->inp_flags,
- cpl->status);
+ __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags,
+ inp->inp_flags, cpl->status);
/*
* If we'd initiated an abort earlier the reply to it is responsible for
@@ -982,11 +1015,11 @@
* cleaning up resources. Otherwise we tear everything down right here
* right now. We owe the T4 a CPL_ABORT_RPL no matter what.
*/
- if (toepcb_flag(toep, TPF_ABORT_SHUTDOWN)) {
+ if (toep->flags & TPF_ABORT_SHUTDOWN) {
INP_WUNLOCK(inp);
goto done;
}
- toepcb_set_flag(toep, TPF_ABORT_SHUTDOWN);
+ toep->flags |= TPF_ABORT_SHUTDOWN;
so_error_set(so, abort_status_to_errno(tp, cpl->status));
tp = tcp_close(tp);
@@ -1019,7 +1052,7 @@
("%s: unexpected opcode 0x%x", __func__, opcode));
KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
- if (toepcb_flag(toep, TPF_SYNQE))
+ if (toep->flags & TPF_SYNQE)
return (do_abort_rpl_synqe(iq, rss, m));
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
@@ -1027,7 +1060,7 @@
CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d",
__func__, tid, toep, inp, cpl->status);
- KASSERT(toepcb_flag(toep, TPF_ABORT_SHUTDOWN),
+ KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
("%s: wasn't expecting abort reply", __func__));
INP_WLOCK(inp);
@@ -1046,15 +1079,16 @@
struct inpcb *inp = toep->inp;
struct tcpcb *tp;
struct socket *so;
- struct sockbuf *so_rcv;
+ struct sockbuf *sb;
+ int len;
- if (__predict_false(toepcb_flag(toep, TPF_SYNQE))) {
+ if (__predict_false(toep->flags & TPF_SYNQE)) {
/*
* do_pass_establish failed and must be attempting to abort the
* synqe's tid. Meanwhile, the T4 has sent us data for such a
* connection.
*/
- KASSERT(toepcb_flag(toep, TPF_ABORT_SHUTDOWN),
+ KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
("%s: synqe and tid isn't being aborted.", __func__));
m_freem(m);
return (0);
@@ -1064,11 +1098,12 @@
/* strip off CPL header */
m_adj(m, sizeof(*cpl));
+ len = m->m_pkthdr.len;
INP_WLOCK(inp);
if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
- __func__, tid, m->m_pkthdr.len, inp->inp_flags);
+ __func__, tid, len, inp->inp_flags);
INP_WUNLOCK(inp);
m_freem(m);
return (0);
@@ -1084,21 +1119,20 @@
}
#endif
- tp->rcv_nxt += m->m_pkthdr.len;
- KASSERT(tp->rcv_wnd >= m->m_pkthdr.len,
- ("%s: negative window size", __func__));
- tp->rcv_wnd -= m->m_pkthdr.len;
+ tp->rcv_nxt += len;
+ KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__));
+ tp->rcv_wnd -= len;
tp->t_rcvtime = ticks;
so = inp_inpcbtosocket(inp);
- so_rcv = &so->so_rcv;
- SOCKBUF_LOCK(so_rcv);
+ sb = &so->so_rcv;
+ SOCKBUF_LOCK(sb);
- if (__predict_false(so_rcv->sb_state & SBS_CANTRCVMORE)) {
+ if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
- __func__, tid, m->m_pkthdr.len);
+ __func__, tid, len);
m_freem(m);
- SOCKBUF_UNLOCK(so_rcv);
+ SOCKBUF_UNLOCK(sb);
INP_WUNLOCK(inp);
INP_INFO_WLOCK(&V_tcbinfo);
@@ -1112,23 +1146,76 @@
}
/* receive buffer autosize */
- if (so_rcv->sb_flags & SB_AUTOSIZE &&
+ if (sb->sb_flags & SB_AUTOSIZE &&
V_tcp_do_autorcvbuf &&
- so_rcv->sb_hiwat < V_tcp_autorcvbuf_max &&
- m->m_pkthdr.len > (sbspace(so_rcv) / 8 * 7)) {
- unsigned int hiwat = so_rcv->sb_hiwat;
+ sb->sb_hiwat < V_tcp_autorcvbuf_max &&
+ len > (sbspace(sb) / 8 * 7)) {
+ unsigned int hiwat = sb->sb_hiwat;
unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
V_tcp_autorcvbuf_max);
- if (!sbreserve_locked(so_rcv, newsize, so, NULL))
- so_rcv->sb_flags &= ~SB_AUTOSIZE;
+ if (!sbreserve_locked(sb, newsize, so, NULL))
+ sb->sb_flags &= ~SB_AUTOSIZE;
else
toep->rx_credits += newsize - hiwat;
}
- toep->enqueued += m->m_pkthdr.len;
- sbappendstream_locked(so_rcv, m);
+
+ if (toep->ulp_mode == ULP_MODE_TCPDDP) {
+ int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off;
+
+ if (changed) {
+ if (__predict_false(!(toep->ddp_flags & DDP_SC_REQ))) {
+ /* XXX: handle this if legitimate */
+ panic("%s: unexpected DDP state change %d",
+ __func__, cpl->ddp_off);
+ }
+ toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
+ }
+
+ if ((toep->ddp_flags & DDP_OK) == 0 &&
+ time_uptime >= toep->ddp_disabled + DDP_RETRY_WAIT) {
+ toep->ddp_score = DDP_LOW_SCORE;
+ toep->ddp_flags |= DDP_OK;
+ CTR3(KTR_CXGBE, "%s: tid %u DDP_OK @ %u",
+ __func__, tid, time_uptime);
+ }
+
+ if (toep->ddp_flags & DDP_ON) {
+
+ /*
+ * CPL_RX_DATA with DDP on can only be an indicate. Ask
+ * soreceive to post a buffer or disable DDP. The
+ * payload that arrived in this indicate is appended to
+ * the socket buffer as usual.
+ */
+
+#if 0
+ CTR5(KTR_CXGBE,
+ "%s: tid %u (0x%x) DDP indicate (seq 0x%x, len %d)",
+ __func__, tid, toep->flags, be32toh(cpl->seq), len);
+#endif
+ sb->sb_flags |= SB_DDP_INDICATE;
+ } else if ((toep->ddp_flags & (DDP_OK|DDP_SC_REQ)) == DDP_OK &&
+ tp->rcv_wnd > DDP_RSVD_WIN && len >= sc->tt.ddp_thres) {
+
+ /*
+ * DDP allowed but isn't on (and a request to switch it
+ * on isn't pending either), and conditions are ripe for
+ * it to work. Switch it on.
+ */
+
+ enable_ddp(sc, toep);
+ }
+ }
+
+ KASSERT(toep->sb_cc >= sb->sb_cc,
+ ("%s: sb %p has more data (%d) than last time (%d).",
+ __func__, sb, sb->sb_cc, toep->sb_cc));
+ toep->rx_credits += toep->sb_cc - sb->sb_cc;
+ sbappendstream_locked(sb, m);
+ toep->sb_cc = sb->sb_cc;
sorwakeup_locked(so);
- SOCKBUF_UNLOCK_ASSERT(so_rcv);
+ SOCKBUF_UNLOCK_ASSERT(sb);
INP_WUNLOCK(inp);
return (0);
@@ -1179,8 +1266,8 @@
* Very unusual case: we'd sent a flowc + abort_req for a synq entry and
* now this comes back carrying the credits for the flowc.
*/
- if (__predict_false(toepcb_flag(toep, TPF_SYNQE))) {
- KASSERT(toepcb_flag(toep, TPF_ABORT_SHUTDOWN),
+ if (__predict_false(toep->flags & TPF_SYNQE)) {
+ KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
("%s: credits for a synq entry %p", __func__, toep));
return (0);
}
@@ -1194,7 +1281,7 @@
INP_WLOCK(inp);
- if (__predict_false(toepcb_flag(toep, TPF_ABORT_SHUTDOWN))) {
+ if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) {
INP_WUNLOCK(inp);
return (0);
}
@@ -1250,11 +1337,11 @@
}
/* XXX */
- if ((toepcb_flag(toep, TPF_TX_SUSPENDED) &&
+ if ((toep->flags & TPF_TX_SUSPENDED &&
toep->tx_credits >= MIN_OFLD_TX_CREDITS) ||
toep->tx_credits == toep->txsd_total *
howmany((sizeof(struct fw_ofld_tx_data_wr) + 1), 16)) {
- toepcb_clr_flag(toep, TPF_TX_SUSPENDED);
+ toep->flags &= ~TPF_TX_SUSPENDED;
t4_push_frames(sc, toep);
}
INP_WUNLOCK(inp);
@@ -1262,7 +1349,52 @@
return (0);
}
+static int
+do_set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
+{
+ struct adapter *sc = iq->adapter;
+ const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1);
+ unsigned int tid = GET_TID(cpl);
+#ifdef INVARIANTS
+ unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
+#endif
+
+ KASSERT(opcode == CPL_SET_TCB_RPL,
+ ("%s: unexpected opcode 0x%x", __func__, opcode));
+ KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
+
+ if (tid >= sc->tids.ftid_base &&
+ tid < sc->tids.ftid_base + sc->tids.nftids)
+ return (t4_filter_rpl(iq, rss, m)); /* TCB is a filter */
+
+ CXGBE_UNIMPLEMENTED(__func__);
+}
+
void
+t4_set_tcb_field(struct adapter *sc, struct toepcb *toep, uint16_t word,
+ uint64_t mask, uint64_t val)
+{
+ struct wrqe *wr;
+ struct cpl_set_tcb_field *req;
+
+ wr = alloc_wrqe(sizeof(*req), toep->ctrlq);
+ if (wr == NULL) {
+ /* XXX */
+ panic("%s: allocation failure.", __func__);
+ }
+ req = wrtod(wr);
+
+ INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid);
+ req->reply_ctrl = htobe16(V_NO_REPLY(1) |
+ V_QUEUENO(toep->ofld_rxq->iq.abs_id));
+ req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0));
+ req->mask = htobe64(mask);
+ req->val = htobe64(val);
+
+ t4_wrq_tx(sc, wr);
+}
+
+void
t4_init_cpl_io_handlers(struct adapter *sc)
{
@@ -1272,5 +1404,13 @@
t4_register_cpl_handler(sc, CPL_ABORT_RPL_RSS, do_abort_rpl);
t4_register_cpl_handler(sc, CPL_RX_DATA, do_rx_data);
t4_register_cpl_handler(sc, CPL_FW4_ACK, do_fw4_ack);
+ t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl);
}
+
+void
+t4_uninit_cpl_io_handlers(struct adapter *sc)
+{
+
+ t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl);
+}
#endif
Modified: trunk/sys/dev/cxgbe/tom/t4_listen.c
===================================================================
--- trunk/sys/dev/cxgbe/tom/t4_listen.c 2016-09-15 09:05:39 UTC (rev 8017)
+++ trunk/sys/dev/cxgbe/tom/t4_listen.c 2016-09-15 20:11:39 UTC (rev 8018)
@@ -50,6 +50,7 @@
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
+#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
@@ -283,11 +284,11 @@
CTR4(KTR_CXGBE, "%s: synqe %p, tid %d%s",
__func__, synqe, synqe->tid,
- synqe_flag(synqe, TPF_ABORT_SHUTDOWN) ?
+ synqe->flags & TPF_ABORT_SHUTDOWN ?
" (abort already in progress)" : "");
- if (synqe_flag(synqe, TPF_ABORT_SHUTDOWN))
+ if (synqe->flags & TPF_ABORT_SHUTDOWN)
return; /* abort already in progress */
- synqe_set_flag(synqe, TPF_ABORT_SHUTDOWN);
+ synqe->flags |= TPF_ABORT_SHUTDOWN;
get_qids_from_mbuf(m, &txqid, &rxqid);
ofld_txq = &sc->sge.ofld_txq[txqid];
@@ -318,7 +319,7 @@
flowc->mnemval[2].val = htobe32(pi->tx_chan);
flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id);
- synqe_set_flag(synqe, TPF_FLOWC_WR_SENT);
+ synqe->flags |= TPF_FLOWC_WR_SENT;
/* ... then ABORT request */
INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid);
@@ -515,7 +516,7 @@
{
if (refcount_release(&synqe->refcnt)) {
- int needfree = synqe_flag(synqe, TPF_SYNQE_NEEDFREE);
+ int needfree = synqe->flags & TPF_SYNQE_NEEDFREE;
m_freem(synqe->syn);
if (needfree)
@@ -740,7 +741,7 @@
* cleaning up resources. Otherwise we tear everything down right here
* right now. We owe the T4 a CPL_ABORT_RPL no matter what.
*/
- if (synqe_flag(synqe, TPF_ABORT_SHUTDOWN)) {
+ if (synqe->flags & TPF_ABORT_SHUTDOWN) {
INP_WUNLOCK(inp);
goto done;
}
@@ -775,7 +776,7 @@
__func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
INP_WLOCK(inp);
- KASSERT(synqe_flag(synqe, TPF_ABORT_SHUTDOWN),
+ KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
("%s: wasn't expecting abort reply for synqe %p (0x%x)",
__func__, synqe, synqe->flags));
@@ -798,13 +799,14 @@
INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
INP_WLOCK_ASSERT(inp);
- KASSERT(synqe_flag(synqe, TPF_SYNQE),
+ KASSERT(synqe->flags & TPF_SYNQE,
("%s: %p not a synq_entry?", __func__, arg));
offload_socket(so, toep);
make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
- toepcb_set_flag(toep, TPF_CPL_PENDING);
+ toep->flags |= TPF_CPL_PENDING;
update_tid(sc, synqe->tid, toep);
+ synqe->flags |= TPF_SYNQE_EXPANDED;
}
static inline void
@@ -843,14 +845,12 @@
synqe = malloc(sizeof(*synqe), M_CXGBE, M_NOWAIT);
if (synqe == NULL)
return (NULL);
- } else
+ synqe->flags = TPF_SYNQE | TPF_SYNQE_NEEDFREE;
+ } else {
synqe = (void *)(m->m_data + m->m_len + tspace - sizeof(*synqe));
+ synqe->flags = TPF_SYNQE;
+ }
- synqe->flags = 0;
- synqe_set_flag(synqe, TPF_SYNQE);
- if (tspace < len)
- synqe_set_flag(synqe, TPF_SYNQE_NEEDFREE);
-
return (synqe);
}
@@ -881,7 +881,7 @@
*/
static uint32_t
calc_opt2p(struct adapter *sc, struct port_info *pi, int rxqid,
- const struct tcp_options *tcpopt, struct tcphdr *th)
+ const struct tcp_options *tcpopt, struct tcphdr *th, int ulp_mode)
{
uint32_t opt2 = 0;
struct sge_ofld_rxq *ofld_rxq = &sc->sge.ofld_rxq[rxqid];
@@ -902,6 +902,11 @@
opt2 |= F_RX_COALESCE_VALID | V_RX_COALESCE(M_RX_COALESCE);
opt2 |= F_RSS_QUEUE_VALID | V_RSS_QUEUE(ofld_rxq->iq.abs_id);
+#ifdef USE_DDP_RX_FLOW_CONTROL
+ if (ulp_mode == ULP_MODE_TCPDDP)
+ opt2 |= F_RX_FC_VALID | F_RX_FC_DDP;
+#endif
+
return htobe32(opt2);
}
@@ -985,7 +990,7 @@
struct l2t_entry *e = NULL;
struct rtentry *rt;
struct sockaddr_in nam;
- int rscale, mtu_idx, rx_credits, rxqid;
+ int rscale, mtu_idx, rx_credits, rxqid, ulp_mode;
struct synq_entry *synqe = NULL;
int reject_reason;
uint16_t vid;
@@ -1108,9 +1113,13 @@
get_qids_from_mbuf(m, NULL, &rxqid);
INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
- rpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, rx_credits,
- ULP_MODE_NONE);
- rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th);
+ if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) {
+ ulp_mode = ULP_MODE_TCPDDP;
+ synqe->flags |= TPF_SYNQE_TCPDDP;
+ } else
+ ulp_mode = ULP_MODE_NONE;
+ rpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, rx_credits, ulp_mode);
+ rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode);
synqe->tid = tid;
synqe->lctx = lctx;
@@ -1151,7 +1160,7 @@
INP_WLOCK(inp);
if (__predict_false(inp->inp_flags & INP_DROPPED)) {
/* listener closed. synqe must have been aborted. */
- KASSERT(synqe_flag(synqe, TPF_ABORT_SHUTDOWN),
+ KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
("%s: listener %p closed but synqe %p not aborted",
__func__, inp, synqe));
@@ -1169,7 +1178,7 @@
* that can only happen if the listener was closed and we just
* checked for that.
*/
- KASSERT(!synqe_flag(synqe, TPF_ABORT_SHUTDOWN),
+ KASSERT(!(synqe->flags & TPF_ABORT_SHUTDOWN),
("%s: synqe %p aborted, but listener %p not dropped.",
__func__, synqe, inp));
@@ -1266,7 +1275,7 @@
("%s: unexpected opcode 0x%x", __func__, opcode));
KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
- KASSERT(synqe_flag(synqe, TPF_SYNQE),
+ KASSERT(synqe->flags & TPF_SYNQE,
("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
INP_INFO_WLOCK(&V_tcbinfo); /* for syncache_expand */
@@ -1283,7 +1292,7 @@
* on the lctx's synq. do_abort_rpl for the tid is responsible
* for cleaning up.
*/
- KASSERT(synqe_flag(synqe, TPF_ABORT_SHUTDOWN),
+ KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
("%s: listen socket dropped but tid %u not aborted.",
__func__, tid));
@@ -1313,7 +1322,10 @@
}
toep->tid = tid;
toep->l2te = &sc->l2t->l2tab[synqe->l2e_idx];
- toep->ulp_mode = ULP_MODE_NONE;
+ if (synqe->flags & TPF_SYNQE_TCPDDP)
+ set_tcpddp_ulp_mode(toep);
+ else
+ toep->ulp_mode = ULP_MODE_NONE;
/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
toep->rx_credits = synqe->rcv_bufsize;
@@ -1339,6 +1351,24 @@
goto reset;
}
+ /*
+ * This is for the unlikely case where the syncache entry that we added
+ * has been evicted from the syncache, but the syncache_expand above
+ * works because of syncookies.
+ *
+ * XXX: we've held the tcbinfo lock throughout so there's no risk of
+ * anyone accept'ing a connection before we've installed our hooks, but
+ * this somewhat defeats the purpose of having a tod_offload_socket :-(
+ */
+ if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
+ struct inpcb *new_inp = sotoinpcb(so);
+
+ INP_WLOCK(new_inp);
+ tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
+ t4_offload_socket(TOEDEV(ifp), synqe, so);
+ INP_WUNLOCK(new_inp);
+ }
+
/* Done with the synqe */
TAILQ_REMOVE(&lctx->synq, synqe, link);
inp = release_lctx(sc, lctx);
Modified: trunk/sys/dev/cxgbe/tom/t4_tom.c
===================================================================
--- trunk/sys/dev/cxgbe/tom/t4_tom.c 2016-09-15 09:05:39 UTC (rev 8017)
+++ trunk/sys/dev/cxgbe/tom/t4_tom.c 2016-09-15 20:11:39 UTC (rev 8018)
@@ -55,6 +55,9 @@
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"
+static struct protosw ddp_protosw;
+static struct pr_usrreqs ddp_usrreqs;
+
/* Module ops */
static int t4_tom_mod_load(void);
static int t4_tom_mod_unload(void);
@@ -138,9 +141,9 @@
free_toepcb(struct toepcb *toep)
{
- KASSERT(toepcb_flag(toep, TPF_ATTACHED) == 0,
+ KASSERT(!(toep->flags & TPF_ATTACHED),
("%s: attached to an inpcb", __func__));
- KASSERT(toepcb_flag(toep, TPF_CPL_PENDING) == 0,
+ KASSERT(!(toep->flags & TPF_CPL_PENDING),
("%s: CPL pending", __func__));
free(toep, M_CXGBE);
@@ -167,6 +170,8 @@
sb = &so->so_rcv;
SOCKBUF_LOCK(sb);
sb->sb_flags |= SB_NOCOALESCE;
+ if (toep->ulp_mode == ULP_MODE_TCPDDP)
+ so->so_proto = &ddp_protosw;
SOCKBUF_UNLOCK(sb);
/* Update TCP PCB */
@@ -176,7 +181,7 @@
/* Install an extra hold on inp */
toep->inp = inp;
- toepcb_set_flag(toep, TPF_ATTACHED);
+ toep->flags |= TPF_ATTACHED;
in_pcbref(inp);
/* Add the TOE PCB to the active list */
@@ -211,7 +216,7 @@
tp->t_flags &= ~TF_TOE;
toep->inp = NULL;
- toepcb_clr_flag(toep, TPF_ATTACHED);
+ toep->flags &= ~TPF_ATTACHED;
if (in_pcbrele_wlocked(inp))
panic("%s: inp freed.", __func__);
@@ -227,14 +232,17 @@
struct adapter *sc = td_adapter(td);
int tid = toep->tid;
- KASSERT(toepcb_flag(toep, TPF_CPL_PENDING) == 0,
+ KASSERT(!(toep->flags & TPF_CPL_PENDING),
("%s: %p has CPL pending.", __func__, toep));
- KASSERT(toepcb_flag(toep, TPF_ATTACHED) == 0,
+ KASSERT(!(toep->flags & TPF_ATTACHED),
("%s: %p is still attached.", __func__, toep));
CTR4(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p)",
__func__, toep, tid, toep->l2te);
+ if (toep->ulp_mode == ULP_MODE_TCPDDP)
+ release_ddp_resources(toep);
+
if (toep->l2te)
t4_l2t_release(toep->l2te);
@@ -269,7 +277,7 @@
INP_WLOCK_ASSERT(inp);
KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
- KASSERT(toepcb_flag(toep, TPF_ATTACHED),
+ KASSERT(toep->flags & TPF_ATTACHED,
("%s: not attached", __func__));
#ifdef KTR
@@ -287,9 +295,9 @@
tp->t_toe = NULL;
tp->t_flags &= ~TF_TOE;
- toepcb_clr_flag(toep, TPF_ATTACHED);
+ toep->flags &= ~TPF_ATTACHED;
- if (toepcb_flag(toep, TPF_CPL_PENDING) == 0)
+ if (!(toep->flags & TPF_CPL_PENDING))
release_offload_resources(toep);
}
@@ -304,7 +312,7 @@
KASSERT(inp != NULL, ("%s: inp is NULL", __func__));
INP_WLOCK_ASSERT(inp);
- KASSERT(toepcb_flag(toep, TPF_CPL_PENDING),
+ KASSERT(toep->flags & TPF_CPL_PENDING,
("%s: CPL not pending already?", __func__));
CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)",
@@ -311,9 +319,9 @@
__func__, toep->tid, toep, toep->flags, inp, inp->inp_flags);
toep->inp = NULL;
- toepcb_clr_flag(toep, TPF_CPL_PENDING);
+ toep->flags &= ~TPF_CPL_PENDING;
- if (toepcb_flag(toep, TPF_ATTACHED) == 0)
+ if (!(toep->flags & TPF_ATTACHED))
release_offload_resources(toep);
if (!in_pcbrele_wlocked(inp))
@@ -568,6 +576,8 @@
("%s: lctx hash table is not empty.", __func__));
t4_uninit_l2t_cpl_handlers(sc);
+ t4_uninit_cpl_io_handlers(sc);
+ t4_uninit_ddp(sc, td);
if (td->listen_mask != 0)
hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask);
@@ -613,6 +623,8 @@
if (rc != 0)
goto done;
+ t4_init_ddp(sc, td);
+
/* CPL handlers */
t4_init_connect_cpl_handlers(sc);
t4_init_l2t_cpl_handlers(sc);
@@ -688,7 +700,17 @@
t4_tom_mod_load(void)
{
int rc;
+ struct protosw *tcp_protosw;
+ tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM);
+ if (tcp_protosw == NULL)
+ return (ENOPROTOOPT);
+
+ bcopy(tcp_protosw, &ddp_protosw, sizeof(ddp_protosw));
+ bcopy(tcp_protosw->pr_usrreqs, &ddp_usrreqs, sizeof(ddp_usrreqs));
+ ddp_usrreqs.pru_soreceive = t4_soreceive_ddp;
+ ddp_protosw.pr_usrreqs = &ddp_usrreqs;
+
rc = t4_register_uld(&tom_uld_info);
if (rc != 0)
t4_tom_mod_unload();
Modified: trunk/sys/dev/cxgbe/tom/t4_tom.h
===================================================================
--- trunk/sys/dev/cxgbe/tom/t4_tom.h 2016-09-15 09:05:39 UTC (rev 8017)
+++ trunk/sys/dev/cxgbe/tom/t4_tom.h 2016-09-15 20:11:39 UTC (rev 8018)
@@ -46,25 +46,58 @@
*/
#define MAX_RCV_WND ((1U << 27) - 1)
+#define DDP_RSVD_WIN (16 * 1024U)
+#define SB_DDP_INDICATE SB_IN_TOE /* soreceive must respond to indicate */
+
+#define M_DDP M_PROTO1
+
+#define USE_DDP_RX_FLOW_CONTROL
+
/* TOE PCB flags */
enum {
- TPF_ATTACHED, /* a tcpcb refers to this toepcb */
- TPF_FLOWC_WR_SENT, /* firmware flow context WR sent */
- TPF_TX_DATA_SENT, /* some data sent */
- TPF_TX_SUSPENDED, /* tx suspended for lack of resources */
- TPF_SEND_FIN, /* send FIN after sending all pending data */
- TPF_FIN_SENT, /* FIN has been sent */
- TPF_ABORT_SHUTDOWN, /* connection abort is in progress */
- TPF_CPL_PENDING, /* haven't received the last CPL */
- TPF_SYNQE, /* synq_entry, not really a toepcb */
- TPF_SYNQE_NEEDFREE, /* synq_entry was allocated externally */
+ TPF_ATTACHED = (1 << 0), /* a tcpcb refers to this toepcb */
+ TPF_FLOWC_WR_SENT = (1 << 1), /* firmware flow context WR sent */
+ TPF_TX_DATA_SENT = (1 << 2), /* some data sent */
+ TPF_TX_SUSPENDED = (1 << 3), /* tx suspended for lack of resources */
+ TPF_SEND_FIN = (1 << 4), /* send FIN after all pending data */
+ TPF_FIN_SENT = (1 << 5), /* FIN has been sent */
+ TPF_ABORT_SHUTDOWN = (1 << 6), /* connection abort is in progress */
+ TPF_CPL_PENDING = (1 << 7), /* haven't received the last CPL */
+ TPF_SYNQE = (1 << 8), /* synq_entry, not really a toepcb */
+ TPF_SYNQE_NEEDFREE = (1 << 9), /* synq_entry was malloc'd separately */
+ TPF_SYNQE_TCPDDP = (1 << 10), /* ulp_mode TCPDDP in toepcb */
+ TPF_SYNQE_EXPANDED = (1 << 11), /* toepcb ready, tid context updated */
};
+enum {
+ DDP_OK = (1 << 0), /* OK to turn on DDP */
+ DDP_SC_REQ = (1 << 1), /* state change (on/off) requested */
+ DDP_ON = (1 << 2), /* DDP is turned on */
+ DDP_BUF0_ACTIVE = (1 << 3), /* buffer 0 in use (not invalidated) */
+ DDP_BUF1_ACTIVE = (1 << 4), /* buffer 1 in use (not invalidated) */
+};
+
struct ofld_tx_sdesc {
uint32_t plen; /* payload length */
uint8_t tx_credits; /* firmware tx credits (unit is 16B) */
};
+struct ppod_region {
+ TAILQ_ENTRY(ppod_region) link;
+ int used; /* # of pods used by this region */
+ int free; /* # of contiguous pods free right after this region */
+};
+
+struct ddp_buffer {
+ uint32_t tag; /* includes color, page pod addr, and DDP page size */
+ int nppods;
+ int offset;
+ int len;
+ struct ppod_region ppod_region;
+ int npages;
+ vm_page_t *pages;
+};
+
struct toepcb {
TAILQ_ENTRY(toepcb) link; /* toep_list */
unsigned int flags; /* miscellaneous flags */
@@ -77,11 +110,16 @@
struct l2t_entry *l2te; /* L2 table entry used by this connection */
int tid; /* Connection identifier */
unsigned int tx_credits;/* tx WR credits (in 16 byte units) remaining */
- unsigned int enqueued; /* # of bytes added to so_rcv (not yet read) */
+ unsigned int sb_cc; /* last noted value of so_rcv->sb_cc */
int rx_credits; /* rx credits (in bytes) to be returned to hw */
unsigned int ulp_mode; /* ULP mode */
+ unsigned int ddp_flags;
+ struct ddp_buffer *db[2];
+ time_t ddp_disabled;
+ uint8_t ddp_score;
+
/* Tx software descriptor */
uint8_t txsd_total;
uint8_t txsd_pidx;
@@ -97,27 +135,19 @@
unsigned int mss;
};
-static inline int
-toepcb_flag(struct toepcb *toep, int flag)
-{
+#define DDP_RETRY_WAIT 5 /* seconds to wait before re-enabling DDP */
+#define DDP_LOW_SCORE 1
+#define DDP_HIGH_SCORE 3
- return isset(&toep->flags, flag);
-}
-
static inline void
-toepcb_set_flag(struct toepcb *toep, int flag)
+set_tcpddp_ulp_mode(struct toepcb *toep)
{
- setbit(&toep->flags, flag);
+ toep->ulp_mode = ULP_MODE_TCPDDP;
+ toep->ddp_flags = DDP_OK;
+ toep->ddp_score = DDP_LOW_SCORE;
}
-static inline void
-toepcb_clr_flag(struct toepcb *toep, int flag)
-{
-
- clrbit(&toep->flags, flag);
-}
-
/*
* Compressed state for embryonic connections for a listener. Barely fits in
* 64B, try not to grow it further.
@@ -136,27 +166,6 @@
uint16_t rcv_bufsize;
};
-static inline int
-synqe_flag(struct synq_entry *synqe, int flag)
-{
-
- return isset(&synqe->flags, flag);
-}
-
-static inline void
-synqe_set_flag(struct synq_entry *synqe, int flag)
-{
-
- setbit(&synqe->flags, flag);
-}
-
-static inline void
-synqe_clr_flag(struct synq_entry *synqe, int flag)
-{
-
- clrbit(&synqe->flags, flag);
-}
-
/* listen_ctx flags */
#define LCTX_RPL_PENDING 1 /* waiting for a CPL_PASS_OPEN_RPL */
@@ -171,6 +180,8 @@
TAILQ_HEAD(, synq_entry) synq;
};
+TAILQ_HEAD(ppod_head, ppod_region);
+
struct tom_data {
struct toedev tod;
@@ -178,10 +189,16 @@
struct mtx toep_list_lock;
TAILQ_HEAD(, toepcb) toep_list;
+ struct mtx lctx_hash_lock;
LIST_HEAD(, listen_ctx) *listen_hash;
u_long listen_mask;
int lctx_count; /* # of lctx in the hash table */
- struct mtx lctx_hash_lock;
+
+ struct mtx ppod_lock;
+ int nppods;
+ int nppods_free; /* # of available ppods */
+ int nppods_free_head; /* # of available ppods at the beginning */
+ struct ppod_head ppods;
};
static inline struct tom_data *
@@ -236,6 +253,7 @@
/* t4_cpl_io.c */
void t4_init_cpl_io_handlers(struct adapter *);
+void t4_uninit_cpl_io_handlers(struct adapter *);
void send_abort_rpl(struct adapter *, struct sge_wrq *, int , int);
void send_flowc_wr(struct toepcb *, struct flowc_tx_params *);
void send_reset(struct adapter *, struct toepcb *, uint32_t);
@@ -244,5 +262,14 @@
int t4_tod_output(struct toedev *, struct tcpcb *);
int t4_send_fin(struct toedev *, struct tcpcb *);
int t4_send_rst(struct toedev *, struct tcpcb *);
+void t4_set_tcb_field(struct adapter *, struct toepcb *, uint16_t, uint64_t,
+ uint64_t);
+/* t4_ddp.c */
+void t4_init_ddp(struct adapter *, struct tom_data *);
+void t4_uninit_ddp(struct adapter *, struct tom_data *);
+int t4_soreceive_ddp(struct socket *, struct sockaddr **, struct uio *,
+ struct mbuf **, struct mbuf **, int *);
+void enable_ddp(struct adapter *, struct toepcb *toep);
+void release_ddp_resources(struct toepcb *toep);
#endif
Modified: trunk/sys/modules/cxgbe/if_cxgbe/Makefile
===================================================================
--- trunk/sys/modules/cxgbe/if_cxgbe/Makefile 2016-09-15 09:05:39 UTC (rev 8017)
+++ trunk/sys/modules/cxgbe/if_cxgbe/Makefile 2016-09-15 20:11:39 UTC (rev 8018)
@@ -2,6 +2,8 @@
# $FreeBSD$
#
+.include <bsd.own.mk>
+
CXGBE = ${.CURDIR}/../../../dev/cxgbe
.PATH: ${CXGBE} ${CXGBE}/common
@@ -17,4 +19,17 @@
# Provide the timestamp of a packet in its header mbuf.
#CFLAGS+= -DT4_PKT_TIMESTAMP
+.if !defined(KERNBUILDDIR)
+.if ${MK_INET_SUPPORT} != "no"
+opt_inet.h:
+ @echo "#define INET 1" > ${.TARGET}
+ @echo "#define TCP_OFFLOAD 1" >> ${.TARGET}
+.endif
+
+.if ${MK_INET6_SUPPORT} != "no"
+opt_inet6.h:
+ @echo "#define INET6 1" > ${.TARGET}
+.endif
+.endif
+
.include <bsd.kmod.mk>
Modified: trunk/sys/modules/cxgbe/tom/Makefile
===================================================================
--- trunk/sys/modules/cxgbe/tom/Makefile 2016-09-15 09:05:39 UTC (rev 8017)
+++ trunk/sys/modules/cxgbe/tom/Makefile 2016-09-15 20:11:39 UTC (rev 8018)
@@ -2,14 +2,24 @@
# $FreeBSD$
#
+.include <bsd.own.mk>
+
CXGBE = ${.CURDIR}/../../../dev/cxgbe
.PATH: ${CXGBE}/tom
KMOD = t4_tom
-SRCS = t4_tom.c t4_connect.c t4_listen.c t4_cpl_io.c t4_tom_l2t.c
+SRCS = t4_tom.c t4_connect.c t4_listen.c t4_cpl_io.c t4_tom_l2t.c t4_ddp.c
SRCS+= device_if.h bus_if.h pci_if.h
SRCS+= opt_inet.h
CFLAGS+= -I${CXGBE}
+.if !defined(KERNBUILDDIR)
+.if ${MK_INET_SUPPORT} != "no"
+opt_inet.h:
+ echo "#define INET 1" > ${.TARGET}
+ echo "#define TCP_OFFLOAD 1" >> ${.TARGET}
+.endif
+.endif
+
.include <bsd.kmod.mk>
More information about the Midnightbsd-cvs
mailing list