[Midnightbsd-cvs] src [12378] trunk/sys/sys/mbuf.h: Sync with FreeBSD 11-stable

laffer1 at midnightbsd.org
Sun Feb 16 17:35:11 EST 2020


Revision: 12378
          http://svnweb.midnightbsd.org/src/?rev=12378
Author:   laffer1
Date:     2020-02-16 17:35:10 -0500 (Sun, 16 Feb 2020)
Log Message:
-----------
Sync with FreeBSD 11-stable

Modified Paths:
--------------
    trunk/sys/sys/mbuf.h

Modified: trunk/sys/sys/mbuf.h
===================================================================
--- trunk/sys/sys/mbuf.h	2020-02-16 22:33:15 UTC (rev 12377)
+++ trunk/sys/sys/mbuf.h	2020-02-16 22:35:10 UTC (rev 12378)
@@ -29,7 +29,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)mbuf.h	8.5 (Berkeley) 2/19/95
- * $FreeBSD: stable/10/sys/sys/mbuf.h 282786 2015-05-12 00:27:20Z hiren $
+ * $FreeBSD: stable/11/sys/sys/mbuf.h 338618 2018-09-12 19:13:32Z markj $
  */
 
 #ifndef _SYS_MBUF_H_
@@ -45,6 +45,32 @@
 #endif
 #endif
 
+#ifdef _KERNEL
+#include <sys/sdt.h>
+
+#define	MBUF_PROBE1(probe, arg0)					\
+	SDT_PROBE1(sdt, , , probe, arg0)
+#define	MBUF_PROBE2(probe, arg0, arg1)					\
+	SDT_PROBE2(sdt, , , probe, arg0, arg1)
+#define	MBUF_PROBE3(probe, arg0, arg1, arg2)				\
+	SDT_PROBE3(sdt, , , probe, arg0, arg1, arg2)
+#define	MBUF_PROBE4(probe, arg0, arg1, arg2, arg3)			\
+	SDT_PROBE4(sdt, , , probe, arg0, arg1, arg2, arg3)
+#define	MBUF_PROBE5(probe, arg0, arg1, arg2, arg3, arg4)		\
+	SDT_PROBE5(sdt, , , probe, arg0, arg1, arg2, arg3, arg4)
+
+SDT_PROBE_DECLARE(sdt, , , m__init);
+SDT_PROBE_DECLARE(sdt, , , m__gethdr);
+SDT_PROBE_DECLARE(sdt, , , m__get);
+SDT_PROBE_DECLARE(sdt, , , m__getcl);
+SDT_PROBE_DECLARE(sdt, , , m__clget);
+SDT_PROBE_DECLARE(sdt, , , m__cljget);
+SDT_PROBE_DECLARE(sdt, , , m__cljset);
+SDT_PROBE_DECLARE(sdt, , , m__free);
+SDT_PROBE_DECLARE(sdt, , , m__freem);
+
+#endif /* _KERNEL */
+
 /*
  * Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead.
  * An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in
@@ -61,9 +87,15 @@
  * MLEN is data length in a normal mbuf.
  * MHLEN is data length in an mbuf with pktheader.
  * MINCLSIZE is a smallest amount of data that should be put into cluster.
+ *
+ * Compile-time assertions in uipc_mbuf.c test these values to ensure that
+ * they are sensible.
  */
-#define	MLEN		((int)(MSIZE - sizeof(struct m_hdr)))
-#define	MHLEN		((int)(MLEN - sizeof(struct pkthdr)))
+struct mbuf;
+#define	MHSIZE		offsetof(struct mbuf, m_dat)
+#define	MPKTHSIZE	offsetof(struct mbuf, m_pktdat)
+#define	MLEN		((int)(MSIZE - MHSIZE))
+#define	MHLEN		((int)(MSIZE - MPKTHSIZE))
 #define	MINCLSIZE	(MHLEN + 1)
 
 #ifdef _KERNEL
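
For a concrete feel for the new offsetof()-based sizes, here is a rough worked example (assuming MSIZE is 256 bytes, its stock value in sys/param.h, and the LP64 structure sizes quoted in the comments further down; the exact numbers are illustrative only):

    MHSIZE    = offsetof(struct mbuf, m_dat)    = 32              (bare mbuf header)
    MPKTHSIZE = offsetof(struct mbuf, m_pktdat) = 32 + 56  =  88  (header + pkthdr)
    MLEN      = MSIZE - MHSIZE                  = 256 - 32  = 224 bytes of embedded data
    MHLEN     = MSIZE - MPKTHSIZE               = 256 - 88  = 168 bytes after a pkthdr
    MINCLSIZE = MHLEN + 1                       = 169 bytes, smallest payload worth a cluster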
@@ -88,23 +120,6 @@
 #endif /* _KERNEL */
 
 /*
- * Header present at the beginning of every mbuf.
- * Size ILP32: 24
- *	 LP64: 32
- */
-struct m_hdr {
-	struct mbuf	*mh_next;	/* next buffer in chain */
-	struct mbuf	*mh_nextpkt;	/* next chain in queue/record */
-	caddr_t		 mh_data;	/* location of data */
-	int32_t		 mh_len;	/* amount of data in this mbuf */
-	uint32_t	 mh_type:8,	/* type of data in this mbuf */
-			 mh_flags:24;	/* flags; see below */
-#if !defined(__LP64__)
-	uint32_t	 mh_pad;	/* pad for 64bit alignment */
-#endif
-};
-
-/*
  * Packet tag structure (see below for details).
  */
 struct m_tag {
@@ -119,6 +134,8 @@
  * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set.
  * Size ILP32: 48
  *	 LP64: 56
+ * Compile-time assertions in uipc_mbuf.c test these values to ensure that
+ * they are correct.
  */
 struct pkthdr {
 	struct ifnet	*rcvif;		/* rcv interface */
@@ -136,7 +153,7 @@
 	uint8_t		 l4hlen;	/* layer 4 header length */
 	uint8_t		 l5hlen;	/* layer 5 header length */
 	union {
-		uint8_t  eigth[8];
+		uint8_t  eight[8];
 		uint16_t sixteen[4];
 		uint32_t thirtytwo[2];
 		uint64_t sixtyfour[1];
@@ -146,7 +163,7 @@
 
 	/* Layer specific non-persistent local storage for reassembly, etc. */
 	union {
-		uint8_t  eigth[8];
+		uint8_t  eight[8];
 		uint16_t sixteen[4];
 		uint32_t thirtytwo[2];
 		uint64_t sixtyfour[1];
@@ -166,14 +183,19 @@
  * set.
  * Size ILP32: 28
  *	 LP64: 48
+ * Compile-time assertions in uipc_mbuf.c test these values to ensure that
+ * they are correct.
  */
 struct m_ext {
-	volatile u_int	*ref_cnt;	/* pointer to ref count info */
+	union {
+		volatile u_int	 ext_count;	/* value of ref count info */
+		volatile u_int	*ext_cnt;	/* pointer to ref count info */
+	};
 	caddr_t		 ext_buf;	/* start of buffer */
 	uint32_t	 ext_size;	/* size of buffer, for ext_free */
 	uint32_t	 ext_type:8,	/* type of external storage */
 			 ext_flags:24;	/* external storage mbuf flags */
-	int		(*ext_free)	/* free routine if not the usual */
+	void		(*ext_free)	/* free routine if not the usual */
 			    (struct mbuf *, void *, void *);
 	void		*ext_arg1;	/* optional argument pointer */
 	void		*ext_arg2;	/* optional argument pointer */
@@ -184,39 +206,50 @@
  * purposes.
  */
 struct mbuf {
-	struct m_hdr	m_hdr;
+	/*
+	 * Header present at the beginning of every mbuf.
+	 * Size ILP32: 24
+	 *      LP64: 32
+	 * Compile-time assertions in uipc_mbuf.c test these values to ensure
+	 * that they are correct.
+	 */
+	union {	/* next buffer in chain */
+		struct mbuf		*m_next;
+		SLIST_ENTRY(mbuf)	m_slist;
+		STAILQ_ENTRY(mbuf)	m_stailq;
+	};
+	union {	/* next chain in queue/record */
+		struct mbuf		*m_nextpkt;
+		SLIST_ENTRY(mbuf)	m_slistpkt;
+		STAILQ_ENTRY(mbuf)	m_stailqpkt;
+	};
+	caddr_t		 m_data;	/* location of data */
+	int32_t		 m_len;		/* amount of data in this mbuf */
+	uint32_t	 m_type:8,	/* type of data in this mbuf */
+			 m_flags:24;	/* flags; see below */
+#if !defined(__LP64__)
+	uint32_t	 m_pad;		/* pad for 64bit alignment */
+#endif
+
+	/*
+	 * A set of optional headers (packet header, external storage header)
+	 * and internal data storage.  Historically, these arrays were sized
+	 * to MHLEN (space left after a packet header) and MLEN (space left
+	 * after only a regular mbuf header); they are now variable size in
+	 * order to support future work on variable-size mbufs.
+	 */
 	union {
 		struct {
-			struct pkthdr	MH_pkthdr;	/* M_PKTHDR set */
+			struct pkthdr	m_pkthdr;	/* M_PKTHDR set */
 			union {
-				struct m_ext	MH_ext;	/* M_EXT set */
-				char		MH_databuf[MHLEN];
-			} MH_dat;
-		} MH;
-		char	M_databuf[MLEN];		/* !M_PKTHDR, !M_EXT */
-	} M_dat;
+				struct m_ext	m_ext;	/* M_EXT set */
+				char		m_pktdat[0];
+			};
+		};
+		char	m_dat[0];			/* !M_PKTHDR, !M_EXT */
+	};
 };
-#define	m_next		m_hdr.mh_next
-#define	m_len		m_hdr.mh_len
-#define	m_data		m_hdr.mh_data
-#define	m_type		m_hdr.mh_type
-#define	m_flags		m_hdr.mh_flags
-#define	m_nextpkt	m_hdr.mh_nextpkt
-#define	m_pkthdr	M_dat.MH.MH_pkthdr
-#define	m_ext		M_dat.MH.MH_dat.MH_ext
-#define	m_pktdat	M_dat.MH.MH_dat.MH_databuf
-#define	m_dat		M_dat.M_databuf
 
-/* 
- * NOTE: forwards compatibility definitions for mbuf(9)
- *
- * These aren't 1:1 with the macros in r277203; in particular they're exposed
- * to both userland and kernel, whereas this is exposed to just _KERNEL -- to
- * avoid disruption with existing KBI/KPIs
- */
-#define	MHSIZE		offsetof(struct mbuf, m_dat)
-#define	MPKTHSIZE	offsetof(struct mbuf, m_pktdat)
-
 /*
  * mbuf flags of global significance and layer crossing.
  * Those of only protocol/layer specific significance are to be mapped
@@ -231,7 +264,7 @@
 #define	M_MCAST		0x00000020 /* send/received as link-level multicast */
 #define	M_PROMISC	0x00000040 /* packet was not for us */
 #define	M_VLANTAG	0x00000080 /* ether_vtag is valid */
-#define	M_FLOWID	0x00000100 /* deprecated: flowid is valid */
+#define	M_UNUSED_8	0x00000100 /* --available-- */
 #define	M_NOFREE	0x00000200 /* do not free mbuf, embedded in cluster */
 
 #define	M_PROTO1	0x00001000 /* protocol-specific */
@@ -247,6 +280,8 @@
 #define	M_PROTO11	0x00400000 /* protocol-specific */
 #define	M_PROTO12	0x00800000 /* protocol-specific */
 
+#define MB_DTOR_SKIP	0x1	/* don't pollute the cache by touching a freed mbuf */
+
 /*
  * Flags to purge when crossing layers.
  */
@@ -258,7 +293,7 @@
  * Flags preserved when copying m_pkthdr.
  */
 #define M_COPYFLAGS \
-    (M_PKTHDR|M_EOR|M_RDONLY|M_BCAST|M_MCAST|M_PROMISC|M_VLANTAG|M_FLOWID| \
+    (M_PKTHDR|M_EOR|M_RDONLY|M_BCAST|M_MCAST|M_PROMISC|M_VLANTAG| \
      M_PROTOFLAGS)
 
 /*
@@ -266,7 +301,7 @@
  */
 #define	M_FLAG_BITS \
     "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_BCAST\6M_MCAST" \
-    "\7M_PROMISC\10M_VLANTAG\11M_FLOWID"
+    "\7M_PROMISC\10M_VLANTAG"
 #define	M_FLAG_PROTOBITS \
     "\15M_PROTO1\16M_PROTO2\17M_PROTO3\20M_PROTO4\21M_PROTO5" \
     "\22M_PROTO6\23M_PROTO7\24M_PROTO8\25M_PROTO9\26M_PROTO10" \
@@ -284,32 +319,41 @@
  *
  * Most NICs support RSS, which provides ordering and explicit affinity, and
  * use the hash m_flag bits to indicate what header fields were covered by
- * the hash.  M_HASHTYPE_OPAQUE can be set by non-RSS cards or configurations
- * that provide an opaque flow identifier, allowing for ordering and
- * distribution without explicit affinity.
+ * the hash.  M_HASHTYPE_OPAQUE and M_HASHTYPE_OPAQUE_HASH can be set by non-
+ * RSS cards or configurations that provide an opaque flow identifier, allowing
+ * for ordering and distribution without explicit affinity.  Additionally,
+ * M_HASHTYPE_OPAQUE_HASH indicates that the flow identifier has hash
+ * properties.
  */
+#define	M_HASHTYPE_HASHPROP		0x80	/* has hash properties */
+#define	M_HASHTYPE_HASH(t)		(M_HASHTYPE_HASHPROP | (t))
+/* Microsoft RSS standard hash types */
 #define	M_HASHTYPE_NONE			0
-#define	M_HASHTYPE_RSS_IPV4		1	/* IPv4 2-tuple */
-#define	M_HASHTYPE_RSS_TCP_IPV4		2	/* TCPv4 4-tuple */
-#define	M_HASHTYPE_RSS_IPV6		3	/* IPv6 2-tuple */
-#define	M_HASHTYPE_RSS_TCP_IPV6		4	/* TCPv6 4-tuple */
-#define	M_HASHTYPE_RSS_IPV6_EX		5	/* IPv6 2-tuple + ext hdrs */
-#define	M_HASHTYPE_RSS_TCP_IPV6_EX	6	/* TCPv6 4-tiple + ext hdrs */
-#define	M_HASHTYPE_OPAQUE		255	/* ordering, not affinity */
+#define	M_HASHTYPE_RSS_IPV4		M_HASHTYPE_HASH(1) /* IPv4 2-tuple */
+#define	M_HASHTYPE_RSS_TCP_IPV4		M_HASHTYPE_HASH(2) /* TCPv4 4-tuple */
+#define	M_HASHTYPE_RSS_IPV6		M_HASHTYPE_HASH(3) /* IPv6 2-tuple */
+#define	M_HASHTYPE_RSS_TCP_IPV6		M_HASHTYPE_HASH(4) /* TCPv6 4-tuple */
+#define	M_HASHTYPE_RSS_IPV6_EX		M_HASHTYPE_HASH(5) /* IPv6 2-tuple +
+							    * ext hdrs */
+#define	M_HASHTYPE_RSS_TCP_IPV6_EX	M_HASHTYPE_HASH(6) /* TCPv6 4-tuple +
+							    * ext hdrs */
+/* Non-standard RSS hash types */
+#define	M_HASHTYPE_RSS_UDP_IPV4		M_HASHTYPE_HASH(7) /* IPv4 UDP 4-tuple*/
+#define	M_HASHTYPE_RSS_UDP_IPV4_EX	M_HASHTYPE_HASH(8) /* IPv4 UDP 4-tuple +
+							    * ext hdrs */
+#define	M_HASHTYPE_RSS_UDP_IPV6		M_HASHTYPE_HASH(9) /* IPv6 UDP 4-tuple*/
+#define	M_HASHTYPE_RSS_UDP_IPV6_EX	M_HASHTYPE_HASH(10)/* IPv6 UDP 4-tuple +
+							    * ext hdrs */
 
+#define	M_HASHTYPE_OPAQUE		63	/* ordering, not affinity */
+#define	M_HASHTYPE_OPAQUE_HASH		M_HASHTYPE_HASH(M_HASHTYPE_OPAQUE)
+						/* ordering+hash, not affinity*/
+
 #define	M_HASHTYPE_CLEAR(m)	((m)->m_pkthdr.rsstype = 0)
-/*
- * Handle M_FLOWID for legacy drivers still using them.
- */
-#define	M_HASHTYPE_GET(m)	(((m->m_flags & M_FLOWID) &&	    \
-    (m)->m_pkthdr.rsstype == M_HASHTYPE_NONE) ? M_HASHTYPE_OPAQUE : \
-    (m)->m_pkthdr.rsstype)
-#define	M_HASHTYPE_SET(m, v)	do {	    \
-	    if ((v) != M_HASHTYPE_NONE)	    \
-		m->m_flags |= M_FLOWID;	    \
-	    (m)->m_pkthdr.rsstype = (v);    \
-} while (0)
+#define	M_HASHTYPE_GET(m)	((m)->m_pkthdr.rsstype)
+#define	M_HASHTYPE_SET(m, v)	((m)->m_pkthdr.rsstype = (v))
 #define	M_HASHTYPE_TEST(m, v)	(M_HASHTYPE_GET(m) == (v))
+#define	M_HASHTYPE_ISHASH(m)	(M_HASHTYPE_GET(m) & M_HASHTYPE_HASHPROP)
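
As an illustration (not part of the committed diff), a minimal sketch of how a NIC receive path might use the new hash-type macros; the hw_hash/hw_did_rss_tcp4 inputs stand in for whatever the hardware descriptor really provides:

static void
example_rx_set_hash(struct mbuf *m, uint32_t hw_hash, int hw_did_rss_tcp4)
{

	m->m_pkthdr.flowid = hw_hash;
	if (hw_did_rss_tcp4) {
		/* Hardware hashed the TCP/IPv4 4-tuple: usable for affinity. */
		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
	} else {
		/* Opaque id: fine for ordering, but carries no hash property. */
		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
	}
	/* Consumers can later test M_HASHTYPE_ISHASH(m) before trusting flowid. */
}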
 
 /*
  * COS/QOS class and quality of service tags.
@@ -342,12 +386,13 @@
  * External mbuf storage buffer types.
  */
 #define	EXT_CLUSTER	1	/* mbuf cluster */
-#define	EXT_SFBUF	2	/* sendfile(2)'s sf_bufs */
-#define	EXT_JUMBOP	3	/* jumbo cluster 4096 bytes */
+#define	EXT_SFBUF	2	/* sendfile(2)'s sf_buf */
+#define	EXT_JUMBOP	3	/* jumbo cluster page sized */
 #define	EXT_JUMBO9	4	/* jumbo cluster 9216 bytes */
 #define	EXT_JUMBO16	5	/* jumbo cluster 16184 bytes */
 #define	EXT_PACKET	6	/* mbuf+cluster from packet zone */
 #define	EXT_MBUF	7	/* external mbuf reference (M_IOVEC) */
+#define	EXT_SFBUF_NOCACHE 8	/* sendfile(2)'s sf_buf not to be cached */
 
 #define	EXT_VENDOR1	224	/* for vendor-internal use */
 #define	EXT_VENDOR2	225	/* for vendor-internal use */
@@ -362,14 +407,15 @@
 #define	EXT_NET_DRV	252	/* custom ext_buf provided by net driver(s) */
 #define	EXT_MOD_TYPE	253	/* custom module's ext_buf type */
 #define	EXT_DISPOSABLE	254	/* can throw this buffer away w/page flipping */
-#define	EXT_EXTREF	255	/* has externally maintained ref_cnt ptr */
+#define	EXT_EXTREF	255	/* has externally maintained ext_cnt ptr */
 
 /*
  * Flags for external mbuf buffer types.
  * NB: limited to the lower 24 bits.
  */
-#define	EXT_FLAG_EMBREF		0x000001	/* embedded ref_cnt, notyet */
-#define	EXT_FLAG_EXTREF		0x000002	/* external ref_cnt, notyet */
+#define	EXT_FLAG_EMBREF		0x000001	/* embedded ext_count */
+#define	EXT_FLAG_EXTREF		0x000002	/* external ext_cnt, notyet */
+
 #define	EXT_FLAG_NOFREE		0x000010	/* don't free mbuf to pool, notyet */
 
 #define	EXT_FLAG_VENDOR1	0x010000	/* for vendor-internal use */
@@ -392,9 +438,10 @@
     "\30EXT_FLAG_EXP4"
 
 /*
- * Return values for (*ext_free).
+ * External reference/free functions.
  */
-#define	EXT_FREE_OK	0	/* Normal return */
+void sf_ext_free(void *, void *);
+void sf_ext_free_nocache(void *, void *);
 
 /*
  * Flags indicating checksum, segmentation and other offload work to be
@@ -404,7 +451,7 @@
  * Outbound flags that are set by upper protocol layers requesting lower
  * layers, or ideally the hardware, to perform these offloading tasks.
  * For outbound packets this field and its flags can be directly tested
- * against if_data.ifi_hwassist.
+ * against ifnet if_hwassist.
  */
 #define	CSUM_IP			0x00000001	/* IP header checksum offload */
 #define	CSUM_IP_UDP		0x00000002	/* UDP checksum offload */
@@ -426,7 +473,7 @@
 #define	CSUM_L4_VALID		0x08000000	/* checksum is correct */
 #define	CSUM_L5_CALC		0x10000000	/* calculated layer 5 csum */
 #define	CSUM_L5_VALID		0x20000000	/* checksum is correct */
-#define	CSUM_COALESED		0x40000000	/* contains merged segments */
+#define	CSUM_COALESCED		0x40000000	/* contains merged segments */
 
 /*
  * CSUM flag description for use with printf(9) %b identifier.
@@ -437,7 +484,7 @@
     "\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP\15CSUM_IP6_TSO" \
     "\16CSUM_IP6_ISCSI" \
     "\31CSUM_L3_CALC\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID" \
-    "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESED"
+    "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESCED"
 
 /* CSUM flags compatibility mappings. */
 #define	CSUM_IP_CHECKED		CSUM_L3_CALC
@@ -456,7 +503,6 @@
 #define	CSUM_UDP_IPV6		CSUM_IP6_UDP
 #define	CSUM_TCP_IPV6		CSUM_IP6_TCP
 #define	CSUM_SCTP_IPV6		CSUM_IP6_SCTP
-#define	CSUM_FRAGMENT		0x0		/* Unused */
 
 /*
  * mbuf types describing the content of the mbuf (including external storage).
@@ -478,7 +524,8 @@
 #define	MT_EXP4		12	/* for experimental use */
 
 #define	MT_CONTROL	14	/* extra-data protocol message */
-#define	MT_OOBDATA	15	/* expedited data  */
+#define	MT_EXTCONTROL	15	/* control message with externalized contents */
+#define	MT_OOBDATA	16	/* expedited data  */
 #define	MT_NTYPES	16	/* number of mbuf types for mbtypes[] */
 
 #define	MT_NOINIT	255	/* Not a type but a flag to allocate
@@ -485,14 +532,6 @@
 				   a non-initialized mbuf */
 
 /*
- * Compatibility with historic mbuf allocator.
- */
-#define	MBTOM(how)	(how)
-#define	M_DONTWAIT	M_NOWAIT
-#define	M_TRYWAIT	M_WAITOK
-#define	M_WAIT		M_WAITOK
-
-/*
  * String names of mbuf-related UMA(9) and malloc(9) types.  Exposed to
  * !_KERNEL so that monitoring tools can look up the zones with
  * libmemstat(3).
@@ -529,10 +568,54 @@
 extern uma_zone_t	zone_jumbop;
 extern uma_zone_t	zone_jumbo9;
 extern uma_zone_t	zone_jumbo16;
-extern uma_zone_t	zone_ext_refcnt;
 
+void		 mb_dupcl(struct mbuf *, struct mbuf *);
 void		 mb_free_ext(struct mbuf *);
+void		 m_adj(struct mbuf *, int);
+int		 m_apply(struct mbuf *, int, int,
+		    int (*)(void *, void *, u_int), void *);
+int		 m_append(struct mbuf *, int, c_caddr_t);
+void		 m_cat(struct mbuf *, struct mbuf *);
+void		 m_catpkt(struct mbuf *, struct mbuf *);
+int		 m_clget(struct mbuf *m, int how);
+void 		*m_cljget(struct mbuf *m, int how, int size);
+struct mbuf	*m_collapse(struct mbuf *, int, int);
+void		 m_copyback(struct mbuf *, int, int, c_caddr_t);
+void		 m_copydata(const struct mbuf *, int, int, caddr_t);
+struct mbuf	*m_copym(struct mbuf *, int, int, int);
+struct mbuf	*m_copypacket(struct mbuf *, int);
+void		 m_copy_pkthdr(struct mbuf *, struct mbuf *);
+struct mbuf	*m_copyup(struct mbuf *, int, int);
+struct mbuf	*m_defrag(struct mbuf *, int);
+void		 m_demote_pkthdr(struct mbuf *);
+void		 m_demote(struct mbuf *, int, int);
+struct mbuf	*m_devget(char *, int, int, struct ifnet *,
+		    void (*)(char *, caddr_t, u_int));
+void		 m_dispose_extcontrolm(struct mbuf *m);
+struct mbuf	*m_dup(const struct mbuf *, int);
+int		 m_dup_pkthdr(struct mbuf *, const struct mbuf *, int);
+void		 m_extadd(struct mbuf *, caddr_t, u_int,
+		    void (*)(struct mbuf *, void *, void *), void *, void *,
+		    int, int);
+u_int		 m_fixhdr(struct mbuf *);
+struct mbuf	*m_fragment(struct mbuf *, int, int);
+void		 m_freem(struct mbuf *);
+struct mbuf	*m_get2(int, int, short, int);
+struct mbuf	*m_getjcl(int, short, int, int);
+struct mbuf	*m_getm2(struct mbuf *, int, int, short, int);
+struct mbuf	*m_getptr(struct mbuf *, int, int *);
+u_int		 m_length(struct mbuf *, struct mbuf **);
+int		 m_mbuftouio(struct uio *, struct mbuf *, int);
+void		 m_move_pkthdr(struct mbuf *, struct mbuf *);
 int		 m_pkthdr_init(struct mbuf *, int);
+struct mbuf	*m_prepend(struct mbuf *, int, int);
+void		 m_print(const struct mbuf *, int);
+struct mbuf	*m_pulldown(struct mbuf *, int, int, int *);
+struct mbuf	*m_pullup(struct mbuf *, int);
+int		 m_sanity(struct mbuf *, int);
+struct mbuf	*m_split(struct mbuf *, int, int);
+struct mbuf	*m_uiotombuf(struct uio *, int, int, int, int);
+struct mbuf	*m_unshare(struct mbuf *, int);
 
 static __inline int
 m_gettype(int size)
@@ -569,7 +652,7 @@
  */
 static __inline void
 m_extaddref(struct mbuf *m, caddr_t buf, u_int size, u_int *ref_cnt,
-    int (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2)
+    void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2)
 {
 
 	KASSERT(ref_cnt != NULL, ("%s: ref_cnt not provided", __func__));
@@ -577,7 +660,7 @@
 	atomic_add_int(ref_cnt, 1);
 	m->m_flags |= M_EXT;
 	m->m_ext.ext_buf = buf;
-	m->m_ext.ref_cnt = ref_cnt;
+	m->m_ext.ext_cnt = ref_cnt;
 	m->m_data = m->m_ext.ext_buf;
 	m->m_ext.ext_size = size;
 	m->m_ext.ext_free = freef;
@@ -622,8 +705,7 @@
  * should go away with constant propagation for !MGETHDR.
  */
 static __inline int
-m_init(struct mbuf *m, uma_zone_t zone, int size, int how, short type,
-    int flags)
+m_init(struct mbuf *m, int how, short type, int flags)
 {
 	int error;
 
@@ -633,30 +715,18 @@
 	m->m_len = 0;
 	m->m_flags = flags;
 	m->m_type = type;
-	if (flags & M_PKTHDR) {
-		if ((error = m_pkthdr_init(m, how)) != 0)
-			return (error);
-	}
+	if (flags & M_PKTHDR)
+		error = m_pkthdr_init(m, how);
+	else
+		error = 0;
 
-	return (0);
+	MBUF_PROBE5(m__init, m, how, type, flags, error);
+	return (error);
 }
 
 static __inline struct mbuf *
 m_get(int how, short type)
 {
-	struct mb_args args;
-
-	args.flags = 0;
-	args.type = type;
-	return (uma_zalloc_arg(zone_mbuf, &args, how));
-}
-
-/*
- * XXX This should be deprecated, very little use.
- */
-static __inline struct mbuf *
-m_getclr(int how, short type)
-{
 	struct mbuf *m;
 	struct mb_args args;
 
@@ -663,8 +733,7 @@
 	args.flags = 0;
 	args.type = type;
 	m = uma_zalloc_arg(zone_mbuf, &args, how);
-	if (m != NULL)
-		bzero(m->m_data, MLEN);
+	MBUF_PROBE3(m__get, how, type, m);
 	return (m);
 }
 
@@ -671,86 +740,53 @@
 static __inline struct mbuf *
 m_gethdr(int how, short type)
 {
+	struct mbuf *m;
 	struct mb_args args;
 
 	args.flags = M_PKTHDR;
 	args.type = type;
-	return (uma_zalloc_arg(zone_mbuf, &args, how));
+	m = uma_zalloc_arg(zone_mbuf, &args, how);
+	MBUF_PROBE3(m__gethdr, how, type, m);
+	return (m);
 }
 
 static __inline struct mbuf *
 m_getcl(int how, short type, int flags)
 {
+	struct mbuf *m;
 	struct mb_args args;
 
 	args.flags = flags;
 	args.type = type;
-	return (uma_zalloc_arg(zone_pack, &args, how));
+	m = uma_zalloc_arg(zone_pack, &args, how);
+	MBUF_PROBE4(m__getcl, how, type, flags, m);
+	return (m);
 }
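
For reference, the usual allocation pattern through this path looks roughly like the sketch below (illustrative, not from the diff); with the MBUF_PROBE4() hook above, every such allocation is now visible to DTrace without driver changes:

static int
example_alloc(struct mbuf **mp)
{
	struct mbuf *m;

	/* One call yields an mbuf with a standard cluster already attached. */
	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL)
		return (ENOBUFS);
	*mp = m;
	return (0);
}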
 
-static __inline void
-m_clget(struct mbuf *m, int how)
-{
-
-	if (m->m_flags & M_EXT)
-		printf("%s: %p mbuf already has cluster\n", __func__, m);
-	m->m_ext.ext_buf = (char *)NULL;
-	uma_zalloc_arg(zone_clust, m, how);
-	/*
-	 * On a cluster allocation failure, drain the packet zone and retry,
-	 * we might be able to loosen a few clusters up on the drain.
-	 */
-	if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) {
-		zone_drain(zone_pack);
-		uma_zalloc_arg(zone_clust, m, how);
-	}
-}
-
 /*
- * m_cljget() is different from m_clget() as it can allocate clusters without
- * attaching them to an mbuf.  In that case the return value is the pointer
- * to the cluster of the requested size.  If an mbuf was specified, it gets
- * the cluster attached to it and the return value can be safely ignored.
- * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
+ * XXX: m_cljset() is a dangerous API.  One must attach only a new,
+ * unreferenced cluster to an mbuf(9).  It is not possible to assert
+ * that, so care can be taken only by users of the API.
  */
-static __inline void *
-m_cljget(struct mbuf *m, int how, int size)
-{
-	uma_zone_t zone;
-
-	if (m && m->m_flags & M_EXT)
-		printf("%s: %p mbuf already has cluster\n", __func__, m);
-	if (m != NULL)
-		m->m_ext.ext_buf = NULL;
-
-	zone = m_getzone(size);
-	return (uma_zalloc_arg(zone, m, how));
-}
-
 static __inline void
 m_cljset(struct mbuf *m, void *cl, int type)
 {
-	uma_zone_t zone;
 	int size;
 
 	switch (type) {
 	case EXT_CLUSTER:
 		size = MCLBYTES;
-		zone = zone_clust;
 		break;
 #if MJUMPAGESIZE != MCLBYTES
 	case EXT_JUMBOP:
 		size = MJUMPAGESIZE;
-		zone = zone_jumbop;
 		break;
 #endif
 	case EXT_JUMBO9:
 		size = MJUM9BYTES;
-		zone = zone_jumbo9;
 		break;
 	case EXT_JUMBO16:
 		size = MJUM16BYTES;
-		zone = zone_jumbo16;
 		break;
 	default:
 		panic("%s: unknown cluster type %d", __func__, type);
@@ -761,10 +797,10 @@
 	m->m_ext.ext_free = m->m_ext.ext_arg1 = m->m_ext.ext_arg2 = NULL;
 	m->m_ext.ext_size = size;
 	m->m_ext.ext_type = type;
-	m->m_ext.ext_flags = 0;
-	m->m_ext.ref_cnt = uma_find_refcnt(zone, cl);
+	m->m_ext.ext_flags = EXT_FLAG_EMBREF;
+	m->m_ext.ext_count = 1;
 	m->m_flags |= M_EXT;
-
+	MBUF_PROBE3(m__cljset, m, cl, type);
 }
 
 static __inline void
@@ -778,7 +814,10 @@
 m_clrprotoflags(struct mbuf *m)
 {
 
-	m->m_flags &= ~M_PROTOFLAGS;
+	while (m) {
+		m->m_flags &= ~M_PROTOFLAGS;
+		m = m->m_next;
+	}
 }
 
 static __inline struct mbuf *
@@ -790,6 +829,16 @@
 	return (m);
 }
 
+static inline u_int
+m_extrefcnt(struct mbuf *m)
+{
+
+	KASSERT(m->m_flags & M_EXT, ("%s: M_EXT missing", __func__));
+
+	return ((m->m_ext.ext_flags & EXT_FLAG_EMBREF) ? m->m_ext.ext_count :
+	    *m->m_ext.ext_cnt);
+}
+
 /*
  * mbuf, cluster, and external object allocation macros (for compatibility
  * purposes).
@@ -799,8 +848,8 @@
 #define	MGETHDR(m, how, type)	((m) = m_gethdr((how), (type)))
 #define	MCLGET(m, how)		m_clget((m), (how))
 #define	MEXTADD(m, buf, size, free, arg1, arg2, flags, type)		\
-    (void )m_extadd((m), (caddr_t)(buf), (size), (free), (arg1), (arg2),\
-    (flags), (type), M_NOWAIT)
+    m_extadd((m), (caddr_t)(buf), (size), (free), (arg1), (arg2),	\
+    (flags), (type))
 #define	m_getm(m, len, how, type)					\
     m_getm2((m), (len), (how), (type), M_PKTHDR)
 
@@ -811,7 +860,7 @@
  */
 #define	M_WRITABLE(m)	(!((m)->m_flags & M_RDONLY) &&			\
 			 (!(((m)->m_flags & M_EXT)) ||			\
-			 (*((m)->m_ext.ref_cnt) == 1)) )		\
+			 (m_extrefcnt(m) == 1)))
 
 /* Check if the supplied mbuf has a packet header, or else panic. */
 #define	M_ASSERTPKTHDR(m)						\
@@ -828,41 +877,51 @@
 	    ("%s: attempted use of a free mbuf!", __func__))
 
 /*
- * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place an
- * object of the specified size at the end of the mbuf, longword aligned.
+ * Return the address of the start of the buffer associated with an mbuf,
+ * handling external storage, packet-header mbufs, and regular data mbufs.
  */
-#define	M_ALIGN(m, len) do {						\
-	KASSERT(!((m)->m_flags & (M_PKTHDR|M_EXT)),			\
-		("%s: M_ALIGN not normal mbuf", __func__));		\
-	KASSERT((m)->m_data == (m)->m_dat,				\
-		("%s: M_ALIGN not a virgin mbuf", __func__));		\
-	(m)->m_data += (MLEN - (len)) & ~(sizeof(long) - 1);		\
-} while (0)
+#define	M_START(m)							\
+	(((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf :			\
+	 ((m)->m_flags & M_PKTHDR) ? &(m)->m_pktdat[0] :		\
+	 &(m)->m_dat[0])
 
 /*
- * As above, for mbufs allocated with m_gethdr/MGETHDR or initialized by
- * M_DUP/MOVE_PKTHDR.
+ * Return the size of the buffer associated with an mbuf, handling external
+ * storage, packet-header mbufs, and regular data mbufs.
  */
-#define	MH_ALIGN(m, len) do {						\
-	KASSERT((m)->m_flags & M_PKTHDR && !((m)->m_flags & M_EXT),	\
-		("%s: MH_ALIGN not PKTHDR mbuf", __func__));		\
-	KASSERT((m)->m_data == (m)->m_pktdat,				\
-		("%s: MH_ALIGN not a virgin mbuf", __func__));		\
-	(m)->m_data += (MHLEN - (len)) & ~(sizeof(long) - 1);		\
-} while (0)
+#define	M_SIZE(m)							\
+	(((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size :			\
+	 ((m)->m_flags & M_PKTHDR) ? MHLEN :				\
+	 MLEN)
 
 /*
- * As above, for mbuf with external storage.
+ * Set the m_data pointer of a newly allocated mbuf to place an object of the
+ * specified size at the end of the mbuf, longword aligned.
+ *
+ * NB: Historically, we had M_ALIGN(), MH_ALIGN(), and MEXT_ALIGN() as
+ * separate macros, each asserting that it was called at the proper moment.
+ * This required callers to themselves test the storage type and call the
+ * right one.  Rather than require callers to be aware of those layout
+ * decisions, we centralize here.
  */
-#define	MEXT_ALIGN(m, len) do {						\
-	KASSERT((m)->m_flags & M_EXT,					\
-		("%s: MEXT_ALIGN not an M_EXT mbuf", __func__));	\
-	KASSERT((m)->m_data == (m)->m_ext.ext_buf,			\
-		("%s: MEXT_ALIGN not a virgin mbuf", __func__));	\
-	(m)->m_data += ((m)->m_ext.ext_size - (len)) &			\
-	    ~(sizeof(long) - 1); 					\
-} while (0)
+static __inline void
+m_align(struct mbuf *m, int len)
+{
+#ifdef INVARIANTS
+	const char *msg = "%s: not a virgin mbuf";
+#endif
+	int adjust;
 
+	KASSERT(m->m_data == M_START(m), (msg, __func__));
+
+	adjust = M_SIZE(m) - len;
+	m->m_data += adjust &~ (sizeof(long)-1);
+}
+
+#define	M_ALIGN(m, len)		m_align(m, len)
+#define	MH_ALIGN(m, len)	m_align(m, len)
+#define	MEXT_ALIGN(m, len)	m_align(m, len)
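
A quick usage sketch (illustrative, not from the diff): because m_align() now derives the buffer start and size via M_START()/M_SIZE(), one call covers what previously required choosing among M_ALIGN, MH_ALIGN and MEXT_ALIGN:

static struct mbuf *
example_aligned_hdr(int len)
{
	struct mbuf *m;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m != NULL)
		M_ALIGN(m, len);	/* caller ensures len <= MHLEN */
	return (m);
}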
+
 /*
  * Compute the amount of space available before the current start of data in
  * an mbuf.
@@ -869,12 +928,13 @@
  *
  * The M_WRITABLE() is a temporary, conservative safety measure: the burden
  * of checking writability of the mbuf data area rests solely with the caller.
+ *
+ * NB: In previous versions, M_LEADINGSPACE() would only check M_WRITABLE()
+ * for mbufs with external storage.  We now allow mbuf-embedded data to be
+ * read-only as well.
  */
 #define	M_LEADINGSPACE(m)						\
-	((m)->m_flags & M_EXT ?						\
-	    (M_WRITABLE(m) ? (m)->m_data - (m)->m_ext.ext_buf : 0):	\
-	    (m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat :	\
-	    (m)->m_data - (m)->m_dat)
+	(M_WRITABLE(m) ? ((m)->m_data - M_START(m)) : 0)
 
 /*
  * Compute the amount of space available after the end of data in an mbuf.
@@ -881,12 +941,14 @@
  *
  * The M_WRITABLE() is a temporary, conservative safety measure: the burden
  * of checking writability of the mbuf data area rests solely with the caller.
+ *
+ * NB: In previous versions, M_TRAILINGSPACE() would only check M_WRITABLE()
+ * for mbufs with external storage.  We now allow mbuf-embedded data to be
+ * read-only as well.
  */
 #define	M_TRAILINGSPACE(m)						\
-	((m)->m_flags & M_EXT ?						\
-	    (M_WRITABLE(m) ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size	\
-		- ((m)->m_data + (m)->m_len) : 0) :			\
-	    &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))
+	(M_WRITABLE(m) ?						\
+	    ((M_START(m) + M_SIZE(m)) - ((m)->m_data + (m)->m_len)) : 0)
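
Typical caller pattern for these macros, as a sketch (not part of the diff), assuming m is the last mbuf of its chain: append in place only when the storage is writable and has room, otherwise fall back to m_append():

static int
example_append(struct mbuf *m, const char *src, int len)
{

	if (M_WRITABLE(m) && M_TRAILINGSPACE(m) >= len) {
		bcopy(src, mtod(m, char *) + m->m_len, len);
		m->m_len += len;
		return (0);
	}
	/* Read-only storage or no room: let m_append() extend the chain. */
	return (m_append(m, len, src) ? 0 : ENOBUFS);
}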
 
 /*
  * Arrange to prepend space of size plen to mbuf m.  If a new mbuf must be
@@ -928,51 +990,6 @@
 extern int		max_protohdr;	/* Largest protocol header */
 extern int		nmbclusters;	/* Maximum number of clusters */
 
-struct uio;
-
-void		 m_adj(struct mbuf *, int);
-void		 m_align(struct mbuf *, int);
-int		 m_apply(struct mbuf *, int, int,
-		    int (*)(void *, void *, u_int), void *);
-int		 m_append(struct mbuf *, int, c_caddr_t);
-void		 m_cat(struct mbuf *, struct mbuf *);
-int		 m_extadd(struct mbuf *, caddr_t, u_int,
-		    int (*)(struct mbuf *, void *, void *), void *, void *,
-		    int, int, int);
-struct mbuf	*m_collapse(struct mbuf *, int, int);
-void		 m_copyback(struct mbuf *, int, int, c_caddr_t);
-void		 m_copydata(const struct mbuf *, int, int, caddr_t);
-struct mbuf	*m_copym(struct mbuf *, int, int, int);
-struct mbuf	*m_copymdata(struct mbuf *, struct mbuf *,
-		    int, int, int, int);
-struct mbuf	*m_copypacket(struct mbuf *, int);
-void		 m_copy_pkthdr(struct mbuf *, struct mbuf *);
-struct mbuf	*m_copyup(struct mbuf *, int, int);
-struct mbuf	*m_defrag(struct mbuf *, int);
-void		 m_demote(struct mbuf *, int);
-struct mbuf	*m_devget(char *, int, int, struct ifnet *,
-		    void (*)(char *, caddr_t, u_int));
-struct mbuf	*m_dup(struct mbuf *, int);
-int		 m_dup_pkthdr(struct mbuf *, struct mbuf *, int);
-u_int		 m_fixhdr(struct mbuf *);
-struct mbuf	*m_fragment(struct mbuf *, int, int);
-void		 m_freem(struct mbuf *);
-struct mbuf	*m_get2(int, int, short, int);
-struct mbuf	*m_getjcl(int, short, int, int);
-struct mbuf	*m_getm2(struct mbuf *, int, int, short, int);
-struct mbuf	*m_getptr(struct mbuf *, int, int *);
-u_int		 m_length(struct mbuf *, struct mbuf **);
-int		 m_mbuftouio(struct uio *, struct mbuf *, int);
-void		 m_move_pkthdr(struct mbuf *, struct mbuf *);
-struct mbuf	*m_prepend(struct mbuf *, int, int);
-void		 m_print(const struct mbuf *, int);
-struct mbuf	*m_pulldown(struct mbuf *, int, int, int *);
-struct mbuf	*m_pullup(struct mbuf *, int);
-int		 m_sanity(struct mbuf *, int);
-struct mbuf	*m_split(struct mbuf *, int, int);
-struct mbuf	*m_uiotombuf(struct uio *, int, int, int, int);
-struct mbuf	*m_unshare(struct mbuf *, int);
-
 /*-
  * Network packets may have annotations attached by affixing a list of
  * "packet tags" to the pkthdr structure.  Packet tags are dynamically
@@ -1060,7 +1077,7 @@
 void		 m_tag_free_default(struct m_tag *);
 struct m_tag	*m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *);
 struct m_tag	*m_tag_copy(struct m_tag *, int);
-int		 m_tag_copy_chain(struct mbuf *, struct mbuf *, int);
+int		 m_tag_copy_chain(struct mbuf *, const struct mbuf *, int);
 void		 m_tag_delete_nonpersistent(struct mbuf *);
 
 /*
@@ -1112,7 +1129,7 @@
  * Return the next tag in the list of tags associated with an mbuf.
  */
 static __inline struct m_tag *
-m_tag_next(struct mbuf *m, struct m_tag *t)
+m_tag_next(struct mbuf *m __unused, struct m_tag *t)
 {
 
 	return (SLIST_NEXT(t, m_tag_link));
@@ -1159,6 +1176,7 @@
 {
 	struct mbuf *n = m->m_next;
 
+	MBUF_PROBE1(m__free, m);
 	if ((m->m_flags & (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE))
 		m_tag_delete_chain(m, NULL);
 	if (m->m_flags & M_EXT)
@@ -1168,7 +1186,7 @@
 	return (n);
 }
 
-static int inline
+static __inline int
 rt_m_getfib(struct mbuf *m)
 {
 	KASSERT(m->m_flags & M_PKTHDR , ("Attempt to get FIB from non header mbuf."));
@@ -1182,8 +1200,15 @@
 	((_m)->m_pkthdr.fibnum) = (_fib);				\
 } while (0)
 
-#endif /* _KERNEL */
+/* flags passed as first argument for "m_ether_tcpip_hash()" */
+#define	MBUF_HASHFLAG_L2	(1 << 2)
+#define	MBUF_HASHFLAG_L3	(1 << 3)
+#define	MBUF_HASHFLAG_L4	(1 << 4)
 
+/* mbuf hashing helper routines */
+uint32_t	m_ether_tcpip_hash_init(void);
+uint32_t	m_ether_tcpip_hash(const uint32_t, const struct mbuf *, const uint32_t);
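
An illustrative sketch (not from the diff) of how these helpers might be used, e.g. for lagg(4)-style software flow hashing when the hardware provides none; the seed handling here is an assumption, not a requirement:

static uint32_t example_hash_seed;

static void
example_hash_init(void)
{

	example_hash_seed = m_ether_tcpip_hash_init();	/* random seed */
}

static uint32_t
example_flow_hash(const struct mbuf *m)
{

	/* Hash the L3 and L4 headers of the Ethernet frame in m. */
	return (m_ether_tcpip_hash(MBUF_HASHFLAG_L3 | MBUF_HASHFLAG_L4, m,
	    example_hash_seed));
}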
+
 #ifdef MBUF_PROFILING
  void m_profile(struct mbuf *m);
  #define M_PROFILE(m) m_profile(m)
@@ -1191,5 +1216,103 @@
  #define M_PROFILE(m)
 #endif
 
+struct mbufq {
+	STAILQ_HEAD(, mbuf)	mq_head;
+	int			mq_len;
+	int			mq_maxlen;
+};
 
+static inline void
+mbufq_init(struct mbufq *mq, int maxlen)
+{
+
+	STAILQ_INIT(&mq->mq_head);
+	mq->mq_maxlen = maxlen;
+	mq->mq_len = 0;
+}
+
+static inline struct mbuf *
+mbufq_flush(struct mbufq *mq)
+{
+	struct mbuf *m;
+
+	m = STAILQ_FIRST(&mq->mq_head);
+	STAILQ_INIT(&mq->mq_head);
+	mq->mq_len = 0;
+	return (m);
+}
+
+static inline void
+mbufq_drain(struct mbufq *mq)
+{
+	struct mbuf *m, *n;
+
+	n = mbufq_flush(mq);
+	while ((m = n) != NULL) {
+		n = STAILQ_NEXT(m, m_stailqpkt);
+		m_freem(m);
+	}
+}
+
+static inline struct mbuf *
+mbufq_first(const struct mbufq *mq)
+{
+
+	return (STAILQ_FIRST(&mq->mq_head));
+}
+
+static inline struct mbuf *
+mbufq_last(const struct mbufq *mq)
+{
+
+	return (STAILQ_LAST(&mq->mq_head, mbuf, m_stailqpkt));
+}
+
+static inline int
+mbufq_full(const struct mbufq *mq)
+{
+
+	return (mq->mq_len >= mq->mq_maxlen);
+}
+
+static inline int
+mbufq_len(const struct mbufq *mq)
+{
+
+	return (mq->mq_len);
+}
+
+static inline int
+mbufq_enqueue(struct mbufq *mq, struct mbuf *m)
+{
+
+	if (mbufq_full(mq))
+		return (ENOBUFS);
+	STAILQ_INSERT_TAIL(&mq->mq_head, m, m_stailqpkt);
+	mq->mq_len++;
+	return (0);
+}
+
+static inline struct mbuf *
+mbufq_dequeue(struct mbufq *mq)
+{
+	struct mbuf *m;
+
+	m = STAILQ_FIRST(&mq->mq_head);
+	if (m) {
+		STAILQ_REMOVE_HEAD(&mq->mq_head, m_stailqpkt);
+		m->m_nextpkt = NULL;
+		mq->mq_len--;
+	}
+	return (m);
+}
+
+static inline void
+mbufq_prepend(struct mbufq *mq, struct mbuf *m)
+{
+
+	STAILQ_INSERT_HEAD(&mq->mq_head, m, m_stailqpkt);
+	mq->mq_len++;
+}
+#endif /* _KERNEL */
 #endif /* !_SYS_MBUF_H_ */
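
Finally, an illustrative sketch (not part of the committed diff) of the new struct mbufq helpers in use as a bounded holding queue, say in a driver transmit path; the locking a real driver needs around the queue is omitted:

static struct mbufq example_txq;

static void
example_txq_init(void)
{

	mbufq_init(&example_txq, 512);	/* bound the backlog at 512 packets */
}

static int
example_txq_enqueue(struct mbuf *m)
{

	if (mbufq_enqueue(&example_txq, m) != 0) {
		m_freem(m);		/* queue full */
		return (ENOBUFS);
	}
	return (0);
}

static void
example_txq_start(void)
{
	struct mbuf *m;

	/* Pull queued packets off for transmission (freed here as a stand-in). */
	while ((m = mbufq_dequeue(&example_txq)) != NULL)
		m_freem(m);
}

static void
example_txq_teardown(void)
{

	mbufq_drain(&example_txq);	/* free anything still queued */
}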


