[Midnightbsd-cvs] src [9923] trunk/sys/netpfil/ipfw: sync with freebsd 10

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Fri May 25 09:05:12 EDT 2018


Revision: 9923
          http://svnweb.midnightbsd.org/src/?rev=9923
Author:   laffer1
Date:     2018-05-25 09:05:12 -0400 (Fri, 25 May 2018)
Log Message:
-----------
sync with freebsd 10

Modified Paths:
--------------
    trunk/sys/netpfil/ipfw/dn_heap.c
    trunk/sys/netpfil/ipfw/dn_heap.h
    trunk/sys/netpfil/ipfw/dn_sched.h
    trunk/sys/netpfil/ipfw/dn_sched_fifo.c
    trunk/sys/netpfil/ipfw/dn_sched_prio.c
    trunk/sys/netpfil/ipfw/dn_sched_qfq.c
    trunk/sys/netpfil/ipfw/dn_sched_rr.c
    trunk/sys/netpfil/ipfw/dn_sched_wf2q.c
    trunk/sys/netpfil/ipfw/dummynet.txt
    trunk/sys/netpfil/ipfw/ip_dn_glue.c
    trunk/sys/netpfil/ipfw/ip_dn_io.c
    trunk/sys/netpfil/ipfw/ip_dn_private.h
    trunk/sys/netpfil/ipfw/ip_dummynet.c
    trunk/sys/netpfil/ipfw/ip_fw2.c
    trunk/sys/netpfil/ipfw/ip_fw_dynamic.c
    trunk/sys/netpfil/ipfw/ip_fw_log.c
    trunk/sys/netpfil/ipfw/ip_fw_nat.c
    trunk/sys/netpfil/ipfw/ip_fw_pfil.c
    trunk/sys/netpfil/ipfw/ip_fw_private.h
    trunk/sys/netpfil/ipfw/ip_fw_sockopt.c
    trunk/sys/netpfil/ipfw/ip_fw_table.c
    trunk/sys/netpfil/ipfw/test/Makefile
    trunk/sys/netpfil/ipfw/test/dn_test.h
    trunk/sys/netpfil/ipfw/test/main.c
    trunk/sys/netpfil/ipfw/test/mylist.h
    trunk/sys/netpfil/ipfw/test/test_dn_heap.c
    trunk/sys/netpfil/ipfw/test/test_dn_sched.c

Added Paths:
-----------
    trunk/sys/netpfil/ipfw/dn_aqm.h
    trunk/sys/netpfil/ipfw/dn_aqm_codel.c
    trunk/sys/netpfil/ipfw/dn_aqm_codel.h
    trunk/sys/netpfil/ipfw/dn_aqm_pie.c
    trunk/sys/netpfil/ipfw/dn_aqm_pie.h
    trunk/sys/netpfil/ipfw/dn_sched_fq_codel.c
    trunk/sys/netpfil/ipfw/dn_sched_fq_codel.h
    trunk/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h
    trunk/sys/netpfil/ipfw/dn_sched_fq_pie.c

Added: trunk/sys/netpfil/ipfw/dn_aqm.h
===================================================================
--- trunk/sys/netpfil/ipfw/dn_aqm.h	                        (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_aqm.h	2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,168 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ *  Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from 
+ *  The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * API for writing an Active Queue Management algorithm for Dummynet
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_aqm.h 301772 2016-06-10 00:00:25Z truckman $
+ */
+
+#ifndef _IP_DN_AQM_H
+#define _IP_DN_AQM_H
+
+
+/* NOW is the current time in millisecond*/
+#define NOW ((dn_cfg.curr_time * tick) / 1000)
+
+#define AQM_UNOW (dn_cfg.curr_time * tick)
+#define AQM_TIME_1US ((aqm_time_t)(1))
+#define AQM_TIME_1MS ((aqm_time_t)(1000))
+#define AQM_TIME_1S ((aqm_time_t)(AQM_TIME_1MS * 1000))
+
+/* aqm time allows to store up to 4294 seconds */
+typedef uint32_t aqm_time_t;
+typedef int32_t aqm_stime_t;
+
+#define DN_AQM_MTAG_TS 55345
+
+/* Macro for variable bounding */
+#define BOUND_VAR(x,l,h)  ((x) > (h)? (h) : ((x) > (l)? (x) : (l)))
+
+/* sysctl variable to count number of dropped packets */
+extern unsigned long io_pkt_drop; 
+
+/*
+ * Structure for holding data and function pointers that together represent a
+ * AQM algorithm.
+ */
+ struct dn_aqm {
+#define DN_AQM_NAME_MAX 50
+	char			name[DN_AQM_NAME_MAX];	/* name of AQM algorithm */
+	uint32_t	type;	/* AQM type number */
+	
+	/* Methods implemented by AQM algorithm:
+	 * 
+	 * enqueue	enqueue packet 'm' on queue 'q'.
+	 * 	Return 0 on success, 1 on drop.
+	 * 
+	 * dequeue	dequeue a packet from queue 'q'.
+	 * 	Return a packet, NULL if no packet available.
+	 * 
+	 * config	configure AQM algorithm
+	 * If required, this function should allocate space to store 
+	 * the configurations and set 'fs->aqmcfg' to point to this space.
+	 * 'dn_extra_parms' includes array of parameters send
+	 * from ipfw userland command.
+	 * 	Return 0 on success, non-zero otherwise.
+	 * 
+	 * deconfig	deconfigure AQM algorithm.
+	 * The allocated configuration memory space should be freed here.
+	 * 	Return 0 on success, non-zero otherwise.
+	 * 
+	 * init	initialise AQM status variables of queue 'q'
+	 * This function is used to allocate space and init AQM status for a
+	 * queue and q->aqm_status to point to this space.
+	 * 	Return 0 on success, non-zero otherwise.
+	 * 
+	 * cleanup	cleanup AQM status variables of queue 'q'
+	 * The allocated memory space for AQM status should be freed here.
+	 * 	Return 0 on success, non-zero otherwise.
+	 * 
+	 * getconfig	retrieve AQM configurations 
+	 * This function is used to return AQM parameters to userland
+	 * command. The function should fill 'dn_extra_parms' struct with 
+	 * the AQM configurations using 'par' array.
+	 * 
+	 */
+	
+	int (*enqueue)(struct dn_queue *, struct mbuf *);
+	struct mbuf * (*dequeue)(struct dn_queue *);
+	int (*config)(struct dn_fsk *, struct dn_extra_parms *ep, int);
+	int (*deconfig)(struct dn_fsk *);
+	int (*init)(struct dn_queue *);
+	int (*cleanup)(struct dn_queue *);
+	int (*getconfig)(struct dn_fsk *, struct dn_extra_parms *);
+
+	int	ref_count; /*Number of queues instances in the system */
+	int	cfg_ref_count;	/*Number of AQM instances in the system */
+	SLIST_ENTRY (dn_aqm) next; /* Next AQM in the list */
+};
+
+/* Helper function to update queue and scheduler statistics.
+ * negative len + drop -> drop
+ * negative len -> dequeue
+ * positive len -> enqueue
+ * positive len + drop -> drop during enqueue
+ */
+__inline static void
+update_stats(struct dn_queue *q, int len, int drop)
+{
+	int inc = 0;
+	struct dn_flow *sni;
+	struct dn_flow *qni;
+	
+	sni = &q->_si->ni;
+	qni = &q->ni;
+
+	if (len < 0)
+			inc = -1;
+	else if(len > 0)
+			inc = 1;
+
+	if (drop) {
+			qni->drops++;
+			sni->drops++;
+			io_pkt_drop++;
+	} else {
+		/*update queue stats */
+		qni->length += inc;
+		qni->len_bytes += len;
+
+		/*update scheduler instance stats */
+		sni->length += inc;
+		sni->len_bytes += len;
+	}
+	/* tot_pkts  is updated in dn_enqueue function */
+}
+
+
+/* kernel module related function */
+int
+dn_aqm_modevent(module_t mod, int cmd, void *arg);
+
+#define DECLARE_DNAQM_MODULE(name, dnaqm)			\
+	static moduledata_t name##_mod = {			\
+		#name, dn_aqm_modevent, dnaqm		\
+	};							\
+	DECLARE_MODULE(name, name##_mod, 			\
+		SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); 	\
+        MODULE_DEPEND(name, dummynet, 3, 3, 3)
+
+#endif


Property changes on: trunk/sys/netpfil/ipfw/dn_aqm.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netpfil/ipfw/dn_aqm_codel.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_aqm_codel.c	                        (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_aqm_codel.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,445 @@
+/* $MidnightBSD$ */
+/*
+ * Codel - The Controlled-Delay Active Queue Management algorithm.
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_aqm_codel.c 317489 2017-04-27 07:32:07Z truckman $
+ * 
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ *  Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from 
+ *  The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>	/* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
+#include <net/netisr.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>		/* ip_len, ip_off */
+#include <netinet/ip_var.h>	/* ip_output(), IP_FORWARDING */
+#include <netinet/ip_fw.h>
+#include <netinet/ip_dummynet.h>
+#include <netinet/if_ether.h> /* various ether_* routines */
+#include <netinet/ip6.h>       /* for ip6_input, ip6_output prototypes */
+#include <netinet6/ip6_var.h>
+#include <netpfil/ipfw/dn_heap.h>
+
+#ifdef NEW_AQM
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/ip_dn_private.h>
+#include <netpfil/ipfw/dn_aqm.h>
+#include <netpfil/ipfw/dn_aqm_codel.h>
+#include <netpfil/ipfw/dn_sched.h>
+
+#define DN_AQM_CODEL 1
+
+static struct dn_aqm codel_desc;
+
+/* default codel parameters */
+struct dn_aqm_codel_parms codel_sysctl = {5000 * AQM_TIME_1US,
+	100000 * AQM_TIME_1US, 0};
+
+static int
+codel_sysctl_interval_handler(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	long  value;
+
+	value = codel_sysctl.interval;
+	value /= AQM_TIME_1US;
+	error = sysctl_handle_long(oidp, &value, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (value < 1 || value > 100 * AQM_TIME_1S)
+		return (EINVAL);
+	codel_sysctl.interval = value * AQM_TIME_1US ;
+	return (0);
+}
+
+static int
+codel_sysctl_target_handler(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	long  value;
+
+	value = codel_sysctl.target;
+	value /= AQM_TIME_1US;
+	error = sysctl_handle_long(oidp, &value, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	D("%ld", value);
+	if (value < 1 || value > 5 * AQM_TIME_1S)
+		return (EINVAL);
+	codel_sysctl.target = value * AQM_TIME_1US ;
+	return (0);
+}
+
+/* defining Codel sysctl variables */
+SYSBEGIN(f4)
+
+SYSCTL_DECL(_net_inet);
+SYSCTL_DECL(_net_inet_ip);
+SYSCTL_DECL(_net_inet_ip_dummynet);
+static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, 
+	codel, CTLFLAG_RW, 0, "CODEL");
+
+#ifdef SYSCTL_NODE
+SYSCTL_PROC(_net_inet_ip_dummynet_codel, OID_AUTO, target,
+	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,codel_sysctl_target_handler, "L",
+	"CoDel target in microsecond");
+
+SYSCTL_PROC(_net_inet_ip_dummynet_codel, OID_AUTO, interval,
+	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, codel_sysctl_interval_handler, "L",
+	"CoDel interval in microsecond");
+#endif
+
+/* This function computes codel_interval/sqrt(count) 
+ *  Newton's method of approximation is used to compute 1/sqrt(count).
+ * http://betterexplained.com/articles/
+ * 	understanding-quakes-fast-inverse-square-root/ 
+ */
+aqm_time_t 
+control_law(struct codel_status *cst, struct dn_aqm_codel_parms *cprms,
+	aqm_time_t t)
+{
+	uint32_t count;
+	uint64_t temp;
+	count = cst->count;
+
+	/* we don't calculate isqrt(1) to get more accurate result*/
+	if (count == 1) {
+		/* prepare isqrt (old guess) for the next iteration i.e. 1/sqrt(2)*/
+		cst->isqrt = (1UL<< FIX_POINT_BITS) * 7/10;
+		/* return time + isqrt(1)*interval */
+		return t + cprms->interval;
+	}
+
+	/* newguess = g(1.5 - 0.5*c*g^2)
+	 * Multiplying both sides by 2 to make all the constants intergers
+	 * newguess * 2  = g(3 - c*g^2) g=old guess, c=count
+	 * So, newguess = newguess /2
+	 * Fixed point operations are used here.  
+	 */
+
+	/* Calculate g^2 */
+	temp = (uint32_t) cst->isqrt * cst->isqrt;
+	/* Calculate (3 - c*g^2) i.e. (3 - c * temp) */
+	temp = (3ULL<< (FIX_POINT_BITS*2)) - (count * temp);
+
+	/* 
+	 * Divide by 2 because we multiplied the original equation by two 
+	 * Also, we shift the result by 8 bits to prevent overflow. 
+	 * */
+	temp >>= (1 + 8); 
+
+	/*  Now, temp = (1.5 - 0.5*c*g^2)
+	 * Calculate g (1.5 - 0.5*c*g^2) i.e. g * temp 
+	 */
+	temp = (cst->isqrt * temp) >> (FIX_POINT_BITS + FIX_POINT_BITS - 8);
+	cst->isqrt = temp;
+
+	 /* calculate codel_interval/sqrt(count) */
+	 return t + ((cprms->interval * temp) >> FIX_POINT_BITS);
+}
+
+/*
+ * Extract a packet from the head of queue 'q'
+ * Return a packet or NULL if the queue is empty.
+ * Also extract packet's timestamp from mtag.
+ */
+struct mbuf *
+codel_extract_head(struct dn_queue *q, aqm_time_t *pkt_ts)
+{
+	struct m_tag *mtag;
+	struct mbuf *m = q->mq.head;
+
+	if (m == NULL)
+		return m;
+	q->mq.head = m->m_nextpkt;
+
+	/* Update stats */
+	update_stats(q, -m->m_pkthdr.len, 0);
+
+	if (q->ni.length == 0) /* queue is now idle */
+			q->q_time = dn_cfg.curr_time;
+
+	/* extract packet TS*/
+	mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+	if (mtag == NULL) {
+		D("Codel timestamp mtag not found!");
+		*pkt_ts = 0;
+	} else {
+		*pkt_ts = *(aqm_time_t *)(mtag + 1);
+		m_tag_delete(m,mtag); 
+	}
+
+	return m;
+}
+
+/*
+ * Enqueue a packet 'm' in queue 'q'
+ */
+static int
+aqm_codel_enqueue(struct dn_queue *q, struct mbuf *m)
+{
+	struct dn_fs *f;
+	uint64_t len;
+	struct codel_status *cst;	/*codel status variables */
+	struct m_tag *mtag;
+
+	f = &(q->fs->fs);
+	len = m->m_pkthdr.len;
+	cst = q->aqm_status;
+	if(!cst) {
+		D("Codel queue is not initialized\n");
+		goto drop;
+	}
+
+	/* Finding maximum packet size */
+	// XXX we can get MTU from driver instead 
+	if (len > cst->maxpkt_size)
+		cst->maxpkt_size = len;
+
+	/* check for queue size and drop the tail if exceed queue limit*/
+	if (f->flags & DN_QSIZE_BYTES) {
+		if ( q->ni.len_bytes > f->qsize)
+			goto drop;
+	}
+	else {
+		if ( q->ni.length >= f->qsize)
+			goto drop;
+	}
+
+	/* Add timestamp as mtag */
+	mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+	if (mtag == NULL)
+		mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS,
+			sizeof(aqm_time_t), M_NOWAIT);
+	if (mtag == NULL) {
+		m_freem(m); 
+		goto drop;
+	}
+
+	*(aqm_time_t *)(mtag + 1) = AQM_UNOW;
+	m_tag_prepend(m, mtag);
+
+	mq_append(&q->mq, m);
+	update_stats(q, len, 0);
+	return (0);
+
+drop:
+	update_stats(q, 0, 1);
+	FREE_PKT(m);
+	return (1);
+}
+
+
+/* Dequeue a pcaket from queue q */
+static struct mbuf * 
+aqm_codel_dequeue(struct dn_queue *q)
+{
+	return codel_dequeue(q);
+}
+
+/* 
+ * initialize Codel for queue 'q' 
+ * First allocate memory for codel status.
+ */
+static int 
+aqm_codel_init(struct dn_queue *q)
+{
+	struct codel_status *cst;
+
+	if (!q->fs->aqmcfg) {
+		D("Codel is not configure!d");
+		return EINVAL;
+	}
+
+	q->aqm_status = malloc(sizeof(struct codel_status),
+			 M_DUMMYNET, M_NOWAIT | M_ZERO);
+	if (q->aqm_status == NULL) {
+		D("Cannot allocate AQM_codel private data");
+		return ENOMEM ; 
+	}
+
+	/* init codel status variables */
+	cst = q->aqm_status;
+	cst->dropping=0;
+	cst->first_above_time=0;
+	cst->drop_next_time=0;
+	cst->count=0;
+	cst->maxpkt_size = 500;
+
+	/* increase reference counters */
+	codel_desc.ref_count++;
+
+	return 0;
+}
+
+/* 
+ * Clean up Codel status for queue 'q' 
+ * Destroy memory allocated for codel status.
+ */
+static int
+aqm_codel_cleanup(struct dn_queue *q)
+{
+
+	if (q && q->aqm_status) {
+		free(q->aqm_status, M_DUMMYNET);
+		q->aqm_status = NULL;
+		/* decrease reference counters */
+		codel_desc.ref_count--;
+	}
+	else
+		D("Codel already cleaned up");
+	return 0;
+}
+
+/* 
+ * Config codel parameters
+ * also allocate memory for codel configurations
+ */
+static int
+aqm_codel_config(struct dn_fsk* fs, struct dn_extra_parms *ep, int len)
+{
+	struct dn_aqm_codel_parms *ccfg;
+
+	int l = sizeof(struct dn_extra_parms);
+	if (len < l) {
+		D("invalid sched parms length got %d need %d", len, l);
+		return EINVAL;
+	}
+	/* we free the old cfg because maybe the original allocation 
+	 * not the same size as the new one (different AQM type).
+	 */
+	if (fs->aqmcfg) {
+		free(fs->aqmcfg, M_DUMMYNET);
+		fs->aqmcfg = NULL;
+	}
+
+	fs->aqmcfg = malloc(sizeof(struct dn_aqm_codel_parms),
+			 M_DUMMYNET, M_NOWAIT | M_ZERO);
+	if (fs->aqmcfg== NULL) {
+		D("cannot allocate AQM_codel configuration parameters");
+		return ENOMEM; 
+	}
+	
+	/* configure codel parameters */
+	ccfg = fs->aqmcfg;
+	
+	if (ep->par[0] < 0)
+		ccfg->target = codel_sysctl.target;
+	else
+		ccfg->target = ep->par[0] * AQM_TIME_1US;
+
+	if (ep->par[1] < 0)
+		ccfg->interval = codel_sysctl.interval;
+	else
+		ccfg->interval = ep->par[1] * AQM_TIME_1US;
+
+	if (ep->par[2] < 0)
+		ccfg->flags = 0;
+	else
+		ccfg->flags = ep->par[2];
+
+	/* bound codel configurations */
+	ccfg->target = BOUND_VAR(ccfg->target,1, 5 * AQM_TIME_1S);
+	ccfg->interval = BOUND_VAR(ccfg->interval,1, 5 * AQM_TIME_1S);
+	/* increase config reference counter */
+	codel_desc.cfg_ref_count++;
+
+	return 0;
+}
+
+/*
+ * Deconfigure Codel and free memory allocation
+ */
+static int
+aqm_codel_deconfig(struct dn_fsk* fs)
+{
+
+	if (fs && fs->aqmcfg) {
+		free(fs->aqmcfg, M_DUMMYNET);
+		fs->aqmcfg = NULL;
+		fs->aqmfp = NULL;
+		/* decrease config reference counter */
+		codel_desc.cfg_ref_count--;
+	}
+
+	return 0;
+}
+
+/* 
+ * Retrieve Codel configuration parameters.
+ */ 
+static int
+aqm_codel_getconfig(struct dn_fsk *fs, struct dn_extra_parms * ep)
+{
+	struct dn_aqm_codel_parms *ccfg;
+
+	if (fs->aqmcfg) {
+		strlcpy(ep->name, codel_desc.name, sizeof(ep->name));
+		ccfg = fs->aqmcfg;
+		ep->par[0] = ccfg->target / AQM_TIME_1US;
+		ep->par[1] = ccfg->interval / AQM_TIME_1US;
+		ep->par[2] = ccfg->flags;
+		return 0;
+	}
+	return 1;
+}
+
+static struct dn_aqm codel_desc = {
+	_SI( .type = )  DN_AQM_CODEL,
+	_SI( .name = )  "CODEL",
+	_SI( .enqueue = )  aqm_codel_enqueue,
+	_SI( .dequeue = )  aqm_codel_dequeue,
+	_SI( .config = )  aqm_codel_config,
+	_SI( .getconfig = )  aqm_codel_getconfig,
+	_SI( .deconfig = )  aqm_codel_deconfig,
+	_SI( .init = )  aqm_codel_init,
+	_SI( .cleanup = )  aqm_codel_cleanup,
+};
+
+DECLARE_DNAQM_MODULE(dn_aqm_codel, &codel_desc);
+
+
+#endif


Property changes on: trunk/sys/netpfil/ipfw/dn_aqm_codel.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netpfil/ipfw/dn_aqm_codel.h
===================================================================
--- trunk/sys/netpfil/ipfw/dn_aqm_codel.h	                        (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_aqm_codel.h	2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,223 @@
+/* $MidnightBSD$ */
+/*
+ * Codel - The Controlled-Delay Active Queue Management algorithm.
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_aqm_codel.h 301772 2016-06-10 00:00:25Z truckman $
+ * 
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ *  Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from 
+ *  The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ * 
+ * Copyright (C) 2011-2014 Kathleen Nichols <nichols at pollere.com>.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * o  Redistributions of source code must retain the above copyright
+ *  notice, this list of conditions, and the following disclaimer,
+ *  without modification.
+ *
+ * o  Redistributions in binary form must reproduce the above copyright
+ *  notice, this list of conditions and the following disclaimer in
+ *  the documentation and/or other materials provided with the
+ *  distribution.
+ * 
+ * o  The names of the authors may not be used to endorse or promote
+ *  products derived from this software without specific prior written
+ *  permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General Public
+ * License ("GPL") version 2, in which case the provisions of the GPL
+ * apply INSTEAD OF those given above.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IP_DN_AQM_CODEL_H
+#define _IP_DN_AQM_CODEL_H
+
+
+// XXX How to choose MTAG?
+#define FIX_POINT_BITS 16 
+
+enum {
+	CODEL_ECN_ENABLED = 1
+};
+
+/* Codel parameters */
+struct dn_aqm_codel_parms {
+	aqm_time_t	target;
+	aqm_time_t	interval;
+	uint32_t	flags;
+};
+
+/* codel status variables */
+struct codel_status {
+	uint32_t	count;	/* number of dropped pkts since entering drop state */
+	uint16_t	dropping;	/* dropping state */
+	aqm_time_t	drop_next_time;	/* time for next drop */
+	aqm_time_t	first_above_time;	/* time for first ts over target we observed */
+	uint16_t	isqrt;	/* last isqrt for control low */
+	uint16_t	maxpkt_size;	/* max packet size seen so far */
+};
+
+struct mbuf *codel_extract_head(struct dn_queue *, aqm_time_t *);
+aqm_time_t control_law(struct codel_status *,
+	struct dn_aqm_codel_parms *, aqm_time_t );
+
+__inline static struct mbuf *
+codel_dodequeue(struct dn_queue *q, aqm_time_t now, uint16_t *ok_to_drop)
+{
+	struct mbuf * m;
+	struct dn_aqm_codel_parms *cprms;
+	struct codel_status *cst;
+	aqm_time_t  pkt_ts, sojourn_time;
+
+	*ok_to_drop = 0;
+	m = codel_extract_head(q, &pkt_ts);
+	
+	cst = q->aqm_status;
+	
+	if (m == NULL) {
+		/* queue is empty - we can't be above target */
+		cst->first_above_time= 0;
+		return m;
+	}
+
+	cprms = q->fs->aqmcfg;
+
+	/* To span a large range of bandwidths, CoDel runs two
+	 * different AQMs in parallel. One is sojourn-time-based
+	 * and takes effect when the time to send an MTU-sized
+	 * packet is less than target.  The 1st term of the "if"
+	 * below does this.  The other is backlog-based and takes
+	 * effect when the time to send an MTU-sized packet is >=
+	* target. The goal here is to keep the output link
+	* utilization high by never allowing the queue to get
+	* smaller than the amount that arrives in a typical
+	 * interarrival time (MTU-sized packets arriving spaced
+	 * by the amount of time it takes to send such a packet on
+	 * the bottleneck). The 2nd term of the "if" does this.
+	 */
+	sojourn_time = now - pkt_ts;
+	if (sojourn_time < cprms->target || q->ni.len_bytes <= cst->maxpkt_size) {
+		/* went below - stay below for at least interval */
+		cst->first_above_time = 0;
+	} else {
+		if (cst->first_above_time == 0) {
+			/* just went above from below. if still above at
+			 * first_above_time, will say it's ok to drop. */
+			cst->first_above_time = now + cprms->interval;
+		} else if (now >= cst->first_above_time) {
+			*ok_to_drop = 1;
+		}
+	}
+	return m;
+}
+
+/* 
+ * Dequeue a packet from queue 'q'
+ */
+__inline static struct mbuf * 
+codel_dequeue(struct dn_queue *q)
+{
+	struct mbuf *m;
+	struct dn_aqm_codel_parms *cprms;
+	struct codel_status *cst;
+	aqm_time_t now;
+	uint16_t ok_to_drop;
+
+	cst = q->aqm_status;;
+	cprms = q->fs->aqmcfg;
+	now = AQM_UNOW;
+
+	m = codel_dodequeue(q, now, &ok_to_drop);
+	if (cst->dropping) {
+		if (!ok_to_drop) {
+			/* sojourn time below target - leave dropping state */
+			cst->dropping = false;
+		}
+		/*
+		 * Time for the next drop. Drop current packet and dequeue
+		 * next.  If the dequeue doesn't take us out of dropping
+		 * state, schedule the next drop. A large backlog might
+		 * result in drop rates so high that the next drop should
+		 * happen now, hence the 'while' loop.
+		 */
+		while (now >= cst->drop_next_time && cst->dropping) {
+
+			/* mark the packet */
+			if (cprms->flags & CODEL_ECN_ENABLED && ecn_mark(m)) {
+				cst->count++;
+				/* schedule the next mark. */
+				cst->drop_next_time = control_law(cst, cprms,
+					cst->drop_next_time);
+				return m;
+			}
+
+			/* drop the packet */
+			update_stats(q, 0, 1);
+			FREE_PKT(m);
+			m = codel_dodequeue(q, now, &ok_to_drop);
+
+			if (!ok_to_drop) {
+				/* leave dropping state */
+				cst->dropping = false;
+			} else {
+				cst->count++;
+				/* schedule the next drop. */
+				cst->drop_next_time = control_law(cst, cprms,
+					cst->drop_next_time);
+			}
+		}
+	/* If we get here we're not in dropping state. The 'ok_to_drop'
+	 * return from dodequeue means that the sojourn time has been
+	 * above 'target' for 'interval' so enter dropping state.
+	 */
+	} else if (ok_to_drop) {
+
+		/* if ECN option is disabled or the packet cannot be marked,
+		 * drop the packet and extract another.
+		 */
+		if (!(cprms->flags & CODEL_ECN_ENABLED) || !ecn_mark(m)) {
+			update_stats(q, 0, 1);
+			FREE_PKT(m);
+			m = codel_dodequeue(q, now, &ok_to_drop);
+		}
+
+		cst->dropping = true;
+
+		/* If min went above target close to when it last went
+		 * below, assume that the drop rate that controlled the
+		 * queue on the last cycle is a good starting point to
+		 * control it now. ('drop_next' will be at most 'interval'
+		 * later than the time of the last drop so 'now - drop_next'
+		 * is a good approximation of the time from the last drop
+		 * until now.)
+		 */
+		cst->count = (cst->count > 2 && ((aqm_stime_t)now - 
+			(aqm_stime_t)cst->drop_next_time) < 8* cprms->interval)?
+				cst->count - 2 : 1;
+		/* we don't have to set initial guess for Newton's method isqrt as
+		 * we initilaize  isqrt in control_law function when count == 1 */
+		cst->drop_next_time = control_law(cst, cprms, now);
+	}
+	
+	return m;
+}
+
+#endif


Property changes on: trunk/sys/netpfil/ipfw/dn_aqm_codel.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netpfil/ipfw/dn_aqm_pie.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_aqm_pie.c	                        (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_aqm_pie.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,811 @@
+/* $MidnightBSD$ */
+/*
+ * PIE - Proportional Integral controller Enhanced AQM algorithm.
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_aqm_pie.c 318905 2017-05-25 22:41:34Z truckman $
+ * 
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ *  Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from 
+ *  The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>	/* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
+#include <net/netisr.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>		/* ip_len, ip_off */
+#include <netinet/ip_var.h>	/* ip_output(), IP_FORWARDING */
+#include <netinet/ip_fw.h>
+#include <netinet/ip_dummynet.h>
+#include <netinet/if_ether.h> /* various ether_* routines */
+#include <netinet/ip6.h>       /* for ip6_input, ip6_output prototypes */
+#include <netinet6/ip6_var.h>
+#include <netpfil/ipfw/dn_heap.h>
+
+#ifdef NEW_AQM
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/ip_dn_private.h>
+#include <netpfil/ipfw/dn_aqm.h>
+#include <netpfil/ipfw/dn_aqm_pie.h>
+#include <netpfil/ipfw/dn_sched.h>
+
+/* for debugging */
+#include <sys/syslog.h>
+
+/* forward declaration; full initializer is at the bottom of this file */
+static struct dn_aqm pie_desc;
+
+/*  PIE defaults
+ * target=15ms, tupdate=15ms, max_burst=150ms, 
+ * max_ecnth=0.1, alpha=0.125, beta=1.25, 
+ * flags: cap-drop, departure-rate estimation and de-randomization enabled.
+ * Time fields are in AQM ticks, alpha/beta/ecnth in PIE_SCALE fixed point.
+ */
+struct dn_aqm_pie_parms pie_sysctl = 
+	{ 15 * AQM_TIME_1MS,  15 * AQM_TIME_1MS, 150 * AQM_TIME_1MS,
+	PIE_SCALE/10 , PIE_SCALE * 0.125,  PIE_SCALE * 1.25 ,
+	PIE_CAPDROP_ENABLED | PIE_DEPRATEEST_ENABLED | PIE_DERAND_ENABLED };
+
+/*
+ * Sysctl handler shared by the "alpha" and "beta" PIE knobs
+ * (dispatches on oidp->oid_name).  Values are exported to userland
+ * scaled by 1000 and stored internally in PIE_SCALE fixed point.
+ */
+static int
+pie_sysctl_alpha_beta_handler(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	long  value;
+
+	if (!strcmp(oidp->oid_name,"alpha"))
+		value = pie_sysctl.alpha;
+	else
+		value = pie_sysctl.beta;
+		
+	/* fixed point -> x1000 integer for userland */
+	value = value * 1000 / PIE_SCALE;
+	error = sysctl_handle_long(oidp, &value, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (value < 1 || value > 7 * PIE_SCALE)
+		return (EINVAL);
+	/* x1000 integer -> fixed point */
+	value = (value * PIE_SCALE) / 1000;
+	if (!strcmp(oidp->oid_name,"alpha"))
+			pie_sysctl.alpha = value;
+	else
+		pie_sysctl.beta = value;
+	return (0);
+}
+
+/*
+ * Sysctl handler shared by the "target", "tupdate" and "max_burst"
+ * knobs (dispatches on oidp->oid_name).  Values are exported to
+ * userland in microseconds and stored internally in AQM time units.
+ */
+static int
+pie_sysctl_target_tupdate_maxb_handler(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	long  value;
+
+	if (!strcmp(oidp->oid_name,"target"))
+		value = pie_sysctl.qdelay_ref;
+	else if (!strcmp(oidp->oid_name,"tupdate"))
+		value = pie_sysctl.tupdate;
+	else
+		value = pie_sysctl.max_burst;
+	
+	/* internal AQM time units -> microseconds */
+	value = value / AQM_TIME_1US;
+	error = sysctl_handle_long(oidp, &value, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	/* accept 1us .. 10s */
+	if (value < 1 || value > 10 * AQM_TIME_1S)
+		return (EINVAL);
+	value = value * AQM_TIME_1US;
+	
+	if (!strcmp(oidp->oid_name,"target"))
+		pie_sysctl.qdelay_ref  = value;
+	else if (!strcmp(oidp->oid_name,"tupdate"))
+		pie_sysctl.tupdate  = value;
+	else
+		pie_sysctl.max_burst = value;
+	return (0);
+}
+
+/*
+ * Sysctl handler for the ECN safeguard threshold "max_ecnth".
+ * Exported scaled by 1000; stored in PIE_SCALE fixed point.
+ */
+static int
+pie_sysctl_max_ecnth_handler(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	long  value;
+
+	value = pie_sysctl.max_ecnth;
+	/* fixed point -> x1000 integer for userland */
+	value = value * 1000 / PIE_SCALE;
+	error = sysctl_handle_long(oidp, &value, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (value < 1 || value > PIE_SCALE)
+		return (EINVAL);
+	value = (value * PIE_SCALE) / 1000;
+	pie_sysctl.max_ecnth = value;
+	return (0);
+}
+
+/* define PIE sysctl variables under net.inet.ip.dummynet.pie */
+SYSBEGIN(f4)
+SYSCTL_DECL(_net_inet);
+SYSCTL_DECL(_net_inet_ip);
+SYSCTL_DECL(_net_inet_ip_dummynet);
+static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, 
+	pie, CTLFLAG_RW, 0, "PIE");
+
+/* only expose the knobs when the sysctl machinery is available */
+#ifdef SYSCTL_NODE
+SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, target,
+	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, 
+	pie_sysctl_target_tupdate_maxb_handler, "L",
+	"queue target in microsecond");
+SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, tupdate,
+	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+	pie_sysctl_target_tupdate_maxb_handler, "L",
+	"the frequency of drop probability calculation in microsecond");
+SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, max_burst,
+	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+	pie_sysctl_target_tupdate_maxb_handler, "L",
+	"Burst allowance interval in microsecond");
+
+SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, max_ecnth,
+	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+	pie_sysctl_max_ecnth_handler, "L",
+	"ECN safeguard threshold scaled by 1000");
+
+SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, alpha,
+	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+	pie_sysctl_alpha_beta_handler, "L",
+	"PIE alpha scaled by 1000");
+SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, beta,
+	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+	pie_sysctl_alpha_beta_handler, "L",
+	"beta scaled by 1000");
+#endif
+
+
+/*
+ * Callout function for drop probability calculation.
+ * Re-armed every 'tupdate' while PIE is active; 'x' is the
+ * struct pie_status of the queue being controlled.
+ *
+ * Locking: the callout was initialized with callout_init_mtx(...,
+ * CALLOUT_RETURNUNLOCKED), so we are entered with pst->lock_mtx held
+ * and must release it ourselves before returning (done at the end).
+ */
+static void
+calculate_drop_prob(void *x)
+{
+	int64_t p, prob, oldprob;
+	struct dn_aqm_pie_parms *pprms;
+	struct pie_status *pst = (struct pie_status *) x;
+	int p_isneg;
+
+	pprms = pst->parms;
+	prob = pst->drop_prob;
+
+	/* calculate current qdelay using DRE method.
+	 * If TS is used and no data in the queue, reset current_qdelay
+	 * as it stays at last value during dequeue process. 
+	*/
+	if (pprms->flags & PIE_DEPRATEEST_ENABLED)
+		pst->current_qdelay = ((uint64_t)pst->pq->ni.len_bytes *
+			pst->avg_dq_time) >> PIE_DQ_THRESHOLD_BITS;
+	else 
+		if (!pst->pq->ni.len_bytes)
+			 pst->current_qdelay = 0;
+
+	/* calculate drop probability:
+	 * p = alpha * (qdelay - target) + beta * (qdelay - qdelay_old)
+	 * all terms in PIE_SCALE fixed point.
+	 */
+	p = (int64_t)pprms->alpha * 
+		((int64_t)pst->current_qdelay - (int64_t)pprms->qdelay_ref); 
+	p +=(int64_t) pprms->beta * 
+		((int64_t)pst->current_qdelay - (int64_t)pst->qdelay_old); 
+
+	/* take absolute value so right shift result is well defined */
+	p_isneg = p < 0;
+	if (p_isneg) {
+		p = -p;
+	}
+		
+	/* We PIE_MAX_PROB shift by 12-bits to increase the division precision */
+	p *= (PIE_MAX_PROB << 12) / AQM_TIME_1S;
+
+	/* auto-tune drop probability: the smaller the current probability,
+	 * the smaller the step applied (extra right shifts below).
+	 */
+	if (prob < (PIE_MAX_PROB / 1000000)) /* 0.000001 */
+		p >>= 11 + PIE_FIX_POINT_BITS + 12;
+	else if (prob < (PIE_MAX_PROB / 100000)) /* 0.00001 */
+		p >>= 9 + PIE_FIX_POINT_BITS + 12;
+	else if (prob < (PIE_MAX_PROB / 10000)) /* 0.0001 */
+		p >>= 7 + PIE_FIX_POINT_BITS + 12;
+	else if (prob < (PIE_MAX_PROB / 1000)) /* 0.001 */
+		p >>= 5 + PIE_FIX_POINT_BITS + 12;
+	else if (prob < (PIE_MAX_PROB / 100)) /* 0.01 */
+		p >>= 3 + PIE_FIX_POINT_BITS + 12;
+	else if (prob < (PIE_MAX_PROB / 10)) /* 0.1 */
+		p >>= 1 + PIE_FIX_POINT_BITS + 12;
+	else
+		p >>= PIE_FIX_POINT_BITS + 12;
+
+	oldprob = prob;
+
+	if (p_isneg) {
+		prob = prob - p;
+
+		/* check for multiplication underflow */
+		if (prob > oldprob) {
+			prob= 0;
+			D("underflow");
+		}
+	} else {
+		/* Cap Drop adjustment: limit the step to 2% once the
+		 * probability is already at 10% or more.
+		 */
+		if ((pprms->flags & PIE_CAPDROP_ENABLED) &&
+		    prob >= PIE_MAX_PROB / 10 &&
+		    p > PIE_MAX_PROB / 50 ) {
+			p = PIE_MAX_PROB / 50;
+		}
+
+		prob = prob + p;
+
+		/* check for multiplication overflow */
+		if (prob<oldprob) {
+			D("overflow");
+			prob= PIE_MAX_PROB;
+		}
+	}
+
+	/*
+	 * decay the drop probability exponentially
+	 * and restrict it to range 0 to PIE_MAX_PROB
+	 */
+	if (prob < 0) {
+		prob = 0;
+	} else {
+		if (pst->current_qdelay == 0 && pst->qdelay_old == 0) {
+			/* 0.98 ~= 1- 1/64 */
+			prob = prob - (prob >> 6); 
+		}
+
+		if (prob > PIE_MAX_PROB) {
+			prob = PIE_MAX_PROB;
+		}
+	}
+
+	pst->drop_prob = prob;
+	
+	/* store current queue delay value in old queue delay*/
+	pst->qdelay_old = pst->current_qdelay;
+
+	/* update burst allowance */
+	if ((pst->sflags & PIE_ACTIVE) && pst->burst_allowance>0) {
+		
+		if (pst->burst_allowance > pprms->tupdate )
+			pst->burst_allowance -= pprms->tupdate;
+		else 
+			pst->burst_allowance = 0;
+	}
+
+	/* reschedule calculate_drop_prob function */
+	if (pst->sflags & PIE_ACTIVE)
+		callout_reset_sbt(&pst->aqm_pie_callout,
+			(uint64_t)pprms->tupdate * SBT_1US, 0, calculate_drop_prob, pst, 0);
+
+	/* CALLOUT_RETURNUNLOCKED: we must drop the lock before returning */
+	mtx_unlock(&pst->lock_mtx);
+}
+
+/*
+ * Extract a packet from the head of queue 'q'
+ * Return a packet or NULL if the queue is empty.
+ * If getts is set, also extract packet's timestamp from mtag
+ * (the tag is deleted from the mbuf; *pkt_ts is 0 if no tag found).
+ */
+static struct mbuf *
+pie_extract_head(struct dn_queue *q, aqm_time_t *pkt_ts, int getts)
+{
+	struct m_tag *mtag;
+	struct mbuf *m = q->mq.head;
+
+	if (m == NULL)
+		return m;
+	q->mq.head = m->m_nextpkt;
+
+	/* Update stats: one packet of m_pkthdr.len bytes leaves the queue */
+	update_stats(q, -m->m_pkthdr.len, 0);
+
+	if (q->ni.length == 0) /* queue is now idle */
+			q->q_time = dn_cfg.curr_time;
+
+	if (getts) {
+		/* extract the timestamp stored at enqueue time */
+		mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+		if (mtag == NULL) {
+			D("PIE timestamp mtag not found!");
+			*pkt_ts = 0;
+		} else {
+			*pkt_ts = *(aqm_time_t *)(mtag + 1);
+			m_tag_delete(m,mtag); 
+		}
+	}
+	return m;
+}
+
+/* 
+ * Initialize PIE state variables and optionally activate it:
+ * if 'resettimer' is set, PIE_ACTIVE is raised and the drop
+ * probability callout is (re)armed to fire in 'tupdate'.
+ * Acquires and releases pst->lock_mtx internally.
+ */
+__inline static void
+init_activate_pie(struct pie_status *pst, int resettimer)
+{
+	struct dn_aqm_pie_parms *pprms;
+
+	mtx_lock(&pst->lock_mtx);
+	pprms = pst->parms;
+	pst->drop_prob = 0;
+	pst->qdelay_old = 0;
+	pst->burst_allowance = pprms->max_burst;
+	pst->accu_prob = 0;
+	pst->dq_count = 0;
+	pst->avg_dq_time = 0;
+	/* start a fresh departure-rate measurement cycle */
+	pst->sflags = PIE_INMEASUREMENT;
+	pst->measurement_start = AQM_UNOW;
+
+	if (resettimer) {
+		pst->sflags |= PIE_ACTIVE;
+		callout_reset_sbt(&pst->aqm_pie_callout,
+			(uint64_t)pprms->tupdate * SBT_1US,
+			0, calculate_drop_prob, pst, 0);
+	}
+	//DX(2, "PIE Activated");
+	mtx_unlock(&pst->lock_mtx);
+}
+
+/* 
+ * Deactivate PIE and stop the drop probability update callout.
+ * Acquires and releases pst->lock_mtx internally.
+ */
+__inline static void
+deactivate_pie(struct pie_status *pst)
+{
+	mtx_lock(&pst->lock_mtx);
+	pst->sflags &= ~(PIE_ACTIVE | PIE_INMEASUREMENT);
+	callout_stop(&pst->aqm_pie_callout);
+	//D("PIE Deactivated");
+	mtx_unlock(&pst->lock_mtx);
+}
+
+/* 
+ * Dequeue and return a packet from queue 'q' or NULL if 'q' is empty.
+ * Also, calculate departure time or queue delay using timestamp.
+ */
+static struct mbuf *
+aqm_pie_dequeue(struct dn_queue *q)
+{
+	struct mbuf *m;
+	struct dn_flow *ni;	/* stats for scheduler instance */	
+	struct dn_aqm_pie_parms *pprms;
+	struct pie_status *pst;
+	aqm_time_t now;
+	aqm_time_t pkt_ts, dq_time;
+	int32_t w;
+
+	pst  = q->aqm_status;
+	pprms = pst->parms;
+	ni = &q->_si->ni;
+
+	/* we extract the packet timestamp only when Departure Rate
+	 * Estimation is not used */
+	m = pie_extract_head(q, &pkt_ts, !(pprms->flags & PIE_DEPRATEEST_ENABLED));
+
+	/* nothing dequeued, or PIE inactive: no delay bookkeeping needed */
+	if (!m || !(pst->sflags & PIE_ACTIVE))
+		return m;
+
+	now = AQM_UNOW;
+	if (pprms->flags & PIE_DEPRATEEST_ENABLED) {
+		/* calculate average departure time */
+		if(pst->sflags & PIE_INMEASUREMENT) {
+			pst->dq_count += m->m_pkthdr.len;
+
+			/* end the measurement once PIE_DQ_THRESHOLD bytes left */
+			if (pst->dq_count >= PIE_DQ_THRESHOLD) {
+				dq_time = now - pst->measurement_start;
+
+				/* 
+				 * if we don't have old avg dq_time i.e PIE is (re)initialized, 
+				 * don't use weight to calculate new avg_dq_time
+				 */
+				if(pst->avg_dq_time == 0)
+					pst->avg_dq_time = dq_time;
+				else {
+					/* 
+					 * weight = PIE_DQ_THRESHOLD/2^6, but we scaled 
+					 * weight by 2^8. Thus, scaled 
+					 * weight = PIE_DQ_THRESHOLD /2^8 
+					 * */
+					w = PIE_DQ_THRESHOLD >> 8;
+					pst->avg_dq_time = (dq_time* w
+						+ (pst->avg_dq_time * ((1L << 8) - w))) >> 8;
+					pst->sflags &= ~PIE_INMEASUREMENT;
+				}
+			}
+		}
+
+		/* 
+		 * Start new measurement cycle when the queue has
+		 *  PIE_DQ_THRESHOLD worth of bytes.
+		 */
+		if(!(pst->sflags & PIE_INMEASUREMENT) && 
+			q->ni.len_bytes >= PIE_DQ_THRESHOLD) {
+			pst->sflags |= PIE_INMEASUREMENT;
+			pst->measurement_start = now;
+			pst->dq_count = 0;
+		}
+	}
+	/* Optionally, use packet timestamp to estimate queue delay */
+	else
+		pst->current_qdelay = now - pkt_ts;
+
+	return m;	
+}
+
+/*
+ * Enqueue a packet in q, subject to space and  PIE queue management policy
+ * (whose parameters are in q->fs).
+ * Update stats for the queue and the scheduler.
+ * Return 0 on success, 1 on drop. The packet is consumed anyways.
+ */
+static int
+aqm_pie_enqueue(struct dn_queue *q, struct mbuf* m)
+{
+	struct dn_fs *f;
+	uint64_t len;
+	uint32_t qlen;
+	struct pie_status *pst;
+	struct dn_aqm_pie_parms *pprms;
+	int t;
+
+	len = m->m_pkthdr.len;
+	pst  = q->aqm_status;
+	if(!pst) {
+		DX(2, "PIE queue is not initialized\n");
+		update_stats(q, 0, 1);
+		FREE_PKT(m);
+		return 1;
+	}
+
+	f = &(q->fs->fs);
+	pprms = pst->parms;
+	t = ENQUE;
+
+	/* get current queue length in bytes or packets*/
+	qlen = (f->flags & DN_QSIZE_BYTES) ?
+		q->ni.len_bytes : q->ni.length;
+
+	/* check for queue size and drop the tail if exceed queue limit*/
+	if (qlen >= f->qsize)
+		t = DROP;
+	/* drop/mark the packet when PIE is active and burst time elapsed */
+	else if ((pst->sflags & PIE_ACTIVE) && pst->burst_allowance==0
+			&& drop_early(pst, q->ni.len_bytes) == DROP) {
+				/* 
+				 * if drop_prob over ECN threshold, drop the packet 
+				 * otherwise mark and enqueue it.
+				 */
+				if ((pprms->flags & PIE_ECN_ENABLED) && pst->drop_prob <
+					(pprms->max_ecnth << (PIE_PROB_BITS - PIE_FIX_POINT_BITS))
+					&& ecn_mark(m))
+					t = ENQUE;
+				else
+					t = DROP;
+	}
+
+	/* Turn PIE on when 1/3 of the queue is full */ 
+	if (!(pst->sflags & PIE_ACTIVE) && qlen >= pst->one_third_q_size) {
+		init_activate_pie(pst, 1);
+	}
+
+	/* Reset burst tolerance and optionally turn PIE off */
+	if ((pst->sflags & PIE_ACTIVE) && pst->drop_prob == 0 &&
+		pst->current_qdelay < (pprms->qdelay_ref >> 1) &&
+		pst->qdelay_old < (pprms->qdelay_ref >> 1)) {
+
+			pst->burst_allowance = pprms->max_burst;
+			if ((pprms->flags & PIE_ON_OFF_MODE_ENABLED) && qlen<=0)
+				deactivate_pie(pst);
+	}
+
+	/* Timestamp the packet if Departure Rate Estimation is disabled */
+	if (t != DROP && !(pprms->flags & PIE_DEPRATEEST_ENABLED)) {
+		/* Add TS to mbuf as a TAG */
+		struct m_tag *mtag;
+		mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+		if (mtag == NULL)
+			mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS,
+				sizeof(aqm_time_t), M_NOWAIT);
+		if (mtag == NULL) {
+			/*
+			 * BUGFIX: the original code freed 'm' here and then
+			 * fell through to dereference the NULL mtag and to
+			 * prepend a tag to the freed mbuf; the DROP path below
+			 * would then free 'm' a second time.  Just take the
+			 * DROP path, which frees the packet exactly once.
+			 */
+			t = DROP;
+		} else {
+			*(aqm_time_t *)(mtag + 1) = AQM_UNOW;
+			m_tag_prepend(m, mtag);
+		}
+	}
+
+	if (t != DROP) {
+		mq_append(&q->mq, m);
+		update_stats(q, len, 0);
+		return (0);
+	} else {
+		update_stats(q, 0, 1);
+
+		/* reset accu_prob after packet drop */
+		pst->accu_prob = 0;
+		FREE_PKT(m);
+		return 1;
+	}
+	/* not reached */
+}
+
+/* 
+ * initialize PIE for queue 'q' 
+ * First allocate memory for PIE status, then set up the per-queue
+ * mutex/callout and (conditionally) activate PIE.
+ * Returns 0 on success, EINVAL/ENOMEM on failure.
+ */
+static int
+aqm_pie_init(struct dn_queue *q)
+{
+	struct pie_status *pst;
+	struct dn_aqm_pie_parms *pprms;
+	int err = 0;
+	
+	pprms = q->fs->aqmcfg;
+	
+	do { /* exit with break when error occurs*/
+		if (!pprms){
+			DX(2, "AQM_PIE is not configured");
+			err = EINVAL;
+			break;
+		}
+
+		q->aqm_status = malloc(sizeof(struct pie_status),
+				 M_DUMMYNET, M_NOWAIT | M_ZERO);
+		if (q->aqm_status == NULL) {
+			D("cannot allocate PIE private data");
+			err =  ENOMEM ; 
+			break;
+		}
+
+		pst = q->aqm_status;
+		/* increase reference count for PIE module */
+		pie_desc.ref_count++;
+		
+		pst->pq = q;
+		pst->parms = pprms;
+		
+		/* For speed optimization, we calculate 1/3 queue size once here */
+		// we can use x/3 = (x >>2) + (x >>4) + (x >>7)
+		pst->one_third_q_size = q->fs->fs.qsize/3;
+		
+		/* RETURNUNLOCKED: the callout handlers release lock_mtx themselves */
+		mtx_init(&pst->lock_mtx, "mtx_pie", NULL, MTX_DEF);
+		callout_init_mtx(&pst->aqm_pie_callout, &pst->lock_mtx,
+			CALLOUT_RETURNUNLOCKED);
+		
+		pst->current_qdelay = 0;
+		/* in on/off mode PIE stays idle until the queue builds up */
+		init_activate_pie(pst, !(pprms->flags & PIE_ON_OFF_MODE_ENABLED));
+		
+		//DX(2, "aqm_PIE_init");
+
+	} while(0);
+	
+	return err;
+}
+
+/* 
+ * Callout function to destroy PIE mtx and free PIE status memory.
+ * Entered with pst->lock_mtx held (CALLOUT_RETURNUNLOCKED callout);
+ * it must be released before the mutex is destroyed and freed.
+ */
+static void
+pie_callout_cleanup(void *x)
+{
+	struct pie_status *pst = (struct pie_status *) x;
+
+	mtx_unlock(&pst->lock_mtx);
+	mtx_destroy(&pst->lock_mtx);
+	free(x, M_DUMMYNET);
+	/* drop the module reference under the dummynet writer lock */
+	DN_BH_WLOCK();
+	pie_desc.ref_count--;
+	DN_BH_WUNLOCK();
+}
+
+/* 
+ * Clean up PIE status for queue 'q' 
+ * Destroy memory allocated for PIE status.
+ * Returns 0 on success (or nothing to do), 1 on inconsistent state.
+ */
+static int
+aqm_pie_cleanup(struct dn_queue *q)
+{
+
+	if(!q) {
+		D("q is null");
+		return 0;
+	}
+	struct pie_status *pst  = q->aqm_status;
+	if(!pst) {
+		//D("queue is already cleaned up");
+		return 0;
+	}
+	if(!q->fs || !q->fs->aqmcfg) {
+		D("fs is null or no cfg");
+		return 1;
+	}
+	/* sanity check: this queue's flowset should really be PIE */
+	if (q->fs->aqmfp && q->fs->aqmfp->type !=DN_AQM_PIE) {
+		D("Not PIE fs (%d)", q->fs->fs.fs_nr);
+		return 1;
+	}
+
+	/* 
+	 * Free PIE status allocated memory using pie_callout_cleanup() callout
+	 * function to avoid any potential race.
+	 * We reset aqm_pie_callout to call pie_callout_cleanup() in next 1us. This
+	 * stops the scheduled calculate_drop_prob() callout and call pie_callout_cleanup() 
+	 * which does memory freeing.
+	 */
+	mtx_lock(&pst->lock_mtx);
+	callout_reset_sbt(&pst->aqm_pie_callout,
+		SBT_1US, 0, pie_callout_cleanup, pst, 0);
+	q->aqm_status = NULL;
+	mtx_unlock(&pst->lock_mtx);
+
+	return 0;
+}
+
+/* 
+ * Config PIE parameters
+ * also allocate memory for PIE configurations.
+ * Negative entries in ep->par[] select the sysctl default.
+ * Returns 0 on success, EINVAL/ENOMEM on failure.
+ */
+static int 
+aqm_pie_config(struct dn_fsk* fs, struct dn_extra_parms *ep, int len)
+{ 
+	struct dn_aqm_pie_parms *pcfg;
+
+	int l = sizeof(struct dn_extra_parms);
+	if (len < l) {
+		D("invalid sched parms length got %d need %d", len, l);
+		return EINVAL;
+	}
+	/* we free the old cfg because maybe the original allocation 
+	 * was used for different AQM type.
+	 */
+	if (fs->aqmcfg) {
+		free(fs->aqmcfg, M_DUMMYNET);
+		fs->aqmcfg = NULL;
+	}
+	
+	fs->aqmcfg = malloc(sizeof(struct dn_aqm_pie_parms),
+			 M_DUMMYNET, M_NOWAIT | M_ZERO);
+	if (fs->aqmcfg== NULL) {
+		D("cannot allocate PIE configuration parameters");
+		return ENOMEM; 
+	}
+
+	/* par array contains pie configuration as follow
+	 * 0- qdelay_ref,1- tupdate, 2- max_burst
+	 * 3- max_ecnth, 4- alpha, 5- beta, 6- flags
+	 */
+
+	/* configure PIE parameters */
+	pcfg = fs->aqmcfg;
+	
+	if (ep->par[0] < 0)
+		pcfg->qdelay_ref = pie_sysctl.qdelay_ref * AQM_TIME_1US;
+	else
+		pcfg->qdelay_ref = ep->par[0];
+	if (ep->par[1] < 0)
+		pcfg->tupdate = pie_sysctl.tupdate * AQM_TIME_1US;
+	else
+		pcfg->tupdate = ep->par[1];
+	if (ep->par[2] < 0)
+		pcfg->max_burst = pie_sysctl.max_burst * AQM_TIME_1US;
+	else
+		pcfg->max_burst = ep->par[2];
+	if (ep->par[3] < 0)
+		pcfg->max_ecnth = pie_sysctl.max_ecnth;
+	else
+		pcfg->max_ecnth = ep->par[3];
+	if (ep->par[4] < 0)
+		pcfg->alpha = pie_sysctl.alpha;
+	else
+		pcfg->alpha = ep->par[4];
+	if (ep->par[5] < 0)
+		pcfg->beta = pie_sysctl.beta;
+	else
+		pcfg->beta = ep->par[5];
+	if (ep->par[6] < 0)
+		pcfg->flags = pie_sysctl.flags;
+	else
+		pcfg->flags = ep->par[6];
+
+	/* clamp PIE configuration values to their valid ranges */
+	pcfg->qdelay_ref = BOUND_VAR(pcfg->qdelay_ref, 1, 10 * AQM_TIME_1S);
+	pcfg->tupdate = BOUND_VAR(pcfg->tupdate, 1, 10 * AQM_TIME_1S);
+	pcfg->max_burst = BOUND_VAR(pcfg->max_burst, 0, 10 * AQM_TIME_1S);
+	pcfg->max_ecnth = BOUND_VAR(pcfg->max_ecnth, 0, PIE_SCALE);
+	pcfg->alpha = BOUND_VAR(pcfg->alpha, 0, 7 * PIE_SCALE);
+	pcfg->beta = BOUND_VAR(pcfg->beta, 0 , 7 * PIE_SCALE);
+
+	pie_desc.cfg_ref_count++;
+	//D("pie cfg_ref_count=%d", pie_desc.cfg_ref_count);
+	return 0;
+}
+
+/*
+ * Deconfigure PIE and free memory allocation.
+ * Safe to call with a NULL fs or unconfigured flowset.
+ */
+static int
+aqm_pie_deconfig(struct dn_fsk* fs)
+{
+	if (fs && fs->aqmcfg) {
+		free(fs->aqmcfg, M_DUMMYNET);
+		fs->aqmcfg = NULL;
+		pie_desc.cfg_ref_count--;
+	}
+	return 0;
+}
+
+/* 
+ * Retrieve PIE configuration parameters into 'ep'
+ * (time values converted back to microseconds; par[] layout
+ * mirrors aqm_pie_config()). Returns 0 on success, 1 if
+ * the flowset has no PIE configuration.
+ */ 
+static int 
+aqm_pie_getconfig (struct dn_fsk *fs, struct dn_extra_parms * ep)
+{
+	struct dn_aqm_pie_parms *pcfg;
+	if (fs->aqmcfg) {
+		strlcpy(ep->name, pie_desc.name, sizeof(ep->name));
+		pcfg = fs->aqmcfg;
+		ep->par[0] = pcfg->qdelay_ref / AQM_TIME_1US;
+		ep->par[1] = pcfg->tupdate / AQM_TIME_1US;
+		ep->par[2] = pcfg->max_burst / AQM_TIME_1US;
+		ep->par[3] = pcfg->max_ecnth;
+		ep->par[4] = pcfg->alpha;
+		ep->par[5] = pcfg->beta;
+		ep->par[6] = pcfg->flags;
+
+		return 0;
+	}
+	return 1;
+}
+
+/* PIE AQM descriptor: method table registered with dummynet */
+static struct dn_aqm pie_desc = {
+	_SI( .type = )  DN_AQM_PIE,
+	_SI( .name = )  "PIE",
+	_SI( .ref_count = )  0,
+	_SI( .cfg_ref_count = )  0,
+	_SI( .enqueue = )  aqm_pie_enqueue,
+	_SI( .dequeue = )  aqm_pie_dequeue,
+	_SI( .config = )  aqm_pie_config,
+	_SI( .deconfig = )  aqm_pie_deconfig,
+	_SI( .getconfig = )  aqm_pie_getconfig,
+	_SI( .init = )  aqm_pie_init,
+	_SI( .cleanup = )  aqm_pie_cleanup,
+};
+
+DECLARE_DNAQM_MODULE(dn_aqm_pie, &pie_desc);
+#endif


Property changes on: trunk/sys/netpfil/ipfw/dn_aqm_pie.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netpfil/ipfw/dn_aqm_pie.h
===================================================================
--- trunk/sys/netpfil/ipfw/dn_aqm_pie.h	                        (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_aqm_pie.h	2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,154 @@
+/* $MidnightBSD$ */
+/*
+ * PIE - Proportional Integral controller Enhanced AQM algorithm.
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_aqm_pie.h 316325 2017-03-31 06:33:20Z truckman $
+ * 
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ *  Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from 
+ *  The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _IP_DN_AQM_PIE_H
+#define _IP_DN_AQM_PIE_H
+
+#define DN_AQM_PIE 2
+#define PIE_DQ_THRESHOLD_BITS 14
+/* 2^14 =16KB */
+#define PIE_DQ_THRESHOLD (1L << PIE_DQ_THRESHOLD_BITS) 
+#define MEAN_PKTSIZE 800
+
+/* 31-bits because random() generates range from 0->(2**31)-1 */
+#define PIE_PROB_BITS 31
+#define PIE_MAX_PROB ((1LL<<PIE_PROB_BITS) -1)
+
+/* for 16-bits, we have 3-bits for integer part and 13-bits for fraction */
+#define PIE_FIX_POINT_BITS 13
+#define PIE_SCALE (1L<<PIE_FIX_POINT_BITS)
+
+
+/* PIE options (bit flags stored in dn_aqm_pie_parms.flags) */
+enum {
+	PIE_ECN_ENABLED =1,
+	PIE_CAPDROP_ENABLED = 2,
+	PIE_ON_OFF_MODE_ENABLED = 4,
+	PIE_DEPRATEEST_ENABLED = 8,
+	PIE_DERAND_ENABLED = 16
+};
+
+/* PIE parameters (configuration, shared by all queues of a flowset) */
+struct dn_aqm_pie_parms {
+	aqm_time_t	qdelay_ref;	/* AQM Latency Target (default: 15ms) */
+	aqm_time_t	tupdate;		/* a period to calculate drop probability (default:15ms) */
+	aqm_time_t	max_burst;	/* AQM Max Burst Allowance (default: 150ms) */
+	uint16_t	max_ecnth;	/*AQM Max ECN Marking Threshold (default: 10%) */
+	uint16_t	alpha;			/* (default: 1/8, PIE_SCALE fixed point) */
+	uint16_t	beta;			/* (default: 1+1/4, PIE_SCALE fixed point) */
+	uint32_t	flags;			/* PIE options, see enum above */
+};
+
+/* PIE status variables (per-queue runtime state) */
+struct pie_status{
+	struct callout	aqm_pie_callout;	/* periodic drop-prob update */
+	aqm_time_t	burst_allowance;	/* remaining burst budget */
+	uint32_t	drop_prob;	/* current drop probability */
+	aqm_time_t	current_qdelay;
+	aqm_time_t	qdelay_old;	/* qdelay at previous update */
+	uint64_t	accu_prob;	/* accumulated prob for de-randomization */
+	aqm_time_t	measurement_start;
+	aqm_time_t	avg_dq_time;	/* smoothed departure time estimate */
+	uint32_t	dq_count;	/* bytes dequeued this measurement */
+	uint32_t	sflags;	/* PIE_ACTIVE | PIE_INMEASUREMENT */
+	struct dn_aqm_pie_parms *parms;	/* pointer to PIE configurations */
+	/* pointer to parent queue of FQ-PIE sub-queues, or  queue of owner fs. */
+	struct dn_queue	*pq;	
+	struct mtx	lock_mtx;
+	uint32_t one_third_q_size; /* 1/3 of queue size, for speed optimization */
+};
+
+/* enqueue decision returned by drop_early() and used in aqm_pie_enqueue() */
+enum { 
+	ENQUE = 1,
+	DROP,
+	MARKECN
+};
+
+/* PIE current state (bits of pie_status.sflags) */
+enum { 
+	PIE_ACTIVE = 1,
+	PIE_INMEASUREMENT = 2
+};
+
+/* 
+ * Check if enqueue should drop the packet to control delay or not, based
+ * on the PIE algorithm.
+ * return  DROP if it is time to drop or  ENQUE otherwise.
+ * This function is used by PIE and FQ-PIE.
+ */
+__inline static int
+drop_early(struct pie_status *pst, uint32_t qlen)
+{
+	struct dn_aqm_pie_parms *pprms;
+
+	pprms = pst->parms;
+
+	/* queue is not congested: delay below half the target with a low
+	 * drop probability, or very little data queued */
+
+	if ((pst->qdelay_old < (pprms->qdelay_ref >> 1)
+		&& pst->drop_prob < PIE_MAX_PROB / 5 )
+		||  qlen <= 2 * MEAN_PKTSIZE)
+		return ENQUE;
+
+
+	if (pst->drop_prob == 0)
+		pst->accu_prob = 0;
+
+	/* increment accu_prob */
+	if (pprms->flags & PIE_DERAND_ENABLED)
+		pst->accu_prob += pst->drop_prob;
+
+	/* De-randomize option 
+	 * if accu_prob < 0.85 -> enqueue
+	 * if accu_prob>8.5 ->drop
+	 * between 0.85 and 8.5 || !De-randomize --> drop on prob
+	 * 
+	 * (0.85 = 17/20 ,8.5 = 17/2)
+	 */
+	if (pprms->flags & PIE_DERAND_ENABLED) {
+		if(pst->accu_prob < (uint64_t) (PIE_MAX_PROB * 17 / 20))
+			return ENQUE;
+		 if( pst->accu_prob >= (uint64_t) (PIE_MAX_PROB * 17 / 2))
+			return DROP;
+	}
+
+	/* random() yields 0..2^31-1, matching PIE_PROB_BITS */
+	if (random() < pst->drop_prob) {
+		pst->accu_prob = 0;
+		return DROP;
+	}
+
+	return ENQUE;
+}
+
+#endif


Property changes on: trunk/sys/netpfil/ipfw/dn_aqm_pie.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netpfil/ipfw/dn_heap.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_heap.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dn_heap.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
  * All rights reserved
@@ -27,13 +28,13 @@
 /*
  * Binary heap and hash tables, used in dummynet
  *
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_heap.c 240494 2012-09-14 11:51:49Z glebius $
  */
 
 #include <sys/cdefs.h>
 #include <sys/param.h>
 #ifdef _KERNEL
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/dn_heap.c 240494 2012-09-14 11:51:49Z glebius $");
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
@@ -59,7 +60,7 @@
 #define free(p, t)	my_free(p)
 #endif /* !_KERNEL */
 
-MALLOC_DEFINE(M_DN_HEAP, "dummynet", "dummynet heap");
+static MALLOC_DEFINE(M_DN_HEAP, "dummynet", "dummynet heap");
 
 /*
  * Heap management functions.

Modified: trunk/sys/netpfil/ipfw/dn_heap.h
===================================================================
--- trunk/sys/netpfil/ipfw/dn_heap.h	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dn_heap.h	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1998-2010 Luigi Rizzo, Universita` di Pisa
  * All rights reserved
@@ -27,7 +28,7 @@
 /*
  * Binary heap and hash tables, header file
  *
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_heap.h 313726 2017-02-14 04:52:24Z ngie $
  */
 
 #ifndef _IP_DN_HEAP_H
@@ -85,7 +86,7 @@
  * HEAP_TOP() returns a pointer to the top element of the heap,
  *	but makes no checks on its existance (XXX should we change ?)
  *
- * heap_extract() removes the entry at the top, returing the pointer.
+ * heap_extract() removes the entry at the top, returning the pointer.
  *	(the key should have been read before).
  *
  * heap_scan() invokes a callback on each entry of the heap.

Modified: trunk/sys/netpfil/ipfw/dn_sched.h
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched.h	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dn_sched.h	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * Copyright (c) 2010 Riccardo Panicucci, Luigi Rizzo, Universita` di Pisa
  * All rights reserved
@@ -27,7 +28,7 @@
 /*
  * The API to write a packet scheduling algorithm for dummynet.
  *
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched.h 301772 2016-06-10 00:00:25Z truckman $
  */
 
 #ifndef _DN_SCHED_H
@@ -132,6 +133,10 @@
 	int (*free_fsk)(struct dn_fsk *f);
 	int (*new_queue)(struct dn_queue *q);
 	int (*free_queue)(struct dn_queue *q);
+#ifdef NEW_AQM
+	/* Getting scheduler extra parameters */
+	int (*getconfig)(struct dn_schk *, struct dn_extra_parms *);
+#endif
 
 	/* run-time fields */
 	int ref_count;      /* XXX number of instances in the system */
@@ -165,6 +170,11 @@
 	struct mbuf *m = q->mq.head;
 	if (m == NULL)
 		return NULL;
+#ifdef NEW_AQM
+	/* Call AQM dequeue function  */
+	if (q->fs->aqmfp && q->fs->aqmfp->dequeue )
+		return q->fs->aqmfp->dequeue(q);
+#endif
 	q->mq.head = m->m_nextpkt;
 
 	/* Update stats for the queue */

Modified: trunk/sys/netpfil/ipfw/dn_sched_fifo.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_fifo.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dn_sched_fifo.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
  * All rights reserved
@@ -25,7 +26,7 @@
  */
 
 /*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_fifo.c 325731 2017-11-12 01:28:20Z truckman $
  */
 
 #ifdef _KERNEL
@@ -33,15 +34,21 @@
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/kernel.h>
+#include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
+#include <sys/rwlock.h>
 #include <net/if.h>	/* IFNAMSIZ */
 #include <netinet/in.h>
 #include <netinet/ip_var.h>		/* ipfw_rule_ref */
 #include <netinet/ip_fw.h>	/* flow_id */
 #include <netinet/ip_dummynet.h>
+#include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/dn_heap.h>
 #include <netpfil/ipfw/ip_dn_private.h>
+#ifdef NEW_AQM
+#include <netpfil/ipfw/dn_aqm.h>
+#endif
 #include <netpfil/ipfw/dn_sched.h>
 #else
 #include <dn_test.h>
@@ -115,6 +122,9 @@
 	_SI( .free_fsk = )  NULL,
 	_SI( .new_queue = )  NULL,
 	_SI( .free_queue = )  NULL,
+#ifdef NEW_AQM
+	_SI( .getconfig = )  NULL,
+#endif
 };
 
 DECLARE_DNSCHED_MODULE(dn_fifo, &fifo_desc);

Added: trunk/sys/netpfil/ipfw/dn_sched_fq_codel.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_fq_codel.c	                        (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_sched_fq_codel.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,618 @@
+/* $MidnightBSD$ */
+/* 
+ * FQ_Codel - The FlowQueue-Codel scheduler/AQM
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_fq_codel.c 325731 2017-11-12 01:28:20Z truckman $
+ * 
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ *  Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from 
+ *  The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/socket.h>
+//#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <net/if.h>	/* IFNAMSIZ */
+#include <netinet/in.h>
+#include <netinet/ip_var.h>		/* ipfw_rule_ref */
+#include <netinet/ip_fw.h>	/* flow_id */
+#include <netinet/ip_dummynet.h>
+
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <sys/sysctl.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sys/queue.h>
+#include <sys/hash.h>
+
+#include <netpfil/ipfw/dn_heap.h>
+#include <netpfil/ipfw/ip_dn_private.h>
+
+#include <netpfil/ipfw/dn_aqm.h>
+#include <netpfil/ipfw/dn_aqm_codel.h>
+#include <netpfil/ipfw/dn_sched.h>
+#include <netpfil/ipfw/dn_sched_fq_codel.h>
+#include <netpfil/ipfw/dn_sched_fq_codel_helper.h>
+
+#else
+#include <dn_test.h>
+#endif
+
+/* NOTE: The fq_codel module reimplements the CoDel AQM functions
+ * because fq_codel uses a different flow (sub-queue) structure, and
+ * dn_queue includes many variables not needed by a flow (sub-queue),
+ * i.e. this avoids extra overhead (88 bytes vs 208 bytes).
+ * Also, these CoDel functions manage stats of the sub-queues as well as the main queue.
+ */
+
+#define DN_SCHED_FQ_CODEL 6
+
+static struct dn_alg fq_codel_desc;
+
+/* fq_codel default parameters including codel */
+struct dn_sch_fq_codel_parms 
+fq_codel_sysctl = {{5000 * AQM_TIME_1US, 100000 * AQM_TIME_1US,
+	CODEL_ECN_ENABLED}, 1024, 10240, 1514};
+
+static int
+fqcodel_sysctl_interval_handler(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	long  value;
+
+	value = fq_codel_sysctl.ccfg.interval;
+	value /= AQM_TIME_1US;
+	error = sysctl_handle_long(oidp, &value, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (value < 1 || value > 100 * AQM_TIME_1S)
+		return (EINVAL);
+	fq_codel_sysctl.ccfg.interval = value * AQM_TIME_1US ;
+
+	return (0);
+}
+
+static int
+fqcodel_sysctl_target_handler(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	long  value;
+
+	value = fq_codel_sysctl.ccfg.target;
+	value /= AQM_TIME_1US;
+	error = sysctl_handle_long(oidp, &value, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (value < 1 || value > 5 * AQM_TIME_1S)
+		return (EINVAL);
+	fq_codel_sysctl.ccfg.target = value * AQM_TIME_1US ;
+
+	return (0);
+}
+
+
+SYSBEGIN(f4)
+
+SYSCTL_DECL(_net_inet);
+SYSCTL_DECL(_net_inet_ip);
+SYSCTL_DECL(_net_inet_ip_dummynet);
+static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, fqcodel,
+	CTLFLAG_RW, 0, "FQ_CODEL");
+
+#ifdef SYSCTL_NODE
+	
+SYSCTL_PROC(_net_inet_ip_dummynet_fqcodel, OID_AUTO, target,
+	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, fqcodel_sysctl_target_handler, "L",
+	"FQ_CoDel target in microsecond");
+SYSCTL_PROC(_net_inet_ip_dummynet_fqcodel, OID_AUTO, interval,
+	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, fqcodel_sysctl_interval_handler, "L",
+	"FQ_CoDel interval in microsecond");
+	
+SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, quantum,
+	CTLFLAG_RW, &fq_codel_sysctl.quantum, 1514, "FQ_CoDel quantum");
+SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, flows,
+	CTLFLAG_RW, &fq_codel_sysctl.flows_cnt, 1024, 
+	"Number of queues for FQ_CoDel");
+SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, limit,
+	CTLFLAG_RW, &fq_codel_sysctl.limit, 10240, "FQ_CoDel queues size limit");
+#endif
+
+/* Drop a packet from the head of the codel queue */
+static void
+codel_drop_head(struct fq_codel_flow *q, struct fq_codel_si *si)
+{
+	struct mbuf *m = q->mq.head;
+
+	if (m == NULL)
+		return;
+	q->mq.head = m->m_nextpkt;
+
+	fq_update_stats(q, si, -m->m_pkthdr.len, 1);
+
+	if (si->main_q.ni.length == 0) /* queue is now idle */
+			si->main_q.q_time = dn_cfg.curr_time;
+
+	FREE_PKT(m);
+}
+
+/* Enqueue a packet 'm' to a queue 'q' and add a timestamp to that packet.
+ * Returns 1 when unable to add the timestamp, otherwise returns 0.
+ * XXX(review): tag-alloc failure frees 'm' twice (here and at 'drop'). */
+static int
+codel_enqueue(struct fq_codel_flow *q, struct mbuf *m, struct fq_codel_si *si)
+{
+	uint64_t len;
+
+	len = m->m_pkthdr.len;
+	/* finding maximum packet size */
+	if (len > q->cst.maxpkt_size)
+		q->cst.maxpkt_size = len;
+
+	/* Add timestamp to mbuf as MTAG */
+	struct m_tag *mtag;
+	mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+	if (mtag == NULL)
+		mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, sizeof(aqm_time_t),
+			M_NOWAIT);
+	if (mtag == NULL) {
+		m_freem(m); 
+		goto drop;
+	}
+	*(aqm_time_t *)(mtag + 1) = AQM_UNOW;
+	m_tag_prepend(m, mtag);
+
+	mq_append(&q->mq, m);
+	fq_update_stats(q, si, len, 0);
+	return 0;
+
+drop:
+	fq_update_stats(q, si, len, 1);
+	m_freem(m);
+	return 1;
+}
+
+/*
+ * Classify a packet to a queue number using the Jenkins hash function.
+ * Return: queue number.
+ * The inputs of the hash are protocol number, perturbation, src IP,
+ * dst IP, src port and dst port.
+ */
+static inline int
+fq_codel_classify_flow(struct mbuf *m, uint16_t fcount, struct fq_codel_si *si)
+{
+	struct ip *ip;
+	struct tcphdr *th;
+	struct udphdr *uh;
+	uint8_t tuple[41];
+	uint16_t hash=0;
+
+	ip = (struct ip *)mtodo(m, dn_tag_get(m)->iphdr_off);
+//#ifdef INET6
+	struct ip6_hdr *ip6;
+	int isip6;
+	isip6 = (ip->ip_v == 6);
+
+	if(isip6) {
+		ip6 = (struct ip6_hdr *)ip;
+		*((uint8_t *) &tuple[0]) = ip6->ip6_nxt;
+		*((uint32_t *) &tuple[1]) = si->perturbation;
+		memcpy(&tuple[5], ip6->ip6_src.s6_addr, 16);
+		memcpy(&tuple[21], ip6->ip6_dst.s6_addr, 16);
+
+		switch (ip6->ip6_nxt) {
+		case IPPROTO_TCP:
+			th = (struct tcphdr *)(ip6 + 1);
+			*((uint16_t *) &tuple[37]) = th->th_dport;
+			*((uint16_t *) &tuple[39]) = th->th_sport;
+			break;
+
+		case IPPROTO_UDP:
+			uh = (struct udphdr *)(ip6 + 1);
+			*((uint16_t *) &tuple[37]) = uh->uh_dport;
+			*((uint16_t *) &tuple[39]) = uh->uh_sport;
+			break;
+		default:
+			memset(&tuple[37], 0, 4);
+
+		}
+
+		hash = jenkins_hash(tuple, 41, HASHINIT) %  fcount;
+		return hash;
+	} 
+//#endif
+
+	/* IPv4 */
+	*((uint8_t *) &tuple[0]) = ip->ip_p;
+	*((uint32_t *) &tuple[1]) = si->perturbation;
+	*((uint32_t *) &tuple[5]) = ip->ip_src.s_addr;
+	*((uint32_t *) &tuple[9]) = ip->ip_dst.s_addr;
+
+	switch (ip->ip_p) {
+		case IPPROTO_TCP:
+			th = (struct tcphdr *)(ip + 1);
+			*((uint16_t *) &tuple[13]) = th->th_dport;
+			*((uint16_t *) &tuple[15]) = th->th_sport;
+			break;
+
+		case IPPROTO_UDP:
+			uh = (struct udphdr *)(ip + 1);
+			*((uint16_t *) &tuple[13]) = uh->uh_dport;
+			*((uint16_t *) &tuple[15]) = uh->uh_sport;
+			break;
+		default:
+			memset(&tuple[13], 0, 4);
+
+	}
+	hash = jenkins_hash(tuple, 17, HASHINIT) %  fcount;
+
+	return hash;
+}
+
+/*
+ * Enqueue a packet into an appropriate queue according to
+ * FQ_CODEL algorithm.
+ */
+static int 
+fq_codel_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q, 
+	struct mbuf *m)
+{
+	struct fq_codel_si *si;
+	struct fq_codel_schk *schk;
+	struct dn_sch_fq_codel_parms *param;
+	struct dn_queue *mainq;
+	int idx, drop, i, maxidx;
+
+	mainq = (struct dn_queue *)(_si + 1);
+	si = (struct fq_codel_si *)_si;
+	schk = (struct fq_codel_schk *)(si->_si.sched+1);
+	param = &schk->cfg;
+
+	 /* classify a packet to queue number*/
+	idx = fq_codel_classify_flow(m, param->flows_cnt, si);
+	/* Enqueue the packet into the appropriate queue using the CoDel AQM.
+	 * Note: the 'codel_enqueue' function returns 1 only when it is unable
+	 * to add a timestamp to the packet (no limit check). */
+	drop = codel_enqueue(&si->flows[idx], m, si);
+	
+	/* codel unable to timestamp a packet */ 
+	if (drop)
+		return 1;
+	
+	/* If the flow (sub-queue) is not active, then add it to the tail of
+	 * the new flows list, and initialize and activate it.
+	 */
+	if (!si->flows[idx].active ) {
+		STAILQ_INSERT_TAIL(&si->newflows, &si->flows[idx], flowchain);
+		si->flows[idx].deficit = param->quantum;
+		si->flows[idx].cst.dropping = false;
+		si->flows[idx].cst.first_above_time = 0;
+		si->flows[idx].active = 1;
+		//D("activate %d",idx);
+	}
+
+	/* check the limit for all queues and remove a packet from the
+	 * largest one 
+	 */
+	if (mainq->ni.length > schk->cfg.limit) { D("over limit");
+		/* find first active flow */
+		for (maxidx = 0; maxidx < schk->cfg.flows_cnt; maxidx++)
+			if (si->flows[maxidx].active)
+				break;
+		if (maxidx < schk->cfg.flows_cnt) {
+			/* find the largest sub-queue */
+			for (i = maxidx + 1; i < schk->cfg.flows_cnt; i++) 
+				if (si->flows[i].active && si->flows[i].stats.length >
+					si->flows[maxidx].stats.length)
+					maxidx = i;
+			codel_drop_head(&si->flows[maxidx], si);
+			D("maxidx = %d",maxidx);
+			drop = 1;
+		}
+	}
+
+	return drop;
+}
+
+/*
+ * Dequeue a packet from an appropriate queue according to
+ * FQ_CODEL algorithm.
+ */
+static struct mbuf *
+fq_codel_dequeue(struct dn_sch_inst *_si)
+{
+	struct fq_codel_si *si;
+	struct fq_codel_schk *schk;
+	struct dn_sch_fq_codel_parms *param;
+	struct fq_codel_flow *f;
+	struct mbuf *mbuf;
+	struct fq_codel_list *fq_codel_flowlist;
+
+	si = (struct fq_codel_si *)_si;
+	schk = (struct fq_codel_schk *)(si->_si.sched+1);
+	param = &schk->cfg;
+
+	do {
+		/* select a list to start with */
+		if (STAILQ_EMPTY(&si->newflows))
+			fq_codel_flowlist = &si->oldflows;
+		else
+			fq_codel_flowlist = &si->newflows;
+
+		/* Both new and old queue lists are empty, return NULL */
+		if (STAILQ_EMPTY(fq_codel_flowlist)) 
+			return NULL;
+
+		f = STAILQ_FIRST(fq_codel_flowlist);
+		while (f != NULL)	{
+			/* If the flow (sub-queue) has a negative deficit, increase
+			 * the deficit by quantum, move the flow to the tail of the
+			 * old flows list and try another flow.
+			 * Otherwise, this flow will be used for the dequeue.
+			 */
+			if (f->deficit < 0) {
+				 f->deficit += param->quantum;
+				 STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
+				 STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
+			 } else 
+				 break;
+
+			f = STAILQ_FIRST(fq_codel_flowlist);
+		}
+		
+		/* the new flows list is empty, try old flows list */
+		if (STAILQ_EMPTY(fq_codel_flowlist)) 
+			continue;
+
+		/* Dequeue a packet from the selected flow */
+		mbuf = fqc_codel_dequeue(f, si);
+
+		/* Codel did not return a packet */
+		if (!mbuf) {
+			/* If the selected flow belongs to the new flows list, then
+			 * move it to the tail of the old flows list. Otherwise,
+			 * deactivate it and remove it from the old list.
+			 */
+			if (fq_codel_flowlist == &si->newflows) {
+				STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
+				STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
+			}	else {
+				f->active = 0;
+				STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
+			}
+			/* start again */
+			continue;
+		}
+
+		/* We have a packet to return:
+		 * update the flow deficit and return the packet. */
+		f->deficit -= mbuf->m_pkthdr.len;
+		return mbuf;
+
+	} while (1);
+	
+	/* unreachable point */
+	return NULL;
+}
+
+/*
+ * Initialize fq_codel scheduler instance.
+ * also, allocate memory for flows array.
+ */
+static int
+fq_codel_new_sched(struct dn_sch_inst *_si)
+{
+	struct fq_codel_si *si;
+	struct dn_queue *q;
+	struct fq_codel_schk *schk;
+	int i;
+
+	si = (struct fq_codel_si *)_si;
+	schk = (struct fq_codel_schk *)(_si->sched+1);
+
+	if(si->flows) {
+		D("si already configured!");
+		return 0;
+	}
+
+	/* init the main queue */
+	q = &si->main_q;
+	set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q));
+	q->_si = _si;
+	q->fs = _si->sched->fs;
+
+	/* allocate memory for flows array */
+	si->flows = malloc(schk->cfg.flows_cnt * sizeof(struct fq_codel_flow),
+		 M_DUMMYNET, M_NOWAIT | M_ZERO);
+	if (si->flows == NULL) {
+		D("cannot allocate memory for fq_codel configuration parameters");
+		return ENOMEM ; 
+	}
+
+	/* init perturbation for this si */
+	si->perturbation = random();
+
+	/* init the old and new flows lists */
+	STAILQ_INIT(&si->newflows);
+	STAILQ_INIT(&si->oldflows);
+
+	/* init the flows (sub-queues) */
+	for (i = 0; i < schk->cfg.flows_cnt; i++) {
+		/* init codel */
+		si->flows[i].cst.maxpkt_size = 500;
+	}
+
+	fq_codel_desc.ref_count++;
+	return 0;
+}
+
+/*
+ * Free fq_codel scheduler instance.
+ */
+static int
+fq_codel_free_sched(struct dn_sch_inst *_si)
+{
+	struct fq_codel_si *si = (struct fq_codel_si *)_si ;
+
+	/* free the flows array */
+	free(si->flows , M_DUMMYNET);
+	si->flows = NULL;
+	fq_codel_desc.ref_count--;
+
+	return 0;
+}
+
+/*
+ * Configure fq_codel scheduler.
+ * the configurations for the scheduler is passed from userland.
+ */
+static int
+fq_codel_config(struct dn_schk *_schk)
+{
+	struct fq_codel_schk *schk;
+	struct dn_extra_parms *ep;
+	struct dn_sch_fq_codel_parms *fqc_cfg;
+	
+	schk = (struct fq_codel_schk *)(_schk+1);
+	ep = (struct dn_extra_parms *) _schk->cfg;
+
+	/* par array contains fq_codel configuration as follow
+	 * Codel: 0- target,1- interval, 2- flags
+	 * FQ_CODEL: 3- quantum, 4- limit, 5- flows
+	 */
+	if (ep && ep->oid.len ==sizeof(*ep) &&
+		ep->oid.subtype == DN_SCH_PARAMS) {
+
+		fqc_cfg = &schk->cfg;
+		if (ep->par[0] < 0)
+			fqc_cfg->ccfg.target = fq_codel_sysctl.ccfg.target;
+		else
+			fqc_cfg->ccfg.target = ep->par[0] * AQM_TIME_1US;
+
+		if (ep->par[1] < 0)
+			fqc_cfg->ccfg.interval = fq_codel_sysctl.ccfg.interval;
+		else
+			fqc_cfg->ccfg.interval = ep->par[1] * AQM_TIME_1US;
+
+		if (ep->par[2] < 0)
+			fqc_cfg->ccfg.flags = 0;
+		else
+			fqc_cfg->ccfg.flags = ep->par[2];
+
+		/* FQ configurations */
+		if (ep->par[3] < 0)
+			fqc_cfg->quantum = fq_codel_sysctl.quantum;
+		else
+			fqc_cfg->quantum = ep->par[3];
+
+		if (ep->par[4] < 0)
+			fqc_cfg->limit = fq_codel_sysctl.limit;
+		else
+			fqc_cfg->limit = ep->par[4];
+
+		if (ep->par[5] < 0)
+			fqc_cfg->flows_cnt = fq_codel_sysctl.flows_cnt;
+		else
+			fqc_cfg->flows_cnt = ep->par[5];
+
+		/* Bound the configurations */
+		fqc_cfg->ccfg.target = BOUND_VAR(fqc_cfg->ccfg.target, 1 , 
+			5 * AQM_TIME_1S); ;
+		fqc_cfg->ccfg.interval = BOUND_VAR(fqc_cfg->ccfg.interval, 1,
+			100 * AQM_TIME_1S);
+
+		fqc_cfg->quantum = BOUND_VAR(fqc_cfg->quantum,1, 9000);
+		fqc_cfg->limit= BOUND_VAR(fqc_cfg->limit,1,20480);
+		fqc_cfg->flows_cnt= BOUND_VAR(fqc_cfg->flows_cnt,1,65536);
+	}
+	else
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Return fq_codel scheduler configurations
+ * the configurations for the scheduler is passed to userland.
+ */
+static int 
+fq_codel_getconfig (struct dn_schk *_schk, struct dn_extra_parms *ep) {
+	
+	struct fq_codel_schk *schk = (struct fq_codel_schk *)(_schk+1);
+	struct dn_sch_fq_codel_parms *fqc_cfg;
+
+	fqc_cfg = &schk->cfg;
+
+	strcpy(ep->name, fq_codel_desc.name);
+	ep->par[0] = fqc_cfg->ccfg.target / AQM_TIME_1US;
+	ep->par[1] = fqc_cfg->ccfg.interval / AQM_TIME_1US;
+	ep->par[2] = fqc_cfg->ccfg.flags;
+
+	ep->par[3] = fqc_cfg->quantum;
+	ep->par[4] = fqc_cfg->limit;
+	ep->par[5] = fqc_cfg->flows_cnt;
+
+	return 0;
+}
+
+/*
+ * fq_codel scheduler descriptor
+ * contains the type of the scheduler, the name, the size of extra
+ * data structures, and function pointers.
+ */
+static struct dn_alg fq_codel_desc = {
+	_SI( .type = )  DN_SCHED_FQ_CODEL,
+	_SI( .name = ) "FQ_CODEL",
+	_SI( .flags = ) 0,
+
+	_SI( .schk_datalen = ) sizeof(struct fq_codel_schk),
+	_SI( .si_datalen = ) sizeof(struct fq_codel_si) - sizeof(struct dn_sch_inst),
+	_SI( .q_datalen = ) 0,
+
+	_SI( .enqueue = ) fq_codel_enqueue,
+	_SI( .dequeue = ) fq_codel_dequeue,
+	_SI( .config = ) fq_codel_config, /* new sched i.e. sched X config ...*/
+	_SI( .destroy = ) NULL,  /*sched x delete */
+	_SI( .new_sched = ) fq_codel_new_sched, /* new schd instance */
+	_SI( .free_sched = ) fq_codel_free_sched,	/* delete schd instance */
+	_SI( .new_fsk = ) NULL,
+	_SI( .free_fsk = ) NULL,
+	_SI( .new_queue = ) NULL,
+	_SI( .free_queue = ) NULL,
+	_SI( .getconfig = )  fq_codel_getconfig,
+	_SI( .ref_count = ) 0
+};
+
+DECLARE_DNSCHED_MODULE(dn_fq_codel, &fq_codel_desc);


Property changes on: trunk/sys/netpfil/ipfw/dn_sched_fq_codel.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netpfil/ipfw/dn_sched_fq_codel.h
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_fq_codel.h	                        (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_sched_fq_codel.h	2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,168 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ *  Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from 
+ *  The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * FQ_Codel Structures and helper functions
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_fq_codel.h 301772 2016-06-10 00:00:25Z truckman $
+ */
+
+#ifndef _IP_DN_SCHED_FQ_CODEL_H
+#define _IP_DN_SCHED_FQ_CODEL_H
+
+/* list of queues */
+STAILQ_HEAD(fq_codel_list, fq_codel_flow) ;
+
+/* fq_codel parameters including codel */
+struct dn_sch_fq_codel_parms {
+	struct dn_aqm_codel_parms	ccfg;	/* CoDel Parameters */
+	/* FQ_CODEL Parameters */
+	uint32_t flows_cnt;	/* number of flows */
+	uint32_t limit;	/* hard limit of fq_codel queue size*/
+	uint32_t quantum;
+};	/* defaults */
+
+/* flow (sub-queue) stats */
+struct flow_stats {
+	uint64_t tot_pkts;	/* statistics counters  */
+	uint64_t tot_bytes;
+	uint32_t length;		/* Queue length, in packets */
+	uint32_t len_bytes;	/* Queue length, in bytes */
+	uint32_t drops;
+};
+
+/* A flow of packets (sub-queue).*/
+struct fq_codel_flow {
+	struct mq	mq;	/* list of packets */
+	struct flow_stats stats;	/* statistics */
+	int	deficit;
+	int active;		/* 1: flow is active (in a list) */
+	struct codel_status cst;
+	STAILQ_ENTRY(fq_codel_flow) flowchain;
+};
+
+/* extra fq_codel scheduler configurations */
+struct fq_codel_schk {
+	struct dn_sch_fq_codel_parms cfg;
+};
+
+/* fq_codel scheduler instance */
+struct fq_codel_si {
+	struct dn_sch_inst _si;	/* standard scheduler instance */
+	struct dn_queue main_q; /* main queue is after si directly */
+
+	struct fq_codel_flow *flows; /* array of flows (queues) */
+	uint32_t perturbation; /* random value */
+	struct fq_codel_list newflows;	/* list of new queues */
+	struct fq_codel_list oldflows;		/* list of old queues */
+};
+
+/* Helper function to update queue&main-queue and scheduler statistics.
+ * negative len + drop -> drop
+ * negative len -> dequeue
+ * positive len -> enqueue
+ * positive len + drop -> drop during enqueue
+ */
+__inline static void
+fq_update_stats(struct fq_codel_flow *q, struct fq_codel_si *si, int len,
+	int drop)
+{
+	int inc = 0;
+
+	if (len < 0) 
+		inc = -1;
+	else if (len > 0)
+		inc = 1;
+
+	if (drop) {
+		si->main_q.ni.drops ++;
+		q->stats.drops ++;
+		si->_si.ni.drops ++;
+		io_pkt_drop ++;
+	} 
+
+	if (!drop || (drop && len < 0)) {
+		/* Update stats for the main queue */
+		si->main_q.ni.length += inc;
+		si->main_q.ni.len_bytes += len;
+
+		/*update sub-queue stats */
+		q->stats.length += inc;
+		q->stats.len_bytes += len;
+
+		/*update scheduler instance stats */
+		si->_si.ni.length += inc;
+		si->_si.ni.len_bytes += len;
+	}
+
+	if (inc > 0) {
+		si->main_q.ni.tot_bytes += len;
+		si->main_q.ni.tot_pkts ++;
+		
+		q->stats.tot_bytes +=len;
+		q->stats.tot_pkts++;
+		
+		si->_si.ni.tot_bytes +=len;
+		si->_si.ni.tot_pkts ++;
+	}
+
+}
+
+/* extract the head of fq_codel sub-queue */
+__inline static struct mbuf *
+fq_codel_extract_head(struct fq_codel_flow *q, aqm_time_t *pkt_ts, struct fq_codel_si *si)
+{
+	struct mbuf *m = q->mq.head;
+
+	if (m == NULL)
+		return m;
+	q->mq.head = m->m_nextpkt;
+
+	fq_update_stats(q, si, -m->m_pkthdr.len, 0);
+
+	if (si->main_q.ni.length == 0) /* queue is now idle */
+			si->main_q.q_time = dn_cfg.curr_time;
+
+	/* extract packet timestamp*/
+	struct m_tag *mtag;
+	mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+	if (mtag == NULL){
+		D("timestamp tag is not found!");
+		*pkt_ts = 0;
+	} else {
+		*pkt_ts = *(aqm_time_t *)(mtag + 1);
+		m_tag_delete(m,mtag); 
+	}
+
+	return m;
+}
+
+
+#endif


Property changes on: trunk/sys/netpfil/ipfw/dn_sched_fq_codel.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h	                        (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h	2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,188 @@
+/* $MidnightBSD$ */
+/*
+ * Codel - The Controlled-Delay Active Queue Management algorithm.
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h 301772 2016-06-10 00:00:25Z truckman $
+ * 
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ *  Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from 
+ *  The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Copyright (C) 2011-2014 Kathleen Nichols <nichols at pollere.com>.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * o  Redistributions of source code must retain the above copyright
+ *  notice, this list of conditions, and the following disclaimer,
+ *  without modification.
+ *
+ * o  Redistributions in binary form must reproduce the above copyright
+ *  notice, this list of conditions and the following disclaimer in
+ *  the documentation and/or other materials provided with the
+ *  distribution.
+ * 
+ * o  The names of the authors may not be used to endorse or promote
+ *  products derived from this software without specific prior written
+ *  permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General Public
+ * License ("GPL") version 2, in which case the provisions of the GPL
+ * apply INSTEAD OF those given above.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IP_DN_SCHED_FQ_CODEL_HELPER_H
+#define _IP_DN_SCHED_FQ_CODEL_HELPER_H
+
+__inline static struct mbuf *
+fqc_dodequeue(struct fq_codel_flow *q, aqm_time_t now, uint16_t *ok_to_drop,
+	struct fq_codel_si *si)
+{
+	struct mbuf * m;
+	struct fq_codel_schk *schk = (struct fq_codel_schk *)(si->_si.sched+1);
+	aqm_time_t  pkt_ts, sojourn_time;
+
+	*ok_to_drop = 0;
+	m = fq_codel_extract_head(q, &pkt_ts, si);
+
+	if (m == NULL) {
+		/*queue is empty - we can't be above target*/
+		q->cst.first_above_time= 0;
+		return m;
+	}
+
+	/* To span a large range of bandwidths, CoDel runs two
+	 * different AQMs in parallel. One is sojourn-time-based
+	 * and takes effect when the time to send an MTU-sized
+	 * packet is less than target.  The 1st term of the "if"
+	 * below does this.  The other is backlog-based and takes
+	 * effect when the time to send an MTU-sized packet is >=
+	* target. The goal here is to keep the output link
+	* utilization high by never allowing the queue to get
+	* smaller than the amount that arrives in a typical
+	 * interarrival time (MTU-sized packets arriving spaced
+	 * by the amount of time it takes to send such a packet on
+	 * the bottleneck). The 2nd term of the "if" does this.
+	 */
+	sojourn_time = now - pkt_ts;
+	if (sojourn_time < schk->cfg.ccfg.target || q->stats.len_bytes <= q->cst.maxpkt_size) {
+		/* went below - stay below for at least interval */
+		q->cst.first_above_time = 0;
+	} else {
+		if (q->cst.first_above_time == 0) {
+			/* just went above from below. if still above at
+			 * first_above_time, will say it's ok to drop. */
+			q->cst.first_above_time = now + schk->cfg.ccfg.interval;
+		} else if (now >= q->cst.first_above_time) {
+			*ok_to_drop = 1;
+		}
+	}
+	return m;
+}
+
+/* Codel dequeue function */
+__inline static struct mbuf * 
+fqc_codel_dequeue(struct fq_codel_flow *q, struct fq_codel_si *si)
+{
+	struct mbuf *m;
+	struct dn_aqm_codel_parms *cprms;
+	struct codel_status *cst;
+	aqm_time_t now;
+	uint16_t ok_to_drop;
+	struct fq_codel_schk *schk = (struct fq_codel_schk *)(si->_si.sched+1);
+
+	cst = &q->cst;
+	cprms = &schk->cfg.ccfg;
+
+	now = AQM_UNOW;
+	m = fqc_dodequeue(q, now, &ok_to_drop, si);
+
+	if (cst->dropping) {
+		if (!ok_to_drop) {
+			/* sojourn time below target - leave dropping state */
+			cst->dropping = false;
+		}
+
+		/* Time for the next drop. Drop current packet and dequeue
+		 * next.  If the dequeue doesn't take us out of dropping
+		 * state, schedule the next drop. A large backlog might
+		 * result in drop rates so high that the next drop should
+		 * happen now, hence the 'while' loop.
+		 */
+		while (now >= cst->drop_next_time && cst->dropping) {
+
+			/* mark the packet */
+			if (cprms->flags & CODEL_ECN_ENABLED && ecn_mark(m)) {
+				cst->count++;
+				/* schedule the next mark. */
+				cst->drop_next_time = control_law(cst, cprms, cst->drop_next_time);
+				return m;
+			}
+
+			/* drop the packet */
+			fq_update_stats(q, si, 0, 1);
+			m_freem(m);
+			m = fqc_dodequeue(q, now, &ok_to_drop, si);
+
+			if (!ok_to_drop) {
+				/* leave dropping state */
+				cst->dropping = false;
+			} else {
+				cst->count++;
+				/* schedule the next drop. */
+				cst->drop_next_time = control_law(cst, cprms, cst->drop_next_time);
+			}
+		}
+	/* If we get here we're not in dropping state. The 'ok_to_drop'
+	 * return from dodequeue means that the sojourn time has been
+	 * above 'target' for 'interval' so enter dropping state.
+	 */
+	} else if (ok_to_drop) {
+
+		/* if ECN option is disabled or the packet cannot be marked,
+		 * drop the packet and extract another.
+		 */
+		if (!(cprms->flags & CODEL_ECN_ENABLED) || !ecn_mark(m)) {
+			fq_update_stats(q, si, 0, 1);
+			m_freem(m);
+			m = fqc_dodequeue(q, now, &ok_to_drop,si);
+		}
+
+		cst->dropping = true;
+
+		/* If min went above target close to when it last went
+		 * below, assume that the drop rate that controlled the
+		 * queue on the last cycle is a good starting point to
+		 * control it now. ('drop_next' will be at most 'interval'
+		 * later than the time of the last drop so 'now - drop_next'
+		 * is a good approximation of the time from the last drop
+		 * until now.)
+		 */
+		cst->count = (cst->count > 2 && ((aqm_stime_t)now - 
+			(aqm_stime_t)cst->drop_next_time) < 8* cprms->interval)? cst->count - 2 : 1;
+
+		/* We don't have to set an initial guess for the Newton's method
+		 * isqrt, as we initialize isqrt in control_law() when count == 1. */
+		cst->drop_next_time = control_law(cst, cprms, now);
+	}
+
+	return m;
+}
+
+#endif


Property changes on: trunk/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netpfil/ipfw/dn_sched_fq_pie.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_fq_pie.c	                        (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_sched_fq_pie.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,1236 @@
+/* $MidnightBSD$ */
+/* 
+ * FQ_PIE - The FlowQueue-PIE scheduler/AQM
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_fq_pie.c 325731 2017-11-12 01:28:20Z truckman $
+ * 
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ *  Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from 
+ *  The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Important note:
+ * As there is no official document for the FQ-PIE specification, we used
+ * the FQ-CoDel algorithm with some modifications to implement FQ-PIE.
+ * This FQ-PIE implementation is a beta version and has not been tested
+ * extensively. Our FQ-PIE uses a stand-alone PIE AQM per sub-queue. By
+ * default, a timestamp is used to calculate queue delay instead of the
+ * departure rate estimation method. Although departure rate estimation is
+ * available as a testing option, the results could be incorrect. Moreover,
+ * an option to turn PIE on and off is available but it does not work
+ * properly in this version.
+ */
+
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <net/if.h>	/* IFNAMSIZ */
+#include <netinet/in.h>
+#include <netinet/ip_var.h>		/* ipfw_rule_ref */
+#include <netinet/ip_fw.h>	/* flow_id */
+#include <netinet/ip_dummynet.h>
+
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <sys/sysctl.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sys/queue.h>
+#include <sys/hash.h>
+
+#include <netpfil/ipfw/dn_heap.h>
+#include <netpfil/ipfw/ip_dn_private.h>
+
+#include <netpfil/ipfw/dn_aqm.h>
+#include <netpfil/ipfw/dn_aqm_pie.h>
+#include <netpfil/ipfw/dn_sched.h>
+
+#else
+#include <dn_test.h>
+#endif
+
+#define DN_SCHED_FQ_PIE 7
+
+/* list of queues */
+STAILQ_HEAD(fq_pie_list, fq_pie_flow) ;
+
+/* FQ_PIE parameters including PIE */
+struct dn_sch_fq_pie_parms {
+	struct dn_aqm_pie_parms	pcfg;	/* PIE configuration Parameters */
+	/* FQ_PIE Parameters */
+	uint32_t flows_cnt;	/* number of flows */
+	uint32_t limit;	/* hard limit of FQ_PIE queue size*/
+	uint32_t quantum;	/* DRR quantum, in bytes (default 1514) */
+};
+
+/* flow (sub-queue) stats */
+struct flow_stats {
+	uint64_t tot_pkts;	/* statistics counters  */
+	uint64_t tot_bytes;
+	uint32_t length;		/* Queue length, in packets */
+	uint32_t len_bytes;	/* Queue length, in bytes */
+	uint32_t drops;	/* number of packets dropped from this sub-queue */
+};
+
+/* A flow of packets (sub-queue)*/
+struct fq_pie_flow {
+	struct mq	mq;	/* list of packets */
+	struct flow_stats stats;	/* statistics */
+	int deficit;	/* DRR deficit counter, in bytes */
+	int active;		/* 1: flow is active (in a list) */
+	struct pie_status pst;	/* pie status variables */
+	struct fq_pie_si_extra *psi_extra;	/* back-pointer to per-instance extra state */
+	STAILQ_ENTRY(fq_pie_flow) flowchain;
+};
+
+/* extra fq_pie scheduler configurations */
+struct fq_pie_schk {
+	struct dn_sch_fq_pie_parms cfg;
+};
+
+
+/* fq_pie scheduler instance extra state vars.
+ * The purpose of separation this structure is to preserve number of active
+ * sub-queues and the flows array pointer even after the scheduler instance
+ * is destroyed.
+ * Preserving these varaiables allows freeing the allocated memory by
+ * fqpie_callout_cleanup() independently from fq_pie_free_sched().
+ */
+struct fq_pie_si_extra {
+	uint32_t nr_active_q;	/* number of active queues */
+	struct fq_pie_flow *flows;	/* array of flows (queues) */
+	};
+
+/* fq_pie scheduler instance */
+struct fq_pie_si {
+	struct dn_sch_inst _si;	/* standard scheduler instance. SHOULD BE FIRST */ 
+	struct dn_queue main_q; /* main queue is after si directly */
+	uint32_t perturbation; 	/* random value */
+	struct fq_pie_list newflows;	/* list of new queues */
+	struct fq_pie_list oldflows;	/* list of old queues */
+	struct fq_pie_si_extra *si_extra; /* extra state vars*/
+};
+
+
+static struct dn_alg fq_pie_desc;	/* forward declaration; defined at end of file */
+
+/*  Default FQ-PIE parameters including PIE */
+/*  PIE defaults
+ * target=15ms, max_burst=150ms, max_ecnth=0.1, 
+ * alpha=0.125, beta=1.25, tupdate=15ms
+ * FQ-
+ * flows=1024, limit=10240, quantum =1514
+ */
+/* NOTE(review): the inner initializer must match the field order of
+ * struct dn_aqm_pie_parms (qdelay_ref, tupdate, max_burst, max_ecnth,
+ * alpha, beta, flags) — TODO confirm against dn_aqm_pie.h. */
+struct dn_sch_fq_pie_parms 
+ fq_pie_sysctl = {{15000 * AQM_TIME_1US, 15000 * AQM_TIME_1US,
+	150000 * AQM_TIME_1US, PIE_SCALE * 0.1, PIE_SCALE * 0.125, 
+	PIE_SCALE * 1.25,	PIE_CAPDROP_ENABLED | PIE_DERAND_ENABLED},
+	1024, 10240, 1514};
+
+/*
+ * Sysctl handler shared by the "alpha" and "beta" OIDs.
+ * Values are exported/imported scaled by 1000 and stored internally in
+ * PIE fixed-point (PIE_SCALE) units; accepted range is (0, 7] in real terms.
+ */
+static int
+fqpie_sysctl_alpha_beta_handler(SYSCTL_HANDLER_ARGS)
+{
+	int err;
+	long val;
+
+	/* Export the current value, converted from fixed point to x1000. */
+	val = strcmp(oidp->oid_name, "alpha") == 0 ?
+	    fq_pie_sysctl.pcfg.alpha : fq_pie_sysctl.pcfg.beta;
+	val = val * 1000 / PIE_SCALE;
+
+	err = sysctl_handle_long(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+	if (val < 1 || val > 7 * PIE_SCALE)
+		return (EINVAL);
+
+	/* Convert back to PIE fixed-point representation and store. */
+	val = (val * PIE_SCALE) / 1000;
+	if (strcmp(oidp->oid_name, "alpha") == 0)
+		fq_pie_sysctl.pcfg.alpha = val;
+	else
+		fq_pie_sysctl.pcfg.beta = val;
+	return (0);
+}
+
+/*
+ * Sysctl handler shared by the "target", "tupdate" and "max_burst" OIDs.
+ * Values are exported/imported in microseconds and stored internally in
+ * AQM time units; accepted range is 1us .. 10s.
+ */
+static int
+fqpie_sysctl_target_tupdate_maxb_handler(SYSCTL_HANDLER_ARGS)
+{
+	int err;
+	long val;
+
+	/* Pick the parameter this OID controls and export it in microseconds. */
+	if (strcmp(oidp->oid_name, "target") == 0)
+		val = fq_pie_sysctl.pcfg.qdelay_ref;
+	else if (strcmp(oidp->oid_name, "tupdate") == 0)
+		val = fq_pie_sysctl.pcfg.tupdate;
+	else
+		val = fq_pie_sysctl.pcfg.max_burst;
+	val = val / AQM_TIME_1US;
+
+	err = sysctl_handle_long(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+	if (val < 1 || val > 10 * AQM_TIME_1S)
+		return (EINVAL);
+
+	/* Convert back to internal AQM time units and store. */
+	val = val * AQM_TIME_1US;
+	if (strcmp(oidp->oid_name, "target") == 0)
+		fq_pie_sysctl.pcfg.qdelay_ref = val;
+	else if (strcmp(oidp->oid_name, "tupdate") == 0)
+		fq_pie_sysctl.pcfg.tupdate = val;
+	else
+		fq_pie_sysctl.pcfg.max_burst = val;
+	return (0);
+}
+
+/*
+ * Sysctl handler for the ECN safeguard threshold ("max_ecnth").
+ * Exported/imported scaled by 1000; stored in PIE fixed-point units.
+ */
+static int
+fqpie_sysctl_max_ecnth_handler(SYSCTL_HANDLER_ARGS)
+{
+	int err;
+	long val;
+
+	val = fq_pie_sysctl.pcfg.max_ecnth;
+	val = val * 1000 / PIE_SCALE;
+
+	err = sysctl_handle_long(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+	if (val < 1 || val > PIE_SCALE)
+		return (EINVAL);
+
+	fq_pie_sysctl.pcfg.max_ecnth = (val * PIE_SCALE) / 1000;
+	return (0);
+}
+
+/* define FQ-PIE sysctl variables under net.inet.ip.dummynet.fqpie */
+SYSBEGIN(f4)
+SYSCTL_DECL(_net_inet);
+SYSCTL_DECL(_net_inet_ip);
+SYSCTL_DECL(_net_inet_ip_dummynet);
+static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, fqpie,
+	CTLFLAG_RW, 0, "FQ_PIE");
+
+/* NOTE(review): guarding on SYSCTL_NODE (a macro defined in <sys/sysctl.h>)
+ * skips these definitions in the userland test build — confirm intent. */
+#ifdef SYSCTL_NODE
+	
+SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, target,
+	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+	fqpie_sysctl_target_tupdate_maxb_handler, "L",
+	"queue target in microsecond");
+
+SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, tupdate,
+	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+	fqpie_sysctl_target_tupdate_maxb_handler, "L",
+	"the frequency of drop probability calculation in microsecond");
+
+SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, max_burst,
+	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+	fqpie_sysctl_target_tupdate_maxb_handler, "L",
+	"Burst allowance interval in microsecond");
+
+SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, max_ecnth,
+	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+	fqpie_sysctl_max_ecnth_handler, "L",
+	"ECN safeguard threshold scaled by 1000");
+
+SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, alpha,
+	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+	fqpie_sysctl_alpha_beta_handler, "L", "PIE alpha scaled by 1000");
+
+SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, beta,
+	CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+	fqpie_sysctl_alpha_beta_handler, "L", "beta scaled by 1000");
+
+/* plain integer knobs read directly from fq_pie_sysctl */
+SYSCTL_UINT(_net_inet_ip_dummynet_fqpie, OID_AUTO, quantum,
+	CTLFLAG_RW, &fq_pie_sysctl.quantum, 1514, "quantum for FQ_PIE");
+SYSCTL_UINT(_net_inet_ip_dummynet_fqpie, OID_AUTO, flows,
+	CTLFLAG_RW, &fq_pie_sysctl.flows_cnt, 1024, "Number of queues for FQ_PIE");
+SYSCTL_UINT(_net_inet_ip_dummynet_fqpie, OID_AUTO, limit,
+	CTLFLAG_RW, &fq_pie_sysctl.limit, 10240, "limit for FQ_PIE");
+#endif
+
+/* Helper function to update queue&main-queue and scheduler statistics.
+ * negative len & drop -> drop
+ * negative len -> dequeue
+ * positive len -> enqueue
+ * positive len + drop -> drop during enqueue
+ */
+__inline static void
+fq_update_stats(struct fq_pie_flow *q, struct fq_pie_si *si, int len,
+	int drop)
+{
+	int inc = 0;
+
+	if (len < 0)
+		inc = -1;
+	else if (len > 0)
+		inc = 1;
+
+	if (drop) {
+		si->main_q.ni.drops++;
+		q->stats.drops++;
+		si->_si.ni.drops++;
+		io_pkt_drop++;
+	}
+
+	/* A drop with negative len is a dequeue-side drop: the packet was
+	 * already counted in the queue lengths, so they must be adjusted.
+	 * (The original condition `!drop || (drop && len < 0)` is
+	 * equivalent to the simpler form below.)
+	 */
+	if (!drop || len < 0) {
+		/* Update stats for the main queue */
+		si->main_q.ni.length += inc;
+		si->main_q.ni.len_bytes += len;
+
+		/* update sub-queue stats */
+		q->stats.length += inc;
+		q->stats.len_bytes += len;
+
+		/* update scheduler instance stats */
+		si->_si.ni.length += inc;
+		si->_si.ni.len_bytes += len;
+	}
+
+	/* totals are only bumped on enqueue (positive len) */
+	if (inc > 0) {
+		si->main_q.ni.tot_bytes += len;
+		si->main_q.ni.tot_pkts++;
+
+		q->stats.tot_bytes += len;
+		q->stats.tot_pkts++;
+
+		si->_si.ni.tot_bytes += len;
+		si->_si.ni.tot_pkts++;
+	}
+}
+
+/*
+ * Extract a packet from the head of sub-queue 'q'
+ * Return a packet or NULL if the queue is empty.
+ * If getts is set, also extract packet's timestamp from mtag; in that case
+ * 'pkt_ts' must be non-NULL and receives the timestamp (0 if no mtag).
+ */
+__inline static struct mbuf *
+fq_pie_extract_head(struct fq_pie_flow *q, aqm_time_t *pkt_ts,
+	struct fq_pie_si *si, int getts)
+{
+	struct mbuf *m = q->mq.head;
+
+	if (m == NULL)
+		return m;
+	q->mq.head = m->m_nextpkt;
+
+	/* dequeue: negative length, no drop (see fq_update_stats) */
+	fq_update_stats(q, si, -m->m_pkthdr.len, 0);
+
+	if (si->main_q.ni.length == 0) /* queue is now idle */
+			si->main_q.q_time = dn_cfg.curr_time;
+
+	if (getts) {
+		/* extract packet timestamp*/
+		struct m_tag *mtag;
+		mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+		if (mtag == NULL){
+			D("PIE timestamp mtag not found!");
+			*pkt_ts = 0;
+		} else {
+			/* timestamp payload lives right after the tag header */
+			*pkt_ts = *(aqm_time_t *)(mtag + 1);
+			m_tag_delete(m,mtag); 
+		}
+	}
+	return m;
+}
+
+/*
+ * Callout function for drop probability calculation.
+ * Runs every 'tupdate' interval with a pointer to a FQ-PIE flow as its
+ * argument.
+ *
+ * NOTE(review): the callout is registered via callout_init_mtx() with
+ * CALLOUT_RETURNUNLOCKED (see pie_init()), so this function is entered
+ * with pst->lock_mtx held and must release it before returning — that is
+ * why there is an mtx_unlock() at the end without a matching mtx_lock().
+ */
+static void
+fq_calculate_drop_prob(void *x)
+{
+	struct fq_pie_flow *q = (struct fq_pie_flow *) x;
+	struct pie_status *pst = &q->pst;
+	struct dn_aqm_pie_parms *pprms; 
+	int64_t p, prob, oldprob;
+	aqm_time_t now;
+	int p_isneg;
+
+	now = AQM_UNOW;
+	pprms = pst->parms;
+	prob = pst->drop_prob;
+
+	/* calculate current qdelay using DRE method.
+	 * If TS is used and no data in the queue, reset current_qdelay
+	 * as it stays at last value during dequeue process.
+	*/
+	if (pprms->flags & PIE_DEPRATEEST_ENABLED)
+		pst->current_qdelay = ((uint64_t)q->stats.len_bytes  * pst->avg_dq_time)
+			>> PIE_DQ_THRESHOLD_BITS;
+	else
+		if (!q->stats.len_bytes)
+			pst->current_qdelay = 0;
+
+	/* calculate drop probability:
+	 * p = alpha * (qdelay - target) + beta * (qdelay - qdelay_old) */
+	p = (int64_t)pprms->alpha * 
+		((int64_t)pst->current_qdelay - (int64_t)pprms->qdelay_ref); 
+	p +=(int64_t) pprms->beta * 
+		((int64_t)pst->current_qdelay - (int64_t)pst->qdelay_old); 
+
+	/* take absolute value so right shift result is well defined */
+	p_isneg = p < 0;
+	if (p_isneg) {
+		p = -p;
+	}
+		
+	/* We PIE_MAX_PROB shift by 12-bits to increase the division precision  */
+	p *= (PIE_MAX_PROB << 12) / AQM_TIME_1S;
+
+	/* auto-tune drop probability: the smaller the current probability,
+	 * the smaller the step applied (larger right shift) */
+	if (prob < (PIE_MAX_PROB / 1000000)) /* 0.000001 */
+		p >>= 11 + PIE_FIX_POINT_BITS + 12;
+	else if (prob < (PIE_MAX_PROB / 100000)) /* 0.00001 */
+		p >>= 9 + PIE_FIX_POINT_BITS + 12;
+	else if (prob < (PIE_MAX_PROB / 10000)) /* 0.0001 */
+		p >>= 7 + PIE_FIX_POINT_BITS + 12;
+	else if (prob < (PIE_MAX_PROB / 1000)) /* 0.001 */
+		p >>= 5 + PIE_FIX_POINT_BITS + 12;
+	else if (prob < (PIE_MAX_PROB / 100)) /* 0.01 */
+		p >>= 3 + PIE_FIX_POINT_BITS + 12;
+	else if (prob < (PIE_MAX_PROB / 10)) /* 0.1 */
+		p >>= 1 + PIE_FIX_POINT_BITS + 12;
+	else
+		p >>= PIE_FIX_POINT_BITS + 12;
+
+	oldprob = prob;
+
+	if (p_isneg) {
+		prob = prob - p;
+
+		/* check for multiplication underflow */
+		if (prob > oldprob) {
+			prob= 0;
+			D("underflow");
+		}
+	} else {
+		/* Cap Drop adjustment */
+		if ((pprms->flags & PIE_CAPDROP_ENABLED) &&
+		    prob >= PIE_MAX_PROB / 10 &&
+		    p > PIE_MAX_PROB / 50 ) {
+			p = PIE_MAX_PROB / 50;
+		}
+
+		prob = prob + p;
+
+		/* check for multiplication overflow */
+		if (prob<oldprob) {
+			D("overflow");
+			prob= PIE_MAX_PROB;
+		}
+	}
+
+	/*
+	 * decay the drop probability exponentially
+	 * and restrict it to range 0 to PIE_MAX_PROB
+	 */
+	if (prob < 0) {
+		prob = 0;
+	} else {
+		if (pst->current_qdelay == 0 && pst->qdelay_old == 0) {
+			/* 0.98 ~= 1- 1/64 */
+			prob = prob - (prob >> 6); 
+		}
+
+		if (prob > PIE_MAX_PROB) {
+			prob = PIE_MAX_PROB;
+		}
+	}
+
+	pst->drop_prob = prob;
+	
+	/* store current delay value */
+	pst->qdelay_old = pst->current_qdelay;
+
+	/* update burst allowance */
+	if ((pst->sflags & PIE_ACTIVE) && pst->burst_allowance) {
+		if (pst->burst_allowance > pprms->tupdate)
+			pst->burst_allowance -= pprms->tupdate;
+		else 
+			pst->burst_allowance = 0;
+	}
+
+	/* re-arm the callout while PIE remains active (single-statement if;
+	 * the call below is its body despite the indentation) */
+	if (pst->sflags & PIE_ACTIVE)
+	callout_reset_sbt(&pst->aqm_pie_callout,
+		(uint64_t)pprms->tupdate * SBT_1US,
+		0, fq_calculate_drop_prob, q, 0);
+
+	mtx_unlock(&pst->lock_mtx);
+}
+
+/* 
+ * Reset PIE variables & activate the queue.
+ * Takes pst->lock_mtx; safe to call from enqueue context.
+ */
+__inline static void
+fq_activate_pie(struct fq_pie_flow *q)
+{ 
+	struct pie_status *pst = &q->pst;
+	struct dn_aqm_pie_parms *pprms;
+
+	mtx_lock(&pst->lock_mtx);
+	pprms = pst->parms;	/* original assigned this twice; once suffices */
+
+	/* reset PIE state so a fresh control cycle starts cleanly */
+	pst->drop_prob = 0;
+	pst->qdelay_old = 0;
+	pst->burst_allowance = pprms->max_burst;
+	pst->accu_prob = 0;
+	pst->dq_count = 0;
+	pst->avg_dq_time = 0;
+	pst->sflags = PIE_INMEASUREMENT | PIE_ACTIVE;
+	pst->measurement_start = AQM_UNOW;
+	
+	/* arm the drop-probability calculation callout (tupdate period) */
+	callout_reset_sbt(&pst->aqm_pie_callout,
+		(uint64_t)pprms->tupdate * SBT_1US,
+		0, fq_calculate_drop_prob, q, 0);
+
+	mtx_unlock(&pst->lock_mtx);
+}
+
+
+/*
+ * Deactivate PIE: clear the active/measurement flags and stop the
+ * drop-probability update callout, all under pst->lock_mtx.
+ */
+__inline static void
+fq_deactivate_pie(struct pie_status *pst)
+{
+	mtx_lock(&pst->lock_mtx);
+	pst->sflags &= ~(PIE_ACTIVE | PIE_INMEASUREMENT);
+	callout_stop(&pst->aqm_pie_callout);
+	mtx_unlock(&pst->lock_mtx);
+}
+
+ /* 
+  * Initialize PIE for sub-queue 'q'.
+  * Returns 0 on success, or EINVAL when the PIE parameters have not been
+  * configured yet.
+  */
+static int
+pie_init(struct fq_pie_flow *q, struct fq_pie_schk *fqpie_schk)
+{
+	struct pie_status *pst=&q->pst;
+	struct dn_aqm_pie_parms *pprms = pst->parms;
+
+	int err = 0;
+	if (!pprms){
+		D("AQM_PIE is not configured");
+		err = EINVAL;
+		/* NOTE(review): on this path lock_mtx/callout are left
+		 * uninitialized and nr_active_q is not bumped — verify that
+		 * callers never run pie_cleanup() on such a flow. */
+	} else {
+		q->psi_extra->nr_active_q++;
+
+		/* For speed optimization, we caculate 1/3 queue size once here */
+		// XXX limit divided by number of queues divided by 3 ??? 
+		pst->one_third_q_size = (fqpie_schk->cfg.limit / 
+			fqpie_schk->cfg.flows_cnt) / 3;
+
+		mtx_init(&pst->lock_mtx, "mtx_pie", NULL, MTX_DEF);
+		/* the callout runs with lock_mtx held and returns unlocked */
+		callout_init_mtx(&pst->aqm_pie_callout, &pst->lock_mtx,
+			CALLOUT_RETURNUNLOCKED);
+	}
+
+	return err;
+}
+
+/* 
+ * callout function to destroy PIE lock, and free fq_pie flows and fq_pie si
+ * extra memory when number of active sub-queues reaches zero.
+ * 'x' is a fq_pie_flow to be destroyed
+ */
+static void
+fqpie_callout_cleanup(void *x)
+{
+	struct fq_pie_flow *q = x;
+	struct pie_status *pst = &q->pst;
+	struct fq_pie_si_extra *psi_extra;
+
+	/* entered with lock_mtx held (callout_init_mtx + RETURNUNLOCKED);
+	 * release it so it can be destroyed */
+	mtx_unlock(&pst->lock_mtx);
+	mtx_destroy(&pst->lock_mtx);
+	psi_extra = q->psi_extra;
+	
+	/* nr_active_q and the module ref_count are protected by the
+	 * dummynet bottom-half write lock */
+	DN_BH_WLOCK();
+	psi_extra->nr_active_q--;
+
+	/* when all sub-queues are destroyed, free flows fq_pie extra vars memory */
+	if (!psi_extra->nr_active_q) {
+		free(psi_extra->flows, M_DUMMYNET);
+		free(psi_extra, M_DUMMYNET);
+		fq_pie_desc.ref_count--;
+	}
+	DN_BH_WUNLOCK();
+}
+
+/* 
+ * Clean up PIE status for sub-queue 'q' 
+ * Stop callout timer and destroy mtx using fqpie_callout_cleanup() callout.
+ * The destruction is deferred to callout context (1us from now) so the
+ * mutex can be released and destroyed safely there.
+ */
+static int
+pie_cleanup(struct fq_pie_flow *q)
+{
+	struct pie_status *pst  = &q->pst;
+
+	mtx_lock(&pst->lock_mtx);
+	/* replaces any pending drop-probability callout with the cleanup one */
+	callout_reset_sbt(&pst->aqm_pie_callout,
+		SBT_1US, 0, fqpie_callout_cleanup, q, 0);
+	mtx_unlock(&pst->lock_mtx);
+	return 0;
+}
+
+/* 
+ * Dequeue and return a packet from sub-queue 'q' or NULL if 'q' is empty.
+ * Also, calculate departure time or queue delay using timestamp
+ */
+ static struct mbuf *
+pie_dequeue(struct fq_pie_flow *q, struct fq_pie_si *si)
+{
+	struct mbuf *m;
+	struct dn_aqm_pie_parms *pprms;
+	struct pie_status *pst;
+	aqm_time_t now;
+	aqm_time_t pkt_ts, dq_time;
+	int32_t w;
+
+	pst  = &q->pst;
+	pprms = q->pst.parms;
+
+	/* we extract the packet ts only when Departure Rate Estimation is not used */
+	m = fq_pie_extract_head(q, &pkt_ts, si, 
+		!(pprms->flags & PIE_DEPRATEEST_ENABLED));
+	
+	if (!m || !(pst->sflags & PIE_ACTIVE))
+		return m;
+
+	now = AQM_UNOW;
+	if (pprms->flags & PIE_DEPRATEEST_ENABLED) {
+		/* calculate average departure time */
+		if(pst->sflags & PIE_INMEASUREMENT) {
+			pst->dq_count += m->m_pkthdr.len;
+
+			if (pst->dq_count >= PIE_DQ_THRESHOLD) {
+				dq_time = now - pst->measurement_start;
+
+				/* 
+				 * if we don't have old avg dq_time i.e PIE is (re)initialized, 
+				 * don't use weight to calculate new avg_dq_time
+				 */
+				if(pst->avg_dq_time == 0)
+					pst->avg_dq_time = dq_time;
+				else {
+					/* 
+					 * weight = PIE_DQ_THRESHOLD/2^6, but we scaled 
+					 * weight by 2^8. Thus, scaled 
+					 * weight = PIE_DQ_THRESHOLD /2^8 
+					 * */
+					w = PIE_DQ_THRESHOLD >> 8;
+					/* EWMA of the measured dequeue time, scaled by 2^8 */
+					pst->avg_dq_time = (dq_time* w
+						+ (pst->avg_dq_time * ((1L << 8) - w))) >> 8;
+					pst->sflags &= ~PIE_INMEASUREMENT;
+				}
+			}
+		}
+
+		/* 
+		 * Start new measurment cycle when the queue has
+		 *  PIE_DQ_THRESHOLD worth of bytes.
+		 */
+		if(!(pst->sflags & PIE_INMEASUREMENT) && 
+			q->stats.len_bytes >= PIE_DQ_THRESHOLD) {
+			pst->sflags |= PIE_INMEASUREMENT;
+			pst->measurement_start = now;
+			pst->dq_count = 0;
+		}
+	}
+	/* Optionally, use packet timestamp to estimate queue delay */
+	else
+		pst->current_qdelay = now - pkt_ts;
+
+	return m;	
+}
+
+
+ /*
+ * Enqueue a packet in q, subject to space and FQ-PIE queue management policy
+ * (whose parameters are in q->fs).
+ * Update stats for the queue and the scheduler.
+ * Return 0 on success, 1 on drop. The packet is consumed anyways.
+ */
+static int
+pie_enqueue(struct fq_pie_flow *q, struct mbuf* m, struct fq_pie_si *si)
+{
+	uint64_t len;
+	struct pie_status *pst;
+	struct dn_aqm_pie_parms *pprms;
+	int t;
+
+	len = m->m_pkthdr.len;
+	pst  = &q->pst;
+	pprms = pst->parms;
+	t = ENQUE;
+
+	/* drop/mark the packet when PIE is active and burst time elapsed */
+	if (pst->sflags & PIE_ACTIVE && pst->burst_allowance == 0
+		&& drop_early(pst, q->stats.len_bytes) == DROP) {
+			/* 
+			 * if drop_prob over ECN threshold, drop the packet 
+			 * otherwise mark and enqueue it.
+			 */
+			if (pprms->flags & PIE_ECN_ENABLED && pst->drop_prob < 
+				(pprms->max_ecnth << (PIE_PROB_BITS - PIE_FIX_POINT_BITS))
+				&& ecn_mark(m))
+				t = ENQUE;
+			else
+				t = DROP;
+		}
+
+	/* Turn PIE on when 1/3 of the queue is full */ 
+	if (!(pst->sflags & PIE_ACTIVE) && q->stats.len_bytes >= 
+		pst->one_third_q_size) {
+		fq_activate_pie(q);
+	}
+
+	/* reset burst tolerance and optionally turn PIE off */
+	if (pst->drop_prob == 0 && pst->current_qdelay < (pprms->qdelay_ref >> 1)
+		&& pst->qdelay_old < (pprms->qdelay_ref >> 1)) {
+			
+			pst->burst_allowance = pprms->max_burst;
+		if (pprms->flags & PIE_ON_OFF_MODE_ENABLED && q->stats.len_bytes<=0)
+			fq_deactivate_pie(pst);
+	}
+
+	/* Use timestamp if Departure Rate Estimation mode is disabled */
+	if (t != DROP && !(pprms->flags & PIE_DEPRATEEST_ENABLED)) {
+		/* Add TS to mbuf as a TAG */
+		struct m_tag *mtag;
+		mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+		if (mtag == NULL)
+			mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS,
+				sizeof(aqm_time_t), M_NOWAIT);
+		if (mtag == NULL) {
+			/*
+			 * BUG FIX: the original called m_freem(m) here and then
+			 * fell through to dereference the NULL mtag and later
+			 * FREE_PKT() the same mbuf (NULL pointer dereference
+			 * plus double free). Just mark the packet for dropping;
+			 * the DROP path below frees it exactly once.
+			 */
+			t = DROP;
+		} else {
+			*(aqm_time_t *)(mtag + 1) = AQM_UNOW;
+			m_tag_prepend(m, mtag);
+		}
+	}
+
+	if (t != DROP) {
+		mq_append(&q->mq, m);
+		fq_update_stats(q, si, len, 0);
+		return 0;
+	} else {
+		fq_update_stats(q, si, len, 1);
+		pst->accu_prob = 0;
+		FREE_PKT(m);
+		return 1;
+	}
+	/* (the original had an unreachable trailing `return 0;` here) */
+}
+
+/* Drop a packet from the head of FQ-PIE sub-queue */
+static void
+pie_drop_head(struct fq_pie_flow *q, struct fq_pie_si *si)
+{
+	struct mbuf *m = q->mq.head;
+
+	if (m == NULL)
+		return;
+	q->mq.head = m->m_nextpkt;
+
+	/* negative length + drop flag: dequeue-side drop (see fq_update_stats) */
+	fq_update_stats(q, si, -m->m_pkthdr.len, 1);
+
+	if (si->main_q.ni.length == 0) /* queue is now idle */
+			si->main_q.q_time = dn_cfg.curr_time;
+	/* reset accu_prob after packet drop */
+	q->pst.accu_prob = 0;
+	
+	FREE_PKT(m);
+}
+
+/*
+ * Classify a packet to queue number using Jenkins hash function.
+ * Return: queue number in [0, fcount)
+ * the input of the hash are protocol no, perturbation, src IP, dst IP,
+ * src port, dst port,
+ */
+static inline int
+fq_pie_classify_flow(struct mbuf *m, uint16_t fcount, struct fq_pie_si *si)
+{
+	struct ip *ip;
+	struct tcphdr *th;
+	struct udphdr *uh;
+	uint8_t tuple[41];
+	uint16_t hash=0;
+
+	ip = (struct ip *)mtodo(m, dn_tag_get(m)->iphdr_off);
+//#ifdef INET6
+	struct ip6_hdr *ip6;
+	int isip6;
+	isip6 = (ip->ip_v == 6);
+
+	/* NOTE(review): the casts below store 16/32-bit values at odd offsets
+	 * in tuple[] — unaligned on strict-alignment architectures; also
+	 * (ip6 + 1)/(ip + 1) assumes no extension headers / IP options.
+	 * TODO confirm acceptable for the supported platforms. */
+	if(isip6) {
+		ip6 = (struct ip6_hdr *)ip;
+		*((uint8_t *) &tuple[0]) = ip6->ip6_nxt;
+		*((uint32_t *) &tuple[1]) = si->perturbation;
+		memcpy(&tuple[5], ip6->ip6_src.s6_addr, 16);
+		memcpy(&tuple[21], ip6->ip6_dst.s6_addr, 16);
+
+		switch (ip6->ip6_nxt) {
+		case IPPROTO_TCP:
+			th = (struct tcphdr *)(ip6 + 1);
+			*((uint16_t *) &tuple[37]) = th->th_dport;
+			*((uint16_t *) &tuple[39]) = th->th_sport;
+			break;
+
+		case IPPROTO_UDP:
+			uh = (struct udphdr *)(ip6 + 1);
+			*((uint16_t *) &tuple[37]) = uh->uh_dport;
+			*((uint16_t *) &tuple[39]) = uh->uh_sport;
+			break;
+		default:
+			memset(&tuple[37], 0, 4);
+		}
+
+		hash = jenkins_hash(tuple, 41, HASHINIT) %  fcount;
+		return hash;
+	} 
+//#endif
+
+	/* IPv4 */
+	*((uint8_t *) &tuple[0]) = ip->ip_p;
+	*((uint32_t *) &tuple[1]) = si->perturbation;
+	*((uint32_t *) &tuple[5]) = ip->ip_src.s_addr;
+	*((uint32_t *) &tuple[9]) = ip->ip_dst.s_addr;
+
+	switch (ip->ip_p) {
+		case IPPROTO_TCP:
+			th = (struct tcphdr *)(ip + 1);
+			*((uint16_t *) &tuple[13]) = th->th_dport;
+			*((uint16_t *) &tuple[15]) = th->th_sport;
+			break;
+
+		case IPPROTO_UDP:
+			uh = (struct udphdr *)(ip + 1);
+			*((uint16_t *) &tuple[13]) = uh->uh_dport;
+			*((uint16_t *) &tuple[15]) = uh->uh_sport;
+			break;
+		default:
+			memset(&tuple[13], 0, 4);
+	}
+	/* IPv4 tuple is only 17 bytes long */
+	hash = jenkins_hash(tuple, 17, HASHINIT) % fcount;
+
+	return hash;
+}
+
+/*
+ * Enqueue a packet into an appropriate queue according to
+ * FQ-CoDel algorithm.
+ */
+static int 
+fq_pie_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q, 
+	struct mbuf *m)
+{ 
+	struct fq_pie_si *si;
+	struct fq_pie_schk *schk;
+	struct dn_sch_fq_pie_parms *param;
+	struct dn_queue *mainq;
+	struct fq_pie_flow *flows;
+	int idx, drop, i, maxidx;
+
+	mainq = (struct dn_queue *)(_si + 1);
+	si = (struct fq_pie_si *)_si;
+	flows = si->si_extra->flows;
+	schk = (struct fq_pie_schk *)(si->_si.sched+1);
+	param = &schk->cfg;
+
+	 /* classify a packet to queue number*/
+	idx = fq_pie_classify_flow(m, param->flows_cnt, si);
+
+	/* enqueue packet into appropriate queue using PIE AQM.
+	 * Note: 'pie_enqueue' function returns 1 only when it unable to 
+	 * add timestamp to packet (no limit check)*/
+	drop = pie_enqueue(&flows[idx], m, si);
+	
+	/* pie unable to timestamp a packet */ 
+	if (drop)
+		return 1;
+	
+	/* If the flow (sub-queue) is not active ,then add it to tail of
+	 * new flows list, initialize and activate it.
+	 */
+	if (!flows[idx].active) {
+		STAILQ_INSERT_TAIL(&si->newflows, &flows[idx], flowchain);
+		flows[idx].deficit = param->quantum;
+		fq_activate_pie(&flows[idx]);
+		flows[idx].active = 1;
+	}
+
+	/* check the limit for all queues and remove a packet from the
+	 * largest one 
+	 * NOTE(review): 'i'/'maxidx' are int while cfg.flows_cnt is uint32_t —
+	 * signed/unsigned comparison; harmless for the bounded range (<=65536)
+	 * but worth tidying.
+	 */
+	if (mainq->ni.length > schk->cfg.limit) {
+		/* find first active flow */
+		for (maxidx = 0; maxidx < schk->cfg.flows_cnt; maxidx++)
+			if (flows[maxidx].active)
+				break;
+		if (maxidx < schk->cfg.flows_cnt) {
+			/* find the largest sub- queue */
+			for (i = maxidx + 1; i < schk->cfg.flows_cnt; i++) 
+				if (flows[i].active && flows[i].stats.length >
+					flows[maxidx].stats.length)
+					maxidx = i;
+			pie_drop_head(&flows[maxidx], si);
+			drop = 1;
+		}
+	}
+
+	return drop;
+}
+
+/*
+ * Dequeue a packet from an appropriate queue according to
+ * FQ-CoDel algorithm (DRR over new-flows then old-flows lists).
+ */
+static struct mbuf *
+fq_pie_dequeue(struct dn_sch_inst *_si)
+{ 
+	struct fq_pie_si *si;
+	struct fq_pie_schk *schk;
+	struct dn_sch_fq_pie_parms *param;
+	struct fq_pie_flow *f;
+	struct mbuf *mbuf;
+	struct fq_pie_list *fq_pie_flowlist;
+
+	si = (struct fq_pie_si *)_si;
+	schk = (struct fq_pie_schk *)(si->_si.sched+1);
+	param = &schk->cfg;
+
+	do {
+		/* select a list to start with */
+		if (STAILQ_EMPTY(&si->newflows))
+			fq_pie_flowlist = &si->oldflows;
+		else
+			fq_pie_flowlist = &si->newflows;
+
+		/* Both new and old queue lists are empty, return NULL */
+		if (STAILQ_EMPTY(fq_pie_flowlist)) 
+			return NULL;
+
+		f = STAILQ_FIRST(fq_pie_flowlist);
+		while (f != NULL)	{
+			/* if there is no flow(sub-queue) deficit, increase deficit
+			 * by quantum, move the flow to the tail of old flows list
+			 * and try another flow.
+			 * Otherwise, the flow will be used for dequeue.
+			 */
+			if (f->deficit < 0) {
+				 f->deficit += param->quantum;
+				 STAILQ_REMOVE_HEAD(fq_pie_flowlist, flowchain);
+				 STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
+			 } else 
+				 break;
+
+			f = STAILQ_FIRST(fq_pie_flowlist);
+		}
+		
+		/* the new flows list is empty, try old flows list */
+		if (STAILQ_EMPTY(fq_pie_flowlist)) 
+			continue;
+
+		/* Dequeue a packet from the selected flow */
+		mbuf = pie_dequeue(f, si);
+
+		/* pie did not return a packet */
+		if (!mbuf) {
+			/* If the selected flow belongs to new flows list, then move 
+			 * it to the tail of old flows list. Otherwise, deactivate it and
+			 * remove it from the old list
+			 */
+			if (fq_pie_flowlist == &si->newflows) {
+				STAILQ_REMOVE_HEAD(fq_pie_flowlist, flowchain);
+				STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
+			}	else {
+				f->active = 0;
+				fq_deactivate_pie(&f->pst);
+				STAILQ_REMOVE_HEAD(fq_pie_flowlist, flowchain);
+			}
+			/* start again */
+			continue;
+		}
+
+		/* we have a packet to return, 
+		 * update flow deficit and return the packet*/
+		f->deficit -= mbuf->m_pkthdr.len;
+		return mbuf;
+
+	} while (1);
+	
+	/* unreachable point */
+	return NULL;
+}
+
+/*
+ * Initialize fq_pie scheduler instance.
+ * also, allocate memory for flows array.
+ * Returns 0 on success (or if already configured), ENOMEM on allocation
+ * failure.
+ */
+static int
+fq_pie_new_sched(struct dn_sch_inst *_si)
+{
+	struct fq_pie_si *si;
+	struct dn_queue *q;
+	struct fq_pie_schk *schk;
+	struct fq_pie_flow *flows;
+	int i;
+
+	si = (struct fq_pie_si *)_si;
+	schk = (struct fq_pie_schk *)(_si->sched+1);
+
+	if(si->si_extra) {
+		D("si already configured!");
+		return 0;
+	}
+
+	/* init the main queue */
+	q = &si->main_q;
+	set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q));
+	q->_si = _si;
+	q->fs = _si->sched->fs;
+
+	/* allocate memory for scheduler instance extra vars */
+	si->si_extra = malloc(sizeof(struct fq_pie_si_extra),
+		 M_DUMMYNET, M_NOWAIT | M_ZERO);
+	if (si->si_extra == NULL) {
+		D("cannot allocate memory for fq_pie si extra vars");
+		return ENOMEM ; 
+	}
+	/* allocate memory for flows array */
+	si->si_extra->flows = malloc(schk->cfg.flows_cnt * sizeof(struct fq_pie_flow),
+		 M_DUMMYNET, M_NOWAIT | M_ZERO);
+	flows = si->si_extra->flows;
+	if (flows == NULL) {
+		free(si->si_extra, M_DUMMYNET);
+		si->si_extra = NULL;
+		D("cannot allocate memory for fq_pie flows");
+		return ENOMEM ; 
+	}
+
+	/* init perturbation for this si */
+	si->perturbation = random();
+	si->si_extra->nr_active_q = 0;
+
+	/* init the old and new flows lists */
+	STAILQ_INIT(&si->newflows);
+	STAILQ_INIT(&si->oldflows);
+
+	/* init the flows (sub-queues) */
+	for (i = 0; i < schk->cfg.flows_cnt; i++) {
+		flows[i].pst.parms = &schk->cfg.pcfg;
+		flows[i].psi_extra = si->si_extra;
+		/* NOTE(review): pie_init() return value (EINVAL when PIE is
+		 * unconfigured) is ignored here — confirm intentional. */
+		pie_init(&flows[i], schk);
+	}
+
+	fq_pie_desc.ref_count++;
+
+	return 0;
+}
+
+
+/*
+ * Free fq_pie scheduler instance.
+ * The flows array and si_extra are not freed here: pie_cleanup() defers
+ * their release to fqpie_callout_cleanup() once every sub-queue's callout
+ * has run (see struct fq_pie_si_extra comment).
+ */
+static int
+fq_pie_free_sched(struct dn_sch_inst *_si)
+{
+	struct fq_pie_si *si;
+	struct fq_pie_schk *schk;
+	struct fq_pie_flow *flows;
+	int i;
+
+	si = (struct fq_pie_si *)_si;
+	schk = (struct fq_pie_schk *)(_si->sched+1);
+	flows = si->si_extra->flows;
+	for (i = 0; i < schk->cfg.flows_cnt; i++) {
+		pie_cleanup(&flows[i]);
+	}
+	si->si_extra = NULL;
+	return 0;
+}
+
+/*
+ * Configure FQ-PIE scheduler.
+ * the configurations for the scheduler is passed from ipfw userland.
+ * A negative value in ep->par[i] selects the sysctl default for that
+ * parameter; all values are then clamped to their legal ranges.
+ * Returns 0 on success, 1 on malformed parameters.
+ */
+static int
+fq_pie_config(struct dn_schk *_schk)
+{
+	struct fq_pie_schk *schk;
+	struct dn_extra_parms *ep;
+	struct dn_sch_fq_pie_parms *fqp_cfg;
+	
+	schk = (struct fq_pie_schk *)(_schk+1);
+	ep = (struct dn_extra_parms *) _schk->cfg;
+
+	/* par array contains fq_pie configuration as follow
+	 * PIE: 0- qdelay_ref,1- tupdate, 2- max_burst
+	 * 3- max_ecnth, 4- alpha, 5- beta, 6- flags
+	 * FQ_PIE: 7- quantum, 8- limit, 9- flows
+	 */
+	if (ep && ep->oid.len ==sizeof(*ep) &&
+		ep->oid.subtype == DN_SCH_PARAMS) {
+
+		fqp_cfg = &schk->cfg;
+		if (ep->par[0] < 0)
+			fqp_cfg->pcfg.qdelay_ref = fq_pie_sysctl.pcfg.qdelay_ref;
+		else
+			fqp_cfg->pcfg.qdelay_ref = ep->par[0];
+		if (ep->par[1] < 0)
+			fqp_cfg->pcfg.tupdate = fq_pie_sysctl.pcfg.tupdate;
+		else
+			fqp_cfg->pcfg.tupdate = ep->par[1];
+		if (ep->par[2] < 0)
+			fqp_cfg->pcfg.max_burst = fq_pie_sysctl.pcfg.max_burst;
+		else
+			fqp_cfg->pcfg.max_burst = ep->par[2];
+		if (ep->par[3] < 0)
+			fqp_cfg->pcfg.max_ecnth = fq_pie_sysctl.pcfg.max_ecnth;
+		else
+			fqp_cfg->pcfg.max_ecnth = ep->par[3];
+		if (ep->par[4] < 0)
+			fqp_cfg->pcfg.alpha = fq_pie_sysctl.pcfg.alpha;
+		else
+			fqp_cfg->pcfg.alpha = ep->par[4];
+		if (ep->par[5] < 0)
+			fqp_cfg->pcfg.beta = fq_pie_sysctl.pcfg.beta;
+		else
+			fqp_cfg->pcfg.beta = ep->par[5];
+		/* note: a negative flags value clears the flags rather than
+		 * falling back to the sysctl default */
+		if (ep->par[6] < 0)
+			fqp_cfg->pcfg.flags = 0;
+		else
+			fqp_cfg->pcfg.flags = ep->par[6];
+
+		/* FQ configurations */
+		if (ep->par[7] < 0)
+			fqp_cfg->quantum = fq_pie_sysctl.quantum;
+		else
+			fqp_cfg->quantum = ep->par[7];
+		if (ep->par[8] < 0)
+			fqp_cfg->limit = fq_pie_sysctl.limit;
+		else
+			fqp_cfg->limit = ep->par[8];
+		if (ep->par[9] < 0)
+			fqp_cfg->flows_cnt = fq_pie_sysctl.flows_cnt;
+		else
+			fqp_cfg->flows_cnt = ep->par[9];
+
+		/* Bound the configurations */
+		fqp_cfg->pcfg.qdelay_ref = BOUND_VAR(fqp_cfg->pcfg.qdelay_ref,
+			1, 5 * AQM_TIME_1S);
+		fqp_cfg->pcfg.tupdate = BOUND_VAR(fqp_cfg->pcfg.tupdate,
+			1, 5 * AQM_TIME_1S);
+		fqp_cfg->pcfg.max_burst = BOUND_VAR(fqp_cfg->pcfg.max_burst,
+			0, 5 * AQM_TIME_1S);
+		fqp_cfg->pcfg.max_ecnth = BOUND_VAR(fqp_cfg->pcfg.max_ecnth,
+			0, PIE_SCALE);
+		fqp_cfg->pcfg.alpha = BOUND_VAR(fqp_cfg->pcfg.alpha, 0, 7 * PIE_SCALE);
+		fqp_cfg->pcfg.beta = BOUND_VAR(fqp_cfg->pcfg.beta, 0, 7 * PIE_SCALE);
+
+		fqp_cfg->quantum = BOUND_VAR(fqp_cfg->quantum,1,9000);
+		fqp_cfg->limit= BOUND_VAR(fqp_cfg->limit,1,20480);
+		fqp_cfg->flows_cnt= BOUND_VAR(fqp_cfg->flows_cnt,1,65536);
+	}
+	else {
+		D("Wrong parameters for fq_pie scheduler");
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Return FQ-PIE scheduler configurations to userland.
+ * Fills 'ep' with the scheduler name and the parameter array, using the
+ * same par[] layout consumed by fq_pie_config().
+ */
+static int 
+fq_pie_getconfig (struct dn_schk *_schk, struct dn_extra_parms *ep) {
+	
+	struct dn_sch_fq_pie_parms *cfg;
+
+	cfg = &((struct fq_pie_schk *)(_schk + 1))->cfg;
+
+	strcpy(ep->name, fq_pie_desc.name);
+
+	/* PIE parameters */
+	ep->par[0] = cfg->pcfg.qdelay_ref;
+	ep->par[1] = cfg->pcfg.tupdate;
+	ep->par[2] = cfg->pcfg.max_burst;
+	ep->par[3] = cfg->pcfg.max_ecnth;
+	ep->par[4] = cfg->pcfg.alpha;
+	ep->par[5] = cfg->pcfg.beta;
+	ep->par[6] = cfg->pcfg.flags;
+
+	/* FQ parameters */
+	ep->par[7] = cfg->quantum;
+	ep->par[8] = cfg->limit;
+	ep->par[9] = cfg->flows_cnt;
+
+	return 0;
+}
+
+/*
+ *  FQ-PIE scheduler descriptor
+ * contains the type of the scheduler, the name, the size of extra
+ * data structures, and function pointers.
+ */
+static struct dn_alg fq_pie_desc = {
+	_SI( .type = )  DN_SCHED_FQ_PIE,
+	_SI( .name = ) "FQ_PIE",
+	_SI( .flags = ) 0,
+
+	_SI( .schk_datalen = ) sizeof(struct fq_pie_schk),
+	_SI( .si_datalen = ) sizeof(struct fq_pie_si) - sizeof(struct dn_sch_inst),
+	_SI( .q_datalen = ) 0,
+
+	_SI( .enqueue = ) fq_pie_enqueue,
+	_SI( .dequeue = ) fq_pie_dequeue,
+	_SI( .config = ) fq_pie_config, /* new sched i.e. sched X config ...*/
+	_SI( .destroy = ) NULL,  /*sched x delete */
+	_SI( .new_sched = ) fq_pie_new_sched, /* new schd instance */
+	_SI( .free_sched = ) fq_pie_free_sched,	/* delete schd instance */
+	_SI( .new_fsk = ) NULL,
+	_SI( .free_fsk = ) NULL,
+	_SI( .new_queue = ) NULL,
+	_SI( .free_queue = ) NULL,
+	_SI( .getconfig = )  fq_pie_getconfig,
+	_SI( .ref_count = ) 0
+};
+
+/* register this scheduler with dummynet at module load */
+DECLARE_DNSCHED_MODULE(dn_fq_pie, &fq_pie_desc);


Property changes on: trunk/sys/netpfil/ipfw/dn_sched_fq_pie.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netpfil/ipfw/dn_sched_prio.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_prio.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dn_sched_prio.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
  * All rights reserved
@@ -25,7 +26,7 @@
  */
 
 /*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_prio.c 325731 2017-11-12 01:28:20Z truckman $
  */
 #ifdef _KERNEL
 #include <sys/malloc.h>
@@ -32,15 +33,21 @@
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/kernel.h>
+#include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
+#include <sys/rwlock.h>
 #include <net/if.h>	/* IFNAMSIZ */
 #include <netinet/in.h>
 #include <netinet/ip_var.h>		/* ipfw_rule_ref */
 #include <netinet/ip_fw.h>	/* flow_id */
 #include <netinet/ip_dummynet.h>
+#include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/dn_heap.h>
 #include <netpfil/ipfw/ip_dn_private.h>
+#ifdef NEW_AQM
+#include <netpfil/ipfw/dn_aqm.h>
+#endif
 #include <netpfil/ipfw/dn_sched.h>
 #else
 #include <dn_test.h>
@@ -223,6 +230,9 @@
 
 	_SI( .new_queue = ) prio_new_queue,
 	_SI( .free_queue = ) prio_free_queue,
+#ifdef NEW_AQM
+	_SI( .getconfig = )  NULL,
+#endif
 };
 
 

Modified: trunk/sys/netpfil/ipfw/dn_sched_qfq.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_qfq.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dn_sched_qfq.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
  * All rights reserved
@@ -25,7 +26,7 @@
  */
 
 /*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_qfq.c 325731 2017-11-12 01:28:20Z truckman $
  */
 
 #ifdef _KERNEL
@@ -33,15 +34,21 @@
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/kernel.h>
+#include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
+#include <sys/rwlock.h>
 #include <net/if.h>	/* IFNAMSIZ */
 #include <netinet/in.h>
 #include <netinet/ip_var.h>		/* ipfw_rule_ref */
 #include <netinet/ip_fw.h>	/* flow_id */
 #include <netinet/ip_dummynet.h>
+#include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/dn_heap.h>
 #include <netpfil/ipfw/ip_dn_private.h>
+#ifdef NEW_AQM
+#include <netpfil/ipfw/dn_aqm.h>
+#endif
 #include <netpfil/ipfw/dn_sched.h>
 #else
 #include <dn_test.h>
@@ -608,7 +615,7 @@
 qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
 {
 	unsigned long mask;
-	uint32_t limit, roundedF;
+	uint64_t limit, roundedF;
 	int slot_shift = cl->grp->slot_shift;
 
 	roundedF = qfq_round_down(cl->F, slot_shift);
@@ -824,6 +831,9 @@
 	_SI( .free_fsk = )  NULL,
 	_SI( .new_queue = ) qfq_new_queue,
 	_SI( .free_queue = ) qfq_free_queue,
+#ifdef NEW_AQM
+	_SI( .getconfig = )  NULL,
+#endif
 };
 
 DECLARE_DNSCHED_MODULE(dn_qfq, &qfq_desc);

Modified: trunk/sys/netpfil/ipfw/dn_sched_rr.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_rr.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dn_sched_rr.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
  * All rights reserved
@@ -25,7 +26,7 @@
  */
 
 /*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_rr.c 325731 2017-11-12 01:28:20Z truckman $
  */
 
 #ifdef _KERNEL
@@ -33,15 +34,21 @@
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/kernel.h>
+#include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
+#include <sys/rwlock.h>
 #include <net/if.h>	/* IFNAMSIZ */
 #include <netinet/in.h>
 #include <netinet/ip_var.h>		/* ipfw_rule_ref */
 #include <netinet/ip_fw.h>	/* flow_id */
 #include <netinet/ip_dummynet.h>
+#include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/dn_heap.h>
 #include <netpfil/ipfw/ip_dn_private.h>
+#ifdef NEW_AQM
+#include <netpfil/ipfw/dn_aqm.h>
+#endif
 #include <netpfil/ipfw/dn_sched.h>
 #else
 #include <dn_test.h>
@@ -301,6 +308,9 @@
 	_SI( .free_fsk = ) NULL,
 	_SI( .new_queue = ) rr_new_queue,
 	_SI( .free_queue = ) rr_free_queue,
+#ifdef NEW_AQM
+	_SI( .getconfig = )  NULL,
+#endif
 };
 
 

Modified: trunk/sys/netpfil/ipfw/dn_sched_wf2q.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_wf2q.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dn_sched_wf2q.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*
  * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
  * Copyright (c) 2000-2002 Luigi Rizzo, Universita` di Pisa
@@ -26,7 +27,7 @@
  */
 
 /*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_wf2q.c 325731 2017-11-12 01:28:20Z truckman $
  */
 
 #ifdef _KERNEL
@@ -34,15 +35,21 @@
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/kernel.h>
+#include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
+#include <sys/rwlock.h>
 #include <net/if.h>	/* IFNAMSIZ */
 #include <netinet/in.h>
 #include <netinet/ip_var.h>		/* ipfw_rule_ref */
 #include <netinet/ip_fw.h>	/* flow_id */
 #include <netinet/ip_dummynet.h>
+#include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/dn_heap.h>
 #include <netpfil/ipfw/ip_dn_private.h>
+#ifdef NEW_AQM
+#include <netpfil/ipfw/dn_aqm.h>
+#endif
 #include <netpfil/ipfw/dn_sched.h>
 #else
 #include <dn_test.h>
@@ -367,6 +374,10 @@
 
 	_SI( .new_queue = ) wf2qp_new_queue,
 	_SI( .free_queue = ) wf2qp_free_queue,
+#ifdef NEW_AQM
+	_SI( .getconfig = )  NULL,
+#endif
+
 };
 
 

Modified: trunk/sys/netpfil/ipfw/dummynet.txt
===================================================================
--- trunk/sys/netpfil/ipfw/dummynet.txt	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dummynet.txt	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,6 +1,6 @@
 #
-# $FreeBSD$
-#
+# $FreeBSD: stable/10/sys/netpfil/ipfw/dummynet.txt 239124 2012-08-07 07:52:25Z luigi $
+# $MidnightBSD$
 
 Notes on the internal structure of dummynet (2010 version)
 by Riccardo Panicucci and Luigi Rizzo
@@ -839,7 +839,7 @@
 
 The struct scheduler represent the scheduler descriptor that is passed to
 dummynet when a scheduler module is loaded.
-This struct contains the type of scheduler, the lenght of all structs and
+This struct contains the type of scheduler, the length of all structs and
 all function pointers.
 If a function is not implemented should be initialize to NULL. Some functions
 are mandatory, other are mandatory if some memory should be freed.

Modified: trunk/sys/netpfil/ipfw/ip_dn_glue.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_dn_glue.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_dn_glue.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
  * All rights reserved
@@ -25,7 +26,7 @@
  */
 
 /*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/ip_dn_glue.c 301772 2016-06-10 00:00:25Z truckman $
  *
  * Binary compatibility support for /sbin/ipfw RELENG_7 and RELENG_8
  */
@@ -55,6 +56,9 @@
 #include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/dn_heap.h>
 #include <netpfil/ipfw/ip_dn_private.h>
+#ifdef NEW_AQM
+#include <netpfil/ipfw/dn_aqm.h>
+#endif
 #include <netpfil/ipfw/dn_sched.h>
 
 /* FREEBSD7.2 ip_dummynet.h r191715*/
@@ -315,10 +319,10 @@
 /* Indicate 'ipfw' version
  * 1: from FreeBSD 7.2
  * 0: from FreeBSD 8
- * -1: unknow (for now is unused)
+ * -1: unknown (for now is unused)
  *
  * It is update when a IP_DUMMYNET_DEL or IP_DUMMYNET_CONFIGURE request arrives
- * NOTE: if a IP_DUMMYNET_GET arrives and the 'ipfw' version is unknow,
+ * NOTE: if a IP_DUMMYNET_GET arrives and the 'ipfw' version is unknown,
  *       it is suppose to be the FreeBSD 8 version.
  */
 static int is7 = 0;
@@ -513,7 +517,7 @@
 	lmax += sizeof(struct dn_sch) + sizeof(struct dn_link) +
 		sizeof(struct dn_fs) + sizeof(struct dn_profile);
 
-	base = buf = malloc(lmax, M_DUMMYNET, M_WAIT|M_ZERO);
+	base = buf = malloc(lmax, M_DUMMYNET, M_WAITOK|M_ZERO);
 	o_next(&buf, sizeof(struct dn_id), DN_CMD_CONFIG);
 	base->id = DN_API_VERSION;
 

Modified: trunk/sys/netpfil/ipfw/ip_dn_io.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_dn_io.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_dn_io.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
  * All rights reserved
@@ -28,7 +29,7 @@
  * Dummynet portions related to packet handling.
  */
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_dn_io.c 325731 2017-11-12 01:28:20Z truckman $");
 
 #include "opt_inet6.h"
 
@@ -62,6 +63,9 @@
 #include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/dn_heap.h>
 #include <netpfil/ipfw/ip_dn_private.h>
+#ifdef NEW_AQM
+#include <netpfil/ipfw/dn_aqm.h>
+#endif
 #include <netpfil/ipfw/dn_sched.h>
 
 /*
@@ -83,8 +87,12 @@
 
 static unsigned long	io_pkt;
 static unsigned long	io_pkt_fast;
+
+#ifdef NEW_AQM
+unsigned long	io_pkt_drop;
+#else
 static unsigned long	io_pkt_drop;
-
+#endif
 /*
  * We use a heap to store entities for which we have pending timer events.
  * The heap is checked at every tick and all entities with expired events
@@ -97,17 +105,11 @@
 
 #ifdef SYSCTL_NODE
 
-SYSBEGIN(f4)
-
-SYSCTL_DECL(_net_inet);
-SYSCTL_DECL(_net_inet_ip);
-static SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
-
-/* wrapper to pass dn_cfg fields to SYSCTL_* */
-//#define DC(x)	(&(VNET_NAME(_base_dn_cfg).x))
-#define DC(x)	(&(dn_cfg.x))
-/* parameters */
-
+/*
+ * Because of the way the SYSBEGIN/SYSEND macros work on other
+ * platforms, there should not be functions between them.
+ * So keep the handlers outside the block.
+ */
 static int
 sysctl_hash_size(SYSCTL_HANDLER_ARGS)
 {
@@ -123,10 +125,6 @@
 	return (0);
 }
 
-SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, hash_size,
-    CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_hash_size,
-    "I", "Default hash table size");
-
 static int
 sysctl_limits(SYSCTL_HANDLER_ARGS)
 {
@@ -153,6 +151,27 @@
 	return (0);
 }
 
+SYSBEGIN(f4)
+
+SYSCTL_DECL(_net_inet);
+SYSCTL_DECL(_net_inet_ip);
+#ifdef NEW_AQM
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
+#else
+static SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
+#endif
+
+/* wrapper to pass dn_cfg fields to SYSCTL_* */
+//#define DC(x)	(&(VNET_NAME(_base_dn_cfg).x))
+#define DC(x)	(&(dn_cfg.x))
+/* parameters */
+
+
+SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, hash_size,
+    CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_hash_size,
+    "I", "Default hash table size");
+
+
 SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit,
     CTLTYPE_LONG | CTLFLAG_RW, 0, 1, sysctl_limits,
     "L", "Upper limit in slots for pipe queue.");
@@ -218,30 +237,21 @@
 static void	dummynet_send(struct mbuf *);
 
 /*
- * Packets processed by dummynet have an mbuf tag associated with
- * them that carries their dummynet state.
- * Outside dummynet, only the 'rule' field is relevant, and it must
- * be at the beginning of the structure.
- */
-struct dn_pkt_tag {
-	struct ipfw_rule_ref rule;	/* matching rule	*/
-
-	/* second part, dummynet specific */
-	int dn_dir;		/* action when packet comes out.*/
-				/* see ip_fw_private.h		*/
-	uint64_t output_time;	/* when the pkt is due for delivery*/
-	struct ifnet *ifp;	/* interface, for ip_output	*/
-	struct _ip6dn_args ip6opt;	/* XXX ipv6 options	*/
-};
-
-/*
  * Return the mbuf tag holding the dummynet state (it should
  * be the first one on the list).
  */
-static struct dn_pkt_tag *
+struct dn_pkt_tag *
 dn_tag_get(struct mbuf *m)
 {
 	struct m_tag *mtag = m_tag_first(m);
+#ifdef NEW_AQM
+	/* XXX: to skip ts m_tag. For Debugging only*/
+	if (mtag != NULL && mtag->m_tag_id == DN_AQM_MTAG_TS) {
+		m_tag_delete(m,mtag); 
+		mtag = m_tag_first(m);
+		D("skip TS tag");
+	}
+#endif
 	KASSERT(mtag != NULL &&
 	    mtag->m_tag_cookie == MTAG_ABI_COMPAT &&
 	    mtag->m_tag_id == PACKET_TAG_DUMMYNET,
@@ -249,6 +259,7 @@
 	return (struct dn_pkt_tag *)(mtag+1);
 }
 
+#ifndef NEW_AQM
 static inline void
 mq_append(struct mq *q, struct mbuf *m)
 {
@@ -259,6 +270,7 @@
 	q->tail = m;
 	m->m_nextpkt = NULL;
 }
+#endif
 
 /*
  * Dispose a list of packet. Use a functions so if we need to do
@@ -330,6 +342,8 @@
 		return (0);	/* accept packet */
 	}
 	if (q->avg >= fs->max_th) {	/* average queue >=  max threshold */
+		if (fs->fs.flags & DN_IS_ECN)
+			return (1);
 		if (fs->fs.flags & DN_IS_GENTLE_RED) {
 			/*
 			 * According to Gentle-RED, if avg is greater than
@@ -345,6 +359,8 @@
 			return (1);
 		}
 	} else if (q->avg > fs->min_th) {
+		if (fs->fs.flags & DN_IS_ECN)
+			return (1);
 		/*
 		 * We compute p_b using the linear dropping function
 		 *	 p_b = c_1 * avg - c_2
@@ -377,6 +393,73 @@
 }
 
 /*
+ * ECN/ECT Processing (partially adopted from altq)
+ */
+#ifndef NEW_AQM
+static
+#endif
+int
+ecn_mark(struct mbuf* m)
+{
+	struct ip *ip;
+	ip = (struct ip *)mtodo(m, dn_tag_get(m)->iphdr_off);
+
+	switch (ip->ip_v) {
+	case IPVERSION:
+	{
+		u_int8_t otos;
+		int sum;
+
+		if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
+			return (0);	/* not-ECT */
+		if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+			return (1);	/* already marked */
+
+		/*
+		 * ecn-capable but not marked,
+		 * mark CE and update checksum
+		 */
+		otos = ip->ip_tos;
+		ip->ip_tos |= IPTOS_ECN_CE;
+		/*
+		 * update checksum (from RFC1624)
+		 *	   HC' = ~(~HC + ~m + m')
+		 */
+		sum = ~ntohs(ip->ip_sum) & 0xffff;
+		sum += (~otos & 0xffff) + ip->ip_tos;
+		sum = (sum >> 16) + (sum & 0xffff);
+		sum += (sum >> 16);  /* add carry */
+		ip->ip_sum = htons(~sum & 0xffff);
+		return (1);
+	}
+#ifdef INET6
+	case (IPV6_VERSION >> 4):
+	{
+		struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
+		u_int32_t flowlabel;
+
+		flowlabel = ntohl(ip6->ip6_flow);
+		if ((flowlabel >> 28) != 6)
+			return (0);	/* version mismatch! */
+		if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+		    (IPTOS_ECN_NOTECT << 20))
+			return (0);	/* not-ECT */
+		if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+		    (IPTOS_ECN_CE << 20))
+			return (1);	/* already marked */
+		/*
+		 * ecn-capable but not marked, mark CE
+		 */
+		flowlabel |= (IPTOS_ECN_CE << 20);
+		ip6->ip6_flow = htonl(flowlabel);
+		return (1);
+	}
+#endif
+	}
+	return (0);
+}
+
+/*
  * Enqueue a packet in q, subject to space and queue management policy
  * (whose parameters are in q->fs).
  * Update stats for the queue and the scheduler.
@@ -407,8 +490,15 @@
 		goto drop;
 	if (f->plr && random() < f->plr)
 		goto drop;
-	if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len))
-		goto drop;
+#ifdef NEW_AQM
+	/* Call AQM enqueue function */
+	if (q->fs->aqmfp)
+		return q->fs->aqmfp->enqueue(q ,m);
+#endif
+	if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len)) {
+		if (!(f->flags & DN_IS_ECN) || !ecn_mark(m))
+			goto drop;
+	}
 	if (f->flags & DN_QSIZE_BYTES) {
 		if (q->ni.len_bytes > f->qsize)
 			goto drop;
@@ -420,7 +510,7 @@
 	q->ni.len_bytes += len;
 	ni->length++;
 	ni->len_bytes += len;
-	return 0;
+	return (0);
 
 drop:
 	io_pkt_drop++;
@@ -427,7 +517,7 @@
 	q->ni.drops++;
 	ni->drops++;
 	FREE_PKT(m);
-	return 1;
+	return (1);
 }
 
 /*
@@ -612,8 +702,8 @@
 		dn_drain_queue();
 	}
 
+	dn_reschedule();
 	DN_BH_WUNLOCK();
-	dn_reschedule();
 	if (q.head != NULL)
 		dummynet_send(q.head);
 	CURVNET_RESTORE();
@@ -651,13 +741,10 @@
 
 		switch (dst) {
 		case DIR_OUT:
-			SET_HOST_IPLEN(mtod(m, struct ip *));
 			ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
 			break ;
 
 		case DIR_IN :
-			/* put header in network format for ip_input() */
-			//SET_NET_IPLEN(mtod(m, struct ip *));
 			netisr_dispatch(NETISR_IP, m);
 			break;
 
@@ -729,6 +816,7 @@
 	dt->ifp = fwa->oif;
 	/* dt->output tame is updated as we move through */
 	dt->output_time = dn_cfg.curr_time;
+	dt->iphdr_off = (dir & PROTO_LAYER2) ? ETHER_HDR_LEN : 0;
 	return 0;
 }
 
@@ -789,6 +877,10 @@
 	if (fs->sched->fp->enqueue(si, q, m)) {
 		/* packet was dropped by enqueue() */
 		m = *m0 = NULL;
+
+		/* dn_enqueue already increases io_pkt_drop */
+		io_pkt_drop--;
+
 		goto dropit;
 	}
 

Modified: trunk/sys/netpfil/ipfw/ip_dn_private.h
===================================================================
--- trunk/sys/netpfil/ipfw/ip_dn_private.h	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_dn_private.h	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
  * All rights reserved
@@ -27,7 +28,7 @@
 /*
  * internal dummynet APIs.
  *
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/ip_dn_private.h 325731 2017-11-12 01:28:20Z truckman $
  */
 
 #ifndef _IP_DN_PRIVATE_H
@@ -81,6 +82,10 @@
 SLIST_HEAD(dn_queue_head, dn_queue);
 SLIST_HEAD(dn_alg_head, dn_alg);
 
+#ifdef NEW_AQM
+SLIST_HEAD(dn_aqm_head, dn_aqm); /* for new AQMs */
+#endif
+
 struct mq {	/* a basic queue of packets*/
         struct mbuf *head, *tail;
 };
@@ -135,6 +140,9 @@
 	/* list of flowsets without a scheduler -- use sch_chain */
 	struct dn_fsk_head	fsu;	/* list of unlinked flowsets */
 	struct dn_alg_head	schedlist;	/* list of algorithms */
+#ifdef NEW_AQM
+	struct dn_aqm_head	aqmlist;	/* list of AQMs */
+#endif
 
 	/* Store the fs/sch to scan when draining. The value is the
 	 * bucket number of the hash table. Expire can be disabled
@@ -231,6 +239,10 @@
 	int lookup_weight ;	/* equal to (1-w_q)^t / (1-w_q)^(t+1) */
 	int avg_pkt_size ;	/* medium packet size */
 	int max_pkt_size ;	/* max packet size */
+#ifdef NEW_AQM
+	struct dn_aqm *aqmfp;	/* Pointer to AQM functions */
+	void *aqmcfg;	/* configuration parameters for AQM */
+#endif
 };
 
 /*
@@ -253,6 +265,9 @@
 	int count;		/* arrivals since last RED drop */
 	int random;		/* random value (scaled) */
 	uint64_t q_time;	/* start of queue idle time */
+#ifdef NEW_AQM
+	void *aqm_status;	/* per-queue status variables*/
+#endif
 
 };
 
@@ -352,6 +367,24 @@
 	DN_QHT_IS_Q	= 0x0100, /* in flowset, qht is a single queue */
 };
 
+/*
+ * Packets processed by dummynet have an mbuf tag associated with
+ * them that carries their dummynet state.
+ * Outside dummynet, only the 'rule' field is relevant, and it must
+ * be at the beginning of the structure.
+ */
+struct dn_pkt_tag {
+	struct ipfw_rule_ref rule;	/* matching rule	*/
+
+	/* second part, dummynet specific */
+	int dn_dir;		/* action when packet comes out.*/
+				/* see ip_fw_private.h		*/
+	uint64_t output_time;	/* when the pkt is due for delivery*/
+	struct ifnet *ifp;	/* interface, for ip_output	*/
+	struct _ip6dn_args ip6opt;	/* XXX ipv6 options	*/
+	uint16_t iphdr_off;	/* IP header offset for mtodo()	*/
+};
+
 extern struct dn_parms dn_cfg;
 //VNET_DECLARE(struct dn_parms, _base_dn_cfg);
 //#define dn_cfg	VNET(_base_dn_cfg)
@@ -359,6 +392,7 @@
 int dummynet_io(struct mbuf **, int , struct ip_fw_args *);
 void dummynet_task(void *context, int pending);
 void dn_reschedule(void);
+struct dn_pkt_tag * dn_tag_get(struct mbuf *m);
 
 struct dn_queue *ipdn_q_find(struct dn_fsk *, struct dn_sch_inst *,
         struct ipfw_flow_id *);
@@ -400,4 +434,20 @@
 void dn_drain_scheduler(void);
 void dn_drain_queue(void);
 
+#ifdef NEW_AQM
+int ecn_mark(struct mbuf* m);
+
+/* moved from ip_dn_io.c to here to be available for AQM modules */
+static inline void
+mq_append(struct mq *q, struct mbuf *m)
+{
+	if (q->head == NULL)
+		q->head = m;
+	else
+		q->tail->m_nextpkt = m;
+	q->tail = m;
+	m->m_nextpkt = NULL;
+}
+#endif /* NEW_AQM */
+
 #endif /* _IP_DN_PRIVATE_H */

Modified: trunk/sys/netpfil/ipfw/ip_dummynet.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_dummynet.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_dummynet.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,4 +1,12 @@
+/* $MidnightBSD$ */
 /*-
+ * Codel/FQ_Codel and PIE/FQ-PIE Code:
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ *  Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from 
+ *  The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ * 
  * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
  * Portions Copyright (c) 2000 Akamba Corp.
  * All rights reserved
@@ -26,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_dummynet.c 318155 2017-05-10 20:46:59Z marius $");
 
 /*
  * Configuration and internal object management for dummynet.
@@ -57,6 +65,9 @@
 #include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/dn_heap.h>
 #include <netpfil/ipfw/ip_dn_private.h>
+#ifdef NEW_AQM
+#include <netpfil/ipfw/dn_aqm.h>
+#endif
 #include <netpfil/ipfw/dn_sched.h>
 
 /* which objects to copy */
@@ -74,23 +85,44 @@
 
 /*---- callout hooks. ----*/
 static struct callout dn_timeout;
+static int dn_gone;
 static struct task	dn_task;
 static struct taskqueue	*dn_tq = NULL;
 
 static void
-dummynet(void * __unused unused)
+dummynet(void *arg)
 {
 
-	taskqueue_enqueue(dn_tq, &dn_task);
+	(void)arg;	/* UNUSED */
+	taskqueue_enqueue_fast(dn_tq, &dn_task);
 }
 
 void
 dn_reschedule(void)
 {
-	callout_reset(&dn_timeout, 1, dummynet, NULL);
+
+	if (dn_gone != 0)
+		return;
+	callout_reset_sbt(&dn_timeout, tick_sbt, 0, dummynet, NULL,
+	    C_HARDCLOCK | C_DIRECT_EXEC);
 }
 /*----- end of callout hooks -----*/
 
+#ifdef NEW_AQM
+/* Return AQM descriptor for given type or name. */
+static struct dn_aqm *
+find_aqm_type(int type, char *name)
+{
+	struct dn_aqm *d;
+
+	SLIST_FOREACH(d, &dn_cfg.aqmlist, next) {
+		if (d->type == type || (name && !strcasecmp(d->name, name)))
+			return d;
+	}
+	return NULL; /* not found */
+}
+#endif
+
 /* Return a scheduler descriptor given the type or name. */
 static struct dn_alg *
 find_sched_type(int type, char *name)
@@ -313,7 +345,15 @@
 
 	if (fs->sched->fp->new_queue)
 		fs->sched->fp->new_queue(q);
+
+#ifdef NEW_AQM
+	/* call AQM init function after creating a queue*/
+	if (fs->aqmfp && fs->aqmfp->init)
+		if(fs->aqmfp->init(q))
+			D("unable to init AQM for fs %d", fs->fs.fs_nr);
+#endif
 	dn_cfg.queue_count++;
+
 	return q;
 }
 
@@ -327,6 +367,13 @@
 {
 	struct dn_fsk *fs = q->fs;
 
+#ifdef NEW_AQM
+	/* clean up AQM status for queue 'q'
+	 * cleanup here is called just with MULTIQUEUE
+	 */
+	if (fs && fs->aqmfp && fs->aqmfp->cleanup)
+		fs->aqmfp->cleanup(q);
+#endif
 	// D("fs %p si %p\n", fs, q->_si);
 	/* notify the parent scheduler that the queue is going away */
 	if (fs && fs->sched->fp->free_queue)
@@ -468,6 +515,16 @@
 	if (s->sch.flags & DN_HAVE_MASK)
 		si->ni.fid = *(struct ipfw_flow_id *)key;
 
+#ifdef NEW_AQM
+	/* init AQM status for !DN_MULTIQUEUE sched*/
+	if (!(s->fp->flags & DN_MULTIQUEUE))
+		if (s->fs->aqmfp && s->fs->aqmfp->init)
+			if(s->fs->aqmfp->init((struct dn_queue *)(si + 1))) {
+				D("unable to init AQM for fs %d", s->fs->fs.fs_nr);
+				goto error;
+			}
+#endif
+
 	dn_cfg.si_count++;
 	return si;
 
@@ -497,6 +554,20 @@
 	dn_free_pkts(dl->mq.head);	/* drain delay line */
 	if (si->kflags & DN_ACTIVE) /* remove si from event heap */
 		heap_extract(&dn_cfg.evheap, si);
+
+#ifdef NEW_AQM
+	/* clean up AQM status for !DN_MULTIQUEUE sched
+	 * Note that all queues belong to fs were cleaned up in fsk_detach.
+	 * When drain_scheduler is called s->fs and q->fs are pointing 
+	 * to a correct fs, so we can use fs in this case.
+	 */
+	if (!(s->fp->flags & DN_MULTIQUEUE)) {
+		struct dn_queue *q = (struct dn_queue *)(si + 1);
+		if (q->aqm_status && q->fs->aqmfp)
+			if (q->fs->aqmfp->cleanup)
+				q->fs->aqmfp->cleanup(q);
+	}
+#endif
 	if (s->fp->free_sched)
 		s->fp->free_sched(si);
 	bzero(si, sizeof(*si));	/* safety */
@@ -585,6 +656,67 @@
 	return fs;
 }
 
+#ifdef NEW_AQM
+/* callback function for cleaning up AQM queue status belonging to a flowset
+ * connected to scheduler instance '_si' (for !DN_MULTIQUEUE only).
+ */
+static int
+si_cleanup_q(void *_si, void *arg)
+{
+	struct dn_sch_inst *si = _si;
+
+	if (!(si->sched->fp->flags & DN_MULTIQUEUE)) {
+		if (si->sched->fs->aqmfp && si->sched->fs->aqmfp->cleanup)
+			si->sched->fs->aqmfp->cleanup((struct dn_queue *) (si+1));
+	}
+	return 0;
+}
+
+/* callback to clean up queue AQM status.*/
+static int
+q_cleanup_q(void *_q, void *arg)
+{
+	struct dn_queue *q = _q;
+	q->fs->aqmfp->cleanup(q);
+	return 0;
+}
+
+/* Clean up the AQM status of all queues belonging to flowset 'fs' and then
+ * deconfig AQM for flowset 'fs'
+ */
+static void 
+aqm_cleanup_deconfig_fs(struct dn_fsk *fs)
+{
+	struct dn_sch_inst *si;
+
+	/* clean up AQM status for all queues for !DN_MULTIQUEUE sched*/
+	if (fs->fs.fs_nr > DN_MAX_ID) {
+		if (fs->sched && !(fs->sched->fp->flags & DN_MULTIQUEUE)) {
+			if (fs->sched->sch.flags & DN_HAVE_MASK)
+				dn_ht_scan(fs->sched->siht, si_cleanup_q, NULL);
+			else {
+					/* single si i.e. no sched mask */
+					si = (struct dn_sch_inst *) fs->sched->siht;
+					if (si && fs->aqmfp && fs->aqmfp->cleanup)
+						fs->aqmfp->cleanup((struct dn_queue *) (si+1));
+			}
+		} 
+	}
+
+	/* clean up AQM status for all queues for DN_MULTIQUEUE sched*/
+	if (fs->sched && fs->sched->fp->flags & DN_MULTIQUEUE && fs->qht) {
+			if (fs->fs.flags & DN_QHT_HASH)
+				dn_ht_scan(fs->qht, q_cleanup_q, NULL);
+			else
+				fs->aqmfp->cleanup((struct dn_queue *)(fs->qht));
+	}
+
+	/* deconfig AQM */
+	if(fs->aqmcfg && fs->aqmfp && fs->aqmfp->deconfig)
+		fs->aqmfp->deconfig(fs);
+}
+#endif
+
 /*
  * detach flowset from its current scheduler. Flags as follows:
  * DN_DETACH removes from the fsk_list
@@ -613,11 +745,15 @@
 		free(fs->w_q_lookup, M_DUMMYNET);
 	fs->w_q_lookup = NULL;
 	qht_delete(fs, flags);
+#ifdef NEW_AQM
+	aqm_cleanup_deconfig_fs(fs);
+#endif
+
 	if (fs->sched && fs->sched->fp->free_fsk)
 		fs->sched->fp->free_fsk(fs);
 	fs->sched = NULL;
 	if (flags & DN_DELETE_FS) {
-		bzero(fs, sizeof(fs));	/* safety */
+		bzero(fs, sizeof(*fs));	/* safety */
 		free(fs, M_DUMMYNET);
 		dn_cfg.fsk_count--;
 	} else {
@@ -795,29 +931,35 @@
 static int
 copy_obj(char **start, char *end, void *_o, const char *msg, int i)
 {
-	struct dn_id *o = _o;
+	struct dn_id o;
+	union {
+		struct dn_link l;
+		struct dn_schk s;
+	} dn;
 	int have = end - *start;
 
-	if (have < o->len || o->len == 0 || o->type == 0) {
+	memcpy(&o, _o, sizeof(o));
+	if (have < o.len || o.len == 0 || o.type == 0) {
 		D("(WARN) type %d %s %d have %d need %d",
-			o->type, msg, i, have, o->len);
+		    o.type, msg, i, have, o.len);
 		return 1;
 	}
-	ND("type %d %s %d len %d", o->type, msg, i, o->len);
-	bcopy(_o, *start, o->len);
-	if (o->type == DN_LINK) {
+	ND("type %d %s %d len %d", o.type, msg, i, o.len);
+	if (o.type == DN_LINK) {
+		memcpy(&dn.l, _o, sizeof(dn.l));
 		/* Adjust burst parameter for link */
-		struct dn_link *l = (struct dn_link *)*start;
-		l->burst =  div64(l->burst, 8 * hz);
-		l->delay = l->delay * 1000 / hz;
-	} else if (o->type == DN_SCH) {
-		/* Set id->id to the number of instances */
-		struct dn_schk *s = _o;
-		struct dn_id *id = (struct dn_id *)(*start);
-		id->id = (s->sch.flags & DN_HAVE_MASK) ?
-			dn_ht_entries(s->siht) : (s->siht ? 1 : 0);
-	}
-	*start += o->len;
+		dn.l.burst = div64(dn.l.burst, 8 * hz);
+		dn.l.delay = dn.l.delay * 1000 / hz;
+		memcpy(*start, &dn.l, sizeof(dn.l));
+	} else if (o.type == DN_SCH) {
+		/* Set dn.s.sch.oid.id to the number of instances */
+		memcpy(&dn.s, _o, sizeof(dn.s));
+		dn.s.sch.oid.id = (dn.s.sch.flags & DN_HAVE_MASK) ?
+		    dn_ht_entries(dn.s.siht) : (dn.s.siht ? 1 : 0);
+		memcpy(*start, &dn.s, sizeof(dn.s));
+	} else
+		memcpy(*start, _o, o.len);
+	*start += o.len;
 	return 0;
 }
 
@@ -838,7 +980,7 @@
 		return 1;
 	}
 	ND("type %d %s %d len %d", o->type, msg, i, len);
-	bcopy(_o, *start, len);
+	memcpy(*start, _o, len);
 	((struct dn_id*)(*start))->len = len;
 	*start += len;
 	return 0;
@@ -886,7 +1028,7 @@
 		D("error have %d need %d", have, profile_len);
 		return 1;
 	}
-	bcopy(p, *a->start, profile_len);
+	memcpy(*a->start, p, profile_len);
 	((struct dn_id *)(*a->start))->len = profile_len;
 	*a->start += profile_len;
 	return 0;
@@ -1067,7 +1209,10 @@
 	fs->min_th = SCALE(fs->fs.min_th);
 	fs->max_th = SCALE(fs->fs.max_th);
 
-	fs->c_1 = fs->max_p / (fs->fs.max_th - fs->fs.min_th);
+	if (fs->fs.max_th == fs->fs.min_th)
+		fs->c_1 = fs->max_p;
+	else
+		fs->c_1 = SCALE((int64_t)(fs->max_p)) / (fs->fs.max_th - fs->fs.min_th);
 	fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th));
 
 	if (fs->fs.flags & DN_IS_GENTLE_RED) {
@@ -1181,6 +1326,183 @@
 	}
 }
 
+#ifdef NEW_AQM
+/* Retrieve AQM configurations to ipfw userland 
+ */
+static int
+get_aqm_parms(struct sockopt *sopt)
+{
+	struct dn_extra_parms  *ep;
+	struct dn_fsk *fs;
+	size_t sopt_valsize;
+	int l, err = 0;
+	
+	sopt_valsize = sopt->sopt_valsize;
+	l = sizeof(*ep);
+	if (sopt->sopt_valsize < l) {
+		D("bad len sopt->sopt_valsize %d len %d",
+			(int) sopt->sopt_valsize , l);
+		err = EINVAL;
+		return err;
+	}
+	ep = malloc(l, M_DUMMYNET, M_WAITOK);
+	if(!ep) {
+		err = ENOMEM ;
+		return err;
+	}
+	do {
+		err = sooptcopyin(sopt, ep, l, l);
+		if(err)
+			break;
+		sopt->sopt_valsize = sopt_valsize;
+		if (ep->oid.len < l) {
+			err = EINVAL;
+			break;
+		}
+
+		fs = dn_ht_find(dn_cfg.fshash, ep->nr, 0, NULL);
+		if (!fs) {
+			D("fs %d not found", ep->nr);
+			err = EINVAL;
+			break;
+		}
+
+		if (fs->aqmfp && fs->aqmfp->getconfig) {
+			if(fs->aqmfp->getconfig(fs, ep)) {
+				D("Error while trying to get AQM params");
+				err = EINVAL;
+				break;
+			}
+			ep->oid.len = l;
+			err = sooptcopyout(sopt, ep, l);
+		}
+	}while(0);
+
+	free(ep, M_DUMMYNET);
+	return err;
+}
+
+/* Retrieve scheduler configurations to ipfw userland
+ */
+static int
+get_sched_parms(struct sockopt *sopt)
+{
+	struct dn_extra_parms  *ep;
+	struct dn_schk *schk;
+	size_t sopt_valsize;
+	int l, err = 0;
+	
+	sopt_valsize = sopt->sopt_valsize;
+	l = sizeof(*ep);
+	if (sopt->sopt_valsize < l) {
+		D("bad len sopt->sopt_valsize %d len %d",
+			(int) sopt->sopt_valsize , l);
+		err = EINVAL;
+		return err;
+	}
+	ep = malloc(l, M_DUMMYNET, M_WAITOK);
+	if(!ep) {
+		err = ENOMEM ;
+		return err;
+	}
+	do {
+		err = sooptcopyin(sopt, ep, l, l);
+		if(err)
+			break;
+		sopt->sopt_valsize = sopt_valsize;
+		if (ep->oid.len < l) {
+			err = EINVAL;
+			break;
+		}
+
+		schk = locate_scheduler(ep->nr);
+		if (!schk) {
+			D("sched %d not found", ep->nr);
+			err = EINVAL;
+			break;
+		}
+		
+		if (schk->fp && schk->fp->getconfig) {
+			if(schk->fp->getconfig(schk, ep)) {
+				D("Error while trying to get sched params");
+				err = EINVAL;
+				break;
+			}
+			ep->oid.len = l;
+			err = sooptcopyout(sopt, ep, l);
+		}
+	}while(0);
+	free(ep, M_DUMMYNET);
+
+	return err;
+}
+
+/* Configure AQM for flowset 'fs'.
+ * extra parameters are passed from userland.
+ */
+static int
+config_aqm(struct dn_fsk *fs, struct  dn_extra_parms *ep, int busy)
+{
+	int err = 0;
+
+	do {
+		/* no configurations */
+		if (!ep) {
+			err = 0;
+			break;
+		}
+
+		/* no AQM for this flowset*/
+		if (!strcmp(ep->name,"")) {
+			err = 0;
+			break;
+		}
+		if (ep->oid.len < sizeof(*ep)) {
+			D("short aqm len %d", ep->oid.len);
+				err = EINVAL;
+				break;
+		}
+
+		if (busy) {
+			D("Unable to configure flowset, flowset busy!");
+			err = EINVAL;
+			break;
+		}
+
+		/* deconfigure old aqm if exist */
+		if (fs->aqmcfg && fs->aqmfp && fs->aqmfp->deconfig) {
+			aqm_cleanup_deconfig_fs(fs);
+		}
+
+		if (!(fs->aqmfp = find_aqm_type(0, ep->name))) {
+			D("AQM functions not found for type %s!", ep->name);
+			fs->fs.flags &= ~DN_IS_AQM;
+			err = EINVAL;
+			break;
+		} else
+			fs->fs.flags |= DN_IS_AQM;
+
+		if (ep->oid.subtype != DN_AQM_PARAMS) {
+				D("Wrong subtype");
+				err = EINVAL;
+				break;
+		}
+
+		if (fs->aqmfp->config) {
+			err = fs->aqmfp->config(fs, ep, ep->oid.len);
+			if (err) {
+					D("Unable to configure AQM for FS %d", fs->fs.fs_nr );
+					fs->fs.flags &= ~DN_IS_AQM;
+					fs->aqmfp = NULL;
+					break;
+			}
+		}
+	} while(0);
+
+	return err;
+}
+#endif
+
 /*
  * Configuration -- to preserve backward compatibility we use
  * the following scheme (N is 65536)
@@ -1268,6 +1590,9 @@
 {
 	int i;
 	struct dn_fsk *fs;
+#ifdef NEW_AQM
+	struct dn_extra_parms *ep;
+#endif
 
 	if (nfs->oid.len != sizeof(*nfs)) {
 		D("invalid flowset len %d", nfs->oid.len);
@@ -1276,6 +1601,15 @@
 	i = nfs->fs_nr;
 	if (i <= 0 || i >= 3*DN_MAX_ID)
 		return NULL;
+#ifdef NEW_AQM
+	ep = NULL;
+	if (arg != NULL) {
+		ep = malloc(sizeof(*ep), M_TEMP, locked ? M_NOWAIT : M_WAITOK);
+		if (ep == NULL)
+			return (NULL);
+		memcpy(ep, arg, sizeof(*ep));
+	}
+#endif
 	ND("flowset %d", i);
 	/* XXX other sanity checks */
         if (nfs->flags & DN_QSIZE_BYTES) {
@@ -1287,7 +1621,7 @@
         }
 	if (nfs->flags & DN_HAVE_MASK) {
 		/* make sure we have some buckets */
-		ipdn_bound_var(&nfs->buckets, dn_cfg.hash_size,
+		ipdn_bound_var((int *)&nfs->buckets, dn_cfg.hash_size,
 			1, dn_cfg.max_hash_size, "flowset buckets");
 	} else {
 		nfs->buckets = 1;	/* we only need 1 */
@@ -1313,6 +1647,17 @@
 	    }
 	    if (bcmp(&fs->fs, nfs, sizeof(*nfs)) == 0) {
 		ND("flowset %d unchanged", i);
+#ifdef NEW_AQM
+		if (ep != NULL) {
+			/*
+			 * Reconfigure AQM as the parameters can be changed.
+			 * We consider the flowset as busy if it has scheduler
+			 * instance(s).
+			 */ 
+			s = locate_scheduler(nfs->sched_nr);
+			config_aqm(fs, ep, s != NULL && s->siht != NULL);
+		}
+#endif
 		break; /* no change, nothing to do */
 	    }
 	    if (oldc != dn_cfg.fsk_count)	/* new item */
@@ -1331,11 +1676,21 @@
 		fsk_detach(fs, flags);
 	    }
 	    fs->fs = *nfs; /* copy configuration */
+#ifdef NEW_AQM
+			fs->aqmfp = NULL;
+			if (ep != NULL)
+				config_aqm(fs, ep, s != NULL &&
+				    s->siht != NULL);
+#endif
 	    if (s != NULL)
 		fsk_attach(fs, s);
 	} while (0);
 	if (!locked)
 		DN_BH_WUNLOCK();
+#ifdef NEW_AQM
+	if (ep != NULL)
+		free(ep, M_TEMP);
+#endif
 	return fs;
 }
 
@@ -1372,7 +1727,7 @@
 		return EINVAL;
 	/* make sure we have some buckets */
 	if (a.sch->flags & DN_HAVE_MASK)
-		ipdn_bound_var(&a.sch->buckets, dn_cfg.hash_size,
+		ipdn_bound_var((int *)&a.sch->buckets, dn_cfg.hash_size,
 			1, dn_cfg.max_hash_size, "sched buckets");
 	/* XXX other sanity checks */
 	bzero(&p, sizeof(p));
@@ -1445,7 +1800,7 @@
 				D("cannot allocate profile");
 				goto error; //XXX
 			}
-			bcopy(pf, s->profile, sizeof(*pf));
+			memcpy(s->profile, pf, sizeof(*pf));
 		}
 	}
 	p.link_nr = 0;
@@ -1467,7 +1822,7 @@
 				pf = malloc(sizeof(*pf),
 				    M_DUMMYNET, M_NOWAIT | M_ZERO);
 			if (pf)	/* XXX should issue a warning otherwise */
-				bcopy(s->profile, pf, sizeof(*pf));
+				memcpy(pf, s->profile, sizeof(*pf));
 		}
 		/* remove from the hash */
 		dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
@@ -1589,7 +1944,7 @@
 		olen = s->profile->oid.len;
 		if (olen < pf->oid.len)
 			olen = pf->oid.len;
-		bcopy(pf, s->profile, pf->oid.len);
+		memcpy(s->profile, pf, pf->oid.len);
 		s->profile->oid.len = olen;
 	}
 	DN_BH_WUNLOCK();
@@ -1625,30 +1980,35 @@
 int
 do_config(void *p, int l)
 {
-	struct dn_id *next, *o;
-	int err = 0, err2 = 0;
-	struct dn_id *arg = NULL;
-	uintptr_t *a;
+	struct dn_id o;
+	union {
+		struct dn_profile profile;
+		struct dn_fs fs;
+		struct dn_link link;
+		struct dn_sch sched;
+	} *dn;
+	struct dn_id *arg;
+	uintptr_t a;
+	int err, err2, off;
 
-	o = p;
-	if (o->id != DN_API_VERSION) {
-		D("invalid api version got %d need %d",
-			o->id, DN_API_VERSION);
+	memcpy(&o, p, sizeof(o));
+	if (o.id != DN_API_VERSION) {
+		D("invalid api version got %d need %d", o.id, DN_API_VERSION);
 		return EINVAL;
 	}
-	for (; l >= sizeof(*o); o = next) {
-		struct dn_id *prev = arg;
-		if (o->len < sizeof(*o) || l < o->len) {
-			D("bad len o->len %d len %d", o->len, l);
+	arg = NULL;
+	dn = NULL;
+	for (off = 0; l >= sizeof(o); memcpy(&o, (char *)p + off, sizeof(o))) {
+		if (o.len < sizeof(o) || l < o.len) {
+			D("bad len o.len %d len %d", o.len, l);
 			err = EINVAL;
 			break;
 		}
-		l -= o->len;
-		next = (struct dn_id *)((char *)o + o->len);
+		l -= o.len;
 		err = 0;
-		switch (o->type) {
+		switch (o.type) {
 		default:
-			D("cmd %d not implemented", o->type);
+			D("cmd %d not implemented", o.type);
 			break;
 
 #ifdef EMULATE_SYSCTL
@@ -1666,17 +2026,17 @@
 
 		case DN_CMD_DELETE:
 			/* the argument is in the first uintptr_t after o */
-			a = (uintptr_t *)(o+1);
-			if (o->len < sizeof(*o) + sizeof(*a)) {
+			if (o.len < sizeof(o) + sizeof(a)) {
 				err = EINVAL;
 				break;
 			}
-			switch (o->subtype) {
+			memcpy(&a, (char *)p + off + sizeof(o), sizeof(a));
+			switch (o.subtype) {
 			case DN_LINK:
 				/* delete base and derived schedulers */
 				DN_BH_WLOCK();
-				err = delete_schk(*a);
-				err2 = delete_schk(*a + DN_MAX_ID);
+				err = delete_schk(a);
+				err2 = delete_schk(a + DN_MAX_ID);
 				DN_BH_WUNLOCK();
 				if (!err)
 					err = err2;
@@ -1683,14 +2043,13 @@
 				break;
 
 			default:
-				D("invalid delete type %d",
-					o->subtype);
+				D("invalid delete type %d", o.subtype);
 				err = EINVAL;
 				break;
 
 			case DN_FS:
-				err = (*a <1 || *a >= DN_MAX_ID) ?
-					EINVAL : delete_fs(*a, 0) ;
+				err = (a < 1 || a >= DN_MAX_ID) ?
+				    EINVAL : delete_fs(a, 0) ;
 				break;
 			}
 			break;
@@ -1700,28 +2059,47 @@
 			dummynet_flush();
 			DN_BH_WUNLOCK();
 			break;
-		case DN_TEXT:	/* store argument the next block */
-			prev = NULL;
-			arg = o;
+		case DN_TEXT:	/* store argument of next block */
+			if (arg != NULL)
+				free(arg, M_TEMP);
+			arg = malloc(o.len, M_TEMP, M_WAITOK);
+			memcpy(arg, (char *)p + off, o.len);
 			break;
 		case DN_LINK:
-			err = config_link((struct dn_link *)o, arg);
+			if (dn == NULL)
+				dn = malloc(sizeof(*dn), M_TEMP, M_WAITOK);
+			memcpy(&dn->link, (char *)p + off, sizeof(dn->link));
+			err = config_link(&dn->link, arg);
 			break;
 		case DN_PROFILE:
-			err = config_profile((struct dn_profile *)o, arg);
+			if (dn == NULL)
+				dn = malloc(sizeof(*dn), M_TEMP, M_WAITOK);
+			memcpy(&dn->profile, (char *)p + off,
+			    sizeof(dn->profile));
+			err = config_profile(&dn->profile, arg);
 			break;
 		case DN_SCH:
-			err = config_sched((struct dn_sch *)o, arg);
+			if (dn == NULL)
+				dn = malloc(sizeof(*dn), M_TEMP, M_WAITOK);
+			memcpy(&dn->sched, (char *)p + off,
+			    sizeof(dn->sched));
+			err = config_sched(&dn->sched, arg);
 			break;
 		case DN_FS:
-			err = (NULL==config_fs((struct dn_fs *)o, arg, 0));
+			if (dn == NULL)
+				dn = malloc(sizeof(*dn), M_TEMP, M_WAITOK);
+			memcpy(&dn->fs, (char *)p + off, sizeof(dn->fs));
+			err = (NULL == config_fs(&dn->fs, arg, 0));
 			break;
 		}
-		if (prev)
-			arg = NULL;
 		if (err != 0)
 			break;
+		off += o.len;
 	}
+	if (arg != NULL)
+		free(arg, M_TEMP);
+	if (dn != NULL)
+		free(dn, M_TEMP);
 	return err;
 }
 
@@ -1856,6 +2234,19 @@
 		// cmd->id = sopt_valsize;
 		D("compatibility mode");
 	}
+
+#ifdef NEW_AQM
+	/* get AQM params */
+	if(cmd->subtype == DN_AQM_PARAMS) {
+		error = get_aqm_parms(sopt);
+		goto done;
+	/* get Scheduler params */
+	} else if (cmd->subtype == DN_SCH_PARAMS) {
+		error = get_sched_parms(sopt);
+		goto done;
+	}
+#endif
+
 	a.extra = (struct copy_range *)cmd;
 	if (cmd->len == sizeof(*cmd)) { /* no range, create a default */
 		uint32_t *rp = (uint32_t *)(cmd + 1);
@@ -1920,7 +2311,7 @@
 	a.type = cmd->subtype;
 
 	if (compat == NULL) {
-		bcopy(cmd, start, sizeof(*cmd));
+		memcpy(start, cmd, sizeof(*cmd));
 		((struct dn_id*)(start))->len = sizeof(struct dn_id);
 		buf = start + sizeof(*cmd);
 	} else
@@ -2158,24 +2549,25 @@
 	DN_LOCK_INIT();
 
 	TASK_INIT(&dn_task, 0, dummynet_task, curvnet);
-	dn_tq = taskqueue_create("dummynet", M_WAITOK,
+	dn_tq = taskqueue_create_fast("dummynet", M_WAITOK,
 	    taskqueue_thread_enqueue, &dn_tq);
 	taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet");
 
-	callout_init(&dn_timeout, CALLOUT_MPSAFE);
-	callout_reset(&dn_timeout, 1, dummynet, NULL);
+	callout_init(&dn_timeout, 1);
+	dn_reschedule();
 
 	/* Initialize curr_time adjustment mechanics. */
 	getmicrouptime(&dn_cfg.prev_t);
 }
 
-#ifdef KLD_MODULE
 static void
 ip_dn_destroy(int last)
 {
-	callout_drain(&dn_timeout);
+	DN_BH_WLOCK();
+	/* ensure no more callouts are started */
+	dn_gone = 1;
 
-	DN_BH_WLOCK();
+	/* check for last */
 	if (last) {
 		ND("removing last instance\n");
 		ip_dn_ctl_ptr = NULL;
@@ -2184,6 +2576,8 @@
 
 	dummynet_flush();
 	DN_BH_WUNLOCK();
+
+	callout_drain(&dn_timeout);
 	taskqueue_drain(dn_tq, &dn_task);
 	taskqueue_free(dn_tq);
 
@@ -2193,7 +2587,6 @@
 
 	DN_LOCK_DESTROY();
 }
-#endif /* KLD_MODULE */
 
 static int
 dummynet_modevent(module_t mod, int type, void *data)
@@ -2209,13 +2602,8 @@
 		ip_dn_io_ptr = dummynet_io;
 		return 0;
 	} else if (type == MOD_UNLOAD) {
-#if !defined(KLD_MODULE)
-		printf("dummynet statically compiled, cannot unload\n");
-		return EINVAL ;
-#else
 		ip_dn_destroy(1 /* last */);
 		return 0;
-#endif
 	} else
 		return EOPNOTSUPP;
 }
@@ -2311,4 +2699,98 @@
  */
 //VNET_SYSUNINIT(vnet_dn_uninit, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_destroy, NULL);
 
+#ifdef NEW_AQM
+
+/* modevent helpers for the AQM modules */
+static int
+load_dn_aqm(struct dn_aqm *d)
+{
+	struct dn_aqm *cursor = NULL;
+
+	if (d == NULL)
+		return 1; /* error */
+	ip_dn_init();	/* just in case, we need the lock */
+
+	/* Refuse AQMs lacking the mandatory handlers */
+	if (d->enqueue == NULL || d->dequeue == NULL) {
+		D("missing enqueue or dequeue for %s", d->name);
+		return 1;
+	}
+
+	/* Insert unless an AQM with the same name is already registered */
+	DN_BH_WLOCK();
+	SLIST_FOREACH(cursor, &dn_cfg.aqmlist, next) {
+		if (strcmp(cursor->name, d->name) == 0) {
+			D("%s already loaded", d->name);
+			break; /* AQM already exists */
+		}
+	}
+	if (cursor == NULL)
+		SLIST_INSERT_HEAD(&dn_cfg.aqmlist, d, next);
+	DN_BH_WUNLOCK();
+	D("dn_aqm %s %sloaded", d->name, cursor ? "not ":"");
+	return cursor ? 1 : 0;
+}
+
+
+/*
+ * Callback invoked for each flowset before an AQM module is unloaded:
+ * if the flowset uses the AQM type being removed, clean up the AQM
+ * state of its queues and deconfigure the flowset.
+ */
+static int
+fs_cleanup(void *_fs, void *arg)
+{
+	struct dn_fsk *fs = _fs;
+	uint32_t aqm_type = *(uint32_t *)arg;
+
+	if (fs->aqmfp != NULL && fs->aqmfp->type == aqm_type)
+		aqm_cleanup_deconfig_fs(fs);
+
+	return 0;
+}
+
+/*
+ * Detach an AQM module: clean up any flowset using it, then remove it
+ * from dn_cfg.aqmlist. Returns 0 on success, EBUSY if the AQM is still
+ * referenced, EINVAL if it was never registered.
+ */
+static int
+unload_dn_aqm(struct dn_aqm *aqm)
+{
+	struct dn_aqm *tmp, *r;
+	int err;
+
+	/* not finding the AQM in the list is an error, too */
+	err = EINVAL;
+	ND("called for %s", aqm->name);
+
+	DN_BH_WLOCK();
+
+	/* clean up AQM status and deconfig flowset */
+	dn_ht_scan(dn_cfg.fshash, fs_cleanup, &aqm->type);
+
+	SLIST_FOREACH_SAFE(r, &dn_cfg.aqmlist, next, tmp) {
+		if (strcmp(aqm->name, r->name) != 0)
+			continue;
+		ND("ref_count = %d", r->ref_count);
+		err = (r->ref_count != 0 || r->cfg_ref_count != 0) ? EBUSY : 0;
+		if (err == 0)
+			SLIST_REMOVE(&dn_cfg.aqmlist, r, dn_aqm, next);
+		break;
+	}
+	DN_BH_WUNLOCK();
+	D("%s %sunloaded", aqm->name, err ? "not ":"");
+	/* r is NULL when the loop completed without a match */
+	if (err != 0 && r != NULL)
+		D("ref_count=%d, cfg_ref_count=%d", r->ref_count,
+		    r->cfg_ref_count);
+	return err;
+}
+
+/* Common modevent handler for all AQM modules. */
+int
+dn_aqm_modevent(module_t mod, int cmd, void *arg)
+{
+	struct dn_aqm *aqm = arg;
+
+	switch (cmd) {
+	case MOD_LOAD:
+		return load_dn_aqm(aqm);
+	case MOD_UNLOAD:
+		return unload_dn_aqm(aqm);
+	default:
+		return EINVAL;
+	}
+}
+#endif
+
 /* end of file */
+

Modified: trunk/sys/netpfil/ipfw/ip_fw2.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_fw2.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_fw2.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
  *
@@ -24,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_fw2.c 331202 2018-03-19 09:54:16Z ae $");
 
 /*
  * The FreeBSD IP packet firewall, main file
@@ -34,7 +35,7 @@
 #include "opt_ipdivert.h"
 #include "opt_inet.h"
 #ifndef INET
-#error IPFIREWALL requires INET.
+#error "IPFIREWALL requires INET"
 #endif /* INET */
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
@@ -60,10 +61,11 @@
 #include <net/ethernet.h> /* for ETHERTYPE_IP */
 #include <net/if.h>
 #include <net/route.h>
-#include <net/pf_mtag.h>
 #include <net/pfil.h>
 #include <net/vnet.h>
 
+#include <netpfil/pf/pf_mtag.h>
+
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
@@ -86,6 +88,8 @@
 #include <netinet6/ip6_var.h>
 #endif
 
+#include <net/if_gre.h> /* for struct grehdr */
+
 #include <netpfil/ipfw/ip_fw_private.h>
 
 #include <machine/in_cksum.h>	/* XXX for in_cksum */
@@ -142,6 +146,8 @@
 /* layer3_chain contains the list of rules for layer 3 */
 VNET_DEFINE(struct ip_fw_chain, layer3_chain);
 
+VNET_DEFINE(int, ipfw_nat_ready) = 0;
+
 ipfw_nat_t *ipfw_nat_ptr = NULL;
 struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
 ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
@@ -178,7 +184,7 @@
     &default_to_accept, 0,
     "Make the default rule accept all packets.");
 TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept);
-TUNABLE_INT("net.inet.ip.fw.tables_max", &default_fw_tables);
+TUNABLE_INT("net.inet.ip.fw.tables_max", (int *)&default_fw_tables);
 SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count,
     CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
     "Number of static rules");
@@ -628,8 +634,6 @@
 		m_adj(m, args->L3offset);
 #endif
 	if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
-		/* We need the IP header in host order for icmp_error(). */
-		SET_HOST_IPLEN(ip);
 		icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
 	} else if (args->f_id.proto == IPPROTO_TCP) {
 		struct tcphdr *const tcp =
@@ -942,7 +946,7 @@
 	uint8_t proto;
 	uint16_t src_port = 0, dst_port = 0;	/* NOTE: host format	*/
 	struct in_addr src_ip, dst_ip;		/* NOTE: network format	*/
-	uint16_t iplen=0;
+	int iplen = 0;
 	int pktlen;
 	uint16_t	etype = 0;	/* Host order stored ether type */
 
@@ -1142,6 +1146,11 @@
 				PULLUP_TO(hlen, ulp, struct pim);
 				break;
 
+			case IPPROTO_GRE:	/* RFC 1701 */
+				/* XXX GRE header check? */
+				PULLUP_TO(hlen, ulp, struct grehdr);
+				break;
+
 			case IPPROTO_CARP:
 				PULLUP_TO(hlen, ulp, struct carp_header);
 				if (((struct carp_header *)ulp)->carp_version !=
@@ -1178,6 +1187,7 @@
 		args->f_id.src_ip = 0;
 		args->f_id.dst_ip = 0;
 		args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
+		iplen = ntohs(ip6->ip6_plen) + sizeof(*ip6);
 	} else if (pktlen >= sizeof(struct ip) &&
 	    (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) {
 	    	is_ipv4 = 1;
@@ -1192,7 +1202,6 @@
 		dst_ip = ip->ip_dst;
 		offset = ntohs(ip->ip_off) & IP_OFFMASK;
 		iplen = ntohs(ip->ip_len);
-		pktlen = iplen < pktlen ? iplen : pktlen;
 
 		if (offset == 0) {
 			switch (proto) {
@@ -1231,6 +1240,7 @@
 		args->f_id.dst_ip = ntohl(dst_ip.s_addr);
 	}
 #undef PULLUP_TO
+	pktlen = iplen < pktlen ? iplen: pktlen;
 	if (proto) { /* we may have port numbers, store them */
 		args->f_id.proto = proto;
 		args->f_id.src_port = src_port = ntohs(src_port);
@@ -1237,9 +1247,9 @@
 		args->f_id.dst_port = dst_port = ntohs(dst_port);
 	}
 
-	IPFW_RLOCK(chain);
+	IPFW_PF_RLOCK(chain);
 	if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */
-		IPFW_RUNLOCK(chain);
+		IPFW_PF_RUNLOCK(chain);
 		return (IP_FW_PASS);	/* accept */
 	}
 	if (args->rule.slot) {
@@ -1677,7 +1687,7 @@
 					break;
 
 				/* DSCP bitmask is stored as low_u32 high_u32 */
-				if (x > 32)
+				if (x >= 32)
 					match = *(p + 1) & (1 << (x - 32));
 				else
 					match = *p & (1 << x);
@@ -1690,10 +1700,25 @@
 				    uint16_t x;
 				    uint16_t *p;
 				    int i;
+#ifdef INET6
+				    if (is_ipv6) {
+					    struct ip6_hdr *ip6;
 
+					    ip6 = (struct ip6_hdr *)ip;
+					    if (ip6->ip6_plen == 0) {
+						    /*
+						     * Jumbo payload is not
+						     * supported by this
+						     * opcode.
+						     */
+						    break;
+					    }
+					    x = iplen - hlen;
+				    } else
+#endif /* INET6 */
+					    x = iplen - (ip->ip_hl << 2);
 				    tcp = TCP(ulp);
-				    x = iplen -
-					((ip->ip_hl + tcp->th_off) << 2);
+				    x -= tcp->th_off << 2;
 				    if (cmdlen == 1) {
 					match = (cmd->arg1 == x);
 					break;
@@ -1758,14 +1783,22 @@
 
 			case O_ALTQ: {
 				struct pf_mtag *at;
+				struct m_tag *mtag;
 				ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
 
+				/*
+				 * ALTQ uses mbuf tags from another
+				 * packet filtering system - pf(4).
+				 * We allocate a tag in its format
+				 * and fill it in, pretending to be pf(4).
+				 */
 				match = 1;
 				at = pf_find_mtag(m);
 				if (at != NULL && at->qid != 0)
 					break;
-				at = pf_get_mtag(m);
-				if (at == NULL) {
+				mtag = m_tag_get(PACKET_TAG_PF,
+				    sizeof(struct pf_mtag), M_NOWAIT | M_ZERO);
+				if (mtag == NULL) {
 					/*
 					 * Let the packet fall back to the
 					 * default ALTQ.
@@ -1772,6 +1805,8 @@
 					 */
 					break;
 				}
+				m_tag_prepend(m, mtag);
+				at = (struct pf_mtag *)(mtag + 1);
 				at->qid = altq->qid;
 				at->hdr = ip;
 				break;
@@ -2393,55 +2428,49 @@
 			}
 
 			case O_NAT:
+				l = 0;          /* exit inner loop */
+				done = 1;       /* exit outer loop */
  				if (!IPFW_NAT_LOADED) {
 				    retval = IP_FW_DENY;
-				} else {
-				    struct cfg_nat *t;
-				    int nat_id;
+				    break;
+				}
 
-				    set_match(args, f_pos, chain);
-				    /* Check if this is 'global' nat rule */
-				    if (cmd->arg1 == 0) {
-					    retval = ipfw_nat_ptr(args, NULL, m);
-					    l = 0;
-					    done = 1;
-					    break;
-				    }
-				    t = ((ipfw_insn_nat *)cmd)->nat;
-				    if (t == NULL) {
+				struct cfg_nat *t;
+				int nat_id;
+
+				set_match(args, f_pos, chain);
+				/* Check if this is 'global' nat rule */
+				if (cmd->arg1 == 0) {
+					retval = ipfw_nat_ptr(args, NULL, m);
+					break;
+				}
+				t = ((ipfw_insn_nat *)cmd)->nat;
+				if (t == NULL) {
 					nat_id = IP_FW_ARG_TABLEARG(cmd->arg1);
 					t = (*lookup_nat_ptr)(&chain->nat, nat_id);
 
 					if (t == NULL) {
 					    retval = IP_FW_DENY;
-					    l = 0;	/* exit inner loop */
-					    done = 1;	/* exit outer loop */
 					    break;
 					}
 					if (cmd->arg1 != IP_FW_TABLEARG)
 					    ((ipfw_insn_nat *)cmd)->nat = t;
-				    }
-				    retval = ipfw_nat_ptr(args, t, m);
 				}
-				l = 0;          /* exit inner loop */
-				done = 1;       /* exit outer loop */
+				retval = ipfw_nat_ptr(args, t, m);
 				break;
 
 			case O_REASS: {
 				int ip_off;
 
+				l = 0;	/* in any case exit inner loop */
+				if (is_ipv6) /* IPv6 is not supported yet */
+					break;
 				IPFW_INC_RULE_COUNTER(f, pktlen);
-				l = 0;	/* in any case exit inner loop */
 				ip_off = ntohs(ip->ip_off);
 
 				/* if not fragmented, go to next rule */
 				if ((ip_off & (IP_MF | IP_OFFMASK)) == 0)
 				    break;
-				/* 
-				 * ip_reass() expects len & off in host
-				 * byte order.
-				 */
-				SET_HOST_IPLEN(ip);
 
 				args->m = m = ip_reass(m);
 
@@ -2455,7 +2484,6 @@
 
 				    ip = mtod(m, struct ip *);
 				    hlen = ip->ip_hl << 2;
-				    SET_NET_IPLEN(ip);
 				    ip->ip_sum = 0;
 				    if (hlen == sizeof(struct ip))
 					ip->ip_sum = in_cksum_hdr(ip);
@@ -2504,7 +2532,7 @@
 		retval = IP_FW_DENY;
 		printf("ipfw: ouch!, skip past end of rules, denying packet\n");
 	}
-	IPFW_RUNLOCK(chain);
+	IPFW_PF_RUNLOCK(chain);
 #ifdef __FreeBSD__
 	if (ucred_cache != NULL)
 		crfree(ucred_cache);
@@ -2655,7 +2683,7 @@
 	rule->set = RESVD_SET;
 	rule->cmd[0].len = 1;
 	rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
-	chain->rules = chain->default_rule = chain->map[0] = rule;
+	chain->default_rule = chain->map[0] = rule;
 	chain->id = rule->id = 1;
 
 	IPFW_LOCK_INIT(chain);
@@ -2665,10 +2693,9 @@
 	V_ipfw_vnet_ready = 1;		/* Open for business */
 
 	/*
-	 * Hook the sockopt handler, and the layer2 (V_ip_fw_chk_ptr)
-	 * and pfil hooks for ipv4 and ipv6. Even if the latter two fail
-	 * we still keep the module alive because the sockopt and
-	 * layer2 paths are still useful.
+	 * Hook the sockopt handler and pfil hooks for ipv4 and ipv6.
+	 * Even if the latter two fail we still keep the module alive
+	 * because the sockopt and layer2 paths are still useful.
 	 * ipfw[6]_hook return 0 on success, ENOENT on failure,
 	 * so we can ignore the exact return value and just set a flag.
 	 *
@@ -2679,7 +2706,6 @@
 	 * is checked on each packet because there are no pfil hooks.
 	 */
 	V_ip_fw_ctl_ptr = ipfw_ctl;
-	V_ip_fw_chk_ptr = ipfw_chk;
 	error = ipfw_attach_hooks(1);
 	return (error);
 }
@@ -2701,16 +2727,13 @@
 	 * sure the update is propagated and nobody will be in.
 	 */
 	(void)ipfw_attach_hooks(0 /* detach */);
-	V_ip_fw_chk_ptr = NULL;
 	V_ip_fw_ctl_ptr = NULL;
 	IPFW_UH_WLOCK(chain);
 	IPFW_UH_WUNLOCK(chain);
-	IPFW_UH_WLOCK(chain);
 
-	IPFW_WLOCK(chain);
 	ipfw_dyn_uninit(0);	/* run the callout_drain */
-	IPFW_WUNLOCK(chain);
 
+	IPFW_UH_WLOCK(chain);
 	ipfw_destroy_tables(chain);
 	reap = NULL;
 	IPFW_WLOCK(chain);

Modified: trunk/sys/netpfil/ipfw/ip_fw_dynamic.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_fw_dynamic.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_fw_dynamic.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
  *
@@ -24,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_fw_dynamic.c 314667 2017-03-04 13:03:31Z avg $");
 
 #define        DEB(x)
 #define        DDB(x) x
@@ -231,6 +232,7 @@
 #endif /* SYSCTL_NODE */
 
 
+#ifdef INET6
 static __inline int
 hash_packet6(struct ipfw_flow_id *id)
 {
@@ -242,6 +244,7 @@
 	    (id->dst_port) ^ (id->src_port);
 	return i;
 }
+#endif
 
 /*
  * IMPORTANT: the hash function for dynamic rules must be commutative
@@ -485,7 +488,7 @@
 	    V_curr_dyn_buckets, nbuckets);
 
 	/* Allocate and initialize new hash */
-	dyn_v = malloc(nbuckets * sizeof(ipfw_dyn_rule), M_IPFW,
+	dyn_v = malloc(nbuckets * sizeof(*dyn_v), M_IPFW,
 	    M_WAITOK | M_ZERO);
 
 	for (i = 0 ; i < nbuckets; i++)
@@ -713,6 +716,9 @@
 		id.fib = M_GETFIB(args->m);
 
 		if (IS_IP6_FLOW_ID (&(args->f_id))) {
+			bzero(&id.src_ip6, sizeof(id.src_ip6));
+			bzero(&id.dst_ip6, sizeof(id.dst_ip6));
+
 			if (limit_mask & DYN_SRC_ADDR)
 				id.src_ip6 = args->f_id.src_ip6;
 			if (limit_mask & DYN_DST_ADDR)
@@ -809,7 +815,7 @@
 #endif
 	struct tcphdr *th = NULL;
 
-	MGETHDR(m, M_DONTWAIT, MT_DATA);
+	MGETHDR(m, M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (NULL);
 
@@ -918,9 +924,8 @@
 		h->ip_v = 4;
 		h->ip_hl = sizeof(*h) >> 2;
 		h->ip_tos = IPTOS_LOWDELAY;
-		h->ip_off = 0;
-		/* ip_len must be in host format for ip_output */
-		h->ip_len = len;
+		h->ip_off = htons(0);
+		h->ip_len = htons(len);
 		h->ip_ttl = V_ip_defttl;
 		h->ip_sum = 0;
 		break;
@@ -1332,7 +1337,7 @@
 	/* Enforce limit on dynamic rules */
 	uma_zone_set_max(V_ipfw_dyn_rule_zone, V_dyn_max);
 
-        callout_init(&V_ipfw_timeout, CALLOUT_MPSAFE);
+        callout_init(&V_ipfw_timeout, 1);
 
 	/*
 	 * This can potentially be done on first dynamic rule

Modified: trunk/sys/netpfil/ipfw/ip_fw_log.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_fw_log.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_fw_log.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
  *
@@ -24,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_fw_log.c 255928 2013-09-28 15:49:36Z philip $");
 
 /*
  * Logging support for ipfw
@@ -44,10 +45,13 @@
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
 #include <net/ethernet.h> /* for ETHERTYPE_IP */
 #include <net/if.h>
+#include <net/if_clone.h>
 #include <net/vnet.h>
-#include <net/if_types.h>	/* for IFT_ETHER */
+#include <net/if_types.h>	/* for IFT_PFLOG */
 #include <net/bpf.h>		/* for BPF */
 
 #include <netinet/in.h>
@@ -91,7 +95,16 @@
 }
 #else /* !WITHOUT_BPF */
 static struct ifnet *log_if;	/* hook to attach to bpf */
+static struct rwlock log_if_lock;
+#define	LOGIF_LOCK_INIT(x)	rw_init(&log_if_lock, "ipfw log_if lock")
+#define	LOGIF_LOCK_DESTROY(x)	rw_destroy(&log_if_lock)
+#define	LOGIF_RLOCK(x)		rw_rlock(&log_if_lock)
+#define	LOGIF_RUNLOCK(x)	rw_runlock(&log_if_lock)
+#define	LOGIF_WLOCK(x)		rw_wlock(&log_if_lock)
+#define	LOGIF_WUNLOCK(x)	rw_wunlock(&log_if_lock)
 
+static const char ipfwname[] = "ipfw";
+
 /* we use this dummy function for all ifnet callbacks */
 static int
 log_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr)
@@ -101,10 +114,10 @@
 
 static int
 ipfw_log_output(struct ifnet *ifp, struct mbuf *m,
-	struct sockaddr *dst, struct route *ro)
+	const struct sockaddr *dst, struct route *ro)
 {
 	if (m != NULL)
-		m_freem(m);
+		FREE_PKT(m);
 	return EINVAL;
 }
 
@@ -117,37 +130,105 @@
 static const u_char ipfwbroadcastaddr[6] =
 	{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
+static int
+ipfw_log_clone_match(struct if_clone *ifc, const char *name)
+{
+
+	return (strncmp(name, ipfwname, sizeof(ipfwname) - 1) == 0);
+}
+
+static int
+ipfw_log_clone_create(struct if_clone *ifc, char *name, size_t len,
+    caddr_t params)
+{
+	int error;
+	int unit;
+	struct ifnet *ifp;
+
+	error = ifc_name2unit(name, &unit);
+	if (error)
+		return (error);
+
+	error = ifc_alloc_unit(ifc, &unit);
+	if (error)
+		return (error);
+
+	ifp = if_alloc(IFT_PFLOG);
+	if (ifp == NULL) {
+		ifc_free_unit(ifc, unit);
+		return (ENOSPC);
+	}
+	ifp->if_dname = ipfwname;
+	ifp->if_dunit = unit;
+	snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", ipfwname, unit);
+	strlcpy(name, ifp->if_xname, len);
+	ifp->if_mtu = 65536;
+	ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_init = (void *)log_dummy;
+	ifp->if_ioctl = log_dummy;
+	ifp->if_start = ipfw_log_start;
+	ifp->if_output = ipfw_log_output;
+	ifp->if_addrlen = 6;
+	ifp->if_hdrlen = 14;
+	ifp->if_broadcastaddr = ipfwbroadcastaddr;
+	ifp->if_baudrate = IF_Mbps(10);
+
+	LOGIF_WLOCK();
+	if (log_if == NULL)
+		log_if = ifp;
+	else {
+		LOGIF_WUNLOCK();
+		if_free(ifp);
+		ifc_free_unit(ifc, unit);
+		return (EEXIST);
+	}
+	LOGIF_WUNLOCK();
+	if_attach(ifp);
+	bpfattach(ifp, DLT_EN10MB, 14);
+
+	return (0);
+}
+
+static int
+ipfw_log_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
+{
+	int unit;
+
+	if (ifp == NULL)
+		return (0);
+
+	LOGIF_WLOCK();
+	if (log_if != NULL && ifp == log_if)
+		log_if = NULL;
+	else {
+		LOGIF_WUNLOCK();
+		return (EINVAL);
+	}
+	LOGIF_WUNLOCK();
+
+	unit = ifp->if_dunit;
+	bpfdetach(ifp);
+	if_detach(ifp);
+	if_free(ifp);
+	ifc_free_unit(ifc, unit);
+
+	return (0);
+}
+
+static struct if_clone *ipfw_log_cloner;
+
 void
 ipfw_log_bpf(int onoff)
 {
-	struct ifnet *ifp;
 
 	if (onoff) {
-		if (log_if)
-			return;
-		ifp = if_alloc(IFT_ETHER);
-		if (ifp == NULL)
-			return;
-		if_initname(ifp, "ipfw", 0);
-		ifp->if_mtu = 65536;
-		ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
-		ifp->if_init = (void *)log_dummy;
-		ifp->if_ioctl = log_dummy;
-		ifp->if_start = ipfw_log_start;
-		ifp->if_output = ipfw_log_output;
-		ifp->if_addrlen = 6;
-		ifp->if_hdrlen = 14;
-		if_attach(ifp);
-		ifp->if_broadcastaddr = ipfwbroadcastaddr;
-		ifp->if_baudrate = IF_Mbps(10);
-		bpfattach(ifp, DLT_EN10MB, 14);
-		log_if = ifp;
+		LOGIF_LOCK_INIT();
+		ipfw_log_cloner = if_clone_advanced(ipfwname, 0,
+		    ipfw_log_clone_match, ipfw_log_clone_create,
+		    ipfw_log_clone_destroy);
 	} else {
-		if (log_if) {
-			ether_ifdetach(log_if);
-			if_free(log_if);
-		}
-		log_if = NULL;
+		if_clone_detach(ipfw_log_cloner);
+		LOGIF_LOCK_DESTROY();
 	}
 }
 #endif /* !WITHOUT_BPF */
@@ -167,17 +248,27 @@
 
 	if (V_fw_verbose == 0) {
 #ifndef WITHOUT_BPF
-
-		if (log_if == NULL || log_if->if_bpf == NULL)
+		LOGIF_RLOCK();
+		if (log_if == NULL || log_if->if_bpf == NULL) {
+			LOGIF_RUNLOCK();
 			return;
+		}
 
 		if (args->eh) /* layer2, use orig hdr */
 			BPF_MTAP2(log_if, args->eh, ETHER_HDR_LEN, m);
-		else
+		else {
 			/* Add fake header. Later we will store
 			 * more info in the header.
 			 */
-			BPF_MTAP2(log_if, "DDDDDDSSSSSS\x08\x00", ETHER_HDR_LEN, m);
+			if (ip->ip_v == 4)
+				BPF_MTAP2(log_if, "DDDDDDSSSSSS\x08\x00", ETHER_HDR_LEN, m);
+			else if  (ip->ip_v == 6)
+				BPF_MTAP2(log_if, "DDDDDDSSSSSS\x86\xdd", ETHER_HDR_LEN, m);
+			else
+				/* Obviously bogus EtherType. */
+				BPF_MTAP2(log_if, "DDDDDDSSSSSS\xff\xff", ETHER_HDR_LEN, m);
+		}
+		LOGIF_RUNLOCK();
 #endif /* !WITHOUT_BPF */
 		return;
 	}

Modified: trunk/sys/netpfil/ipfw/ip_fw_nat.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_fw_nat.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_fw_nat.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2008 Paolo Pisati
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_fw_nat.c 266678 2014-05-26 07:02:03Z ae $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -53,8 +54,7 @@
 
 #include <machine/in_cksum.h>	/* XXX for in_cksum */
 
-static VNET_DEFINE(eventhandler_tag, ifaddr_event_tag);
-#define	V_ifaddr_event_tag	VNET(ifaddr_event_tag)
+static eventhandler_tag ifaddr_event_tag;
 
 static void
 ifaddr_change(void *arg __unused, struct ifnet *ifp)
@@ -63,6 +63,8 @@
 	struct ifaddr *ifa;
 	struct ip_fw_chain *chain;
 
+	KASSERT(curvnet == ifp->if_vnet,
+	    ("curvnet(%p) differs from iface vnet(%p)", curvnet, ifp->if_vnet));
 	chain = &V_layer3_chain;
 	IPFW_WLOCK(chain);
 	/* Check every nat entry... */
@@ -342,11 +344,11 @@
 	if (ldt) {
 		struct tcphdr 	*th;
 		struct udphdr 	*uh;
-		u_short cksum;
+		uint16_t ip_len, cksum;
 
-		ip->ip_len = ntohs(ip->ip_len);
+		ip_len = ntohs(ip->ip_len);
 		cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
-		    htons(ip->ip_p + ip->ip_len - (ip->ip_hl << 2)));
+		    htons(ip->ip_p + ip_len - (ip->ip_hl << 2)));
 
 		switch (ip->ip_p) {
 		case IPPROTO_TCP:
@@ -372,7 +374,6 @@
 			in_delayed_cksum(mcl);
 			mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 		}
-		ip->ip_len = htons(ip->ip_len);
 	}
 	args->m = mcl;
 	return (IP_FW_NAT);
@@ -441,7 +442,7 @@
 	ptr->ip = cfg->ip;
 	ptr->redir_cnt = cfg->redir_cnt;
 	ptr->mode = cfg->mode;
-	LibAliasSetMode(ptr->lib, cfg->mode, cfg->mode);
+	LibAliasSetMode(ptr->lib, cfg->mode, ~0);
 	LibAliasSetAddress(ptr->lib, ptr->ip);
 	memcpy(ptr->if_name, cfg->if_name, IF_NAMESIZE);
 
@@ -590,11 +591,38 @@
 	return(0);
 }
 
+static int
+vnet_ipfw_nat_init(const void *arg __unused)
+{
+
+	V_ipfw_nat_ready = 1;
+	return (0);
+}
+
+static int
+vnet_ipfw_nat_uninit(const void *arg __unused)
+{
+	struct cfg_nat *ptr, *ptr_temp;
+	struct ip_fw_chain *chain;
+
+	chain = &V_layer3_chain;
+	IPFW_WLOCK(chain);
+	LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) {
+		LIST_REMOVE(ptr, _next);
+		del_redir_spool_cfg(ptr, &ptr->redir_chain);
+		LibAliasUninit(ptr->lib);
+		free(ptr, M_IPFW);
+	}
+	flush_nat_ptrs(chain, -1 /* flush all */);
+	V_ipfw_nat_ready = 0;
+	IPFW_WUNLOCK(chain);
+	return (0);
+}
+
 static void
 ipfw_nat_init(void)
 {
 
-	IPFW_WLOCK(&V_layer3_chain);
 	/* init ipfw hooks */
 	ipfw_nat_ptr = ipfw_nat;
 	lookup_nat_ptr = lookup_nat;
@@ -602,9 +630,8 @@
 	ipfw_nat_del_ptr = ipfw_nat_del;
 	ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg;
 	ipfw_nat_get_log_ptr = ipfw_nat_get_log;
-	IPFW_WUNLOCK(&V_layer3_chain);
-	V_ifaddr_event_tag = EVENTHANDLER_REGISTER(
-	    ifaddr_event, ifaddr_change,
+
+	ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_change,
 	    NULL, EVENTHANDLER_PRI_ANY);
 }
 
@@ -611,19 +638,8 @@
 static void
 ipfw_nat_destroy(void)
 {
-	struct cfg_nat *ptr, *ptr_temp;
-	struct ip_fw_chain *chain;
 
-	chain = &V_layer3_chain;
-	IPFW_WLOCK(chain);
-	LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) {
-		LIST_REMOVE(ptr, _next);
-		del_redir_spool_cfg(ptr, &ptr->redir_chain);
-		LibAliasUninit(ptr->lib);
-		free(ptr, M_IPFW);
-	}
-	EVENTHANDLER_DEREGISTER(ifaddr_event, V_ifaddr_event_tag);
-	flush_nat_ptrs(chain, -1 /* flush all */);
+	EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_event_tag);
 	/* deregister ipfw_nat */
 	ipfw_nat_ptr = NULL;
 	lookup_nat_ptr = NULL;
@@ -631,7 +647,6 @@
 	ipfw_nat_del_ptr = NULL;
 	ipfw_nat_get_cfg_ptr = NULL;
 	ipfw_nat_get_log_ptr = NULL;
-	IPFW_WUNLOCK(chain);
 }
 
 static int
@@ -641,11 +656,9 @@
 
 	switch (type) {
 	case MOD_LOAD:
-		ipfw_nat_init();
 		break;
 
 	case MOD_UNLOAD:
-		ipfw_nat_destroy();
 		break;
 
 	default:
@@ -661,8 +674,25 @@
 	0
 };
 
-DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+/* Define startup order. */
+#define	IPFW_NAT_SI_SUB_FIREWALL	SI_SUB_PROTO_IFATTACHDOMAIN
+#define	IPFW_NAT_MODEVENT_ORDER		(SI_ORDER_ANY - 128) /* after ipfw */
+#define	IPFW_NAT_MODULE_ORDER		(IPFW_NAT_MODEVENT_ORDER + 1)
+#define	IPFW_NAT_VNET_ORDER		(IPFW_NAT_MODEVENT_ORDER + 2)
+
+DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, IPFW_NAT_SI_SUB_FIREWALL, SI_ORDER_ANY);
 MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1);
 MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2);
 MODULE_VERSION(ipfw_nat, 1);
+
+SYSINIT(ipfw_nat_init, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_MODULE_ORDER,
+    ipfw_nat_init, NULL);
+VNET_SYSINIT(vnet_ipfw_nat_init, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_VNET_ORDER,
+    vnet_ipfw_nat_init, NULL);
+
+SYSUNINIT(ipfw_nat_destroy, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_MODULE_ORDER,
+    ipfw_nat_destroy, NULL);
+VNET_SYSUNINIT(vnet_ipfw_nat_uninit, IPFW_NAT_SI_SUB_FIREWALL,
+    IPFW_NAT_VNET_ORDER, vnet_ipfw_nat_uninit, NULL);
+
 /* end of file */

Modified: trunk/sys/netpfil/ipfw/ip_fw_pfil.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_fw_pfil.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_fw_pfil.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2004 Andre Oppermann, Internet Business Solutions AG
  * All rights reserved.
@@ -25,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_fw_pfil.c 264813 2014-04-23 09:56:17Z ae $");
 
 #include "opt_ipfw.h"
 #include "opt_inet.h"
@@ -47,6 +48,7 @@
 
 #include <net/if.h>
 #include <net/route.h>
+#include <net/ethernet.h>
 #include <net/pfil.h>
 #include <net/vnet.h>
 
@@ -74,10 +76,17 @@
 #define V_fw6_enable	VNET(fw6_enable)
 #endif
 
+static VNET_DEFINE(int, fwlink_enable) = 0;
+#define V_fwlink_enable	VNET(fwlink_enable)
+
 int ipfw_chg_hook(SYSCTL_HANDLER_ARGS);
 
 /* Forward declarations. */
 static int ipfw_divert(struct mbuf **, int, struct ipfw_rule_ref *, int);
+static int ipfw_check_packet(void *, struct mbuf **, struct ifnet *, int,
+	struct inpcb *);
+static int ipfw_check_frame(void *, struct mbuf **, struct ifnet *, int,
+	struct inpcb *);
 
 #ifdef SYSCTL_NODE
 
@@ -94,6 +103,11 @@
     ipfw_chg_hook, "I", "Enable ipfw+6");
 #endif /* INET6 */
 
+SYSCTL_DECL(_net_link_ether);
+SYSCTL_VNET_PROC(_net_link_ether, OID_AUTO, ipfw,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fwlink_enable), 0,
+    ipfw_chg_hook, "I", "Pass ether pkts through firewall");
+
 SYSEND
 
 #endif /* SYSCTL_NODE */
@@ -103,8 +117,8 @@
  * dummynet, divert, netgraph or other modules.
  * The packet may be consumed.
  */
-int
-ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
+static int
+ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
     struct inpcb *inp)
 {
 	struct ip_fw_args args;
@@ -112,10 +126,6 @@
 	int ipfw;
 	int ret;
 
-	/* all the processing now uses ip_len in net format */
-	if (mtod(*m0, struct ip *)->ip_v == 4)
-		SET_NET_IPLEN(mtod(*m0, struct ip *));
-
 	/* convert dir to IPFW values */
 	dir = (dir == PFIL_IN) ? DIR_IN : DIR_OUT;
 	bzero(&args, sizeof(args));
@@ -129,11 +139,8 @@
 	if (tag != NULL) {
 		args.rule = *((struct ipfw_rule_ref *)(tag+1));
 		m_tag_delete(*m0, tag);
-		if (args.rule.info & IPFW_ONEPASS) {
-			if (mtod(*m0, struct ip *)->ip_v == 4)
-				SET_HOST_IPLEN(mtod(*m0, struct ip *));
+		if (args.rule.info & IPFW_ONEPASS)
 			return (0);
-		}
 	}
 
 	args.m = *m0;
@@ -277,11 +284,115 @@
 			FREE_PKT(*m0);
 		*m0 = NULL;
 	}
-	if (*m0 && mtod(*m0, struct ip *)->ip_v == 4)
-		SET_HOST_IPLEN(mtod(*m0, struct ip *));
+
 	return ret;
 }
 
+/*
+ * ipfw processing for ethernet packets (in and out).
+ * Interface is NULL from ether_demux, and ifp from
+ * ether_output_frame.
+ */
+static int
+ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *dst, int dir,
+    struct inpcb *inp)
+{
+	struct ether_header *eh;
+	struct ether_header save_eh;
+	struct mbuf *m;
+	int i, ret;
+	struct ip_fw_args args;
+	struct m_tag *mtag;
+
+	/* fetch start point from rule, if any */
+	mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
+	if (mtag == NULL) {
+		args.rule.slot = 0;
+	} else {
+		/* dummynet packet, already partially processed */
+		struct ipfw_rule_ref *r;
+
+		/* XXX can we free it after use ? */
+		mtag->m_tag_id = PACKET_TAG_NONE;
+		r = (struct ipfw_rule_ref *)(mtag + 1);
+		if (r->info & IPFW_ONEPASS)
+			return (0);
+		args.rule = *r;
+	}
+
+	/* I need some amt of data to be contiguous */
+	m = *m0;
+	i = min(m->m_pkthdr.len, max_protohdr);
+	if (m->m_len < i) {
+		m = m_pullup(m, i);
+		if (m == NULL) {
+			*m0 = m;
+			return (0);
+		}
+	}
+	eh = mtod(m, struct ether_header *);
+	save_eh = *eh;			/* save copy for restore below */
+	m_adj(m, ETHER_HDR_LEN);	/* strip ethernet header */
+
+	args.m = m;		/* the packet we are looking at		*/
+	args.oif = dir == PFIL_OUT ? dst: NULL;	/* destination, if any	*/
+	args.next_hop = NULL;	/* we do not support forward yet	*/
+	args.next_hop6 = NULL;	/* we do not support forward yet	*/
+	args.eh = &save_eh;	/* MAC header for bridged/MAC packets	*/
+	args.inp = NULL;	/* used by ipfw uid/gid/jail rules	*/
+	i = ipfw_chk(&args);
+	m = args.m;
+	if (m != NULL) {
+		/*
+		 * Restore Ethernet header, as needed, in case the
+		 * mbuf chain was replaced by ipfw.
+		 */
+		M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
+		if (m == NULL) {
+			*m0 = NULL;
+			return (0);
+		}
+		if (eh != mtod(m, struct ether_header *))
+			bcopy(&save_eh, mtod(m, struct ether_header *),
+				ETHER_HDR_LEN);
+	}
+	*m0 = m;
+
+	ret = 0;
+	/* Check result of ipfw_chk() */
+	switch (i) {
+	case IP_FW_PASS:
+		break;
+
+	case IP_FW_DENY:
+		ret = EACCES;
+		break; /* i.e. drop */
+
+	case IP_FW_DUMMYNET:
+		ret = EACCES;
+		int dir;
+
+		if (ip_dn_io_ptr == NULL)
+			break; /* i.e. drop */
+
+		*m0 = NULL;
+		dir = PROTO_LAYER2 | (dst ? DIR_OUT : DIR_IN);
+		ip_dn_io_ptr(&m, dir, &args);
+		return 0;
+
+	default:
+		KASSERT(0, ("%s: unknown retval", __func__));
+	}
+
+	if (ret != 0) {
+		if (*m0)
+			FREE_PKT(*m0);
+		*m0 = NULL;
+	}
+
+	return ret;
+}
+
 /* do the divert, return 1 on error 0 on success */
 static int
 ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule,
@@ -301,7 +412,7 @@
 		clone = *m0;	/* use the original mbuf */
 		*m0 = NULL;
 	} else {
-		clone = m_dup(*m0, M_DONTWAIT);
+		clone = m_dup(*m0, M_NOWAIT);
 		/* If we cannot duplicate the mbuf, we sacrifice the divert
 		 * chain and continue with the tee-ed packet.
 		 */
@@ -323,7 +434,6 @@
 		int hlen;
 		struct mbuf *reass;
 
-		SET_HOST_IPLEN(ip); /* ip_reass wants host order */
 		reass = ip_reass(clone); /* Reassemble packet. */
 		if (reass == NULL)
 			return 0; /* not an error */
@@ -334,7 +444,6 @@
 		 */
 		ip = mtod(reass, struct ip *);
 		hlen = ip->ip_hl << 2;
-		SET_NET_IPLEN(ip);
 		ip->ip_sum = 0;
 		if (hlen == sizeof(struct ip))
 			ip->ip_sum = in_cksum_hdr(ip);
@@ -383,13 +492,16 @@
 ipfw_hook(int onoff, int pf)
 {
 	struct pfil_head *pfh;
+	void *hook_func;
 
 	pfh = pfil_head_get(PFIL_TYPE_AF, pf);
 	if (pfh == NULL)
 		return ENOENT;
 
+	hook_func = (pf == AF_LINK) ? ipfw_check_frame : ipfw_check_packet;
+
 	(void) (onoff ? pfil_add_hook : pfil_remove_hook)
-	    (ipfw_check_hook, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh);
+	    (hook_func, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh);
 
 	return 0;
 }
@@ -413,6 +525,12 @@
                 printf("ipfw6_hook() error\n");
         }
 #endif
+	if (arg == 0) /* detach */
+		ipfw_hook(0, AF_LINK);
+	else if (V_fwlink_enable && ipfw_hook(1, AF_LINK) != 0) {
+                error = ENOENT;
+                printf("ipfw_link_hook() error\n");
+        }
 	return error;
 }
 
@@ -419,45 +537,38 @@
 int
 ipfw_chg_hook(SYSCTL_HANDLER_ARGS)
 {
-	int enable;
-	int oldenable;
+	int newval;
 	int error;
 	int af;
 
-	if (arg1 == &VNET_NAME(fw_enable)) {
-		enable = V_fw_enable;
+	if (arg1 == &V_fw_enable)
 		af = AF_INET;
-	}
 #ifdef INET6
-	else if (arg1 == &VNET_NAME(fw6_enable)) {
-		enable = V_fw6_enable;
+	else if (arg1 == &V_fw6_enable)
 		af = AF_INET6;
-	}
 #endif
+	else if (arg1 == &V_fwlink_enable)
+		af = AF_LINK;
 	else 
 		return (EINVAL);
 
-	oldenable = enable;
+	newval = *(int *)arg1;
+	/* Handle sysctl change */
+	error = sysctl_handle_int(oidp, &newval, 0, req);
 
-	error = sysctl_handle_int(oidp, &enable, 0, req);
-
 	if (error)
 		return (error);
 
-	enable = (enable) ? 1 : 0;
+	/* Formalize new value */
+	newval = (newval) ? 1 : 0;
 
-	if (enable == oldenable)
+	if (*(int *)arg1 == newval)
 		return (0);
 
-	error = ipfw_hook(enable, af);
+	error = ipfw_hook(newval, af);
 	if (error)
 		return (error);
-	if (af == AF_INET)
-		V_fw_enable = enable;
-#ifdef INET6
-	else if (af == AF_INET6)
-		V_fw6_enable = enable;
-#endif
+	*(int *)arg1 = newval;
 
 	return (0);
 }

Modified: trunk/sys/netpfil/ipfw/ip_fw_private.h
===================================================================
--- trunk/sys/netpfil/ipfw/ip_fw_private.h	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_fw_private.h	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
  *
@@ -22,7 +23,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/ip_fw_private.h 265700 2014-05-08 19:11:41Z melifaro $
  */
 
 #ifndef _IPFW2_PRIVATE_H
@@ -213,12 +214,9 @@
 #define V_fw_tables_max		VNET(fw_tables_max)
 
 struct ip_fw_chain {
-	struct ip_fw	*rules;		/* list of rules */
-	struct ip_fw	*reap;		/* list of rules to reap */
-	struct ip_fw	*default_rule;
+	struct ip_fw	**map;		/* array of rule ptrs to ease lookup */
+	uint32_t	id;		/* ruleset id */
 	int		n_rules;	/* number of static rules */
-	int		static_len;	/* total len of static rules */
-	struct ip_fw	**map;		/* array of rule ptrs to ease lookup */
 	LIST_HEAD(nat_list, cfg_nat) nat;       /* list of nat entries */
 	struct radix_node_head **tables;	/* IPv4 tables */
 	struct radix_node_head **xtables;	/* extended tables */
@@ -225,13 +223,18 @@
 	uint8_t		*tabletype;	/* Array of table types */
 #if defined( __linux__ ) || defined( _WIN32 )
 	spinlock_t rwmtx;
+#else
+	struct rwlock	rwmtx;
+#endif
+	int		static_len;	/* total len of static rules */
+	uint32_t	gencnt;		/* NAT generation count */
+	struct ip_fw	*reap;		/* list of rules to reap */
+	struct ip_fw	*default_rule;
+#if defined( __linux__ ) || defined( _WIN32 )
 	spinlock_t uh_lock;
 #else
-	struct rwlock	rwmtx;
 	struct rwlock	uh_lock;	/* lock for upper half */
 #endif
-	uint32_t	id;		/* ruleset id */
-	uint32_t	gencnt;		/* generation count */
 };
 
 struct sockopt;	/* used by tcp_var.h */
@@ -259,7 +262,7 @@
 	(_cntr)->bcnt = 0;				\
 	} while (0)
 
-#define	IP_FW_ARG_TABLEARG(a)	((a) == IP_FW_TABLEARG) ? tablearg : (a)
+#define	IP_FW_ARG_TABLEARG(a)	(((a) == IP_FW_TABLEARG) ? tablearg : (a))
 /*
  * The lock is heavily used by ip_fw2.c (the main file) and ip_fw_nat.c
  * so the variable and the macros must be here.
@@ -278,10 +281,12 @@
 #define	IPFW_RLOCK_ASSERT(_chain)	rw_assert(&(_chain)->rwmtx, RA_RLOCKED)
 #define	IPFW_WLOCK_ASSERT(_chain)	rw_assert(&(_chain)->rwmtx, RA_WLOCKED)
 
-#define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx)
-#define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx)
-#define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx)
-#define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx)
+#define	IPFW_RLOCK(p)			rw_rlock(&(p)->rwmtx)
+#define	IPFW_RUNLOCK(p)			rw_runlock(&(p)->rwmtx)
+#define	IPFW_WLOCK(p)			rw_wlock(&(p)->rwmtx)
+#define	IPFW_WUNLOCK(p)			rw_wunlock(&(p)->rwmtx)
+#define	IPFW_PF_RLOCK(p)		IPFW_RLOCK(p)
+#define	IPFW_PF_RUNLOCK(p)		IPFW_RUNLOCK(p)
 
 #define	IPFW_UH_RLOCK_ASSERT(_chain)	rw_assert(&(_chain)->uh_lock, RA_RLOCKED)
 #define	IPFW_UH_WLOCK_ASSERT(_chain)	rw_assert(&(_chain)->uh_lock, RA_WLOCKED)
@@ -298,10 +303,6 @@
 int ipfw_chk(struct ip_fw_args *args);
 void ipfw_reap_rules(struct ip_fw *head);
 
-/* In ip_fw_pfil */
-int ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
-     struct inpcb *inp);
-
 /* In ip_fw_table.c */
 struct radix_node;
 int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
@@ -329,9 +330,11 @@
 typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *);
 typedef int ipfw_nat_cfg_t(struct sockopt *);
 
+VNET_DECLARE(int, ipfw_nat_ready);
+#define	V_ipfw_nat_ready	VNET(ipfw_nat_ready)
+#define	IPFW_NAT_LOADED	(V_ipfw_nat_ready)
+
 extern ipfw_nat_t *ipfw_nat_ptr;
-#define IPFW_NAT_LOADED (ipfw_nat_ptr != NULL)
-
 extern ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
 extern ipfw_nat_cfg_t *ipfw_nat_del_ptr;
 extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;

Modified: trunk/sys/netpfil/ipfw/ip_fw_sockopt.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_fw_sockopt.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_fw_sockopt.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
  *
@@ -26,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_fw_sockopt.c 265700 2014-05-08 19:11:41Z melifaro $");
 
 /*
  * Sockopt support for ipfw. The routines here implement
@@ -159,13 +160,11 @@
 	int i, l, insert_before;
 	struct ip_fw **map;	/* the new array of pointers */
 
-	if (chain->rules == NULL || input_rule->rulenum > IPFW_DEFAULT_RULE-1)
+	if (chain->map == NULL || input_rule->rulenum > IPFW_DEFAULT_RULE - 1)
 		return (EINVAL);
 
 	l = RULESIZE(input_rule);
 	rule = malloc(l, M_IPFW, M_WAITOK | M_ZERO);
-	if (rule == NULL)
-		return (ENOSPC);
 	/* get_map returns with IPFW_UH_WLOCK if successful */
 	map = get_map(chain, 1, 0 /* not locked */);
 	if (map == NULL) {
@@ -655,7 +654,7 @@
 
 		case O_IP_SRC_LOOKUP:
 		case O_IP_DST_LOOKUP:
-			if (cmd->arg1 >= IPFW_TABLES_MAX) {
+			if (cmd->arg1 >= V_fw_tables_max) {
 				printf("ipfw: invalid table number %d\n",
 				    cmd->arg1);
 				return (EINVAL);
@@ -1005,8 +1004,6 @@
 			if (size >= sopt->sopt_valsize)
 				break;
 			buf = malloc(size, M_TEMP, M_WAITOK);
-			if (buf == NULL)
-				break;
 			IPFW_UH_RLOCK(chain);
 			/* check again how much space we need */
 			want = chain->static_len + ipfw_dyn_len();
@@ -1043,8 +1040,10 @@
 		if (sopt->sopt_valsize == RULESIZE7(rule)) {
 		    is7 = 1;
 		    error = convert_rule_to_8(rule);
-		    if (error)
+		    if (error) {
+			free(rule, M_TEMP);
 			return error;
+		    }
 		    if (error == 0)
 			error = check_ipfw_struct(rule, RULESIZE(rule));
 		} else {
@@ -1060,12 +1059,14 @@
 				if (is7) {
 					error = convert_rule_to_7(rule);
 					size = RULESIZE7(rule);
-					if (error)
+					if (error) {
+						free(rule, M_TEMP);
 						return error;
+					}
 				}
 				error = sooptcopyout(sopt, rule, size);
+			}
 		}
-		}
 		free(rule, M_TEMP);
 		break;
 

Modified: trunk/sys/netpfil/ipfw/ip_fw_table.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_fw_table.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_fw_table.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
  *
@@ -24,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_fw_table.c 287963 2015-09-18 17:29:24Z melifaro $");
 
 /*
  * Lookup table support for ipfw
@@ -69,7 +70,7 @@
 #include <security/mac/mac_framework.h>
 #endif
 
-MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
+static MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
 
 struct table_entry {
 	struct radix_node	rn[2];
@@ -123,6 +124,7 @@
 #define OFF_LEN_IFACE	(8 * offsetof(struct xaddr_iface, ifname))
 
 
+#ifdef INET6
 static inline void
 ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
 {
@@ -132,6 +134,7 @@
 		*cp++ = 0xFFFFFFFF;
 	*cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0);
 }
+#endif
 
 int
 ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
@@ -542,7 +545,7 @@
 		return (0);
 	KEY_LEN(sa) = KEY_LEN_INET;
 	sa.sin_addr.s_addr = addr;
-	ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh));
+	ent = (struct table_entry *)(rnh->rnh_matchaddr(&sa, rnh));
 	if (ent != NULL) {
 		*val = ent->value;
 		return (1);
@@ -568,7 +571,7 @@
 	case IPFW_TABLE_CIDR:
 		KEY_LEN(sa6) = KEY_LEN_INET6;
 		memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr));
-		xent = (struct table_xentry *)(rnh->rnh_lookup(&sa6, NULL, rnh));
+		xent = (struct table_xentry *)(rnh->rnh_matchaddr(&sa6, rnh));
 		break;
 
 	case IPFW_TABLE_INTERFACE:
@@ -576,7 +579,7 @@
 		    strlcpy(iface.ifname, (char *)paddr, IF_NAMESIZE) + 1;
 		/* Assume direct match */
 		/* FIXME: Add interface pattern matching */
-		xent = (struct table_xentry *)(rnh->rnh_lookup(&iface, NULL, rnh));
+		xent = (struct table_xentry *)(rnh->rnh_matchaddr(&iface, rnh));
 		break;
 
 	default:
@@ -695,6 +698,7 @@
 		xent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
 	/* Save IPv4 address as deprecated IPv6 compatible */
 	xent->k.addr6.s6_addr32[3] = n->addr.sin_addr.s_addr;
+	xent->flags = IPFW_TCF_INET;
 	xent->value = n->value;
 	tbl->cnt++;
 	return (0);

Modified: trunk/sys/netpfil/ipfw/test/Makefile
===================================================================
--- trunk/sys/netpfil/ipfw/test/Makefile	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/test/Makefile	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,5 +1,6 @@
+# $MidnightBSD$
 #
-# $FreeBSD: release/9.2.0/sys/netpfil/ipfw/test/Makefile 205417 2010-03-21 16:30:32Z luigi $
+# $FreeBSD: stable/10/sys/netpfil/ipfw/test/Makefile 205417 2010-03-21 16:30:32Z luigi $
 #
 # Makefile for building userland tests
 # this is written in a form compatible with gmake

Modified: trunk/sys/netpfil/ipfw/test/dn_test.h
===================================================================
--- trunk/sys/netpfil/ipfw/test/dn_test.h	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/test/dn_test.h	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
 /*
- * $FreeBSD: release/9.2.0/sys/netpfil/ipfw/test/dn_test.h 204866 2010-03-08 11:27:39Z luigi $
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/test/dn_test.h 204866 2010-03-08 11:27:39Z luigi $
  *
  * userspace compatibility code for dummynet schedulers
  */

Modified: trunk/sys/netpfil/ipfw/test/main.c
===================================================================
--- trunk/sys/netpfil/ipfw/test/main.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/test/main.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
 /*
- * $FreeBSD: release/9.2.0/sys/netpfil/ipfw/test/main.c 204591 2010-03-02 17:40:48Z luigi $
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/test/main.c 204591 2010-03-02 17:40:48Z luigi $
  *
  * Testing program for schedulers
  *

Modified: trunk/sys/netpfil/ipfw/test/mylist.h
===================================================================
--- trunk/sys/netpfil/ipfw/test/mylist.h	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/test/mylist.h	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
 /*
- * $FreeBSD: release/9.2.0/sys/netpfil/ipfw/test/mylist.h 204735 2010-03-04 21:01:59Z luigi $
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/test/mylist.h 204735 2010-03-04 21:01:59Z luigi $
  *
  * linux-like bidirectional lists
  */

Modified: trunk/sys/netpfil/ipfw/test/test_dn_heap.c
===================================================================
--- trunk/sys/netpfil/ipfw/test/test_dn_heap.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/test/test_dn_heap.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
 /*-
  * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
  * All rights reserved
@@ -27,7 +28,7 @@
 /*
  * Userland code for testing binary heaps and hash tables
  *
- * $FreeBSD: release/9.2.0/sys/netpfil/ipfw/test/test_dn_heap.c 204591 2010-03-02 17:40:48Z luigi $
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/test/test_dn_heap.c 204591 2010-03-02 17:40:48Z luigi $
  */
 
 #include <sys/cdefs.h>

Modified: trunk/sys/netpfil/ipfw/test/test_dn_sched.c
===================================================================
--- trunk/sys/netpfil/ipfw/test/test_dn_sched.c	2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/test/test_dn_sched.c	2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
 /*
- * $FreeBSD: release/9.2.0/sys/netpfil/ipfw/test/test_dn_sched.c 204736 2010-03-04 21:52:40Z luigi $
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/test/test_dn_sched.c 204736 2010-03-04 21:52:40Z luigi $
  *
  * library functions for userland testing of dummynet schedulers
  */



More information about the Midnightbsd-cvs mailing list