[Midnightbsd-cvs] src [9923] trunk/sys/netpfil/ipfw: sync with freebsd 10
laffer1 at midnightbsd.org
laffer1 at midnightbsd.org
Fri May 25 09:05:12 EDT 2018
Revision: 9923
http://svnweb.midnightbsd.org/src/?rev=9923
Author: laffer1
Date: 2018-05-25 09:05:12 -0400 (Fri, 25 May 2018)
Log Message:
-----------
sync with freebsd 10
Modified Paths:
--------------
trunk/sys/netpfil/ipfw/dn_heap.c
trunk/sys/netpfil/ipfw/dn_heap.h
trunk/sys/netpfil/ipfw/dn_sched.h
trunk/sys/netpfil/ipfw/dn_sched_fifo.c
trunk/sys/netpfil/ipfw/dn_sched_prio.c
trunk/sys/netpfil/ipfw/dn_sched_qfq.c
trunk/sys/netpfil/ipfw/dn_sched_rr.c
trunk/sys/netpfil/ipfw/dn_sched_wf2q.c
trunk/sys/netpfil/ipfw/dummynet.txt
trunk/sys/netpfil/ipfw/ip_dn_glue.c
trunk/sys/netpfil/ipfw/ip_dn_io.c
trunk/sys/netpfil/ipfw/ip_dn_private.h
trunk/sys/netpfil/ipfw/ip_dummynet.c
trunk/sys/netpfil/ipfw/ip_fw2.c
trunk/sys/netpfil/ipfw/ip_fw_dynamic.c
trunk/sys/netpfil/ipfw/ip_fw_log.c
trunk/sys/netpfil/ipfw/ip_fw_nat.c
trunk/sys/netpfil/ipfw/ip_fw_pfil.c
trunk/sys/netpfil/ipfw/ip_fw_private.h
trunk/sys/netpfil/ipfw/ip_fw_sockopt.c
trunk/sys/netpfil/ipfw/ip_fw_table.c
trunk/sys/netpfil/ipfw/test/Makefile
trunk/sys/netpfil/ipfw/test/dn_test.h
trunk/sys/netpfil/ipfw/test/main.c
trunk/sys/netpfil/ipfw/test/mylist.h
trunk/sys/netpfil/ipfw/test/test_dn_heap.c
trunk/sys/netpfil/ipfw/test/test_dn_sched.c
Added Paths:
-----------
trunk/sys/netpfil/ipfw/dn_aqm.h
trunk/sys/netpfil/ipfw/dn_aqm_codel.c
trunk/sys/netpfil/ipfw/dn_aqm_codel.h
trunk/sys/netpfil/ipfw/dn_aqm_pie.c
trunk/sys/netpfil/ipfw/dn_aqm_pie.h
trunk/sys/netpfil/ipfw/dn_sched_fq_codel.c
trunk/sys/netpfil/ipfw/dn_sched_fq_codel.h
trunk/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h
trunk/sys/netpfil/ipfw/dn_sched_fq_pie.c
Added: trunk/sys/netpfil/ipfw/dn_aqm.h
===================================================================
--- trunk/sys/netpfil/ipfw/dn_aqm.h (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_aqm.h 2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,168 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from
+ * The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * API for writing an Active Queue Management algorithm for Dummynet
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_aqm.h 301772 2016-06-10 00:00:25Z truckman $
+ */
+
+#ifndef _IP_DN_AQM_H
+#define _IP_DN_AQM_H
+
+
+/* NOW is the current time in milliseconds */
+#define NOW ((dn_cfg.curr_time * tick) / 1000)
+
+#define AQM_UNOW (dn_cfg.curr_time * tick)
+#define AQM_TIME_1US ((aqm_time_t)(1))
+#define AQM_TIME_1MS ((aqm_time_t)(1000))
+#define AQM_TIME_1S ((aqm_time_t)(AQM_TIME_1MS * 1000))
+
+/* aqm time allows to store up to 4294 seconds */
+typedef uint32_t aqm_time_t;
+typedef int32_t aqm_stime_t;
+
+#define DN_AQM_MTAG_TS 55345
+
+/* Macro for variable bounding */
+#define BOUND_VAR(x,l,h) ((x) > (h)? (h) : ((x) > (l)? (x) : (l)))
+
+/* sysctl variable to count number of dropped packets */
+extern unsigned long io_pkt_drop;
+
+/*
+ * Structure for holding data and function pointers that together represent a
+ * AQM algorithm.
+ */
+ struct dn_aqm {
+#define DN_AQM_NAME_MAX 50
+ char name[DN_AQM_NAME_MAX]; /* name of AQM algorithm */
+ uint32_t type; /* AQM type number */
+
+ /* Methods implemented by AQM algorithm:
+ *
+ * enqueue enqueue packet 'm' on queue 'q'.
+ * Return 0 on success, 1 on drop.
+ *
+ * dequeue dequeue a packet from queue 'q'.
+ * Return a packet, NULL if no packet available.
+ *
+ * config configure AQM algorithm
+ * If required, this function should allocate space to store
+ * the configurations and set 'fs->aqmcfg' to point to this space.
+ * 'dn_extra_parms' includes array of parameters sent
+ * from ipfw userland command.
+ * Return 0 on success, non-zero otherwise.
+ *
+ * deconfig deconfigure AQM algorithm.
+ * The allocated configuration memory space should be freed here.
+ * Return 0 on success, non-zero otherwise.
+ *
+ * init initialise AQM status variables of queue 'q'
+ * This function is used to allocate space and init AQM status for a
+ * queue and set q->aqm_status to point to this space.
+ * Return 0 on success, non-zero otherwise.
+ *
+ * cleanup cleanup AQM status variables of queue 'q'
+ * The allocated memory space for AQM status should be freed here.
+ * Return 0 on success, non-zero otherwise.
+ *
+ * getconfig retrieve AQM configurations
+ * This function is used to return AQM parameters to userland
+ * command. The function should fill 'dn_extra_parms' struct with
+ * the AQM configurations using 'par' array.
+ *
+ */
+
+ int (*enqueue)(struct dn_queue *, struct mbuf *);
+ struct mbuf * (*dequeue)(struct dn_queue *);
+ int (*config)(struct dn_fsk *, struct dn_extra_parms *ep, int);
+ int (*deconfig)(struct dn_fsk *);
+ int (*init)(struct dn_queue *);
+ int (*cleanup)(struct dn_queue *);
+ int (*getconfig)(struct dn_fsk *, struct dn_extra_parms *);
+
+ int ref_count; /*Number of queues instances in the system */
+ int cfg_ref_count; /*Number of AQM instances in the system */
+ SLIST_ENTRY (dn_aqm) next; /* Next AQM in the list */
+};
+
+/* Helper function to update queue and scheduler statistics.
+ * negative len + drop -> drop
+ * negative len -> dequeue
+ * positive len -> enqueue
+ * positive len + drop -> drop during enqueue
+ */
+__inline static void
+update_stats(struct dn_queue *q, int len, int drop)
+{
+ int inc = 0;
+ struct dn_flow *sni;
+ struct dn_flow *qni;
+
+ sni = &q->_si->ni;
+ qni = &q->ni;
+
+ if (len < 0)
+ inc = -1;
+ else if(len > 0)
+ inc = 1;
+
+ if (drop) {
+ qni->drops++;
+ sni->drops++;
+ io_pkt_drop++;
+ } else {
+ /*update queue stats */
+ qni->length += inc;
+ qni->len_bytes += len;
+
+ /*update scheduler instance stats */
+ sni->length += inc;
+ sni->len_bytes += len;
+ }
+ /* tot_pkts is updated in dn_enqueue function */
+}
+
+
+/* kernel module related function */
+int
+dn_aqm_modevent(module_t mod, int cmd, void *arg);
+
+#define DECLARE_DNAQM_MODULE(name, dnaqm) \
+ static moduledata_t name##_mod = { \
+ #name, dn_aqm_modevent, dnaqm \
+ }; \
+ DECLARE_MODULE(name, name##_mod, \
+ SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); \
+ MODULE_DEPEND(name, dummynet, 3, 3, 3)
+
+#endif
Property changes on: trunk/sys/netpfil/ipfw/dn_aqm.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netpfil/ipfw/dn_aqm_codel.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_aqm_codel.c (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_aqm_codel.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,445 @@
+/* $MidnightBSD$ */
+/*
+ * Codel - The Controlled-Delay Active Queue Management algorithm.
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_aqm_codel.c 317489 2017-04-27 07:32:07Z truckman $
+ *
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from
+ * The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
+#include <net/netisr.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h> /* ip_len, ip_off */
+#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
+#include <netinet/ip_fw.h>
+#include <netinet/ip_dummynet.h>
+#include <netinet/if_ether.h> /* various ether_* routines */
+#include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */
+#include <netinet6/ip6_var.h>
+#include <netpfil/ipfw/dn_heap.h>
+
+#ifdef NEW_AQM
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/ip_dn_private.h>
+#include <netpfil/ipfw/dn_aqm.h>
+#include <netpfil/ipfw/dn_aqm_codel.h>
+#include <netpfil/ipfw/dn_sched.h>
+
+#define DN_AQM_CODEL 1
+
+static struct dn_aqm codel_desc;
+
+/* default codel parameters */
+struct dn_aqm_codel_parms codel_sysctl = {5000 * AQM_TIME_1US,
+ 100000 * AQM_TIME_1US, 0};
+
+static int
+codel_sysctl_interval_handler(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ long value;
+
+ value = codel_sysctl.interval;
+ value /= AQM_TIME_1US;
+ error = sysctl_handle_long(oidp, &value, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ if (value < 1 || value > 100 * AQM_TIME_1S)
+ return (EINVAL);
+ codel_sysctl.interval = value * AQM_TIME_1US ;
+ return (0);
+}
+
+static int
+codel_sysctl_target_handler(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ long value;
+
+ value = codel_sysctl.target;
+ value /= AQM_TIME_1US;
+ error = sysctl_handle_long(oidp, &value, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ D("%ld", value);
+ if (value < 1 || value > 5 * AQM_TIME_1S)
+ return (EINVAL);
+ codel_sysctl.target = value * AQM_TIME_1US ;
+ return (0);
+}
+
+/* defining Codel sysctl variables */
+SYSBEGIN(f4)
+
+SYSCTL_DECL(_net_inet);
+SYSCTL_DECL(_net_inet_ip);
+SYSCTL_DECL(_net_inet_ip_dummynet);
+static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO,
+ codel, CTLFLAG_RW, 0, "CODEL");
+
+#ifdef SYSCTL_NODE
+SYSCTL_PROC(_net_inet_ip_dummynet_codel, OID_AUTO, target,
+ CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,codel_sysctl_target_handler, "L",
+ "CoDel target in microsecond");
+
+SYSCTL_PROC(_net_inet_ip_dummynet_codel, OID_AUTO, interval,
+ CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, codel_sysctl_interval_handler, "L",
+ "CoDel interval in microsecond");
+#endif
+
+/* This function computes codel_interval/sqrt(count)
+ * Newton's method of approximation is used to compute 1/sqrt(count).
+ * http://betterexplained.com/articles/
+ * understanding-quakes-fast-inverse-square-root/
+ */
+aqm_time_t
+control_law(struct codel_status *cst, struct dn_aqm_codel_parms *cprms,
+ aqm_time_t t)
+{
+ uint32_t count;
+ uint64_t temp;
+ count = cst->count;
+
+ /* we don't calculate isqrt(1) to get more accurate result*/
+ if (count == 1) {
+ /* prepare isqrt (old guess) for the next iteration i.e. 1/sqrt(2)*/
+ cst->isqrt = (1UL<< FIX_POINT_BITS) * 7/10;
+ /* return time + isqrt(1)*interval */
+ return t + cprms->interval;
+ }
+
+ /* newguess = g(1.5 - 0.5*c*g^2)
+ * Multiplying both sides by 2 to make all the constants integers
+ * newguess * 2 = g(3 - c*g^2) g=old guess, c=count
+ * So, newguess = newguess /2
+ * Fixed point operations are used here.
+ */
+
+ /* Calculate g^2 */
+ temp = (uint32_t) cst->isqrt * cst->isqrt;
+ /* Calculate (3 - c*g^2) i.e. (3 - c * temp) */
+ temp = (3ULL<< (FIX_POINT_BITS*2)) - (count * temp);
+
+ /*
+ * Divide by 2 because we multiplied the original equation by two
+ * Also, we shift the result by 8 bits to prevent overflow.
+ * */
+ temp >>= (1 + 8);
+
+ /* Now, temp = (1.5 - 0.5*c*g^2)
+ * Calculate g (1.5 - 0.5*c*g^2) i.e. g * temp
+ */
+ temp = (cst->isqrt * temp) >> (FIX_POINT_BITS + FIX_POINT_BITS - 8);
+ cst->isqrt = temp;
+
+ /* calculate codel_interval/sqrt(count) */
+ return t + ((cprms->interval * temp) >> FIX_POINT_BITS);
+}
+
+/*
+ * Extract a packet from the head of queue 'q'
+ * Return a packet or NULL if the queue is empty.
+ * Also extract packet's timestamp from mtag.
+ */
+struct mbuf *
+codel_extract_head(struct dn_queue *q, aqm_time_t *pkt_ts)
+{
+ struct m_tag *mtag;
+ struct mbuf *m = q->mq.head;
+
+ if (m == NULL)
+ return m;
+ q->mq.head = m->m_nextpkt;
+
+ /* Update stats */
+ update_stats(q, -m->m_pkthdr.len, 0);
+
+ if (q->ni.length == 0) /* queue is now idle */
+ q->q_time = dn_cfg.curr_time;
+
+ /* extract packet TS*/
+ mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+ if (mtag == NULL) {
+ D("Codel timestamp mtag not found!");
+ *pkt_ts = 0;
+ } else {
+ *pkt_ts = *(aqm_time_t *)(mtag + 1);
+ m_tag_delete(m,mtag);
+ }
+
+ return m;
+}
+
+/*
+ * Enqueue a packet 'm' in queue 'q'
+ */
+static int
+aqm_codel_enqueue(struct dn_queue *q, struct mbuf *m)
+{
+ struct dn_fs *f;
+ uint64_t len;
+ struct codel_status *cst; /*codel status variables */
+ struct m_tag *mtag;
+
+ f = &(q->fs->fs);
+ len = m->m_pkthdr.len;
+ cst = q->aqm_status;
+ if(!cst) {
+ D("Codel queue is not initialized\n");
+ goto drop;
+ }
+
+ /* Finding maximum packet size */
+ // XXX we can get MTU from driver instead
+ if (len > cst->maxpkt_size)
+ cst->maxpkt_size = len;
+
+ /* check for queue size and drop the tail if exceed queue limit*/
+ if (f->flags & DN_QSIZE_BYTES) {
+ if ( q->ni.len_bytes > f->qsize)
+ goto drop;
+ }
+ else {
+ if ( q->ni.length >= f->qsize)
+ goto drop;
+ }
+
+ /* Add timestamp as mtag */
+ mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+ if (mtag == NULL)
+ mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS,
+ sizeof(aqm_time_t), M_NOWAIT);
+ if (mtag == NULL) {
+ m_freem(m);
+ goto drop;
+ }
+
+ *(aqm_time_t *)(mtag + 1) = AQM_UNOW;
+ m_tag_prepend(m, mtag);
+
+ mq_append(&q->mq, m);
+ update_stats(q, len, 0);
+ return (0);
+
+drop:
+ update_stats(q, 0, 1);
+ FREE_PKT(m);
+ return (1);
+}
+
+
+/* Dequeue a packet from queue q */
+static struct mbuf *
+aqm_codel_dequeue(struct dn_queue *q)
+{
+ return codel_dequeue(q);
+}
+
+/*
+ * initialize Codel for queue 'q'
+ * First allocate memory for codel status.
+ */
+static int
+aqm_codel_init(struct dn_queue *q)
+{
+ struct codel_status *cst;
+
+ if (!q->fs->aqmcfg) {
+ D("Codel is not configure!d");
+ return EINVAL;
+ }
+
+ q->aqm_status = malloc(sizeof(struct codel_status),
+ M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (q->aqm_status == NULL) {
+ D("Cannot allocate AQM_codel private data");
+ return ENOMEM ;
+ }
+
+ /* init codel status variables */
+ cst = q->aqm_status;
+ cst->dropping=0;
+ cst->first_above_time=0;
+ cst->drop_next_time=0;
+ cst->count=0;
+ cst->maxpkt_size = 500;
+
+ /* increase reference counters */
+ codel_desc.ref_count++;
+
+ return 0;
+}
+
+/*
+ * Clean up Codel status for queue 'q'
+ * Destroy memory allocated for codel status.
+ */
+static int
+aqm_codel_cleanup(struct dn_queue *q)
+{
+
+ if (q && q->aqm_status) {
+ free(q->aqm_status, M_DUMMYNET);
+ q->aqm_status = NULL;
+ /* decrease reference counters */
+ codel_desc.ref_count--;
+ }
+ else
+ D("Codel already cleaned up");
+ return 0;
+}
+
+/*
+ * Config codel parameters
+ * also allocate memory for codel configurations
+ */
+static int
+aqm_codel_config(struct dn_fsk* fs, struct dn_extra_parms *ep, int len)
+{
+ struct dn_aqm_codel_parms *ccfg;
+
+ int l = sizeof(struct dn_extra_parms);
+ if (len < l) {
+ D("invalid sched parms length got %d need %d", len, l);
+ return EINVAL;
+ }
+ /* we free the old cfg because maybe the original allocation
+ * not the same size as the new one (different AQM type).
+ */
+ if (fs->aqmcfg) {
+ free(fs->aqmcfg, M_DUMMYNET);
+ fs->aqmcfg = NULL;
+ }
+
+ fs->aqmcfg = malloc(sizeof(struct dn_aqm_codel_parms),
+ M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (fs->aqmcfg== NULL) {
+ D("cannot allocate AQM_codel configuration parameters");
+ return ENOMEM;
+ }
+
+ /* configure codel parameters */
+ ccfg = fs->aqmcfg;
+
+ if (ep->par[0] < 0)
+ ccfg->target = codel_sysctl.target;
+ else
+ ccfg->target = ep->par[0] * AQM_TIME_1US;
+
+ if (ep->par[1] < 0)
+ ccfg->interval = codel_sysctl.interval;
+ else
+ ccfg->interval = ep->par[1] * AQM_TIME_1US;
+
+ if (ep->par[2] < 0)
+ ccfg->flags = 0;
+ else
+ ccfg->flags = ep->par[2];
+
+ /* bound codel configurations */
+ ccfg->target = BOUND_VAR(ccfg->target,1, 5 * AQM_TIME_1S);
+ ccfg->interval = BOUND_VAR(ccfg->interval,1, 5 * AQM_TIME_1S);
+ /* increase config reference counter */
+ codel_desc.cfg_ref_count++;
+
+ return 0;
+}
+
+/*
+ * Deconfigure Codel and free memory allocation
+ */
+static int
+aqm_codel_deconfig(struct dn_fsk* fs)
+{
+
+ if (fs && fs->aqmcfg) {
+ free(fs->aqmcfg, M_DUMMYNET);
+ fs->aqmcfg = NULL;
+ fs->aqmfp = NULL;
+ /* decrease config reference counter */
+ codel_desc.cfg_ref_count--;
+ }
+
+ return 0;
+}
+
+/*
+ * Retrieve Codel configuration parameters.
+ */
+static int
+aqm_codel_getconfig(struct dn_fsk *fs, struct dn_extra_parms * ep)
+{
+ struct dn_aqm_codel_parms *ccfg;
+
+ if (fs->aqmcfg) {
+ strlcpy(ep->name, codel_desc.name, sizeof(ep->name));
+ ccfg = fs->aqmcfg;
+ ep->par[0] = ccfg->target / AQM_TIME_1US;
+ ep->par[1] = ccfg->interval / AQM_TIME_1US;
+ ep->par[2] = ccfg->flags;
+ return 0;
+ }
+ return 1;
+}
+
+static struct dn_aqm codel_desc = {
+ _SI( .type = ) DN_AQM_CODEL,
+ _SI( .name = ) "CODEL",
+ _SI( .enqueue = ) aqm_codel_enqueue,
+ _SI( .dequeue = ) aqm_codel_dequeue,
+ _SI( .config = ) aqm_codel_config,
+ _SI( .getconfig = ) aqm_codel_getconfig,
+ _SI( .deconfig = ) aqm_codel_deconfig,
+ _SI( .init = ) aqm_codel_init,
+ _SI( .cleanup = ) aqm_codel_cleanup,
+};
+
+DECLARE_DNAQM_MODULE(dn_aqm_codel, &codel_desc);
+
+
+#endif
Property changes on: trunk/sys/netpfil/ipfw/dn_aqm_codel.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netpfil/ipfw/dn_aqm_codel.h
===================================================================
--- trunk/sys/netpfil/ipfw/dn_aqm_codel.h (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_aqm_codel.h 2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,223 @@
+/* $MidnightBSD$ */
+/*
+ * Codel - The Controlled-Delay Active Queue Management algorithm.
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_aqm_codel.h 301772 2016-06-10 00:00:25Z truckman $
+ *
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from
+ * The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Copyright (C) 2011-2014 Kathleen Nichols <nichols at pollere.com>.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * o Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ *
+ * o Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * o The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General Public
+ * License ("GPL") version 2, in which case the provisions of the GPL
+ * apply INSTEAD OF those given above.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IP_DN_AQM_CODEL_H
+#define _IP_DN_AQM_CODEL_H
+
+
+// XXX How to choose MTAG?
+#define FIX_POINT_BITS 16
+
+enum {
+ CODEL_ECN_ENABLED = 1
+};
+
+/* Codel parameters */
+struct dn_aqm_codel_parms {
+ aqm_time_t target;
+ aqm_time_t interval;
+ uint32_t flags;
+};
+
+/* codel status variables */
+struct codel_status {
+ uint32_t count; /* number of dropped pkts since entering drop state */
+ uint16_t dropping; /* dropping state */
+ aqm_time_t drop_next_time; /* time for next drop */
+ aqm_time_t first_above_time; /* time for first ts over target we observed */
+ uint16_t isqrt; /* last isqrt for control law */
+ uint16_t maxpkt_size; /* max packet size seen so far */
+};
+
+struct mbuf *codel_extract_head(struct dn_queue *, aqm_time_t *);
+aqm_time_t control_law(struct codel_status *,
+ struct dn_aqm_codel_parms *, aqm_time_t );
+
+__inline static struct mbuf *
+codel_dodequeue(struct dn_queue *q, aqm_time_t now, uint16_t *ok_to_drop)
+{
+ struct mbuf * m;
+ struct dn_aqm_codel_parms *cprms;
+ struct codel_status *cst;
+ aqm_time_t pkt_ts, sojourn_time;
+
+ *ok_to_drop = 0;
+ m = codel_extract_head(q, &pkt_ts);
+
+ cst = q->aqm_status;
+
+ if (m == NULL) {
+ /* queue is empty - we can't be above target */
+ cst->first_above_time= 0;
+ return m;
+ }
+
+ cprms = q->fs->aqmcfg;
+
+ /* To span a large range of bandwidths, CoDel runs two
+ * different AQMs in parallel. One is sojourn-time-based
+ * and takes effect when the time to send an MTU-sized
+ * packet is less than target. The 1st term of the "if"
+ * below does this. The other is backlog-based and takes
+ * effect when the time to send an MTU-sized packet is >=
+ * target. The goal here is to keep the output link
+ * utilization high by never allowing the queue to get
+ * smaller than the amount that arrives in a typical
+ * interarrival time (MTU-sized packets arriving spaced
+ * by the amount of time it takes to send such a packet on
+ * the bottleneck). The 2nd term of the "if" does this.
+ */
+ sojourn_time = now - pkt_ts;
+ if (sojourn_time < cprms->target || q->ni.len_bytes <= cst->maxpkt_size) {
+ /* went below - stay below for at least interval */
+ cst->first_above_time = 0;
+ } else {
+ if (cst->first_above_time == 0) {
+ /* just went above from below. if still above at
+ * first_above_time, will say it's ok to drop. */
+ cst->first_above_time = now + cprms->interval;
+ } else if (now >= cst->first_above_time) {
+ *ok_to_drop = 1;
+ }
+ }
+ return m;
+}
+
+/*
+ * Dequeue a packet from queue 'q'
+ */
+__inline static struct mbuf *
+codel_dequeue(struct dn_queue *q)
+{
+ struct mbuf *m;
+ struct dn_aqm_codel_parms *cprms;
+ struct codel_status *cst;
+ aqm_time_t now;
+ uint16_t ok_to_drop;
+
+ cst = q->aqm_status;;
+ cprms = q->fs->aqmcfg;
+ now = AQM_UNOW;
+
+ m = codel_dodequeue(q, now, &ok_to_drop);
+ if (cst->dropping) {
+ if (!ok_to_drop) {
+ /* sojourn time below target - leave dropping state */
+ cst->dropping = false;
+ }
+ /*
+ * Time for the next drop. Drop current packet and dequeue
+ * next. If the dequeue doesn't take us out of dropping
+ * state, schedule the next drop. A large backlog might
+ * result in drop rates so high that the next drop should
+ * happen now, hence the 'while' loop.
+ */
+ while (now >= cst->drop_next_time && cst->dropping) {
+
+ /* mark the packet */
+ if (cprms->flags & CODEL_ECN_ENABLED && ecn_mark(m)) {
+ cst->count++;
+ /* schedule the next mark. */
+ cst->drop_next_time = control_law(cst, cprms,
+ cst->drop_next_time);
+ return m;
+ }
+
+ /* drop the packet */
+ update_stats(q, 0, 1);
+ FREE_PKT(m);
+ m = codel_dodequeue(q, now, &ok_to_drop);
+
+ if (!ok_to_drop) {
+ /* leave dropping state */
+ cst->dropping = false;
+ } else {
+ cst->count++;
+ /* schedule the next drop. */
+ cst->drop_next_time = control_law(cst, cprms,
+ cst->drop_next_time);
+ }
+ }
+ /* If we get here we're not in dropping state. The 'ok_to_drop'
+ * return from dodequeue means that the sojourn time has been
+ * above 'target' for 'interval' so enter dropping state.
+ */
+ } else if (ok_to_drop) {
+
+ /* if ECN option is disabled or the packet cannot be marked,
+ * drop the packet and extract another.
+ */
+ if (!(cprms->flags & CODEL_ECN_ENABLED) || !ecn_mark(m)) {
+ update_stats(q, 0, 1);
+ FREE_PKT(m);
+ m = codel_dodequeue(q, now, &ok_to_drop);
+ }
+
+ cst->dropping = true;
+
+ /* If min went above target close to when it last went
+ * below, assume that the drop rate that controlled the
+ * queue on the last cycle is a good starting point to
+ * control it now. ('drop_next' will be at most 'interval'
+ * later than the time of the last drop so 'now - drop_next'
+ * is a good approximation of the time from the last drop
+ * until now.)
+ */
+ cst->count = (cst->count > 2 && ((aqm_stime_t)now -
+ (aqm_stime_t)cst->drop_next_time) < 8* cprms->interval)?
+ cst->count - 2 : 1;
+ /* we don't have to set initial guess for Newton's method isqrt as
+ * we initialize isqrt in control_law function when count == 1 */
+ cst->drop_next_time = control_law(cst, cprms, now);
+ }
+
+ return m;
+}
+
+#endif
Property changes on: trunk/sys/netpfil/ipfw/dn_aqm_codel.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netpfil/ipfw/dn_aqm_pie.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_aqm_pie.c (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_aqm_pie.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,811 @@
+/* $MidnightBSD$ */
+/*
+ * PIE - Proportional Integral controller Enhanced AQM algorithm.
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_aqm_pie.c 318905 2017-05-25 22:41:34Z truckman $
+ *
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from
+ * The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
+#include <net/netisr.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h> /* ip_len, ip_off */
+#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
+#include <netinet/ip_fw.h>
+#include <netinet/ip_dummynet.h>
+#include <netinet/if_ether.h> /* various ether_* routines */
+#include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */
+#include <netinet6/ip6_var.h>
+#include <netpfil/ipfw/dn_heap.h>
+
+#ifdef NEW_AQM
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/ip_dn_private.h>
+#include <netpfil/ipfw/dn_aqm.h>
+#include <netpfil/ipfw/dn_aqm_pie.h>
+#include <netpfil/ipfw/dn_sched.h>
+
+/* for debugging */
+#include <sys/syslog.h>
+
+static struct dn_aqm pie_desc;
+
+/* PIE defaults
+ * target=15ms, tupdate=15ms, max_burst=150ms,
+ * max_ecnth=0.1, alpha=0.125, beta=1.25,
+ */
+struct dn_aqm_pie_parms pie_sysctl =
+ { 15 * AQM_TIME_1MS, 15 * AQM_TIME_1MS, 150 * AQM_TIME_1MS,
+ PIE_SCALE/10 , PIE_SCALE * 0.125, PIE_SCALE * 1.25 ,
+ PIE_CAPDROP_ENABLED | PIE_DEPRATEEST_ENABLED | PIE_DERAND_ENABLED };
+
+/*
+ * Sysctl handler for the PIE 'alpha' and 'beta' gains.
+ * Userland sees the value scaled by 1000; internally it is stored in
+ * PIE_SCALE fixed-point units.  Accepted range is [1, 7 * PIE_SCALE]
+ * in the scaled-by-1000 representation.
+ */
+static int
+pie_sysctl_alpha_beta_handler(SYSCTL_HANDLER_ARGS)
+{
+	uint16_t *field;
+	long value;
+	int error;
+
+	/* Select the parameter this OID refers to by its name. */
+	field = (strcmp(oidp->oid_name, "alpha") == 0) ?
+	    &pie_sysctl.alpha : &pie_sysctl.beta;
+
+	/* Export in milli-units. */
+	value = (long)*field * 1000 / PIE_SCALE;
+	error = sysctl_handle_long(oidp, &value, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (value < 1 || value > 7 * PIE_SCALE)
+		return (EINVAL);
+	/* Convert back to fixed-point before storing. */
+	*field = (value * PIE_SCALE) / 1000;
+	return (0);
+}
+
+/*
+ * Sysctl handler shared by the PIE 'target', 'tupdate' and 'max_burst'
+ * knobs.  Userland sees microseconds; internally values are stored in
+ * AQM time units.  Accepted range is [1, 10 * AQM_TIME_1S].
+ */
+static int
+pie_sysctl_target_tupdate_maxb_handler(SYSCTL_HANDLER_ARGS)
+{
+	aqm_time_t *field;
+	long value;
+	int error;
+
+	/* Select the parameter this OID refers to by its name. */
+	if (strcmp(oidp->oid_name, "target") == 0)
+		field = &pie_sysctl.qdelay_ref;
+	else if (strcmp(oidp->oid_name, "tupdate") == 0)
+		field = &pie_sysctl.tupdate;
+	else
+		field = &pie_sysctl.max_burst;
+
+	/* Export in microseconds. */
+	value = (long)(*field / AQM_TIME_1US);
+	error = sysctl_handle_long(oidp, &value, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (value < 1 || value > 10 * AQM_TIME_1S)
+		return (EINVAL);
+	/* Convert back to AQM time units before storing. */
+	*field = value * AQM_TIME_1US;
+	return (0);
+}
+
+/*
+ * Sysctl handler for the PIE ECN safeguard threshold 'max_ecnth'.
+ * Userland sees the value scaled by 1000; internally it is stored in
+ * PIE_SCALE fixed-point units.  Accepted range is [1, PIE_SCALE] in
+ * the scaled-by-1000 representation.
+ */
+static int
+pie_sysctl_max_ecnth_handler(SYSCTL_HANDLER_ARGS)
+{
+	long value;
+	int error;
+
+	/* Export in milli-units. */
+	value = (long)pie_sysctl.max_ecnth * 1000 / PIE_SCALE;
+	error = sysctl_handle_long(oidp, &value, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (value < 1 || value > PIE_SCALE)
+		return (EINVAL);
+	/* Convert back to fixed-point before storing. */
+	pie_sysctl.max_ecnth = (value * PIE_SCALE) / 1000;
+	return (0);
+}
+
+/* define PIE sysctl variables */
+SYSBEGIN(f4)
+SYSCTL_DECL(_net_inet);
+SYSCTL_DECL(_net_inet_ip);
+SYSCTL_DECL(_net_inet_ip_dummynet);
+static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO,
+ pie, CTLFLAG_RW, 0, "PIE");
+
+#ifdef SYSCTL_NODE
+SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, target,
+ CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+ pie_sysctl_target_tupdate_maxb_handler, "L",
+ "queue target in microsecond");
+SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, tupdate,
+ CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+ pie_sysctl_target_tupdate_maxb_handler, "L",
+ "the frequency of drop probability calculation in microsecond");
+SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, max_burst,
+ CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+ pie_sysctl_target_tupdate_maxb_handler, "L",
+ "Burst allowance interval in microsecond");
+
+SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, max_ecnth,
+ CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+ pie_sysctl_max_ecnth_handler, "L",
+ "ECN safeguard threshold scaled by 1000");
+
+SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, alpha,
+ CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+ pie_sysctl_alpha_beta_handler, "L",
+ "PIE alpha scaled by 1000");
+SYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, beta,
+ CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+ pie_sysctl_alpha_beta_handler, "L",
+ "beta scaled by 1000");
+#endif
+
+
+/*
+ * Callout function for drop probability calculation.
+ * This function is scheduled every 'tupdate' interval and takes a
+ * pointer to the PIE status variables as its argument.
+ *
+ * Locking: the callout was created with callout_init_mtx() on
+ * pst->lock_mtx and CALLOUT_RETURNUNLOCKED (see aqm_pie_init()), so the
+ * mutex is held on entry and must be — and is — released before return.
+ */
+static void
+calculate_drop_prob(void *x)
+{
+	int64_t p, prob, oldprob;
+	struct dn_aqm_pie_parms *pprms;
+	struct pie_status *pst = (struct pie_status *) x;
+	int p_isneg;
+
+	pprms = pst->parms;
+	prob = pst->drop_prob;
+
+	/* calculate current qdelay using DRE method.
+	 * If TS is used and no data in the queue, reset current_qdelay
+	 * as it stays at last value during dequeue process.
+	 */
+	if (pprms->flags & PIE_DEPRATEEST_ENABLED)
+		pst->current_qdelay = ((uint64_t)pst->pq->ni.len_bytes *
+			pst->avg_dq_time) >> PIE_DQ_THRESHOLD_BITS;
+	else
+		if (!pst->pq->ni.len_bytes)
+			pst->current_qdelay = 0;
+
+	/* calculate drop probability adjustment:
+	 * p = alpha * (qdelay - target) + beta * (qdelay - qdelay_old) */
+	p = (int64_t)pprms->alpha *
+		((int64_t)pst->current_qdelay - (int64_t)pprms->qdelay_ref);
+	p +=(int64_t) pprms->beta *
+		((int64_t)pst->current_qdelay - (int64_t)pst->qdelay_old);
+
+	/* take absolute value so right shift result is well defined */
+	p_isneg = p < 0;
+	if (p_isneg) {
+		p = -p;
+	}
+
+	/* We PIE_MAX_PROB shift by 12-bits to increase the division precision */
+	p *= (PIE_MAX_PROB << 12) / AQM_TIME_1S;
+
+	/* auto-tune drop probability: the smaller the current probability,
+	 * the more the adjustment is scaled down (larger right shift) */
+	if (prob < (PIE_MAX_PROB / 1000000)) /* 0.000001 */
+		p >>= 11 + PIE_FIX_POINT_BITS + 12;
+	else if (prob < (PIE_MAX_PROB / 100000)) /* 0.00001 */
+		p >>= 9 + PIE_FIX_POINT_BITS + 12;
+	else if (prob < (PIE_MAX_PROB / 10000)) /* 0.0001 */
+		p >>= 7 + PIE_FIX_POINT_BITS + 12;
+	else if (prob < (PIE_MAX_PROB / 1000)) /* 0.001 */
+		p >>= 5 + PIE_FIX_POINT_BITS + 12;
+	else if (prob < (PIE_MAX_PROB / 100)) /* 0.01 */
+		p >>= 3 + PIE_FIX_POINT_BITS + 12;
+	else if (prob < (PIE_MAX_PROB / 10)) /* 0.1 */
+		p >>= 1 + PIE_FIX_POINT_BITS + 12;
+	else
+		p >>= PIE_FIX_POINT_BITS + 12;
+
+	oldprob = prob;
+
+	if (p_isneg) {
+		prob = prob - p;
+
+		/* check for subtraction underflow (wrapped past zero) */
+		if (prob > oldprob) {
+			prob= 0;
+			D("underflow");
+		}
+	} else {
+		/* Cap Drop adjustment */
+		if ((pprms->flags & PIE_CAPDROP_ENABLED) &&
+		    prob >= PIE_MAX_PROB / 10 &&
+		    p > PIE_MAX_PROB / 50 ) {
+			p = PIE_MAX_PROB / 50;
+		}
+
+		prob = prob + p;
+
+		/* check for addition overflow */
+		if (prob<oldprob) {
+			D("overflow");
+			prob= PIE_MAX_PROB;
+		}
+	}
+
+	/*
+	 * decay the drop probability exponentially
+	 * and restrict it to range 0 to PIE_MAX_PROB
+	 */
+	if (prob < 0) {
+		prob = 0;
+	} else {
+		if (pst->current_qdelay == 0 && pst->qdelay_old == 0) {
+			/* 0.98 ~= 1- 1/64 */
+			prob = prob - (prob >> 6);
+		}
+
+		if (prob > PIE_MAX_PROB) {
+			prob = PIE_MAX_PROB;
+		}
+	}
+
+	pst->drop_prob = prob;
+
+	/* store current queue delay value in old queue delay*/
+	pst->qdelay_old = pst->current_qdelay;
+
+	/* update burst allowance */
+	if ((pst->sflags & PIE_ACTIVE) && pst->burst_allowance>0) {
+
+		if (pst->burst_allowance > pprms->tupdate )
+			pst->burst_allowance -= pprms->tupdate;
+		else
+			pst->burst_allowance = 0;
+	}
+
+	/* reschedule calculate_drop_prob function */
+	if (pst->sflags & PIE_ACTIVE)
+		callout_reset_sbt(&pst->aqm_pie_callout,
+			(uint64_t)pprms->tupdate * SBT_1US, 0, calculate_drop_prob, pst, 0);
+
+	/* release the callout mutex (CALLOUT_RETURNUNLOCKED) */
+	mtx_unlock(&pst->lock_mtx);
+}
+
+/*
+ * Extract a packet from the head of queue 'q'.
+ * Return a packet or NULL if the queue is empty.
+ * If getts is set, also extract the packet's enqueue timestamp from its
+ * mbuf tag into *pkt_ts (0 if the tag is missing); *pkt_ts is left
+ * untouched when getts is zero.
+ */
+static struct mbuf *
+pie_extract_head(struct dn_queue *q, aqm_time_t *pkt_ts, int getts)
+{
+	struct m_tag *mtag;
+	struct mbuf *m = q->mq.head;
+
+	if (m == NULL)
+		return m;
+	q->mq.head = m->m_nextpkt;
+
+	/* Update queue stats: one packet and its bytes leave the queue */
+	update_stats(q, -m->m_pkthdr.len, 0);
+
+	if (q->ni.length == 0) /* queue is now idle */
+		q->q_time = dn_cfg.curr_time;
+
+	if (getts) {
+		/* extract packet TS attached at enqueue time */
+		mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+		if (mtag == NULL) {
+			D("PIE timestamp mtag not found!");
+			*pkt_ts = 0;
+		} else {
+			*pkt_ts = *(aqm_time_t *)(mtag + 1);
+			m_tag_delete(m,mtag);
+		}
+	}
+	return m;
+}
+
+/*
+ * Initiate PIE variables and optionally activate it.
+ * Resets the PIE state (drop probability, burst allowance, departure
+ * rate estimation accumulators) under the status lock.  When
+ * 'resettimer' is non-zero, also marks PIE active and arms the tupdate
+ * callout that runs calculate_drop_prob().
+ */
+__inline static void
+init_activate_pie(struct pie_status *pst, int resettimer)
+{
+	struct dn_aqm_pie_parms *pprms;
+
+	mtx_lock(&pst->lock_mtx);
+	pprms = pst->parms;
+	pst->drop_prob = 0;
+	pst->qdelay_old = 0;
+	pst->burst_allowance = pprms->max_burst;
+	pst->accu_prob = 0;
+	pst->dq_count = 0;
+	pst->avg_dq_time = 0;
+	/* start in a departure-rate measurement cycle */
+	pst->sflags = PIE_INMEASUREMENT;
+	pst->measurement_start = AQM_UNOW;
+
+	if (resettimer) {
+		pst->sflags |= PIE_ACTIVE;
+		callout_reset_sbt(&pst->aqm_pie_callout,
+			(uint64_t)pprms->tupdate * SBT_1US,
+			0, calculate_drop_prob, pst, 0);
+	}
+	//DX(2, "PIE Activated");
+	mtx_unlock(&pst->lock_mtx);
+}
+
+/*
+ * Deactivate PIE and stop the drop-probability update callout.
+ * Clears the ACTIVE and INMEASUREMENT flags under the status lock and
+ * cancels any pending calculate_drop_prob() callout.
+ */
+__inline static void
+deactivate_pie(struct pie_status *pst)
+{
+	mtx_lock(&pst->lock_mtx);
+	pst->sflags &= ~(PIE_ACTIVE | PIE_INMEASUREMENT);
+	callout_stop(&pst->aqm_pie_callout);
+	//D("PIE Deactivated");
+	mtx_unlock(&pst->lock_mtx);
+}
+
+/*
+ * Dequeue and return a packet from queue 'q' or NULL if 'q' is empty.
+ * Also, calculate departure rate or queue delay:
+ * - with Departure Rate Estimation (DRE) enabled, maintain the average
+ *   time to drain PIE_DQ_THRESHOLD bytes (avg_dq_time);
+ * - otherwise, compute current_qdelay from the packet's enqueue
+ *   timestamp.
+ */
+static struct mbuf *
+aqm_pie_dequeue(struct dn_queue *q)
+{
+	struct mbuf *m;
+	struct dn_flow *ni;	/* stats for scheduler instance */
+	struct dn_aqm_pie_parms *pprms;
+	struct pie_status *pst;
+	aqm_time_t now;
+	aqm_time_t pkt_ts, dq_time;
+	int32_t w;
+
+	pst = q->aqm_status;
+	pprms = pst->parms;
+	ni = &q->_si->ni;
+
+	/* we extract the packet timestamp only when Departure Rate
+	 * Estimation is not used */
+	m = pie_extract_head(q, &pkt_ts, !(pprms->flags & PIE_DEPRATEEST_ENABLED));
+
+	/* nothing more to do for an empty queue or when PIE is inactive */
+	if (!m || !(pst->sflags & PIE_ACTIVE))
+		return m;
+
+	now = AQM_UNOW;
+	if (pprms->flags & PIE_DEPRATEEST_ENABLED) {
+		/* calculate average departure time */
+		if(pst->sflags & PIE_INMEASUREMENT) {
+			pst->dq_count += m->m_pkthdr.len;
+
+			/* measurement cycle complete: PIE_DQ_THRESHOLD
+			 * bytes have departed since measurement_start */
+			if (pst->dq_count >= PIE_DQ_THRESHOLD) {
+				dq_time = now - pst->measurement_start;
+
+				/*
+				 * if we don't have old avg dq_time i.e PIE is (re)initialized,
+				 * don't use weight to calculate new avg_dq_time
+				 */
+				if(pst->avg_dq_time == 0)
+					pst->avg_dq_time = dq_time;
+				else {
+					/*
+					 * weight = PIE_DQ_THRESHOLD/2^6, but we scaled
+					 * weight by 2^8. Thus, scaled
+					 * weight = PIE_DQ_THRESHOLD /2^8
+					 * */
+					w = PIE_DQ_THRESHOLD >> 8;
+					pst->avg_dq_time = (dq_time* w
+						+ (pst->avg_dq_time * ((1L << 8) - w))) >> 8;
+					pst->sflags &= ~PIE_INMEASUREMENT;
+				}
+			}
+		}
+
+		/*
+		 * Start new measurement cycle when the queue has
+		 * PIE_DQ_THRESHOLD worth of bytes.
+		 */
+		if(!(pst->sflags & PIE_INMEASUREMENT) &&
+			q->ni.len_bytes >= PIE_DQ_THRESHOLD) {
+			pst->sflags |= PIE_INMEASUREMENT;
+			pst->measurement_start = now;
+			pst->dq_count = 0;
+		}
+	}
+	/* Optionally, use packet timestamp to estimate queue delay */
+	else
+		pst->current_qdelay = now - pkt_ts;
+
+	return m;
+}
+
+/*
+ * Enqueue a packet in q, subject to space and PIE queue management policy
+ * (whose parameters are in q->fs).
+ * Update stats for the queue and the scheduler.
+ * Return 0 on success, 1 on drop. The packet is consumed anyway.
+ */
+static int
+aqm_pie_enqueue(struct dn_queue *q, struct mbuf* m)
+{
+	struct dn_fs *f;
+	uint64_t len;
+	uint32_t qlen;
+	struct pie_status *pst;
+	struct dn_aqm_pie_parms *pprms;
+	int t;
+
+	len = m->m_pkthdr.len;
+	pst = q->aqm_status;
+	if(!pst) {
+		DX(2, "PIE queue is not initialized\n");
+		update_stats(q, 0, 1);
+		FREE_PKT(m);
+		return 1;
+	}
+
+	f = &(q->fs->fs);
+	pprms = pst->parms;
+	t = ENQUE;
+
+	/* get current queue length in bytes or packets */
+	qlen = (f->flags & DN_QSIZE_BYTES) ?
+		q->ni.len_bytes : q->ni.length;
+
+	/* check for queue size and drop the tail if exceed queue limit */
+	if (qlen >= f->qsize)
+		t = DROP;
+	/* drop/mark the packet when PIE is active and burst time elapsed */
+	else if ((pst->sflags & PIE_ACTIVE) && pst->burst_allowance==0
+		&& drop_early(pst, q->ni.len_bytes) == DROP) {
+		/*
+		 * if drop_prob over ECN threshold, drop the packet
+		 * otherwise mark and enqueue it.
+		 */
+		if ((pprms->flags & PIE_ECN_ENABLED) && pst->drop_prob <
+			(pprms->max_ecnth << (PIE_PROB_BITS - PIE_FIX_POINT_BITS))
+			&& ecn_mark(m))
+			t = ENQUE;
+		else
+			t = DROP;
+	}
+
+	/* Turn PIE on when 1/3 of the queue is full */
+	if (!(pst->sflags & PIE_ACTIVE) && qlen >= pst->one_third_q_size) {
+		init_activate_pie(pst, 1);
+	}
+
+	/* Reset burst tolerance and optionally turn PIE off */
+	if ((pst->sflags & PIE_ACTIVE) && pst->drop_prob == 0 &&
+		pst->current_qdelay < (pprms->qdelay_ref >> 1) &&
+		pst->qdelay_old < (pprms->qdelay_ref >> 1)) {
+
+		pst->burst_allowance = pprms->max_burst;
+		if ((pprms->flags & PIE_ON_OFF_MODE_ENABLED) && qlen<=0)
+			deactivate_pie(pst);
+	}
+
+	/* Timestamp the packet if Departure Rate Estimation is disabled */
+	if (t != DROP && !(pprms->flags & PIE_DEPRATEEST_ENABLED)) {
+		/* Add TS to mbuf as a TAG */
+		struct m_tag *mtag;
+
+		mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+		if (mtag == NULL)
+			mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS,
+				sizeof(aqm_time_t), M_NOWAIT);
+		if (mtag == NULL) {
+			/*
+			 * Out of mbuf tags: take the common drop path
+			 * below instead of dereferencing the NULL tag.
+			 * Do NOT free 'm' here -- the drop path frees the
+			 * packet exactly once (the old code freed it and
+			 * then still dereferenced/prepended the NULL tag).
+			 */
+			t = DROP;
+		} else {
+			*(aqm_time_t *)(mtag + 1) = AQM_UNOW;
+			m_tag_prepend(m, mtag);
+		}
+	}
+
+	if (t != DROP) {
+		mq_append(&q->mq, m);
+		update_stats(q, len, 0);
+		return (0);
+	} else {
+		update_stats(q, 0, 1);
+
+		/* reset accu_prob after packet drop */
+		pst->accu_prob = 0;
+		FREE_PKT(m);
+		return 1;
+	}
+}
+
<doc_update>
+/*
+ * Initialize PIE for queue 'q'.
+ * First allocate memory for the PIE status, then initialize the lock,
+ * the tupdate callout and the PIE state.
+ * Returns 0 on success, EINVAL when no PIE configuration is attached
+ * to the flowset, or ENOMEM when the status structure cannot be
+ * allocated.
+ */
+static int
+aqm_pie_init(struct dn_queue *q)
+{
+	struct pie_status *pst;
+	struct dn_aqm_pie_parms *pprms;
+	int err = 0;
+
+	pprms = q->fs->aqmcfg;
+
+	do { /* exit with break when error occurs*/
+		if (!pprms){
+			DX(2, "AQM_PIE is not configured");
+			err = EINVAL;
+			break;
+		}
+
+		q->aqm_status = malloc(sizeof(struct pie_status),
+				 M_DUMMYNET, M_NOWAIT | M_ZERO);
+		if (q->aqm_status == NULL) {
+			D("cannot allocate PIE private data");
+			err =  ENOMEM ;
+			break;
+		}
+
+		pst = q->aqm_status;
+		/* increase reference count for PIE module */
+		pie_desc.ref_count++;
+
+		pst->pq = q;
+		pst->parms = pprms;
+
+		/* For speed optimization, we caculate 1/3 queue size once here */
+		// we can use x/3 = (x >>2) + (x >>4) + (x >>7)
+		pst->one_third_q_size = q->fs->fs.qsize/3;
+
+		/* CALLOUT_RETURNUNLOCKED: callout handlers release
+		 * lock_mtx themselves before returning */
+		mtx_init(&pst->lock_mtx, "mtx_pie", NULL, MTX_DEF);
+		callout_init_mtx(&pst->aqm_pie_callout, &pst->lock_mtx,
+			CALLOUT_RETURNUNLOCKED);
+
+		pst->current_qdelay = 0;
+		/* in on/off mode, PIE starts inactive and is turned on
+		 * later by aqm_pie_enqueue() when the queue fills */
+		init_activate_pie(pst, !(pprms->flags & PIE_ON_OFF_MODE_ENABLED));
+
+		//DX(2, "aqm_PIE_init");
+
+	} while(0);
+
+	return err;
+}
+
+/*
+ * Callout function to destroy the PIE mutex and free the PIE status
+ * memory.  Scheduled by aqm_pie_cleanup(); runs with pst->lock_mtx
+ * held (callout_init_mtx), releases and destroys the mutex, frees the
+ * status block, then drops the module reference under the dummynet
+ * writer lock.
+ */
+static void
+pie_callout_cleanup(void *x)
+{
+	struct pie_status *pst = (struct pie_status *) x;
+
+	mtx_unlock(&pst->lock_mtx);
+	mtx_destroy(&pst->lock_mtx);
+	free(x, M_DUMMYNET);
+	DN_BH_WLOCK();
+	pie_desc.ref_count--;
+	DN_BH_WUNLOCK();
+}
+
+/*
+ * Clean up PIE status for queue 'q'.
+ * Destroy memory allocated for PIE status (deferred to a callout).
+ * Returns 0 when there is nothing to clean up or cleanup was
+ * scheduled, 1 on a sanity-check failure.
+ */
+static int
+aqm_pie_cleanup(struct dn_queue *q)
+{
+
+	if(!q) {
+		D("q is null");
+		return 0;
+	}
+	struct pie_status *pst = q->aqm_status;
+	if(!pst) {
+		//D("queue is already cleaned up");
+		return 0;
+	}
+	if(!q->fs || !q->fs->aqmcfg) {
+		D("fs is null or no cfg");
+		return 1;
+	}
+	if (q->fs->aqmfp && q->fs->aqmfp->type !=DN_AQM_PIE) {
+		D("Not PIE fs (%d)", q->fs->fs.fs_nr);
+		return 1;
+	}
+
+	/*
+	 * Free PIE status allocated memory using pie_callout_cleanup() callout
+	 * function to avoid any potential race.
+	 * We reset aqm_pie_callout to call pie_callout_cleanup() in the next
+	 * 1us. This replaces any scheduled calculate_drop_prob() callout and
+	 * calls pie_callout_cleanup() which does the memory freeing.
+	 */
+	mtx_lock(&pst->lock_mtx);
+	callout_reset_sbt(&pst->aqm_pie_callout,
+		SBT_1US, 0, pie_callout_cleanup, pst, 0);
+	/* detach the status now; the callout owns it from here on */
+	q->aqm_status = NULL;
+	mtx_unlock(&pst->lock_mtx);
+
+	return 0;
+}
+
+/*
+ * Config PIE parameters for flowset 'fs' from userland extra
+ * parameters 'ep' (length 'len' must cover struct dn_extra_parms).
+ * Also allocates memory for the PIE configuration.  A negative ep->par
+ * entry selects the sysctl default for that parameter.
+ * Returns 0 on success, EINVAL or ENOMEM on failure.
+ */
+static int
+aqm_pie_config(struct dn_fsk* fs, struct dn_extra_parms *ep, int len)
+{
+	struct dn_aqm_pie_parms *pcfg;
+
+	int l = sizeof(struct dn_extra_parms);
+	if (len < l) {
+		D("invalid sched parms length got %d need %d", len, l);
+		return EINVAL;
+	}
+	/* we free the old cfg because maybe the original allocation
+	 * was used for a different AQM type.
+	 */
+	if (fs->aqmcfg) {
+		free(fs->aqmcfg, M_DUMMYNET);
+		fs->aqmcfg = NULL;
+	}
+
+	fs->aqmcfg = malloc(sizeof(struct dn_aqm_pie_parms),
+		 M_DUMMYNET, M_NOWAIT | M_ZERO);
+	if (fs->aqmcfg== NULL) {
+		D("cannot allocate PIE configuration parameters");
+		return ENOMEM;
+	}
+
+	/* par array contains pie configuration as follow
+	 * 0- qdelay_ref,1- tupdate, 2- max_burst
+	 * 3- max_ecnth, 4- alpha, 5- beta, 6- flags
+	 */
+
+	/* configure PIE parameters */
+	pcfg = fs->aqmcfg;
+
+	/* NOTE(review): the pie_sysctl time values already look like AQM
+	 * time units (initialized as N * AQM_TIME_1MS above), so the
+	 * "* AQM_TIME_1US" below is presumably a no-op unit conversion
+	 * (AQM_TIME_1US == 1) — confirm against dn_aqm.h */
+	if (ep->par[0] < 0)
+		pcfg->qdelay_ref = pie_sysctl.qdelay_ref * AQM_TIME_1US;
+	else
+		pcfg->qdelay_ref = ep->par[0];
+	if (ep->par[1] < 0)
+		pcfg->tupdate = pie_sysctl.tupdate * AQM_TIME_1US;
+	else
+		pcfg->tupdate = ep->par[1];
+	if (ep->par[2] < 0)
+		pcfg->max_burst = pie_sysctl.max_burst * AQM_TIME_1US;
+	else
+		pcfg->max_burst = ep->par[2];
+	if (ep->par[3] < 0)
+		pcfg->max_ecnth = pie_sysctl.max_ecnth;
+	else
+		pcfg->max_ecnth = ep->par[3];
+	if (ep->par[4] < 0)
+		pcfg->alpha = pie_sysctl.alpha;
+	else
+		pcfg->alpha = ep->par[4];
+	if (ep->par[5] < 0)
+		pcfg->beta = pie_sysctl.beta;
+	else
+		pcfg->beta = ep->par[5];
+	if (ep->par[6] < 0)
+		pcfg->flags = pie_sysctl.flags;
+	else
+		pcfg->flags = ep->par[6];
+
+	/* bound PIE configurations */
+	pcfg->qdelay_ref = BOUND_VAR(pcfg->qdelay_ref, 1, 10 * AQM_TIME_1S);
+	pcfg->tupdate = BOUND_VAR(pcfg->tupdate, 1, 10 * AQM_TIME_1S);
+	pcfg->max_burst = BOUND_VAR(pcfg->max_burst, 0, 10 * AQM_TIME_1S);
+	pcfg->max_ecnth = BOUND_VAR(pcfg->max_ecnth, 0, PIE_SCALE);
+	pcfg->alpha = BOUND_VAR(pcfg->alpha, 0, 7 * PIE_SCALE);
+	pcfg->beta = BOUND_VAR(pcfg->beta, 0 , 7 * PIE_SCALE);
+
+	pie_desc.cfg_ref_count++;
+	//D("pie cfg_ref_count=%d", pie_desc.cfg_ref_count);
+	return 0;
+}
+
+/*
+ * Deconfigure PIE: release the parameter block attached to 'fs'
+ * (if any) and drop the module configuration reference.
+ * Always returns 0.
+ */
+static int
+aqm_pie_deconfig(struct dn_fsk* fs)
+{
+	if (fs == NULL || fs->aqmcfg == NULL)
+		return 0;
+
+	free(fs->aqmcfg, M_DUMMYNET);
+	fs->aqmcfg = NULL;
+	pie_desc.cfg_ref_count--;
+	return 0;
+}
+
+/*
+ * Retrieve PIE configuration parameters.
+ * Fills 'ep' from the flowset's PIE configuration (times exported in
+ * microseconds, fixed-point values as-is).
+ * Returns 0 on success, 1 when no configuration is attached.
+ */
+static int
+aqm_pie_getconfig (struct dn_fsk *fs, struct dn_extra_parms * ep)
+{
+	struct dn_aqm_pie_parms *pcfg;
+
+	if (fs->aqmcfg == NULL)
+		return 1;
+
+	pcfg = fs->aqmcfg;
+	strlcpy(ep->name, pie_desc.name, sizeof(ep->name));
+	ep->par[0] = pcfg->qdelay_ref / AQM_TIME_1US;
+	ep->par[1] = pcfg->tupdate / AQM_TIME_1US;
+	ep->par[2] = pcfg->max_burst / AQM_TIME_1US;
+	ep->par[3] = pcfg->max_ecnth;
+	ep->par[4] = pcfg->alpha;
+	ep->par[5] = pcfg->beta;
+	ep->par[6] = pcfg->flags;
+	return 0;
+}
+
+static struct dn_aqm pie_desc = {
+ _SI( .type = ) DN_AQM_PIE,
+ _SI( .name = ) "PIE",
+ _SI( .ref_count = ) 0,
+ _SI( .cfg_ref_count = ) 0,
+ _SI( .enqueue = ) aqm_pie_enqueue,
+ _SI( .dequeue = ) aqm_pie_dequeue,
+ _SI( .config = ) aqm_pie_config,
+ _SI( .deconfig = ) aqm_pie_deconfig,
+ _SI( .getconfig = ) aqm_pie_getconfig,
+ _SI( .init = ) aqm_pie_init,
+ _SI( .cleanup = ) aqm_pie_cleanup,
+};
+
+DECLARE_DNAQM_MODULE(dn_aqm_pie, &pie_desc);
+#endif
Property changes on: trunk/sys/netpfil/ipfw/dn_aqm_pie.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netpfil/ipfw/dn_aqm_pie.h
===================================================================
--- trunk/sys/netpfil/ipfw/dn_aqm_pie.h (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_aqm_pie.h 2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,154 @@
+/* $MidnightBSD$ */
+/*
+ * PIE - Proportional Integral controller Enhanced AQM algorithm.
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_aqm_pie.h 316325 2017-03-31 06:33:20Z truckman $
+ *
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from
+ * The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _IP_DN_AQM_PIE_H
+#define _IP_DN_AQM_PIE_H
+
+#define DN_AQM_PIE 2
+#define PIE_DQ_THRESHOLD_BITS 14
+/* 2^14 =16KB */
+#define PIE_DQ_THRESHOLD (1L << PIE_DQ_THRESHOLD_BITS)
+#define MEAN_PKTSIZE 800
+
+/* 31-bits because random() generates range from 0->(2**31)-1 */
+#define PIE_PROB_BITS 31
+#define PIE_MAX_PROB ((1LL<<PIE_PROB_BITS) -1)
+
+/* for 16-bits, we have 3-bits for integer part and 13-bits for fraction */
+#define PIE_FIX_POINT_BITS 13
+#define PIE_SCALE (1L<<PIE_FIX_POINT_BITS)
+
+
+/* PIE options */
+enum {
+ PIE_ECN_ENABLED =1,
+ PIE_CAPDROP_ENABLED = 2,
+ PIE_ON_OFF_MODE_ENABLED = 4,
+ PIE_DEPRATEEST_ENABLED = 8,
+ PIE_DERAND_ENABLED = 16
+};
+
+/* PIE parameters */
+struct dn_aqm_pie_parms {
+ aqm_time_t qdelay_ref; /* AQM Latency Target (default: 15ms) */
+ aqm_time_t tupdate; /* a period to calculate drop probability (default:15ms) */
+ aqm_time_t max_burst; /* AQM Max Burst Allowance (default: 150ms) */
+ uint16_t max_ecnth; /*AQM Max ECN Marking Threshold (default: 10%) */
+ uint16_t alpha; /* (default: 1/8) */
+ uint16_t beta; /* (default: 1+1/4) */
+ uint32_t flags; /* PIE options */
+};
+
+/* PIE status variables */
+struct pie_status{
+ struct callout aqm_pie_callout;
+ aqm_time_t burst_allowance;
+ uint32_t drop_prob;
+ aqm_time_t current_qdelay;
+ aqm_time_t qdelay_old;
+ uint64_t accu_prob;
+ aqm_time_t measurement_start;
+ aqm_time_t avg_dq_time;
+ uint32_t dq_count;
+ uint32_t sflags;
+ struct dn_aqm_pie_parms *parms; /* pointer to PIE configurations */
+ /* pointer to parent queue of FQ-PIE sub-queues, or queue of owner fs. */
+ struct dn_queue *pq;
+ struct mtx lock_mtx;
+ uint32_t one_third_q_size; /* 1/3 of queue size, for speed optization */
+};
+
+enum {
+ ENQUE = 1,
+ DROP,
+ MARKECN
+};
+
+/* PIE current state */
+enum {
+ PIE_ACTIVE = 1,
+ PIE_INMEASUREMENT = 2
+};
+
+/*
+ * Check whether enqueue should drop the packet to control delay, based
+ * on the PIE algorithm.
+ * Return DROP if it is time to drop or ENQUE otherwise.
+ * This function is used by PIE and FQ-PIE.
+ */
+__inline static int
+drop_early(struct pie_status *pst, uint32_t qlen)
+{
+	struct dn_aqm_pie_parms *pprms;
+
+	pprms = pst->parms;
+
+	/* queue is not congested: delay below half the target with low
+	 * drop probability, or queue shorter than two mean packets */
+
+	if ((pst->qdelay_old < (pprms->qdelay_ref >> 1)
+		&& pst->drop_prob < PIE_MAX_PROB / 5 )
+		|| qlen <= 2 * MEAN_PKTSIZE)
+		return ENQUE;
+
+
+	if (pst->drop_prob == 0)
+		pst->accu_prob = 0;
+
+	/* increment accu_prob (accumulated drop probability) */
+	if (pprms->flags & PIE_DERAND_ENABLED)
+		pst->accu_prob += pst->drop_prob;
+
+	/* De-randomize option
+	 * if accu_prob < 0.85 -> enqueue
+	 * if accu_prob > 8.5 -> drop
+	 * between 0.85 and 8.5 || !De-randomize --> drop on prob
+	 *
+	 * (0.85 = 17/20, 8.5 = 17/2)
+	 */
+	if (pprms->flags & PIE_DERAND_ENABLED) {
+		if(pst->accu_prob < (uint64_t) (PIE_MAX_PROB * 17 / 20))
+			return ENQUE;
+		if( pst->accu_prob >= (uint64_t) (PIE_MAX_PROB * 17 / 2))
+			return DROP;
+	}
+
+	/* randomized drop decision against the current drop probability */
+	if (random() < pst->drop_prob) {
+		pst->accu_prob = 0;
+		return DROP;
+	}
+
+	return ENQUE;
+}
+
+#endif
Property changes on: trunk/sys/netpfil/ipfw/dn_aqm_pie.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netpfil/ipfw/dn_heap.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_heap.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dn_heap.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
* All rights reserved
@@ -27,13 +28,13 @@
/*
* Binary heap and hash tables, used in dummynet
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_heap.c 240494 2012-09-14 11:51:49Z glebius $
*/
#include <sys/cdefs.h>
#include <sys/param.h>
#ifdef _KERNEL
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/dn_heap.c 240494 2012-09-14 11:51:49Z glebius $");
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
@@ -59,7 +60,7 @@
#define free(p, t) my_free(p)
#endif /* !_KERNEL */
-MALLOC_DEFINE(M_DN_HEAP, "dummynet", "dummynet heap");
+static MALLOC_DEFINE(M_DN_HEAP, "dummynet", "dummynet heap");
/*
* Heap management functions.
Modified: trunk/sys/netpfil/ipfw/dn_heap.h
===================================================================
--- trunk/sys/netpfil/ipfw/dn_heap.h 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dn_heap.h 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1998-2010 Luigi Rizzo, Universita` di Pisa
* All rights reserved
@@ -27,7 +28,7 @@
/*
* Binary heap and hash tables, header file
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_heap.h 313726 2017-02-14 04:52:24Z ngie $
*/
#ifndef _IP_DN_HEAP_H
@@ -85,7 +86,7 @@
* HEAP_TOP() returns a pointer to the top element of the heap,
* but makes no checks on its existance (XXX should we change ?)
*
- * heap_extract() removes the entry at the top, returing the pointer.
+ * heap_extract() removes the entry at the top, returning the pointer.
* (the key should have been read before).
*
* heap_scan() invokes a callback on each entry of the heap.
Modified: trunk/sys/netpfil/ipfw/dn_sched.h
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched.h 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dn_sched.h 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*
* Copyright (c) 2010 Riccardo Panicucci, Luigi Rizzo, Universita` di Pisa
* All rights reserved
@@ -27,7 +28,7 @@
/*
* The API to write a packet scheduling algorithm for dummynet.
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched.h 301772 2016-06-10 00:00:25Z truckman $
*/
#ifndef _DN_SCHED_H
@@ -132,6 +133,10 @@
int (*free_fsk)(struct dn_fsk *f);
int (*new_queue)(struct dn_queue *q);
int (*free_queue)(struct dn_queue *q);
+#ifdef NEW_AQM
+ /* Getting scheduler extra parameters */
+ int (*getconfig)(struct dn_schk *, struct dn_extra_parms *);
+#endif
/* run-time fields */
int ref_count; /* XXX number of instances in the system */
@@ -165,6 +170,11 @@
struct mbuf *m = q->mq.head;
if (m == NULL)
return NULL;
+#ifdef NEW_AQM
+ /* Call AQM dequeue function */
+ if (q->fs->aqmfp && q->fs->aqmfp->dequeue )
+ return q->fs->aqmfp->dequeue(q);
+#endif
q->mq.head = m->m_nextpkt;
/* Update stats for the queue */
Modified: trunk/sys/netpfil/ipfw/dn_sched_fifo.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_fifo.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dn_sched_fifo.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*
* Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
* All rights reserved
@@ -25,7 +26,7 @@
*/
/*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_fifo.c 325731 2017-11-12 01:28:20Z truckman $
*/
#ifdef _KERNEL
@@ -33,15 +34,21 @@
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/module.h>
+#include <sys/rwlock.h>
#include <net/if.h> /* IFNAMSIZ */
#include <netinet/in.h>
#include <netinet/ip_var.h> /* ipfw_rule_ref */
#include <netinet/ip_fw.h> /* flow_id */
#include <netinet/ip_dummynet.h>
+#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/dn_heap.h>
#include <netpfil/ipfw/ip_dn_private.h>
+#ifdef NEW_AQM
+#include <netpfil/ipfw/dn_aqm.h>
+#endif
#include <netpfil/ipfw/dn_sched.h>
#else
#include <dn_test.h>
@@ -115,6 +122,9 @@
_SI( .free_fsk = ) NULL,
_SI( .new_queue = ) NULL,
_SI( .free_queue = ) NULL,
+#ifdef NEW_AQM
+ _SI( .getconfig = ) NULL,
+#endif
};
DECLARE_DNSCHED_MODULE(dn_fifo, &fifo_desc);
Added: trunk/sys/netpfil/ipfw/dn_sched_fq_codel.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_fq_codel.c (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_sched_fq_codel.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,618 @@
+/* $MidnightBSD$ */
+/*
+ * FQ_Codel - The FlowQueue-Codel scheduler/AQM
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_fq_codel.c 325731 2017-11-12 01:28:20Z truckman $
+ *
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from
+ * The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/socket.h>
+//#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <net/if.h> /* IFNAMSIZ */
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* ipfw_rule_ref */
+#include <netinet/ip_fw.h> /* flow_id */
+#include <netinet/ip_dummynet.h>
+
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <sys/sysctl.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sys/queue.h>
+#include <sys/hash.h>
+
+#include <netpfil/ipfw/dn_heap.h>
+#include <netpfil/ipfw/ip_dn_private.h>
+
+#include <netpfil/ipfw/dn_aqm.h>
+#include <netpfil/ipfw/dn_aqm_codel.h>
+#include <netpfil/ipfw/dn_sched.h>
+#include <netpfil/ipfw/dn_sched_fq_codel.h>
+#include <netpfil/ipfw/dn_sched_fq_codel_helper.h>
+
+#else
+#include <dn_test.h>
+#endif
+
+/* NOTE: In the fq_codel module, we reimplement the CoDel AQM functions
+ * because fq_codel uses a different flow (sub-queue) structure and
+ * dn_queue includes many variables not needed by a flow (sub-queue),
+ * i.e. we avoid extra overhead (88 bytes vs 208 bytes).
+ * Also, the CoDel functions manage stats of the sub-queues as well as the main queue.
+ */
+
+#define DN_SCHED_FQ_CODEL 6
+
+static struct dn_alg fq_codel_desc;
+
+/* fq_codel default parameters including codel */
+struct dn_sch_fq_codel_parms
+fq_codel_sysctl = {{5000 * AQM_TIME_1US, 100000 * AQM_TIME_1US,
+ CODEL_ECN_ENABLED}, 1024, 10240, 1514};
+
+static int
+fqcodel_sysctl_interval_handler(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ long value;
+
+ value = fq_codel_sysctl.ccfg.interval;
+ value /= AQM_TIME_1US;
+ error = sysctl_handle_long(oidp, &value, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ if (value < 1 || value > 100 * AQM_TIME_1S)
+ return (EINVAL);
+ fq_codel_sysctl.ccfg.interval = value * AQM_TIME_1US ;
+
+ return (0);
+}
+
+static int
+fqcodel_sysctl_target_handler(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ long value;
+
+ value = fq_codel_sysctl.ccfg.target;
+ value /= AQM_TIME_1US;
+ error = sysctl_handle_long(oidp, &value, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ if (value < 1 || value > 5 * AQM_TIME_1S)
+ return (EINVAL);
+ fq_codel_sysctl.ccfg.target = value * AQM_TIME_1US ;
+
+ return (0);
+}
+
+
+SYSBEGIN(f4)
+
+SYSCTL_DECL(_net_inet);
+SYSCTL_DECL(_net_inet_ip);
+SYSCTL_DECL(_net_inet_ip_dummynet);
+static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, fqcodel,
+ CTLFLAG_RW, 0, "FQ_CODEL");
+
+#ifdef SYSCTL_NODE
+
+SYSCTL_PROC(_net_inet_ip_dummynet_fqcodel, OID_AUTO, target,
+ CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, fqcodel_sysctl_target_handler, "L",
+ "FQ_CoDel target in microsecond");
+SYSCTL_PROC(_net_inet_ip_dummynet_fqcodel, OID_AUTO, interval,
+ CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, fqcodel_sysctl_interval_handler, "L",
+ "FQ_CoDel interval in microsecond");
+
+SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, quantum,
+ CTLFLAG_RW, &fq_codel_sysctl.quantum, 1514, "FQ_CoDel quantum");
+SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, flows,
+ CTLFLAG_RW, &fq_codel_sysctl.flows_cnt, 1024,
+ "Number of queues for FQ_CoDel");
+SYSCTL_UINT(_net_inet_ip_dummynet_fqcodel, OID_AUTO, limit,
+ CTLFLAG_RW, &fq_codel_sysctl.limit, 10240, "FQ_CoDel queues size limit");
+#endif
+
+/* Drop a packet from the head of the codel queue */
+static void
+codel_drop_head(struct fq_codel_flow *q, struct fq_codel_si *si)
+{
+ struct mbuf *m = q->mq.head;
+
+ if (m == NULL)
+ return;
+ q->mq.head = m->m_nextpkt;
+
+ fq_update_stats(q, si, -m->m_pkthdr.len, 1);
+
+ if (si->main_q.ni.length == 0) /* queue is now idle */
+ si->main_q.q_time = dn_cfg.curr_time;
+
+ FREE_PKT(m);
+}
+
+/* Enqueue a packet 'm' to a queue 'q' and add timestamp to that packet.
+ * Return 1 when unable to add timestamp, otherwise return 0
+ */
+static int
+codel_enqueue(struct fq_codel_flow *q, struct mbuf *m, struct fq_codel_si *si)
+{
+ uint64_t len;
+
+ len = m->m_pkthdr.len;
+ /* finding maximum packet size */
+ if (len > q->cst.maxpkt_size)
+ q->cst.maxpkt_size = len;
+
+ /* Add timestamp to mbuf as MTAG */
+ struct m_tag *mtag;
+ mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+ if (mtag == NULL)
+ mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, sizeof(aqm_time_t),
+ M_NOWAIT);
+ if (mtag == NULL) {
+ m_freem(m);
+ goto drop;
+ }
+ *(aqm_time_t *)(mtag + 1) = AQM_UNOW;
+ m_tag_prepend(m, mtag);
+
+ mq_append(&q->mq, m);
+ fq_update_stats(q, si, len, 0);
+ return 0;
+
+drop:
+ fq_update_stats(q, si, len, 1);
+ m_freem(m);
+ return 1;
+}
+
+/*
+ * Classify a packet to queue number using Jenkins hash function.
+ * Return: queue number
+ * the input of the hash are protocol no, perturbation, src IP, dst IP,
+ * src port, dst port,
+ */
+static inline int
+fq_codel_classify_flow(struct mbuf *m, uint16_t fcount, struct fq_codel_si *si)
+{
+ struct ip *ip;
+ struct tcphdr *th;
+ struct udphdr *uh;
+ uint8_t tuple[41];
+ uint16_t hash=0;
+
+ ip = (struct ip *)mtodo(m, dn_tag_get(m)->iphdr_off);
+//#ifdef INET6
+ struct ip6_hdr *ip6;
+ int isip6;
+ isip6 = (ip->ip_v == 6);
+
+ if(isip6) {
+ ip6 = (struct ip6_hdr *)ip;
+ *((uint8_t *) &tuple[0]) = ip6->ip6_nxt;
+ *((uint32_t *) &tuple[1]) = si->perturbation;
+ memcpy(&tuple[5], ip6->ip6_src.s6_addr, 16);
+ memcpy(&tuple[21], ip6->ip6_dst.s6_addr, 16);
+
+ switch (ip6->ip6_nxt) {
+ case IPPROTO_TCP:
+ th = (struct tcphdr *)(ip6 + 1);
+ *((uint16_t *) &tuple[37]) = th->th_dport;
+ *((uint16_t *) &tuple[39]) = th->th_sport;
+ break;
+
+ case IPPROTO_UDP:
+ uh = (struct udphdr *)(ip6 + 1);
+ *((uint16_t *) &tuple[37]) = uh->uh_dport;
+ *((uint16_t *) &tuple[39]) = uh->uh_sport;
+ break;
+ default:
+ memset(&tuple[37], 0, 4);
+
+ }
+
+ hash = jenkins_hash(tuple, 41, HASHINIT) % fcount;
+ return hash;
+ }
+//#endif
+
+ /* IPv4 */
+ *((uint8_t *) &tuple[0]) = ip->ip_p;
+ *((uint32_t *) &tuple[1]) = si->perturbation;
+ *((uint32_t *) &tuple[5]) = ip->ip_src.s_addr;
+ *((uint32_t *) &tuple[9]) = ip->ip_dst.s_addr;
+
+ switch (ip->ip_p) {
+ case IPPROTO_TCP:
+ th = (struct tcphdr *)(ip + 1);
+ *((uint16_t *) &tuple[13]) = th->th_dport;
+ *((uint16_t *) &tuple[15]) = th->th_sport;
+ break;
+
+ case IPPROTO_UDP:
+ uh = (struct udphdr *)(ip + 1);
+ *((uint16_t *) &tuple[13]) = uh->uh_dport;
+ *((uint16_t *) &tuple[15]) = uh->uh_sport;
+ break;
+ default:
+ memset(&tuple[13], 0, 4);
+
+ }
+ hash = jenkins_hash(tuple, 17, HASHINIT) % fcount;
+
+ return hash;
+}
+
+/*
+ * Enqueue a packet into an appropriate queue according to
+ * FQ_CODEL algorithm.
+ */
+static int
+fq_codel_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q,
+ struct mbuf *m)
+{
+ struct fq_codel_si *si;
+ struct fq_codel_schk *schk;
+ struct dn_sch_fq_codel_parms *param;
+ struct dn_queue *mainq;
+ int idx, drop, i, maxidx;
+
+ mainq = (struct dn_queue *)(_si + 1);
+ si = (struct fq_codel_si *)_si;
+ schk = (struct fq_codel_schk *)(si->_si.sched+1);
+ param = &schk->cfg;
+
+ /* classify a packet to queue number*/
+ idx = fq_codel_classify_flow(m, param->flows_cnt, si);
+	/* enqueue packet into appropriate queue using CoDel AQM.
+	 * Note: the 'codel_enqueue' function returns 1 only when it is unable to
+	 * add a timestamp to the packet (no limit check)*/
+ drop = codel_enqueue(&si->flows[idx], m, si);
+
+ /* codel unable to timestamp a packet */
+ if (drop)
+ return 1;
+
+ /* If the flow (sub-queue) is not active ,then add it to the tail of
+ * new flows list, initialize and activate it.
+ */
+ if (!si->flows[idx].active ) {
+ STAILQ_INSERT_TAIL(&si->newflows, &si->flows[idx], flowchain);
+ si->flows[idx].deficit = param->quantum;
+ si->flows[idx].cst.dropping = false;
+ si->flows[idx].cst.first_above_time = 0;
+ si->flows[idx].active = 1;
+ //D("activate %d",idx);
+ }
+
+ /* check the limit for all queues and remove a packet from the
+ * largest one
+ */
+ if (mainq->ni.length > schk->cfg.limit) { D("over limit");
+ /* find first active flow */
+ for (maxidx = 0; maxidx < schk->cfg.flows_cnt; maxidx++)
+ if (si->flows[maxidx].active)
+ break;
+ if (maxidx < schk->cfg.flows_cnt) {
+ /* find the largest sub- queue */
+ for (i = maxidx + 1; i < schk->cfg.flows_cnt; i++)
+ if (si->flows[i].active && si->flows[i].stats.length >
+ si->flows[maxidx].stats.length)
+ maxidx = i;
+ codel_drop_head(&si->flows[maxidx], si);
+ D("maxidx = %d",maxidx);
+ drop = 1;
+ }
+ }
+
+ return drop;
+}
+
+/*
+ * Dequeue a packet from an appropriate queue according to
+ * FQ_CODEL algorithm.
+ */
+static struct mbuf *
+fq_codel_dequeue(struct dn_sch_inst *_si)
+{
+ struct fq_codel_si *si;
+ struct fq_codel_schk *schk;
+ struct dn_sch_fq_codel_parms *param;
+ struct fq_codel_flow *f;
+ struct mbuf *mbuf;
+ struct fq_codel_list *fq_codel_flowlist;
+
+ si = (struct fq_codel_si *)_si;
+ schk = (struct fq_codel_schk *)(si->_si.sched+1);
+ param = &schk->cfg;
+
+ do {
+ /* select a list to start with */
+ if (STAILQ_EMPTY(&si->newflows))
+ fq_codel_flowlist = &si->oldflows;
+ else
+ fq_codel_flowlist = &si->newflows;
+
+ /* Both new and old queue lists are empty, return NULL */
+ if (STAILQ_EMPTY(fq_codel_flowlist))
+ return NULL;
+
+ f = STAILQ_FIRST(fq_codel_flowlist);
+ while (f != NULL) {
+ /* if there is no flow(sub-queue) deficit, increase deficit
+ * by quantum, move the flow to the tail of old flows list
+ * and try another flow.
+ * Otherwise, the flow will be used for dequeue.
+ */
+ if (f->deficit < 0) {
+ f->deficit += param->quantum;
+ STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
+ STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
+ } else
+ break;
+
+ f = STAILQ_FIRST(fq_codel_flowlist);
+ }
+
+ /* the new flows list is empty, try old flows list */
+ if (STAILQ_EMPTY(fq_codel_flowlist))
+ continue;
+
+ /* Dequeue a packet from the selected flow */
+ mbuf = fqc_codel_dequeue(f, si);
+
+ /* Codel did not return a packet */
+ if (!mbuf) {
+			/* If the selected flow belongs to the new flows list, then move
+			 * it to the tail of the old flows list. Otherwise, deactivate it
+			 * and remove it from the old flows list.
+			 */
+ if (fq_codel_flowlist == &si->newflows) {
+ STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
+ STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
+ } else {
+ f->active = 0;
+ STAILQ_REMOVE_HEAD(fq_codel_flowlist, flowchain);
+ }
+ /* start again */
+ continue;
+ }
+
+ /* we have a packet to return,
+ * update flow deficit and return the packet*/
+ f->deficit -= mbuf->m_pkthdr.len;
+ return mbuf;
+
+ } while (1);
+
+ /* unreachable point */
+ return NULL;
+}
+
+/*
+ * Initialize fq_codel scheduler instance.
+ * also, allocate memory for flows array.
+ */
+static int
+fq_codel_new_sched(struct dn_sch_inst *_si)
+{
+ struct fq_codel_si *si;
+ struct dn_queue *q;
+ struct fq_codel_schk *schk;
+ int i;
+
+ si = (struct fq_codel_si *)_si;
+ schk = (struct fq_codel_schk *)(_si->sched+1);
+
+ if(si->flows) {
+ D("si already configured!");
+ return 0;
+ }
+
+ /* init the main queue */
+ q = &si->main_q;
+ set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q));
+ q->_si = _si;
+ q->fs = _si->sched->fs;
+
+ /* allocate memory for flows array */
+ si->flows = malloc(schk->cfg.flows_cnt * sizeof(struct fq_codel_flow),
+ M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (si->flows == NULL) {
+ D("cannot allocate memory for fq_codel configuration parameters");
+ return ENOMEM ;
+ }
+
+ /* init perturbation for this si */
+ si->perturbation = random();
+
+ /* init the old and new flows lists */
+ STAILQ_INIT(&si->newflows);
+ STAILQ_INIT(&si->oldflows);
+
+ /* init the flows (sub-queues) */
+ for (i = 0; i < schk->cfg.flows_cnt; i++) {
+ /* init codel */
+ si->flows[i].cst.maxpkt_size = 500;
+ }
+
+ fq_codel_desc.ref_count++;
+ return 0;
+}
+
+/*
+ * Free fq_codel scheduler instance.
+ */
+static int
+fq_codel_free_sched(struct dn_sch_inst *_si)
+{
+ struct fq_codel_si *si = (struct fq_codel_si *)_si ;
+
+ /* free the flows array */
+ free(si->flows , M_DUMMYNET);
+ si->flows = NULL;
+ fq_codel_desc.ref_count--;
+
+ return 0;
+}
+
+/*
+ * Configure fq_codel scheduler.
+ * the configurations for the scheduler is passed from userland.
+ */
+static int
+fq_codel_config(struct dn_schk *_schk)
+{
+ struct fq_codel_schk *schk;
+ struct dn_extra_parms *ep;
+ struct dn_sch_fq_codel_parms *fqc_cfg;
+
+ schk = (struct fq_codel_schk *)(_schk+1);
+ ep = (struct dn_extra_parms *) _schk->cfg;
+
+ /* par array contains fq_codel configuration as follow
+ * Codel: 0- target,1- interval, 2- flags
+ * FQ_CODEL: 3- quantum, 4- limit, 5- flows
+ */
+ if (ep && ep->oid.len ==sizeof(*ep) &&
+ ep->oid.subtype == DN_SCH_PARAMS) {
+
+ fqc_cfg = &schk->cfg;
+ if (ep->par[0] < 0)
+ fqc_cfg->ccfg.target = fq_codel_sysctl.ccfg.target;
+ else
+ fqc_cfg->ccfg.target = ep->par[0] * AQM_TIME_1US;
+
+ if (ep->par[1] < 0)
+ fqc_cfg->ccfg.interval = fq_codel_sysctl.ccfg.interval;
+ else
+ fqc_cfg->ccfg.interval = ep->par[1] * AQM_TIME_1US;
+
+ if (ep->par[2] < 0)
+ fqc_cfg->ccfg.flags = 0;
+ else
+ fqc_cfg->ccfg.flags = ep->par[2];
+
+ /* FQ configurations */
+ if (ep->par[3] < 0)
+ fqc_cfg->quantum = fq_codel_sysctl.quantum;
+ else
+ fqc_cfg->quantum = ep->par[3];
+
+ if (ep->par[4] < 0)
+ fqc_cfg->limit = fq_codel_sysctl.limit;
+ else
+ fqc_cfg->limit = ep->par[4];
+
+ if (ep->par[5] < 0)
+ fqc_cfg->flows_cnt = fq_codel_sysctl.flows_cnt;
+ else
+ fqc_cfg->flows_cnt = ep->par[5];
+
+ /* Bound the configurations */
+ fqc_cfg->ccfg.target = BOUND_VAR(fqc_cfg->ccfg.target, 1 ,
+ 5 * AQM_TIME_1S); ;
+ fqc_cfg->ccfg.interval = BOUND_VAR(fqc_cfg->ccfg.interval, 1,
+ 100 * AQM_TIME_1S);
+
+ fqc_cfg->quantum = BOUND_VAR(fqc_cfg->quantum,1, 9000);
+ fqc_cfg->limit= BOUND_VAR(fqc_cfg->limit,1,20480);
+ fqc_cfg->flows_cnt= BOUND_VAR(fqc_cfg->flows_cnt,1,65536);
+ }
+ else
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Return fq_codel scheduler configurations
+ * the configurations for the scheduler is passed to userland.
+ */
+static int
+fq_codel_getconfig (struct dn_schk *_schk, struct dn_extra_parms *ep) {
+
+ struct fq_codel_schk *schk = (struct fq_codel_schk *)(_schk+1);
+ struct dn_sch_fq_codel_parms *fqc_cfg;
+
+ fqc_cfg = &schk->cfg;
+
+ strcpy(ep->name, fq_codel_desc.name);
+ ep->par[0] = fqc_cfg->ccfg.target / AQM_TIME_1US;
+ ep->par[1] = fqc_cfg->ccfg.interval / AQM_TIME_1US;
+ ep->par[2] = fqc_cfg->ccfg.flags;
+
+ ep->par[3] = fqc_cfg->quantum;
+ ep->par[4] = fqc_cfg->limit;
+ ep->par[5] = fqc_cfg->flows_cnt;
+
+ return 0;
+}
+
+/*
+ * fq_codel scheduler descriptor
+ * contains the type of the scheduler, the name, the size of extra
+ * data structures, and function pointers.
+ */
+static struct dn_alg fq_codel_desc = {
+ _SI( .type = ) DN_SCHED_FQ_CODEL,
+ _SI( .name = ) "FQ_CODEL",
+ _SI( .flags = ) 0,
+
+ _SI( .schk_datalen = ) sizeof(struct fq_codel_schk),
+ _SI( .si_datalen = ) sizeof(struct fq_codel_si) - sizeof(struct dn_sch_inst),
+ _SI( .q_datalen = ) 0,
+
+ _SI( .enqueue = ) fq_codel_enqueue,
+ _SI( .dequeue = ) fq_codel_dequeue,
+ _SI( .config = ) fq_codel_config, /* new sched i.e. sched X config ...*/
+ _SI( .destroy = ) NULL, /*sched x delete */
+ _SI( .new_sched = ) fq_codel_new_sched, /* new schd instance */
+ _SI( .free_sched = ) fq_codel_free_sched, /* delete schd instance */
+ _SI( .new_fsk = ) NULL,
+ _SI( .free_fsk = ) NULL,
+ _SI( .new_queue = ) NULL,
+ _SI( .free_queue = ) NULL,
+ _SI( .getconfig = ) fq_codel_getconfig,
+ _SI( .ref_count = ) 0
+};
+
+DECLARE_DNSCHED_MODULE(dn_fq_codel, &fq_codel_desc);
Property changes on: trunk/sys/netpfil/ipfw/dn_sched_fq_codel.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netpfil/ipfw/dn_sched_fq_codel.h
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_fq_codel.h (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_sched_fq_codel.h 2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,168 @@
+/* $MidnightBSD$ */
+/*-
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from
+ * The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * FQ_Codel Structures and helper functions
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_fq_codel.h 301772 2016-06-10 00:00:25Z truckman $
+ */
+
+#ifndef _IP_DN_SCHED_FQ_CODEL_H
+#define _IP_DN_SCHED_FQ_CODEL_H
+
+/* list of queues */
+STAILQ_HEAD(fq_codel_list, fq_codel_flow) ;
+
+/* fq_codel parameters including codel */
+struct dn_sch_fq_codel_parms {
+ struct dn_aqm_codel_parms ccfg; /* CoDel Parameters */
+ /* FQ_CODEL Parameters */
+ uint32_t flows_cnt; /* number of flows */
+ uint32_t limit; /* hard limit of fq_codel queue size*/
+ uint32_t quantum;
+}; /* defaults */
+
+/* flow (sub-queue) stats */
+struct flow_stats {
+ uint64_t tot_pkts; /* statistics counters */
+ uint64_t tot_bytes;
+ uint32_t length; /* Queue length, in packets */
+ uint32_t len_bytes; /* Queue length, in bytes */
+ uint32_t drops;
+};
+
+/* A flow of packets (sub-queue).*/
+struct fq_codel_flow {
+ struct mq mq; /* list of packets */
+ struct flow_stats stats; /* statistics */
+ int deficit;
+ int active; /* 1: flow is active (in a list) */
+ struct codel_status cst;
+ STAILQ_ENTRY(fq_codel_flow) flowchain;
+};
+
+/* extra fq_codel scheduler configurations */
+struct fq_codel_schk {
+ struct dn_sch_fq_codel_parms cfg;
+};
+
+/* fq_codel scheduler instance */
+struct fq_codel_si {
+ struct dn_sch_inst _si; /* standard scheduler instance */
+ struct dn_queue main_q; /* main queue is after si directly */
+
+ struct fq_codel_flow *flows; /* array of flows (queues) */
+ uint32_t perturbation; /* random value */
+ struct fq_codel_list newflows; /* list of new queues */
+ struct fq_codel_list oldflows; /* list of old queues */
+};
+
+/* Helper function to update queue&main-queue and scheduler statistics.
+ * negative len + drop -> drop
+ * negative len -> dequeue
+ * positive len -> enqueue
+ * positive len + drop -> drop during enqueue
+ */
+__inline static void
+fq_update_stats(struct fq_codel_flow *q, struct fq_codel_si *si, int len,
+ int drop)
+{
+ int inc = 0;
+
+ if (len < 0)
+ inc = -1;
+ else if (len > 0)
+ inc = 1;
+
+ if (drop) {
+ si->main_q.ni.drops ++;
+ q->stats.drops ++;
+ si->_si.ni.drops ++;
+ io_pkt_drop ++;
+ }
+
+ if (!drop || (drop && len < 0)) {
+ /* Update stats for the main queue */
+ si->main_q.ni.length += inc;
+ si->main_q.ni.len_bytes += len;
+
+ /*update sub-queue stats */
+ q->stats.length += inc;
+ q->stats.len_bytes += len;
+
+ /*update scheduler instance stats */
+ si->_si.ni.length += inc;
+ si->_si.ni.len_bytes += len;
+ }
+
+ if (inc > 0) {
+ si->main_q.ni.tot_bytes += len;
+ si->main_q.ni.tot_pkts ++;
+
+ q->stats.tot_bytes +=len;
+ q->stats.tot_pkts++;
+
+ si->_si.ni.tot_bytes +=len;
+ si->_si.ni.tot_pkts ++;
+ }
+
+}
+
+/* extract the head of fq_codel sub-queue */
+__inline static struct mbuf *
+fq_codel_extract_head(struct fq_codel_flow *q, aqm_time_t *pkt_ts, struct fq_codel_si *si)
+{
+ struct mbuf *m = q->mq.head;
+
+ if (m == NULL)
+ return m;
+ q->mq.head = m->m_nextpkt;
+
+ fq_update_stats(q, si, -m->m_pkthdr.len, 0);
+
+ if (si->main_q.ni.length == 0) /* queue is now idle */
+ si->main_q.q_time = dn_cfg.curr_time;
+
+ /* extract packet timestamp*/
+ struct m_tag *mtag;
+ mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+ if (mtag == NULL){
+ D("timestamp tag is not found!");
+ *pkt_ts = 0;
+ } else {
+ *pkt_ts = *(aqm_time_t *)(mtag + 1);
+ m_tag_delete(m,mtag);
+ }
+
+ return m;
+}
+
+
+#endif
Property changes on: trunk/sys/netpfil/ipfw/dn_sched_fq_codel.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h 2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,188 @@
+/* $MidnightBSD$ */
+/*
+ * Codel - The Controlled-Delay Active Queue Management algorithm.
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h 301772 2016-06-10 00:00:25Z truckman $
+ *
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from
+ * The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Copyright (C) 2011-2014 Kathleen Nichols <nichols at pollere.com>.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * o Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ *
+ * o Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * o The names of the authors may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General Public
+ * License ("GPL") version 2, in which case the provisions of the GPL
+ * apply INSTEAD OF those given above.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IP_DN_SCHED_FQ_CODEL_HELPER_H
+#define _IP_DN_SCHED_FQ_CODEL_HELPER_H
+
+__inline static struct mbuf *
+fqc_dodequeue(struct fq_codel_flow *q, aqm_time_t now, uint16_t *ok_to_drop,
+ struct fq_codel_si *si)
+{
+ struct mbuf * m;
+ struct fq_codel_schk *schk = (struct fq_codel_schk *)(si->_si.sched+1);
+ aqm_time_t pkt_ts, sojourn_time;
+
+ *ok_to_drop = 0;
+ m = fq_codel_extract_head(q, &pkt_ts, si);
+
+ if (m == NULL) {
+ /*queue is empty - we can't be above target*/
+ q->cst.first_above_time= 0;
+ return m;
+ }
+
+ /* To span a large range of bandwidths, CoDel runs two
+ * different AQMs in parallel. One is sojourn-time-based
+ * and takes effect when the time to send an MTU-sized
+ * packet is less than target. The 1st term of the "if"
+ * below does this. The other is backlog-based and takes
+ * effect when the time to send an MTU-sized packet is >=
+ * target. The goal here is to keep the output link
+ * utilization high by never allowing the queue to get
+ * smaller than the amount that arrives in a typical
+ * interarrival time (MTU-sized packets arriving spaced
+ * by the amount of time it takes to send such a packet on
+ * the bottleneck). The 2nd term of the "if" does this.
+ */
+ sojourn_time = now - pkt_ts;
+ if (sojourn_time < schk->cfg.ccfg.target || q->stats.len_bytes <= q->cst.maxpkt_size) {
+ /* went below - stay below for at least interval */
+ q->cst.first_above_time = 0;
+ } else {
+ if (q->cst.first_above_time == 0) {
+ /* just went above from below. if still above at
+ * first_above_time, will say it's ok to drop. */
+ q->cst.first_above_time = now + schk->cfg.ccfg.interval;
+ } else if (now >= q->cst.first_above_time) {
+ *ok_to_drop = 1;
+ }
+ }
+ return m;
+}
+
+/* Codel dequeue function */
+__inline static struct mbuf *
+fqc_codel_dequeue(struct fq_codel_flow *q, struct fq_codel_si *si)
+{
+ struct mbuf *m;
+ struct dn_aqm_codel_parms *cprms;
+ struct codel_status *cst;
+ aqm_time_t now;
+ uint16_t ok_to_drop;
+ struct fq_codel_schk *schk = (struct fq_codel_schk *)(si->_si.sched+1);
+
+ cst = &q->cst;
+ cprms = &schk->cfg.ccfg;
+
+ now = AQM_UNOW;
+ m = fqc_dodequeue(q, now, &ok_to_drop, si);
+
+ if (cst->dropping) {
+ if (!ok_to_drop) {
+ /* sojourn time below target - leave dropping state */
+ cst->dropping = false;
+ }
+
+ /* Time for the next drop. Drop current packet and dequeue
+ * next. If the dequeue doesn't take us out of dropping
+ * state, schedule the next drop. A large backlog might
+ * result in drop rates so high that the next drop should
+ * happen now, hence the 'while' loop.
+ */
+ while (now >= cst->drop_next_time && cst->dropping) {
+
+ /* mark the packet */
+ if (cprms->flags & CODEL_ECN_ENABLED && ecn_mark(m)) {
+ cst->count++;
+ /* schedule the next mark. */
+ cst->drop_next_time = control_law(cst, cprms, cst->drop_next_time);
+ return m;
+ }
+
+ /* drop the packet */
+ fq_update_stats(q, si, 0, 1);
+ m_freem(m);
+ m = fqc_dodequeue(q, now, &ok_to_drop, si);
+
+ if (!ok_to_drop) {
+ /* leave dropping state */
+ cst->dropping = false;
+ } else {
+ cst->count++;
+ /* schedule the next drop. */
+ cst->drop_next_time = control_law(cst, cprms, cst->drop_next_time);
+ }
+ }
+ /* If we get here we're not in dropping state. The 'ok_to_drop'
+ * return from dodequeue means that the sojourn time has been
+ * above 'target' for 'interval' so enter dropping state.
+ */
+ } else if (ok_to_drop) {
+
+ /* if ECN option is disabled or the packet cannot be marked,
+ * drop the packet and extract another.
+ */
+ if (!(cprms->flags & CODEL_ECN_ENABLED) || !ecn_mark(m)) {
+ fq_update_stats(q, si, 0, 1);
+ m_freem(m);
+ m = fqc_dodequeue(q, now, &ok_to_drop,si);
+ }
+
+ cst->dropping = true;
+
+ /* If min went above target close to when it last went
+ * below, assume that the drop rate that controlled the
+ * queue on the last cycle is a good starting point to
+ * control it now. ('drop_next' will be at most 'interval'
+ * later than the time of the last drop so 'now - drop_next'
+ * is a good approximation of the time from the last drop
+ * until now.)
+ */
+ cst->count = (cst->count > 2 && ((aqm_stime_t)now -
+ (aqm_stime_t)cst->drop_next_time) < 8* cprms->interval)? cst->count - 2 : 1;
+
+		/* we don't have to set an initial guess for Newton's method isqrt as
+		 * we initialize isqrt in the control_law function when count == 1 */
+ cst->drop_next_time = control_law(cst, cprms, now);
+ }
+
+ return m;
+}
+
+#endif
Property changes on: trunk/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: trunk/sys/netpfil/ipfw/dn_sched_fq_pie.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_fq_pie.c (rev 0)
+++ trunk/sys/netpfil/ipfw/dn_sched_fq_pie.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -0,0 +1,1236 @@
+/* $MidnightBSD$ */
+/*
+ * FQ_PIE - The FlowQueue-PIE scheduler/AQM
+ *
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_fq_pie.c 325731 2017-11-12 01:28:20Z truckman $
+ *
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from
+ * The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Important note:
+ * As there is no official document for the FQ-PIE specification, we used
+ * the FQ-CoDel algorithm with some modifications to implement FQ-PIE.
+ * This FQ-PIE implementation is a beta version and has not been tested
+ * extensively. Our FQ-PIE uses a stand-alone PIE AQM per sub-queue. By
+ * default, a timestamp is used to calculate queue delay instead of the
+ * departure rate estimation method. Although departure rate estimation is
+ * available as a testing option, the results could be incorrect. Moreover,
+ * an option to turn PIE on and off is available but it does not work
+ * properly in this version.
+ */
+
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <net/if.h> /* IFNAMSIZ */
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* ipfw_rule_ref */
+#include <netinet/ip_fw.h> /* flow_id */
+#include <netinet/ip_dummynet.h>
+
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <sys/sysctl.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sys/queue.h>
+#include <sys/hash.h>
+
+#include <netpfil/ipfw/dn_heap.h>
+#include <netpfil/ipfw/ip_dn_private.h>
+
+#include <netpfil/ipfw/dn_aqm.h>
+#include <netpfil/ipfw/dn_aqm_pie.h>
+#include <netpfil/ipfw/dn_sched.h>
+
+#else
+#include <dn_test.h>
+#endif
+
+#define DN_SCHED_FQ_PIE 7
+
+/* list of queues */
+STAILQ_HEAD(fq_pie_list, fq_pie_flow) ;
+
+/* FQ_PIE parameters including PIE */
+struct dn_sch_fq_pie_parms {
+ struct dn_aqm_pie_parms pcfg; /* PIE configuration Parameters */
+ /* FQ_PIE Parameters */
+ uint32_t flows_cnt; /* number of flows */
+ uint32_t limit; /* hard limit of FQ_PIE queue size*/
+ uint32_t quantum;
+};
+
+/* flow (sub-queue) stats */
+struct flow_stats {
+ uint64_t tot_pkts; /* statistics counters */
+ uint64_t tot_bytes;
+ uint32_t length; /* Queue length, in packets */
+ uint32_t len_bytes; /* Queue length, in bytes */
+ uint32_t drops;
+};
+
+/* A flow of packets (sub-queue)*/
+struct fq_pie_flow {
+ struct mq mq; /* list of packets */
+ struct flow_stats stats; /* statistics */
+ int deficit;
+ int active; /* 1: flow is active (in a list) */
+ struct pie_status pst; /* pie status variables */
+ struct fq_pie_si_extra *psi_extra;
+ STAILQ_ENTRY(fq_pie_flow) flowchain;
+};
+
+/* extra fq_pie scheduler configurations */
+struct fq_pie_schk {
+ struct dn_sch_fq_pie_parms cfg;
+};
+
+
+/* fq_pie scheduler instance extra state vars.
+ * The purpose of separation this structure is to preserve number of active
+ * sub-queues and the flows array pointer even after the scheduler instance
+ * is destroyed.
+ * Preserving these varaiables allows freeing the allocated memory by
+ * fqpie_callout_cleanup() independently from fq_pie_free_sched().
+ */
+struct fq_pie_si_extra {
+ uint32_t nr_active_q; /* number of active queues */
+ struct fq_pie_flow *flows; /* array of flows (queues) */
+ };
+
+/* fq_pie scheduler instance */
+struct fq_pie_si {
+ struct dn_sch_inst _si; /* standard scheduler instance. SHOULD BE FIRST */
+ struct dn_queue main_q; /* main queue is after si directly */
+ uint32_t perturbation; /* random value */
+ struct fq_pie_list newflows; /* list of new queues */
+ struct fq_pie_list oldflows; /* list of old queues */
+ struct fq_pie_si_extra *si_extra; /* extra state vars*/
+};
+
+
+static struct dn_alg fq_pie_desc;
+
+/* Default FQ-PIE parameters including PIE */
+/* PIE defaults
+ * target=15ms, max_burst=150ms, max_ecnth=0.1,
+ * alpha=0.125, beta=1.25, tupdate=15ms
+ * FQ-
+ * flows=1024, limit=10240, quantum =1514
+ */
+struct dn_sch_fq_pie_parms
+ fq_pie_sysctl = {{15000 * AQM_TIME_1US, 15000 * AQM_TIME_1US,
+ 150000 * AQM_TIME_1US, PIE_SCALE * 0.1, PIE_SCALE * 0.125,
+ PIE_SCALE * 1.25, PIE_CAPDROP_ENABLED | PIE_DERAND_ENABLED},
+ 1024, 10240, 1514};
+
+static int
+fqpie_sysctl_alpha_beta_handler(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ long value;
+
+ if (!strcmp(oidp->oid_name,"alpha"))
+ value = fq_pie_sysctl.pcfg.alpha;
+ else
+ value = fq_pie_sysctl.pcfg.beta;
+
+ value = value * 1000 / PIE_SCALE;
+ error = sysctl_handle_long(oidp, &value, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ if (value < 1 || value > 7 * PIE_SCALE)
+ return (EINVAL);
+ value = (value * PIE_SCALE) / 1000;
+ if (!strcmp(oidp->oid_name,"alpha"))
+ fq_pie_sysctl.pcfg.alpha = value;
+ else
+ fq_pie_sysctl.pcfg.beta = value;
+ return (0);
+}
+
+static int
+fqpie_sysctl_target_tupdate_maxb_handler(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ long value;
+
+ if (!strcmp(oidp->oid_name,"target"))
+ value = fq_pie_sysctl.pcfg.qdelay_ref;
+ else if (!strcmp(oidp->oid_name,"tupdate"))
+ value = fq_pie_sysctl.pcfg.tupdate;
+ else
+ value = fq_pie_sysctl.pcfg.max_burst;
+
+ value = value / AQM_TIME_1US;
+ error = sysctl_handle_long(oidp, &value, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ if (value < 1 || value > 10 * AQM_TIME_1S)
+ return (EINVAL);
+ value = value * AQM_TIME_1US;
+
+ if (!strcmp(oidp->oid_name,"target"))
+ fq_pie_sysctl.pcfg.qdelay_ref = value;
+ else if (!strcmp(oidp->oid_name,"tupdate"))
+ fq_pie_sysctl.pcfg.tupdate = value;
+ else
+ fq_pie_sysctl.pcfg.max_burst = value;
+ return (0);
+}
+
+static int
+fqpie_sysctl_max_ecnth_handler(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ long value;
+
+ value = fq_pie_sysctl.pcfg.max_ecnth;
+ value = value * 1000 / PIE_SCALE;
+ error = sysctl_handle_long(oidp, &value, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ if (value < 1 || value > PIE_SCALE)
+ return (EINVAL);
+ value = (value * PIE_SCALE) / 1000;
+ fq_pie_sysctl.pcfg.max_ecnth = value;
+ return (0);
+}
+
+/* define FQ- PIE sysctl variables */
+SYSBEGIN(f4)
+SYSCTL_DECL(_net_inet);
+SYSCTL_DECL(_net_inet_ip);
+SYSCTL_DECL(_net_inet_ip_dummynet);
+static SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, fqpie,
+ CTLFLAG_RW, 0, "FQ_PIE");
+
+#ifdef SYSCTL_NODE
+
+SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, target,
+ CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+ fqpie_sysctl_target_tupdate_maxb_handler, "L",
+ "queue target in microsecond");
+
+SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, tupdate,
+ CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+ fqpie_sysctl_target_tupdate_maxb_handler, "L",
+ "the frequency of drop probability calculation in microsecond");
+
+SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, max_burst,
+ CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+ fqpie_sysctl_target_tupdate_maxb_handler, "L",
+ "Burst allowance interval in microsecond");
+
+SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, max_ecnth,
+ CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+ fqpie_sysctl_max_ecnth_handler, "L",
+ "ECN safeguard threshold scaled by 1000");
+
+SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, alpha,
+ CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+ fqpie_sysctl_alpha_beta_handler, "L", "PIE alpha scaled by 1000");
+
+SYSCTL_PROC(_net_inet_ip_dummynet_fqpie, OID_AUTO, beta,
+ CTLTYPE_LONG | CTLFLAG_RW, NULL, 0,
+ fqpie_sysctl_alpha_beta_handler, "L", "beta scaled by 1000");
+
+SYSCTL_UINT(_net_inet_ip_dummynet_fqpie, OID_AUTO, quantum,
+ CTLFLAG_RW, &fq_pie_sysctl.quantum, 1514, "quantum for FQ_PIE");
+SYSCTL_UINT(_net_inet_ip_dummynet_fqpie, OID_AUTO, flows,
+ CTLFLAG_RW, &fq_pie_sysctl.flows_cnt, 1024, "Number of queues for FQ_PIE");
+SYSCTL_UINT(_net_inet_ip_dummynet_fqpie, OID_AUTO, limit,
+ CTLFLAG_RW, &fq_pie_sysctl.limit, 10240, "limit for FQ_PIE");
+#endif
+
+/* Helper function to update queue&main-queue and scheduler statistics.
+ * negative len & drop -> drop
+ * negative len -> dequeue
+ * positive len -> enqueue
+ * positive len + drop -> drop during enqueue
+ */
+__inline static void
+fq_update_stats(struct fq_pie_flow *q, struct fq_pie_si *si, int len,
+ int drop)
+{
+ int inc = 0;
+
+ if (len < 0)
+ inc = -1;
+ else if (len > 0)
+ inc = 1;
+
+ if (drop) {
+ si->main_q.ni.drops ++;
+ q->stats.drops ++;
+ si->_si.ni.drops ++;
+ io_pkt_drop ++;
+ }
+
+ if (!drop || (drop && len < 0)) {
+ /* Update stats for the main queue */
+ si->main_q.ni.length += inc;
+ si->main_q.ni.len_bytes += len;
+
+ /*update sub-queue stats */
+ q->stats.length += inc;
+ q->stats.len_bytes += len;
+
+ /*update scheduler instance stats */
+ si->_si.ni.length += inc;
+ si->_si.ni.len_bytes += len;
+ }
+
+ if (inc > 0) {
+ si->main_q.ni.tot_bytes += len;
+ si->main_q.ni.tot_pkts ++;
+
+ q->stats.tot_bytes +=len;
+ q->stats.tot_pkts++;
+
+ si->_si.ni.tot_bytes +=len;
+ si->_si.ni.tot_pkts ++;
+ }
+
+}
+
+/*
+ * Extract a packet from the head of sub-queue 'q'
+ * Return a packet or NULL if the queue is empty.
+ * If getts is set, also extract packet's timestamp from mtag.
+ */
+__inline static struct mbuf *
+fq_pie_extract_head(struct fq_pie_flow *q, aqm_time_t *pkt_ts,
+ struct fq_pie_si *si, int getts)
+{
+ struct mbuf *m = q->mq.head;
+
+ if (m == NULL)
+ return m;
+ q->mq.head = m->m_nextpkt;
+
+ fq_update_stats(q, si, -m->m_pkthdr.len, 0);
+
+ if (si->main_q.ni.length == 0) /* queue is now idle */
+ si->main_q.q_time = dn_cfg.curr_time;
+
+ if (getts) {
+ /* extract packet timestamp*/
+ struct m_tag *mtag;
+ mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+ if (mtag == NULL){
+ D("PIE timestamp mtag not found!");
+ *pkt_ts = 0;
+ } else {
+ *pkt_ts = *(aqm_time_t *)(mtag + 1);
+ m_tag_delete(m,mtag);
+ }
+ }
+ return m;
+}
+
+/*
+ * Callout function for drop probability calculation
+ * This function is called over tupdate ms and takes pointer of FQ-PIE
+ * flow as an argument
+ */
+static void
+fq_calculate_drop_prob(void *x)
+{
+ struct fq_pie_flow *q = (struct fq_pie_flow *) x;
+ struct pie_status *pst = &q->pst;
+ struct dn_aqm_pie_parms *pprms;
+ int64_t p, prob, oldprob;
+ aqm_time_t now;
+ int p_isneg;
+
+ now = AQM_UNOW;
+ pprms = pst->parms;
+ prob = pst->drop_prob;
+
+ /* calculate current qdelay using DRE method.
+ * If TS is used and no data in the queue, reset current_qdelay
+ * as it stays at last value during dequeue process.
+ */
+ if (pprms->flags & PIE_DEPRATEEST_ENABLED)
+ pst->current_qdelay = ((uint64_t)q->stats.len_bytes * pst->avg_dq_time)
+ >> PIE_DQ_THRESHOLD_BITS;
+ else
+ if (!q->stats.len_bytes)
+ pst->current_qdelay = 0;
+
+ /* calculate drop probability */
+ p = (int64_t)pprms->alpha *
+ ((int64_t)pst->current_qdelay - (int64_t)pprms->qdelay_ref);
+ p +=(int64_t) pprms->beta *
+ ((int64_t)pst->current_qdelay - (int64_t)pst->qdelay_old);
+
+ /* take absolute value so right shift result is well defined */
+ p_isneg = p < 0;
+ if (p_isneg) {
+ p = -p;
+ }
+
+ /* We PIE_MAX_PROB shift by 12-bits to increase the division precision */
+ p *= (PIE_MAX_PROB << 12) / AQM_TIME_1S;
+
+ /* auto-tune drop probability */
+ if (prob < (PIE_MAX_PROB / 1000000)) /* 0.000001 */
+ p >>= 11 + PIE_FIX_POINT_BITS + 12;
+ else if (prob < (PIE_MAX_PROB / 100000)) /* 0.00001 */
+ p >>= 9 + PIE_FIX_POINT_BITS + 12;
+ else if (prob < (PIE_MAX_PROB / 10000)) /* 0.0001 */
+ p >>= 7 + PIE_FIX_POINT_BITS + 12;
+ else if (prob < (PIE_MAX_PROB / 1000)) /* 0.001 */
+ p >>= 5 + PIE_FIX_POINT_BITS + 12;
+ else if (prob < (PIE_MAX_PROB / 100)) /* 0.01 */
+ p >>= 3 + PIE_FIX_POINT_BITS + 12;
+ else if (prob < (PIE_MAX_PROB / 10)) /* 0.1 */
+ p >>= 1 + PIE_FIX_POINT_BITS + 12;
+ else
+ p >>= PIE_FIX_POINT_BITS + 12;
+
+ oldprob = prob;
+
+ if (p_isneg) {
+ prob = prob - p;
+
+ /* check for multiplication underflow */
+ if (prob > oldprob) {
+ prob= 0;
+ D("underflow");
+ }
+ } else {
+ /* Cap Drop adjustment */
+ if ((pprms->flags & PIE_CAPDROP_ENABLED) &&
+ prob >= PIE_MAX_PROB / 10 &&
+ p > PIE_MAX_PROB / 50 ) {
+ p = PIE_MAX_PROB / 50;
+ }
+
+ prob = prob + p;
+
+ /* check for multiplication overflow */
+ if (prob<oldprob) {
+ D("overflow");
+ prob= PIE_MAX_PROB;
+ }
+ }
+
+ /*
+ * decay the drop probability exponentially
+ * and restrict it to range 0 to PIE_MAX_PROB
+ */
+ if (prob < 0) {
+ prob = 0;
+ } else {
+ if (pst->current_qdelay == 0 && pst->qdelay_old == 0) {
+ /* 0.98 ~= 1- 1/64 */
+ prob = prob - (prob >> 6);
+ }
+
+ if (prob > PIE_MAX_PROB) {
+ prob = PIE_MAX_PROB;
+ }
+ }
+
+ pst->drop_prob = prob;
+
+ /* store current delay value */
+ pst->qdelay_old = pst->current_qdelay;
+
+ /* update burst allowance */
+ if ((pst->sflags & PIE_ACTIVE) && pst->burst_allowance) {
+ if (pst->burst_allowance > pprms->tupdate)
+ pst->burst_allowance -= pprms->tupdate;
+ else
+ pst->burst_allowance = 0;
+ }
+
+ if (pst->sflags & PIE_ACTIVE)
+ callout_reset_sbt(&pst->aqm_pie_callout,
+ (uint64_t)pprms->tupdate * SBT_1US,
+ 0, fq_calculate_drop_prob, q, 0);
+
+ mtx_unlock(&pst->lock_mtx);
+}
+
+/*
+ * Reset PIE variables & activate the queue
+ */
+__inline static void
+fq_activate_pie(struct fq_pie_flow *q)
+{
+ struct pie_status *pst = &q->pst;
+ struct dn_aqm_pie_parms *pprms;
+
+ mtx_lock(&pst->lock_mtx);
+ pprms = pst->parms;
+
+ pprms = pst->parms;
+ pst->drop_prob = 0;
+ pst->qdelay_old = 0;
+ pst->burst_allowance = pprms->max_burst;
+ pst->accu_prob = 0;
+ pst->dq_count = 0;
+ pst->avg_dq_time = 0;
+ pst->sflags = PIE_INMEASUREMENT | PIE_ACTIVE;
+ pst->measurement_start = AQM_UNOW;
+
+ callout_reset_sbt(&pst->aqm_pie_callout,
+ (uint64_t)pprms->tupdate * SBT_1US,
+ 0, fq_calculate_drop_prob, q, 0);
+
+ mtx_unlock(&pst->lock_mtx);
+}
+
+
+ /*
+ * Deactivate PIE and stop probe update callout
+ */
+__inline static void
+fq_deactivate_pie(struct pie_status *pst)
+{
+ mtx_lock(&pst->lock_mtx);
+ pst->sflags &= ~(PIE_ACTIVE | PIE_INMEASUREMENT);
+ callout_stop(&pst->aqm_pie_callout);
+ //D("PIE Deactivated");
+ mtx_unlock(&pst->lock_mtx);
+}
+
+ /*
+ * Initialize PIE for sub-queue 'q'
+ */
+static int
+pie_init(struct fq_pie_flow *q, struct fq_pie_schk *fqpie_schk)
+{
+ struct pie_status *pst=&q->pst;
+ struct dn_aqm_pie_parms *pprms = pst->parms;
+
+ int err = 0;
+ if (!pprms){
+ D("AQM_PIE is not configured");
+ err = EINVAL;
+ } else {
+ q->psi_extra->nr_active_q++;
+
+ /* For speed optimization, we caculate 1/3 queue size once here */
+ // XXX limit divided by number of queues divided by 3 ???
+ pst->one_third_q_size = (fqpie_schk->cfg.limit /
+ fqpie_schk->cfg.flows_cnt) / 3;
+
+ mtx_init(&pst->lock_mtx, "mtx_pie", NULL, MTX_DEF);
+ callout_init_mtx(&pst->aqm_pie_callout, &pst->lock_mtx,
+ CALLOUT_RETURNUNLOCKED);
+ }
+
+ return err;
+}
+
+/*
+ * callout function to destroy PIE lock, and free fq_pie flows and fq_pie si
+ * extra memory when number of active sub-queues reaches zero.
+ * 'x' is a fq_pie_flow to be destroyed
+ */
+static void
+fqpie_callout_cleanup(void *x)
+{
+ struct fq_pie_flow *q = x;
+ struct pie_status *pst = &q->pst;
+ struct fq_pie_si_extra *psi_extra;
+
+ mtx_unlock(&pst->lock_mtx);
+ mtx_destroy(&pst->lock_mtx);
+ psi_extra = q->psi_extra;
+
+ DN_BH_WLOCK();
+ psi_extra->nr_active_q--;
+
+ /* when all sub-queues are destroyed, free flows fq_pie extra vars memory */
+ if (!psi_extra->nr_active_q) {
+ free(psi_extra->flows, M_DUMMYNET);
+ free(psi_extra, M_DUMMYNET);
+ fq_pie_desc.ref_count--;
+ }
+ DN_BH_WUNLOCK();
+}
+
+/*
+ * Clean up PIE status for sub-queue 'q'
+ * Stop callout timer and destroy mtx using fqpie_callout_cleanup() callout.
+ */
+static int
+pie_cleanup(struct fq_pie_flow *q)
+{
+ struct pie_status *pst = &q->pst;
+
+ mtx_lock(&pst->lock_mtx);
+ callout_reset_sbt(&pst->aqm_pie_callout,
+ SBT_1US, 0, fqpie_callout_cleanup, q, 0);
+ mtx_unlock(&pst->lock_mtx);
+ return 0;
+}
+
+/*
+ * Dequeue and return a pcaket from sub-queue 'q' or NULL if 'q' is empty.
+ * Also, caculate depature time or queue delay using timestamp
+ */
+ static struct mbuf *
+pie_dequeue(struct fq_pie_flow *q, struct fq_pie_si *si)
+{
+ struct mbuf *m;
+ struct dn_aqm_pie_parms *pprms;
+ struct pie_status *pst;
+ aqm_time_t now;
+ aqm_time_t pkt_ts, dq_time;
+ int32_t w;
+
+ pst = &q->pst;
+ pprms = q->pst.parms;
+
+ /*we extarct packet ts only when Departure Rate Estimation dis not used*/
+ m = fq_pie_extract_head(q, &pkt_ts, si,
+ !(pprms->flags & PIE_DEPRATEEST_ENABLED));
+
+ if (!m || !(pst->sflags & PIE_ACTIVE))
+ return m;
+
+ now = AQM_UNOW;
+ if (pprms->flags & PIE_DEPRATEEST_ENABLED) {
+ /* calculate average depature time */
+ if(pst->sflags & PIE_INMEASUREMENT) {
+ pst->dq_count += m->m_pkthdr.len;
+
+ if (pst->dq_count >= PIE_DQ_THRESHOLD) {
+ dq_time = now - pst->measurement_start;
+
+ /*
+ * if we don't have old avg dq_time i.e PIE is (re)initialized,
+ * don't use weight to calculate new avg_dq_time
+ */
+ if(pst->avg_dq_time == 0)
+ pst->avg_dq_time = dq_time;
+ else {
+ /*
+ * weight = PIE_DQ_THRESHOLD/2^6, but we scaled
+ * weight by 2^8. Thus, scaled
+ * weight = PIE_DQ_THRESHOLD /2^8
+ * */
+ w = PIE_DQ_THRESHOLD >> 8;
+ pst->avg_dq_time = (dq_time* w
+ + (pst->avg_dq_time * ((1L << 8) - w))) >> 8;
+ pst->sflags &= ~PIE_INMEASUREMENT;
+ }
+ }
+ }
+
+ /*
+ * Start new measurment cycle when the queue has
+ * PIE_DQ_THRESHOLD worth of bytes.
+ */
+ if(!(pst->sflags & PIE_INMEASUREMENT) &&
+ q->stats.len_bytes >= PIE_DQ_THRESHOLD) {
+ pst->sflags |= PIE_INMEASUREMENT;
+ pst->measurement_start = now;
+ pst->dq_count = 0;
+ }
+ }
+ /* Optionally, use packet timestamp to estimate queue delay */
+ else
+ pst->current_qdelay = now - pkt_ts;
+
+ return m;
+}
+
+
+ /*
+ * Enqueue a packet in q, subject to space and FQ-PIE queue management policy
+ * (whose parameters are in q->fs).
+ * Update stats for the queue and the scheduler.
+ * Return 0 on success, 1 on drop. The packet is consumed anyways.
+ */
+static int
+pie_enqueue(struct fq_pie_flow *q, struct mbuf* m, struct fq_pie_si *si)
+{
+ uint64_t len;
+ struct pie_status *pst;
+ struct dn_aqm_pie_parms *pprms;
+ int t;
+
+ len = m->m_pkthdr.len;
+ pst = &q->pst;
+ pprms = pst->parms;
+ t = ENQUE;
+
+ /* drop/mark the packet when PIE is active and burst time elapsed */
+ if (pst->sflags & PIE_ACTIVE && pst->burst_allowance == 0
+ && drop_early(pst, q->stats.len_bytes) == DROP) {
+ /*
+ * if drop_prob over ECN threshold, drop the packet
+ * otherwise mark and enqueue it.
+ */
+ if (pprms->flags & PIE_ECN_ENABLED && pst->drop_prob <
+ (pprms->max_ecnth << (PIE_PROB_BITS - PIE_FIX_POINT_BITS))
+ && ecn_mark(m))
+ t = ENQUE;
+ else
+ t = DROP;
+ }
+
+ /* Turn PIE on when 1/3 of the queue is full */
+ if (!(pst->sflags & PIE_ACTIVE) && q->stats.len_bytes >=
+ pst->one_third_q_size) {
+ fq_activate_pie(q);
+ }
+
+ /* reset burst tolerance and optinally turn PIE off*/
+ if (pst->drop_prob == 0 && pst->current_qdelay < (pprms->qdelay_ref >> 1)
+ && pst->qdelay_old < (pprms->qdelay_ref >> 1)) {
+
+ pst->burst_allowance = pprms->max_burst;
+ if (pprms->flags & PIE_ON_OFF_MODE_ENABLED && q->stats.len_bytes<=0)
+ fq_deactivate_pie(pst);
+ }
+
+ /* Use timestamp if Departure Rate Estimation mode is disabled */
+ if (t != DROP && !(pprms->flags & PIE_DEPRATEEST_ENABLED)) {
+ /* Add TS to mbuf as a TAG */
+ struct m_tag *mtag;
+ mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL);
+ if (mtag == NULL)
+ mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS,
+ sizeof(aqm_time_t), M_NOWAIT);
+ if (mtag == NULL) {
+ m_freem(m);
+ t = DROP;
+ }
+ *(aqm_time_t *)(mtag + 1) = AQM_UNOW;
+ m_tag_prepend(m, mtag);
+ }
+
+ if (t != DROP) {
+ mq_append(&q->mq, m);
+ fq_update_stats(q, si, len, 0);
+ return 0;
+ } else {
+ fq_update_stats(q, si, len, 1);
+ pst->accu_prob = 0;
+ FREE_PKT(m);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Drop a packet form the head of FQ-PIE sub-queue */
+static void
+pie_drop_head(struct fq_pie_flow *q, struct fq_pie_si *si)
+{
+ struct mbuf *m = q->mq.head;
+
+ if (m == NULL)
+ return;
+ q->mq.head = m->m_nextpkt;
+
+ fq_update_stats(q, si, -m->m_pkthdr.len, 1);
+
+ if (si->main_q.ni.length == 0) /* queue is now idle */
+ si->main_q.q_time = dn_cfg.curr_time;
+ /* reset accu_prob after packet drop */
+ q->pst.accu_prob = 0;
+
+ FREE_PKT(m);
+}
+
+/*
+ * Classify a packet to queue number using Jenkins hash function.
+ * Return: queue number
+ * the input of the hash are protocol no, perturbation, src IP, dst IP,
+ * src port, dst port,
+ */
+static inline int
+fq_pie_classify_flow(struct mbuf *m, uint16_t fcount, struct fq_pie_si *si)
+{
+ struct ip *ip;
+ struct tcphdr *th;
+ struct udphdr *uh;
+ uint8_t tuple[41];
+ uint16_t hash=0;
+
+ ip = (struct ip *)mtodo(m, dn_tag_get(m)->iphdr_off);
+//#ifdef INET6
+ struct ip6_hdr *ip6;
+ int isip6;
+ isip6 = (ip->ip_v == 6);
+
+ if(isip6) {
+ ip6 = (struct ip6_hdr *)ip;
+ *((uint8_t *) &tuple[0]) = ip6->ip6_nxt;
+ *((uint32_t *) &tuple[1]) = si->perturbation;
+ memcpy(&tuple[5], ip6->ip6_src.s6_addr, 16);
+ memcpy(&tuple[21], ip6->ip6_dst.s6_addr, 16);
+
+ switch (ip6->ip6_nxt) {
+ case IPPROTO_TCP:
+ th = (struct tcphdr *)(ip6 + 1);
+ *((uint16_t *) &tuple[37]) = th->th_dport;
+ *((uint16_t *) &tuple[39]) = th->th_sport;
+ break;
+
+ case IPPROTO_UDP:
+ uh = (struct udphdr *)(ip6 + 1);
+ *((uint16_t *) &tuple[37]) = uh->uh_dport;
+ *((uint16_t *) &tuple[39]) = uh->uh_sport;
+ break;
+ default:
+ memset(&tuple[37], 0, 4);
+ }
+
+ hash = jenkins_hash(tuple, 41, HASHINIT) % fcount;
+ return hash;
+ }
+//#endif
+
+ /* IPv4 */
+ *((uint8_t *) &tuple[0]) = ip->ip_p;
+ *((uint32_t *) &tuple[1]) = si->perturbation;
+ *((uint32_t *) &tuple[5]) = ip->ip_src.s_addr;
+ *((uint32_t *) &tuple[9]) = ip->ip_dst.s_addr;
+
+ switch (ip->ip_p) {
+ case IPPROTO_TCP:
+ th = (struct tcphdr *)(ip + 1);
+ *((uint16_t *) &tuple[13]) = th->th_dport;
+ *((uint16_t *) &tuple[15]) = th->th_sport;
+ break;
+
+ case IPPROTO_UDP:
+ uh = (struct udphdr *)(ip + 1);
+ *((uint16_t *) &tuple[13]) = uh->uh_dport;
+ *((uint16_t *) &tuple[15]) = uh->uh_sport;
+ break;
+ default:
+ memset(&tuple[13], 0, 4);
+ }
+ hash = jenkins_hash(tuple, 17, HASHINIT) % fcount;
+
+ return hash;
+}
+
+/*
+ * Enqueue a packet into an appropriate queue according to the
+ * FQ-CoDel algorithm.
+ */
+static int
+fq_pie_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q,
+ struct mbuf *m)
+{
+ struct fq_pie_si *si;
+ struct fq_pie_schk *schk;
+ struct dn_sch_fq_pie_parms *param;
+ struct dn_queue *mainq;
+ struct fq_pie_flow *flows;
+ int idx, drop, i, maxidx;
+
+ mainq = (struct dn_queue *)(_si + 1);
+ si = (struct fq_pie_si *)_si;
+ flows = si->si_extra->flows;
+ schk = (struct fq_pie_schk *)(si->_si.sched+1);
+ param = &schk->cfg;
+
+ /* classify a packet to queue number*/
+ idx = fq_pie_classify_flow(m, param->flows_cnt, si);
+
+ /* enqueue packet into appropriate queue using PIE AQM.
+ * Note: 'pie_enqueue' function returns 1 only when it unable to
+ * add timestamp to packet (no limit check)*/
+ drop = pie_enqueue(&flows[idx], m, si);
+
+ /* pie unable to timestamp a packet */
+ if (drop)
+ return 1;
+
+ /* If the flow (sub-queue) is not active ,then add it to tail of
+ * new flows list, initialize and activate it.
+ */
+ if (!flows[idx].active) {
+ STAILQ_INSERT_TAIL(&si->newflows, &flows[idx], flowchain);
+ flows[idx].deficit = param->quantum;
+ fq_activate_pie(&flows[idx]);
+ flows[idx].active = 1;
+ }
+
+ /* check the limit for all queues and remove a packet from the
+ * largest one
+ */
+ if (mainq->ni.length > schk->cfg.limit) {
+ /* find first active flow */
+ for (maxidx = 0; maxidx < schk->cfg.flows_cnt; maxidx++)
+ if (flows[maxidx].active)
+ break;
+ if (maxidx < schk->cfg.flows_cnt) {
+ /* find the largest sub- queue */
+ for (i = maxidx + 1; i < schk->cfg.flows_cnt; i++)
+ if (flows[i].active && flows[i].stats.length >
+ flows[maxidx].stats.length)
+ maxidx = i;
+ pie_drop_head(&flows[maxidx], si);
+ drop = 1;
+ }
+ }
+
+ return drop;
+}
+
+/*
+ * Dequeue a packet from an appropriate queue according to
+ * FQ-CoDel algorithm.
+ */
+static struct mbuf *
+fq_pie_dequeue(struct dn_sch_inst *_si)
+{
+ struct fq_pie_si *si;
+ struct fq_pie_schk *schk;
+ struct dn_sch_fq_pie_parms *param;
+ struct fq_pie_flow *f;
+ struct mbuf *mbuf;
+ struct fq_pie_list *fq_pie_flowlist;
+
+ si = (struct fq_pie_si *)_si;
+ schk = (struct fq_pie_schk *)(si->_si.sched+1);
+ param = &schk->cfg;
+
+ do {
+ /* select a list to start with */
+ if (STAILQ_EMPTY(&si->newflows))
+ fq_pie_flowlist = &si->oldflows;
+ else
+ fq_pie_flowlist = &si->newflows;
+
+ /* Both new and old queue lists are empty, return NULL */
+ if (STAILQ_EMPTY(fq_pie_flowlist))
+ return NULL;
+
+ f = STAILQ_FIRST(fq_pie_flowlist);
+ while (f != NULL) {
+ /* if there is no flow(sub-queue) deficit, increase deficit
+ * by quantum, move the flow to the tail of old flows list
+ * and try another flow.
+ * Otherwise, the flow will be used for dequeue.
+ */
+ if (f->deficit < 0) {
+ f->deficit += param->quantum;
+ STAILQ_REMOVE_HEAD(fq_pie_flowlist, flowchain);
+ STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
+ } else
+ break;
+
+ f = STAILQ_FIRST(fq_pie_flowlist);
+ }
+
+ /* the new flows list is empty, try old flows list */
+ if (STAILQ_EMPTY(fq_pie_flowlist))
+ continue;
+
+ /* Dequeue a packet from the selected flow */
+ mbuf = pie_dequeue(f, si);
+
+ /* pie did not return a packet */
+ if (!mbuf) {
+ /* If the selected flow belongs to new flows list, then move
+ * it to the tail of old flows list. Otherwise, deactivate it and
+ * remove it from the old list and
+ */
+ if (fq_pie_flowlist == &si->newflows) {
+ STAILQ_REMOVE_HEAD(fq_pie_flowlist, flowchain);
+ STAILQ_INSERT_TAIL(&si->oldflows, f, flowchain);
+ } else {
+ f->active = 0;
+ fq_deactivate_pie(&f->pst);
+ STAILQ_REMOVE_HEAD(fq_pie_flowlist, flowchain);
+ }
+ /* start again */
+ continue;
+ }
+
+ /* we have a packet to return,
+ * update flow deficit and return the packet*/
+ f->deficit -= mbuf->m_pkthdr.len;
+ return mbuf;
+
+ } while (1);
+
+ /* unreachable point */
+ return NULL;
+}
+
+/*
+ * Initialize fq_pie scheduler instance.
+ * also, allocate memory for flows array.
+ */
+static int
+fq_pie_new_sched(struct dn_sch_inst *_si)
+{
+ struct fq_pie_si *si;
+ struct dn_queue *q;
+ struct fq_pie_schk *schk;
+ struct fq_pie_flow *flows;
+ int i;
+
+ si = (struct fq_pie_si *)_si;
+ schk = (struct fq_pie_schk *)(_si->sched+1);
+
+ if(si->si_extra) {
+ D("si already configured!");
+ return 0;
+ }
+
+ /* init the main queue */
+ q = &si->main_q;
+ set_oid(&q->ni.oid, DN_QUEUE, sizeof(*q));
+ q->_si = _si;
+ q->fs = _si->sched->fs;
+
+ /* allocate memory for scheduler instance extra vars */
+ si->si_extra = malloc(sizeof(struct fq_pie_si_extra),
+ M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (si->si_extra == NULL) {
+ D("cannot allocate memory for fq_pie si extra vars");
+ return ENOMEM ;
+ }
+ /* allocate memory for flows array */
+ si->si_extra->flows = malloc(schk->cfg.flows_cnt * sizeof(struct fq_pie_flow),
+ M_DUMMYNET, M_NOWAIT | M_ZERO);
+ flows = si->si_extra->flows;
+ if (flows == NULL) {
+ free(si->si_extra, M_DUMMYNET);
+ si->si_extra = NULL;
+ D("cannot allocate memory for fq_pie flows");
+ return ENOMEM ;
+ }
+
+ /* init perturbation for this si */
+ si->perturbation = random();
+ si->si_extra->nr_active_q = 0;
+
+ /* init the old and new flows lists */
+ STAILQ_INIT(&si->newflows);
+ STAILQ_INIT(&si->oldflows);
+
+ /* init the flows (sub-queues) */
+ for (i = 0; i < schk->cfg.flows_cnt; i++) {
+ flows[i].pst.parms = &schk->cfg.pcfg;
+ flows[i].psi_extra = si->si_extra;
+ pie_init(&flows[i], schk);
+ }
+
+ fq_pie_desc.ref_count++;
+
+ return 0;
+}
+
+
+/*
+ * Free fq_pie scheduler instance.
+ */
+static int
+fq_pie_free_sched(struct dn_sch_inst *_si)
+{
+ struct fq_pie_si *si;
+ struct fq_pie_schk *schk;
+ struct fq_pie_flow *flows;
+ int i;
+
+ si = (struct fq_pie_si *)_si;
+ schk = (struct fq_pie_schk *)(_si->sched+1);
+ flows = si->si_extra->flows;
+ for (i = 0; i < schk->cfg.flows_cnt; i++) {
+ pie_cleanup(&flows[i]);
+ }
+ si->si_extra = NULL;
+ return 0;
+}
+
+/*
+ * Configure FQ-PIE scheduler.
+ * the configurations for the scheduler is passed fromipfw userland.
+ */
+static int
+fq_pie_config(struct dn_schk *_schk)
+{
+ struct fq_pie_schk *schk;
+ struct dn_extra_parms *ep;
+ struct dn_sch_fq_pie_parms *fqp_cfg;
+
+ schk = (struct fq_pie_schk *)(_schk+1);
+ ep = (struct dn_extra_parms *) _schk->cfg;
+
+ /* par array contains fq_pie configuration as follow
+ * PIE: 0- qdelay_ref,1- tupdate, 2- max_burst
+ * 3- max_ecnth, 4- alpha, 5- beta, 6- flags
+ * FQ_PIE: 7- quantum, 8- limit, 9- flows
+ */
+ if (ep && ep->oid.len ==sizeof(*ep) &&
+ ep->oid.subtype == DN_SCH_PARAMS) {
+
+ fqp_cfg = &schk->cfg;
+ if (ep->par[0] < 0)
+ fqp_cfg->pcfg.qdelay_ref = fq_pie_sysctl.pcfg.qdelay_ref;
+ else
+ fqp_cfg->pcfg.qdelay_ref = ep->par[0];
+ if (ep->par[1] < 0)
+ fqp_cfg->pcfg.tupdate = fq_pie_sysctl.pcfg.tupdate;
+ else
+ fqp_cfg->pcfg.tupdate = ep->par[1];
+ if (ep->par[2] < 0)
+ fqp_cfg->pcfg.max_burst = fq_pie_sysctl.pcfg.max_burst;
+ else
+ fqp_cfg->pcfg.max_burst = ep->par[2];
+ if (ep->par[3] < 0)
+ fqp_cfg->pcfg.max_ecnth = fq_pie_sysctl.pcfg.max_ecnth;
+ else
+ fqp_cfg->pcfg.max_ecnth = ep->par[3];
+ if (ep->par[4] < 0)
+ fqp_cfg->pcfg.alpha = fq_pie_sysctl.pcfg.alpha;
+ else
+ fqp_cfg->pcfg.alpha = ep->par[4];
+ if (ep->par[5] < 0)
+ fqp_cfg->pcfg.beta = fq_pie_sysctl.pcfg.beta;
+ else
+ fqp_cfg->pcfg.beta = ep->par[5];
+ if (ep->par[6] < 0)
+ fqp_cfg->pcfg.flags = 0;
+ else
+ fqp_cfg->pcfg.flags = ep->par[6];
+
+ /* FQ configurations */
+ if (ep->par[7] < 0)
+ fqp_cfg->quantum = fq_pie_sysctl.quantum;
+ else
+ fqp_cfg->quantum = ep->par[7];
+ if (ep->par[8] < 0)
+ fqp_cfg->limit = fq_pie_sysctl.limit;
+ else
+ fqp_cfg->limit = ep->par[8];
+ if (ep->par[9] < 0)
+ fqp_cfg->flows_cnt = fq_pie_sysctl.flows_cnt;
+ else
+ fqp_cfg->flows_cnt = ep->par[9];
+
+ /* Bound the configurations */
+ fqp_cfg->pcfg.qdelay_ref = BOUND_VAR(fqp_cfg->pcfg.qdelay_ref,
+ 1, 5 * AQM_TIME_1S);
+ fqp_cfg->pcfg.tupdate = BOUND_VAR(fqp_cfg->pcfg.tupdate,
+ 1, 5 * AQM_TIME_1S);
+ fqp_cfg->pcfg.max_burst = BOUND_VAR(fqp_cfg->pcfg.max_burst,
+ 0, 5 * AQM_TIME_1S);
+ fqp_cfg->pcfg.max_ecnth = BOUND_VAR(fqp_cfg->pcfg.max_ecnth,
+ 0, PIE_SCALE);
+ fqp_cfg->pcfg.alpha = BOUND_VAR(fqp_cfg->pcfg.alpha, 0, 7 * PIE_SCALE);
+ fqp_cfg->pcfg.beta = BOUND_VAR(fqp_cfg->pcfg.beta, 0, 7 * PIE_SCALE);
+
+ fqp_cfg->quantum = BOUND_VAR(fqp_cfg->quantum,1,9000);
+ fqp_cfg->limit= BOUND_VAR(fqp_cfg->limit,1,20480);
+ fqp_cfg->flows_cnt= BOUND_VAR(fqp_cfg->flows_cnt,1,65536);
+ }
+ else {
+ D("Wrong parameters for fq_pie scheduler");
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Return FQ-PIE scheduler configurations
+ * the configurations for the scheduler is passed to userland.
+ */
+static int
+fq_pie_getconfig (struct dn_schk *_schk, struct dn_extra_parms *ep) {
+
+ struct fq_pie_schk *schk = (struct fq_pie_schk *)(_schk+1);
+ struct dn_sch_fq_pie_parms *fqp_cfg;
+
+ fqp_cfg = &schk->cfg;
+
+ strcpy(ep->name, fq_pie_desc.name);
+ ep->par[0] = fqp_cfg->pcfg.qdelay_ref;
+ ep->par[1] = fqp_cfg->pcfg.tupdate;
+ ep->par[2] = fqp_cfg->pcfg.max_burst;
+ ep->par[3] = fqp_cfg->pcfg.max_ecnth;
+ ep->par[4] = fqp_cfg->pcfg.alpha;
+ ep->par[5] = fqp_cfg->pcfg.beta;
+ ep->par[6] = fqp_cfg->pcfg.flags;
+
+ ep->par[7] = fqp_cfg->quantum;
+ ep->par[8] = fqp_cfg->limit;
+ ep->par[9] = fqp_cfg->flows_cnt;
+
+ return 0;
+}
+
+/*
+ * FQ-PIE scheduler descriptor
+ * contains the type of the scheduler, the name, the size of extra
+ * data structures, and function pointers.
+ */
+static struct dn_alg fq_pie_desc = {
+ _SI( .type = ) DN_SCHED_FQ_PIE,
+ _SI( .name = ) "FQ_PIE",
+ _SI( .flags = ) 0,
+
+ _SI( .schk_datalen = ) sizeof(struct fq_pie_schk),
+ _SI( .si_datalen = ) sizeof(struct fq_pie_si) - sizeof(struct dn_sch_inst),
+ _SI( .q_datalen = ) 0,
+
+ _SI( .enqueue = ) fq_pie_enqueue,
+ _SI( .dequeue = ) fq_pie_dequeue,
+ _SI( .config = ) fq_pie_config, /* new sched i.e. sched X config ...*/
+ _SI( .destroy = ) NULL, /*sched x delete */
+ _SI( .new_sched = ) fq_pie_new_sched, /* new schd instance */
+ _SI( .free_sched = ) fq_pie_free_sched, /* delete schd instance */
+ _SI( .new_fsk = ) NULL,
+ _SI( .free_fsk = ) NULL,
+ _SI( .new_queue = ) NULL,
+ _SI( .free_queue = ) NULL,
+ _SI( .getconfig = ) fq_pie_getconfig,
+ _SI( .ref_count = ) 0
+};
+
+DECLARE_DNSCHED_MODULE(dn_fq_pie, &fq_pie_desc);
Property changes on: trunk/sys/netpfil/ipfw/dn_sched_fq_pie.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+MidnightBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: trunk/sys/netpfil/ipfw/dn_sched_prio.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_prio.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dn_sched_prio.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*
* Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
* All rights reserved
@@ -25,7 +26,7 @@
*/
/*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_prio.c 325731 2017-11-12 01:28:20Z truckman $
*/
#ifdef _KERNEL
#include <sys/malloc.h>
@@ -32,15 +33,21 @@
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/module.h>
+#include <sys/rwlock.h>
#include <net/if.h> /* IFNAMSIZ */
#include <netinet/in.h>
#include <netinet/ip_var.h> /* ipfw_rule_ref */
#include <netinet/ip_fw.h> /* flow_id */
#include <netinet/ip_dummynet.h>
+#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/dn_heap.h>
#include <netpfil/ipfw/ip_dn_private.h>
+#ifdef NEW_AQM
+#include <netpfil/ipfw/dn_aqm.h>
+#endif
#include <netpfil/ipfw/dn_sched.h>
#else
#include <dn_test.h>
@@ -223,6 +230,9 @@
_SI( .new_queue = ) prio_new_queue,
_SI( .free_queue = ) prio_free_queue,
+#ifdef NEW_AQM
+ _SI( .getconfig = ) NULL,
+#endif
};
Modified: trunk/sys/netpfil/ipfw/dn_sched_qfq.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_qfq.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dn_sched_qfq.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*
* Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
* All rights reserved
@@ -25,7 +26,7 @@
*/
/*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_qfq.c 325731 2017-11-12 01:28:20Z truckman $
*/
#ifdef _KERNEL
@@ -33,15 +34,21 @@
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/module.h>
+#include <sys/rwlock.h>
#include <net/if.h> /* IFNAMSIZ */
#include <netinet/in.h>
#include <netinet/ip_var.h> /* ipfw_rule_ref */
#include <netinet/ip_fw.h> /* flow_id */
#include <netinet/ip_dummynet.h>
+#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/dn_heap.h>
#include <netpfil/ipfw/ip_dn_private.h>
+#ifdef NEW_AQM
+#include <netpfil/ipfw/dn_aqm.h>
+#endif
#include <netpfil/ipfw/dn_sched.h>
#else
#include <dn_test.h>
@@ -608,7 +615,7 @@
qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
{
unsigned long mask;
- uint32_t limit, roundedF;
+ uint64_t limit, roundedF;
int slot_shift = cl->grp->slot_shift;
roundedF = qfq_round_down(cl->F, slot_shift);
@@ -824,6 +831,9 @@
_SI( .free_fsk = ) NULL,
_SI( .new_queue = ) qfq_new_queue,
_SI( .free_queue = ) qfq_free_queue,
+#ifdef NEW_AQM
+ _SI( .getconfig = ) NULL,
+#endif
};
DECLARE_DNSCHED_MODULE(dn_qfq, &qfq_desc);
Modified: trunk/sys/netpfil/ipfw/dn_sched_rr.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_rr.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dn_sched_rr.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*
* Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
* All rights reserved
@@ -25,7 +26,7 @@
*/
/*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_rr.c 325731 2017-11-12 01:28:20Z truckman $
*/
#ifdef _KERNEL
@@ -33,15 +34,21 @@
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/module.h>
+#include <sys/rwlock.h>
#include <net/if.h> /* IFNAMSIZ */
#include <netinet/in.h>
#include <netinet/ip_var.h> /* ipfw_rule_ref */
#include <netinet/ip_fw.h> /* flow_id */
#include <netinet/ip_dummynet.h>
+#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/dn_heap.h>
#include <netpfil/ipfw/ip_dn_private.h>
+#ifdef NEW_AQM
+#include <netpfil/ipfw/dn_aqm.h>
+#endif
#include <netpfil/ipfw/dn_sched.h>
#else
#include <dn_test.h>
@@ -301,6 +308,9 @@
_SI( .free_fsk = ) NULL,
_SI( .new_queue = ) rr_new_queue,
_SI( .free_queue = ) rr_free_queue,
+#ifdef NEW_AQM
+ _SI( .getconfig = ) NULL,
+#endif
};
Modified: trunk/sys/netpfil/ipfw/dn_sched_wf2q.c
===================================================================
--- trunk/sys/netpfil/ipfw/dn_sched_wf2q.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dn_sched_wf2q.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*
* Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
* Copyright (c) 2000-2002 Luigi Rizzo, Universita` di Pisa
@@ -26,7 +27,7 @@
*/
/*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_wf2q.c 325731 2017-11-12 01:28:20Z truckman $
*/
#ifdef _KERNEL
@@ -34,15 +35,21 @@
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/module.h>
+#include <sys/rwlock.h>
#include <net/if.h> /* IFNAMSIZ */
#include <netinet/in.h>
#include <netinet/ip_var.h> /* ipfw_rule_ref */
#include <netinet/ip_fw.h> /* flow_id */
#include <netinet/ip_dummynet.h>
+#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/dn_heap.h>
#include <netpfil/ipfw/ip_dn_private.h>
+#ifdef NEW_AQM
+#include <netpfil/ipfw/dn_aqm.h>
+#endif
#include <netpfil/ipfw/dn_sched.h>
#else
#include <dn_test.h>
@@ -367,6 +374,10 @@
_SI( .new_queue = ) wf2qp_new_queue,
_SI( .free_queue = ) wf2qp_free_queue,
+#ifdef NEW_AQM
+ _SI( .getconfig = ) NULL,
+#endif
+
};
Modified: trunk/sys/netpfil/ipfw/dummynet.txt
===================================================================
--- trunk/sys/netpfil/ipfw/dummynet.txt 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/dummynet.txt 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,6 +1,6 @@
#
-# $FreeBSD$
-#
+# $FreeBSD: stable/10/sys/netpfil/ipfw/dummynet.txt 239124 2012-08-07 07:52:25Z luigi $
+# $MidnightBSD$
Notes on the internal structure of dummynet (2010 version)
by Riccardo Panicucci and Luigi Rizzo
@@ -839,7 +839,7 @@
The struct scheduler represent the scheduler descriptor that is passed to
dummynet when a scheduler module is loaded.
-This struct contains the type of scheduler, the lenght of all structs and
+This struct contains the type of scheduler, the length of all structs and
all function pointers.
If a function is not implemented it should be initialized to NULL. Some functions
are mandatory, others are mandatory only if some memory must be freed.
Modified: trunk/sys/netpfil/ipfw/ip_dn_glue.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_dn_glue.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_dn_glue.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
* All rights reserved
@@ -25,7 +26,7 @@
*/
/*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/ip_dn_glue.c 301772 2016-06-10 00:00:25Z truckman $
*
* Binary compatibility support for /sbin/ipfw RELENG_7 and RELENG_8
*/
@@ -55,6 +56,9 @@
#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/dn_heap.h>
#include <netpfil/ipfw/ip_dn_private.h>
+#ifdef NEW_AQM
+#include <netpfil/ipfw/dn_aqm.h>
+#endif
#include <netpfil/ipfw/dn_sched.h>
/* FREEBSD7.2 ip_dummynet.h r191715*/
@@ -315,10 +319,10 @@
/* Indicate 'ipfw' version
* 1: from FreeBSD 7.2
* 0: from FreeBSD 8
- * -1: unknow (for now is unused)
+ * -1: unknown (for now is unused)
*
 * It is updated when an IP_DUMMYNET_DEL or IP_DUMMYNET_CONFIGURE request arrives
- * NOTE: if a IP_DUMMYNET_GET arrives and the 'ipfw' version is unknow,
+ * NOTE: if a IP_DUMMYNET_GET arrives and the 'ipfw' version is unknown,
 * it is supposed to be the FreeBSD 8 version.
*/
static int is7 = 0;
@@ -513,7 +517,7 @@
lmax += sizeof(struct dn_sch) + sizeof(struct dn_link) +
sizeof(struct dn_fs) + sizeof(struct dn_profile);
- base = buf = malloc(lmax, M_DUMMYNET, M_WAIT|M_ZERO);
+ base = buf = malloc(lmax, M_DUMMYNET, M_WAITOK|M_ZERO);
o_next(&buf, sizeof(struct dn_id), DN_CMD_CONFIG);
base->id = DN_API_VERSION;
Modified: trunk/sys/netpfil/ipfw/ip_dn_io.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_dn_io.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_dn_io.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
* All rights reserved
@@ -28,7 +29,7 @@
* Dummynet portions related to packet handling.
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_dn_io.c 325731 2017-11-12 01:28:20Z truckman $");
#include "opt_inet6.h"
@@ -62,6 +63,9 @@
#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/dn_heap.h>
#include <netpfil/ipfw/ip_dn_private.h>
+#ifdef NEW_AQM
+#include <netpfil/ipfw/dn_aqm.h>
+#endif
#include <netpfil/ipfw/dn_sched.h>
/*
@@ -83,8 +87,12 @@
static unsigned long io_pkt;
static unsigned long io_pkt_fast;
+
+#ifdef NEW_AQM
+unsigned long io_pkt_drop;
+#else
static unsigned long io_pkt_drop;
-
+#endif
/*
* We use a heap to store entities for which we have pending timer events.
* The heap is checked at every tick and all entities with expired events
@@ -97,17 +105,11 @@
#ifdef SYSCTL_NODE
-SYSBEGIN(f4)
-
-SYSCTL_DECL(_net_inet);
-SYSCTL_DECL(_net_inet_ip);
-static SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
-
-/* wrapper to pass dn_cfg fields to SYSCTL_* */
-//#define DC(x) (&(VNET_NAME(_base_dn_cfg).x))
-#define DC(x) (&(dn_cfg.x))
-/* parameters */
-
+/*
+ * Because of the way the SYSBEGIN/SYSEND macros work on other
+ * platforms, there should not be functions between them.
+ * So keep the handlers outside the block.
+ */
static int
sysctl_hash_size(SYSCTL_HANDLER_ARGS)
{
@@ -123,10 +125,6 @@
return (0);
}
-SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, hash_size,
- CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_hash_size,
- "I", "Default hash table size");
-
static int
sysctl_limits(SYSCTL_HANDLER_ARGS)
{
@@ -153,6 +151,27 @@
return (0);
}
+SYSBEGIN(f4)
+
+SYSCTL_DECL(_net_inet);
+SYSCTL_DECL(_net_inet_ip);
+#ifdef NEW_AQM
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
+#else
+static SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
+#endif
+
+/* wrapper to pass dn_cfg fields to SYSCTL_* */
+//#define DC(x) (&(VNET_NAME(_base_dn_cfg).x))
+#define DC(x) (&(dn_cfg.x))
+/* parameters */
+
+
+SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, hash_size,
+ CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_hash_size,
+ "I", "Default hash table size");
+
+
SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit,
CTLTYPE_LONG | CTLFLAG_RW, 0, 1, sysctl_limits,
"L", "Upper limit in slots for pipe queue.");
@@ -218,30 +237,21 @@
static void dummynet_send(struct mbuf *);
/*
- * Packets processed by dummynet have an mbuf tag associated with
- * them that carries their dummynet state.
- * Outside dummynet, only the 'rule' field is relevant, and it must
- * be at the beginning of the structure.
- */
-struct dn_pkt_tag {
- struct ipfw_rule_ref rule; /* matching rule */
-
- /* second part, dummynet specific */
- int dn_dir; /* action when packet comes out.*/
- /* see ip_fw_private.h */
- uint64_t output_time; /* when the pkt is due for delivery*/
- struct ifnet *ifp; /* interface, for ip_output */
- struct _ip6dn_args ip6opt; /* XXX ipv6 options */
-};
-
-/*
* Return the mbuf tag holding the dummynet state (it should
* be the first one on the list).
*/
-static struct dn_pkt_tag *
+struct dn_pkt_tag *
dn_tag_get(struct mbuf *m)
{
struct m_tag *mtag = m_tag_first(m);
+#ifdef NEW_AQM
+ /* XXX: to skip ts m_tag. For Debugging only*/
+ if (mtag != NULL && mtag->m_tag_id == DN_AQM_MTAG_TS) {
+ m_tag_delete(m,mtag);
+ mtag = m_tag_first(m);
+ D("skip TS tag");
+ }
+#endif
KASSERT(mtag != NULL &&
mtag->m_tag_cookie == MTAG_ABI_COMPAT &&
mtag->m_tag_id == PACKET_TAG_DUMMYNET,
@@ -249,6 +259,7 @@
return (struct dn_pkt_tag *)(mtag+1);
}
+#ifndef NEW_AQM
static inline void
mq_append(struct mq *q, struct mbuf *m)
{
@@ -259,6 +270,7 @@
q->tail = m;
m->m_nextpkt = NULL;
}
+#endif
/*
* Dispose a list of packet. Use a functions so if we need to do
@@ -330,6 +342,8 @@
return (0); /* accept packet */
}
if (q->avg >= fs->max_th) { /* average queue >= max threshold */
+ if (fs->fs.flags & DN_IS_ECN)
+ return (1);
if (fs->fs.flags & DN_IS_GENTLE_RED) {
/*
* According to Gentle-RED, if avg is greater than
@@ -345,6 +359,8 @@
return (1);
}
} else if (q->avg > fs->min_th) {
+ if (fs->fs.flags & DN_IS_ECN)
+ return (1);
/*
* We compute p_b using the linear dropping function
* p_b = c_1 * avg - c_2
@@ -377,6 +393,73 @@
}
/*
+ * ECN/ECT Processing (partially adopted from altq)
+ */
+#ifndef NEW_AQM
+static
+#endif
+int
+ecn_mark(struct mbuf* m)
+{
+ struct ip *ip;
+ ip = (struct ip *)mtodo(m, dn_tag_get(m)->iphdr_off);
+
+ switch (ip->ip_v) {
+ case IPVERSION:
+ {
+ u_int8_t otos;
+ int sum;
+
+ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
+ return (0); /* not-ECT */
+ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ return (1); /* already marked */
+
+ /*
+ * ecn-capable but not marked,
+ * mark CE and update checksum
+ */
+ otos = ip->ip_tos;
+ ip->ip_tos |= IPTOS_ECN_CE;
+ /*
+ * update checksum (from RFC1624)
+ * HC' = ~(~HC + ~m + m')
+ */
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += (~otos & 0xffff) + ip->ip_tos;
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+ ip->ip_sum = htons(~sum & 0xffff);
+ return (1);
+ }
+#ifdef INET6
+ case (IPV6_VERSION >> 4):
+ {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return (0); /* version mismatch! */
+ if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+ (IPTOS_ECN_NOTECT << 20))
+ return (0); /* not-ECT */
+ if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+ (IPTOS_ECN_CE << 20))
+ return (1); /* already marked */
+ /*
+ * ecn-capable but not marked, mark CE
+ */
+ flowlabel |= (IPTOS_ECN_CE << 20);
+ ip6->ip6_flow = htonl(flowlabel);
+ return (1);
+ }
+#endif
+ }
+ return (0);
+}
+
+/*
* Enqueue a packet in q, subject to space and queue management policy
* (whose parameters are in q->fs).
* Update stats for the queue and the scheduler.
@@ -407,8 +490,15 @@
goto drop;
if (f->plr && random() < f->plr)
goto drop;
- if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len))
- goto drop;
+#ifdef NEW_AQM
+ /* Call AQM enqueue function */
+ if (q->fs->aqmfp)
+ return q->fs->aqmfp->enqueue(q ,m);
+#endif
+ if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len)) {
+ if (!(f->flags & DN_IS_ECN) || !ecn_mark(m))
+ goto drop;
+ }
if (f->flags & DN_QSIZE_BYTES) {
if (q->ni.len_bytes > f->qsize)
goto drop;
@@ -420,7 +510,7 @@
q->ni.len_bytes += len;
ni->length++;
ni->len_bytes += len;
- return 0;
+ return (0);
drop:
io_pkt_drop++;
@@ -427,7 +517,7 @@
q->ni.drops++;
ni->drops++;
FREE_PKT(m);
- return 1;
+ return (1);
}
/*
@@ -612,8 +702,8 @@
dn_drain_queue();
}
+ dn_reschedule();
DN_BH_WUNLOCK();
- dn_reschedule();
if (q.head != NULL)
dummynet_send(q.head);
CURVNET_RESTORE();
@@ -651,13 +741,10 @@
switch (dst) {
case DIR_OUT:
- SET_HOST_IPLEN(mtod(m, struct ip *));
ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
break ;
case DIR_IN :
- /* put header in network format for ip_input() */
- //SET_NET_IPLEN(mtod(m, struct ip *));
netisr_dispatch(NETISR_IP, m);
break;
@@ -729,6 +816,7 @@
dt->ifp = fwa->oif;
/* dt->output tame is updated as we move through */
dt->output_time = dn_cfg.curr_time;
+ dt->iphdr_off = (dir & PROTO_LAYER2) ? ETHER_HDR_LEN : 0;
return 0;
}
@@ -789,6 +877,10 @@
if (fs->sched->fp->enqueue(si, q, m)) {
/* packet was dropped by enqueue() */
m = *m0 = NULL;
+
+ /* dn_enqueue already increases io_pkt_drop */
+ io_pkt_drop--;
+
goto dropit;
}
Modified: trunk/sys/netpfil/ipfw/ip_dn_private.h
===================================================================
--- trunk/sys/netpfil/ipfw/ip_dn_private.h 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_dn_private.h 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
* All rights reserved
@@ -27,7 +28,7 @@
/*
* internal dummynet APIs.
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/ip_dn_private.h 325731 2017-11-12 01:28:20Z truckman $
*/
#ifndef _IP_DN_PRIVATE_H
@@ -81,6 +82,10 @@
SLIST_HEAD(dn_queue_head, dn_queue);
SLIST_HEAD(dn_alg_head, dn_alg);
+#ifdef NEW_AQM
+SLIST_HEAD(dn_aqm_head, dn_aqm); /* for new AQMs */
+#endif
+
struct mq { /* a basic queue of packets*/
struct mbuf *head, *tail;
};
@@ -135,6 +140,9 @@
/* list of flowsets without a scheduler -- use sch_chain */
struct dn_fsk_head fsu; /* list of unlinked flowsets */
struct dn_alg_head schedlist; /* list of algorithms */
+#ifdef NEW_AQM
+ struct dn_aqm_head aqmlist; /* list of AQMs */
+#endif
/* Store the fs/sch to scan when draining. The value is the
* bucket number of the hash table. Expire can be disabled
@@ -231,6 +239,10 @@
int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */
int avg_pkt_size ; /* medium packet size */
int max_pkt_size ; /* max packet size */
+#ifdef NEW_AQM
+ struct dn_aqm *aqmfp; /* Pointer to AQM functions */
+ void *aqmcfg; /* configuration parameters for AQM */
+#endif
};
/*
@@ -253,6 +265,9 @@
int count; /* arrivals since last RED drop */
int random; /* random value (scaled) */
uint64_t q_time; /* start of queue idle time */
+#ifdef NEW_AQM
+ void *aqm_status; /* per-queue status variables*/
+#endif
};
@@ -352,6 +367,24 @@
DN_QHT_IS_Q = 0x0100, /* in flowset, qht is a single queue */
};
+/*
+ * Packets processed by dummynet have an mbuf tag associated with
+ * them that carries their dummynet state.
+ * Outside dummynet, only the 'rule' field is relevant, and it must
+ * be at the beginning of the structure.
+ */
+struct dn_pkt_tag {
+ struct ipfw_rule_ref rule; /* matching rule */
+
+ /* second part, dummynet specific */
+ int dn_dir; /* action when packet comes out.*/
+ /* see ip_fw_private.h */
+ uint64_t output_time; /* when the pkt is due for delivery*/
+ struct ifnet *ifp; /* interface, for ip_output */
+ struct _ip6dn_args ip6opt; /* XXX ipv6 options */
+ uint16_t iphdr_off; /* IP header offset for mtodo() */
+};
+
extern struct dn_parms dn_cfg;
//VNET_DECLARE(struct dn_parms, _base_dn_cfg);
//#define dn_cfg VNET(_base_dn_cfg)
@@ -359,6 +392,7 @@
int dummynet_io(struct mbuf **, int , struct ip_fw_args *);
void dummynet_task(void *context, int pending);
void dn_reschedule(void);
+struct dn_pkt_tag * dn_tag_get(struct mbuf *m);
struct dn_queue *ipdn_q_find(struct dn_fsk *, struct dn_sch_inst *,
struct ipfw_flow_id *);
@@ -400,4 +434,20 @@
void dn_drain_scheduler(void);
void dn_drain_queue(void);
+#ifdef NEW_AQM
+int ecn_mark(struct mbuf* m);
+
+/* moved from ip_dn_io.c to here to be available for AQMs modules*/
+static inline void
+mq_append(struct mq *q, struct mbuf *m)
+{
+ if (q->head == NULL)
+ q->head = m;
+ else
+ q->tail->m_nextpkt = m;
+ q->tail = m;
+ m->m_nextpkt = NULL;
+}
+#endif /* NEW_AQM */
+
#endif /* _IP_DN_PRIVATE_H */
Modified: trunk/sys/netpfil/ipfw/ip_dummynet.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_dummynet.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_dummynet.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,4 +1,12 @@
+/* $MidnightBSD$ */
/*-
+ * Codel/FQ_Codel and PIE/FQ-PIE Code:
+ * Copyright (C) 2016 Centre for Advanced Internet Architectures,
+ * Swinburne University of Technology, Melbourne, Australia.
+ * Portions of this code were made possible in part by a gift from
+ * The Comcast Innovation Fund.
+ * Implemented by Rasool Al-Saadi <ralsaadi at swin.edu.au>
+ *
* Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
* Portions Copyright (c) 2000 Akamba Corp.
* All rights reserved
@@ -26,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_dummynet.c 318155 2017-05-10 20:46:59Z marius $");
/*
* Configuration and internal object management for dummynet.
@@ -57,6 +65,9 @@
#include <netpfil/ipfw/ip_fw_private.h>
#include <netpfil/ipfw/dn_heap.h>
#include <netpfil/ipfw/ip_dn_private.h>
+#ifdef NEW_AQM
+#include <netpfil/ipfw/dn_aqm.h>
+#endif
#include <netpfil/ipfw/dn_sched.h>
/* which objects to copy */
@@ -74,23 +85,44 @@
/*---- callout hooks. ----*/
static struct callout dn_timeout;
+static int dn_gone;
static struct task dn_task;
static struct taskqueue *dn_tq = NULL;
static void
-dummynet(void * __unused unused)
+dummynet(void *arg)
{
- taskqueue_enqueue(dn_tq, &dn_task);
+ (void)arg; /* UNUSED */
+ taskqueue_enqueue_fast(dn_tq, &dn_task);
}
void
dn_reschedule(void)
{
- callout_reset(&dn_timeout, 1, dummynet, NULL);
+
+ if (dn_gone != 0)
+ return;
+ callout_reset_sbt(&dn_timeout, tick_sbt, 0, dummynet, NULL,
+ C_HARDCLOCK | C_DIRECT_EXEC);
}
/*----- end of callout hooks -----*/
+#ifdef NEW_AQM
+/* Return AQM descriptor for given type or name. */
+static struct dn_aqm *
+find_aqm_type(int type, char *name)
+{
+ struct dn_aqm *d;
+
+ SLIST_FOREACH(d, &dn_cfg.aqmlist, next) {
+ if (d->type == type || (name && !strcasecmp(d->name, name)))
+ return d;
+ }
+ return NULL; /* not found */
+}
+#endif
+
/* Return a scheduler descriptor given the type or name. */
static struct dn_alg *
find_sched_type(int type, char *name)
@@ -313,7 +345,15 @@
if (fs->sched->fp->new_queue)
fs->sched->fp->new_queue(q);
+
+#ifdef NEW_AQM
+ /* call AQM init function after creating a queue*/
+ if (fs->aqmfp && fs->aqmfp->init)
+ if(fs->aqmfp->init(q))
+ D("unable to init AQM for fs %d", fs->fs.fs_nr);
+#endif
dn_cfg.queue_count++;
+
return q;
}
@@ -327,6 +367,13 @@
{
struct dn_fsk *fs = q->fs;
+#ifdef NEW_AQM
+ /* clean up AQM status for queue 'q'
+ * cleanup here is called just with MULTIQUEUE
+ */
+ if (fs && fs->aqmfp && fs->aqmfp->cleanup)
+ fs->aqmfp->cleanup(q);
+#endif
// D("fs %p si %p\n", fs, q->_si);
/* notify the parent scheduler that the queue is going away */
if (fs && fs->sched->fp->free_queue)
@@ -468,6 +515,16 @@
if (s->sch.flags & DN_HAVE_MASK)
si->ni.fid = *(struct ipfw_flow_id *)key;
+#ifdef NEW_AQM
+ /* init AQM status for !DN_MULTIQUEUE sched*/
+ if (!(s->fp->flags & DN_MULTIQUEUE))
+ if (s->fs->aqmfp && s->fs->aqmfp->init)
+ if(s->fs->aqmfp->init((struct dn_queue *)(si + 1))) {
+ D("unable to init AQM for fs %d", s->fs->fs.fs_nr);
+ goto error;
+ }
+#endif
+
dn_cfg.si_count++;
return si;
@@ -497,6 +554,20 @@
dn_free_pkts(dl->mq.head); /* drain delay line */
if (si->kflags & DN_ACTIVE) /* remove si from event heap */
heap_extract(&dn_cfg.evheap, si);
+
+#ifdef NEW_AQM
+ /* clean up AQM status for !DN_MULTIQUEUE sched
+ * Note that all queues belong to fs were cleaned up in fsk_detach.
+ * When drain_scheduler is called s->fs and q->fs are pointing
+ * to a correct fs, so we can use fs in this case.
+ */
+ if (!(s->fp->flags & DN_MULTIQUEUE)) {
+ struct dn_queue *q = (struct dn_queue *)(si + 1);
+ if (q->aqm_status && q->fs->aqmfp)
+ if (q->fs->aqmfp->cleanup)
+ q->fs->aqmfp->cleanup(q);
+ }
+#endif
if (s->fp->free_sched)
s->fp->free_sched(si);
bzero(si, sizeof(*si)); /* safety */
@@ -585,6 +656,67 @@
return fs;
}
+#ifdef NEW_AQM
+/* callback function for cleaning up AQM queue status belonging to a flowset
+ * connected to scheduler instance '_si' (for !DN_MULTIQUEUE only).
+ */
+static int
+si_cleanup_q(void *_si, void *arg)
+{
+ struct dn_sch_inst *si = _si;
+
+ if (!(si->sched->fp->flags & DN_MULTIQUEUE)) {
+ if (si->sched->fs->aqmfp && si->sched->fs->aqmfp->cleanup)
+ si->sched->fs->aqmfp->cleanup((struct dn_queue *) (si+1));
+ }
+ return 0;
+}
+
+/* callback to clean up queue AQM status.*/
+static int
+q_cleanup_q(void *_q, void *arg)
+{
+ struct dn_queue *q = _q;
+ q->fs->aqmfp->cleanup(q);
+ return 0;
+}
+
+/* Clean up the AQM status of all queues belonging to flowset 'fs' and then
+ * deconfig AQM for flowset 'fs'
+ */
+static void
+aqm_cleanup_deconfig_fs(struct dn_fsk *fs)
+{
+ struct dn_sch_inst *si;
+
+ /* clean up AQM status for all queues for !DN_MULTIQUEUE sched*/
+ if (fs->fs.fs_nr > DN_MAX_ID) {
+ if (fs->sched && !(fs->sched->fp->flags & DN_MULTIQUEUE)) {
+ if (fs->sched->sch.flags & DN_HAVE_MASK)
+ dn_ht_scan(fs->sched->siht, si_cleanup_q, NULL);
+ else {
+ /* single si i.e. no sched mask */
+ si = (struct dn_sch_inst *) fs->sched->siht;
+ if (si && fs->aqmfp && fs->aqmfp->cleanup)
+ fs->aqmfp->cleanup((struct dn_queue *) (si+1));
+ }
+ }
+ }
+
+ /* clean up AQM status for all queues for DN_MULTIQUEUE sched*/
+ if (fs->sched && fs->sched->fp->flags & DN_MULTIQUEUE && fs->qht) {
+ if (fs->fs.flags & DN_QHT_HASH)
+ dn_ht_scan(fs->qht, q_cleanup_q, NULL);
+ else
+ fs->aqmfp->cleanup((struct dn_queue *)(fs->qht));
+ }
+
+ /* deconfig AQM */
+ if(fs->aqmcfg && fs->aqmfp && fs->aqmfp->deconfig)
+ fs->aqmfp->deconfig(fs);
+}
+#endif
+
/*
* detach flowset from its current scheduler. Flags as follows:
* DN_DETACH removes from the fsk_list
@@ -613,11 +745,15 @@
free(fs->w_q_lookup, M_DUMMYNET);
fs->w_q_lookup = NULL;
qht_delete(fs, flags);
+#ifdef NEW_AQM
+ aqm_cleanup_deconfig_fs(fs);
+#endif
+
if (fs->sched && fs->sched->fp->free_fsk)
fs->sched->fp->free_fsk(fs);
fs->sched = NULL;
if (flags & DN_DELETE_FS) {
- bzero(fs, sizeof(fs)); /* safety */
+ bzero(fs, sizeof(*fs)); /* safety */
free(fs, M_DUMMYNET);
dn_cfg.fsk_count--;
} else {
@@ -795,29 +931,35 @@
static int
copy_obj(char **start, char *end, void *_o, const char *msg, int i)
{
- struct dn_id *o = _o;
+ struct dn_id o;
+ union {
+ struct dn_link l;
+ struct dn_schk s;
+ } dn;
int have = end - *start;
- if (have < o->len || o->len == 0 || o->type == 0) {
+ memcpy(&o, _o, sizeof(o));
+ if (have < o.len || o.len == 0 || o.type == 0) {
D("(WARN) type %d %s %d have %d need %d",
- o->type, msg, i, have, o->len);
+ o.type, msg, i, have, o.len);
return 1;
}
- ND("type %d %s %d len %d", o->type, msg, i, o->len);
- bcopy(_o, *start, o->len);
- if (o->type == DN_LINK) {
+ ND("type %d %s %d len %d", o.type, msg, i, o.len);
+ if (o.type == DN_LINK) {
+ memcpy(&dn.l, _o, sizeof(dn.l));
/* Adjust burst parameter for link */
- struct dn_link *l = (struct dn_link *)*start;
- l->burst = div64(l->burst, 8 * hz);
- l->delay = l->delay * 1000 / hz;
- } else if (o->type == DN_SCH) {
- /* Set id->id to the number of instances */
- struct dn_schk *s = _o;
- struct dn_id *id = (struct dn_id *)(*start);
- id->id = (s->sch.flags & DN_HAVE_MASK) ?
- dn_ht_entries(s->siht) : (s->siht ? 1 : 0);
- }
- *start += o->len;
+ dn.l.burst = div64(dn.l.burst, 8 * hz);
+ dn.l.delay = dn.l.delay * 1000 / hz;
+ memcpy(*start, &dn.l, sizeof(dn.l));
+ } else if (o.type == DN_SCH) {
+ /* Set dn.s.sch.oid.id to the number of instances */
+ memcpy(&dn.s, _o, sizeof(dn.s));
+ dn.s.sch.oid.id = (dn.s.sch.flags & DN_HAVE_MASK) ?
+ dn_ht_entries(dn.s.siht) : (dn.s.siht ? 1 : 0);
+ memcpy(*start, &dn.s, sizeof(dn.s));
+ } else
+ memcpy(*start, _o, o.len);
+ *start += o.len;
return 0;
}
@@ -838,7 +980,7 @@
return 1;
}
ND("type %d %s %d len %d", o->type, msg, i, len);
- bcopy(_o, *start, len);
+ memcpy(*start, _o, len);
((struct dn_id*)(*start))->len = len;
*start += len;
return 0;
@@ -886,7 +1028,7 @@
D("error have %d need %d", have, profile_len);
return 1;
}
- bcopy(p, *a->start, profile_len);
+ memcpy(*a->start, p, profile_len);
((struct dn_id *)(*a->start))->len = profile_len;
*a->start += profile_len;
return 0;
@@ -1067,7 +1209,10 @@
fs->min_th = SCALE(fs->fs.min_th);
fs->max_th = SCALE(fs->fs.max_th);
- fs->c_1 = fs->max_p / (fs->fs.max_th - fs->fs.min_th);
+ if (fs->fs.max_th == fs->fs.min_th)
+ fs->c_1 = fs->max_p;
+ else
+ fs->c_1 = SCALE((int64_t)(fs->max_p)) / (fs->fs.max_th - fs->fs.min_th);
fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th));
if (fs->fs.flags & DN_IS_GENTLE_RED) {
@@ -1181,6 +1326,183 @@
}
}
+#ifdef NEW_AQM
+/* Retrieve AQM configurations to ipfw userland
+ */
+static int
+get_aqm_parms(struct sockopt *sopt)
+{
+ struct dn_extra_parms *ep;
+ struct dn_fsk *fs;
+ size_t sopt_valsize;
+ int l, err = 0;
+
+ sopt_valsize = sopt->sopt_valsize;
+ l = sizeof(*ep);
+ if (sopt->sopt_valsize < l) {
+ D("bad len sopt->sopt_valsize %d len %d",
+ (int) sopt->sopt_valsize , l);
+ err = EINVAL;
+ return err;
+ }
+ ep = malloc(l, M_DUMMYNET, M_WAITOK);
+ if(!ep) {
+ err = ENOMEM ;
+ return err;
+ }
+ do {
+ err = sooptcopyin(sopt, ep, l, l);
+ if(err)
+ break;
+ sopt->sopt_valsize = sopt_valsize;
+ if (ep->oid.len < l) {
+ err = EINVAL;
+ break;
+ }
+
+ fs = dn_ht_find(dn_cfg.fshash, ep->nr, 0, NULL);
+ if (!fs) {
+ D("fs %d not found", ep->nr);
+ err = EINVAL;
+ break;
+ }
+
+ if (fs->aqmfp && fs->aqmfp->getconfig) {
+ if(fs->aqmfp->getconfig(fs, ep)) {
+ D("Error while trying to get AQM params");
+ err = EINVAL;
+ break;
+ }
+ ep->oid.len = l;
+ err = sooptcopyout(sopt, ep, l);
+ }
+ }while(0);
+
+ free(ep, M_DUMMYNET);
+ return err;
+}
+
+/* Retrieve scheduler configurations to ipfw userland
+ */
+static int
+get_sched_parms(struct sockopt *sopt)
+{
+ struct dn_extra_parms *ep;
+ struct dn_schk *schk;
+ size_t sopt_valsize;
+ int l, err = 0;
+
+ sopt_valsize = sopt->sopt_valsize;
+ l = sizeof(*ep);
+ if (sopt->sopt_valsize < l) {
+ D("bad len sopt->sopt_valsize %d len %d",
+ (int) sopt->sopt_valsize , l);
+ err = EINVAL;
+ return err;
+ }
+ ep = malloc(l, M_DUMMYNET, M_WAITOK);
+ if(!ep) {
+ err = ENOMEM ;
+ return err;
+ }
+ do {
+ err = sooptcopyin(sopt, ep, l, l);
+ if(err)
+ break;
+ sopt->sopt_valsize = sopt_valsize;
+ if (ep->oid.len < l) {
+ err = EINVAL;
+ break;
+ }
+
+ schk = locate_scheduler(ep->nr);
+ if (!schk) {
+ D("sched %d not found", ep->nr);
+ err = EINVAL;
+ break;
+ }
+
+ if (schk->fp && schk->fp->getconfig) {
+ if(schk->fp->getconfig(schk, ep)) {
+ D("Error while trying to get sched params");
+ err = EINVAL;
+ break;
+ }
+ ep->oid.len = l;
+ err = sooptcopyout(sopt, ep, l);
+ }
+ }while(0);
+ free(ep, M_DUMMYNET);
+
+ return err;
+}
+
+/* Configure AQM for flowset 'fs'.
+ * extra parameters are passed from userland.
+ */
+static int
+config_aqm(struct dn_fsk *fs, struct dn_extra_parms *ep, int busy)
+{
+ int err = 0;
+
+ do {
+ /* no configurations */
+ if (!ep) {
+ err = 0;
+ break;
+ }
+
+ /* no AQM for this flowset*/
+ if (!strcmp(ep->name,"")) {
+ err = 0;
+ break;
+ }
+ if (ep->oid.len < sizeof(*ep)) {
+ D("short aqm len %d", ep->oid.len);
+ err = EINVAL;
+ break;
+ }
+
+ if (busy) {
+ D("Unable to configure flowset, flowset busy!");
+ err = EINVAL;
+ break;
+ }
+
+ /* deconfigure old aqm if exist */
+ if (fs->aqmcfg && fs->aqmfp && fs->aqmfp->deconfig) {
+ aqm_cleanup_deconfig_fs(fs);
+ }
+
+ if (!(fs->aqmfp = find_aqm_type(0, ep->name))) {
+ D("AQM functions not found for type %s!", ep->name);
+ fs->fs.flags &= ~DN_IS_AQM;
+ err = EINVAL;
+ break;
+ } else
+ fs->fs.flags |= DN_IS_AQM;
+
+ if (ep->oid.subtype != DN_AQM_PARAMS) {
+ D("Wrong subtype");
+ err = EINVAL;
+ break;
+ }
+
+ if (fs->aqmfp->config) {
+ err = fs->aqmfp->config(fs, ep, ep->oid.len);
+ if (err) {
+ D("Unable to configure AQM for FS %d", fs->fs.fs_nr );
+ fs->fs.flags &= ~DN_IS_AQM;
+ fs->aqmfp = NULL;
+ break;
+ }
+ }
+ } while(0);
+
+ return err;
+}
+#endif
+
/*
* Configuration -- to preserve backward compatibility we use
* the following scheme (N is 65536)
@@ -1268,6 +1590,9 @@
{
int i;
struct dn_fsk *fs;
+#ifdef NEW_AQM
+ struct dn_extra_parms *ep;
+#endif
if (nfs->oid.len != sizeof(*nfs)) {
D("invalid flowset len %d", nfs->oid.len);
@@ -1276,6 +1601,15 @@
i = nfs->fs_nr;
if (i <= 0 || i >= 3*DN_MAX_ID)
return NULL;
+#ifdef NEW_AQM
+ ep = NULL;
+ if (arg != NULL) {
+ ep = malloc(sizeof(*ep), M_TEMP, locked ? M_NOWAIT : M_WAITOK);
+ if (ep == NULL)
+ return (NULL);
+ memcpy(ep, arg, sizeof(*ep));
+ }
+#endif
ND("flowset %d", i);
/* XXX other sanity checks */
if (nfs->flags & DN_QSIZE_BYTES) {
@@ -1287,7 +1621,7 @@
}
if (nfs->flags & DN_HAVE_MASK) {
/* make sure we have some buckets */
- ipdn_bound_var(&nfs->buckets, dn_cfg.hash_size,
+ ipdn_bound_var((int *)&nfs->buckets, dn_cfg.hash_size,
1, dn_cfg.max_hash_size, "flowset buckets");
} else {
nfs->buckets = 1; /* we only need 1 */
@@ -1313,6 +1647,17 @@
}
if (bcmp(&fs->fs, nfs, sizeof(*nfs)) == 0) {
ND("flowset %d unchanged", i);
+#ifdef NEW_AQM
+ if (ep != NULL) {
+ /*
+ * Reconfigure AQM as the parameters can be changed.
+ * We consider the flowset as busy if it has scheduler
+ * instance(s).
+ */
+ s = locate_scheduler(nfs->sched_nr);
+ config_aqm(fs, ep, s != NULL && s->siht != NULL);
+ }
+#endif
break; /* no change, nothing to do */
}
if (oldc != dn_cfg.fsk_count) /* new item */
@@ -1331,11 +1676,21 @@
fsk_detach(fs, flags);
}
fs->fs = *nfs; /* copy configuration */
+#ifdef NEW_AQM
+ fs->aqmfp = NULL;
+ if (ep != NULL)
+ config_aqm(fs, ep, s != NULL &&
+ s->siht != NULL);
+#endif
if (s != NULL)
fsk_attach(fs, s);
} while (0);
if (!locked)
DN_BH_WUNLOCK();
+#ifdef NEW_AQM
+ if (ep != NULL)
+ free(ep, M_TEMP);
+#endif
return fs;
}
@@ -1372,7 +1727,7 @@
return EINVAL;
/* make sure we have some buckets */
if (a.sch->flags & DN_HAVE_MASK)
- ipdn_bound_var(&a.sch->buckets, dn_cfg.hash_size,
+ ipdn_bound_var((int *)&a.sch->buckets, dn_cfg.hash_size,
1, dn_cfg.max_hash_size, "sched buckets");
/* XXX other sanity checks */
bzero(&p, sizeof(p));
@@ -1445,7 +1800,7 @@
D("cannot allocate profile");
goto error; //XXX
}
- bcopy(pf, s->profile, sizeof(*pf));
+ memcpy(s->profile, pf, sizeof(*pf));
}
}
p.link_nr = 0;
@@ -1467,7 +1822,7 @@
pf = malloc(sizeof(*pf),
M_DUMMYNET, M_NOWAIT | M_ZERO);
if (pf) /* XXX should issue a warning otherwise */
- bcopy(s->profile, pf, sizeof(*pf));
+ memcpy(pf, s->profile, sizeof(*pf));
}
/* remove from the hash */
dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
@@ -1589,7 +1944,7 @@
olen = s->profile->oid.len;
if (olen < pf->oid.len)
olen = pf->oid.len;
- bcopy(pf, s->profile, pf->oid.len);
+ memcpy(s->profile, pf, pf->oid.len);
s->profile->oid.len = olen;
}
DN_BH_WUNLOCK();
@@ -1625,30 +1980,35 @@
int
do_config(void *p, int l)
{
- struct dn_id *next, *o;
- int err = 0, err2 = 0;
- struct dn_id *arg = NULL;
- uintptr_t *a;
+ struct dn_id o;
+ union {
+ struct dn_profile profile;
+ struct dn_fs fs;
+ struct dn_link link;
+ struct dn_sch sched;
+ } *dn;
+ struct dn_id *arg;
+ uintptr_t a;
+ int err, err2, off;
- o = p;
- if (o->id != DN_API_VERSION) {
- D("invalid api version got %d need %d",
- o->id, DN_API_VERSION);
+ memcpy(&o, p, sizeof(o));
+ if (o.id != DN_API_VERSION) {
+ D("invalid api version got %d need %d", o.id, DN_API_VERSION);
return EINVAL;
}
- for (; l >= sizeof(*o); o = next) {
- struct dn_id *prev = arg;
- if (o->len < sizeof(*o) || l < o->len) {
- D("bad len o->len %d len %d", o->len, l);
+ arg = NULL;
+ dn = NULL;
+ for (off = 0; l >= sizeof(o); memcpy(&o, (char *)p + off, sizeof(o))) {
+ if (o.len < sizeof(o) || l < o.len) {
+ D("bad len o.len %d len %d", o.len, l);
err = EINVAL;
break;
}
- l -= o->len;
- next = (struct dn_id *)((char *)o + o->len);
+ l -= o.len;
err = 0;
- switch (o->type) {
+ switch (o.type) {
default:
- D("cmd %d not implemented", o->type);
+ D("cmd %d not implemented", o.type);
break;
#ifdef EMULATE_SYSCTL
@@ -1666,17 +2026,17 @@
case DN_CMD_DELETE:
/* the argument is in the first uintptr_t after o */
- a = (uintptr_t *)(o+1);
- if (o->len < sizeof(*o) + sizeof(*a)) {
+ if (o.len < sizeof(o) + sizeof(a)) {
err = EINVAL;
break;
}
- switch (o->subtype) {
+ memcpy(&a, (char *)p + off + sizeof(o), sizeof(a));
+ switch (o.subtype) {
case DN_LINK:
/* delete base and derived schedulers */
DN_BH_WLOCK();
- err = delete_schk(*a);
- err2 = delete_schk(*a + DN_MAX_ID);
+ err = delete_schk(a);
+ err2 = delete_schk(a + DN_MAX_ID);
DN_BH_WUNLOCK();
if (!err)
err = err2;
@@ -1683,14 +2043,13 @@
break;
default:
- D("invalid delete type %d",
- o->subtype);
+ D("invalid delete type %d", o.subtype);
err = EINVAL;
break;
case DN_FS:
- err = (*a <1 || *a >= DN_MAX_ID) ?
- EINVAL : delete_fs(*a, 0) ;
+ err = (a < 1 || a >= DN_MAX_ID) ?
+ EINVAL : delete_fs(a, 0) ;
break;
}
break;
@@ -1700,28 +2059,47 @@
dummynet_flush();
DN_BH_WUNLOCK();
break;
- case DN_TEXT: /* store argument the next block */
- prev = NULL;
- arg = o;
+ case DN_TEXT: /* store argument of next block */
+ if (arg != NULL)
+ free(arg, M_TEMP);
+ arg = malloc(o.len, M_TEMP, M_WAITOK);
+ memcpy(arg, (char *)p + off, o.len);
break;
case DN_LINK:
- err = config_link((struct dn_link *)o, arg);
+ if (dn == NULL)
+ dn = malloc(sizeof(*dn), M_TEMP, M_WAITOK);
+ memcpy(&dn->link, (char *)p + off, sizeof(dn->link));
+ err = config_link(&dn->link, arg);
break;
case DN_PROFILE:
- err = config_profile((struct dn_profile *)o, arg);
+ if (dn == NULL)
+ dn = malloc(sizeof(*dn), M_TEMP, M_WAITOK);
+ memcpy(&dn->profile, (char *)p + off,
+ sizeof(dn->profile));
+ err = config_profile(&dn->profile, arg);
break;
case DN_SCH:
- err = config_sched((struct dn_sch *)o, arg);
+ if (dn == NULL)
+ dn = malloc(sizeof(*dn), M_TEMP, M_WAITOK);
+ memcpy(&dn->sched, (char *)p + off,
+ sizeof(dn->sched));
+ err = config_sched(&dn->sched, arg);
break;
case DN_FS:
- err = (NULL==config_fs((struct dn_fs *)o, arg, 0));
+ if (dn == NULL)
+ dn = malloc(sizeof(*dn), M_TEMP, M_WAITOK);
+ memcpy(&dn->fs, (char *)p + off, sizeof(dn->fs));
+ err = (NULL == config_fs(&dn->fs, arg, 0));
break;
}
- if (prev)
- arg = NULL;
if (err != 0)
break;
+ off += o.len;
}
+ if (arg != NULL)
+ free(arg, M_TEMP);
+ if (dn != NULL)
+ free(dn, M_TEMP);
return err;
}
@@ -1856,6 +2234,19 @@
// cmd->id = sopt_valsize;
D("compatibility mode");
}
+
+#ifdef NEW_AQM
+ /* get AQM params */
+ if(cmd->subtype == DN_AQM_PARAMS) {
+ error = get_aqm_parms(sopt);
+ goto done;
+ /* get Scheduler params */
+ } else if (cmd->subtype == DN_SCH_PARAMS) {
+ error = get_sched_parms(sopt);
+ goto done;
+ }
+#endif
+
a.extra = (struct copy_range *)cmd;
if (cmd->len == sizeof(*cmd)) { /* no range, create a default */
uint32_t *rp = (uint32_t *)(cmd + 1);
@@ -1920,7 +2311,7 @@
a.type = cmd->subtype;
if (compat == NULL) {
- bcopy(cmd, start, sizeof(*cmd));
+ memcpy(start, cmd, sizeof(*cmd));
((struct dn_id*)(start))->len = sizeof(struct dn_id);
buf = start + sizeof(*cmd);
} else
@@ -2158,24 +2549,25 @@
DN_LOCK_INIT();
TASK_INIT(&dn_task, 0, dummynet_task, curvnet);
- dn_tq = taskqueue_create("dummynet", M_WAITOK,
+ dn_tq = taskqueue_create_fast("dummynet", M_WAITOK,
taskqueue_thread_enqueue, &dn_tq);
taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet");
- callout_init(&dn_timeout, CALLOUT_MPSAFE);
- callout_reset(&dn_timeout, 1, dummynet, NULL);
+ callout_init(&dn_timeout, 1);
+ dn_reschedule();
/* Initialize curr_time adjustment mechanics. */
getmicrouptime(&dn_cfg.prev_t);
}
-#ifdef KLD_MODULE
static void
ip_dn_destroy(int last)
{
- callout_drain(&dn_timeout);
+ DN_BH_WLOCK();
+ /* ensure no more callouts are started */
+ dn_gone = 1;
- DN_BH_WLOCK();
+ /* check for last */
if (last) {
ND("removing last instance\n");
ip_dn_ctl_ptr = NULL;
@@ -2184,6 +2576,8 @@
dummynet_flush();
DN_BH_WUNLOCK();
+
+ callout_drain(&dn_timeout);
taskqueue_drain(dn_tq, &dn_task);
taskqueue_free(dn_tq);
@@ -2193,7 +2587,6 @@
DN_LOCK_DESTROY();
}
-#endif /* KLD_MODULE */
static int
dummynet_modevent(module_t mod, int type, void *data)
@@ -2209,13 +2602,8 @@
ip_dn_io_ptr = dummynet_io;
return 0;
} else if (type == MOD_UNLOAD) {
-#if !defined(KLD_MODULE)
- printf("dummynet statically compiled, cannot unload\n");
- return EINVAL ;
-#else
ip_dn_destroy(1 /* last */);
return 0;
-#endif
} else
return EOPNOTSUPP;
}
@@ -2311,4 +2699,98 @@
*/
//VNET_SYSUNINIT(vnet_dn_uninit, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_destroy, NULL);
+#ifdef NEW_AQM
+
+/* modevent helpers for the AQM modules */
+static int
+load_dn_aqm(struct dn_aqm *d)
+{
+ struct dn_aqm *aqm=NULL;
+
+ if (d == NULL)
+ return 1; /* error */
+ ip_dn_init(); /* just in case, we need the lock */
+
+ /* Check that mandatory funcs exists */
+ if (d->enqueue == NULL || d->dequeue == NULL) {
+ D("missing enqueue or dequeue for %s", d->name);
+ return 1;
+ }
+
+ /* Search if AQM already exists */
+ DN_BH_WLOCK();
+ SLIST_FOREACH(aqm, &dn_cfg.aqmlist, next) {
+ if (strcmp(aqm->name, d->name) == 0) {
+ D("%s already loaded", d->name);
+ break; /* AQM already exists */
+ }
+ }
+ if (aqm == NULL)
+ SLIST_INSERT_HEAD(&dn_cfg.aqmlist, d, next);
+ DN_BH_WUNLOCK();
+ D("dn_aqm %s %sloaded", d->name, aqm ? "not ":"");
+ return aqm ? 1 : 0;
+}
+
+
+/* Callback to clean up AQM status for queues connected to a flowset
+ * and then deconfigure the flowset.
+ * This function is called before an AQM module is unloaded
+ */
+static int
+fs_cleanup(void *_fs, void *arg)
+{
+ struct dn_fsk *fs = _fs;
+ uint32_t type = *(uint32_t *)arg;
+
+ if (fs->aqmfp && fs->aqmfp->type == type)
+ aqm_cleanup_deconfig_fs(fs);
+
+ return 0;
+}
+
+static int
+unload_dn_aqm(struct dn_aqm *aqm)
+{
+ struct dn_aqm *tmp, *r;
+ int err = EINVAL;
+ err = 0;
+ ND("called for %s", aqm->name);
+
+ DN_BH_WLOCK();
+
+ /* clean up AQM status and deconfig flowset */
+ dn_ht_scan(dn_cfg.fshash, fs_cleanup, &aqm->type);
+
+ SLIST_FOREACH_SAFE(r, &dn_cfg.aqmlist, next, tmp) {
+ if (strcmp(aqm->name, r->name) != 0)
+ continue;
+ ND("ref_count = %d", r->ref_count);
+ err = (r->ref_count != 0 || r->cfg_ref_count != 0) ? EBUSY : 0;
+ if (err == 0)
+ SLIST_REMOVE(&dn_cfg.aqmlist, r, dn_aqm, next);
+ break;
+ }
+ DN_BH_WUNLOCK();
+ D("%s %sunloaded", aqm->name, err ? "not ":"");
+ if (err)
+ D("ref_count=%d, cfg_ref_count=%d", r->ref_count, r->cfg_ref_count);
+ return err;
+}
+
+int
+dn_aqm_modevent(module_t mod, int cmd, void *arg)
+{
+ struct dn_aqm *aqm = arg;
+
+ if (cmd == MOD_LOAD)
+ return load_dn_aqm(aqm);
+ else if (cmd == MOD_UNLOAD)
+ return unload_dn_aqm(aqm);
+ else
+ return EINVAL;
+}
+#endif
+
/* end of file */
+
Modified: trunk/sys/netpfil/ipfw/ip_fw2.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_fw2.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_fw2.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
*
@@ -24,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_fw2.c 331202 2018-03-19 09:54:16Z ae $");
/*
* The FreeBSD IP packet firewall, main file
@@ -34,7 +35,7 @@
#include "opt_ipdivert.h"
#include "opt_inet.h"
#ifndef INET
-#error IPFIREWALL requires INET.
+#error "IPFIREWALL requires INET"
#endif /* INET */
#include "opt_inet6.h"
#include "opt_ipsec.h"
@@ -60,10 +61,11 @@
#include <net/ethernet.h> /* for ETHERTYPE_IP */
#include <net/if.h>
#include <net/route.h>
-#include <net/pf_mtag.h>
#include <net/pfil.h>
#include <net/vnet.h>
+#include <netpfil/pf/pf_mtag.h>
+
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
@@ -86,6 +88,8 @@
#include <netinet6/ip6_var.h>
#endif
+#include <net/if_gre.h> /* for struct grehdr */
+
#include <netpfil/ipfw/ip_fw_private.h>
#include <machine/in_cksum.h> /* XXX for in_cksum */
@@ -142,6 +146,8 @@
/* layer3_chain contains the list of rules for layer 3 */
VNET_DEFINE(struct ip_fw_chain, layer3_chain);
+VNET_DEFINE(int, ipfw_nat_ready) = 0;
+
ipfw_nat_t *ipfw_nat_ptr = NULL;
struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
@@ -178,7 +184,7 @@
&default_to_accept, 0,
"Make the default rule accept all packets.");
TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept);
-TUNABLE_INT("net.inet.ip.fw.tables_max", &default_fw_tables);
+TUNABLE_INT("net.inet.ip.fw.tables_max", (int *)&default_fw_tables);
SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count,
CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0,
"Number of static rules");
@@ -628,8 +634,6 @@
m_adj(m, args->L3offset);
#endif
if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
- /* We need the IP header in host order for icmp_error(). */
- SET_HOST_IPLEN(ip);
icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
} else if (args->f_id.proto == IPPROTO_TCP) {
struct tcphdr *const tcp =
@@ -942,7 +946,7 @@
uint8_t proto;
uint16_t src_port = 0, dst_port = 0; /* NOTE: host format */
struct in_addr src_ip, dst_ip; /* NOTE: network format */
- uint16_t iplen=0;
+ int iplen = 0;
int pktlen;
uint16_t etype = 0; /* Host order stored ether type */
@@ -1142,6 +1146,11 @@
PULLUP_TO(hlen, ulp, struct pim);
break;
+ case IPPROTO_GRE: /* RFC 1701 */
+ /* XXX GRE header check? */
+ PULLUP_TO(hlen, ulp, struct grehdr);
+ break;
+
case IPPROTO_CARP:
PULLUP_TO(hlen, ulp, struct carp_header);
if (((struct carp_header *)ulp)->carp_version !=
@@ -1178,6 +1187,7 @@
args->f_id.src_ip = 0;
args->f_id.dst_ip = 0;
args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
+ iplen = ntohs(ip6->ip6_plen) + sizeof(*ip6);
} else if (pktlen >= sizeof(struct ip) &&
(args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) {
is_ipv4 = 1;
@@ -1192,7 +1202,6 @@
dst_ip = ip->ip_dst;
offset = ntohs(ip->ip_off) & IP_OFFMASK;
iplen = ntohs(ip->ip_len);
- pktlen = iplen < pktlen ? iplen : pktlen;
if (offset == 0) {
switch (proto) {
@@ -1231,6 +1240,7 @@
args->f_id.dst_ip = ntohl(dst_ip.s_addr);
}
#undef PULLUP_TO
+ pktlen = iplen < pktlen ? iplen: pktlen;
if (proto) { /* we may have port numbers, store them */
args->f_id.proto = proto;
args->f_id.src_port = src_port = ntohs(src_port);
@@ -1237,9 +1247,9 @@
args->f_id.dst_port = dst_port = ntohs(dst_port);
}
- IPFW_RLOCK(chain);
+ IPFW_PF_RLOCK(chain);
if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */
- IPFW_RUNLOCK(chain);
+ IPFW_PF_RUNLOCK(chain);
return (IP_FW_PASS); /* accept */
}
if (args->rule.slot) {
@@ -1677,7 +1687,7 @@
break;
/* DSCP bitmask is stored as low_u32 high_u32 */
- if (x > 32)
+ if (x >= 32)
match = *(p + 1) & (1 << (x - 32));
else
match = *p & (1 << x);
@@ -1690,10 +1700,25 @@
uint16_t x;
uint16_t *p;
int i;
+#ifdef INET6
+ if (is_ipv6) {
+ struct ip6_hdr *ip6;
+ ip6 = (struct ip6_hdr *)ip;
+ if (ip6->ip6_plen == 0) {
+ /*
+ * Jumbo payload is not
+ * supported by this
+ * opcode.
+ */
+ break;
+ }
+ x = iplen - hlen;
+ } else
+#endif /* INET6 */
+ x = iplen - (ip->ip_hl << 2);
tcp = TCP(ulp);
- x = iplen -
- ((ip->ip_hl + tcp->th_off) << 2);
+ x -= tcp->th_off << 2;
if (cmdlen == 1) {
match = (cmd->arg1 == x);
break;
@@ -1758,14 +1783,22 @@
case O_ALTQ: {
struct pf_mtag *at;
+ struct m_tag *mtag;
ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
+ /*
+ * ALTQ uses mbuf tags from another
+ * packet filtering system - pf(4).
+ * We allocate a tag in its format
+ * and fill it in, pretending to be pf(4).
+ */
match = 1;
at = pf_find_mtag(m);
if (at != NULL && at->qid != 0)
break;
- at = pf_get_mtag(m);
- if (at == NULL) {
+ mtag = m_tag_get(PACKET_TAG_PF,
+ sizeof(struct pf_mtag), M_NOWAIT | M_ZERO);
+ if (mtag == NULL) {
/*
* Let the packet fall back to the
* default ALTQ.
@@ -1772,6 +1805,8 @@
*/
break;
}
+ m_tag_prepend(m, mtag);
+ at = (struct pf_mtag *)(mtag + 1);
at->qid = altq->qid;
at->hdr = ip;
break;
@@ -2393,55 +2428,49 @@
}
case O_NAT:
+ l = 0; /* exit inner loop */
+ done = 1; /* exit outer loop */
if (!IPFW_NAT_LOADED) {
retval = IP_FW_DENY;
- } else {
- struct cfg_nat *t;
- int nat_id;
+ break;
+ }
- set_match(args, f_pos, chain);
- /* Check if this is 'global' nat rule */
- if (cmd->arg1 == 0) {
- retval = ipfw_nat_ptr(args, NULL, m);
- l = 0;
- done = 1;
- break;
- }
- t = ((ipfw_insn_nat *)cmd)->nat;
- if (t == NULL) {
+ struct cfg_nat *t;
+ int nat_id;
+
+ set_match(args, f_pos, chain);
+ /* Check if this is 'global' nat rule */
+ if (cmd->arg1 == 0) {
+ retval = ipfw_nat_ptr(args, NULL, m);
+ break;
+ }
+ t = ((ipfw_insn_nat *)cmd)->nat;
+ if (t == NULL) {
nat_id = IP_FW_ARG_TABLEARG(cmd->arg1);
t = (*lookup_nat_ptr)(&chain->nat, nat_id);
if (t == NULL) {
retval = IP_FW_DENY;
- l = 0; /* exit inner loop */
- done = 1; /* exit outer loop */
break;
}
if (cmd->arg1 != IP_FW_TABLEARG)
((ipfw_insn_nat *)cmd)->nat = t;
- }
- retval = ipfw_nat_ptr(args, t, m);
}
- l = 0; /* exit inner loop */
- done = 1; /* exit outer loop */
+ retval = ipfw_nat_ptr(args, t, m);
break;
case O_REASS: {
int ip_off;
+ l = 0; /* in any case exit inner loop */
+ if (is_ipv6) /* IPv6 is not supported yet */
+ break;
IPFW_INC_RULE_COUNTER(f, pktlen);
- l = 0; /* in any case exit inner loop */
ip_off = ntohs(ip->ip_off);
/* if not fragmented, go to next rule */
if ((ip_off & (IP_MF | IP_OFFMASK)) == 0)
break;
- /*
- * ip_reass() expects len & off in host
- * byte order.
- */
- SET_HOST_IPLEN(ip);
args->m = m = ip_reass(m);
@@ -2455,7 +2484,6 @@
ip = mtod(m, struct ip *);
hlen = ip->ip_hl << 2;
- SET_NET_IPLEN(ip);
ip->ip_sum = 0;
if (hlen == sizeof(struct ip))
ip->ip_sum = in_cksum_hdr(ip);
@@ -2504,7 +2532,7 @@
retval = IP_FW_DENY;
printf("ipfw: ouch!, skip past end of rules, denying packet\n");
}
- IPFW_RUNLOCK(chain);
+ IPFW_PF_RUNLOCK(chain);
#ifdef __FreeBSD__
if (ucred_cache != NULL)
crfree(ucred_cache);
@@ -2655,7 +2683,7 @@
rule->set = RESVD_SET;
rule->cmd[0].len = 1;
rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY;
- chain->rules = chain->default_rule = chain->map[0] = rule;
+ chain->default_rule = chain->map[0] = rule;
chain->id = rule->id = 1;
IPFW_LOCK_INIT(chain);
@@ -2665,10 +2693,9 @@
V_ipfw_vnet_ready = 1; /* Open for business */
/*
- * Hook the sockopt handler, and the layer2 (V_ip_fw_chk_ptr)
- * and pfil hooks for ipv4 and ipv6. Even if the latter two fail
- * we still keep the module alive because the sockopt and
- * layer2 paths are still useful.
+ * Hook the sockopt handler and pfil hooks for ipv4 and ipv6.
+ * Even if the latter two fail we still keep the module alive
+ * because the sockopt and layer2 paths are still useful.
* ipfw[6]_hook return 0 on success, ENOENT on failure,
* so we can ignore the exact return value and just set a flag.
*
@@ -2679,7 +2706,6 @@
* is checked on each packet because there are no pfil hooks.
*/
V_ip_fw_ctl_ptr = ipfw_ctl;
- V_ip_fw_chk_ptr = ipfw_chk;
error = ipfw_attach_hooks(1);
return (error);
}
@@ -2701,16 +2727,13 @@
* sure the update is propagated and nobody will be in.
*/
(void)ipfw_attach_hooks(0 /* detach */);
- V_ip_fw_chk_ptr = NULL;
V_ip_fw_ctl_ptr = NULL;
IPFW_UH_WLOCK(chain);
IPFW_UH_WUNLOCK(chain);
- IPFW_UH_WLOCK(chain);
- IPFW_WLOCK(chain);
ipfw_dyn_uninit(0); /* run the callout_drain */
- IPFW_WUNLOCK(chain);
+ IPFW_UH_WLOCK(chain);
ipfw_destroy_tables(chain);
reap = NULL;
IPFW_WLOCK(chain);
Modified: trunk/sys/netpfil/ipfw/ip_fw_dynamic.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_fw_dynamic.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_fw_dynamic.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa
*
@@ -24,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_fw_dynamic.c 314667 2017-03-04 13:03:31Z avg $");
#define DEB(x)
#define DDB(x) x
@@ -231,6 +232,7 @@
#endif /* SYSCTL_NODE */
+#ifdef INET6
static __inline int
hash_packet6(struct ipfw_flow_id *id)
{
@@ -242,6 +244,7 @@
(id->dst_port) ^ (id->src_port);
return i;
}
+#endif
/*
* IMPORTANT: the hash function for dynamic rules must be commutative
@@ -485,7 +488,7 @@
V_curr_dyn_buckets, nbuckets);
/* Allocate and initialize new hash */
- dyn_v = malloc(nbuckets * sizeof(ipfw_dyn_rule), M_IPFW,
+ dyn_v = malloc(nbuckets * sizeof(*dyn_v), M_IPFW,
M_WAITOK | M_ZERO);
for (i = 0 ; i < nbuckets; i++)
@@ -713,6 +716,9 @@
id.fib = M_GETFIB(args->m);
if (IS_IP6_FLOW_ID (&(args->f_id))) {
+ bzero(&id.src_ip6, sizeof(id.src_ip6));
+ bzero(&id.dst_ip6, sizeof(id.dst_ip6));
+
if (limit_mask & DYN_SRC_ADDR)
id.src_ip6 = args->f_id.src_ip6;
if (limit_mask & DYN_DST_ADDR)
@@ -809,7 +815,7 @@
#endif
struct tcphdr *th = NULL;
- MGETHDR(m, M_DONTWAIT, MT_DATA);
+ MGETHDR(m, M_NOWAIT, MT_DATA);
if (m == NULL)
return (NULL);
@@ -918,9 +924,8 @@
h->ip_v = 4;
h->ip_hl = sizeof(*h) >> 2;
h->ip_tos = IPTOS_LOWDELAY;
- h->ip_off = 0;
- /* ip_len must be in host format for ip_output */
- h->ip_len = len;
+ h->ip_off = htons(0);
+ h->ip_len = htons(len);
h->ip_ttl = V_ip_defttl;
h->ip_sum = 0;
break;
@@ -1332,7 +1337,7 @@
/* Enforce limit on dynamic rules */
uma_zone_set_max(V_ipfw_dyn_rule_zone, V_dyn_max);
- callout_init(&V_ipfw_timeout, CALLOUT_MPSAFE);
+ callout_init(&V_ipfw_timeout, 1);
/*
* This can potentially be done on first dynamic rule
Modified: trunk/sys/netpfil/ipfw/ip_fw_log.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_fw_log.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_fw_log.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
*
@@ -24,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_fw_log.c 255928 2013-09-28 15:49:36Z philip $");
/*
* Logging support for ipfw
@@ -44,10 +45,13 @@
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
#include <net/ethernet.h> /* for ETHERTYPE_IP */
#include <net/if.h>
+#include <net/if_clone.h>
#include <net/vnet.h>
-#include <net/if_types.h> /* for IFT_ETHER */
+#include <net/if_types.h> /* for IFT_PFLOG */
#include <net/bpf.h> /* for BPF */
#include <netinet/in.h>
@@ -91,7 +95,16 @@
}
#else /* !WITHOUT_BPF */
static struct ifnet *log_if; /* hook to attach to bpf */
+static struct rwlock log_if_lock;
+#define LOGIF_LOCK_INIT(x) rw_init(&log_if_lock, "ipfw log_if lock")
+#define LOGIF_LOCK_DESTROY(x) rw_destroy(&log_if_lock)
+#define LOGIF_RLOCK(x) rw_rlock(&log_if_lock)
+#define LOGIF_RUNLOCK(x) rw_runlock(&log_if_lock)
+#define LOGIF_WLOCK(x) rw_wlock(&log_if_lock)
+#define LOGIF_WUNLOCK(x) rw_wunlock(&log_if_lock)
+static const char ipfwname[] = "ipfw";
+
/* we use this dummy function for all ifnet callbacks */
static int
log_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr)
@@ -101,10 +114,10 @@
static int
ipfw_log_output(struct ifnet *ifp, struct mbuf *m,
- struct sockaddr *dst, struct route *ro)
+ const struct sockaddr *dst, struct route *ro)
{
if (m != NULL)
- m_freem(m);
+ FREE_PKT(m);
return EINVAL;
}
@@ -117,37 +130,105 @@
static const u_char ipfwbroadcastaddr[6] =
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+static int
+ipfw_log_clone_match(struct if_clone *ifc, const char *name)
+{
+
+ return (strncmp(name, ipfwname, sizeof(ipfwname) - 1) == 0);
+}
+
+static int
+ipfw_log_clone_create(struct if_clone *ifc, char *name, size_t len,
+ caddr_t params)
+{
+ int error;
+ int unit;
+ struct ifnet *ifp;
+
+ error = ifc_name2unit(name, &unit);
+ if (error)
+ return (error);
+
+ error = ifc_alloc_unit(ifc, &unit);
+ if (error)
+ return (error);
+
+ ifp = if_alloc(IFT_PFLOG);
+ if (ifp == NULL) {
+ ifc_free_unit(ifc, unit);
+ return (ENOSPC);
+ }
+ ifp->if_dname = ipfwname;
+ ifp->if_dunit = unit;
+ snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", ipfwname, unit);
+ strlcpy(name, ifp->if_xname, len);
+ ifp->if_mtu = 65536;
+ ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
+ ifp->if_init = (void *)log_dummy;
+ ifp->if_ioctl = log_dummy;
+ ifp->if_start = ipfw_log_start;
+ ifp->if_output = ipfw_log_output;
+ ifp->if_addrlen = 6;
+ ifp->if_hdrlen = 14;
+ ifp->if_broadcastaddr = ipfwbroadcastaddr;
+ ifp->if_baudrate = IF_Mbps(10);
+
+ LOGIF_WLOCK();
+ if (log_if == NULL)
+ log_if = ifp;
+ else {
+ LOGIF_WUNLOCK();
+ if_free(ifp);
+ ifc_free_unit(ifc, unit);
+ return (EEXIST);
+ }
+ LOGIF_WUNLOCK();
+ if_attach(ifp);
+ bpfattach(ifp, DLT_EN10MB, 14);
+
+ return (0);
+}
+
+static int
+ipfw_log_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
+{
+ int unit;
+
+ if (ifp == NULL)
+ return (0);
+
+ LOGIF_WLOCK();
+ if (log_if != NULL && ifp == log_if)
+ log_if = NULL;
+ else {
+ LOGIF_WUNLOCK();
+ return (EINVAL);
+ }
+ LOGIF_WUNLOCK();
+
+ unit = ifp->if_dunit;
+ bpfdetach(ifp);
+ if_detach(ifp);
+ if_free(ifp);
+ ifc_free_unit(ifc, unit);
+
+ return (0);
+}
+
+static struct if_clone *ipfw_log_cloner;
+
void
ipfw_log_bpf(int onoff)
{
- struct ifnet *ifp;
if (onoff) {
- if (log_if)
- return;
- ifp = if_alloc(IFT_ETHER);
- if (ifp == NULL)
- return;
- if_initname(ifp, "ipfw", 0);
- ifp->if_mtu = 65536;
- ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
- ifp->if_init = (void *)log_dummy;
- ifp->if_ioctl = log_dummy;
- ifp->if_start = ipfw_log_start;
- ifp->if_output = ipfw_log_output;
- ifp->if_addrlen = 6;
- ifp->if_hdrlen = 14;
- if_attach(ifp);
- ifp->if_broadcastaddr = ipfwbroadcastaddr;
- ifp->if_baudrate = IF_Mbps(10);
- bpfattach(ifp, DLT_EN10MB, 14);
- log_if = ifp;
+ LOGIF_LOCK_INIT();
+ ipfw_log_cloner = if_clone_advanced(ipfwname, 0,
+ ipfw_log_clone_match, ipfw_log_clone_create,
+ ipfw_log_clone_destroy);
} else {
- if (log_if) {
- ether_ifdetach(log_if);
- if_free(log_if);
- }
- log_if = NULL;
+ if_clone_detach(ipfw_log_cloner);
+ LOGIF_LOCK_DESTROY();
}
}
#endif /* !WITHOUT_BPF */
@@ -167,17 +248,27 @@
if (V_fw_verbose == 0) {
#ifndef WITHOUT_BPF
-
- if (log_if == NULL || log_if->if_bpf == NULL)
+ LOGIF_RLOCK();
+ if (log_if == NULL || log_if->if_bpf == NULL) {
+ LOGIF_RUNLOCK();
return;
+ }
if (args->eh) /* layer2, use orig hdr */
BPF_MTAP2(log_if, args->eh, ETHER_HDR_LEN, m);
- else
+ else {
/* Add fake header. Later we will store
* more info in the header.
*/
- BPF_MTAP2(log_if, "DDDDDDSSSSSS\x08\x00", ETHER_HDR_LEN, m);
+ if (ip->ip_v == 4)
+ BPF_MTAP2(log_if, "DDDDDDSSSSSS\x08\x00", ETHER_HDR_LEN, m);
+ else if (ip->ip_v == 6)
+ BPF_MTAP2(log_if, "DDDDDDSSSSSS\x86\xdd", ETHER_HDR_LEN, m);
+ else
+ /* Obviously bogus EtherType. */
+ BPF_MTAP2(log_if, "DDDDDDSSSSSS\xff\xff", ETHER_HDR_LEN, m);
+ }
+ LOGIF_RUNLOCK();
#endif /* !WITHOUT_BPF */
return;
}
Modified: trunk/sys/netpfil/ipfw/ip_fw_nat.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_fw_nat.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_fw_nat.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2008 Paolo Pisati
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_fw_nat.c 266678 2014-05-26 07:02:03Z ae $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -53,8 +54,7 @@
#include <machine/in_cksum.h> /* XXX for in_cksum */
-static VNET_DEFINE(eventhandler_tag, ifaddr_event_tag);
-#define V_ifaddr_event_tag VNET(ifaddr_event_tag)
+static eventhandler_tag ifaddr_event_tag;
static void
ifaddr_change(void *arg __unused, struct ifnet *ifp)
@@ -63,6 +63,8 @@
struct ifaddr *ifa;
struct ip_fw_chain *chain;
+ KASSERT(curvnet == ifp->if_vnet,
+ ("curvnet(%p) differs from iface vnet(%p)", curvnet, ifp->if_vnet));
chain = &V_layer3_chain;
IPFW_WLOCK(chain);
/* Check every nat entry... */
@@ -342,11 +344,11 @@
if (ldt) {
struct tcphdr *th;
struct udphdr *uh;
- u_short cksum;
+ uint16_t ip_len, cksum;
- ip->ip_len = ntohs(ip->ip_len);
+ ip_len = ntohs(ip->ip_len);
cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(ip->ip_p + ip->ip_len - (ip->ip_hl << 2)));
+ htons(ip->ip_p + ip_len - (ip->ip_hl << 2)));
switch (ip->ip_p) {
case IPPROTO_TCP:
@@ -372,7 +374,6 @@
in_delayed_cksum(mcl);
mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
- ip->ip_len = htons(ip->ip_len);
}
args->m = mcl;
return (IP_FW_NAT);
@@ -441,7 +442,7 @@
ptr->ip = cfg->ip;
ptr->redir_cnt = cfg->redir_cnt;
ptr->mode = cfg->mode;
- LibAliasSetMode(ptr->lib, cfg->mode, cfg->mode);
+ LibAliasSetMode(ptr->lib, cfg->mode, ~0);
LibAliasSetAddress(ptr->lib, ptr->ip);
memcpy(ptr->if_name, cfg->if_name, IF_NAMESIZE);
@@ -590,11 +591,38 @@
return(0);
}
+static int
+vnet_ipfw_nat_init(const void *arg __unused)
+{
+
+ V_ipfw_nat_ready = 1;
+ return (0);
+}
+
+static int
+vnet_ipfw_nat_uninit(const void *arg __unused)
+{
+ struct cfg_nat *ptr, *ptr_temp;
+ struct ip_fw_chain *chain;
+
+ chain = &V_layer3_chain;
+ IPFW_WLOCK(chain);
+ LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) {
+ LIST_REMOVE(ptr, _next);
+ del_redir_spool_cfg(ptr, &ptr->redir_chain);
+ LibAliasUninit(ptr->lib);
+ free(ptr, M_IPFW);
+ }
+ flush_nat_ptrs(chain, -1 /* flush all */);
+ V_ipfw_nat_ready = 0;
+ IPFW_WUNLOCK(chain);
+ return (0);
+}
+
static void
ipfw_nat_init(void)
{
- IPFW_WLOCK(&V_layer3_chain);
/* init ipfw hooks */
ipfw_nat_ptr = ipfw_nat;
lookup_nat_ptr = lookup_nat;
@@ -602,9 +630,8 @@
ipfw_nat_del_ptr = ipfw_nat_del;
ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg;
ipfw_nat_get_log_ptr = ipfw_nat_get_log;
- IPFW_WUNLOCK(&V_layer3_chain);
- V_ifaddr_event_tag = EVENTHANDLER_REGISTER(
- ifaddr_event, ifaddr_change,
+
+ ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_change,
NULL, EVENTHANDLER_PRI_ANY);
}
@@ -611,19 +638,8 @@
static void
ipfw_nat_destroy(void)
{
- struct cfg_nat *ptr, *ptr_temp;
- struct ip_fw_chain *chain;
- chain = &V_layer3_chain;
- IPFW_WLOCK(chain);
- LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) {
- LIST_REMOVE(ptr, _next);
- del_redir_spool_cfg(ptr, &ptr->redir_chain);
- LibAliasUninit(ptr->lib);
- free(ptr, M_IPFW);
- }
- EVENTHANDLER_DEREGISTER(ifaddr_event, V_ifaddr_event_tag);
- flush_nat_ptrs(chain, -1 /* flush all */);
+ EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_event_tag);
/* deregister ipfw_nat */
ipfw_nat_ptr = NULL;
lookup_nat_ptr = NULL;
@@ -631,7 +647,6 @@
ipfw_nat_del_ptr = NULL;
ipfw_nat_get_cfg_ptr = NULL;
ipfw_nat_get_log_ptr = NULL;
- IPFW_WUNLOCK(chain);
}
static int
@@ -641,11 +656,9 @@
switch (type) {
case MOD_LOAD:
- ipfw_nat_init();
break;
case MOD_UNLOAD:
- ipfw_nat_destroy();
break;
default:
@@ -661,8 +674,25 @@
0
};
-DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+/* Define startup order. */
+#define IPFW_NAT_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN
+#define IPFW_NAT_MODEVENT_ORDER (SI_ORDER_ANY - 128) /* after ipfw */
+#define IPFW_NAT_MODULE_ORDER (IPFW_NAT_MODEVENT_ORDER + 1)
+#define IPFW_NAT_VNET_ORDER (IPFW_NAT_MODEVENT_ORDER + 2)
+
+DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, IPFW_NAT_SI_SUB_FIREWALL, SI_ORDER_ANY);
MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1);
MODULE_DEPEND(ipfw_nat, ipfw, 2, 2, 2);
MODULE_VERSION(ipfw_nat, 1);
+
+SYSINIT(ipfw_nat_init, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_MODULE_ORDER,
+ ipfw_nat_init, NULL);
+VNET_SYSINIT(vnet_ipfw_nat_init, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_VNET_ORDER,
+ vnet_ipfw_nat_init, NULL);
+
+SYSUNINIT(ipfw_nat_destroy, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_MODULE_ORDER,
+ ipfw_nat_destroy, NULL);
+VNET_SYSUNINIT(vnet_ipfw_nat_uninit, IPFW_NAT_SI_SUB_FIREWALL,
+ IPFW_NAT_VNET_ORDER, vnet_ipfw_nat_uninit, NULL);
+
/* end of file */
Modified: trunk/sys/netpfil/ipfw/ip_fw_pfil.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_fw_pfil.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_fw_pfil.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2004 Andre Oppermann, Internet Business Solutions AG
* All rights reserved.
@@ -25,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_fw_pfil.c 264813 2014-04-23 09:56:17Z ae $");
#include "opt_ipfw.h"
#include "opt_inet.h"
@@ -47,6 +48,7 @@
#include <net/if.h>
#include <net/route.h>
+#include <net/ethernet.h>
#include <net/pfil.h>
#include <net/vnet.h>
@@ -74,10 +76,17 @@
#define V_fw6_enable VNET(fw6_enable)
#endif
+static VNET_DEFINE(int, fwlink_enable) = 0;
+#define V_fwlink_enable VNET(fwlink_enable)
+
int ipfw_chg_hook(SYSCTL_HANDLER_ARGS);
/* Forward declarations. */
static int ipfw_divert(struct mbuf **, int, struct ipfw_rule_ref *, int);
+static int ipfw_check_packet(void *, struct mbuf **, struct ifnet *, int,
+ struct inpcb *);
+static int ipfw_check_frame(void *, struct mbuf **, struct ifnet *, int,
+ struct inpcb *);
#ifdef SYSCTL_NODE
@@ -94,6 +103,11 @@
ipfw_chg_hook, "I", "Enable ipfw+6");
#endif /* INET6 */
+SYSCTL_DECL(_net_link_ether);
+SYSCTL_VNET_PROC(_net_link_ether, OID_AUTO, ipfw,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fwlink_enable), 0,
+ ipfw_chg_hook, "I", "Pass ether pkts through firewall");
+
SYSEND
#endif /* SYSCTL_NODE */
@@ -103,8 +117,8 @@
* dummynet, divert, netgraph or other modules.
* The packet may be consumed.
*/
-int
-ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
+static int
+ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
struct inpcb *inp)
{
struct ip_fw_args args;
@@ -112,10 +126,6 @@
int ipfw;
int ret;
- /* all the processing now uses ip_len in net format */
- if (mtod(*m0, struct ip *)->ip_v == 4)
- SET_NET_IPLEN(mtod(*m0, struct ip *));
-
/* convert dir to IPFW values */
dir = (dir == PFIL_IN) ? DIR_IN : DIR_OUT;
bzero(&args, sizeof(args));
@@ -129,11 +139,8 @@
if (tag != NULL) {
args.rule = *((struct ipfw_rule_ref *)(tag+1));
m_tag_delete(*m0, tag);
- if (args.rule.info & IPFW_ONEPASS) {
- if (mtod(*m0, struct ip *)->ip_v == 4)
- SET_HOST_IPLEN(mtod(*m0, struct ip *));
+ if (args.rule.info & IPFW_ONEPASS)
return (0);
- }
}
args.m = *m0;
@@ -277,11 +284,115 @@
FREE_PKT(*m0);
*m0 = NULL;
}
- if (*m0 && mtod(*m0, struct ip *)->ip_v == 4)
- SET_HOST_IPLEN(mtod(*m0, struct ip *));
+
return ret;
}
+/*
+ * ipfw processing for ethernet packets (in and out).
+ * Interface is NULL from ether_demux, and ifp from
+ * ether_output_frame.
+ */
+static int
+ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *dst, int dir,
+ struct inpcb *inp)
+{
+ struct ether_header *eh;
+ struct ether_header save_eh;
+ struct mbuf *m;
+ int i, ret;
+ struct ip_fw_args args;
+ struct m_tag *mtag;
+
+ /* fetch start point from rule, if any */
+ mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
+ if (mtag == NULL) {
+ args.rule.slot = 0;
+ } else {
+ /* dummynet packet, already partially processed */
+ struct ipfw_rule_ref *r;
+
+ /* XXX can we free it after use ? */
+ mtag->m_tag_id = PACKET_TAG_NONE;
+ r = (struct ipfw_rule_ref *)(mtag + 1);
+ if (r->info & IPFW_ONEPASS)
+ return (0);
+ args.rule = *r;
+ }
+
+ /* I need some amt of data to be contiguous */
+ m = *m0;
+ i = min(m->m_pkthdr.len, max_protohdr);
+ if (m->m_len < i) {
+ m = m_pullup(m, i);
+ if (m == NULL) {
+ *m0 = m;
+ return (0);
+ }
+ }
+ eh = mtod(m, struct ether_header *);
+ save_eh = *eh; /* save copy for restore below */
+ m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */
+
+ args.m = m; /* the packet we are looking at */
+ args.oif = dir == PFIL_OUT ? dst: NULL; /* destination, if any */
+ args.next_hop = NULL; /* we do not support forward yet */
+ args.next_hop6 = NULL; /* we do not support forward yet */
+ args.eh = &save_eh; /* MAC header for bridged/MAC packets */
+ args.inp = NULL; /* used by ipfw uid/gid/jail rules */
+ i = ipfw_chk(&args);
+ m = args.m;
+ if (m != NULL) {
+ /*
+ * Restore Ethernet header, as needed, in case the
+ * mbuf chain was replaced by ipfw.
+ */
+ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
+ if (m == NULL) {
+ *m0 = NULL;
+ return (0);
+ }
+ if (eh != mtod(m, struct ether_header *))
+ bcopy(&save_eh, mtod(m, struct ether_header *),
+ ETHER_HDR_LEN);
+ }
+ *m0 = m;
+
+ ret = 0;
+ /* Check result of ipfw_chk() */
+ switch (i) {
+ case IP_FW_PASS:
+ break;
+
+ case IP_FW_DENY:
+ ret = EACCES;
+ break; /* i.e. drop */
+
+ case IP_FW_DUMMYNET:
+ ret = EACCES;
+ int dir;
+
+ if (ip_dn_io_ptr == NULL)
+ break; /* i.e. drop */
+
+ *m0 = NULL;
+ dir = PROTO_LAYER2 | (dst ? DIR_OUT : DIR_IN);
+ ip_dn_io_ptr(&m, dir, &args);
+ return 0;
+
+ default:
+ KASSERT(0, ("%s: unknown retval", __func__));
+ }
+
+ if (ret != 0) {
+ if (*m0)
+ FREE_PKT(*m0);
+ *m0 = NULL;
+ }
+
+ return ret;
+}
+
/* do the divert, return 1 on error 0 on success */
static int
ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule,
@@ -301,7 +412,7 @@
clone = *m0; /* use the original mbuf */
*m0 = NULL;
} else {
- clone = m_dup(*m0, M_DONTWAIT);
+ clone = m_dup(*m0, M_NOWAIT);
/* If we cannot duplicate the mbuf, we sacrifice the divert
* chain and continue with the tee-ed packet.
*/
@@ -323,7 +434,6 @@
int hlen;
struct mbuf *reass;
- SET_HOST_IPLEN(ip); /* ip_reass wants host order */
reass = ip_reass(clone); /* Reassemble packet. */
if (reass == NULL)
return 0; /* not an error */
@@ -334,7 +444,6 @@
*/
ip = mtod(reass, struct ip *);
hlen = ip->ip_hl << 2;
- SET_NET_IPLEN(ip);
ip->ip_sum = 0;
if (hlen == sizeof(struct ip))
ip->ip_sum = in_cksum_hdr(ip);
@@ -383,13 +492,16 @@
ipfw_hook(int onoff, int pf)
{
struct pfil_head *pfh;
+ void *hook_func;
pfh = pfil_head_get(PFIL_TYPE_AF, pf);
if (pfh == NULL)
return ENOENT;
+ hook_func = (pf == AF_LINK) ? ipfw_check_frame : ipfw_check_packet;
+
(void) (onoff ? pfil_add_hook : pfil_remove_hook)
- (ipfw_check_hook, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh);
+ (hook_func, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh);
return 0;
}
@@ -413,6 +525,12 @@
printf("ipfw6_hook() error\n");
}
#endif
+ if (arg == 0) /* detach */
+ ipfw_hook(0, AF_LINK);
+ else if (V_fwlink_enable && ipfw_hook(1, AF_LINK) != 0) {
+ error = ENOENT;
+ printf("ipfw_link_hook() error\n");
+ }
return error;
}
@@ -419,45 +537,38 @@
int
ipfw_chg_hook(SYSCTL_HANDLER_ARGS)
{
- int enable;
- int oldenable;
+ int newval;
int error;
int af;
- if (arg1 == &VNET_NAME(fw_enable)) {
- enable = V_fw_enable;
+ if (arg1 == &V_fw_enable)
af = AF_INET;
- }
#ifdef INET6
- else if (arg1 == &VNET_NAME(fw6_enable)) {
- enable = V_fw6_enable;
+ else if (arg1 == &V_fw6_enable)
af = AF_INET6;
- }
#endif
+ else if (arg1 == &V_fwlink_enable)
+ af = AF_LINK;
else
return (EINVAL);
- oldenable = enable;
+ newval = *(int *)arg1;
+ /* Handle sysctl change */
+ error = sysctl_handle_int(oidp, &newval, 0, req);
- error = sysctl_handle_int(oidp, &enable, 0, req);
-
if (error)
return (error);
- enable = (enable) ? 1 : 0;
+ /* Formalize new value */
+ newval = (newval) ? 1 : 0;
- if (enable == oldenable)
+ if (*(int *)arg1 == newval)
return (0);
- error = ipfw_hook(enable, af);
+ error = ipfw_hook(newval, af);
if (error)
return (error);
- if (af == AF_INET)
- V_fw_enable = enable;
-#ifdef INET6
- else if (af == AF_INET6)
- V_fw6_enable = enable;
-#endif
+ *(int *)arg1 = newval;
return (0);
}
Modified: trunk/sys/netpfil/ipfw/ip_fw_private.h
===================================================================
--- trunk/sys/netpfil/ipfw/ip_fw_private.h 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_fw_private.h 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
*
@@ -22,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD$
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/ip_fw_private.h 265700 2014-05-08 19:11:41Z melifaro $
*/
#ifndef _IPFW2_PRIVATE_H
@@ -213,12 +214,9 @@
#define V_fw_tables_max VNET(fw_tables_max)
struct ip_fw_chain {
- struct ip_fw *rules; /* list of rules */
- struct ip_fw *reap; /* list of rules to reap */
- struct ip_fw *default_rule;
+ struct ip_fw **map; /* array of rule ptrs to ease lookup */
+ uint32_t id; /* ruleset id */
int n_rules; /* number of static rules */
- int static_len; /* total len of static rules */
- struct ip_fw **map; /* array of rule ptrs to ease lookup */
LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */
struct radix_node_head **tables; /* IPv4 tables */
struct radix_node_head **xtables; /* extended tables */
@@ -225,13 +223,18 @@
uint8_t *tabletype; /* Array of table types */
#if defined( __linux__ ) || defined( _WIN32 )
spinlock_t rwmtx;
+#else
+ struct rwlock rwmtx;
+#endif
+ int static_len; /* total len of static rules */
+ uint32_t gencnt; /* NAT generation count */
+ struct ip_fw *reap; /* list of rules to reap */
+ struct ip_fw *default_rule;
+#if defined( __linux__ ) || defined( _WIN32 )
spinlock_t uh_lock;
#else
- struct rwlock rwmtx;
struct rwlock uh_lock; /* lock for upper half */
#endif
- uint32_t id; /* ruleset id */
- uint32_t gencnt; /* generation count */
};
struct sockopt; /* used by tcp_var.h */
@@ -259,7 +262,7 @@
(_cntr)->bcnt = 0; \
} while (0)
-#define IP_FW_ARG_TABLEARG(a) ((a) == IP_FW_TABLEARG) ? tablearg : (a)
+#define IP_FW_ARG_TABLEARG(a) (((a) == IP_FW_TABLEARG) ? tablearg : (a))
/*
* The lock is heavily used by ip_fw2.c (the main file) and ip_fw_nat.c
* so the variable and the macros must be here.
@@ -278,10 +281,12 @@
#define IPFW_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_RLOCKED)
#define IPFW_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_WLOCKED)
-#define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx)
-#define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx)
-#define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx)
-#define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx)
+#define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx)
+#define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx)
+#define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx)
+#define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx)
+#define IPFW_PF_RLOCK(p) IPFW_RLOCK(p)
+#define IPFW_PF_RUNLOCK(p) IPFW_RUNLOCK(p)
#define IPFW_UH_RLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_RLOCKED)
#define IPFW_UH_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->uh_lock, RA_WLOCKED)
@@ -298,10 +303,6 @@
int ipfw_chk(struct ip_fw_args *args);
void ipfw_reap_rules(struct ip_fw *head);
-/* In ip_fw_pfil */
-int ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
- struct inpcb *inp);
-
/* In ip_fw_table.c */
struct radix_node;
int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
@@ -329,9 +330,11 @@
typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *);
typedef int ipfw_nat_cfg_t(struct sockopt *);
+VNET_DECLARE(int, ipfw_nat_ready);
+#define V_ipfw_nat_ready VNET(ipfw_nat_ready)
+#define IPFW_NAT_LOADED (V_ipfw_nat_ready)
+
extern ipfw_nat_t *ipfw_nat_ptr;
-#define IPFW_NAT_LOADED (ipfw_nat_ptr != NULL)
-
extern ipfw_nat_cfg_t *ipfw_nat_cfg_ptr;
extern ipfw_nat_cfg_t *ipfw_nat_del_ptr;
extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
Modified: trunk/sys/netpfil/ipfw/ip_fw_sockopt.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_fw_sockopt.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_fw_sockopt.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
*
@@ -26,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_fw_sockopt.c 265700 2014-05-08 19:11:41Z melifaro $");
/*
* Sockopt support for ipfw. The routines here implement
@@ -159,13 +160,11 @@
int i, l, insert_before;
struct ip_fw **map; /* the new array of pointers */
- if (chain->rules == NULL || input_rule->rulenum > IPFW_DEFAULT_RULE-1)
+ if (chain->map == NULL || input_rule->rulenum > IPFW_DEFAULT_RULE - 1)
return (EINVAL);
l = RULESIZE(input_rule);
rule = malloc(l, M_IPFW, M_WAITOK | M_ZERO);
- if (rule == NULL)
- return (ENOSPC);
/* get_map returns with IPFW_UH_WLOCK if successful */
map = get_map(chain, 1, 0 /* not locked */);
if (map == NULL) {
@@ -655,7 +654,7 @@
case O_IP_SRC_LOOKUP:
case O_IP_DST_LOOKUP:
- if (cmd->arg1 >= IPFW_TABLES_MAX) {
+ if (cmd->arg1 >= V_fw_tables_max) {
printf("ipfw: invalid table number %d\n",
cmd->arg1);
return (EINVAL);
@@ -1005,8 +1004,6 @@
if (size >= sopt->sopt_valsize)
break;
buf = malloc(size, M_TEMP, M_WAITOK);
- if (buf == NULL)
- break;
IPFW_UH_RLOCK(chain);
/* check again how much space we need */
want = chain->static_len + ipfw_dyn_len();
@@ -1043,8 +1040,10 @@
if (sopt->sopt_valsize == RULESIZE7(rule)) {
is7 = 1;
error = convert_rule_to_8(rule);
- if (error)
+ if (error) {
+ free(rule, M_TEMP);
return error;
+ }
if (error == 0)
error = check_ipfw_struct(rule, RULESIZE(rule));
} else {
@@ -1060,12 +1059,14 @@
if (is7) {
error = convert_rule_to_7(rule);
size = RULESIZE7(rule);
- if (error)
+ if (error) {
+ free(rule, M_TEMP);
return error;
+ }
}
error = sooptcopyout(sopt, rule, size);
+ }
}
- }
free(rule, M_TEMP);
break;
Modified: trunk/sys/netpfil/ipfw/ip_fw_table.c
===================================================================
--- trunk/sys/netpfil/ipfw/ip_fw_table.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/ip_fw_table.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
*
@@ -24,7 +25,7 @@
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: stable/10/sys/netpfil/ipfw/ip_fw_table.c 287963 2015-09-18 17:29:24Z melifaro $");
/*
* Lookup table support for ipfw
@@ -69,7 +70,7 @@
#include <security/mac/mac_framework.h>
#endif
-MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
+static MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
struct table_entry {
struct radix_node rn[2];
@@ -123,6 +124,7 @@
#define OFF_LEN_IFACE (8 * offsetof(struct xaddr_iface, ifname))
+#ifdef INET6
static inline void
ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
{
@@ -132,6 +134,7 @@
*cp++ = 0xFFFFFFFF;
*cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0);
}
+#endif
int
ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, void *paddr,
@@ -542,7 +545,7 @@
return (0);
KEY_LEN(sa) = KEY_LEN_INET;
sa.sin_addr.s_addr = addr;
- ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh));
+ ent = (struct table_entry *)(rnh->rnh_matchaddr(&sa, rnh));
if (ent != NULL) {
*val = ent->value;
return (1);
@@ -568,7 +571,7 @@
case IPFW_TABLE_CIDR:
KEY_LEN(sa6) = KEY_LEN_INET6;
memcpy(&sa6.sin6_addr, paddr, sizeof(struct in6_addr));
- xent = (struct table_xentry *)(rnh->rnh_lookup(&sa6, NULL, rnh));
+ xent = (struct table_xentry *)(rnh->rnh_matchaddr(&sa6, rnh));
break;
case IPFW_TABLE_INTERFACE:
@@ -576,7 +579,7 @@
strlcpy(iface.ifname, (char *)paddr, IF_NAMESIZE) + 1;
/* Assume direct match */
/* FIXME: Add interface pattern matching */
- xent = (struct table_xentry *)(rnh->rnh_lookup(&iface, NULL, rnh));
+ xent = (struct table_xentry *)(rnh->rnh_matchaddr(&iface, rnh));
break;
default:
@@ -695,6 +698,7 @@
xent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr));
/* Save IPv4 address as deprecated IPv6 compatible */
xent->k.addr6.s6_addr32[3] = n->addr.sin_addr.s_addr;
+ xent->flags = IPFW_TCF_INET;
xent->value = n->value;
tbl->cnt++;
return (0);
Modified: trunk/sys/netpfil/ipfw/test/Makefile
===================================================================
--- trunk/sys/netpfil/ipfw/test/Makefile 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/test/Makefile 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,5 +1,6 @@
+# $MidnightBSD$
#
-# $FreeBSD: release/9.2.0/sys/netpfil/ipfw/test/Makefile 205417 2010-03-21 16:30:32Z luigi $
+# $FreeBSD: stable/10/sys/netpfil/ipfw/test/Makefile 205417 2010-03-21 16:30:32Z luigi $
#
# Makefile for building userland tests
# this is written in a form compatible with gmake
Modified: trunk/sys/netpfil/ipfw/test/dn_test.h
===================================================================
--- trunk/sys/netpfil/ipfw/test/dn_test.h 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/test/dn_test.h 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
/*
- * $FreeBSD: release/9.2.0/sys/netpfil/ipfw/test/dn_test.h 204866 2010-03-08 11:27:39Z luigi $
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/test/dn_test.h 204866 2010-03-08 11:27:39Z luigi $
*
* userspace compatibility code for dummynet schedulers
*/
Modified: trunk/sys/netpfil/ipfw/test/main.c
===================================================================
--- trunk/sys/netpfil/ipfw/test/main.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/test/main.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
/*
- * $FreeBSD: release/9.2.0/sys/netpfil/ipfw/test/main.c 204591 2010-03-02 17:40:48Z luigi $
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/test/main.c 204591 2010-03-02 17:40:48Z luigi $
*
* Testing program for schedulers
*
Modified: trunk/sys/netpfil/ipfw/test/mylist.h
===================================================================
--- trunk/sys/netpfil/ipfw/test/mylist.h 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/test/mylist.h 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
/*
- * $FreeBSD: release/9.2.0/sys/netpfil/ipfw/test/mylist.h 204735 2010-03-04 21:01:59Z luigi $
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/test/mylist.h 204735 2010-03-04 21:01:59Z luigi $
*
* linux-like bidirectional lists
*/
Modified: trunk/sys/netpfil/ipfw/test/test_dn_heap.c
===================================================================
--- trunk/sys/netpfil/ipfw/test/test_dn_heap.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/test/test_dn_heap.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,3 +1,4 @@
+/* $MidnightBSD$ */
/*-
* Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
* All rights reserved
@@ -27,7 +28,7 @@
/*
* Userland code for testing binary heaps and hash tables
*
- * $FreeBSD: release/9.2.0/sys/netpfil/ipfw/test/test_dn_heap.c 204591 2010-03-02 17:40:48Z luigi $
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/test/test_dn_heap.c 204591 2010-03-02 17:40:48Z luigi $
*/
#include <sys/cdefs.h>
Modified: trunk/sys/netpfil/ipfw/test/test_dn_sched.c
===================================================================
--- trunk/sys/netpfil/ipfw/test/test_dn_sched.c 2018-05-25 13:03:43 UTC (rev 9922)
+++ trunk/sys/netpfil/ipfw/test/test_dn_sched.c 2018-05-25 13:05:12 UTC (rev 9923)
@@ -1,5 +1,6 @@
+/* $MidnightBSD$ */
/*
- * $FreeBSD: release/9.2.0/sys/netpfil/ipfw/test/test_dn_sched.c 204736 2010-03-04 21:52:40Z luigi $
+ * $FreeBSD: stable/10/sys/netpfil/ipfw/test/test_dn_sched.c 204736 2010-03-04 21:52:40Z luigi $
*
* library functions for userland testing of dummynet schedulers
*/
More information about the Midnightbsd-cvs
mailing list