1 |
/*- |
2 |
* Copyright (c) 1998-2010 Luigi Rizzo, Universita` di Pisa |
3 |
* Portions Copyright (c) 2000 Akamba Corp. |
4 |
* All rights reserved |
5 |
* |
6 |
* Redistribution and use in source and binary forms, with or without |
7 |
* modification, are permitted provided that the following conditions |
8 |
* are met: |
9 |
* 1. Redistributions of source code must retain the above copyright |
10 |
* notice, this list of conditions and the following disclaimer. |
11 |
* 2. Redistributions in binary form must reproduce the above copyright |
12 |
* notice, this list of conditions and the following disclaimer in the |
13 |
* documentation and/or other materials provided with the distribution. |
14 |
* |
15 |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
16 |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
18 |
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
19 |
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
20 |
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
21 |
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
22 |
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
23 |
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
24 |
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
25 |
* SUCH DAMAGE. |
26 |
* |
27 |
* $MidnightBSD$ |
28 |
*/ |
29 |
|
30 |
#ifndef _IP_DUMMYNET_H |
31 |
#define _IP_DUMMYNET_H |
32 |
|
33 |
/* |
34 |
* Definition of the kernel-userland API for dummynet. |
35 |
* |
36 |
* Setsockopt() and getsockopt() pass a batch of objects, each |
37 |
* of them starting with a "struct dn_id" which should fully identify |
38 |
* the object and its relation with others in the sequence. |
39 |
* The first object in each request should have |
40 |
* type= DN_CMD_*, id = DN_API_VERSION. |
41 |
* For other objects, type and subtype specify the object, len indicates |
42 |
* the total length including the header, and 'id' identifies the specific |
43 |
* object. |
44 |
* |
45 |
* Most objects are numbered with an identifier in the range 1..65535. |
46 |
* DN_MAX_ID indicates the first value outside the range. |
47 |
*/ |
48 |
|
49 |
/* Value carried in the id field of the first object of each request. */
#define DN_API_VERSION	12500000
/* First value outside the 1..65535 range used to number most objects. */
#define DN_MAX_ID	0x10000
51 |
|
52 |
/*
 * Header that starts every object passed through the dummynet
 * setsockopt()/getsockopt() interface.
 */
struct dn_id {
	uint16_t	len;	/* total obj len including this header */
	uint8_t		type;	/* one of the DN_* type/command values */
	uint8_t		subtype;
	uint32_t	id;	/* generic id */
};
58 |
|
59 |
/*
 * These values are in the type field of struct dn_id.
 * To preserve the ABI, never rearrange the list or delete
 * entries with the exception of DN_LAST
 */
enum {
	DN_NONE = 0,
	DN_LINK = 1,
	DN_FS,
	DN_SCH,
	DN_SCH_I,
	DN_QUEUE,
	DN_DELAY_LINE,
	DN_PROFILE,
	DN_FLOW,		/* struct dn_flow */
	DN_TEXT,		/* opaque text is the object */

	DN_CMD_CONFIG = 0x80,	/* objects follow */
	DN_CMD_DELETE,		/* subtype + list of entries */
	DN_CMD_GET,		/* subtype + list of entries */
	DN_CMD_FLUSH,
	/* for compatibility with FreeBSD 7.2/8 */
	DN_COMPAT_PIPE,
	DN_COMPAT_QUEUE,
	DN_GET_COMPAT,

	/* special commands for emulation of sysctl variables */
	DN_SYSCTL_GET,
	DN_SYSCTL_SET,

	DN_LAST,
};
91 |
|
92 |
/* Subtype for schedulers, flowset and the like. */
enum {
	DN_SCHED_UNKNOWN = 0,
	DN_SCHED_FIFO = 1,
	DN_SCHED_WF2QP = 2,
	/* others are in individual modules */
};
98 |
|
99 |
/* User flags; each value is a distinct bit. */
enum {
	DN_HAVE_MASK	 = 0x0001,	/* fs or sched has a mask */
	DN_NOERROR	 = 0x0002,	/* do not report errors */
	DN_QHT_HASH	 = 0x0004,	/* qht is a hash table */
	DN_QSIZE_BYTES	 = 0x0008,	/* queue size is in bytes */
	DN_HAS_PROFILE	 = 0x0010,	/* a link has a profile */
	DN_IS_RED	 = 0x0020,
	DN_IS_GENTLE_RED = 0x0040,
	DN_PIPE_CMD	 = 0x1000,	/* pipe config... */
};
109 |
|
110 |
/* |
111 |
* link template. |
112 |
*/ |
113 |
struct dn_link { |
114 |
struct dn_id oid; |
115 |
|
116 |
/* |
117 |
* Userland sets bw and delay in bits/s and milliseconds. |
118 |
* The kernel converts this back and forth to bits/tick and ticks. |
119 |
* XXX what about burst ? |
120 |
*/ |
121 |
int32_t link_nr; |
122 |
int bandwidth; /* bit/s or bits/tick. */ |
123 |
int delay; /* ms and ticks */ |
124 |
uint64_t burst; /* scaled. bits*Hz XXX */ |
125 |
}; |
126 |
|
127 |
/* |
128 |
* A flowset, which is a template for flows. Contains parameters |
129 |
* from the command line: id, target scheduler, queue sizes, plr, |
130 |
* flow masks, buckets for the flow hash, and possibly scheduler- |
131 |
* specific parameters (weight, quantum and so on). |
132 |
*/ |
133 |
struct dn_fs { |
134 |
struct dn_id oid; |
135 |
uint32_t fs_nr; /* the flowset number */ |
136 |
uint32_t flags; /* userland flags */ |
137 |
int qsize; /* queue size in slots or bytes */ |
138 |
int32_t plr; /* PLR, pkt loss rate (2^31-1 means 100%) */ |
139 |
uint32_t buckets; /* buckets used for the queue hash table */ |
140 |
|
141 |
struct ipfw_flow_id flow_mask; |
142 |
uint32_t sched_nr; /* the scheduler we attach to */ |
143 |
/* generic scheduler parameters. Leave them at -1 if unset. |
144 |
* Now we use 0: weight, 1: lmax, 2: priority |
145 |
*/ |
146 |
int par[4]; |
147 |
|
148 |
/* RED/GRED parameters. |
149 |
* weight and probabilities are in the range 0..1 represented |
150 |
* in fixed point arithmetic with SCALE_RED decimal bits. |
151 |
*/ |
152 |
#define SCALE_RED 16 |
153 |
#define SCALE(x) ( (x) << SCALE_RED ) |
154 |
#define SCALE_VAL(x) ( (x) >> SCALE_RED ) |
155 |
#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) |
156 |
int w_q ; /* queue weight (scaled) */ |
157 |
int max_th ; /* maximum threshold for queue (scaled) */ |
158 |
int min_th ; /* minimum threshold for queue (scaled) */ |
159 |
int max_p ; /* maximum value for p_b (scaled) */ |
160 |
|
161 |
}; |
162 |
|
163 |
/* |
164 |
* dn_flow collects flow_id and stats for queues and scheduler |
165 |
* instances, and is used to pass these info to userland. |
166 |
* oid.type/oid.subtype describe the object, oid.id is number |
167 |
* of the parent object. |
168 |
*/ |
169 |
struct dn_flow { |
170 |
struct dn_id oid; |
171 |
struct ipfw_flow_id fid; |
172 |
uint64_t tot_pkts; /* statistics counters */ |
173 |
uint64_t tot_bytes; |
174 |
uint32_t length; /* Queue lenght, in packets */ |
175 |
uint32_t len_bytes; /* Queue lenght, in bytes */ |
176 |
uint32_t drops; |
177 |
}; |
178 |
|
179 |
|
180 |
/* |
181 |
* Scheduler template, mostly indicating the name, number, |
182 |
* sched_mask and buckets. |
183 |
*/ |
184 |
struct dn_sch { |
185 |
struct dn_id oid; |
186 |
uint32_t sched_nr; /* N, scheduler number */ |
187 |
uint32_t buckets; /* number of buckets for the instances */ |
188 |
uint32_t flags; /* have_mask, ... */ |
189 |
|
190 |
char name[16]; /* null terminated */ |
191 |
/* mask to select the appropriate scheduler instance */ |
192 |
struct ipfw_flow_id sched_mask; /* M */ |
193 |
}; |
194 |
|
195 |
|
196 |
/* A delay profile is attached to a link. |
197 |
* Note that a profile, as any other object, cannot be longer than 2^16 |
198 |
*/ |
199 |
#define ED_MAX_SAMPLES_NO 1024 |
200 |
struct dn_profile { |
201 |
struct dn_id oid; |
202 |
/* fields to simulate a delay profile */ |
203 |
#define ED_MAX_NAME_LEN 32 |
204 |
char name[ED_MAX_NAME_LEN]; |
205 |
int link_nr; |
206 |
int loss_level; |
207 |
int bandwidth; // XXX use link bandwidth? |
208 |
int samples_no; /* actual len of samples[] */ |
209 |
int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */ |
210 |
}; |
211 |
|
212 |
|
213 |
|
214 |
/*
 * Overall structure of dummynet

In dummynet, packets are selected with the firewall rules, and passed
to two different objects: PIPE or QUEUE (bad name).

A QUEUE defines a classifier, which groups packets into flows
according to a 'mask', puts them into independent queues (one
per flow) with configurable size and queue management policy,
and passes flows to a scheduler:

            (flow_mask|sched_mask)  sched_mask
        +---------+   weight Wx  +-------------+
        |         |->-[flow]-->--|             |-+
   -->--| QUEUE x |   ...        |             | |
        |         |->-[flow]-->--| SCHEDuler N | |
        +---------+              |             | |
            ...                  |             +--[LINK N]-->--
        +---------+   weight Wy  |             | +--[LINK N]-->--
        |         |->-[flow]-->--|             | |
   -->--| QUEUE y |   ...        |             | |
        |         |->-[flow]-->--|             | |
        +---------+              +-------------+ |
                                   +-------------+

Many QUEUE objects can connect to the same scheduler, each
QUEUE object can have its own set of parameters.

In turn, the SCHEDuler 'forks' multiple instances according
to a 'sched_mask', each instance manages its own set of queues
and transmits on a private instance of a configurable LINK.

A PIPE is a simplified version of the above, where there
is no flow_mask, and each scheduler instance handles a single queue.

The following data structures (visible from userland) describe
the objects used by dummynet:

 + dn_link, contains the main configuration parameters related
   to delay and bandwidth;
 + dn_profile describes a delay profile;
 + dn_flow describes the flow status (flow id, statistics)

 + dn_sch describes a scheduler
 + dn_fs describes a flowset (mask, weight, queue parameters)

 *
 */
262 |
|
263 |
#endif /* _IP_DUMMYNET_H */ |