/* pkt_sched.h revision 1b1177ed5f3a8e34b1c7c7c79922c31448f212c9 */
#ifndef __LINUX_PKT_SCHED_H
#define __LINUX_PKT_SCHED_H

#include <linux/types.h>

/* Logical priority bands that do not depend on a specific packet scheduler.
   Every scheduler maps them to real traffic classes if it has no more
   precise mechanism to classify packets.

   These numbers have no special meaning, though their coincidence
   with obsolete IPv6 values is not accidental :-). New IPv6 drafts
   preferred full anarchy inspired by the diffserv group.

   Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy
   class; as a rule it will be handled with more care than
   filler or even bulk.
 */

#define TC_PRIO_BESTEFFORT		0
#define TC_PRIO_FILLER			1
#define TC_PRIO_BULK			2
#define TC_PRIO_INTERACTIVE_BULK	4
#define TC_PRIO_INTERACTIVE		6
#define TC_PRIO_CONTROL			7

/* Highest logical priority value; tc_prio_qopt.priomap has TC_PRIO_MAX+1 entries. */
#define TC_PRIO_MAX			15

/* Generic queue statistics, available for all the elements.
   Particular schedulers may also have their private records.
 */

struct tc_stats {
	__u64	bytes;			/* Number of enqueued bytes */
	__u32	packets;		/* Number of enqueued packets	*/
	__u32	drops;			/* Packets dropped because of lack of resources */
	__u32	overlimits;		/* Number of throttle events when this
					 * flow goes out of allocated bandwidth */
	__u32	bps;			/* Current flow byte rate */
	__u32	pps;			/* Current flow packet rate */
	__u32	qlen;			/* NOTE(review): presumably current queue length - confirm */
	__u32	backlog;		/* NOTE(review): presumably queued-but-unsent amount - confirm units */
};

/* Estimator parameters: two single-byte fields.
   NOTE(review): units/encoding of both fields are not documented here.
 */
struct tc_estimator {
	signed char	interval;
	unsigned char	ewma_log;
};

/* "Handles"
   ---------

    All the traffic control objects have 32bit identifiers, or "handles".

    From the user API viewpoint they can be considered opaque numbers,
    but they actually always consist of two fields: a major and a
    minor number. The kernel interprets these fields specially;
    applications may rely on that, though it is not recommended.

    E.g. qdisc handles always have a minor number equal to zero;
    classes (or flows) have a major equal to the parent qdisc major, and
    a minor uniquely identifying the class inside the qdisc.

    Macros to manipulate handles:
 */

/* Major number lives in the upper 16 bits, minor in the lower 16 bits. */
#define TC_H_MAJ_MASK (0xFFFF0000U)
#define TC_H_MIN_MASK (0x0000FFFFU)
/* Extract the (unshifted) major / minor field of handle h. */
#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK)
#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK)
/* Combine the major bits of maj with the minor bits of min. */
#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK))

/* Reserved handle values. */
#define TC_H_UNSPEC	(0U)
#define TC_H_ROOT	(0xFFFFFFFFU)
#define TC_H_INGRESS	(0xFFFFFFF1U)

/* Rate specification; embedded in other option structures of this file
   (tc_tbf_qopt.rate/peakrate, tc_htb_opt.rate/ceil).
 */
struct tc_ratespec {
	unsigned char	cell_log;
	unsigned char	__reserved;
	unsigned short	overhead;
	short		cell_align;
	unsigned short	mpu;
	__u32		rate;
};

/* NOTE(review): presumably the size of a rate table - confirm units. */
#define TC_RTAB_SIZE	1024

/* Size specification.
   NOTE(review): field units are not documented in this file.
 */
struct tc_sizespec {
	unsigned char	cell_log;
	unsigned char	size_log;
	short		cell_align;
	int		overhead;
	unsigned int	linklayer;
	unsigned int	mpu;
	unsigned int	mtu;
	unsigned int	tsize;
};

/* TCA_STAB_* identifiers. __TCA_STAB_MAX is a sentinel and must stay
   last so that TCA_STAB_MAX is the highest real identifier.
 */
enum {
	TCA_STAB_UNSPEC,
	TCA_STAB_BASE,
	TCA_STAB_DATA,
	__TCA_STAB_MAX
};

#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)

/* FIFO section */

/* Options for the FIFO queues (bfifo counts bytes, pfifo counts packets). */
struct tc_fifo_qopt {
	__u32	limit;	/* Queue length: bytes for bfifo, packets for pfifo */
};

/* PRIO section */

/* NOTE(review): presumably the upper and lower bounds for
   tc_prio_qopt.bands - confirm against the scheduler code. */
#define TCQ_PRIO_BANDS	16
#define TCQ_MIN_PRIO_BANDS 2

118struct tc_prio_qopt {
119	int	bands;			/* Number of bands */
120	__u8	priomap[TC_PRIO_MAX+1];	/* Map: logical priority -> PRIO band */
121};
122
/* MULTIQ section */

/* MULTIQ options: band counts. */
struct tc_multiq_qopt {
	__u16	bands;			/* Number of bands */
	__u16	max_bands;		/* Maximum number of queues */
};

130/* TBF section */
131
132struct tc_tbf_qopt {
133	struct tc_ratespec rate;
134	struct tc_ratespec peakrate;
135	__u32		limit;
136	__u32		buffer;
137	__u32		mtu;
138};
139
/* TCA_TBF_* identifiers. __TCA_TBF_MAX is a sentinel and must stay
   last so that TCA_TBF_MAX is the highest real identifier.
 */
enum {
	TCA_TBF_UNSPEC,
	TCA_TBF_PARMS,
	TCA_TBF_RTAB,
	TCA_TBF_PTAB,
	__TCA_TBF_MAX,
};

#define TCA_TBF_MAX (__TCA_TBF_MAX - 1)


/* TEQL section */

/* TEQL does not require any parameters */

/* SFQ section */

/* SFQ options. */
struct tc_sfq_qopt {
	unsigned int	quantum;	/* Bytes per round allocated to flow */
	int		perturb_period;	/* Period of hash perturbation */
	__u32		limit;		/* Maximal packets in queue */
	unsigned int	divisor;	/* Hash divisor  */
	unsigned int	flows;		/* Maximal number of flows  */
};

/* SFQ extended statistics: a single signed 32-bit value. */
struct tc_sfq_xstats {
	__s32		allot;
};

/*
 *  NOTE: limit, divisor and flows are hardwired in the code at the moment.
 *
 *	limit=flows=128, divisor=1024;
 *
 *	The only reason for this is efficiency; it is possible
 *	to change these parameters at compile time.
 */

/* RED section */

/* TCA_RED_* identifiers. __TCA_RED_MAX is a sentinel and must stay
   last so that TCA_RED_MAX is the highest real identifier.
 */
enum {
	TCA_RED_UNSPEC,
	TCA_RED_PARMS,
	TCA_RED_STAB,
	__TCA_RED_MAX,
};

#define TCA_RED_MAX (__TCA_RED_MAX - 1)

/* RED options. The TC_RED_* macros are defined next to the flags member;
   tc_choke_qopt.flags refers to them as "RED flags".
 */
struct tc_red_qopt {
	__u32		limit;		/* HARD maximal queue length (bytes)	*/
	__u32		qth_min;	/* Min average length threshold (bytes) */
	__u32		qth_max;	/* Max average length threshold (bytes) */
	unsigned char	Wlog;		/* log(W)		*/
	unsigned char	Plog;		/* log(P_max/(qth_max-qth_min))	*/
	unsigned char	Scell_log;	/* cell size for idle damping */
	unsigned char	flags;
#define TC_RED_ECN	1
#define TC_RED_HARDDROP	2
};

/* RED extended statistics: four 32-bit counters. */
struct tc_red_xstats {
	__u32		early;		/* Early drops */
	__u32		pdrop;		/* Drops due to queue limits */
	__u32		other;		/* Drops due to drop() calls */
	__u32		marked;		/* Marked packets */
};

/* GRED section */

/* NOTE(review): presumably the maximum number of DPs (see
   tc_gred_qopt.DP, tc_gred_sopt.DPs) - confirm. */
#define MAX_DPs 16

/* TCA_GRED_* identifiers. __TCA_GRED_MAX is a sentinel and must stay
   last so that TCA_GRED_MAX is the highest real identifier.
 */
enum {
	TCA_GRED_UNSPEC,
	TCA_GRED_PARMS,
	TCA_GRED_STAB,
	TCA_GRED_DPS,
	__TCA_GRED_MAX,
};

#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)

/* GRED per-VQ options and counters.
   NOTE(review): the uncommented members (backlog .. pdrop, packets,
   bytesin) look like statistics; their exact meaning is not stated here.
 */
struct tc_gred_qopt {
	__u32		limit;        /* HARD maximal queue length (bytes)    */
	__u32		qth_min;      /* Min average length threshold (bytes) */
	__u32		qth_max;      /* Max average length threshold (bytes) */
	__u32		DP;           /* up to 2^32 DPs */
	__u32		backlog;
	__u32		qave;
	__u32		forced;
	__u32		early;
	__u32		other;
	__u32		pdrop;
	__u8		Wlog;         /* log(W)               */
	__u8		Plog;         /* log(P_max/(qth_max-qth_min)) */
	__u8		Scell_log;    /* cell size for idle damping */
	__u8		prio;         /* prio of this VQ */
	__u32		packets;
	__u32		bytesin;
};

/* gred setup */
struct tc_gred_sopt {
	__u32		DPs;
	__u32		def_DP;
	__u8		grio;
	__u8		flags;
	__u16		pad1;	/* explicit padding up to a 4-byte multiple */
};

/* CHOKe section */

/* TCA_CHOKE_* identifiers. __TCA_CHOKE_MAX is a sentinel and must stay
   last so that TCA_CHOKE_MAX is the highest real identifier.
 */
enum {
	TCA_CHOKE_UNSPEC,
	TCA_CHOKE_PARMS,
	TCA_CHOKE_STAB,
	__TCA_CHOKE_MAX,
};

#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1)

/* CHOKe options; same member layout as tc_red_qopt, but the limit and
   thresholds are counted in packets.
 */
struct tc_choke_qopt {
	__u32		limit;		/* Hard queue length (packets)	*/
	__u32		qth_min;	/* Min average threshold (packets) */
	__u32		qth_max;	/* Max average threshold (packets) */
	unsigned char	Wlog;		/* log(W)		*/
	unsigned char	Plog;		/* log(P_max/(qth_max-qth_min))	*/
	unsigned char	Scell_log;	/* cell size for idle damping */
	unsigned char	flags;		/* see RED flags */
};

/* CHOKe extended statistics: five 32-bit counters. */
struct tc_choke_xstats {
	__u32		early;          /* Early drops */
	__u32		pdrop;          /* Drops due to queue limits */
	__u32		other;          /* Drops due to drop() calls */
	__u32		marked;         /* Marked packets */
	__u32		matched;	/* Drops due to flow match */
};

/* HTB section */
#define TC_HTB_NUMPRIO		8
#define TC_HTB_MAXDEPTH		8
#define TC_HTB_PROTOVER		3 /* the same as HTB and TC's major */

284struct tc_htb_opt {
285	struct tc_ratespec 	rate;
286	struct tc_ratespec 	ceil;
287	__u32	buffer;
288	__u32	cbuffer;
289	__u32	quantum;
290	__u32	level;		/* out only */
291	__u32	prio;
292};
/* HTB global (per-qdisc) options plus one statistics counter. */
struct tc_htb_glob {
	__u32 version;		/* to match HTB/TC */
	__u32 rate2quantum;	/* bps->quantum divisor */
	__u32 defcls;		/* default class number */
	__u32 debug;		/* debug flags */

	/* stats */
	__u32 direct_pkts; /* count of non shaped packets */
};
/* TCA_HTB_* identifiers. __TCA_HTB_MAX is a sentinel and must stay
   last so that TCA_HTB_MAX is the highest real identifier.
 */
enum {
	TCA_HTB_UNSPEC,
	TCA_HTB_PARMS,
	TCA_HTB_INIT,
	TCA_HTB_CTAB,
	TCA_HTB_RTAB,
	__TCA_HTB_MAX,
};

#define TCA_HTB_MAX (__TCA_HTB_MAX - 1)

/* HTB extended statistics: five 32-bit values. */
struct tc_htb_xstats {
	__u32 lends;
	__u32 borrows;
	__u32 giants;	/* too big packets (rate will not be accurate) */
	__u32 tokens;
	__u32 ctokens;
};

/* HFSC section */

/* HFSC qdisc options. */
struct tc_hfsc_qopt {
	__u16	defcls;		/* default class */
};

/* Two-segment service curve: slope m1 for the first d microseconds
   (x-projection), then slope m2.
 */
struct tc_service_curve {
	__u32	m1;		/* slope of the first segment in bps */
	__u32	d;		/* x-projection of the first segment in us */
	__u32	m2;		/* slope of the second segment in bps */
};

/* HFSC class statistics. */
struct tc_hfsc_stats {
	__u64	work;		/* total work done */
	__u64	rtwork;		/* work done by real-time criteria */
	__u32	period;		/* current period */
	__u32	level;		/* class level in hierarchy */
};

/* TCA_HFSC_* identifiers. __TCA_HFSC_MAX is a sentinel and must stay
   last so that TCA_HFSC_MAX is the highest real identifier.
 */
enum {
	TCA_HFSC_UNSPEC,
	TCA_HFSC_RSC,
	TCA_HFSC_FSC,
	TCA_HFSC_USC,
	__TCA_HFSC_MAX,
};

#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1)


/* CBQ section */

#define TC_CBQ_MAXPRIO		8
#define TC_CBQ_MAXLEVEL		8
#define TC_CBQ_DEF_EWMA		5

/* CBQ LSS options.
   NOTE(review): both TCF_CBQ_LSS_* macro groups look like bit masks
   (1, 2 and 1, 2, 4, 8, 0x10, 0x20); which member each group applies to
   is not stated in this file - confirm against the scheduler code.
 */
struct tc_cbq_lssopt {
	unsigned char	change;
	unsigned char	flags;
#define TCF_CBQ_LSS_BOUNDED	1
#define TCF_CBQ_LSS_ISOLATED	2
	unsigned char	ewma_log;
	unsigned char	level;
#define TCF_CBQ_LSS_FLAGS	1
#define TCF_CBQ_LSS_EWMA	2
#define TCF_CBQ_LSS_MAXIDLE	4
#define TCF_CBQ_LSS_MINIDLE	8
#define TCF_CBQ_LSS_OFFTIME	0x10
#define TCF_CBQ_LSS_AVPKT	0x20
	__u32		maxidle;
	__u32		minidle;
	__u32		offtime;
	__u32		avpkt;
};

/* CBQ WRR options: four single-byte members followed by two 32-bit values. */
struct tc_cbq_wrropt {
	unsigned char	flags;
	unsigned char	priority;
	unsigned char	cpriority;
	unsigned char	__reserved;
	__u32		allot;
	__u32		weight;
};

/* CBQ overlimit options. The TC_CBQ_OVL_* macros are defined directly
   after the strategy member (presumably its valid values - confirm).
 */
struct tc_cbq_ovl {
	unsigned char	strategy;
#define	TC_CBQ_OVL_CLASSIC	0
#define	TC_CBQ_OVL_DELAY	1
#define	TC_CBQ_OVL_LOWPRIO	2
#define	TC_CBQ_OVL_DROP		3
#define	TC_CBQ_OVL_RCLASSIC	4
	unsigned char	priority2;
	__u16		pad;
	__u32		penalty;
};

/* CBQ police option: one byte of payload plus three reserved bytes. */
struct tc_cbq_police {
	unsigned char	police;
	unsigned char	__res1;
	unsigned short	__res2;
};

/* CBQ filter options: three 32-bit values.
   NOTE(review): member semantics are not documented in this file.
 */
struct tc_cbq_fopt {
	__u32		split;
	__u32		defmap;
	__u32		defchange;
};

/* CBQ extended statistics: two unsigned counters and two signed values. */
struct tc_cbq_xstats {
	__u32		borrows;
	__u32		overactions;
	__s32		avgidle;
	__s32		undertime;
};

/* TCA_CBQ_* identifiers. __TCA_CBQ_MAX is a sentinel and must stay
   last so that TCA_CBQ_MAX is the highest real identifier.
 */
enum {
	TCA_CBQ_UNSPEC,
	TCA_CBQ_LSSOPT,
	TCA_CBQ_WRROPT,
	TCA_CBQ_FOPT,
	TCA_CBQ_OVL_STRATEGY,
	TCA_CBQ_RATE,
	TCA_CBQ_RTAB,
	TCA_CBQ_POLICE,
	__TCA_CBQ_MAX,
};

#define TCA_CBQ_MAX	(__TCA_CBQ_MAX - 1)

/* dsmark section */

/* TCA_DSMARK_* identifiers. __TCA_DSMARK_MAX is a sentinel and must stay
   last so that TCA_DSMARK_MAX is the highest real identifier.
 */
enum {
	TCA_DSMARK_UNSPEC,
	TCA_DSMARK_INDICES,
	TCA_DSMARK_DEFAULT_INDEX,
	TCA_DSMARK_SET_TC_INDEX,
	TCA_DSMARK_MASK,
	TCA_DSMARK_VALUE,
	__TCA_DSMARK_MAX,
};

#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1)

/* ATM  section */

/* TCA_ATM_* identifiers. __TCA_ATM_MAX is a sentinel and must stay
   last so that TCA_ATM_MAX is the highest real identifier.
 */
enum {
	TCA_ATM_UNSPEC,
	TCA_ATM_FD,		/* file/socket descriptor */
	TCA_ATM_PTR,		/* pointer to descriptor - later */
	TCA_ATM_HDR,		/* LL header */
	TCA_ATM_EXCESS,		/* excess traffic class (0 for CLP)  */
	TCA_ATM_ADDR,		/* PVC address (for output only) */
	TCA_ATM_STATE,		/* VC state (ATM_VS_*; for output only) */
	__TCA_ATM_MAX,
};

#define TCA_ATM_MAX	(__TCA_ATM_MAX - 1)

/* Network emulator */

/* TCA_NETEM_* identifiers. __TCA_NETEM_MAX is a sentinel and must stay
   last so that TCA_NETEM_MAX is the highest real identifier.
 */
enum {
	TCA_NETEM_UNSPEC,
	TCA_NETEM_CORR,
	TCA_NETEM_DELAY_DIST,
	TCA_NETEM_REORDER,
	TCA_NETEM_CORRUPT,
	TCA_NETEM_LOSS,
	__TCA_NETEM_MAX,
};

#define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1)

/* netem base options. For loss and duplicate, 0 means none and ~0
   (all bits set) means 100%.
 */
struct tc_netem_qopt {
	__u32	latency;	/* added delay (us) */
	__u32	limit;		/* fifo limit (packets) */
	__u32	loss;		/* random packet loss (0=none ~0=100%) */
	__u32	gap;		/* re-ordering gap (0 for none) */
	__u32	duplicate;	/* random packet dup  (0=none ~0=100%) */
	__u32	jitter;		/* random jitter in latency (us) */
};

/* netem correlation values for delay, loss and duplication. */
struct tc_netem_corr {
	__u32	delay_corr;	/* delay correlation */
	__u32	loss_corr;	/* packet loss correlation */
	__u32	dup_corr;	/* duplicate correlation  */
};

/* netem reordering parameters.
   NOTE(review): presumably scaled like tc_netem_qopt.loss - confirm.
 */
struct tc_netem_reorder {
	__u32	probability;
	__u32	correlation;
};

/* netem corruption parameters; same layout as tc_netem_reorder. */
struct tc_netem_corrupt {
	__u32	probability;
	__u32	correlation;
};

/* netem loss model identifiers. __NETEM_LOSS_MAX is a sentinel and must
   stay last so that NETEM_LOSS_MAX is the highest real identifier.
 */
enum {
	NETEM_LOSS_UNSPEC,
	NETEM_LOSS_GI,		/* General Intuitive - 4 state model */
	NETEM_LOSS_GE,		/* Gilbert Elliot models */
	__NETEM_LOSS_MAX
};
#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1)

/* State transition probabilities for 4 state model */
struct tc_netem_gimodel {
	__u32	p13;
	__u32	p31;
	__u32	p32;
	__u32	p14;
	__u32	p23;
};

/* Gilbert-Elliot models */
struct tc_netem_gemodel {
	__u32 p;
	__u32 r;
	__u32 h;
	__u32 k1;
};

/* NOTE(review): presumably the scale factor and maximum size of the
   netem delay distribution table (TCA_NETEM_DELAY_DIST) - confirm. */
#define NETEM_DIST_SCALE	8192
#define NETEM_DIST_MAX		16384

/* DRR */

/* TCA_DRR_* identifiers. __TCA_DRR_MAX is a sentinel and must stay
   last so that TCA_DRR_MAX is the highest real identifier.
 */
enum {
	TCA_DRR_UNSPEC,
	TCA_DRR_QUANTUM,
	__TCA_DRR_MAX
};

#define TCA_DRR_MAX	(__TCA_DRR_MAX - 1)

/* DRR statistics: a single 32-bit value. */
struct tc_drr_stats {
	__u32	deficit;
};

/* MQPRIO */
#define TC_QOPT_BITMASK 15
#define TC_QOPT_MAX_QUEUE 16

/* MQPRIO options. prio_tc_map has TC_QOPT_BITMASK + 1 entries;
   count[] and offset[] each have TC_QOPT_MAX_QUEUE entries.
 */
struct tc_mqprio_qopt {
	__u8	num_tc;
	__u8	prio_tc_map[TC_QOPT_BITMASK + 1];
	__u8	hw;
	__u16	count[TC_QOPT_MAX_QUEUE];
	__u16	offset[TC_QOPT_MAX_QUEUE];
};

/* SFB */

/* TCA_SFB_* identifiers. __TCA_SFB_MAX is a sentinel and must stay
   last so that TCA_SFB_MAX is the highest real identifier.
 */
enum {
	TCA_SFB_UNSPEC,
	TCA_SFB_PARMS,
	__TCA_SFB_MAX,
};

#define TCA_SFB_MAX (__TCA_SFB_MAX - 1)

/*
 * SFB options.
 *
 * Note: increment, decrement are Q0.16 fixed-point values.
 */
struct tc_sfb_qopt {
	__u32 rehash_interval;	/* delay between hash move, in ms */
	__u32 warmup_time;	/* double buffering warmup time in ms (warmup_time < rehash_interval) */
	__u32 max;		/* max len of qlen_min */
	__u32 bin_size;		/* maximum queue length per bin */
	__u32 increment;	/* probability increment, (d1 in Blue) */
	__u32 decrement;	/* probability decrement, (d2 in Blue) */
	__u32 limit;		/* max SFB queue length */
	__u32 penalty_rate;	/* inelastic flows are rate limited to 'rate' pps */
	__u32 penalty_burst;
};

/* SFB extended statistics: nine 32-bit values. */
struct tc_sfb_xstats {
	__u32 earlydrop;
	__u32 penaltydrop;
	__u32 bucketdrop;
	__u32 queuedrop;
	__u32 childdrop; /* drops in child qdisc */
	__u32 marked;
	__u32 maxqlen;
	__u32 maxprob;
	__u32 avgprob;
};

/* Largest 16-bit value.
   NOTE(review): presumably the cap for the Q0.16 SFB probabilities - confirm. */
#define SFB_MAX_PROB 0xFFFF

/* QFQ */

/* TCA_QFQ_* identifiers. __TCA_QFQ_MAX is a sentinel and must stay
   last so that TCA_QFQ_MAX is the highest real identifier.
 */
enum {
	TCA_QFQ_UNSPEC,
	TCA_QFQ_WEIGHT,
	TCA_QFQ_LMAX,
	__TCA_QFQ_MAX
};

#define TCA_QFQ_MAX	(__TCA_QFQ_MAX - 1)

/* QFQ statistics; member names mirror TCA_QFQ_WEIGHT / TCA_QFQ_LMAX. */
struct tc_qfq_stats {
	__u32 weight;
	__u32 lmax;
};

#endif /* __LINUX_PKT_SCHED_H */