/* pkt_sched.h revision 7878c0ba404c3688442d2362203d79fa108450e0 */
#ifndef __LINUX_PKT_SCHED_H
#define __LINUX_PKT_SCHED_H

#include <linux/types.h>

/* Logical priority bands, independent of any specific packet scheduler.
   Every scheduler maps them to real traffic classes if it has
   no more precise mechanism to classify packets.

   These numbers have no special meaning, though their coincidence
   with obsolete IPv6 values is not accidental :-). New IPv6 drafts
   preferred full anarchy inspired by the diffserv group.

   Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy
   class; as a rule it will be handled with more care than
   filler or even bulk.
 */

#define TC_PRIO_BESTEFFORT		0
#define TC_PRIO_FILLER			1
#define TC_PRIO_BULK			2
#define TC_PRIO_INTERACTIVE_BULK	4
#define TC_PRIO_INTERACTIVE		6
#define TC_PRIO_CONTROL			7

/* Highest logical priority value; priority maps hold TC_PRIO_MAX+1 entries. */
#define TC_PRIO_MAX			15

/* Generic queue statistics, available for all the elements.
   Particular schedulers may also keep their own private records.
 */

struct tc_stats {
	__u64	bytes;			/* Number of enqueued bytes */
	__u32	packets;		/* Number of enqueued packets */
	__u32	drops;			/* Packets dropped because of lack of resources */
	__u32	overlimits;		/* Number of throttle events when this
					 * flow goes out of allocated bandwidth */
	__u32	bps;			/* Current flow byte rate */
	__u32	pps;			/* Current flow packet rate */
	__u32	qlen;			/* NOTE(review): presumably current queue length - confirm */
	__u32	backlog;		/* NOTE(review): presumably queued backlog - confirm units */
};

/* Estimator parameters: a signed interval and an unsigned EWMA log value.
 * The encoding of both fields is not documented in this header.
 */
struct tc_estimator {
	signed char	interval;
	unsigned char	ewma_log;
};

/* "Handles"
   ---------

    All the traffic control objects have 32bit identifiers, or "handles".

    They can be considered opaque numbers from the user API viewpoint,
    but actually they always consist of two fields: major and
    minor numbers, which are interpreted specially by the kernel.
    Applications may use that structure, though it is not recommended.

    E.g. qdisc handles always have a minor number equal to zero,
    classes (or flows) have a major equal to the parent qdisc major, and
    a minor uniquely identifying the class inside the qdisc.

    Macros to manipulate handles:
 */

#define TC_H_MAJ_MASK (0xFFFF0000U)	/* upper 16 bits: major number */
#define TC_H_MIN_MASK (0x0000FFFFU)	/* lower 16 bits: minor number */
#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK)	/* major part, left in place (not shifted) */
#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK)	/* minor part */
/* Combine the major bits of maj with the minor bits of min. */
#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK))

#define TC_H_UNSPEC	(0U)
#define TC_H_ROOT	(0xFFFFFFFFU)
#define TC_H_INGRESS    (0xFFFFFFF1U)

/* Rate specification shared by several schedulers' option structs.
 * Field units are not documented in this header.
 */
struct tc_ratespec {
	unsigned char	cell_log;
	unsigned char	__reserved;
	unsigned short	overhead;
	short		cell_align;
	unsigned short	mpu;
	__u32		rate;
};

/* Size constant for rate tables (rtab); unit not stated in this header. */
#define TC_RTAB_SIZE	1024

/* Size specification; built from plain C types only.
 * Field units are not documented in this header.
 */
struct tc_sizespec {
	unsigned char	cell_log;
	unsigned char	size_log;
	short		cell_align;
	int		overhead;
	unsigned int	linklayer;
	unsigned int	mpu;
	unsigned int	mtu;
	unsigned int	tsize;
};

/* TCA_STAB_* identifiers. __TCA_STAB_MAX is a sentinel one past the
 * last identifier, so TCA_STAB_MAX is the highest valid value.
 */
enum {
	TCA_STAB_UNSPEC,
	TCA_STAB_BASE,
	TCA_STAB_DATA,
	__TCA_STAB_MAX
};

#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)

/* FIFO section */

/* Options for the FIFO queues: a single length limit. */
struct tc_fifo_qopt {
	__u32	limit;	/* Queue length: bytes for bfifo, packets for pfifo */
};

/* PRIO section */

/* Band-count constants for PRIO; TCQ_MIN_PRIO_BANDS is the lower bound. */
#define TCQ_PRIO_BANDS	16
#define TCQ_MIN_PRIO_BANDS 2

118struct tc_prio_qopt {
119	int	bands;			/* Number of bands */
120	__u8	priomap[TC_PRIO_MAX+1];	/* Map: logical priority -> PRIO band */
121};
122
/* MULTIQ section */

/* MULTIQ options: current and maximum band counts. */
struct tc_multiq_qopt {
	__u16	bands;			/* Number of bands */
	__u16	max_bands;		/* Maximum number of queues */
};

130/* TBF section */
131
132struct tc_tbf_qopt {
133	struct tc_ratespec rate;
134	struct tc_ratespec peakrate;
135	__u32		limit;
136	__u32		buffer;
137	__u32		mtu;
138};
139
/* TCA_TBF_* identifiers. __TCA_TBF_MAX is a sentinel one past the
 * last identifier, so TCA_TBF_MAX is the highest valid value.
 */
enum {
	TCA_TBF_UNSPEC,
	TCA_TBF_PARMS,
	TCA_TBF_RTAB,
	TCA_TBF_PTAB,
	__TCA_TBF_MAX,
};

#define TCA_TBF_MAX (__TCA_TBF_MAX - 1)


/* TEQL section */

/* TEQL does not require any parameters */

/* SFQ section */

/* Base (v0) SFQ options. */
struct tc_sfq_qopt {
	unsigned int	quantum;	/* Bytes per round allocated to flow */
	int		perturb_period;	/* Period of hash perturbation */
	__u32		limit;		/* Maximal packets in queue */
	unsigned int	divisor;	/* Hash divisor  */
	unsigned int	flows;		/* Maximal number of flows  */
};

/* SFQRED drop/mark counters.
 * NOTE(review): the *_head fields carry the same descriptions as
 * prob_mark/forced_mark; what distinguishes them is not stated here.
 */
struct tc_sfqred_stats {
	__u32           prob_drop;      /* Early drops, below max threshold */
	__u32           forced_drop;	/* Early drops, after max threshold */
	__u32           prob_mark;      /* Marked packets, below max threshold */
	__u32           forced_mark;    /* Marked packets, after max threshold */
	__u32           prob_mark_head; /* Marked packets, below max threshold */
	__u32           forced_mark_head;/* Marked packets, after max threshold */
};

174struct tc_sfq_qopt_v1 {
175	struct tc_sfq_qopt v0;
176	unsigned int	depth;		/* max number of packets per flow */
177	unsigned int	headdrop;
178/* SFQRED parameters */
179	__u32		limit;		/* HARD maximal flow queue length (bytes) */
180	__u32		qth_min;	/* Min average length threshold (bytes) */
181	__u32		qth_max;	/* Max average length threshold (bytes) */
182	unsigned char   Wlog;		/* log(W)		*/
183	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min))	*/
184	unsigned char   Scell_log;	/* cell size for idle damping */
185	unsigned char	flags;
186	__u32		max_P;		/* probability, high resolution */
187/* SFQRED stats */
188	struct tc_sfqred_stats stats;
189};
190
191
/* SFQ extended statistics: a single signed allot value. */
struct tc_sfq_xstats {
	__s32		allot;
};

/* RED section */

/* TCA_RED_* identifiers. __TCA_RED_MAX is a sentinel one past the
 * last identifier, so TCA_RED_MAX is the highest valid value.
 */
enum {
	TCA_RED_UNSPEC,
	TCA_RED_PARMS,
	TCA_RED_STAB,
	TCA_RED_MAX_P,
	__TCA_RED_MAX,
};

#define TCA_RED_MAX (__TCA_RED_MAX - 1)

/* RED options. The TC_RED_* constants defined next to `flags` are
 * distinct bit values (1, 2, 4) for that field.
 */
struct tc_red_qopt {
	__u32		limit;		/* HARD maximal queue length (bytes)	*/
	__u32		qth_min;	/* Min average length threshold (bytes) */
	__u32		qth_max;	/* Max average length threshold (bytes) */
	unsigned char   Wlog;		/* log(W)		*/
	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min))	*/
	unsigned char   Scell_log;	/* cell size for idle damping */
	unsigned char	flags;
#define TC_RED_ECN		1
#define TC_RED_HARDDROP		2
#define TC_RED_ADAPTATIVE	4
};

/* RED extended statistics: drop and mark counters. */
struct tc_red_xstats {
	__u32           early;          /* Early drops */
	__u32           pdrop;          /* Drops due to queue limits */
	__u32           other;          /* Drops due to drop() calls */
	__u32           marked;         /* Marked packets */
};

/* GRED section */

/* NOTE(review): presumably the maximum number of GRED DPs - confirm. */
#define MAX_DPs 16

/* TCA_GRED_* identifiers. __TCA_GRED_MAX is a sentinel one past the
 * last identifier, so TCA_GRED_MAX is the highest valid value.
 */
enum {
	TCA_GRED_UNSPEC,
	TCA_GRED_PARMS,
	TCA_GRED_STAB,
	TCA_GRED_DPS,
	TCA_GRED_MAX_P,
	__TCA_GRED_MAX,
};

#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)

/* GRED options and counters for one virtual queue (see `prio`). */
struct tc_gred_qopt {
	__u32		limit;        /* HARD maximal queue length (bytes)    */
	__u32		qth_min;      /* Min average length threshold (bytes) */
	__u32		qth_max;      /* Max average length threshold (bytes) */
	__u32		DP;           /* up to 2^32 DPs */
	__u32		backlog;
	__u32		qave;
	__u32		forced;
	__u32		early;
	__u32		other;
	__u32		pdrop;
	__u8		Wlog;         /* log(W)               */
	__u8		Plog;         /* log(P_max/(qth_max-qth_min)) */
	__u8		Scell_log;    /* cell size for idle damping */
	__u8		prio;         /* prio of this VQ */
	__u32		packets;
	__u32		bytesin;
};

/* GRED setup options. pad1 fills the struct out to 12 bytes. */
struct tc_gred_sopt {
	__u32		DPs;
	__u32		def_DP;
	__u8		grio;
	__u8		flags;
	__u16		pad1;
};

/* CHOKe section */

/* TCA_CHOKE_* identifiers. __TCA_CHOKE_MAX is a sentinel one past the
 * last identifier, so TCA_CHOKE_MAX is the highest valid value.
 */
enum {
	TCA_CHOKE_UNSPEC,
	TCA_CHOKE_PARMS,
	TCA_CHOKE_STAB,
	TCA_CHOKE_MAX_P,
	__TCA_CHOKE_MAX,
};

#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1)

/* CHOKe options. Thresholds are counted in packets; `flags` takes the
 * RED flag values.
 */
struct tc_choke_qopt {
	__u32		limit;		/* Hard queue length (packets)	*/
	__u32		qth_min;	/* Min average threshold (packets) */
	__u32		qth_max;	/* Max average threshold (packets) */
	unsigned char   Wlog;		/* log(W)		*/
	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min))	*/
	unsigned char   Scell_log;	/* cell size for idle damping */
	unsigned char	flags;		/* see RED flags */
};

/* CHOKe extended statistics: drop and mark counters. */
struct tc_choke_xstats {
	__u32		early;          /* Early drops */
	__u32		pdrop;          /* Drops due to queue limits */
	__u32		other;          /* Drops due to drop() calls */
	__u32		marked;         /* Marked packets */
	__u32		matched;	/* Drops due to flow match */
};

/* HTB section */
#define TC_HTB_NUMPRIO		8
#define TC_HTB_MAXDEPTH		8
#define TC_HTB_PROTOVER		3 /* the same as HTB and TC's major */

306struct tc_htb_opt {
307	struct tc_ratespec 	rate;
308	struct tc_ratespec 	ceil;
309	__u32	buffer;
310	__u32	cbuffer;
311	__u32	quantum;
312	__u32	level;		/* out only */
313	__u32	prio;
314};
/* HTB global (per-qdisc) options, followed by one statistics counter. */
struct tc_htb_glob {
	__u32 version;		/* to match HTB/TC */
	__u32 rate2quantum;	/* bps->quantum divisor */
	__u32 defcls;		/* default class number */
	__u32 debug;		/* debug flags */

	/* stats */
	__u32 direct_pkts; /* count of non shaped packets */
};
/* TCA_HTB_* identifiers. __TCA_HTB_MAX is a sentinel one past the
 * last identifier, so TCA_HTB_MAX is the highest valid value.
 */
enum {
	TCA_HTB_UNSPEC,
	TCA_HTB_PARMS,
	TCA_HTB_INIT,
	TCA_HTB_CTAB,
	TCA_HTB_RTAB,
	__TCA_HTB_MAX,
};

#define TCA_HTB_MAX (__TCA_HTB_MAX - 1)

/* HTB extended statistics. */
struct tc_htb_xstats {
	__u32 lends;
	__u32 borrows;
	__u32 giants;	/* too big packets (rate will not be accurate) */
	__u32 tokens;
	__u32 ctokens;
};

/* HFSC section */

/* HFSC qdisc options: only the default class. */
struct tc_hfsc_qopt {
	__u16	defcls;		/* default class */
};

/* Two-segment service curve: slope m1 over the first d microseconds,
 * then slope m2.
 */
struct tc_service_curve {
	__u32	m1;		/* slope of the first segment in bps */
	__u32	d;		/* x-projection of the first segment in us */
	__u32	m2;		/* slope of the second segment in bps */
};

/* HFSC per-class statistics. */
struct tc_hfsc_stats {
	__u64	work;		/* total work done */
	__u64	rtwork;		/* work done by real-time criteria */
	__u32	period;		/* current period */
	__u32	level;		/* class level in hierarchy */
};

/* TCA_HFSC_* identifiers. __TCA_HFSC_MAX is a sentinel one past the
 * last identifier, so TCA_HFSC_MAX is the highest valid value.
 */
enum {
	TCA_HFSC_UNSPEC,
	TCA_HFSC_RSC,
	TCA_HFSC_FSC,
	TCA_HFSC_USC,
	__TCA_HFSC_MAX,
};

#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1)


/* CBQ section */

/* CBQ limits and default EWMA constant. */
#define TC_CBQ_MAXPRIO		8
#define TC_CBQ_MAXLEVEL		8
#define TC_CBQ_DEF_EWMA		5

/* CBQ link-sharing options.
 * TCF_CBQ_LSS_BOUNDED/ISOLATED are defined next to `flags`.
 * NOTE(review): the second group (TCF_CBQ_LSS_FLAGS..AVPKT) is a set of
 * distinct bits, presumably selecting via `change` which members are
 * being updated - confirm against the users of this struct.
 */
struct tc_cbq_lssopt {
	unsigned char	change;
	unsigned char	flags;
#define TCF_CBQ_LSS_BOUNDED	1
#define TCF_CBQ_LSS_ISOLATED	2
	unsigned char  	ewma_log;
	unsigned char  	level;
#define TCF_CBQ_LSS_FLAGS	1
#define TCF_CBQ_LSS_EWMA	2
#define TCF_CBQ_LSS_MAXIDLE	4
#define TCF_CBQ_LSS_MINIDLE	8
#define TCF_CBQ_LSS_OFFTIME	0x10
#define TCF_CBQ_LSS_AVPKT	0x20
	__u32		maxidle;
	__u32		minidle;
	__u32		offtime;
	__u32		avpkt;
};

/* CBQ WRR options: flags and priorities in the first four bytes
 * (the last one reserved), then allot and weight.
 */
struct tc_cbq_wrropt {
	unsigned char	flags;
	unsigned char	priority;
	unsigned char	cpriority;
	unsigned char	__reserved;
	__u32		allot;
	__u32		weight;
};

/* CBQ overlimit options. The TC_CBQ_OVL_* constants defined next to
 * `strategy` enumerate its values (0..4).
 */
struct tc_cbq_ovl {
	unsigned char	strategy;
#define	TC_CBQ_OVL_CLASSIC	0
#define	TC_CBQ_OVL_DELAY	1
#define	TC_CBQ_OVL_LOWPRIO	2
#define	TC_CBQ_OVL_DROP		3
#define	TC_CBQ_OVL_RCLASSIC	4
	unsigned char	priority2;
	__u16		pad;
	__u32		penalty;
};

/* CBQ police option: one byte of payload plus reserved padding. */
struct tc_cbq_police {
	unsigned char	police;
	unsigned char	__res1;
	unsigned short	__res2;
};

/* CBQ filter options: three 32-bit values (semantics not documented
 * in this header).
 */
struct tc_cbq_fopt {
	__u32		split;
	__u32		defmap;
	__u32		defchange;
};

/* CBQ extended statistics; avgidle and undertime are signed. */
struct tc_cbq_xstats {
	__u32		borrows;
	__u32		overactions;
	__s32		avgidle;
	__s32		undertime;
};

/* TCA_CBQ_* identifiers. __TCA_CBQ_MAX is a sentinel one past the
 * last identifier, so TCA_CBQ_MAX is the highest valid value.
 */
enum {
	TCA_CBQ_UNSPEC,
	TCA_CBQ_LSSOPT,
	TCA_CBQ_WRROPT,
	TCA_CBQ_FOPT,
	TCA_CBQ_OVL_STRATEGY,
	TCA_CBQ_RATE,
	TCA_CBQ_RTAB,
	TCA_CBQ_POLICE,
	__TCA_CBQ_MAX,
};

#define TCA_CBQ_MAX	(__TCA_CBQ_MAX - 1)

/* dsmark section */

/* TCA_DSMARK_* identifiers. __TCA_DSMARK_MAX is a sentinel one past the
 * last identifier, so TCA_DSMARK_MAX is the highest valid value.
 */
enum {
	TCA_DSMARK_UNSPEC,
	TCA_DSMARK_INDICES,
	TCA_DSMARK_DEFAULT_INDEX,
	TCA_DSMARK_SET_TC_INDEX,
	TCA_DSMARK_MASK,
	TCA_DSMARK_VALUE,
	__TCA_DSMARK_MAX,
};

#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1)

/* ATM  section */

/* TCA_ATM_* identifiers. __TCA_ATM_MAX is a sentinel one past the
 * last identifier, so TCA_ATM_MAX is the highest valid value.
 */
enum {
	TCA_ATM_UNSPEC,
	TCA_ATM_FD,		/* file/socket descriptor */
	TCA_ATM_PTR,		/* pointer to descriptor - later */
	TCA_ATM_HDR,		/* LL header */
	TCA_ATM_EXCESS,		/* excess traffic class (0 for CLP)  */
	TCA_ATM_ADDR,		/* PVC address (for output only) */
	TCA_ATM_STATE,		/* VC state (ATM_VS_*; for output only) */
	__TCA_ATM_MAX,
};

#define TCA_ATM_MAX	(__TCA_ATM_MAX - 1)

/* Network emulator */

/* TCA_NETEM_* identifiers. __TCA_NETEM_MAX is a sentinel one past the
 * last identifier, so TCA_NETEM_MAX is the highest valid value.
 */
enum {
	TCA_NETEM_UNSPEC,
	TCA_NETEM_CORR,
	TCA_NETEM_DELAY_DIST,
	TCA_NETEM_REORDER,
	TCA_NETEM_CORRUPT,
	TCA_NETEM_LOSS,
	TCA_NETEM_RATE,
	__TCA_NETEM_MAX,
};

#define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1)

/* Basic netem options. Probabilities are scaled over the full 32-bit
 * range (0 = none, ~0 = 100%); delays are in microseconds.
 */
struct tc_netem_qopt {
	__u32	latency;	/* added delay (us) */
	__u32   limit;		/* fifo limit (packets) */
	__u32	loss;		/* random packet loss (0=none ~0=100%) */
	__u32	gap;		/* re-ordering gap (0 for none) */
	__u32   duplicate;	/* random packet dup  (0=none ~0=100%) */
	__u32	jitter;		/* random jitter in latency (us) */
};

/* netem correlation values for delay, loss and duplication. */
struct tc_netem_corr {
	__u32	delay_corr;	/* delay correlation */
	__u32	loss_corr;	/* packet loss correlation */
	__u32	dup_corr;	/* duplicate correlation  */
};

/* netem reordering: probability and correlation. */
struct tc_netem_reorder {
	__u32	probability;
	__u32	correlation;
};

/* netem corruption: probability and correlation. */
struct tc_netem_corrupt {
	__u32	probability;
	__u32	correlation;
};

/* netem rate options; both overhead fields are signed. */
struct tc_netem_rate {
	__u32	rate;	/* byte/s */
	__s32	packet_overhead;
	__u32	cell_size;
	__s32	cell_overhead;
};

/* netem loss model identifiers. __NETEM_LOSS_MAX is a sentinel one past
 * the last identifier, so NETEM_LOSS_MAX is the highest valid value.
 */
enum {
	NETEM_LOSS_UNSPEC,
	NETEM_LOSS_GI,		/* General Intuitive - 4 state model */
	NETEM_LOSS_GE,		/* Gilbert Elliot models */
	__NETEM_LOSS_MAX
};
#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1)

/* State transition probabilities for 4 state model */
struct tc_netem_gimodel {
	__u32	p13;
	__u32	p31;
	__u32	p32;
	__u32	p14;
	__u32	p23;
};

/* Gilbert-Elliot models */
struct tc_netem_gemodel {
	__u32 p;
	__u32 r;
	__u32 h;
	__u32 k1;
};

/* netem distribution table constants: scale factor and maximum. */
#define NETEM_DIST_SCALE	8192
#define NETEM_DIST_MAX		16384

/* DRR */

/* TCA_DRR_* identifiers. __TCA_DRR_MAX is a sentinel one past the
 * last identifier, so TCA_DRR_MAX is the highest valid value.
 */
enum {
	TCA_DRR_UNSPEC,
	TCA_DRR_QUANTUM,
	__TCA_DRR_MAX
};

#define TCA_DRR_MAX	(__TCA_DRR_MAX - 1)

/* DRR statistics: a single deficit value. */
struct tc_drr_stats {
	__u32	deficit;
};

/* MQPRIO */
#define TC_QOPT_BITMASK 15
#define TC_QOPT_MAX_QUEUE 16

/* MQPRIO options. prio_tc_map has TC_QOPT_BITMASK + 1 entries; count
 * and offset each have TC_QOPT_MAX_QUEUE entries.
 */
struct tc_mqprio_qopt {
	__u8	num_tc;
	__u8	prio_tc_map[TC_QOPT_BITMASK + 1];
	__u8	hw;
	__u16	count[TC_QOPT_MAX_QUEUE];
	__u16	offset[TC_QOPT_MAX_QUEUE];
};

/* SFB */

/* TCA_SFB_* identifiers. __TCA_SFB_MAX is a sentinel one past the
 * last identifier, so TCA_SFB_MAX is the highest valid value.
 */
enum {
	TCA_SFB_UNSPEC,
	TCA_SFB_PARMS,
	__TCA_SFB_MAX,
};

#define TCA_SFB_MAX (__TCA_SFB_MAX - 1)

/*
 * SFB options.
 * Note: increment, decrement are Q0.16 fixed-point values.
 */
struct tc_sfb_qopt {
	__u32 rehash_interval;	/* delay between hash move, in ms */
	__u32 warmup_time;	/* double buffering warmup time in ms (warmup_time < rehash_interval) */
	__u32 max;		/* max len of qlen_min */
	__u32 bin_size;		/* maximum queue length per bin */
	__u32 increment;	/* probability increment, (d1 in Blue) */
	__u32 decrement;	/* probability decrement, (d2 in Blue) */
	__u32 limit;		/* max SFB queue length */
	__u32 penalty_rate;	/* inelastic flows are rate limited to 'rate' pps */
	__u32 penalty_burst;
};

/* SFB extended statistics: drop counters by cause, marks, and
 * queue-length / probability figures.
 */
struct tc_sfb_xstats {
	__u32 earlydrop;
	__u32 penaltydrop;
	__u32 bucketdrop;
	__u32 queuedrop;
	__u32 childdrop; /* drops in child qdisc */
	__u32 marked;
	__u32 maxqlen;
	__u32 maxprob;
	__u32 avgprob;
};

/* Largest SFB probability value (all 16 bits set). */
#define SFB_MAX_PROB 0xFFFF

/* QFQ */
/* TCA_QFQ_* identifiers. __TCA_QFQ_MAX is a sentinel one past the
 * last identifier, so TCA_QFQ_MAX is the highest valid value.
 */
enum {
	TCA_QFQ_UNSPEC,
	TCA_QFQ_WEIGHT,
	TCA_QFQ_LMAX,
	__TCA_QFQ_MAX
};

#define TCA_QFQ_MAX	(__TCA_QFQ_MAX - 1)

/* QFQ statistics: weight and lmax values. */
struct tc_qfq_stats {
	__u32 weight;
	__u32 lmax;
};

#endif /* __LINUX_PKT_SCHED_H */