/* pkt_sched.h revision 29cea29df0dfe45562fd42673e26608d25180eee */
#ifndef __LINUX_PKT_SCHED_H
#define __LINUX_PKT_SCHED_H

#include <linux/types.h>
5
/* Logical priority bands not depending on specific packet scheduler.
   Every scheduler will map them to real traffic classes, if it has
   no more precise mechanism to classify packets.

   These numbers have no special meaning, though their coincidence
   with obsolete IPv6 values is not accidental :-). New IPv6 drafts
   preferred full anarchy inspired by diffserv group.

   Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy
   class; actually, as a rule, it will be handled with more care than
   filler or even bulk.
 */
18
/* Logical priority values.  TC_PRIO_MAX is the largest value; arrays
 * indexed by logical priority are sized TC_PRIO_MAX + 1. */
#define TC_PRIO_BESTEFFORT		0
#define TC_PRIO_FILLER			1
#define TC_PRIO_BULK			2
#define TC_PRIO_INTERACTIVE_BULK	4
#define TC_PRIO_INTERACTIVE		6
#define TC_PRIO_CONTROL			7

#define TC_PRIO_MAX			15
27
/* Generic queue statistics, available for all the elements.
   Particular schedulers may also have their private records.
 */

struct tc_stats {
	__u64	bytes;			/* Number of enqueued bytes */
	__u32	packets;		/* Number of enqueued packets */
	__u32	drops;			/* Packets dropped because of lack of resources */
	__u32	overlimits;		/* Number of throttle events when this
					 * flow goes out of allocated bandwidth */
	__u32	bps;			/* Current flow byte rate */
	__u32	pps;			/* Current flow packet rate */
	__u32	qlen;			/* NOTE(review): presumably queue length -- confirm */
	__u32	backlog;		/* NOTE(review): units not stated here -- confirm */
};
43
/* Estimator parameters: two single-byte fields.
 * NOTE(review): units/encoding of both fields are not shown in this
 * header -- confirm against the estimator code. */
struct tc_estimator {
	signed char	interval;	/* signed on purpose */
	unsigned char	ewma_log;
};
48
/* "Handles"
   ---------

    All the traffic control objects have 32bit identifiers, or "handles".

    They can be considered as opaque numbers from the user API viewpoint,
    but actually they always consist of two fields: major and
    minor numbers, which are interpreted specially by the kernel.
    Applications may rely on this split, though it is not recommended.

    F.e. qdisc handles always have minor number equal to zero,
    classes (or flows) have major equal to parent qdisc major, and
    minor uniquely identifying class inside qdisc.

    Macros to manipulate handles:
 */
65
/* The major number lives in the upper 16 bits of a handle and the minor
 * number in the lower 16 bits.  TC_H_MAJ/TC_H_MIN mask out one half
 * (without shifting); TC_H_MAKE combines a major half and a minor half. */
#define TC_H_MAJ_MASK (0xFFFF0000U)
#define TC_H_MIN_MASK (0x0000FFFFU)
#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK)
#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK)
#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK))

/* Special handle values. */
#define TC_H_UNSPEC	(0U)
#define TC_H_ROOT	(0xFFFFFFFFU)
#define TC_H_INGRESS    (0xFFFFFFF1U)
75
/* Rate specification.
 * NOTE(review): units of `rate`, `overhead` and `mpu` are not stated in
 * this header -- confirm before relying on them. */
struct tc_ratespec {
	unsigned char	cell_log;
	unsigned char	__reserved;
	unsigned short	overhead;
	short		cell_align;	/* signed */
	unsigned short	mpu;
	__u32		rate;
};
84
#define TC_RTAB_SIZE	1024	/* NOTE(review): presumably rate-table size in bytes -- confirm */
86
/* Size specification.  Only plain C integer types are used here.
 * NOTE(review): field semantics are not documented in this header. */
struct tc_sizespec {
	unsigned char	cell_log;
	unsigned char	size_log;
	short		cell_align;	/* signed */
	int		overhead;	/* signed */
	unsigned int	linklayer;
	unsigned int	mpu;
	unsigned int	mtu;
	unsigned int	tsize;
};
97
/* TCA_STAB_* identifiers.  __TCA_STAB_MAX is a sentinel; TCA_STAB_MAX
 * evaluates to the last real entry. */
enum {
	TCA_STAB_UNSPEC,
	TCA_STAB_BASE,
	TCA_STAB_DATA,
	__TCA_STAB_MAX
};

#define TCA_STAB_MAX (__TCA_STAB_MAX - 1)
106
/* FIFO section */

/* FIFO options: a single limit whose unit depends on the FIFO flavour. */
struct tc_fifo_qopt {
	__u32	limit;	/* Queue length: bytes for bfifo, packets for pfifo */
};
112
/* PRIO section */

#define TCQ_PRIO_BANDS		16
#define TCQ_MIN_PRIO_BANDS	2
117
118struct tc_prio_qopt {
119	int	bands;			/* Number of bands */
120	__u8	priomap[TC_PRIO_MAX+1];	/* Map: logical priority -> PRIO band */
121};
122
/* MULTIQ section */

/* MULTIQ options: two 16-bit counters. */
struct tc_multiq_qopt {
	__u16	bands;			/* Number of bands */
	__u16	max_bands;		/* Maximum number of queues */
};
129
/* PLUG section */

/* Values for tc_plug_qopt.action. */
#define TCQ_PLUG_BUFFER                0
#define TCQ_PLUG_RELEASE_ONE           1
#define TCQ_PLUG_RELEASE_INDEFINITE    2
#define TCQ_PLUG_LIMIT                 3

struct tc_plug_qopt {
	/* TCQ_PLUG_BUFFER: Insert a plug into the queue and
	 *  buffer any incoming packets
	 * TCQ_PLUG_RELEASE_ONE: Dequeue packets from queue head
	 *   to beginning of the next plug.
	 * TCQ_PLUG_RELEASE_INDEFINITE: Dequeue all packets from queue.
	 *   Stop buffering packets until the next TCQ_PLUG_BUFFER
	 *   command is received (just act as a pass-thru queue).
	 * TCQ_PLUG_LIMIT: Increase/decrease queue size
	 */
	int             action;		/* one of the TCQ_PLUG_* commands above */
	__u32           limit;
};
150
151/* TBF section */
152
153struct tc_tbf_qopt {
154	struct tc_ratespec rate;
155	struct tc_ratespec peakrate;
156	__u32		limit;
157	__u32		buffer;
158	__u32		mtu;
159};
160
/* TCA_TBF_* identifiers.  __TCA_TBF_MAX is a sentinel; TCA_TBF_MAX
 * evaluates to the last real entry. */
enum {
	TCA_TBF_UNSPEC,
	TCA_TBF_PARMS,
	TCA_TBF_RTAB,
	TCA_TBF_PTAB,
	__TCA_TBF_MAX,
};

#define TCA_TBF_MAX (__TCA_TBF_MAX - 1)
170
171
/* TEQL section */

/* TEQL does not require any parameters */
175
/* SFQ section */

/* Base (v0) SFQ options. */
struct tc_sfq_qopt {
	unsigned int	quantum;	/* Bytes per round allocated to flow */
	int		perturb_period;	/* Period of hash perturbation */
	__u32		limit;		/* Maximal packets in queue */
	unsigned int	divisor;	/* Hash divisor  */
	unsigned int	flows;		/* Maximal number of flows  */
};
185
/* SFQRED drop/mark counters, split by whether the event happened below
 * or after the max threshold. */
struct tc_sfqred_stats {
	__u32           prob_drop;      /* Early drops, below max threshold */
	__u32           forced_drop;	/* Early drops, after max threshold */
	__u32           prob_mark;      /* Marked packets, below max threshold */
	__u32           forced_mark;    /* Marked packets, after max threshold */
	/* NOTE(review): the two *_head counters presumably count marks applied
	 * at the head of the queue -- confirm against the SFQ code. */
	__u32           prob_mark_head; /* Head-marked packets, below max threshold */
	__u32           forced_mark_head;/* Head-marked packets, after max threshold */
};
194
195struct tc_sfq_qopt_v1 {
196	struct tc_sfq_qopt v0;
197	unsigned int	depth;		/* max number of packets per flow */
198	unsigned int	headdrop;
199/* SFQRED parameters */
200	__u32		limit;		/* HARD maximal flow queue length (bytes) */
201	__u32		qth_min;	/* Min average length threshold (bytes) */
202	__u32		qth_max;	/* Max average length threshold (bytes) */
203	unsigned char   Wlog;		/* log(W)		*/
204	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min))	*/
205	unsigned char   Scell_log;	/* cell size for idle damping */
206	unsigned char	flags;
207	__u32		max_P;		/* probability, high resolution */
208/* SFQRED stats */
209	struct tc_sfqred_stats stats;
210};
211
212
/* SFQ extended statistics: a single signed 32-bit value. */
struct tc_sfq_xstats {
	__s32		allot;
};
216
/* RED section */

/* TCA_RED_* identifiers.  __TCA_RED_MAX is a sentinel; TCA_RED_MAX
 * evaluates to the last real entry. */
enum {
	TCA_RED_UNSPEC,
	TCA_RED_PARMS,
	TCA_RED_STAB,
	TCA_RED_MAX_P,
	__TCA_RED_MAX,
};

#define TCA_RED_MAX (__TCA_RED_MAX - 1)
228
/* RED options.  The TC_RED_* constants defined inside the struct are
 * distinct single-bit values (1, 2, 4). */
struct tc_red_qopt {
	__u32		limit;		/* HARD maximal queue length (bytes)	*/
	__u32		qth_min;	/* Min average length threshold (bytes) */
	__u32		qth_max;	/* Max average length threshold (bytes) */
	unsigned char   Wlog;		/* log(W)		*/
	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min))	*/
	unsigned char   Scell_log;	/* cell size for idle damping */
	unsigned char	flags;
#define TC_RED_ECN		1
#define TC_RED_HARDDROP		2
#define TC_RED_ADAPTATIVE	4
};
241
/* RED extended statistics: four 32-bit counters. */
struct tc_red_xstats {
	__u32           early;          /* Early drops */
	__u32           pdrop;          /* Drops due to queue limits */
	__u32           other;          /* Drops due to drop() calls */
	__u32           marked;         /* Marked packets */
};
248
/* GRED section */

#define MAX_DPs 16	/* NOTE(review): presumably the DP count limit -- confirm */
252
/* TCA_GRED_* identifiers.  __TCA_GRED_MAX is a sentinel; TCA_GRED_MAX
 * evaluates to the last real entry. */
enum {
	TCA_GRED_UNSPEC,
	TCA_GRED_PARMS,
	TCA_GRED_STAB,
	TCA_GRED_DPS,
	TCA_GRED_MAX_P,
	__TCA_GRED_MAX,
};

#define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
263
/* GRED per-VQ options and counters (the `prio` comment calls the entity
 * a "VQ").  Ten 32-bit words, four bytes, then two more 32-bit words. */
struct tc_gred_qopt {
	__u32		limit;        /* HARD maximal queue length (bytes)    */
	__u32		qth_min;      /* Min average length threshold (bytes) */
	__u32		qth_max;      /* Max average length threshold (bytes) */
	__u32		DP;           /* up to 2^32 DPs */
	__u32		backlog;
	__u32		qave;
	__u32		forced;
	__u32		early;
	__u32		other;
	__u32		pdrop;
	__u8		Wlog;         /* log(W)               */
	__u8		Plog;         /* log(P_max/(qth_max-qth_min)) */
	__u8		Scell_log;    /* cell size for idle damping */
	__u8		prio;         /* prio of this VQ */
	__u32		packets;
	__u32		bytesin;
};
282
/* gred setup */
struct tc_gred_sopt {
	__u32		DPs;
	__u32		def_DP;
	__u8		grio;
	__u8		flags;
	__u16		pad1;	/* explicit padding up to a 4-byte multiple */
};
291
/* CHOKe section */

/* TCA_CHOKE_* identifiers.  __TCA_CHOKE_MAX is a sentinel; TCA_CHOKE_MAX
 * evaluates to the last real entry. */
enum {
	TCA_CHOKE_UNSPEC,
	TCA_CHOKE_PARMS,
	TCA_CHOKE_STAB,
	TCA_CHOKE_MAX_P,
	__TCA_CHOKE_MAX,
};

#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1)
303
/* CHOKe options.  Same field layout as the RED options, but the
 * thresholds are expressed in packets rather than bytes. */
struct tc_choke_qopt {
	__u32		limit;		/* Hard queue length (packets)	*/
	__u32		qth_min;	/* Min average threshold (packets) */
	__u32		qth_max;	/* Max average threshold (packets) */
	unsigned char   Wlog;		/* log(W)		*/
	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min))	*/
	unsigned char   Scell_log;	/* cell size for idle damping */
	unsigned char	flags;		/* see RED flags */
};
313
/* CHOKe extended statistics: five 32-bit counters. */
struct tc_choke_xstats {
	__u32		early;          /* Early drops */
	__u32		pdrop;          /* Drops due to queue limits */
	__u32		other;          /* Drops due to drop() calls */
	__u32		marked;         /* Marked packets */
	__u32		matched;	/* Drops due to flow match */
};
321
/* HTB section */
#define TC_HTB_NUMPRIO		8
#define TC_HTB_MAXDEPTH		8
#define TC_HTB_PROTOVER		3 /* the same as HTB and TC's major */
326
327struct tc_htb_opt {
328	struct tc_ratespec 	rate;
329	struct tc_ratespec 	ceil;
330	__u32	buffer;
331	__u32	cbuffer;
332	__u32	quantum;
333	__u32	level;		/* out only */
334	__u32	prio;
335};
/* HTB global parameters, with one statistics counter at the end. */
struct tc_htb_glob {
	__u32 version;		/* to match HTB/TC */
	__u32 rate2quantum;	/* bps->quantum divisor */
	__u32 defcls;		/* default class number */
	__u32 debug;		/* debug flags */

	/* stats */
	__u32 direct_pkts; /* count of non shaped packets */
};
/* TCA_HTB_* identifiers.  __TCA_HTB_MAX is a sentinel; TCA_HTB_MAX
 * evaluates to the last real entry. */
enum {
	TCA_HTB_UNSPEC,
	TCA_HTB_PARMS,
	TCA_HTB_INIT,
	TCA_HTB_CTAB,
	TCA_HTB_RTAB,
	__TCA_HTB_MAX,
};

#define TCA_HTB_MAX (__TCA_HTB_MAX - 1)
355
/* HTB extended statistics: five 32-bit values.
 * NOTE(review): only `giants` is documented in the original header. */
struct tc_htb_xstats {
	__u32 lends;
	__u32 borrows;
	__u32 giants;	/* too big packets (rate will not be accurate) */
	__u32 tokens;
	__u32 ctokens;
};
363
/* HFSC section */

/* HFSC options: just the 16-bit default class. */
struct tc_hfsc_qopt {
	__u16	defcls;		/* default class */
};
369
/* Two-segment service curve: first-segment slope, the x-extent of that
 * segment, and second-segment slope. */
struct tc_service_curve {
	__u32	m1;		/* slope of the first segment in bps */
	__u32	d;		/* x-projection of the first segment in us */
	__u32	m2;		/* slope of the second segment in bps */
};
375
/* HFSC statistics: two 64-bit work counters and two 32-bit values. */
struct tc_hfsc_stats {
	__u64	work;		/* total work done */
	__u64	rtwork;		/* work done by real-time criteria */
	__u32	period;		/* current period */
	__u32	level;		/* class level in hierarchy */
};
382
/* TCA_HFSC_* identifiers.  __TCA_HFSC_MAX is a sentinel; TCA_HFSC_MAX
 * evaluates to the last real entry. */
enum {
	TCA_HFSC_UNSPEC,
	TCA_HFSC_RSC,
	TCA_HFSC_FSC,
	TCA_HFSC_USC,
	__TCA_HFSC_MAX,
};

#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1)
392
393
/* CBQ section */

#define TC_CBQ_MAXPRIO		8
#define TC_CBQ_MAXLEVEL		8
#define TC_CBQ_DEF_EWMA		5
399
/* CBQ "lssopt" parameters.  Two groups of TCF_CBQ_LSS_* constants are
 * defined inline; all of them are distinct single-bit values within
 * their group.
 * NOTE(review): the second group (FLAGS..AVPKT) looks like a bitmask
 * naming the members being updated -- confirm against the CBQ code. */
struct tc_cbq_lssopt {
	unsigned char	change;
	unsigned char	flags;
#define TCF_CBQ_LSS_BOUNDED	1
#define TCF_CBQ_LSS_ISOLATED	2
	unsigned char	ewma_log;
	unsigned char	level;
#define TCF_CBQ_LSS_FLAGS	1
#define TCF_CBQ_LSS_EWMA	2
#define TCF_CBQ_LSS_MAXIDLE	4
#define TCF_CBQ_LSS_MINIDLE	8
#define TCF_CBQ_LSS_OFFTIME	0x10
#define TCF_CBQ_LSS_AVPKT	0x20
	__u32		maxidle;
	__u32		minidle;
	__u32		offtime;
	__u32		avpkt;
};
418
/* CBQ "wrropt" parameters: four bytes (one reserved) and two 32-bit
 * values. */
struct tc_cbq_wrropt {
	unsigned char	flags;
	unsigned char	priority;
	unsigned char	cpriority;
	unsigned char	__reserved;
	__u32		allot;
	__u32		weight;
};
427
/* CBQ "ovl" parameters.  The TC_CBQ_OVL_* constants (0..4) are defined
 * directly after the `strategy` member. */
struct tc_cbq_ovl {
	unsigned char	strategy;
#define	TC_CBQ_OVL_CLASSIC	0
#define	TC_CBQ_OVL_DELAY	1
#define	TC_CBQ_OVL_LOWPRIO	2
#define	TC_CBQ_OVL_DROP		3
#define	TC_CBQ_OVL_RCLASSIC	4
	unsigned char	priority2;
	__u16		pad;
	__u32		penalty;
};
439
/* CBQ "police" parameter: one meaningful byte plus reserved members. */
struct tc_cbq_police {
	unsigned char	police;
	unsigned char	__res1;
	unsigned short	__res2;
};
445
/* CBQ "fopt" parameters: three 32-bit values.
 * NOTE(review): semantics are not documented in this header. */
struct tc_cbq_fopt {
	__u32		split;
	__u32		defmap;
	__u32		defchange;
};
451
/* CBQ extended statistics: two unsigned counters followed by two signed
 * 32-bit values. */
struct tc_cbq_xstats {
	__u32		borrows;
	__u32		overactions;
	__s32		avgidle;	/* signed */
	__s32		undertime;	/* signed */
};
458
/* TCA_CBQ_* identifiers.  __TCA_CBQ_MAX is a sentinel; TCA_CBQ_MAX
 * evaluates to the last real entry. */
enum {
	TCA_CBQ_UNSPEC,
	TCA_CBQ_LSSOPT,
	TCA_CBQ_WRROPT,
	TCA_CBQ_FOPT,
	TCA_CBQ_OVL_STRATEGY,
	TCA_CBQ_RATE,
	TCA_CBQ_RTAB,
	TCA_CBQ_POLICE,
	__TCA_CBQ_MAX,
};

#define TCA_CBQ_MAX	(__TCA_CBQ_MAX - 1)
472
/* dsmark section */

/* TCA_DSMARK_* identifiers.  __TCA_DSMARK_MAX is a sentinel;
 * TCA_DSMARK_MAX evaluates to the last real entry. */
enum {
	TCA_DSMARK_UNSPEC,
	TCA_DSMARK_INDICES,
	TCA_DSMARK_DEFAULT_INDEX,
	TCA_DSMARK_SET_TC_INDEX,
	TCA_DSMARK_MASK,
	TCA_DSMARK_VALUE,
	__TCA_DSMARK_MAX,
};

#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1)
486
/* ATM  section */

/* TCA_ATM_* identifiers.  __TCA_ATM_MAX is a sentinel; TCA_ATM_MAX
 * evaluates to the last real entry. */
enum {
	TCA_ATM_UNSPEC,
	TCA_ATM_FD,		/* file/socket descriptor */
	TCA_ATM_PTR,		/* pointer to descriptor - later */
	TCA_ATM_HDR,		/* LL header */
	TCA_ATM_EXCESS,		/* excess traffic class (0 for CLP)  */
	TCA_ATM_ADDR,		/* PVC address (for output only) */
	TCA_ATM_STATE,		/* VC state (ATM_VS_*; for output only) */
	__TCA_ATM_MAX,
};

#define TCA_ATM_MAX	(__TCA_ATM_MAX - 1)
501
/* Network emulator */

/* TCA_NETEM_* identifiers.  __TCA_NETEM_MAX is a sentinel;
 * TCA_NETEM_MAX evaluates to the last real entry. */
enum {
	TCA_NETEM_UNSPEC,
	TCA_NETEM_CORR,
	TCA_NETEM_DELAY_DIST,
	TCA_NETEM_REORDER,
	TCA_NETEM_CORRUPT,
	TCA_NETEM_LOSS,
	TCA_NETEM_RATE,
	__TCA_NETEM_MAX,
};

#define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1)
516
/* netem base options: six 32-bit values.  Per the field comments, for
 * the probability-like members 0 means "none" and ~0 means 100%. */
struct tc_netem_qopt {
	__u32	latency;	/* added delay (us) */
	__u32   limit;		/* fifo limit (packets) */
	__u32	loss;		/* random packet loss (0=none ~0=100%) */
	__u32	gap;		/* re-ordering gap (0 for none) */
	__u32   duplicate;	/* random packet dup  (0=none ~0=100%) */
	__u32	jitter;		/* random jitter in latency (us) */
};
525
/* netem correlation values for delay, loss and duplication. */
struct tc_netem_corr {
	__u32	delay_corr;	/* delay correlation */
	__u32	loss_corr;	/* packet loss correlation */
	__u32	dup_corr;	/* duplicate correlation  */
};
531
/* netem reordering parameters: a probability and a correlation. */
struct tc_netem_reorder {
	__u32	probability;
	__u32	correlation;
};
536
/* netem corruption parameters: a probability and a correlation. */
struct tc_netem_corrupt {
	__u32	probability;
	__u32	correlation;
};
541
/* netem rate parameters.  The two overhead members are signed. */
struct tc_netem_rate {
	__u32	rate;	/* byte/s */
	__s32	packet_overhead;
	__u32	cell_size;
	__s32	cell_overhead;
};
548
/* netem loss-model identifiers.  __NETEM_LOSS_MAX is a sentinel;
 * NETEM_LOSS_MAX evaluates to the last real entry. */
enum {
	NETEM_LOSS_UNSPEC,
	NETEM_LOSS_GI,		/* General Intuitive - 4 state model */
	NETEM_LOSS_GE,		/* Gilbert Elliot models */
	__NETEM_LOSS_MAX
};
#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1)
556
/* State transition probabilities for 4 state model */
struct tc_netem_gimodel {
	__u32	p13;
	__u32	p31;
	__u32	p32;
	__u32	p14;
	__u32	p23;
};
565
/* Gilbert-Elliot models */
struct tc_netem_gemodel {
	__u32 p;
	__u32 r;
	__u32 h;
	__u32 k1;
};
573
/* NOTE(review): presumably the scale factor and maximum size of a netem
 * delay-distribution table -- confirm against the netem code. */
#define NETEM_DIST_SCALE	8192
#define NETEM_DIST_MAX		16384
576
/* DRR */

/* TCA_DRR_* identifiers.  __TCA_DRR_MAX is a sentinel; TCA_DRR_MAX
 * evaluates to the last real entry. */
enum {
	TCA_DRR_UNSPEC,
	TCA_DRR_QUANTUM,
	__TCA_DRR_MAX
};

#define TCA_DRR_MAX	(__TCA_DRR_MAX - 1)
586
/* DRR statistics: a single 32-bit deficit value. */
struct tc_drr_stats {
	__u32	deficit;
};
590
/* MQPRIO */
#define TC_QOPT_BITMASK 15
#define TC_QOPT_MAX_QUEUE 16

/* MQPRIO options.  prio_tc_map has TC_QOPT_BITMASK + 1 entries; count
 * and offset each have TC_QOPT_MAX_QUEUE entries.
 * NOTE(review): semantics of `hw`, `count` and `offset` are not
 * documented in this header. */
struct tc_mqprio_qopt {
	__u8	num_tc;
	__u8	prio_tc_map[TC_QOPT_BITMASK + 1];
	__u8	hw;
	__u16	count[TC_QOPT_MAX_QUEUE];
	__u16	offset[TC_QOPT_MAX_QUEUE];
};
602
/* SFB */

/* TCA_SFB_* identifiers.  __TCA_SFB_MAX is a sentinel; TCA_SFB_MAX
 * evaluates to the last real entry. */
enum {
	TCA_SFB_UNSPEC,
	TCA_SFB_PARMS,
	__TCA_SFB_MAX,
};

#define TCA_SFB_MAX (__TCA_SFB_MAX - 1)
612
/*
 * SFB options: nine 32-bit values.
 * Note: increment, decrement are Q0.16 fixed-point values.
 */
struct tc_sfb_qopt {
	__u32 rehash_interval;	/* delay between hash move, in ms */
	__u32 warmup_time;	/* double buffering warmup time in ms (warmup_time < rehash_interval) */
	__u32 max;		/* max len of qlen_min */
	__u32 bin_size;		/* maximum queue length per bin */
	__u32 increment;	/* probability increment, (d1 in Blue) */
	__u32 decrement;	/* probability decrement, (d2 in Blue) */
	__u32 limit;		/* max SFB queue length */
	__u32 penalty_rate;	/* inelastic flows are rate limited to 'rate' pps */
	__u32 penalty_burst;
};
627
/* SFB extended statistics: nine 32-bit values.
 * NOTE(review): only `childdrop` is documented in the original header. */
struct tc_sfb_xstats {
	__u32 earlydrop;
	__u32 penaltydrop;
	__u32 bucketdrop;
	__u32 queuedrop;
	__u32 childdrop; /* drops in child qdisc */
	__u32 marked;
	__u32 maxqlen;
	__u32 maxprob;
	__u32 avgprob;
};
639
#define SFB_MAX_PROB 0xFFFF	/* largest 16-bit value */
641
/* QFQ */

/* TCA_QFQ_* identifiers.  __TCA_QFQ_MAX is a sentinel; TCA_QFQ_MAX
 * evaluates to the last real entry. */
enum {
	TCA_QFQ_UNSPEC,
	TCA_QFQ_WEIGHT,
	TCA_QFQ_LMAX,
	__TCA_QFQ_MAX
};

#define TCA_QFQ_MAX	(__TCA_QFQ_MAX - 1)
651
/* QFQ statistics: two 32-bit values named after the TCA_QFQ_WEIGHT and
 * TCA_QFQ_LMAX identifiers. */
struct tc_qfq_stats {
	__u32 weight;
	__u32 lmax;
};
656
#endif /* __LINUX_PKT_SCHED_H */