sch_gred.c revision 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2
1/*
2 * net/sched/sch_gred.c	Generic Random Early Detection queue.
3 *
4 *
5 *              This program is free software; you can redistribute it and/or
6 *              modify it under the terms of the GNU General Public License
7 *              as published by the Free Software Foundation; either version
8 *              2 of the License, or (at your option) any later version.
9 *
10 * Authors:    J Hadi Salim (hadi@cyberus.ca) 1998-2002
11 *
12 *             991129: -  Bug fix with grio mode
13 *		       - a better sing. AvgQ mode with Grio(WRED)
14 *		       - A finer grained VQ dequeue based on sugestion
15 *		         from Ren Liu
16 *		       - More error checks
17 *
18 *
19 *
20 *  For all the glorious comments look at Alexey's sch_red.c
21 */
22
23#include <linux/config.h>
24#include <linux/module.h>
25#include <asm/uaccess.h>
26#include <asm/system.h>
27#include <linux/bitops.h>
28#include <linux/types.h>
29#include <linux/kernel.h>
30#include <linux/sched.h>
31#include <linux/string.h>
32#include <linux/mm.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/in.h>
36#include <linux/errno.h>
37#include <linux/interrupt.h>
38#include <linux/if_ether.h>
39#include <linux/inet.h>
40#include <linux/netdevice.h>
41#include <linux/etherdevice.h>
42#include <linux/notifier.h>
43#include <net/ip.h>
44#include <net/route.h>
45#include <linux/skbuff.h>
46#include <net/sock.h>
47#include <net/pkt_sched.h>
48
49#if 1 /* control */
50#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
51#else
52#define DPRINTK(format,args...)
53#endif
54
55#if 0 /* data */
56#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
57#else
58#define D2PRINTK(format,args...)
59#endif
60
61struct gred_sched_data;
62struct gred_sched;
63
64struct gred_sched_data
65{
66/* Parameters */
67	u32		limit;		/* HARD maximal queue length	*/
68	u32		qth_min;	/* Min average length threshold: A scaled */
69	u32		qth_max;	/* Max average length threshold: A scaled */
70	u32      	DP;		/* the drop pramaters */
71	char		Wlog;		/* log(W)		*/
72	char		Plog;		/* random number bits	*/
73	u32		Scell_max;
74	u32		Rmask;
75	u32		bytesin;	/* bytes seen on virtualQ so far*/
76	u32		packetsin;	/* packets seen on virtualQ so far*/
77	u32		backlog;	/* bytes on the virtualQ */
78	u32		forced;	/* packets dropped for exceeding limits */
79	u32		early;	/* packets dropped as a warning */
80	u32		other;	/* packets dropped by invoking drop() */
81	u32		pdrop;	/* packets dropped because we exceeded physical queue limits */
82	char		Scell_log;
83	u8		Stab[256];
84	u8              prio;        /* the prio of this vq */
85
86/* Variables */
87	unsigned long	qave;		/* Average queue length: A scaled */
88	int		qcount;		/* Packets since last random number generation */
89	u32		qR;		/* Cached random number */
90
91	psched_time_t	qidlestart;	/* Start of idle period	*/
92};
93
94struct gred_sched
95{
96	struct gred_sched_data *tab[MAX_DPs];
97	u32 		DPs;
98	u32 		def;
99	u8 		initd;
100	u8 		grio;
101	u8 		eqp;
102};
103
104static int
105gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
106{
107	psched_time_t now;
108	struct gred_sched_data *q=NULL;
109	struct gred_sched *t= qdisc_priv(sch);
110	unsigned long	qave=0;
111	int i=0;
112
113	if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) {
114		D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n");
115		goto do_enqueue;
116	}
117
118
119	if ( ((skb->tc_index&0xf) > (t->DPs -1)) || !(q=t->tab[skb->tc_index&0xf])) {
120		printk("GRED: setting to default (%d)\n ",t->def);
121		if (!(q=t->tab[t->def])) {
122			DPRINTK("GRED: setting to default FAILED! dropping!! "
123			    "(%d)\n ", t->def);
124			goto drop;
125		}
126		/* fix tc_index? --could be controvesial but needed for
127		   requeueing */
128		skb->tc_index=(skb->tc_index&0xfffffff0) | t->def;
129	}
130
131	D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d "
132	    "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog,
133	    sch->qstats.backlog);
134	/* sum up all the qaves of prios <= to ours to get the new qave*/
135	if (!t->eqp && t->grio) {
136		for (i=0;i<t->DPs;i++) {
137			if ((!t->tab[i]) || (i==q->DP))
138				continue;
139
140			if ((t->tab[i]->prio < q->prio) && (PSCHED_IS_PASTPERFECT(t->tab[i]->qidlestart)))
141				qave +=t->tab[i]->qave;
142		}
143
144	}
145
146	q->packetsin++;
147	q->bytesin+=skb->len;
148
149	if (t->eqp && t->grio) {
150		qave=0;
151		q->qave=t->tab[t->def]->qave;
152		q->qidlestart=t->tab[t->def]->qidlestart;
153	}
154
155	if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
156		long us_idle;
157		PSCHED_GET_TIME(now);
158		us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
159		PSCHED_SET_PASTPERFECT(q->qidlestart);
160
161		q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF];
162	} else {
163		if (t->eqp) {
164			q->qave += sch->qstats.backlog - (q->qave >> q->Wlog);
165		} else {
166			q->qave += q->backlog - (q->qave >> q->Wlog);
167		}
168
169	}
170
171
172	if (t->eqp && t->grio)
173		t->tab[t->def]->qave=q->qave;
174
175	if ((q->qave+qave) < q->qth_min) {
176		q->qcount = -1;
177enqueue:
178		if (q->backlog + skb->len <= q->limit) {
179			q->backlog += skb->len;
180do_enqueue:
181			__skb_queue_tail(&sch->q, skb);
182			sch->qstats.backlog += skb->len;
183			sch->bstats.bytes += skb->len;
184			sch->bstats.packets++;
185			return 0;
186		} else {
187			q->pdrop++;
188		}
189
190drop:
191		kfree_skb(skb);
192		sch->qstats.drops++;
193		return NET_XMIT_DROP;
194	}
195	if ((q->qave+qave) >= q->qth_max) {
196		q->qcount = -1;
197		sch->qstats.overlimits++;
198		q->forced++;
199		goto drop;
200	}
201	if (++q->qcount) {
202		if ((((qave+q->qave) - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
203			goto enqueue;
204		q->qcount = 0;
205		q->qR = net_random()&q->Rmask;
206		sch->qstats.overlimits++;
207		q->early++;
208		goto drop;
209	}
210	q->qR = net_random()&q->Rmask;
211	goto enqueue;
212}
213
214static int
215gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
216{
217	struct gred_sched_data *q;
218	struct gred_sched *t= qdisc_priv(sch);
219	q= t->tab[(skb->tc_index&0xf)];
220/* error checking here -- probably unnecessary */
221	PSCHED_SET_PASTPERFECT(q->qidlestart);
222
223	__skb_queue_head(&sch->q, skb);
224	sch->qstats.backlog += skb->len;
225	sch->qstats.requeues++;
226	q->backlog += skb->len;
227	return 0;
228}
229
230static struct sk_buff *
231gred_dequeue(struct Qdisc* sch)
232{
233	struct sk_buff *skb;
234	struct gred_sched_data *q;
235	struct gred_sched *t= qdisc_priv(sch);
236
237	skb = __skb_dequeue(&sch->q);
238	if (skb) {
239		sch->qstats.backlog -= skb->len;
240		q= t->tab[(skb->tc_index&0xf)];
241		if (q) {
242			q->backlog -= skb->len;
243			if (!q->backlog && !t->eqp)
244				PSCHED_GET_TIME(q->qidlestart);
245		} else {
246			D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf);
247		}
248		return skb;
249	}
250
251	if (t->eqp) {
252			q= t->tab[t->def];
253			if (!q)
254				D2PRINTK("no default VQ set: Results will be "
255				       "screwed up\n");
256			else
257				PSCHED_GET_TIME(q->qidlestart);
258	}
259
260	return NULL;
261}
262
263static unsigned int gred_drop(struct Qdisc* sch)
264{
265	struct sk_buff *skb;
266
267	struct gred_sched_data *q;
268	struct gred_sched *t= qdisc_priv(sch);
269
270	skb = __skb_dequeue_tail(&sch->q);
271	if (skb) {
272		unsigned int len = skb->len;
273		sch->qstats.backlog -= len;
274		sch->qstats.drops++;
275		q= t->tab[(skb->tc_index&0xf)];
276		if (q) {
277			q->backlog -= len;
278			q->other++;
279			if (!q->backlog && !t->eqp)
280				PSCHED_GET_TIME(q->qidlestart);
281		} else {
282			D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf);
283		}
284
285		kfree_skb(skb);
286		return len;
287	}
288
289	q=t->tab[t->def];
290	if (!q) {
291		D2PRINTK("no default VQ set: Results might be screwed up\n");
292		return 0;
293	}
294
295	PSCHED_GET_TIME(q->qidlestart);
296	return 0;
297
298}
299
300static void gred_reset(struct Qdisc* sch)
301{
302	int i;
303	struct gred_sched_data *q;
304	struct gred_sched *t= qdisc_priv(sch);
305
306	__skb_queue_purge(&sch->q);
307
308	sch->qstats.backlog = 0;
309
310        for (i=0;i<t->DPs;i++) {
311	        q= t->tab[i];
312		if (!q)
313			continue;
314		PSCHED_SET_PASTPERFECT(q->qidlestart);
315		q->qave = 0;
316		q->qcount = -1;
317		q->backlog = 0;
318		q->other=0;
319		q->forced=0;
320		q->pdrop=0;
321		q->early=0;
322	}
323}
324
325static int gred_change(struct Qdisc *sch, struct rtattr *opt)
326{
327	struct gred_sched *table = qdisc_priv(sch);
328	struct gred_sched_data *q;
329	struct tc_gred_qopt *ctl;
330	struct tc_gred_sopt *sopt;
331	struct rtattr *tb[TCA_GRED_STAB];
332	struct rtattr *tb2[TCA_GRED_DPS];
333	int i;
334
335	if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt))
336		return -EINVAL;
337
338	if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) {
339		rtattr_parse_nested(tb2, TCA_GRED_DPS, opt);
340
341	    if (tb2[TCA_GRED_DPS-1] == 0)
342			return -EINVAL;
343
344		sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
345		table->DPs=sopt->DPs;
346		table->def=sopt->def_DP;
347		table->grio=sopt->grio;
348		table->initd=0;
349		/* probably need to clear all the table DP entries as well */
350		return 0;
351	    }
352
353
354	if (!table->DPs || tb[TCA_GRED_PARMS-1] == 0 || tb[TCA_GRED_STAB-1] == 0 ||
355		RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) ||
356		RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256)
357			return -EINVAL;
358
359	ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]);
360	if (ctl->DP > MAX_DPs-1 ) {
361		/* misbehaving is punished! Put in the default drop probability */
362		DPRINTK("\nGRED: DP %u not in  the proper range fixed. New DP "
363			"set to default at %d\n",ctl->DP,table->def);
364		ctl->DP=table->def;
365	}
366
367	if (table->tab[ctl->DP] == NULL) {
368		table->tab[ctl->DP]=kmalloc(sizeof(struct gred_sched_data),
369					    GFP_KERNEL);
370		if (NULL == table->tab[ctl->DP])
371			return -ENOMEM;
372		memset(table->tab[ctl->DP], 0, (sizeof(struct gred_sched_data)));
373	}
374	q= table->tab[ctl->DP];
375
376	if (table->grio) {
377		if (ctl->prio <=0) {
378			if (table->def && table->tab[table->def]) {
379				DPRINTK("\nGRED: DP %u does not have a prio"
380					"setting default to %d\n",ctl->DP,
381					table->tab[table->def]->prio);
382				q->prio=table->tab[table->def]->prio;
383			} else {
384				DPRINTK("\nGRED: DP %u does not have a prio"
385					" setting default to 8\n",ctl->DP);
386				q->prio=8;
387			}
388		} else {
389			q->prio=ctl->prio;
390		}
391	} else {
392		q->prio=8;
393	}
394
395
396	q->DP=ctl->DP;
397	q->Wlog = ctl->Wlog;
398	q->Plog = ctl->Plog;
399	q->limit = ctl->limit;
400	q->Scell_log = ctl->Scell_log;
401	q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
402	q->Scell_max = (255<<q->Scell_log);
403	q->qth_min = ctl->qth_min<<ctl->Wlog;
404	q->qth_max = ctl->qth_max<<ctl->Wlog;
405	q->qave=0;
406	q->backlog=0;
407	q->qcount = -1;
408	q->other=0;
409	q->forced=0;
410	q->pdrop=0;
411	q->early=0;
412
413	PSCHED_SET_PASTPERFECT(q->qidlestart);
414	memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
415
416	if ( table->initd && table->grio) {
417	/* this looks ugly but it's not in the fast path */
418		for (i=0;i<table->DPs;i++) {
419			if ((!table->tab[i]) || (i==q->DP) )
420				continue;
421			if (table->tab[i]->prio == q->prio ){
422				/* WRED mode detected */
423				table->eqp=1;
424				break;
425			}
426		}
427	}
428
429	if (!table->initd) {
430		table->initd=1;
431		/*
432        	the first entry also goes into the default until
433        	over-written
434		*/
435
436		if (table->tab[table->def] == NULL) {
437			table->tab[table->def]=
438				kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL);
439			if (NULL == table->tab[table->def])
440				return -ENOMEM;
441
442			memset(table->tab[table->def], 0,
443			       (sizeof(struct gred_sched_data)));
444		}
445		q= table->tab[table->def];
446		q->DP=table->def;
447		q->Wlog = ctl->Wlog;
448		q->Plog = ctl->Plog;
449		q->limit = ctl->limit;
450		q->Scell_log = ctl->Scell_log;
451		q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
452		q->Scell_max = (255<<q->Scell_log);
453		q->qth_min = ctl->qth_min<<ctl->Wlog;
454		q->qth_max = ctl->qth_max<<ctl->Wlog;
455
456		if (table->grio)
457			q->prio=table->tab[ctl->DP]->prio;
458		else
459			q->prio=8;
460
461		q->qcount = -1;
462		PSCHED_SET_PASTPERFECT(q->qidlestart);
463		memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
464	}
465	return 0;
466
467}
468
469static int gred_init(struct Qdisc *sch, struct rtattr *opt)
470{
471	struct gred_sched *table = qdisc_priv(sch);
472	struct tc_gred_sopt *sopt;
473	struct rtattr *tb[TCA_GRED_STAB];
474	struct rtattr *tb2[TCA_GRED_DPS];
475
476	if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt))
477		return -EINVAL;
478
479	if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) {
480		rtattr_parse_nested(tb2, TCA_GRED_DPS, opt);
481
482	    if (tb2[TCA_GRED_DPS-1] == 0)
483			return -EINVAL;
484
485		sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
486		table->DPs=sopt->DPs;
487		table->def=sopt->def_DP;
488		table->grio=sopt->grio;
489		table->initd=0;
490		return 0;
491	}
492
493	DPRINTK("\n GRED_INIT error!\n");
494	return -EINVAL;
495}
496
497static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
498{
499	unsigned long qave;
500	struct rtattr *rta;
501	struct tc_gred_qopt *opt = NULL ;
502	struct tc_gred_qopt *dst;
503	struct gred_sched *table = qdisc_priv(sch);
504	struct gred_sched_data *q;
505	int i;
506	unsigned char	 *b = skb->tail;
507
508	rta = (struct rtattr*)b;
509	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
510
511	opt=kmalloc(sizeof(struct tc_gred_qopt)*MAX_DPs, GFP_KERNEL);
512
513	if (opt  == NULL) {
514		DPRINTK("gred_dump:failed to malloc for %Zd\n",
515		    sizeof(struct tc_gred_qopt)*MAX_DPs);
516		goto rtattr_failure;
517	}
518
519	memset(opt, 0, (sizeof(struct tc_gred_qopt))*table->DPs);
520
521	if (!table->initd) {
522		DPRINTK("NO GRED Queues setup!\n");
523	}
524
525	for (i=0;i<MAX_DPs;i++) {
526		dst= &opt[i];
527		q= table->tab[i];
528
529		if (!q) {
530			/* hack -- fix at some point with proper message
531			   This is how we indicate to tc that there is no VQ
532			   at this DP */
533
534			dst->DP=MAX_DPs+i;
535			continue;
536		}
537
538		dst->limit=q->limit;
539		dst->qth_min=q->qth_min>>q->Wlog;
540		dst->qth_max=q->qth_max>>q->Wlog;
541		dst->DP=q->DP;
542		dst->backlog=q->backlog;
543		if (q->qave) {
544			if (table->eqp && table->grio) {
545				q->qidlestart=table->tab[table->def]->qidlestart;
546				q->qave=table->tab[table->def]->qave;
547			}
548			if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
549				long idle;
550				psched_time_t now;
551				PSCHED_GET_TIME(now);
552				idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
553				qave  = q->qave >> q->Stab[(idle>>q->Scell_log)&0xFF];
554				dst->qave = qave >> q->Wlog;
555
556			} else {
557				dst->qave = q->qave >> q->Wlog;
558			}
559		} else {
560			dst->qave = 0;
561		}
562
563
564		dst->Wlog = q->Wlog;
565		dst->Plog = q->Plog;
566		dst->Scell_log = q->Scell_log;
567		dst->other = q->other;
568		dst->forced = q->forced;
569		dst->early = q->early;
570		dst->pdrop = q->pdrop;
571		dst->prio = q->prio;
572		dst->packets=q->packetsin;
573		dst->bytesin=q->bytesin;
574	}
575
576	RTA_PUT(skb, TCA_GRED_PARMS, sizeof(struct tc_gred_qopt)*MAX_DPs, opt);
577	rta->rta_len = skb->tail - b;
578
579	kfree(opt);
580	return skb->len;
581
582rtattr_failure:
583	if (opt)
584		kfree(opt);
585	DPRINTK("gred_dump: FAILURE!!!!\n");
586
587/* also free the opt struct here */
588	skb_trim(skb, b - skb->data);
589	return -1;
590}
591
592static void gred_destroy(struct Qdisc *sch)
593{
594	struct gred_sched *table = qdisc_priv(sch);
595	int i;
596
597	for (i = 0;i < table->DPs; i++) {
598		if (table->tab[i])
599			kfree(table->tab[i]);
600	}
601}
602
603static struct Qdisc_ops gred_qdisc_ops = {
604	.next		=	NULL,
605	.cl_ops		=	NULL,
606	.id		=	"gred",
607	.priv_size	=	sizeof(struct gred_sched),
608	.enqueue	=	gred_enqueue,
609	.dequeue	=	gred_dequeue,
610	.requeue	=	gred_requeue,
611	.drop		=	gred_drop,
612	.init		=	gred_init,
613	.reset		=	gred_reset,
614	.destroy	=	gred_destroy,
615	.change		=	gred_change,
616	.dump		=	gred_dump,
617	.owner		=	THIS_MODULE,
618};
619
620static int __init gred_module_init(void)
621{
622	return register_qdisc(&gred_qdisc_ops);
623}
624static void __exit gred_module_exit(void)
625{
626	unregister_qdisc(&gred_qdisc_ops);
627}
628module_init(gred_module_init)
629module_exit(gred_module_exit)
630MODULE_LICENSE("GPL");
631