sch_gred.c revision 05f1cc01b4d24bc5432ae7044f8209d464f2b8ec
1/* 2 * net/sched/sch_gred.c Generic Random Early Detection queue. 3 * 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License 7 * as published by the Free Software Foundation; either version 8 * 2 of the License, or (at your option) any later version. 9 * 10 * Authors: J Hadi Salim (hadi@cyberus.ca) 1998-2002 11 * 12 * 991129: - Bug fix with grio mode 13 * - a better sing. AvgQ mode with Grio(WRED) 14 * - A finer grained VQ dequeue based on sugestion 15 * from Ren Liu 16 * - More error checks 17 * 18 * 19 * 20 * For all the glorious comments look at Alexey's sch_red.c 21 */ 22 23#include <linux/config.h> 24#include <linux/module.h> 25#include <asm/uaccess.h> 26#include <asm/system.h> 27#include <linux/bitops.h> 28#include <linux/types.h> 29#include <linux/kernel.h> 30#include <linux/sched.h> 31#include <linux/string.h> 32#include <linux/mm.h> 33#include <linux/socket.h> 34#include <linux/sockios.h> 35#include <linux/in.h> 36#include <linux/errno.h> 37#include <linux/interrupt.h> 38#include <linux/if_ether.h> 39#include <linux/inet.h> 40#include <linux/netdevice.h> 41#include <linux/etherdevice.h> 42#include <linux/notifier.h> 43#include <net/ip.h> 44#include <net/route.h> 45#include <linux/skbuff.h> 46#include <net/sock.h> 47#include <net/pkt_sched.h> 48 49#if 1 /* control */ 50#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) 51#else 52#define DPRINTK(format,args...) 53#endif 54 55#if 0 /* data */ 56#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args) 57#else 58#define D2PRINTK(format,args...) 59#endif 60 61struct gred_sched_data; 62struct gred_sched; 63 64struct gred_sched_data 65{ 66/* Parameters */ 67 u32 limit; /* HARD maximal queue length */ 68 u32 qth_min; /* Min average length threshold: A scaled */ 69 u32 qth_max; /* Max average length threshold: A scaled */ 70 u32 DP; /* the drop pramaters */ 71 char Wlog; /* log(W) */ 72 char Plog; /* random number bits */ 73 u32 Scell_max; 74 u32 Rmask; 75 u32 bytesin; /* bytes seen on virtualQ so far*/ 76 u32 packetsin; /* packets seen on virtualQ so far*/ 77 u32 backlog; /* bytes on the virtualQ */ 78 u32 forced; /* packets dropped for exceeding limits */ 79 u32 early; /* packets dropped as a warning */ 80 u32 other; /* packets dropped by invoking drop() */ 81 u32 pdrop; /* packets dropped because we exceeded physical queue limits */ 82 char Scell_log; 83 u8 Stab[256]; 84 u8 prio; /* the prio of this vq */ 85 86/* Variables */ 87 unsigned long qave; /* Average queue length: A scaled */ 88 int qcount; /* Packets since last random number generation */ 89 u32 qR; /* Cached random number */ 90 91 psched_time_t qidlestart; /* Start of idle period */ 92}; 93 94enum { 95 GRED_WRED_MODE = 1, 96 GRED_RIO_MODE, 97}; 98 99struct gred_sched 100{ 101 struct gred_sched_data *tab[MAX_DPs]; 102 unsigned long flags; 103 u32 DPs; 104 u32 def; 105 u8 initd; 106}; 107 108static inline int gred_wred_mode(struct gred_sched *table) 109{ 110 return test_bit(GRED_WRED_MODE, &table->flags); 111} 112 113static inline void gred_enable_wred_mode(struct gred_sched *table) 114{ 115 __set_bit(GRED_WRED_MODE, &table->flags); 116} 117 118static inline void gred_disable_wred_mode(struct gred_sched *table) 119{ 120 __clear_bit(GRED_WRED_MODE, &table->flags); 121} 122 123static inline int gred_rio_mode(struct gred_sched *table) 124{ 125 return test_bit(GRED_RIO_MODE, &table->flags); 126} 127 128static inline void gred_enable_rio_mode(struct gred_sched *table) 129{ 130 __set_bit(GRED_RIO_MODE, &table->flags); 131} 132 133static inline void gred_disable_rio_mode(struct gred_sched *table) 134{ 135 __clear_bit(GRED_RIO_MODE, &table->flags); 136} 137 138static inline int gred_wred_mode_check(struct Qdisc *sch) 139{ 140 struct gred_sched *table = qdisc_priv(sch); 141 int i; 142 143 /* Really ugly O(n^2) but shouldn't be necessary too frequent. */ 144 for (i = 0; i < table->DPs; i++) { 145 struct gred_sched_data *q = table->tab[i]; 146 int n; 147 148 if (q == NULL) 149 continue; 150 151 for (n = 0; n < table->DPs; n++) 152 if (table->tab[n] && table->tab[n] != q && 153 table->tab[n]->prio == q->prio) 154 return 1; 155 } 156 157 return 0; 158} 159 160static int 161gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) 162{ 163 psched_time_t now; 164 struct gred_sched_data *q=NULL; 165 struct gred_sched *t= qdisc_priv(sch); 166 unsigned long qave=0; 167 int i=0; 168 169 if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) { 170 D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n"); 171 goto do_enqueue; 172 } 173 174 175 if ( ((skb->tc_index&0xf) > (t->DPs -1)) || !(q=t->tab[skb->tc_index&0xf])) { 176 printk("GRED: setting to default (%d)\n ",t->def); 177 if (!(q=t->tab[t->def])) { 178 DPRINTK("GRED: setting to default FAILED! dropping!! " 179 "(%d)\n ", t->def); 180 goto drop; 181 } 182 /* fix tc_index? --could be controvesial but needed for 183 requeueing */ 184 skb->tc_index=(skb->tc_index&0xfffffff0) | t->def; 185 } 186 187 D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d " 188 "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog, 189 sch->qstats.backlog); 190 /* sum up all the qaves of prios <= to ours to get the new qave*/ 191 if (!gred_wred_mode(t) && gred_rio_mode(t)) { 192 for (i=0;i<t->DPs;i++) { 193 if ((!t->tab[i]) || (i==q->DP)) 194 continue; 195 196 if ((t->tab[i]->prio < q->prio) && (PSCHED_IS_PASTPERFECT(t->tab[i]->qidlestart))) 197 qave +=t->tab[i]->qave; 198 } 199 200 } 201 202 q->packetsin++; 203 q->bytesin+=skb->len; 204 205 if (gred_wred_mode(t)) { 206 qave=0; 207 q->qave=t->tab[t->def]->qave; 208 q->qidlestart=t->tab[t->def]->qidlestart; 209 } 210 211 if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { 212 long us_idle; 213 PSCHED_GET_TIME(now); 214 us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max); 215 PSCHED_SET_PASTPERFECT(q->qidlestart); 216 217 q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF]; 218 } else { 219 if (gred_wred_mode(t)) { 220 q->qave += sch->qstats.backlog - (q->qave >> q->Wlog); 221 } else { 222 q->qave += q->backlog - (q->qave >> q->Wlog); 223 } 224 225 } 226 227 228 if (gred_wred_mode(t)) 229 t->tab[t->def]->qave=q->qave; 230 231 if ((q->qave+qave) < q->qth_min) { 232 q->qcount = -1; 233enqueue: 234 if (q->backlog + skb->len <= q->limit) { 235 q->backlog += skb->len; 236do_enqueue: 237 __skb_queue_tail(&sch->q, skb); 238 sch->qstats.backlog += skb->len; 239 sch->bstats.bytes += skb->len; 240 sch->bstats.packets++; 241 return 0; 242 } else { 243 q->pdrop++; 244 } 245 246drop: 247 kfree_skb(skb); 248 sch->qstats.drops++; 249 return NET_XMIT_DROP; 250 } 251 if ((q->qave+qave) >= q->qth_max) { 252 q->qcount = -1; 253 sch->qstats.overlimits++; 254 q->forced++; 255 goto drop; 256 } 257 if (++q->qcount) { 258 if ((((qave+q->qave) - q->qth_min)>>q->Wlog)*q->qcount < q->qR) 259 goto enqueue; 260 q->qcount = 0; 261 q->qR = net_random()&q->Rmask; 262 sch->qstats.overlimits++; 263 q->early++; 264 goto drop; 265 } 266 q->qR = net_random()&q->Rmask; 267 goto enqueue; 268} 269 270static int 271gred_requeue(struct sk_buff *skb, struct Qdisc* sch) 272{ 273 struct gred_sched_data *q; 274 struct gred_sched *t= qdisc_priv(sch); 275 q= t->tab[(skb->tc_index&0xf)]; 276/* error checking here -- probably unnecessary */ 277 PSCHED_SET_PASTPERFECT(q->qidlestart); 278 279 __skb_queue_head(&sch->q, skb); 280 sch->qstats.backlog += skb->len; 281 sch->qstats.requeues++; 282 q->backlog += skb->len; 283 return 0; 284} 285 286static struct sk_buff * 287gred_dequeue(struct Qdisc* sch) 288{ 289 struct sk_buff *skb; 290 struct gred_sched_data *q; 291 struct gred_sched *t= qdisc_priv(sch); 292 293 skb = __skb_dequeue(&sch->q); 294 if (skb) { 295 sch->qstats.backlog -= skb->len; 296 q= t->tab[(skb->tc_index&0xf)]; 297 if (q) { 298 q->backlog -= skb->len; 299 if (!q->backlog && !gred_wred_mode(t)) 300 PSCHED_GET_TIME(q->qidlestart); 301 } else { 302 D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 303 } 304 return skb; 305 } 306 307 if (gred_wred_mode(t)) { 308 q= t->tab[t->def]; 309 if (!q) 310 D2PRINTK("no default VQ set: Results will be " 311 "screwed up\n"); 312 else 313 PSCHED_GET_TIME(q->qidlestart); 314 } 315 316 return NULL; 317} 318 319static unsigned int gred_drop(struct Qdisc* sch) 320{ 321 struct sk_buff *skb; 322 323 struct gred_sched_data *q; 324 struct gred_sched *t= qdisc_priv(sch); 325 326 skb = __skb_dequeue_tail(&sch->q); 327 if (skb) { 328 unsigned int len = skb->len; 329 sch->qstats.backlog -= len; 330 sch->qstats.drops++; 331 q= t->tab[(skb->tc_index&0xf)]; 332 if (q) { 333 q->backlog -= len; 334 q->other++; 335 if (!q->backlog && !gred_wred_mode(t)) 336 PSCHED_GET_TIME(q->qidlestart); 337 } else { 338 D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 339 } 340 341 kfree_skb(skb); 342 return len; 343 } 344 345 q=t->tab[t->def]; 346 if (!q) { 347 D2PRINTK("no default VQ set: Results might be screwed up\n"); 348 return 0; 349 } 350 351 PSCHED_GET_TIME(q->qidlestart); 352 return 0; 353 354} 355 356static void gred_reset(struct Qdisc* sch) 357{ 358 int i; 359 struct gred_sched_data *q; 360 struct gred_sched *t= qdisc_priv(sch); 361 362 __skb_queue_purge(&sch->q); 363 364 sch->qstats.backlog = 0; 365 366 for (i=0;i<t->DPs;i++) { 367 q= t->tab[i]; 368 if (!q) 369 continue; 370 PSCHED_SET_PASTPERFECT(q->qidlestart); 371 q->qave = 0; 372 q->qcount = -1; 373 q->backlog = 0; 374 q->other=0; 375 q->forced=0; 376 q->pdrop=0; 377 q->early=0; 378 } 379} 380 381static int gred_change(struct Qdisc *sch, struct rtattr *opt) 382{ 383 struct gred_sched *table = qdisc_priv(sch); 384 struct gred_sched_data *q; 385 struct tc_gred_qopt *ctl; 386 struct tc_gred_sopt *sopt; 387 struct rtattr *tb[TCA_GRED_STAB]; 388 struct rtattr *tb2[TCA_GRED_DPS]; 389 390 if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt)) 391 return -EINVAL; 392 393 if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) { 394 rtattr_parse_nested(tb2, TCA_GRED_DPS, opt); 395 396 if (tb2[TCA_GRED_DPS-1] == 0) 397 return -EINVAL; 398 399 sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]); 400 table->DPs=sopt->DPs; 401 table->def=sopt->def_DP; 402 403 if (sopt->grio) { 404 gred_enable_rio_mode(table); 405 gred_disable_wred_mode(table); 406 if (gred_wred_mode_check(sch)) 407 gred_enable_wred_mode(table); 408 } else { 409 gred_disable_rio_mode(table); 410 gred_disable_wred_mode(table); 411 } 412 413 table->initd=0; 414 /* probably need to clear all the table DP entries as well */ 415 return 0; 416 } 417 418 419 if (!table->DPs || tb[TCA_GRED_PARMS-1] == 0 || tb[TCA_GRED_STAB-1] == 0 || 420 RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) || 421 RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256) 422 return -EINVAL; 423 424 ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]); 425 if (ctl->DP > MAX_DPs-1 ) { 426 /* misbehaving is punished! Put in the default drop probability */ 427 DPRINTK("\nGRED: DP %u not in the proper range fixed. New DP " 428 "set to default at %d\n",ctl->DP,table->def); 429 ctl->DP=table->def; 430 } 431 432 if (table->tab[ctl->DP] == NULL) { 433 table->tab[ctl->DP]=kmalloc(sizeof(struct gred_sched_data), 434 GFP_KERNEL); 435 if (NULL == table->tab[ctl->DP]) 436 return -ENOMEM; 437 memset(table->tab[ctl->DP], 0, (sizeof(struct gred_sched_data))); 438 } 439 q= table->tab[ctl->DP]; 440 441 if (gred_rio_mode(table)) { 442 if (ctl->prio <=0) { 443 if (table->def && table->tab[table->def]) { 444 DPRINTK("\nGRED: DP %u does not have a prio" 445 "setting default to %d\n",ctl->DP, 446 table->tab[table->def]->prio); 447 q->prio=table->tab[table->def]->prio; 448 } else { 449 DPRINTK("\nGRED: DP %u does not have a prio" 450 " setting default to 8\n",ctl->DP); 451 q->prio=8; 452 } 453 } else { 454 q->prio=ctl->prio; 455 } 456 } else { 457 q->prio=8; 458 } 459 460 461 q->DP=ctl->DP; 462 q->Wlog = ctl->Wlog; 463 q->Plog = ctl->Plog; 464 q->limit = ctl->limit; 465 q->Scell_log = ctl->Scell_log; 466 q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL; 467 q->Scell_max = (255<<q->Scell_log); 468 q->qth_min = ctl->qth_min<<ctl->Wlog; 469 q->qth_max = ctl->qth_max<<ctl->Wlog; 470 q->qave=0; 471 q->backlog=0; 472 q->qcount = -1; 473 q->other=0; 474 q->forced=0; 475 q->pdrop=0; 476 q->early=0; 477 478 PSCHED_SET_PASTPERFECT(q->qidlestart); 479 memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256); 480 481 if (gred_rio_mode(table)) { 482 gred_disable_wred_mode(table); 483 if (gred_wred_mode_check(sch)) 484 gred_enable_wred_mode(table); 485 } 486 487 if (!table->initd) { 488 table->initd=1; 489 /* 490 the first entry also goes into the default until 491 over-written 492 */ 493 494 if (table->tab[table->def] == NULL) { 495 table->tab[table->def]= 496 kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL); 497 if (NULL == table->tab[table->def]) 498 return -ENOMEM; 499 500 memset(table->tab[table->def], 0, 501 (sizeof(struct gred_sched_data))); 502 } 503 q= table->tab[table->def]; 504 q->DP=table->def; 505 q->Wlog = ctl->Wlog; 506 q->Plog = ctl->Plog; 507 q->limit = ctl->limit; 508 q->Scell_log = ctl->Scell_log; 509 q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL; 510 q->Scell_max = (255<<q->Scell_log); 511 q->qth_min = ctl->qth_min<<ctl->Wlog; 512 q->qth_max = ctl->qth_max<<ctl->Wlog; 513 514 if (gred_rio_mode(table)) 515 q->prio=table->tab[ctl->DP]->prio; 516 else 517 q->prio=8; 518 519 q->qcount = -1; 520 PSCHED_SET_PASTPERFECT(q->qidlestart); 521 memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256); 522 } 523 return 0; 524 525} 526 527static int gred_init(struct Qdisc *sch, struct rtattr *opt) 528{ 529 struct gred_sched *table = qdisc_priv(sch); 530 struct tc_gred_sopt *sopt; 531 struct rtattr *tb[TCA_GRED_STAB]; 532 struct rtattr *tb2[TCA_GRED_DPS]; 533 534 if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt)) 535 return -EINVAL; 536 537 if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) { 538 rtattr_parse_nested(tb2, TCA_GRED_DPS, opt); 539 540 if (tb2[TCA_GRED_DPS-1] == 0) 541 return -EINVAL; 542 543 sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]); 544 table->DPs=sopt->DPs; 545 table->def=sopt->def_DP; 546 547 if (sopt->grio) 548 gred_enable_rio_mode(table); 549 else 550 gred_disable_rio_mode(table); 551 552 table->initd=0; 553 return 0; 554 } 555 556 DPRINTK("\n GRED_INIT error!\n"); 557 return -EINVAL; 558} 559 560static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) 561{ 562 struct gred_sched *table = qdisc_priv(sch); 563 struct rtattr *parms, *opts = NULL; 564 int i; 565 566 opts = RTA_NEST(skb, TCA_OPTIONS); 567 parms = RTA_NEST(skb, TCA_GRED_PARMS); 568 569 for (i = 0; i < MAX_DPs; i++) { 570 struct gred_sched_data *q = table->tab[i]; 571 struct tc_gred_qopt opt; 572 573 memset(&opt, 0, sizeof(opt)); 574 575 if (!q) { 576 /* hack -- fix at some point with proper message 577 This is how we indicate to tc that there is no VQ 578 at this DP */ 579 580 opt.DP = MAX_DPs + i; 581 goto append_opt; 582 } 583 584 opt.limit = q->limit; 585 opt.DP = q->DP; 586 opt.backlog = q->backlog; 587 opt.prio = q->prio; 588 opt.qth_min = q->qth_min >> q->Wlog; 589 opt.qth_max = q->qth_max >> q->Wlog; 590 opt.Wlog = q->Wlog; 591 opt.Plog = q->Plog; 592 opt.Scell_log = q->Scell_log; 593 opt.other = q->other; 594 opt.early = q->early; 595 opt.forced = q->forced; 596 opt.pdrop = q->pdrop; 597 opt.packets = q->packetsin; 598 opt.bytesin = q->bytesin; 599 600 if (q->qave) { 601 if (gred_wred_mode(table)) { 602 q->qidlestart=table->tab[table->def]->qidlestart; 603 q->qave=table->tab[table->def]->qave; 604 } 605 if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) { 606 long idle; 607 unsigned long qave; 608 psched_time_t now; 609 PSCHED_GET_TIME(now); 610 idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max); 611 qave = q->qave >> q->Stab[(idle>>q->Scell_log)&0xFF]; 612 opt.qave = qave >> q->Wlog; 613 614 } else { 615 opt.qave = q->qave >> q->Wlog; 616 } 617 } 618 619append_opt: 620 RTA_APPEND(skb, sizeof(opt), &opt); 621 } 622 623 RTA_NEST_END(skb, parms); 624 625 return RTA_NEST_END(skb, opts); 626 627rtattr_failure: 628 return RTA_NEST_CANCEL(skb, opts); 629} 630 631static void gred_destroy(struct Qdisc *sch) 632{ 633 struct gred_sched *table = qdisc_priv(sch); 634 int i; 635 636 for (i = 0;i < table->DPs; i++) { 637 if (table->tab[i]) 638 kfree(table->tab[i]); 639 } 640} 641 642static struct Qdisc_ops gred_qdisc_ops = { 643 .next = NULL, 644 .cl_ops = NULL, 645 .id = "gred", 646 .priv_size = sizeof(struct gred_sched), 647 .enqueue = gred_enqueue, 648 .dequeue = gred_dequeue, 649 .requeue = gred_requeue, 650 .drop = gred_drop, 651 .init = gred_init, 652 .reset = gred_reset, 653 .destroy = gred_destroy, 654 .change = gred_change, 655 .dump = gred_dump, 656 .owner = THIS_MODULE, 657}; 658 659static int __init gred_module_init(void) 660{ 661 return register_qdisc(&gred_qdisc_ops); 662} 663static void __exit gred_module_exit(void) 664{ 665 unregister_qdisc(&gred_qdisc_ops); 666} 667module_init(gred_module_init) 668module_exit(gred_module_exit) 669MODULE_LICENSE("GPL"); 670