/* sch_gred.c — revision 22b33429ab93155895854e9518a253680a920493 */
1/* 2 * net/sched/sch_gred.c Generic Random Early Detection queue. 3 * 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License 7 * as published by the Free Software Foundation; either version 8 * 2 of the License, or (at your option) any later version. 9 * 10 * Authors: J Hadi Salim (hadi@cyberus.ca) 1998-2002 11 * 12 * 991129: - Bug fix with grio mode 13 * - a better sing. AvgQ mode with Grio(WRED) 14 * - A finer grained VQ dequeue based on sugestion 15 * from Ren Liu 16 * - More error checks 17 * 18 * 19 * 20 * For all the glorious comments look at Alexey's sch_red.c 21 */ 22 23#include <linux/config.h> 24#include <linux/module.h> 25#include <asm/uaccess.h> 26#include <asm/system.h> 27#include <linux/bitops.h> 28#include <linux/types.h> 29#include <linux/kernel.h> 30#include <linux/sched.h> 31#include <linux/string.h> 32#include <linux/mm.h> 33#include <linux/socket.h> 34#include <linux/sockios.h> 35#include <linux/in.h> 36#include <linux/errno.h> 37#include <linux/interrupt.h> 38#include <linux/if_ether.h> 39#include <linux/inet.h> 40#include <linux/netdevice.h> 41#include <linux/etherdevice.h> 42#include <linux/notifier.h> 43#include <net/ip.h> 44#include <net/route.h> 45#include <linux/skbuff.h> 46#include <net/sock.h> 47#include <net/pkt_sched.h> 48#include <net/red.h> 49 50#if 1 /* control */ 51#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) 52#else 53#define DPRINTK(format,args...) 54#endif 55 56#if 0 /* data */ 57#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args) 58#else 59#define D2PRINTK(format,args...) 
60#endif 61 62#define GRED_DEF_PRIO (MAX_DPs / 2) 63 64struct gred_sched_data; 65struct gred_sched; 66 67struct gred_sched_data 68{ 69 u32 limit; /* HARD maximal queue length */ 70 u32 DP; /* the drop pramaters */ 71 u32 bytesin; /* bytes seen on virtualQ so far*/ 72 u32 packetsin; /* packets seen on virtualQ so far*/ 73 u32 backlog; /* bytes on the virtualQ */ 74 u8 prio; /* the prio of this vq */ 75 76 struct red_parms parms; 77 struct red_stats stats; 78}; 79 80enum { 81 GRED_WRED_MODE = 1, 82 GRED_RIO_MODE, 83}; 84 85struct gred_sched 86{ 87 struct gred_sched_data *tab[MAX_DPs]; 88 unsigned long flags; 89 u32 DPs; 90 u32 def; 91 u8 initd; 92}; 93 94static inline int gred_wred_mode(struct gred_sched *table) 95{ 96 return test_bit(GRED_WRED_MODE, &table->flags); 97} 98 99static inline void gred_enable_wred_mode(struct gred_sched *table) 100{ 101 __set_bit(GRED_WRED_MODE, &table->flags); 102} 103 104static inline void gred_disable_wred_mode(struct gred_sched *table) 105{ 106 __clear_bit(GRED_WRED_MODE, &table->flags); 107} 108 109static inline int gred_rio_mode(struct gred_sched *table) 110{ 111 return test_bit(GRED_RIO_MODE, &table->flags); 112} 113 114static inline void gred_enable_rio_mode(struct gred_sched *table) 115{ 116 __set_bit(GRED_RIO_MODE, &table->flags); 117} 118 119static inline void gred_disable_rio_mode(struct gred_sched *table) 120{ 121 __clear_bit(GRED_RIO_MODE, &table->flags); 122} 123 124static inline int gred_wred_mode_check(struct Qdisc *sch) 125{ 126 struct gred_sched *table = qdisc_priv(sch); 127 int i; 128 129 /* Really ugly O(n^2) but shouldn't be necessary too frequent. 
*/ 130 for (i = 0; i < table->DPs; i++) { 131 struct gred_sched_data *q = table->tab[i]; 132 int n; 133 134 if (q == NULL) 135 continue; 136 137 for (n = 0; n < table->DPs; n++) 138 if (table->tab[n] && table->tab[n] != q && 139 table->tab[n]->prio == q->prio) 140 return 1; 141 } 142 143 return 0; 144} 145 146static inline unsigned int gred_backlog(struct gred_sched *table, 147 struct gred_sched_data *q, 148 struct Qdisc *sch) 149{ 150 if (gred_wred_mode(table)) 151 return sch->qstats.backlog; 152 else 153 return q->backlog; 154} 155 156static int 157gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) 158{ 159 struct gred_sched_data *q=NULL; 160 struct gred_sched *t= qdisc_priv(sch); 161 unsigned long qavg = 0; 162 int i=0; 163 164 if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) { 165 D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n"); 166 goto do_enqueue; 167 } 168 169 170 if ( ((skb->tc_index&0xf) > (t->DPs -1)) || !(q=t->tab[skb->tc_index&0xf])) { 171 printk("GRED: setting to default (%d)\n ",t->def); 172 if (!(q=t->tab[t->def])) { 173 DPRINTK("GRED: setting to default FAILED! dropping!! " 174 "(%d)\n ", t->def); 175 goto drop; 176 } 177 /* fix tc_index? 
--could be controvesial but needed for 178 requeueing */ 179 skb->tc_index=(skb->tc_index&0xfffffff0) | t->def; 180 } 181 182 D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d " 183 "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog, 184 sch->qstats.backlog); 185 /* sum up all the qaves of prios <= to ours to get the new qave*/ 186 if (!gred_wred_mode(t) && gred_rio_mode(t)) { 187 for (i=0;i<t->DPs;i++) { 188 if ((!t->tab[i]) || (i==q->DP)) 189 continue; 190 191 if (t->tab[i]->prio < q->prio && 192 !red_is_idling(&t->tab[i]->parms)) 193 qavg +=t->tab[i]->parms.qavg; 194 } 195 196 } 197 198 q->packetsin++; 199 q->bytesin+=skb->len; 200 201 if (gred_wred_mode(t)) { 202 qavg = 0; 203 q->parms.qavg = t->tab[t->def]->parms.qavg; 204 q->parms.qidlestart = t->tab[t->def]->parms.qidlestart; 205 } 206 207 q->parms.qavg = red_calc_qavg(&q->parms, gred_backlog(t, q, sch)); 208 209 if (red_is_idling(&q->parms)) 210 red_end_of_idle_period(&q->parms); 211 212 if (gred_wred_mode(t)) 213 t->tab[t->def]->parms.qavg = q->parms.qavg; 214 215 switch (red_action(&q->parms, q->parms.qavg + qavg)) { 216 case RED_DONT_MARK: 217 break; 218 219 case RED_PROB_MARK: 220 sch->qstats.overlimits++; 221 q->stats.prob_drop++; 222 goto drop; 223 224 case RED_HARD_MARK: 225 sch->qstats.overlimits++; 226 q->stats.forced_drop++; 227 goto drop; 228 } 229 230 if (q->backlog + skb->len <= q->limit) { 231 q->backlog += skb->len; 232do_enqueue: 233 __skb_queue_tail(&sch->q, skb); 234 sch->qstats.backlog += skb->len; 235 sch->bstats.bytes += skb->len; 236 sch->bstats.packets++; 237 return 0; 238 } 239 240 q->stats.pdrop++; 241drop: 242 kfree_skb(skb); 243 sch->qstats.drops++; 244 return NET_XMIT_DROP; 245} 246 247static int 248gred_requeue(struct sk_buff *skb, struct Qdisc* sch) 249{ 250 struct gred_sched_data *q; 251 struct gred_sched *t= qdisc_priv(sch); 252 q= t->tab[(skb->tc_index&0xf)]; 253/* error checking here -- probably unnecessary */ 254 255 if (red_is_idling(&q->parms)) 256 
red_end_of_idle_period(&q->parms); 257 258 __skb_queue_head(&sch->q, skb); 259 sch->qstats.backlog += skb->len; 260 sch->qstats.requeues++; 261 q->backlog += skb->len; 262 return 0; 263} 264 265static struct sk_buff * 266gred_dequeue(struct Qdisc* sch) 267{ 268 struct sk_buff *skb; 269 struct gred_sched_data *q; 270 struct gred_sched *t= qdisc_priv(sch); 271 272 skb = __skb_dequeue(&sch->q); 273 if (skb) { 274 sch->qstats.backlog -= skb->len; 275 q= t->tab[(skb->tc_index&0xf)]; 276 if (q) { 277 q->backlog -= skb->len; 278 if (!q->backlog && !gred_wred_mode(t)) 279 red_start_of_idle_period(&q->parms); 280 } else { 281 D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 282 } 283 return skb; 284 } 285 286 if (gred_wred_mode(t)) { 287 q= t->tab[t->def]; 288 if (!q) 289 D2PRINTK("no default VQ set: Results will be " 290 "screwed up\n"); 291 else 292 red_start_of_idle_period(&q->parms); 293 } 294 295 return NULL; 296} 297 298static unsigned int gred_drop(struct Qdisc* sch) 299{ 300 struct sk_buff *skb; 301 302 struct gred_sched_data *q; 303 struct gred_sched *t= qdisc_priv(sch); 304 305 skb = __skb_dequeue_tail(&sch->q); 306 if (skb) { 307 unsigned int len = skb->len; 308 sch->qstats.backlog -= len; 309 sch->qstats.drops++; 310 q= t->tab[(skb->tc_index&0xf)]; 311 if (q) { 312 q->backlog -= len; 313 q->stats.other++; 314 if (!q->backlog && !gred_wred_mode(t)) 315 red_start_of_idle_period(&q->parms); 316 } else { 317 D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 318 } 319 320 kfree_skb(skb); 321 return len; 322 } 323 324 q=t->tab[t->def]; 325 if (!q) { 326 D2PRINTK("no default VQ set: Results might be screwed up\n"); 327 return 0; 328 } 329 330 red_start_of_idle_period(&q->parms); 331 return 0; 332 333} 334 335static void gred_reset(struct Qdisc* sch) 336{ 337 int i; 338 struct gred_sched_data *q; 339 struct gred_sched *t= qdisc_priv(sch); 340 341 __skb_queue_purge(&sch->q); 342 343 sch->qstats.backlog = 0; 344 345 for 
(i=0;i<t->DPs;i++) { 346 q= t->tab[i]; 347 if (!q) 348 continue; 349 red_restart(&q->parms); 350 q->backlog = 0; 351 q->stats.other = 0; 352 q->stats.forced_drop = 0; 353 q->stats.prob_drop = 0; 354 q->stats.pdrop = 0; 355 } 356} 357 358static inline void gred_destroy_vq(struct gred_sched_data *q) 359{ 360 kfree(q); 361} 362 363static inline int gred_change_table_def(struct Qdisc *sch, struct rtattr *dps) 364{ 365 struct gred_sched *table = qdisc_priv(sch); 366 struct tc_gred_sopt *sopt; 367 int i; 368 369 if (dps == NULL || RTA_PAYLOAD(dps) < sizeof(*sopt)) 370 return -EINVAL; 371 372 sopt = RTA_DATA(dps); 373 374 if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs) 375 return -EINVAL; 376 377 sch_tree_lock(sch); 378 table->DPs = sopt->DPs; 379 table->def = sopt->def_DP; 380 381 /* 382 * Every entry point to GRED is synchronized with the above code 383 * and the DP is checked against DPs, i.e. shadowed VQs can no 384 * longer be found so we can unlock right here. 385 */ 386 sch_tree_unlock(sch); 387 388 if (sopt->grio) { 389 gred_enable_rio_mode(table); 390 gred_disable_wred_mode(table); 391 if (gred_wred_mode_check(sch)) 392 gred_enable_wred_mode(table); 393 } else { 394 gred_disable_rio_mode(table); 395 gred_disable_wred_mode(table); 396 } 397 398 for (i = table->DPs; i < MAX_DPs; i++) { 399 if (table->tab[i]) { 400 printk(KERN_WARNING "GRED: Warning: Destroying " 401 "shadowed VQ 0x%x\n", i); 402 gred_destroy_vq(table->tab[i]); 403 table->tab[i] = NULL; 404 } 405 } 406 407 table->initd = 0; 408 409 return 0; 410} 411 412static inline int gred_change_vq(struct Qdisc *sch, int dp, 413 struct tc_gred_qopt *ctl, int prio, u8 *stab) 414{ 415 struct gred_sched *table = qdisc_priv(sch); 416 struct gred_sched_data *q; 417 418 if (table->tab[dp] == NULL) { 419 table->tab[dp] = kmalloc(sizeof(*q), GFP_KERNEL); 420 if (table->tab[dp] == NULL) 421 return -ENOMEM; 422 memset(table->tab[dp], 0, sizeof(*q)); 423 } 424 425 q = table->tab[dp]; 426 q->DP = dp; 
427 q->prio = prio; 428 q->limit = ctl->limit; 429 430 if (q->backlog == 0) 431 red_end_of_idle_period(&q->parms); 432 433 red_set_parms(&q->parms, 434 ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog, 435 ctl->Scell_log, stab); 436 437 q->stats.other = 0; 438 q->stats.forced_drop = 0; 439 q->stats.prob_drop = 0; 440 q->stats.pdrop = 0; 441 442 return 0; 443} 444 445static int gred_change(struct Qdisc *sch, struct rtattr *opt) 446{ 447 struct gred_sched *table = qdisc_priv(sch); 448 struct tc_gred_qopt *ctl; 449 struct rtattr *tb[TCA_GRED_MAX]; 450 int err = -EINVAL, prio = GRED_DEF_PRIO; 451 u8 *stab; 452 453 if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt)) 454 return -EINVAL; 455 456 if (tb[TCA_GRED_PARMS-1] == NULL && tb[TCA_GRED_STAB-1] == NULL) 457 return gred_change_table_def(sch, opt); 458 459 if (tb[TCA_GRED_PARMS-1] == NULL || 460 RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) || 461 tb[TCA_GRED_STAB-1] == NULL || 462 RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256) 463 return -EINVAL; 464 465 ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]); 466 stab = RTA_DATA(tb[TCA_GRED_STAB-1]); 467 468 if (ctl->DP >= table->DPs) 469 goto errout; 470 471 if (gred_rio_mode(table)) { 472 if (ctl->prio == 0) { 473 int def_prio = GRED_DEF_PRIO; 474 475 if (table->tab[table->def]) 476 def_prio = table->tab[table->def]->prio; 477 478 printk(KERN_DEBUG "GRED: DP %u does not have a prio " 479 "setting default to %d\n", ctl->DP, def_prio); 480 481 prio = def_prio; 482 } else 483 prio = ctl->prio; 484 } 485 486 sch_tree_lock(sch); 487 488 err = gred_change_vq(sch, ctl->DP, ctl, prio, stab); 489 if (err < 0) 490 goto errout_locked; 491 492 if (table->tab[table->def] == NULL) { 493 if (gred_rio_mode(table)) 494 prio = table->tab[ctl->DP]->prio; 495 496 err = gred_change_vq(sch, table->def, ctl, prio, stab); 497 if (err < 0) 498 goto errout_locked; 499 } 500 501 table->initd = 1; 502 503 if (gred_rio_mode(table)) { 504 gred_disable_wred_mode(table); 505 if (gred_wred_mode_check(sch)) 
506 gred_enable_wred_mode(table); 507 } 508 509 err = 0; 510 511errout_locked: 512 sch_tree_unlock(sch); 513errout: 514 return err; 515} 516 517static int gred_init(struct Qdisc *sch, struct rtattr *opt) 518{ 519 struct rtattr *tb[TCA_GRED_MAX]; 520 521 if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt)) 522 return -EINVAL; 523 524 if (tb[TCA_GRED_PARMS-1] || tb[TCA_GRED_STAB-1]) 525 return -EINVAL; 526 527 return gred_change_table_def(sch, tb[TCA_GRED_DPS-1]); 528} 529 530static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) 531{ 532 struct gred_sched *table = qdisc_priv(sch); 533 struct rtattr *parms, *opts = NULL; 534 int i; 535 struct tc_gred_sopt sopt = { 536 .DPs = table->DPs, 537 .def_DP = table->def, 538 .grio = gred_rio_mode(table), 539 }; 540 541 opts = RTA_NEST(skb, TCA_OPTIONS); 542 RTA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt); 543 parms = RTA_NEST(skb, TCA_GRED_PARMS); 544 545 for (i = 0; i < MAX_DPs; i++) { 546 struct gred_sched_data *q = table->tab[i]; 547 struct tc_gred_qopt opt; 548 549 memset(&opt, 0, sizeof(opt)); 550 551 if (!q) { 552 /* hack -- fix at some point with proper message 553 This is how we indicate to tc that there is no VQ 554 at this DP */ 555 556 opt.DP = MAX_DPs + i; 557 goto append_opt; 558 } 559 560 opt.limit = q->limit; 561 opt.DP = q->DP; 562 opt.backlog = q->backlog; 563 opt.prio = q->prio; 564 opt.qth_min = q->parms.qth_min >> q->parms.Wlog; 565 opt.qth_max = q->parms.qth_max >> q->parms.Wlog; 566 opt.Wlog = q->parms.Wlog; 567 opt.Plog = q->parms.Plog; 568 opt.Scell_log = q->parms.Scell_log; 569 opt.other = q->stats.other; 570 opt.early = q->stats.prob_drop; 571 opt.forced = q->stats.forced_drop; 572 opt.pdrop = q->stats.pdrop; 573 opt.packets = q->packetsin; 574 opt.bytesin = q->bytesin; 575 576 if (gred_wred_mode(table)) { 577 q->parms.qidlestart = 578 table->tab[table->def]->parms.qidlestart; 579 q->parms.qavg = table->tab[table->def]->parms.qavg; 580 } 581 582 opt.qave = red_calc_qavg(&q->parms, 
q->parms.qavg); 583 584append_opt: 585 RTA_APPEND(skb, sizeof(opt), &opt); 586 } 587 588 RTA_NEST_END(skb, parms); 589 590 return RTA_NEST_END(skb, opts); 591 592rtattr_failure: 593 return RTA_NEST_CANCEL(skb, opts); 594} 595 596static void gred_destroy(struct Qdisc *sch) 597{ 598 struct gred_sched *table = qdisc_priv(sch); 599 int i; 600 601 for (i = 0;i < table->DPs; i++) { 602 if (table->tab[i]) 603 gred_destroy_vq(table->tab[i]); 604 } 605} 606 607static struct Qdisc_ops gred_qdisc_ops = { 608 .next = NULL, 609 .cl_ops = NULL, 610 .id = "gred", 611 .priv_size = sizeof(struct gred_sched), 612 .enqueue = gred_enqueue, 613 .dequeue = gred_dequeue, 614 .requeue = gred_requeue, 615 .drop = gred_drop, 616 .init = gred_init, 617 .reset = gred_reset, 618 .destroy = gred_destroy, 619 .change = gred_change, 620 .dump = gred_dump, 621 .owner = THIS_MODULE, 622}; 623 624static int __init gred_module_init(void) 625{ 626 return register_qdisc(&gred_qdisc_ops); 627} 628static void __exit gred_module_exit(void) 629{ 630 unregister_qdisc(&gred_qdisc_ops); 631} 632module_init(gred_module_init) 633module_exit(gred_module_exit) 634MODULE_LICENSE("GPL"); 635