sch_gred.c revision 1fe37b106b039d9358fd1211c39b1fa199e547a8
/*
 * net/sched/sch_gred.c	Generic Random Early Detection queue.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	J Hadi Salim (hadi@cyberus.ca) 1998-2002
 *
 * 991129:	- Bug fix with grio mode
 *		- a better single AvgQ mode with Grio (WRED)
 *		- A finer grained VQ dequeue based on a suggestion
 *		  from Ren Liu
 *		- More error checks
 *
 * For all the glorious comments look at include/net/red.h
 */

#include <linux/slab.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/red.h>

#define GRED_DEF_PRIO	(MAX_DPs / 2)
#define GRED_VQ_MASK	(MAX_DPs - 1)

struct gred_sched_data;
struct gred_sched;

struct gred_sched_data {
	u32		limit;		/* HARD maximal queue length	*/
	u32		DP;		/* the drop parameters		*/
	u32		bytesin;	/* bytes seen on virtualQ so far */
	u32		packetsin;	/* packets seen on virtualQ so far */
	u32		backlog;	/* bytes on the virtualQ	*/
	u8		prio;		/* the prio of this vq		*/

	struct red_parms parms;
	struct red_vars	 vars;
	struct red_stats stats;
};

enum {
	GRED_WRED_MODE = 1,
	GRED_RIO_MODE,
};

struct gred_sched {
	struct gred_sched_data *tab[MAX_DPs];
	unsigned long	flags;
	u32		red_flags;
	u32		DPs;
	u32		def;
	struct red_vars	wred_set;
};

static inline int gred_wred_mode(struct gred_sched *table)
{
	return test_bit(GRED_WRED_MODE, &table->flags);
}

static inline void gred_enable_wred_mode(struct gred_sched *table)
{
	__set_bit(GRED_WRED_MODE, &table->flags);
}

static inline void gred_disable_wred_mode(struct gred_sched *table)
{
	__clear_bit(GRED_WRED_MODE, &table->flags);
}

static inline int gred_rio_mode(struct gred_sched *table)
{
	return test_bit(GRED_RIO_MODE, &table->flags);
}

static inline void gred_enable_rio_mode(struct gred_sched *table)
{
	__set_bit(GRED_RIO_MODE, &table->flags);
}

static inline void gred_disable_rio_mode(struct gred_sched *table)
{
	__clear_bit(GRED_RIO_MODE, &table->flags);
}

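/*
 * Mode selection: RIO mode is requested explicitly through the "grio"
 * field of struct tc_gred_sopt, whereas WRED mode is inferred from the
 * configuration -- it is turned on whenever at least two virtual queues
 * share the same priority, which is exactly the condition
 * gred_wred_mode_check() below reports.
 */
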
static inline int gred_wred_mode_check(struct Qdisc *sch)
{
	struct gred_sched *table = qdisc_priv(sch);
	int i;

	/* Really ugly O(n^2), but it shouldn't be needed too frequently. */
	for (i = 0; i < table->DPs; i++) {
		struct gred_sched_data *q = table->tab[i];
		int n;

		if (q == NULL)
			continue;

		for (n = i + 1; n < table->DPs; n++)
			if (table->tab[n] && table->tab[n]->prio == q->prio)
				return 1;
	}

	return 0;
}

static inline unsigned int gred_backlog(struct gred_sched *table,
					struct gred_sched_data *q,
					struct Qdisc *sch)
{
	if (gred_wred_mode(table))
		return sch->qstats.backlog;
	else
		return q->backlog;
}

static inline u16 tc_index_to_dp(struct sk_buff *skb)
{
	return skb->tc_index & GRED_VQ_MASK;
}

static inline void gred_load_wred_set(const struct gred_sched *table,
				      struct gred_sched_data *q)
{
	q->vars.qavg = table->wred_set.qavg;
	q->vars.qidlestart = table->wred_set.qidlestart;
}

static inline void gred_store_wred_set(struct gred_sched *table,
				       struct gred_sched_data *q)
{
	table->wred_set.qavg = q->vars.qavg;
}

static inline int gred_use_ecn(struct gred_sched *t)
{
	return t->red_flags & TC_RED_ECN;
}

static inline int gred_use_harddrop(struct gred_sched *t)
{
	return t->red_flags & TC_RED_HARDDROP;
}

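/*
 * Worked example for the DP lookup in gred_enqueue() (illustrative
 * numbers, not taken from this file): with MAX_DPs == 16, GRED_VQ_MASK
 * is 0xf, so a packet classified to skb->tc_index == 0x17 maps to
 * virtual queue 0x17 & 0xf == 7.  If that DP is beyond t->DPs or has no
 * gred_sched_data attached, the packet is moved to the default DP
 * (t->def) and its tc_index is rewritten accordingly.
 */
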
static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct gred_sched_data *q = NULL;
	struct gred_sched *t = qdisc_priv(sch);
	unsigned long qavg = 0;
	u16 dp = tc_index_to_dp(skb);

	if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
		dp = t->def;

		q = t->tab[dp];
		if (!q) {
			/* Pass through packets not assigned to a DP
			 * if no default DP has been configured. This
			 * allows for DP flows to be left untouched.
			 */
			if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len)
				return qdisc_enqueue_tail(skb, sch);
			else
				goto drop;
		}

		/* fix tc_index? --could be controversial but needed for
		 * requeueing */
		skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp;
	}

	/* sum up all the qaves of prios < ours to get the new qave */
	if (!gred_wred_mode(t) && gred_rio_mode(t)) {
		int i;

		for (i = 0; i < t->DPs; i++) {
			if (t->tab[i] && t->tab[i]->prio < q->prio &&
			    !red_is_idling(&t->tab[i]->vars))
				qavg += t->tab[i]->vars.qavg;
		}
	}

	q->packetsin++;
	q->bytesin += qdisc_pkt_len(skb);

	if (gred_wred_mode(t))
		gred_load_wred_set(t, q);

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     gred_backlog(t, q, sch));

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	if (gred_wred_mode(t))
		gred_store_wred_set(t, q);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg + qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		sch->qstats.overlimits++;
		if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		sch->qstats.overlimits++;
		if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
		q->backlog += qdisc_pkt_len(skb);
		return qdisc_enqueue_tail(skb, sch);
	}

	q->stats.pdrop++;
drop:
	return qdisc_drop(skb, sch);

congestion_drop:
	qdisc_drop(skb, sch);
	return NET_XMIT_CN;
}

static struct sk_buff *gred_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct gred_sched *t = qdisc_priv(sch);

	skb = qdisc_dequeue_head(sch);

	if (skb) {
		struct gred_sched_data *q;
		u16 dp = tc_index_to_dp(skb);

		if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
			net_warn_ratelimited("GRED: Unable to relocate VQ 0x%x after dequeue, screwing up backlog\n",
					     tc_index_to_dp(skb));
		} else {
			q->backlog -= qdisc_pkt_len(skb);

			if (!q->backlog && !gred_wred_mode(t))
				red_start_of_idle_period(&q->vars);
		}

		return skb;
	}

	if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
		red_start_of_idle_period(&t->wred_set);

	return NULL;
}

static unsigned int gred_drop(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct gred_sched *t = qdisc_priv(sch);

	skb = qdisc_dequeue_tail(sch);
	if (skb) {
		unsigned int len = qdisc_pkt_len(skb);
		struct gred_sched_data *q;
		u16 dp = tc_index_to_dp(skb);

		if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
			net_warn_ratelimited("GRED: Unable to relocate VQ 0x%x while dropping, screwing up backlog\n",
					     tc_index_to_dp(skb));
		} else {
			q->backlog -= len;
			q->stats.other++;

			if (!q->backlog && !gred_wred_mode(t))
				red_start_of_idle_period(&q->vars);
		}

		qdisc_drop(skb, sch);
		return len;
	}

	if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
		red_start_of_idle_period(&t->wred_set);

	return 0;
}

static void gred_reset(struct Qdisc *sch)
{
	int i;
	struct gred_sched *t = qdisc_priv(sch);

	qdisc_reset_queue(sch);

	for (i = 0; i < t->DPs; i++) {
		struct gred_sched_data *q = t->tab[i];

		if (!q)
			continue;

		red_restart(&q->vars);
		q->backlog = 0;
	}
}

static inline void gred_destroy_vq(struct gred_sched_data *q)
{
	kfree(q);
}

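/*
 * For reference, a sketch of the table-definition payload that
 * gred_change_table_def() consumes as TCA_GRED_DPS.  The values here
 * are illustrative only; the field names come from struct tc_gred_sopt
 * in include/linux/pkt_sched.h:
 *
 *	struct tc_gred_sopt sopt = {
 *		.DPs	= 4,		four virtual queues
 *		.def_DP	= 0,		unclassified traffic goes to DP 0
 *		.grio	= 1,		enable priority-based (RIO) mode
 *		.flags	= TC_RED_ECN,	prefer ECN marking over dropping
 *	};
 */
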
static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct tc_gred_sopt *sopt;
	int i;

	if (dps == NULL)
		return -EINVAL;

	sopt = nla_data(dps);

	if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs)
		return -EINVAL;

	sch_tree_lock(sch);
	table->DPs = sopt->DPs;
	table->def = sopt->def_DP;
	table->red_flags = sopt->flags;

	/*
	 * Every entry point to GRED is synchronized with the above code
	 * and the DP is checked against DPs, i.e. shadowed VQs can no
	 * longer be found so we can unlock right here.
	 */
	sch_tree_unlock(sch);

	if (sopt->grio) {
		gred_enable_rio_mode(table);
		gred_disable_wred_mode(table);
		if (gred_wred_mode_check(sch))
			gred_enable_wred_mode(table);
	} else {
		gred_disable_rio_mode(table);
		gred_disable_wred_mode(table);
	}

	for (i = table->DPs; i < MAX_DPs; i++) {
		if (table->tab[i]) {
			pr_warning("GRED: Warning: Destroying shadowed VQ 0x%x\n",
				   i);
			gred_destroy_vq(table->tab[i]);
			table->tab[i] = NULL;
		}
	}

	return 0;
}

static inline int gred_change_vq(struct Qdisc *sch, int dp,
				 struct tc_gred_qopt *ctl, int prio,
				 u8 *stab, u32 max_P,
				 struct gred_sched_data **prealloc)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct gred_sched_data *q = table->tab[dp];

	if (!q) {
		table->tab[dp] = q = *prealloc;
		*prealloc = NULL;
		if (!q)
			return -ENOMEM;
	}

	q->DP = dp;
	q->prio = prio;
	q->limit = ctl->limit;

	if (q->backlog == 0)
		red_end_of_idle_period(&q->vars);

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
		      ctl->Scell_log, stab, max_P);
	red_set_vars(&q->vars);
	return 0;
}

static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
	[TCA_GRED_PARMS]	= { .len = sizeof(struct tc_gred_qopt) },
	[TCA_GRED_STAB]		= { .len = 256 },
	[TCA_GRED_DPS]		= { .len = sizeof(struct tc_gred_sopt) },
	[TCA_GRED_MAX_P]	= { .type = NLA_U32 },
};

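/*
 * A change request is interpreted by what it carries: a message with
 * neither TCA_GRED_PARMS nor TCA_GRED_STAB only redefines the table of
 * virtual queues (TCA_GRED_DPS), while configuring an individual VQ
 * requires both PARMS and STAB.  TCA_GRED_MAX_P is optional; when it is
 * absent, max_P defaults to 0.
 */
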
static int gred_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct tc_gred_qopt *ctl;
	struct nlattr *tb[TCA_GRED_MAX + 1];
	int err, prio = GRED_DEF_PRIO;
	u8 *stab;
	u32 max_P;
	struct gred_sched_data *prealloc;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
	if (err < 0)
		return err;

	if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL)
		return gred_change_table_def(sch, tb[TCA_GRED_DPS]);

	if (tb[TCA_GRED_PARMS] == NULL ||
	    tb[TCA_GRED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;

	err = -EINVAL;
	ctl = nla_data(tb[TCA_GRED_PARMS]);
	stab = nla_data(tb[TCA_GRED_STAB]);

	if (ctl->DP >= table->DPs)
		goto errout;

	if (gred_rio_mode(table)) {
		if (ctl->prio == 0) {
			int def_prio = GRED_DEF_PRIO;

			if (table->tab[table->def])
				def_prio = table->tab[table->def]->prio;

			printk(KERN_DEBUG "GRED: DP %u does not have a prio, "
			       "setting default to %d\n", ctl->DP, def_prio);

			prio = def_prio;
		} else
			prio = ctl->prio;
	}

	/* Allocate outside the qdisc lock: kzalloc() may sleep with
	 * GFP_KERNEL, so the VQ is preallocated here and handed to
	 * gred_change_vq() under sch_tree_lock().
	 */
	prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
	sch_tree_lock(sch);

	err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc);
	if (err < 0)
		goto errout_locked;

	if (gred_rio_mode(table)) {
		gred_disable_wred_mode(table);
		if (gred_wred_mode_check(sch))
			gred_enable_wred_mode(table);
	}

	err = 0;

errout_locked:
	sch_tree_unlock(sch);
	kfree(prealloc);
errout:
	return err;
}

static int gred_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct nlattr *tb[TCA_GRED_MAX + 1];
	int err;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
	if (err < 0)
		return err;

	if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
		return -EINVAL;

	return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
}

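/*
 * Dump layout, as emitted below: TCA_GRED_DPS (the table definition),
 * TCA_GRED_MAX_P (one max_P value per possible DP), then TCA_GRED_PARMS
 * carrying MAX_DPs consecutive struct tc_gred_qopt entries.  Slots with
 * no VQ configured are flagged by setting opt.DP to MAX_DPs + i, which
 * is how userspace tc recognizes an empty DP.
 */
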
static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct nlattr *parms, *opts = NULL;
	int i;
	u32 max_p[MAX_DPs];
	struct tc_gred_sopt sopt = {
		.DPs	= table->DPs,
		.def_DP	= table->def,
		.grio	= gred_rio_mode(table),
		.flags	= table->red_flags,
	};

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_GRED_DPS, sizeof(sopt), &sopt))
		goto nla_put_failure;

	for (i = 0; i < MAX_DPs; i++) {
		struct gred_sched_data *q = table->tab[i];

		max_p[i] = q ? q->parms.max_P : 0;
	}
	if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p))
		goto nla_put_failure;

	parms = nla_nest_start(skb, TCA_GRED_PARMS);
	if (parms == NULL)
		goto nla_put_failure;

	for (i = 0; i < MAX_DPs; i++) {
		struct gred_sched_data *q = table->tab[i];
		struct tc_gred_qopt opt;
		unsigned long qavg;

		memset(&opt, 0, sizeof(opt));

		if (!q) {
			/* hack -- fix at some point with a proper message:
			 * this is how we indicate to tc that there is no VQ
			 * at this DP.
			 */
			opt.DP = MAX_DPs + i;
			goto append_opt;
		}

		opt.limit	= q->limit;
		opt.DP		= q->DP;
		opt.backlog	= q->backlog;
		opt.prio	= q->prio;
		opt.qth_min	= q->parms.qth_min >> q->parms.Wlog;
		opt.qth_max	= q->parms.qth_max >> q->parms.Wlog;
		opt.Wlog	= q->parms.Wlog;
		opt.Plog	= q->parms.Plog;
		opt.Scell_log	= q->parms.Scell_log;
		opt.other	= q->stats.other;
		opt.early	= q->stats.prob_drop;
		opt.forced	= q->stats.forced_drop;
		opt.pdrop	= q->stats.pdrop;
		opt.packets	= q->packetsin;
		opt.bytesin	= q->bytesin;

		if (gred_wred_mode(table))
			gred_load_wred_set(table, q);

		qavg = red_calc_qavg(&q->parms, &q->vars,
				     q->vars.qavg >> q->parms.Wlog);
		opt.qave = qavg >> q->parms.Wlog;

append_opt:
		if (nla_append(skb, sizeof(opt), &opt) < 0)
			goto nla_put_failure;
	}

	nla_nest_end(skb, parms);

	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static void gred_destroy(struct Qdisc *sch)
{
	struct gred_sched *table = qdisc_priv(sch);
	int i;

	for (i = 0; i < table->DPs; i++) {
		if (table->tab[i])
			gred_destroy_vq(table->tab[i]);
	}
}

static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
	.id		= "gred",
	.priv_size	= sizeof(struct gred_sched),
	.enqueue	= gred_enqueue,
	.dequeue	= gred_dequeue,
	.peek		= qdisc_peek_head,
	.drop		= gred_drop,
	.init		= gred_init,
	.reset		= gred_reset,
	.destroy	= gred_destroy,
	.change		= gred_change,
	.dump		= gred_dump,
	.owner		= THIS_MODULE,
};

static int __init gred_module_init(void)
{
	return register_qdisc(&gred_qdisc_ops);
}

static void __exit gred_module_exit(void)
{
	unregister_qdisc(&gred_qdisc_ops);
}

module_init(gred_module_init)
module_exit(gred_module_exit)

MODULE_LICENSE("GPL");