cls_flow.c revision 3a53943b5ae8b61913e2d61e98cbeedf67861c92
1/* 2 * net/sched/cls_flow.c Generic flow classifier 3 * 4 * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 */ 11 12#include <linux/kernel.h> 13#include <linux/init.h> 14#include <linux/list.h> 15#include <linux/jhash.h> 16#include <linux/random.h> 17#include <linux/pkt_cls.h> 18#include <linux/skbuff.h> 19#include <linux/in.h> 20#include <linux/ip.h> 21#include <linux/ipv6.h> 22#include <linux/if_vlan.h> 23#include <linux/slab.h> 24#include <linux/module.h> 25 26#include <net/pkt_cls.h> 27#include <net/ip.h> 28#include <net/route.h> 29#include <net/flow_keys.h> 30 31#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 32#include <net/netfilter/nf_conntrack.h> 33#endif 34 35struct flow_head { 36 struct list_head filters; 37}; 38 39struct flow_filter { 40 struct list_head list; 41 struct tcf_exts exts; 42 struct tcf_ematch_tree ematches; 43 struct timer_list perturb_timer; 44 u32 perturb_period; 45 u32 handle; 46 47 u32 nkeys; 48 u32 keymask; 49 u32 mode; 50 u32 mask; 51 u32 xor; 52 u32 rshift; 53 u32 addend; 54 u32 divisor; 55 u32 baseclass; 56 u32 hashrnd; 57}; 58 59static const struct tcf_ext_map flow_ext_map = { 60 .action = TCA_FLOW_ACT, 61 .police = TCA_FLOW_POLICE, 62}; 63 64static inline u32 addr_fold(void *addr) 65{ 66 unsigned long a = (unsigned long)addr; 67 68 return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0); 69} 70 71static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow) 72{ 73 if (flow->src) 74 return ntohl(flow->src); 75 return addr_fold(skb->sk); 76} 77 78static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) 79{ 80 if (flow->dst) 81 return ntohl(flow->dst); 82 return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; 83} 84 85static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) 86{ 87 return flow->ip_proto; 88} 89 90static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) 91{ 92 if (flow->ports) 93 return ntohs(flow->port16[0]); 94 95 return addr_fold(skb->sk); 96} 97 98static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) 99{ 100 if (flow->ports) 101 return ntohs(flow->port16[1]); 102 103 return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; 104} 105 106static u32 flow_get_iif(const struct sk_buff *skb) 107{ 108 return skb->skb_iif; 109} 110 111static u32 flow_get_priority(const struct sk_buff *skb) 112{ 113 return skb->priority; 114} 115 116static u32 flow_get_mark(const struct sk_buff *skb) 117{ 118 return skb->mark; 119} 120 121static u32 flow_get_nfct(const struct sk_buff *skb) 122{ 123#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 124 return addr_fold(skb->nfct); 125#else 126 return 0; 127#endif 128} 129 130#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 131#define CTTUPLE(skb, member) \ 132({ \ 133 enum ip_conntrack_info ctinfo; \ 134 const struct nf_conn *ct = nf_ct_get(skb, &ctinfo); \ 135 if (ct == NULL) \ 136 goto fallback; \ 137 ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member; \ 138}) 139#else 140#define CTTUPLE(skb, member) \ 141({ \ 142 goto fallback; \ 143 0; \ 144}) 145#endif 146 147static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) 148{ 149 switch (skb->protocol) { 150 case htons(ETH_P_IP): 151 return ntohl(CTTUPLE(skb, src.u3.ip)); 152 case htons(ETH_P_IPV6): 153 return ntohl(CTTUPLE(skb, src.u3.ip6[3])); 154 } 155fallback: 156 return flow_get_src(skb, flow); 157} 158 159static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) 160{ 161 switch (skb->protocol) { 162 case htons(ETH_P_IP): 163 return ntohl(CTTUPLE(skb, dst.u3.ip)); 164 case htons(ETH_P_IPV6): 165 return ntohl(CTTUPLE(skb, dst.u3.ip6[3])); 166 } 167fallback: 168 return flow_get_dst(skb, flow); 169} 170 171static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) 172{ 173 return ntohs(CTTUPLE(skb, src.u.all)); 174fallback: 175 return flow_get_proto_src(skb, flow); 176} 177 178static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) 179{ 180 return ntohs(CTTUPLE(skb, dst.u.all)); 181fallback: 182 return flow_get_proto_dst(skb, flow); 183} 184 185static u32 flow_get_rtclassid(const struct sk_buff *skb) 186{ 187#ifdef CONFIG_IP_ROUTE_CLASSID 188 if (skb_dst(skb)) 189 return skb_dst(skb)->tclassid; 190#endif 191 return 0; 192} 193 194static u32 flow_get_skuid(const struct sk_buff *skb) 195{ 196 if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) 197 return skb->sk->sk_socket->file->f_cred->fsuid; 198 return 0; 199} 200 201static u32 flow_get_skgid(const struct sk_buff *skb) 202{ 203 if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) 204 return skb->sk->sk_socket->file->f_cred->fsgid; 205 return 0; 206} 207 208static u32 flow_get_vlan_tag(const struct sk_buff *skb) 209{ 210 u16 uninitialized_var(tag); 211 212 if (vlan_get_tag(skb, &tag) < 0) 213 return 0; 214 return tag & VLAN_VID_MASK; 215} 216 217static u32 flow_get_rxhash(struct sk_buff *skb) 218{ 219 return skb_get_rxhash(skb); 220} 221 222static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow) 223{ 224 switch (key) { 225 case FLOW_KEY_SRC: 226 return flow_get_src(skb, flow); 227 case FLOW_KEY_DST: 228 return flow_get_dst(skb, flow); 229 case FLOW_KEY_PROTO: 230 return flow_get_proto(skb, flow); 231 case FLOW_KEY_PROTO_SRC: 232 return flow_get_proto_src(skb, flow); 233 case FLOW_KEY_PROTO_DST: 234 return flow_get_proto_dst(skb, flow); 235 case FLOW_KEY_IIF: 236 return flow_get_iif(skb); 237 case FLOW_KEY_PRIORITY: 238 return flow_get_priority(skb); 239 case FLOW_KEY_MARK: 240 return flow_get_mark(skb); 241 case FLOW_KEY_NFCT: 242 return flow_get_nfct(skb); 243 case FLOW_KEY_NFCT_SRC: 244 return flow_get_nfct_src(skb, flow); 245 case FLOW_KEY_NFCT_DST: 246 return flow_get_nfct_dst(skb, flow); 247 case FLOW_KEY_NFCT_PROTO_SRC: 248 return flow_get_nfct_proto_src(skb, flow); 249 case FLOW_KEY_NFCT_PROTO_DST: 250 return flow_get_nfct_proto_dst(skb, flow); 251 case FLOW_KEY_RTCLASSID: 252 return flow_get_rtclassid(skb); 253 case FLOW_KEY_SKUID: 254 return flow_get_skuid(skb); 255 case FLOW_KEY_SKGID: 256 return flow_get_skgid(skb); 257 case FLOW_KEY_VLAN_TAG: 258 return flow_get_vlan_tag(skb); 259 case FLOW_KEY_RXHASH: 260 return flow_get_rxhash(skb); 261 default: 262 WARN_ON(1); 263 return 0; 264 } 265} 266 267#define FLOW_KEYS_NEEDED ((1 << FLOW_KEY_SRC) | \ 268 (1 << FLOW_KEY_DST) | \ 269 (1 << FLOW_KEY_PROTO) | \ 270 (1 << FLOW_KEY_PROTO_SRC) | \ 271 (1 << FLOW_KEY_PROTO_DST) | \ 272 (1 << FLOW_KEY_NFCT_SRC) | \ 273 (1 << FLOW_KEY_NFCT_DST) | \ 274 (1 << FLOW_KEY_NFCT_PROTO_SRC) | \ 275 (1 << FLOW_KEY_NFCT_PROTO_DST)) 276 277static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, 278 struct tcf_result *res) 279{ 280 struct flow_head *head = tp->root; 281 struct flow_filter *f; 282 u32 keymask; 283 u32 classid; 284 unsigned int n, key; 285 int r; 286 287 list_for_each_entry(f, &head->filters, list) { 288 u32 keys[FLOW_KEY_MAX + 1]; 289 struct flow_keys flow_keys; 290 291 if (!tcf_em_tree_match(skb, &f->ematches, NULL)) 292 continue; 293 294 keymask = f->keymask; 295 if (keymask & FLOW_KEYS_NEEDED) 296 skb_flow_dissect(skb, &flow_keys); 297 298 for (n = 0; n < f->nkeys; n++) { 299 key = ffs(keymask) - 1; 300 keymask &= ~(1 << key); 301 keys[n] = flow_key_get(skb, key, &flow_keys); 302 } 303 304 if (f->mode == FLOW_MODE_HASH) 305 classid = jhash2(keys, f->nkeys, f->hashrnd); 306 else { 307 classid = keys[0]; 308 classid = (classid & f->mask) ^ f->xor; 309 classid = (classid >> f->rshift) + f->addend; 310 } 311 312 if (f->divisor) 313 classid %= f->divisor; 314 315 res->class = 0; 316 res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid); 317 318 r = tcf_exts_exec(skb, &f->exts, res); 319 if (r < 0) 320 continue; 321 return r; 322 } 323 return -1; 324} 325 326static void flow_perturbation(unsigned long arg) 327{ 328 struct flow_filter *f = (struct flow_filter *)arg; 329 330 get_random_bytes(&f->hashrnd, 4); 331 if (f->perturb_period) 332 mod_timer(&f->perturb_timer, jiffies + f->perturb_period); 333} 334 335static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { 336 [TCA_FLOW_KEYS] = { .type = NLA_U32 }, 337 [TCA_FLOW_MODE] = { .type = NLA_U32 }, 338 [TCA_FLOW_BASECLASS] = { .type = NLA_U32 }, 339 [TCA_FLOW_RSHIFT] = { .type = NLA_U32 }, 340 [TCA_FLOW_ADDEND] = { .type = NLA_U32 }, 341 [TCA_FLOW_MASK] = { .type = NLA_U32 }, 342 [TCA_FLOW_XOR] = { .type = NLA_U32 }, 343 [TCA_FLOW_DIVISOR] = { .type = NLA_U32 }, 344 [TCA_FLOW_ACT] = { .type = NLA_NESTED }, 345 [TCA_FLOW_POLICE] = { .type = NLA_NESTED }, 346 [TCA_FLOW_EMATCHES] = { .type = NLA_NESTED }, 347 [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, 348}; 349 350static int flow_change(struct tcf_proto *tp, unsigned long base, 351 u32 handle, struct nlattr **tca, 352 unsigned long *arg) 353{ 354 struct flow_head *head = tp->root; 355 struct flow_filter *f; 356 struct nlattr *opt = tca[TCA_OPTIONS]; 357 struct nlattr *tb[TCA_FLOW_MAX + 1]; 358 struct tcf_exts e; 359 struct tcf_ematch_tree t; 360 unsigned int nkeys = 0; 361 unsigned int perturb_period = 0; 362 u32 baseclass = 0; 363 u32 keymask = 0; 364 u32 mode; 365 int err; 366 367 if (opt == NULL) 368 return -EINVAL; 369 370 err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy); 371 if (err < 0) 372 return err; 373 374 if (tb[TCA_FLOW_BASECLASS]) { 375 baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]); 376 if (TC_H_MIN(baseclass) == 0) 377 return -EINVAL; 378 } 379 380 if (tb[TCA_FLOW_KEYS]) { 381 keymask = nla_get_u32(tb[TCA_FLOW_KEYS]); 382 383 nkeys = hweight32(keymask); 384 if (nkeys == 0) 385 return -EINVAL; 386 387 if (fls(keymask) - 1 > FLOW_KEY_MAX) 388 return -EOPNOTSUPP; 389 } 390 391 err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map); 392 if (err < 0) 393 return err; 394 395 err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t); 396 if (err < 0) 397 goto err1; 398 399 f = (struct flow_filter *)*arg; 400 if (f != NULL) { 401 err = -EINVAL; 402 if (f->handle != handle && handle) 403 goto err2; 404 405 mode = f->mode; 406 if (tb[TCA_FLOW_MODE]) 407 mode = nla_get_u32(tb[TCA_FLOW_MODE]); 408 if (mode != FLOW_MODE_HASH && nkeys > 1) 409 goto err2; 410 411 if (mode == FLOW_MODE_HASH) 412 perturb_period = f->perturb_period; 413 if (tb[TCA_FLOW_PERTURB]) { 414 if (mode != FLOW_MODE_HASH) 415 goto err2; 416 perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; 417 } 418 } else { 419 err = -EINVAL; 420 if (!handle) 421 goto err2; 422 if (!tb[TCA_FLOW_KEYS]) 423 goto err2; 424 425 mode = FLOW_MODE_MAP; 426 if (tb[TCA_FLOW_MODE]) 427 mode = nla_get_u32(tb[TCA_FLOW_MODE]); 428 if (mode != FLOW_MODE_HASH && nkeys > 1) 429 goto err2; 430 431 if (tb[TCA_FLOW_PERTURB]) { 432 if (mode != FLOW_MODE_HASH) 433 goto err2; 434 perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; 435 } 436 437 if (TC_H_MAJ(baseclass) == 0) 438 baseclass = TC_H_MAKE(tp->q->handle, baseclass); 439 if (TC_H_MIN(baseclass) == 0) 440 baseclass = TC_H_MAKE(baseclass, 1); 441 442 err = -ENOBUFS; 443 f = kzalloc(sizeof(*f), GFP_KERNEL); 444 if (f == NULL) 445 goto err2; 446 447 f->handle = handle; 448 f->mask = ~0U; 449 450 get_random_bytes(&f->hashrnd, 4); 451 f->perturb_timer.function = flow_perturbation; 452 f->perturb_timer.data = (unsigned long)f; 453 init_timer_deferrable(&f->perturb_timer); 454 } 455 456 tcf_exts_change(tp, &f->exts, &e); 457 tcf_em_tree_change(tp, &f->ematches, &t); 458 459 tcf_tree_lock(tp); 460 461 if (tb[TCA_FLOW_KEYS]) { 462 f->keymask = keymask; 463 f->nkeys = nkeys; 464 } 465 466 f->mode = mode; 467 468 if (tb[TCA_FLOW_MASK]) 469 f->mask = nla_get_u32(tb[TCA_FLOW_MASK]); 470 if (tb[TCA_FLOW_XOR]) 471 f->xor = nla_get_u32(tb[TCA_FLOW_XOR]); 472 if (tb[TCA_FLOW_RSHIFT]) 473 f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]); 474 if (tb[TCA_FLOW_ADDEND]) 475 f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]); 476 477 if (tb[TCA_FLOW_DIVISOR]) 478 f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]); 479 if (baseclass) 480 f->baseclass = baseclass; 481 482 f->perturb_period = perturb_period; 483 del_timer(&f->perturb_timer); 484 if (perturb_period) 485 mod_timer(&f->perturb_timer, jiffies + perturb_period); 486 487 if (*arg == 0) 488 list_add_tail(&f->list, &head->filters); 489 490 tcf_tree_unlock(tp); 491 492 *arg = (unsigned long)f; 493 return 0; 494 495err2: 496 tcf_em_tree_destroy(tp, &t); 497err1: 498 tcf_exts_destroy(tp, &e); 499 return err; 500} 501 502static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f) 503{ 504 del_timer_sync(&f->perturb_timer); 505 tcf_exts_destroy(tp, &f->exts); 506 tcf_em_tree_destroy(tp, &f->ematches); 507 kfree(f); 508} 509 510static int flow_delete(struct tcf_proto *tp, unsigned long arg) 511{ 512 struct flow_filter *f = (struct flow_filter *)arg; 513 514 tcf_tree_lock(tp); 515 list_del(&f->list); 516 tcf_tree_unlock(tp); 517 flow_destroy_filter(tp, f); 518 return 0; 519} 520 521static int flow_init(struct tcf_proto *tp) 522{ 523 struct flow_head *head; 524 525 head = kzalloc(sizeof(*head), GFP_KERNEL); 526 if (head == NULL) 527 return -ENOBUFS; 528 INIT_LIST_HEAD(&head->filters); 529 tp->root = head; 530 return 0; 531} 532 533static void flow_destroy(struct tcf_proto *tp) 534{ 535 struct flow_head *head = tp->root; 536 struct flow_filter *f, *next; 537 538 list_for_each_entry_safe(f, next, &head->filters, list) { 539 list_del(&f->list); 540 flow_destroy_filter(tp, f); 541 } 542 kfree(head); 543} 544 545static unsigned long flow_get(struct tcf_proto *tp, u32 handle) 546{ 547 struct flow_head *head = tp->root; 548 struct flow_filter *f; 549 550 list_for_each_entry(f, &head->filters, list) 551 if (f->handle == handle) 552 return (unsigned long)f; 553 return 0; 554} 555 556static void flow_put(struct tcf_proto *tp, unsigned long f) 557{ 558} 559 560static int flow_dump(struct tcf_proto *tp, unsigned long fh, 561 struct sk_buff *skb, struct tcmsg *t) 562{ 563 struct flow_filter *f = (struct flow_filter *)fh; 564 struct nlattr *nest; 565 566 if (f == NULL) 567 return skb->len; 568 569 t->tcm_handle = f->handle; 570 571 nest = nla_nest_start(skb, TCA_OPTIONS); 572 if (nest == NULL) 573 goto nla_put_failure; 574 575 NLA_PUT_U32(skb, TCA_FLOW_KEYS, f->keymask); 576 NLA_PUT_U32(skb, TCA_FLOW_MODE, f->mode); 577 578 if (f->mask != ~0 || f->xor != 0) { 579 NLA_PUT_U32(skb, TCA_FLOW_MASK, f->mask); 580 NLA_PUT_U32(skb, TCA_FLOW_XOR, f->xor); 581 } 582 if (f->rshift) 583 NLA_PUT_U32(skb, TCA_FLOW_RSHIFT, f->rshift); 584 if (f->addend) 585 NLA_PUT_U32(skb, TCA_FLOW_ADDEND, f->addend); 586 587 if (f->divisor) 588 NLA_PUT_U32(skb, TCA_FLOW_DIVISOR, f->divisor); 589 if (f->baseclass) 590 NLA_PUT_U32(skb, TCA_FLOW_BASECLASS, f->baseclass); 591 592 if (f->perturb_period) 593 NLA_PUT_U32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ); 594 595 if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0) 596 goto nla_put_failure; 597#ifdef CONFIG_NET_EMATCH 598 if (f->ematches.hdr.nmatches && 599 tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0) 600 goto nla_put_failure; 601#endif 602 nla_nest_end(skb, nest); 603 604 if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0) 605 goto nla_put_failure; 606 607 return skb->len; 608 609nla_put_failure: 610 nlmsg_trim(skb, nest); 611 return -1; 612} 613 614static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg) 615{ 616 struct flow_head *head = tp->root; 617 struct flow_filter *f; 618 619 list_for_each_entry(f, &head->filters, list) { 620 if (arg->count < arg->skip) 621 goto skip; 622 if (arg->fn(tp, (unsigned long)f, arg) < 0) { 623 arg->stop = 1; 624 break; 625 } 626skip: 627 arg->count++; 628 } 629} 630 631static struct tcf_proto_ops cls_flow_ops __read_mostly = { 632 .kind = "flow", 633 .classify = flow_classify, 634 .init = flow_init, 635 .destroy = flow_destroy, 636 .change = flow_change, 637 .delete = flow_delete, 638 .get = flow_get, 639 .put = flow_put, 640 .dump = flow_dump, 641 .walk = flow_walk, 642 .owner = THIS_MODULE, 643}; 644 645static int __init cls_flow_init(void) 646{ 647 return register_tcf_proto_ops(&cls_flow_ops); 648} 649 650static void __exit cls_flow_exit(void) 651{ 652 unregister_tcf_proto_ops(&cls_flow_ops); 653} 654 655module_init(cls_flow_init); 656module_exit(cls_flow_exit); 657 658MODULE_LICENSE("GPL"); 659MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 660MODULE_DESCRIPTION("TC flow classifier"); 661