fib_trie.c revision f0e36f8cee8101604378085171c980d9cc71d779
1/* 2 * This program is free software; you can redistribute it and/or 3 * modify it under the terms of the GNU General Public License 4 * as published by the Free Software Foundation; either version 5 * 2 of the License, or (at your option) any later version. 6 * 7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet 8 * & Swedish University of Agricultural Sciences. 9 * 10 * Jens Laas <jens.laas@data.slu.se> Swedish University of 11 * Agricultural Sciences. 12 * 13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet 14 * 15 * This work is based on the LPC-trie which is originally descibed in: 16 * 17 * An experimental study of compression methods for dynamic tries 18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ 20 * 21 * 22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 24 * 25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $ 26 * 27 * 28 * Code from fib_hash has been reused which includes the following header: 29 * 30 * 31 * INET An implementation of the TCP/IP protocol suite for the LINUX 32 * operating system. INET is implemented using the BSD Socket 33 * interface as the means of communication with the user level. 34 * 35 * IPv4 FIB: lookup engine and maintenance routines. 36 * 37 * 38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 39 * 40 * This program is free software; you can redistribute it and/or 41 * modify it under the terms of the GNU General Public License 42 * as published by the Free Software Foundation; either version 43 * 2 of the License, or (at your option) any later version. 44 */ 45 46#define VERSION "0.324" 47 48#include <linux/config.h> 49#include <asm/uaccess.h> 50#include <asm/system.h> 51#include <asm/bitops.h> 52#include <linux/types.h> 53#include <linux/kernel.h> 54#include <linux/sched.h> 55#include <linux/mm.h> 56#include <linux/string.h> 57#include <linux/socket.h> 58#include <linux/sockios.h> 59#include <linux/errno.h> 60#include <linux/in.h> 61#include <linux/inet.h> 62#include <linux/netdevice.h> 63#include <linux/if_arp.h> 64#include <linux/proc_fs.h> 65#include <linux/skbuff.h> 66#include <linux/netlink.h> 67#include <linux/init.h> 68#include <linux/list.h> 69#include <net/ip.h> 70#include <net/protocol.h> 71#include <net/route.h> 72#include <net/tcp.h> 73#include <net/sock.h> 74#include <net/ip_fib.h> 75#include "fib_lookup.h" 76 77#undef CONFIG_IP_FIB_TRIE_STATS 78#define MAX_CHILDS 16384 79 80#define EXTRACT(p, n, str) ((str)<<(p)>>(32-(n))) 81#define KEYLENGTH (8*sizeof(t_key)) 82#define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l)) 83#define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset)) 84 85static DEFINE_RWLOCK(fib_lock); 86 87typedef unsigned int t_key; 88 89#define T_TNODE 0 90#define T_LEAF 1 91#define NODE_TYPE_MASK 0x1UL 92#define NODE_PARENT(_node) \ 93((struct tnode *)((_node)->_parent & ~NODE_TYPE_MASK)) 94#define NODE_SET_PARENT(_node, _ptr) \ 95((_node)->_parent = (((unsigned long)(_ptr)) | \ 96 ((_node)->_parent & NODE_TYPE_MASK))) 97#define NODE_INIT_PARENT(_node, _type) \ 98((_node)->_parent = (_type)) 99#define NODE_TYPE(_node) \ 100((_node)->_parent & NODE_TYPE_MASK) 101 102#define IS_TNODE(n) (!(n->_parent & T_LEAF)) 103#define IS_LEAF(n) (n->_parent & T_LEAF) 104 105struct node { 106 t_key key; 107 unsigned long _parent; 108}; 109 110struct leaf { 111 t_key key; 112 unsigned long _parent; 113 struct hlist_head list; 114}; 115 116struct leaf_info { 117 struct hlist_node hlist; 118 int plen; 119 struct list_head falh; 120}; 121 122struct tnode { 123 t_key key; 124 unsigned long _parent; 125 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */ 126 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */ 127 unsigned short full_children; /* KEYLENGTH bits needed */ 128 unsigned short empty_children; /* KEYLENGTH bits needed */ 129 struct node *child[0]; 130}; 131 132#ifdef CONFIG_IP_FIB_TRIE_STATS 133struct trie_use_stats { 134 unsigned int gets; 135 unsigned int backtrack; 136 unsigned int semantic_match_passed; 137 unsigned int semantic_match_miss; 138 unsigned int null_node_hit; 139}; 140#endif 141 142struct trie_stat { 143 unsigned int totdepth; 144 unsigned int maxdepth; 145 unsigned int tnodes; 146 unsigned int leaves; 147 unsigned int nullpointers; 148 unsigned int nodesizes[MAX_CHILDS]; 149}; 150 151struct trie { 152 struct node *trie; 153#ifdef CONFIG_IP_FIB_TRIE_STATS 154 struct trie_use_stats stats; 155#endif 156 int size; 157 unsigned int revision; 158}; 159 160static int trie_debug = 0; 161 162static int tnode_full(struct tnode *tn, struct node *n); 163static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); 164static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); 165static int tnode_child_length(struct tnode *tn); 166static struct node *resize(struct trie *t, struct tnode *tn); 167static struct tnode *inflate(struct trie *t, struct tnode *tn); 168static struct tnode *halve(struct trie *t, struct tnode *tn); 169static void tnode_free(struct tnode *tn); 170static void trie_dump_seq(struct seq_file *seq, struct trie *t); 171extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); 172extern int fib_detect_death(struct fib_info *fi, int order, 173 struct fib_info **last_resort, int *last_idx, int *dflt); 174 175extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id, 176 struct nlmsghdr *n, struct netlink_skb_parms *req); 177 178static kmem_cache_t *fn_alias_kmem; 179static struct trie *trie_local = NULL, *trie_main = NULL; 180 181static void trie_bug(char *err) 182{ 183 printk("Trie Bug: %s\n", err); 184 BUG(); 185} 186 187static inline struct node *tnode_get_child(struct tnode *tn, int i) 188{ 189 if (i >= 1<<tn->bits) 190 trie_bug("tnode_get_child"); 191 192 return tn->child[i]; 193} 194 195static inline int tnode_child_length(struct tnode *tn) 196{ 197 return 1<<tn->bits; 198} 199 200/* 201 _________________________________________________________________ 202 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | 203 ---------------------------------------------------------------- 204 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 205 206 _________________________________________________________________ 207 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | 208 ----------------------------------------------------------------- 209 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 210 211 tp->pos = 7 212 tp->bits = 3 213 n->pos = 15 214 n->bits=4 215 KEYLENGTH=32 216*/ 217 218static inline t_key tkey_extract_bits(t_key a, int offset, int bits) 219{ 220 if (offset < KEYLENGTH) 221 return ((t_key)(a << offset)) >> (KEYLENGTH - bits); 222 else 223 return 0; 224} 225 226static inline int tkey_equals(t_key a, t_key b) 227{ 228 return a == b; 229} 230 231static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b) 232{ 233 if (bits == 0 || offset >= KEYLENGTH) 234 return 1; 235 bits = bits > KEYLENGTH ? KEYLENGTH : bits; 236 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0; 237} 238 239static inline int tkey_mismatch(t_key a, int offset, t_key b) 240{ 241 t_key diff = a ^ b; 242 int i = offset; 243 244 if(!diff) 245 return 0; 246 while((diff << i) >> (KEYLENGTH-1) == 0) 247 i++; 248 return i; 249} 250 251/* Candiate for fib_semantics */ 252 253static void fn_free_alias(struct fib_alias *fa) 254{ 255 fib_release_info(fa->fa_info); 256 kmem_cache_free(fn_alias_kmem, fa); 257} 258 259/* 260 To understand this stuff, an understanding of keys and all their bits is 261 necessary. Every node in the trie has a key associated with it, but not 262 all of the bits in that key are significant. 263 264 Consider a node 'n' and its parent 'tp'. 265 266 If n is a leaf, every bit in its key is significant. Its presence is 267 necessitaded by path compression, since during a tree traversal (when 268 searching for a leaf - unless we are doing an insertion) we will completely 269 ignore all skipped bits we encounter. Thus we need to verify, at the end of 270 a potentially successful search, that we have indeed been walking the 271 correct key path. 272 273 Note that we can never "miss" the correct key in the tree if present by 274 following the wrong path. Path compression ensures that segments of the key 275 that are the same for all keys with a given prefix are skipped, but the 276 skipped part *is* identical for each node in the subtrie below the skipped 277 bit! trie_insert() in this implementation takes care of that - note the 278 call to tkey_sub_equals() in trie_insert(). 279 280 if n is an internal node - a 'tnode' here, the various parts of its key 281 have many different meanings. 282 283 Example: 284 _________________________________________________________________ 285 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | 286 ----------------------------------------------------------------- 287 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 288 289 _________________________________________________________________ 290 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | 291 ----------------------------------------------------------------- 292 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 293 294 tp->pos = 7 295 tp->bits = 3 296 n->pos = 15 297 n->bits=4 298 299 First, let's just ignore the bits that come before the parent tp, that is 300 the bits from 0 to (tp->pos-1). They are *known* but at this point we do 301 not use them for anything. 302 303 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the 304 index into the parent's child array. That is, they will be used to find 305 'n' among tp's children. 306 307 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits 308 for the node n. 309 310 All the bits we have seen so far are significant to the node n. The rest 311 of the bits are really not needed or indeed known in n->key. 312 313 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into 314 n's child array, and will of course be different for each child. 315 316 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown 317 at this point. 318 319*/ 320 321static void check_tnode(struct tnode *tn) 322{ 323 if(tn && tn->pos+tn->bits > 32) { 324 printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits); 325 } 326} 327 328static int halve_threshold = 25; 329static int inflate_threshold = 50; 330 331static struct leaf *leaf_new(void) 332{ 333 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL); 334 if(l) { 335 NODE_INIT_PARENT(l, T_LEAF); 336 INIT_HLIST_HEAD(&l->list); 337 } 338 return l; 339} 340 341static struct leaf_info *leaf_info_new(int plen) 342{ 343 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); 344 if(li) { 345 li->plen = plen; 346 INIT_LIST_HEAD(&li->falh); 347 } 348 return li; 349} 350 351static inline void free_leaf(struct leaf *l) 352{ 353 kfree(l); 354} 355 356static inline void free_leaf_info(struct leaf_info *li) 357{ 358 kfree(li); 359} 360 361static struct tnode *tnode_alloc(unsigned int size) 362{ 363 if (size <= PAGE_SIZE) { 364 return kmalloc(size, GFP_KERNEL); 365 } else { 366 return (struct tnode *) 367 __get_free_pages(GFP_KERNEL, get_order(size)); 368 } 369} 370 371static void __tnode_free(struct tnode *tn) 372{ 373 unsigned int size = sizeof(struct tnode) + 374 (1<<tn->bits) * sizeof(struct node *); 375 376 if (size <= PAGE_SIZE) 377 kfree(tn); 378 else 379 free_pages((unsigned long)tn, get_order(size)); 380} 381 382static struct tnode* tnode_new(t_key key, int pos, int bits) 383{ 384 int nchildren = 1<<bits; 385 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *); 386 struct tnode *tn = tnode_alloc(sz); 387 388 if(tn) { 389 memset(tn, 0, sz); 390 NODE_INIT_PARENT(tn, T_TNODE); 391 tn->pos = pos; 392 tn->bits = bits; 393 tn->key = key; 394 tn->full_children = 0; 395 tn->empty_children = 1<<bits; 396 } 397 if(trie_debug > 0) 398 printk("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode), 399 (unsigned int) (sizeof(struct node) * 1<<bits)); 400 return tn; 401} 402 403static void tnode_free(struct tnode *tn) 404{ 405 if(!tn) { 406 trie_bug("tnode_free\n"); 407 } 408 if(IS_LEAF(tn)) { 409 free_leaf((struct leaf *)tn); 410 if(trie_debug > 0 ) 411 printk("FL %p \n", tn); 412 } 413 else if(IS_TNODE(tn)) { 414 __tnode_free(tn); 415 if(trie_debug > 0 ) 416 printk("FT %p \n", tn); 417 } 418 else { 419 trie_bug("tnode_free\n"); 420 } 421} 422 423/* 424 * Check whether a tnode 'n' is "full", i.e. it is an internal node 425 * and no bits are skipped. See discussion in dyntree paper p. 6 426 */ 427 428static inline int tnode_full(struct tnode *tn, struct node *n) 429{ 430 if(n == NULL || IS_LEAF(n)) 431 return 0; 432 433 return ((struct tnode *) n)->pos == tn->pos + tn->bits; 434} 435 436static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n) 437{ 438 tnode_put_child_reorg(tn, i, n, -1); 439} 440 441 /* 442 * Add a child at position i overwriting the old value. 443 * Update the value of full_children and empty_children. 444 */ 445 446static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull) 447{ 448 struct node *chi; 449 int isfull; 450 451 if(i >= 1<<tn->bits) { 452 printk("bits=%d, i=%d\n", tn->bits, i); 453 trie_bug("tnode_put_child_reorg bits"); 454 } 455 write_lock_bh(&fib_lock); 456 chi = tn->child[i]; 457 458 /* update emptyChildren */ 459 if (n == NULL && chi != NULL) 460 tn->empty_children++; 461 else if (n != NULL && chi == NULL) 462 tn->empty_children--; 463 464 /* update fullChildren */ 465 if (wasfull == -1) 466 wasfull = tnode_full(tn, chi); 467 468 isfull = tnode_full(tn, n); 469 if (wasfull && !isfull) 470 tn->full_children--; 471 472 else if (!wasfull && isfull) 473 tn->full_children++; 474 if(n) 475 NODE_SET_PARENT(n, tn); 476 477 tn->child[i] = n; 478 write_unlock_bh(&fib_lock); 479} 480 481static struct node *resize(struct trie *t, struct tnode *tn) 482{ 483 int i; 484 485 if (!tn) 486 return NULL; 487 488 if(trie_debug) 489 printk("In tnode_resize %p inflate_threshold=%d threshold=%d\n", 490 tn, inflate_threshold, halve_threshold); 491 492 /* No children */ 493 if (tn->empty_children == tnode_child_length(tn)) { 494 tnode_free(tn); 495 return NULL; 496 } 497 /* One child */ 498 if (tn->empty_children == tnode_child_length(tn) - 1) 499 for (i = 0; i < tnode_child_length(tn); i++) { 500 501 write_lock_bh(&fib_lock); 502 if (tn->child[i] != NULL) { 503 504 /* compress one level */ 505 struct node *n = tn->child[i]; 506 if(n) 507 NODE_INIT_PARENT(n, NODE_TYPE(n)); 508 509 write_unlock_bh(&fib_lock); 510 tnode_free(tn); 511 return n; 512 } 513 write_unlock_bh(&fib_lock); 514 } 515 /* 516 * Double as long as the resulting node has a number of 517 * nonempty nodes that are above the threshold. 518 */ 519 520 /* 521 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of 522 * the Helsinki University of Technology and Matti Tikkanen of Nokia 523 * Telecommunications, page 6: 524 * "A node is doubled if the ratio of non-empty children to all 525 * children in the *doubled* node is at least 'high'." 526 * 527 * 'high' in this instance is the variable 'inflate_threshold'. It 528 * is expressed as a percentage, so we multiply it with 529 * tnode_child_length() and instead of multiplying by 2 (since the 530 * child array will be doubled by inflate()) and multiplying 531 * the left-hand side by 100 (to handle the percentage thing) we 532 * multiply the left-hand side by 50. 533 * 534 * The left-hand side may look a bit weird: tnode_child_length(tn) 535 * - tn->empty_children is of course the number of non-null children 536 * in the current node. tn->full_children is the number of "full" 537 * children, that is non-null tnodes with a skip value of 0. 538 * All of those will be doubled in the resulting inflated tnode, so 539 * we just count them one extra time here. 540 * 541 * A clearer way to write this would be: 542 * 543 * to_be_doubled = tn->full_children; 544 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children - 545 * tn->full_children; 546 * 547 * new_child_length = tnode_child_length(tn) * 2; 548 * 549 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) / 550 * new_child_length; 551 * if (new_fill_factor >= inflate_threshold) 552 * 553 * ...and so on, tho it would mess up the while() loop. 554 * 555 * anyway, 556 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >= 557 * inflate_threshold 558 * 559 * avoid a division: 560 * 100 * (not_to_be_doubled + 2*to_be_doubled) >= 561 * inflate_threshold * new_child_length 562 * 563 * expand not_to_be_doubled and to_be_doubled, and shorten: 564 * 100 * (tnode_child_length(tn) - tn->empty_children + 565 * tn->full_children ) >= inflate_threshold * new_child_length 566 * 567 * expand new_child_length: 568 * 100 * (tnode_child_length(tn) - tn->empty_children + 569 * tn->full_children ) >= 570 * inflate_threshold * tnode_child_length(tn) * 2 571 * 572 * shorten again: 573 * 50 * (tn->full_children + tnode_child_length(tn) - 574 * tn->empty_children ) >= inflate_threshold * 575 * tnode_child_length(tn) 576 * 577 */ 578 579 check_tnode(tn); 580 581 while ((tn->full_children > 0 && 582 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= 583 inflate_threshold * tnode_child_length(tn))) { 584 585 tn = inflate(t, tn); 586 } 587 588 check_tnode(tn); 589 590 /* 591 * Halve as long as the number of empty children in this 592 * node is above threshold. 593 */ 594 while (tn->bits > 1 && 595 100 * (tnode_child_length(tn) - tn->empty_children) < 596 halve_threshold * tnode_child_length(tn)) 597 598 tn = halve(t, tn); 599 600 /* Only one child remains */ 601 602 if (tn->empty_children == tnode_child_length(tn) - 1) 603 for (i = 0; i < tnode_child_length(tn); i++) { 604 605 write_lock_bh(&fib_lock); 606 if (tn->child[i] != NULL) { 607 /* compress one level */ 608 struct node *n = tn->child[i]; 609 610 if(n) 611 NODE_INIT_PARENT(n, NODE_TYPE(n)); 612 613 write_unlock_bh(&fib_lock); 614 tnode_free(tn); 615 return n; 616 } 617 write_unlock_bh(&fib_lock); 618 } 619 620 return (struct node *) tn; 621} 622 623static struct tnode *inflate(struct trie *t, struct tnode *tn) 624{ 625 struct tnode *inode; 626 struct tnode *oldtnode = tn; 627 int olen = tnode_child_length(tn); 628 int i; 629 630 if(trie_debug) 631 printk("In inflate\n"); 632 633 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); 634 635 if (!tn) 636 trie_bug("tnode_new failed"); 637 638 for(i = 0; i < olen; i++) { 639 struct node *node = tnode_get_child(oldtnode, i); 640 641 /* An empty child */ 642 if (node == NULL) 643 continue; 644 645 /* A leaf or an internal node with skipped bits */ 646 647 if(IS_LEAF(node) || ((struct tnode *) node)->pos > 648 tn->pos + tn->bits - 1) { 649 if(tkey_extract_bits(node->key, tn->pos + tn->bits - 1, 650 1) == 0) 651 put_child(t, tn, 2*i, node); 652 else 653 put_child(t, tn, 2*i+1, node); 654 continue; 655 } 656 657 /* An internal node with two children */ 658 inode = (struct tnode *) node; 659 660 if (inode->bits == 1) { 661 put_child(t, tn, 2*i, inode->child[0]); 662 put_child(t, tn, 2*i+1, inode->child[1]); 663 664 tnode_free(inode); 665 } 666 667 /* An internal node with more than two children */ 668 else { 669 struct tnode *left, *right; 670 int size, j; 671 672 /* We will replace this node 'inode' with two new 673 * ones, 'left' and 'right', each with half of the 674 * original children. The two new nodes will have 675 * a position one bit further down the key and this 676 * means that the "significant" part of their keys 677 * (see the discussion near the top of this file) 678 * will differ by one bit, which will be "0" in 679 * left's key and "1" in right's key. Since we are 680 * moving the key position by one step, the bit that 681 * we are moving away from - the bit at position 682 * (inode->pos) - is the one that will differ between 683 * left and right. So... we synthesize that bit in the 684 * two new keys. 685 * The mask 'm' below will be a single "one" bit at 686 * the position (inode->pos) 687 */ 688 689 t_key m = TKEY_GET_MASK(inode->pos, 1); 690 691 /* Use the old key, but set the new significant 692 * bit to zero. 693 */ 694 left = tnode_new(inode->key&(~m), inode->pos + 1, 695 inode->bits - 1); 696 697 if(!left) 698 trie_bug("tnode_new failed"); 699 700 701 /* Use the old key, but set the new significant 702 * bit to one. 703 */ 704 right = tnode_new(inode->key|m, inode->pos + 1, 705 inode->bits - 1); 706 707 if(!right) 708 trie_bug("tnode_new failed"); 709 710 size = tnode_child_length(left); 711 for(j = 0; j < size; j++) { 712 put_child(t, left, j, inode->child[j]); 713 put_child(t, right, j, inode->child[j + size]); 714 } 715 put_child(t, tn, 2*i, resize(t, left)); 716 put_child(t, tn, 2*i+1, resize(t, right)); 717 718 tnode_free(inode); 719 } 720 } 721 tnode_free(oldtnode); 722 return tn; 723} 724 725static struct tnode *halve(struct trie *t, struct tnode *tn) 726{ 727 struct tnode *oldtnode = tn; 728 struct node *left, *right; 729 int i; 730 int olen = tnode_child_length(tn); 731 732 if(trie_debug) printk("In halve\n"); 733 734 tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); 735 736 if(!tn) 737 trie_bug("tnode_new failed"); 738 739 for(i = 0; i < olen; i += 2) { 740 left = tnode_get_child(oldtnode, i); 741 right = tnode_get_child(oldtnode, i+1); 742 743 /* At least one of the children is empty */ 744 if (left == NULL) { 745 if (right == NULL) /* Both are empty */ 746 continue; 747 put_child(t, tn, i/2, right); 748 } else if (right == NULL) 749 put_child(t, tn, i/2, left); 750 751 /* Two nonempty children */ 752 else { 753 struct tnode *newBinNode = 754 tnode_new(left->key, tn->pos + tn->bits, 1); 755 756 if(!newBinNode) 757 trie_bug("tnode_new failed"); 758 759 put_child(t, newBinNode, 0, left); 760 put_child(t, newBinNode, 1, right); 761 put_child(t, tn, i/2, resize(t, newBinNode)); 762 } 763 } 764 tnode_free(oldtnode); 765 return tn; 766} 767 768static void *trie_init(struct trie *t) 769{ 770 if(t) { 771 t->size = 0; 772 t->trie = NULL; 773 t->revision = 0; 774#ifdef CONFIG_IP_FIB_TRIE_STATS 775 memset(&t->stats, 0, sizeof(struct trie_use_stats)); 776#endif 777 } 778 return t; 779} 780 781static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) 782{ 783 struct hlist_node *node; 784 struct leaf_info *li; 785 786 hlist_for_each_entry(li, node, head, hlist) { 787 788 if ( li->plen == plen ) 789 return li; 790 } 791 return NULL; 792} 793 794static inline struct list_head * get_fa_head(struct leaf *l, int plen) 795{ 796 struct list_head *fa_head=NULL; 797 struct leaf_info *li = find_leaf_info(&l->list, plen); 798 799 if(li) 800 fa_head = &li->falh; 801 802 return fa_head; 803} 804 805static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new) 806{ 807 struct leaf_info *li=NULL, *last=NULL; 808 struct hlist_node *node, *tmp; 809 810 write_lock_bh(&fib_lock); 811 812 if(hlist_empty(head)) 813 hlist_add_head(&new->hlist, head); 814 else { 815 hlist_for_each_entry_safe(li, node, tmp, head, hlist) { 816 817 if (new->plen > li->plen) 818 break; 819 820 last = li; 821 } 822 if(last) 823 hlist_add_after(&last->hlist, &new->hlist); 824 else 825 hlist_add_before(&new->hlist, &li->hlist); 826 } 827 write_unlock_bh(&fib_lock); 828} 829 830static struct leaf * 831fib_find_node(struct trie *t, u32 key) 832{ 833 int pos; 834 struct tnode *tn; 835 struct node *n; 836 837 pos = 0; 838 n=t->trie; 839 840 while (n != NULL && NODE_TYPE(n) == T_TNODE) { 841 tn = (struct tnode *) n; 842 843 check_tnode(tn); 844 845 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 846 pos=tn->pos + tn->bits; 847 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); 848 } 849 else 850 break; 851 } 852 /* Case we have found a leaf. Compare prefixes */ 853 854 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { 855 struct leaf *l = (struct leaf *) n; 856 return l; 857 } 858 return NULL; 859} 860 861static struct node *trie_rebalance(struct trie *t, struct tnode *tn) 862{ 863 int i = 0; 864 int wasfull; 865 t_key cindex, key; 866 struct tnode *tp = NULL; 867 868 if(!tn) 869 BUG(); 870 871 key = tn->key; 872 i = 0; 873 874 while (tn != NULL && NODE_PARENT(tn) != NULL) { 875 876 if( i > 10 ) { 877 printk("Rebalance tn=%p \n", tn); 878 if(tn) printk("tn->parent=%p \n", NODE_PARENT(tn)); 879 880 printk("Rebalance tp=%p \n", tp); 881 if(tp) printk("tp->parent=%p \n", NODE_PARENT(tp)); 882 } 883 884 if( i > 12 ) BUG(); 885 i++; 886 887 tp = NODE_PARENT(tn); 888 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 889 wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 890 tn = (struct tnode *) resize (t, (struct tnode *)tn); 891 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull); 892 893 if(!NODE_PARENT(tn)) 894 break; 895 896 tn = NODE_PARENT(tn); 897 } 898 /* Handle last (top) tnode */ 899 if (IS_TNODE(tn)) 900 tn = (struct tnode*) resize(t, (struct tnode *)tn); 901 902 return (struct node*) tn; 903} 904 905static struct list_head * 906fib_insert_node(struct trie *t, int *err, u32 key, int plen) 907{ 908 int pos, newpos; 909 struct tnode *tp = NULL, *tn = NULL; 910 struct node *n; 911 struct leaf *l; 912 int missbit; 913 struct list_head *fa_head=NULL; 914 struct leaf_info *li; 915 t_key cindex; 916 917 pos = 0; 918 n=t->trie; 919 920 /* If we point to NULL, stop. Either the tree is empty and we should 921 * just put a new leaf in if, or we have reached an empty child slot, 922 * and we should just put our new leaf in that. 923 * If we point to a T_TNODE, check if it matches our key. Note that 924 * a T_TNODE might be skipping any number of bits - its 'pos' need 925 * not be the parent's 'pos'+'bits'! 926 * 927 * If it does match the current key, get pos/bits from it, extract 928 * the index from our key, push the T_TNODE and walk the tree. 929 * 930 * If it doesn't, we have to replace it with a new T_TNODE. 931 * 932 * If we point to a T_LEAF, it might or might not have the same key 933 * as we do. If it does, just change the value, update the T_LEAF's 934 * value, and return it. 935 * If it doesn't, we need to replace it with a T_TNODE. 936 */ 937 938 while (n != NULL && NODE_TYPE(n) == T_TNODE) { 939 tn = (struct tnode *) n; 940 941 check_tnode(tn); 942 943 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 944 tp = tn; 945 pos=tn->pos + tn->bits; 946 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); 947 948 if(n && NODE_PARENT(n) != tn) { 949 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n)); 950 BUG(); 951 } 952 } 953 else 954 break; 955 } 956 957 /* 958 * n ----> NULL, LEAF or TNODE 959 * 960 * tp is n's (parent) ----> NULL or TNODE 961 */ 962 963 if(tp && IS_LEAF(tp)) 964 BUG(); 965 966 967 /* Case 1: n is a leaf. Compare prefixes */ 968 969 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) { 970 struct leaf *l = ( struct leaf *) n; 971 972 li = leaf_info_new(plen); 973 974 if(! li) { 975 *err = -ENOMEM; 976 goto err; 977 } 978 979 fa_head = &li->falh; 980 insert_leaf_info(&l->list, li); 981 goto done; 982 } 983 t->size++; 984 l = leaf_new(); 985 986 if(! l) { 987 *err = -ENOMEM; 988 goto err; 989 } 990 991 l->key = key; 992 li = leaf_info_new(plen); 993 994 if(! li) { 995 tnode_free((struct tnode *) l); 996 *err = -ENOMEM; 997 goto err; 998 } 999 1000 fa_head = &li->falh; 1001 insert_leaf_info(&l->list, li); 1002 1003 /* Case 2: n is NULL, and will just insert a new leaf */ 1004 if (t->trie && n == NULL) { 1005 1006 NODE_SET_PARENT(l, tp); 1007 1008 if (!tp) 1009 BUG(); 1010 1011 else { 1012 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1013 put_child(t, (struct tnode *)tp, cindex, (struct node *)l); 1014 } 1015 } 1016 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ 1017 else { 1018 /* 1019 * Add a new tnode here 1020 * first tnode need some special handling 1021 */ 1022 1023 if (tp) 1024 pos=tp->pos+tp->bits; 1025 else 1026 pos=0; 1027 if(n) { 1028 newpos = tkey_mismatch(key, pos, n->key); 1029 tn = tnode_new(n->key, newpos, 1); 1030 } 1031 else { 1032 newpos = 0; 1033 tn = tnode_new(key, newpos, 1); /* First tnode */ 1034 } 1035 1036 if(!tn) { 1037 free_leaf_info(li); 1038 tnode_free((struct tnode *) l); 1039 *err = -ENOMEM; 1040 goto err; 1041 } 1042 1043 NODE_SET_PARENT(tn, tp); 1044 1045 missbit=tkey_extract_bits(key, newpos, 1); 1046 put_child(t, tn, missbit, (struct node *)l); 1047 put_child(t, tn, 1-missbit, n); 1048 1049 if(tp) { 1050 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1051 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn); 1052 } 1053 else { 1054 t->trie = (struct node*) tn; /* First tnode */ 1055 tp = tn; 1056 } 1057 } 1058 if(tp && tp->pos+tp->bits > 32) { 1059 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", 1060 tp, tp->pos, tp->bits, key, plen); 1061 } 1062 /* Rebalance the trie */ 1063 t->trie = trie_rebalance(t, tp); 1064done: 1065 t->revision++; 1066err:; 1067 return fa_head; 1068} 1069 1070static int 1071fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, 1072 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) 1073{ 1074 struct trie *t = (struct trie *) tb->tb_data; 1075 struct fib_alias *fa, *new_fa; 1076 struct list_head *fa_head=NULL; 1077 struct fib_info *fi; 1078 int plen = r->rtm_dst_len; 1079 int type = r->rtm_type; 1080 u8 tos = r->rtm_tos; 1081 u32 key, mask; 1082 int err; 1083 struct leaf *l; 1084 1085 if (plen > 32) 1086 return -EINVAL; 1087 1088 key = 0; 1089 if (rta->rta_dst) 1090 memcpy(&key, rta->rta_dst, 4); 1091 1092 key = ntohl(key); 1093 1094 if(trie_debug) 1095 printk("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); 1096 1097 mask = ntohl( inet_make_mask(plen) ); 1098 1099 if(key & ~mask) 1100 return -EINVAL; 1101 1102 key = key & mask; 1103 1104 if ((fi = fib_create_info(r, rta, nlhdr, &err)) == NULL) 1105 goto err; 1106 1107 l = fib_find_node(t, key); 1108 fa = NULL; 1109 1110 if(l) { 1111 fa_head = get_fa_head(l, plen); 1112 fa = fib_find_alias(fa_head, tos, fi->fib_priority); 1113 } 1114 1115 /* Now fa, if non-NULL, points to the first fib alias 1116 * with the same keys [prefix,tos,priority], if such key already 1117 * exists or to the node before which we will insert new one. 1118 * 1119 * If fa is NULL, we will need to allocate a new one and 1120 * insert to the head of f. 1121 * 1122 * If f is NULL, no fib node matched the destination key 1123 * and we need to allocate a new one of those as well. 1124 */ 1125 1126 if (fa && 1127 fa->fa_info->fib_priority == fi->fib_priority) { 1128 struct fib_alias *fa_orig; 1129 1130 err = -EEXIST; 1131 if (nlhdr->nlmsg_flags & NLM_F_EXCL) 1132 goto out; 1133 1134 if (nlhdr->nlmsg_flags & NLM_F_REPLACE) { 1135 struct fib_info *fi_drop; 1136 u8 state; 1137 1138 write_lock_bh(&fib_lock); 1139 1140 fi_drop = fa->fa_info; 1141 fa->fa_info = fi; 1142 fa->fa_type = type; 1143 fa->fa_scope = r->rtm_scope; 1144 state = fa->fa_state; 1145 fa->fa_state &= ~FA_S_ACCESSED; 1146 1147 write_unlock_bh(&fib_lock); 1148 1149 fib_release_info(fi_drop); 1150 if (state & FA_S_ACCESSED) 1151 rt_cache_flush(-1); 1152 1153 goto succeeded; 1154 } 1155 /* Error if we find a perfect match which 1156 * uses the same scope, type, and nexthop 1157 * information. 1158 */ 1159 fa_orig = fa; 1160 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) { 1161 if (fa->fa_tos != tos) 1162 break; 1163 if (fa->fa_info->fib_priority != fi->fib_priority) 1164 break; 1165 if (fa->fa_type == type && 1166 fa->fa_scope == r->rtm_scope && 1167 fa->fa_info == fi) { 1168 goto out; 1169 } 1170 } 1171 if (!(nlhdr->nlmsg_flags & NLM_F_APPEND)) 1172 fa = fa_orig; 1173 } 1174 err = -ENOENT; 1175 if (!(nlhdr->nlmsg_flags&NLM_F_CREATE)) 1176 goto out; 1177 1178 err = -ENOBUFS; 1179 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL); 1180 if (new_fa == NULL) 1181 goto out; 1182 1183 new_fa->fa_info = fi; 1184 new_fa->fa_tos = tos; 1185 new_fa->fa_type = type; 1186 new_fa->fa_scope = r->rtm_scope; 1187 new_fa->fa_state = 0; 1188#if 0 1189 new_fa->dst = NULL; 1190#endif 1191 /* 1192 * Insert new entry to the list. 1193 */ 1194 1195 if(!fa_head) { 1196 fa_head = fib_insert_node(t, &err, key, plen); 1197 err = 0; 1198 if(err) 1199 goto out_free_new_fa; 1200 } 1201 1202 write_lock_bh(&fib_lock); 1203 1204 list_add_tail(&new_fa->fa_list, 1205 (fa ? &fa->fa_list : fa_head)); 1206 1207 write_unlock_bh(&fib_lock); 1208 1209 rt_cache_flush(-1); 1210 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req); 1211succeeded: 1212 return 0; 1213 1214out_free_new_fa: 1215 kmem_cache_free(fn_alias_kmem, new_fa); 1216out: 1217 fib_release_info(fi); 1218err:; 1219 return err; 1220} 1221 1222static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp, 1223 struct fib_result *res, int *err) 1224{ 1225 int i; 1226 t_key mask; 1227 struct leaf_info *li; 1228 struct hlist_head *hhead = &l->list; 1229 struct hlist_node *node; 1230 1231 hlist_for_each_entry(li, node, hhead, hlist) { 1232 1233 i = li->plen; 1234 mask = ntohl(inet_make_mask(i)); 1235 if (l->key != (key & mask)) 1236 continue; 1237 1238 if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) { 1239 *plen = i; 1240#ifdef CONFIG_IP_FIB_TRIE_STATS 1241 t->stats.semantic_match_passed++; 1242#endif 1243 return 1; 1244 } 1245#ifdef CONFIG_IP_FIB_TRIE_STATS 1246 t->stats.semantic_match_miss++; 1247#endif 1248 } 1249 return 0; 1250} 1251 1252static int 1253fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) 1254{ 1255 struct trie *t = (struct trie *) tb->tb_data; 1256 int plen, ret = 0; 1257 struct node *n; 1258 struct tnode *pn; 1259 int pos, bits; 1260 t_key key=ntohl(flp->fl4_dst); 1261 int chopped_off; 1262 t_key cindex = 0; 1263 int current_prefix_length = KEYLENGTH; 1264 n = t->trie; 1265 1266 read_lock(&fib_lock); 1267 if(!n) 1268 goto failed; 1269 1270#ifdef CONFIG_IP_FIB_TRIE_STATS 1271 t->stats.gets++; 1272#endif 1273 1274 /* Just a leaf? */ 1275 if (IS_LEAF(n)) { 1276 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret) ) 1277 goto found; 1278 goto failed; 1279 } 1280 pn = (struct tnode *) n; 1281 chopped_off = 0; 1282 1283 while (pn) { 1284 1285 pos = pn->pos; 1286 bits = pn->bits; 1287 1288 if(!chopped_off) 1289 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits); 1290 1291 n = tnode_get_child(pn, cindex); 1292 1293 if (n == NULL) { 1294#ifdef CONFIG_IP_FIB_TRIE_STATS 1295 t->stats.null_node_hit++; 1296#endif 1297 goto backtrace; 1298 } 1299 1300 if (IS_TNODE(n)) { 1301#define HL_OPTIMIZE 1302#ifdef HL_OPTIMIZE 1303 struct tnode *cn = (struct tnode *)n; 1304 t_key node_prefix, key_prefix, pref_mismatch; 1305 int mp; 1306 1307 /* 1308 * It's a tnode, and we can do some extra checks here if we 1309 * like, to avoid descending into a dead-end branch. 1310 * This tnode is in the parent's child array at index 1311 * key[p_pos..p_pos+p_bits] but potentially with some bits 1312 * chopped off, so in reality the index may be just a 1313 * subprefix, padded with zero at the end. 1314 * We can also take a look at any skipped bits in this 1315 * tnode - everything up to p_pos is supposed to be ok, 1316 * and the non-chopped bits of the index (se previous 1317 * paragraph) are also guaranteed ok, but the rest is 1318 * considered unknown. 1319 * 1320 * The skipped bits are key[pos+bits..cn->pos]. 1321 */ 1322 1323 /* If current_prefix_length < pos+bits, we are already doing 1324 * actual prefix matching, which means everything from 1325 * pos+(bits-chopped_off) onward must be zero along some 1326 * branch of this subtree - otherwise there is *no* valid 1327 * prefix present. Here we can only check the skipped 1328 * bits. Remember, since we have already indexed into the 1329 * parent's child array, we know that the bits we chopped of 1330 * *are* zero. 1331 */ 1332 1333 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */ 1334 1335 if (current_prefix_length < pos+bits) { 1336 if (tkey_extract_bits(cn->key, current_prefix_length, 1337 cn->pos - current_prefix_length) != 0 || 1338 !(cn->child[0])) 1339 goto backtrace; 1340 } 1341 1342 /* 1343 * If chopped_off=0, the index is fully validated and we 1344 * only need to look at the skipped bits for this, the new, 1345 * tnode. What we actually want to do is to find out if 1346 * these skipped bits match our key perfectly, or if we will 1347 * have to count on finding a matching prefix further down, 1348 * because if we do, we would like to have some way of 1349 * verifying the existence of such a prefix at this point. 1350 */ 1351 1352 /* The only thing we can do at this point is to verify that 1353 * any such matching prefix can indeed be a prefix to our 1354 * key, and if the bits in the node we are inspecting that 1355 * do not match our key are not ZERO, this cannot be true. 1356 * Thus, find out where there is a mismatch (before cn->pos) 1357 * and verify that all the mismatching bits are zero in the 1358 * new tnode's key. 1359 */ 1360 1361 /* Note: We aren't very concerned about the piece of the key 1362 * that precede pn->pos+pn->bits, since these have already been 1363 * checked. The bits after cn->pos aren't checked since these are 1364 * by definition "unknown" at this point. Thus, what we want to 1365 * see is if we are about to enter the "prefix matching" state, 1366 * and in that case verify that the skipped bits that will prevail 1367 * throughout this subtree are zero, as they have to be if we are 1368 * to find a matching prefix. 1369 */ 1370 1371 node_prefix = MASK_PFX(cn->key, cn->pos); 1372 key_prefix = MASK_PFX(key, cn->pos); 1373 pref_mismatch = key_prefix^node_prefix; 1374 mp = 0; 1375 1376 /* In short: If skipped bits in this node do not match the search 1377 * key, enter the "prefix matching" state.directly. 1378 */ 1379 if (pref_mismatch) { 1380 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { 1381 mp++; 1382 pref_mismatch = pref_mismatch <<1; 1383 } 1384 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); 1385 1386 if (key_prefix != 0) 1387 goto backtrace; 1388 1389 if (current_prefix_length >= cn->pos) 1390 current_prefix_length=mp; 1391 } 1392#endif 1393 pn = (struct tnode *)n; /* Descend */ 1394 chopped_off = 0; 1395 continue; 1396 } 1397 if (IS_LEAF(n)) { 1398 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret)) 1399 goto found; 1400 } 1401backtrace: 1402 chopped_off++; 1403 1404 /* As zero don't change the child key (cindex) */ 1405 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) { 1406 chopped_off++; 1407 } 1408 1409 /* Decrease current_... with bits chopped off */ 1410 if (current_prefix_length > pn->pos + pn->bits - chopped_off) 1411 current_prefix_length = pn->pos + pn->bits - chopped_off; 1412 1413 /* 1414 * Either we do the actual chop off according or if we have 1415 * chopped off all bits in this tnode walk up to our parent. 1416 */ 1417 1418 if(chopped_off <= pn->bits) 1419 cindex &= ~(1 << (chopped_off-1)); 1420 else { 1421 if( NODE_PARENT(pn) == NULL) 1422 goto failed; 1423 1424 /* Get Child's index */ 1425 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits); 1426 pn = NODE_PARENT(pn); 1427 chopped_off = 0; 1428 1429#ifdef CONFIG_IP_FIB_TRIE_STATS 1430 t->stats.backtrack++; 1431#endif 1432 goto backtrace; 1433 } 1434 } 1435failed: 1436 ret = 1; 1437found: 1438 read_unlock(&fib_lock); 1439 return ret; 1440} 1441 1442static int trie_leaf_remove(struct trie *t, t_key key) 1443{ 1444 t_key cindex; 1445 struct tnode *tp = NULL; 1446 struct node *n = t->trie; 1447 struct leaf *l; 1448 1449 if(trie_debug) 1450 printk("entering trie_leaf_remove(%p)\n", n); 1451 1452 /* Note that in the case skipped bits, those bits are *not* checked! 1453 * When we finish this, we will have NULL or a T_LEAF, and the 1454 * T_LEAF may or may not match our key. 1455 */ 1456 1457 while (n != NULL && IS_TNODE(n)) { 1458 struct tnode *tn = (struct tnode *) n; 1459 check_tnode(tn); 1460 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits)); 1461 1462 if(n && NODE_PARENT(n) != tn) { 1463 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n)); 1464 BUG(); 1465 } 1466 } 1467 l = (struct leaf *) n; 1468 1469 if(!n || !tkey_equals(l->key, key)) 1470 return 0; 1471 1472 /* 1473 * Key found. 1474 * Remove the leaf and rebalance the tree 1475 */ 1476 1477 t->revision++; 1478 t->size--; 1479 1480 tp = NODE_PARENT(n); 1481 tnode_free((struct tnode *) n); 1482 1483 if(tp) { 1484 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1485 put_child(t, (struct tnode *)tp, cindex, NULL); 1486 t->trie = trie_rebalance(t, tp); 1487 } 1488 else 1489 t->trie = NULL; 1490 1491 return 1; 1492} 1493 1494static int 1495fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, 1496 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) 1497{ 1498 struct trie *t = (struct trie *) tb->tb_data; 1499 u32 key, mask; 1500 int plen = r->rtm_dst_len; 1501 u8 tos = r->rtm_tos; 1502 struct fib_alias *fa, *fa_to_delete; 1503 struct list_head *fa_head; 1504 struct leaf *l; 1505 1506 if (plen > 32) 1507 return -EINVAL; 1508 1509 key = 0; 1510 if (rta->rta_dst) 1511 memcpy(&key, rta->rta_dst, 4); 1512 1513 key = ntohl(key); 1514 mask = ntohl( inet_make_mask(plen) ); 1515 1516 if(key & ~mask) 1517 return -EINVAL; 1518 1519 key = key & mask; 1520 l = fib_find_node(t, key); 1521 1522 if(!l) 1523 return -ESRCH; 1524 1525 fa_head = get_fa_head(l, plen); 1526 fa = fib_find_alias(fa_head, tos, 0); 1527 1528 if (!fa) 1529 return -ESRCH; 1530 1531 if (trie_debug) 1532 printk("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t); 1533 1534 fa_to_delete = NULL; 1535 fa_head = fa->fa_list.prev; 1536 list_for_each_entry(fa, fa_head, fa_list) { 1537 struct fib_info *fi = fa->fa_info; 1538 1539 if (fa->fa_tos != tos) 1540 break; 1541 1542 if ((!r->rtm_type || 1543 fa->fa_type == r->rtm_type) && 1544 (r->rtm_scope == RT_SCOPE_NOWHERE || 1545 fa->fa_scope == r->rtm_scope) && 1546 (!r->rtm_protocol || 1547 fi->fib_protocol == r->rtm_protocol) && 1548 fib_nh_match(r, nlhdr, rta, fi) == 0) { 1549 fa_to_delete = fa; 1550 break; 1551 } 1552 } 1553 1554 if (fa_to_delete) { 1555 int kill_li = 0; 1556 struct leaf_info *li; 1557 1558 fa = fa_to_delete; 1559 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); 1560 1561 l = fib_find_node(t, key); 1562 li = find_leaf_info(&l->list, plen); 1563 1564 write_lock_bh(&fib_lock); 1565 1566 list_del(&fa->fa_list); 1567 1568 if(list_empty(fa_head)) { 1569 hlist_del(&li->hlist); 1570 kill_li = 1; 1571 } 1572 write_unlock_bh(&fib_lock); 1573 1574 if(kill_li) 1575 free_leaf_info(li); 1576 1577 if(hlist_empty(&l->list)) 1578 trie_leaf_remove(t, key); 1579 1580 if (fa->fa_state & FA_S_ACCESSED) 1581 rt_cache_flush(-1); 1582 1583 fn_free_alias(fa); 1584 return 0; 1585 } 1586 return -ESRCH; 1587} 1588 1589static int trie_flush_list(struct trie *t, struct list_head *head) 1590{ 1591 struct fib_alias *fa, *fa_node; 1592 int found = 0; 1593 1594 list_for_each_entry_safe(fa, fa_node, head, fa_list) { 1595 struct fib_info *fi = fa->fa_info; 1596 1597 if (fi && (fi->fib_flags&RTNH_F_DEAD)) { 1598 1599 write_lock_bh(&fib_lock); 1600 list_del(&fa->fa_list); 1601 write_unlock_bh(&fib_lock); 1602 1603 fn_free_alias(fa); 1604 found++; 1605 } 1606 } 1607 return found; 1608} 1609 1610static int trie_flush_leaf(struct trie *t, struct leaf *l) 1611{ 1612 int found = 0; 1613 struct hlist_head *lih = &l->list; 1614 struct hlist_node *node, *tmp; 1615 struct leaf_info *li = NULL; 1616 1617 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) { 1618 1619 found += trie_flush_list(t, &li->falh); 1620 1621 if (list_empty(&li->falh)) { 1622 1623 write_lock_bh(&fib_lock); 1624 hlist_del(&li->hlist); 1625 write_unlock_bh(&fib_lock); 1626 1627 free_leaf_info(li); 1628 } 1629 } 1630 return found; 1631} 1632 1633static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf) 1634{ 1635 struct node *c = (struct node *) thisleaf; 1636 struct tnode *p; 1637 int idx; 1638 1639 if(c == NULL) { 1640 if(t->trie == NULL) 1641 return NULL; 1642 1643 if (IS_LEAF(t->trie)) /* trie w. just a leaf */ 1644 return (struct leaf *) t->trie; 1645 1646 p = (struct tnode*) t->trie; /* Start */ 1647 } 1648 else 1649 p = (struct tnode *) NODE_PARENT(c); 1650 while (p) { 1651 int pos, last; 1652 1653 /* Find the next child of the parent */ 1654 if(c) 1655 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits); 1656 else 1657 pos = 0; 1658 1659 last = 1 << p->bits; 1660 for(idx = pos; idx < last ; idx++) { 1661 if( p->child[idx]) { 1662 1663 /* Decend if tnode */ 1664 1665 while (IS_TNODE(p->child[idx])) { 1666 p = (struct tnode*) p->child[idx]; 1667 idx = 0; 1668 1669 /* Rightmost non-NULL branch */ 1670 if( p && IS_TNODE(p) ) 1671 while ( p->child[idx] == NULL && idx < (1 << p->bits) ) idx++; 1672 1673 /* Done with this tnode? */ 1674 if( idx >= (1 << p->bits) || p->child[idx] == NULL ) 1675 goto up; 1676 } 1677 return (struct leaf*) p->child[idx]; 1678 } 1679 } 1680up: 1681 /* No more children go up one step */ 1682 c = (struct node*) p; 1683 p = (struct tnode *) NODE_PARENT(p); 1684 } 1685 return NULL; /* Ready. Root of trie */ 1686} 1687 1688static int fn_trie_flush(struct fib_table *tb) 1689{ 1690 struct trie *t = (struct trie *) tb->tb_data; 1691 struct leaf *ll = NULL, *l = NULL; 1692 int found = 0, h; 1693 1694 t->revision++; 1695 1696 for (h=0; (l = nextleaf(t, l)) != NULL; h++) { 1697 found += trie_flush_leaf(t, l); 1698 1699 if (ll && hlist_empty(&ll->list)) 1700 trie_leaf_remove(t, ll->key); 1701 ll = l; 1702 } 1703 1704 if (ll && hlist_empty(&ll->list)) 1705 trie_leaf_remove(t, ll->key); 1706 1707 if(trie_debug) 1708 printk("trie_flush found=%d\n", found); 1709 return found; 1710} 1711 1712static int trie_last_dflt=-1; 1713 1714static void 1715fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) 1716{ 1717 struct trie *t = (struct trie *) tb->tb_data; 1718 int order, last_idx; 1719 struct fib_info *fi = NULL; 1720 struct fib_info *last_resort; 1721 struct fib_alias *fa = NULL; 1722 struct list_head *fa_head; 1723 struct leaf *l; 1724 1725 last_idx = -1; 1726 last_resort = NULL; 1727 order = -1; 1728 1729 read_lock(&fib_lock); 1730 1731 l = fib_find_node(t, 0); 1732 if(!l) 1733 goto out; 1734 1735 fa_head = get_fa_head(l, 0); 1736 if(!fa_head) 1737 goto out; 1738 1739 if (list_empty(fa_head)) 1740 goto out; 1741 1742 list_for_each_entry(fa, fa_head, fa_list) { 1743 struct fib_info *next_fi = fa->fa_info; 1744 1745 if (fa->fa_scope != res->scope || 1746 fa->fa_type != RTN_UNICAST) 1747 continue; 1748 1749 if (next_fi->fib_priority > res->fi->fib_priority) 1750 break; 1751 if (!next_fi->fib_nh[0].nh_gw || 1752 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) 1753 continue; 1754 fa->fa_state |= FA_S_ACCESSED; 1755 1756 if (fi == NULL) { 1757 if (next_fi != res->fi) 1758 break; 1759 } else if (!fib_detect_death(fi, order, &last_resort, 1760 &last_idx, &trie_last_dflt)) { 1761 if (res->fi) 1762 fib_info_put(res->fi); 1763 res->fi = fi; 1764 atomic_inc(&fi->fib_clntref); 1765 trie_last_dflt = order; 1766 goto out; 1767 } 1768 fi = next_fi; 1769 order++; 1770 } 1771 if (order <= 0 || fi == NULL) { 1772 trie_last_dflt = -1; 1773 goto out; 1774 } 1775 1776 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) { 1777 if (res->fi) 1778 fib_info_put(res->fi); 1779 res->fi = fi; 1780 atomic_inc(&fi->fib_clntref); 1781 trie_last_dflt = order; 1782 goto out; 1783 } 1784 if (last_idx >= 0) { 1785 if (res->fi) 1786 fib_info_put(res->fi); 1787 res->fi = last_resort; 1788 if (last_resort) 1789 atomic_inc(&last_resort->fib_clntref); 1790 } 1791 trie_last_dflt = last_idx; 1792 out:; 1793 read_unlock(&fib_lock); 1794} 1795 1796static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb, 1797 struct sk_buff *skb, struct netlink_callback *cb) 1798{ 1799 int i, s_i; 1800 struct fib_alias *fa; 1801 1802 u32 xkey=htonl(key); 1803 1804 s_i=cb->args[3]; 1805 i = 0; 1806 1807 list_for_each_entry(fa, fah, fa_list) { 1808 if (i < s_i) { 1809 i++; 1810 continue; 1811 } 1812 if (fa->fa_info->fib_nh == NULL) { 1813 printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen); 1814 i++; 1815 continue; 1816 } 1817 if (fa->fa_info == NULL) { 1818 printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen); 1819 i++; 1820 continue; 1821 } 1822 1823 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, 1824 cb->nlh->nlmsg_seq, 1825 RTM_NEWROUTE, 1826 tb->tb_id, 1827 fa->fa_type, 1828 fa->fa_scope, 1829 &xkey, 1830 plen, 1831 fa->fa_tos, 1832 fa->fa_info, 0) < 0) { 1833 cb->args[3] = i; 1834 return -1; 1835 } 1836 i++; 1837 } 1838 cb->args[3]=i; 1839 return skb->len; 1840} 1841 1842static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb, 1843 struct netlink_callback *cb) 1844{ 1845 int h, s_h; 1846 struct list_head *fa_head; 1847 struct leaf *l = NULL; 1848 s_h=cb->args[2]; 1849 1850 for (h=0; (l = nextleaf(t, l)) != NULL; h++) { 1851 1852 if (h < s_h) 1853 continue; 1854 if (h > s_h) 1855 memset(&cb->args[3], 0, 1856 sizeof(cb->args) - 3*sizeof(cb->args[0])); 1857 1858 fa_head = get_fa_head(l, plen); 1859 1860 if(!fa_head) 1861 continue; 1862 1863 if(list_empty(fa_head)) 1864 continue; 1865 1866 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { 1867 cb->args[2]=h; 1868 return -1; 1869 } 1870 } 1871 cb->args[2]=h; 1872 return skb->len; 1873} 1874 1875static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) 1876{ 1877 int m, s_m; 1878 struct trie *t = (struct trie *) tb->tb_data; 1879 1880 s_m = cb->args[1]; 1881 1882 read_lock(&fib_lock); 1883 for (m=0; m<=32; m++) { 1884 1885 if (m < s_m) 1886 continue; 1887 if (m > s_m) 1888 memset(&cb->args[2], 0, 1889 sizeof(cb->args) - 2*sizeof(cb->args[0])); 1890 1891 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { 1892 cb->args[1] = m; 1893 goto out; 1894 } 1895 } 1896 read_unlock(&fib_lock); 1897 cb->args[1] = m; 1898 return skb->len; 1899 out: 1900 read_unlock(&fib_lock); 1901 return -1; 1902} 1903 1904/* Fix more generic FIB names for init later */ 1905 1906#ifdef CONFIG_IP_MULTIPLE_TABLES 1907struct fib_table * fib_hash_init(int id) 1908#else 1909struct fib_table * __init fib_hash_init(int id) 1910#endif 1911{ 1912 struct fib_table *tb; 1913 struct trie *t; 1914 1915 if (fn_alias_kmem == NULL) 1916 fn_alias_kmem = kmem_cache_create("ip_fib_alias", 1917 sizeof(struct fib_alias), 1918 0, SLAB_HWCACHE_ALIGN, 1919 NULL, NULL); 1920 1921 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie), 1922 GFP_KERNEL); 1923 if (tb == NULL) 1924 return NULL; 1925 1926 tb->tb_id = id; 1927 tb->tb_lookup = fn_trie_lookup; 1928 tb->tb_insert = fn_trie_insert; 1929 tb->tb_delete = fn_trie_delete; 1930 tb->tb_flush = fn_trie_flush; 1931 tb->tb_select_default = fn_trie_select_default; 1932 tb->tb_dump = fn_trie_dump; 1933 memset(tb->tb_data, 0, sizeof(struct trie)); 1934 1935 t = (struct trie *) tb->tb_data; 1936 1937 trie_init(t); 1938 1939 if (id == RT_TABLE_LOCAL) 1940 trie_local=t; 1941 else if (id == RT_TABLE_MAIN) 1942 trie_main=t; 1943 1944 if (id == RT_TABLE_LOCAL) 1945 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION); 1946 1947 return tb; 1948} 1949 1950/* Trie dump functions */ 1951 1952static void putspace_seq(struct seq_file *seq, int n) 1953{ 1954 while (n--) seq_printf(seq, " "); 1955} 1956 1957static void printbin_seq(struct seq_file *seq, unsigned int v, int bits) 1958{ 1959 while (bits--) 1960 seq_printf(seq, "%s", (v & (1<<bits))?"1":"0"); 1961} 1962 1963static void printnode_seq(struct seq_file *seq, int indent, struct node *n, 1964 int pend, int cindex, int bits) 1965{ 1966 putspace_seq(seq, indent); 1967 if (IS_LEAF(n)) 1968 seq_printf(seq, "|"); 1969 else 1970 seq_printf(seq, "+"); 1971 if (bits) { 1972 seq_printf(seq, "%d/", cindex); 1973 printbin_seq(seq, cindex, bits); 1974 seq_printf(seq, ": "); 1975 } 1976 else 1977 seq_printf(seq, "<root>: "); 1978 seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n); 1979 1980 if (IS_LEAF(n)) 1981 seq_printf(seq, "key=%d.%d.%d.%d\n", 1982 n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256); 1983 else { 1984 int plen=((struct tnode *)n)->pos; 1985 t_key prf=MASK_PFX(n->key, plen); 1986 seq_printf(seq, "key=%d.%d.%d.%d/%d\n", 1987 prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen); 1988 } 1989 if (IS_LEAF(n)) { 1990 struct leaf *l=(struct leaf *)n; 1991 struct fib_alias *fa; 1992 int i; 1993 for (i=32; i>=0; i--) 1994 if(find_leaf_info(&l->list, i)) { 1995 1996 struct list_head *fa_head = get_fa_head(l, i); 1997 1998 if(!fa_head) 1999 continue; 2000 2001 if(list_empty(fa_head)) 2002 continue; 2003 2004 putspace_seq(seq, indent+2); 2005 seq_printf(seq, "{/%d...dumping}\n", i); 2006 2007 2008 list_for_each_entry(fa, fa_head, fa_list) { 2009 putspace_seq(seq, indent+2); 2010 if (fa->fa_info->fib_nh == NULL) { 2011 seq_printf(seq, "Error _fib_nh=NULL\n"); 2012 continue; 2013 } 2014 if (fa->fa_info == NULL) { 2015 seq_printf(seq, "Error fa_info=NULL\n"); 2016 continue; 2017 } 2018 2019 seq_printf(seq, "{type=%d scope=%d TOS=%d}\n", 2020 fa->fa_type, 2021 fa->fa_scope, 2022 fa->fa_tos); 2023 } 2024 } 2025 } 2026 else if (IS_TNODE(n)) { 2027 struct tnode *tn=(struct tnode *)n; 2028 putspace_seq(seq, indent); seq_printf(seq, "| "); 2029 seq_printf(seq, "{key prefix=%08x/", tn->key&TKEY_GET_MASK(0, tn->pos)); 2030 printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos); 2031 seq_printf(seq, "}\n"); 2032 putspace_seq(seq, indent); seq_printf(seq, "| "); 2033 seq_printf(seq, "{pos=%d", tn->pos); 2034 seq_printf(seq, " (skip=%d bits)", tn->pos - pend); 2035 seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits)); 2036 putspace_seq(seq, indent); seq_printf(seq, "| "); 2037 seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children); 2038 } 2039} 2040 2041static void trie_dump_seq(struct seq_file *seq, struct trie *t) 2042{ 2043 struct node *n=t->trie; 2044 int cindex=0; 2045 int indent=1; 2046 int pend=0; 2047 int depth = 0; 2048 2049 read_lock(&fib_lock); 2050 2051 seq_printf(seq, "------ trie_dump of t=%p ------\n", t); 2052 if (n) { 2053 printnode_seq(seq, indent, n, pend, cindex, 0); 2054 if (IS_TNODE(n)) { 2055 struct tnode *tn=(struct tnode *)n; 2056 pend = tn->pos+tn->bits; 2057 putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); 2058 indent += 3; 2059 depth++; 2060 2061 while (tn && cindex < (1 << tn->bits)) { 2062 if (tn->child[cindex]) { 2063 2064 /* Got a child */ 2065 2066 printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits); 2067 if (IS_LEAF(tn->child[cindex])) { 2068 cindex++; 2069 2070 } 2071 else { 2072 /* 2073 * New tnode. Decend one level 2074 */ 2075 2076 depth++; 2077 n=tn->child[cindex]; 2078 tn=(struct tnode *)n; 2079 pend=tn->pos+tn->bits; 2080 putspace_seq(seq, indent); seq_printf(seq, "\\--\n"); 2081 indent+=3; 2082 cindex=0; 2083 } 2084 } 2085 else 2086 cindex++; 2087 2088 /* 2089 * Test if we are done 2090 */ 2091 2092 while (cindex >= (1 << tn->bits)) { 2093 2094 /* 2095 * Move upwards and test for root 2096 * pop off all traversed nodes 2097 */ 2098 2099 if (NODE_PARENT(tn) == NULL) { 2100 tn = NULL; 2101 n = NULL; 2102 break; 2103 } 2104 else { 2105 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); 2106 tn = NODE_PARENT(tn); 2107 cindex++; 2108 n=(struct node *)tn; 2109 pend=tn->pos+tn->bits; 2110 indent-=3; 2111 depth--; 2112 } 2113 } 2114 } 2115 } 2116 else n = NULL; 2117 } 2118 else seq_printf(seq, "------ trie is empty\n"); 2119 2120 read_unlock(&fib_lock); 2121} 2122 2123static struct trie_stat *trie_stat_new(void) 2124{ 2125 struct trie_stat *s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL); 2126 int i; 2127 2128 if(s) { 2129 s->totdepth = 0; 2130 s->maxdepth = 0; 2131 s->tnodes = 0; 2132 s->leaves = 0; 2133 s->nullpointers = 0; 2134 2135 for(i=0; i< MAX_CHILDS; i++) 2136 s->nodesizes[i] = 0; 2137 } 2138 return s; 2139} 2140 2141static struct trie_stat *trie_collect_stats(struct trie *t) 2142{ 2143 struct node *n=t->trie; 2144 struct trie_stat *s = trie_stat_new(); 2145 int cindex = 0; 2146 int indent = 1; 2147 int pend = 0; 2148 int depth = 0; 2149 2150 read_lock(&fib_lock); 2151 2152 if (s) { 2153 if (n) { 2154 if (IS_TNODE(n)) { 2155 struct tnode *tn = (struct tnode *)n; 2156 pend=tn->pos+tn->bits; 2157 indent += 3; 2158 s->nodesizes[tn->bits]++; 2159 depth++; 2160 2161 while (tn && cindex < (1 << tn->bits)) { 2162 if (tn->child[cindex]) { 2163 /* Got a child */ 2164 2165 if (IS_LEAF(tn->child[cindex])) { 2166 cindex++; 2167 2168 /* stats */ 2169 if (depth > s->maxdepth) 2170 s->maxdepth = depth; 2171 s->totdepth += depth; 2172 s->leaves++; 2173 } 2174 2175 else { 2176 /* 2177 * New tnode. Decend one level 2178 */ 2179 2180 s->tnodes++; 2181 s->nodesizes[tn->bits]++; 2182 depth++; 2183 2184 n = tn->child[cindex]; 2185 tn = (struct tnode *)n; 2186 pend = tn->pos+tn->bits; 2187 2188 indent += 3; 2189 cindex = 0; 2190 } 2191 } 2192 else { 2193 cindex++; 2194 s->nullpointers++; 2195 } 2196 2197 /* 2198 * Test if we are done 2199 */ 2200 2201 while (cindex >= (1 << tn->bits)) { 2202 2203 /* 2204 * Move upwards and test for root 2205 * pop off all traversed nodes 2206 */ 2207 2208 2209 if (NODE_PARENT(tn) == NULL) { 2210 tn = NULL; 2211 n = NULL; 2212 break; 2213 } 2214 else { 2215 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits); 2216 tn = NODE_PARENT(tn); 2217 cindex++; 2218 n = (struct node *)tn; 2219 pend=tn->pos+tn->bits; 2220 indent -= 3; 2221 depth--; 2222 } 2223 } 2224 } 2225 } 2226 else n = NULL; 2227 } 2228 } 2229 2230 read_unlock(&fib_lock); 2231 return s; 2232} 2233 2234#ifdef CONFIG_PROC_FS 2235 2236static struct fib_alias *fib_triestat_get_first(struct seq_file *seq) 2237{ 2238 return NULL; 2239} 2240 2241static struct fib_alias *fib_triestat_get_next(struct seq_file *seq) 2242{ 2243 return NULL; 2244} 2245 2246static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos) 2247{ 2248 void *v = NULL; 2249 2250 if (ip_fib_main_table) 2251 v = *pos ? fib_triestat_get_next(seq) : SEQ_START_TOKEN; 2252 return v; 2253} 2254 2255static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2256{ 2257 ++*pos; 2258 return v == SEQ_START_TOKEN ? fib_triestat_get_first(seq) : fib_triestat_get_next(seq); 2259} 2260 2261static void fib_triestat_seq_stop(struct seq_file *seq, void *v) 2262{ 2263 2264} 2265 2266/* 2267 * This outputs /proc/net/fib_triestats 2268 * 2269 * It always works in backward compatibility mode. 2270 * The format of the file is not supposed to be changed. 2271 */ 2272 2273static void collect_and_show(struct trie *t, struct seq_file *seq) 2274{ 2275 int bytes = 0; /* How many bytes are used, a ref is 4 bytes */ 2276 int i, max, pointers; 2277 struct trie_stat *stat; 2278 int avdepth; 2279 2280 stat = trie_collect_stats(t); 2281 2282 bytes=0; 2283 seq_printf(seq, "trie=%p\n", t); 2284 2285 if (stat) { 2286 if (stat->leaves) 2287 avdepth=stat->totdepth*100 / stat->leaves; 2288 else 2289 avdepth=0; 2290 seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100 ); 2291 seq_printf(seq, "Max depth: %4d\n", stat->maxdepth); 2292 2293 seq_printf(seq, "Leaves: %d\n", stat->leaves); 2294 bytes += sizeof(struct leaf) * stat->leaves; 2295 seq_printf(seq, "Internal nodes: %d\n", stat->tnodes); 2296 bytes += sizeof(struct tnode) * stat->tnodes; 2297 2298 max = MAX_CHILDS-1; 2299 2300 while (max >= 0 && stat->nodesizes[max] == 0) 2301 max--; 2302 pointers = 0; 2303 2304 for (i = 1; i <= max; i++) 2305 if (stat->nodesizes[i] != 0) { 2306 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]); 2307 pointers += (1<<i) * stat->nodesizes[i]; 2308 } 2309 seq_printf(seq, "\n"); 2310 seq_printf(seq, "Pointers: %d\n", pointers); 2311 bytes += sizeof(struct node *) * pointers; 2312 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers); 2313 seq_printf(seq, "Total size: %d kB\n", bytes / 1024); 2314 2315 kfree(stat); 2316 } 2317 2318#ifdef CONFIG_IP_FIB_TRIE_STATS 2319 seq_printf(seq, "Counters:\n---------\n"); 2320 seq_printf(seq,"gets = %d\n", t->stats.gets); 2321 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack); 2322 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); 2323 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); 2324 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); 2325#ifdef CLEAR_STATS 2326 memset(&(t->stats), 0, sizeof(t->stats)); 2327#endif 2328#endif /* CONFIG_IP_FIB_TRIE_STATS */ 2329} 2330 2331static int fib_triestat_seq_show(struct seq_file *seq, void *v) 2332{ 2333 char bf[128]; 2334 2335 if (v == SEQ_START_TOKEN) { 2336 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n", 2337 sizeof(struct leaf), sizeof(struct tnode)); 2338 if (trie_local) 2339 collect_and_show(trie_local, seq); 2340 2341 if (trie_main) 2342 collect_and_show(trie_main, seq); 2343 } 2344 else { 2345 snprintf(bf, sizeof(bf), 2346 "*\t%08X\t%08X", 200, 400); 2347 2348 seq_printf(seq, "%-127s\n", bf); 2349 } 2350 return 0; 2351} 2352 2353static struct seq_operations fib_triestat_seq_ops = { 2354 .start = fib_triestat_seq_start, 2355 .next = fib_triestat_seq_next, 2356 .stop = fib_triestat_seq_stop, 2357 .show = fib_triestat_seq_show, 2358}; 2359 2360static int fib_triestat_seq_open(struct inode *inode, struct file *file) 2361{ 2362 struct seq_file *seq; 2363 int rc = -ENOMEM; 2364 2365 rc = seq_open(file, &fib_triestat_seq_ops); 2366 if (rc) 2367 goto out_kfree; 2368 2369 seq = file->private_data; 2370out: 2371 return rc; 2372out_kfree: 2373 goto out; 2374} 2375 2376static struct file_operations fib_triestat_seq_fops = { 2377 .owner = THIS_MODULE, 2378 .open = fib_triestat_seq_open, 2379 .read = seq_read, 2380 .llseek = seq_lseek, 2381 .release = seq_release_private, 2382}; 2383 2384int __init fib_stat_proc_init(void) 2385{ 2386 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops)) 2387 return -ENOMEM; 2388 return 0; 2389} 2390 2391void __init fib_stat_proc_exit(void) 2392{ 2393 proc_net_remove("fib_triestat"); 2394} 2395 2396static struct fib_alias *fib_trie_get_first(struct seq_file *seq) 2397{ 2398 return NULL; 2399} 2400 2401static struct fib_alias *fib_trie_get_next(struct seq_file *seq) 2402{ 2403 return NULL; 2404} 2405 2406static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) 2407{ 2408 void *v = NULL; 2409 2410 if (ip_fib_main_table) 2411 v = *pos ? fib_trie_get_next(seq) : SEQ_START_TOKEN; 2412 return v; 2413} 2414 2415static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2416{ 2417 ++*pos; 2418 return v == SEQ_START_TOKEN ? fib_trie_get_first(seq) : fib_trie_get_next(seq); 2419} 2420 2421static void fib_trie_seq_stop(struct seq_file *seq, void *v) 2422{ 2423 2424} 2425 2426/* 2427 * This outputs /proc/net/fib_trie. 2428 * 2429 * It always works in backward compatibility mode. 2430 * The format of the file is not supposed to be changed. 2431 */ 2432 2433static int fib_trie_seq_show(struct seq_file *seq, void *v) 2434{ 2435 char bf[128]; 2436 2437 if (v == SEQ_START_TOKEN) { 2438 if (trie_local) 2439 trie_dump_seq(seq, trie_local); 2440 2441 if (trie_main) 2442 trie_dump_seq(seq, trie_main); 2443 } 2444 2445 else { 2446 snprintf(bf, sizeof(bf), 2447 "*\t%08X\t%08X", 200, 400); 2448 seq_printf(seq, "%-127s\n", bf); 2449 } 2450 2451 return 0; 2452} 2453 2454static struct seq_operations fib_trie_seq_ops = { 2455 .start = fib_trie_seq_start, 2456 .next = fib_trie_seq_next, 2457 .stop = fib_trie_seq_stop, 2458 .show = fib_trie_seq_show, 2459}; 2460 2461static int fib_trie_seq_open(struct inode *inode, struct file *file) 2462{ 2463 struct seq_file *seq; 2464 int rc = -ENOMEM; 2465 2466 rc = seq_open(file, &fib_trie_seq_ops); 2467 if (rc) 2468 goto out_kfree; 2469 2470 seq = file->private_data; 2471out: 2472 return rc; 2473out_kfree: 2474 goto out; 2475} 2476 2477static struct file_operations fib_trie_seq_fops = { 2478 .owner = THIS_MODULE, 2479 .open = fib_trie_seq_open, 2480 .read = seq_read, 2481 .llseek = seq_lseek, 2482 .release = seq_release_private, 2483}; 2484 2485int __init fib_proc_init(void) 2486{ 2487 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops)) 2488 return -ENOMEM; 2489 return 0; 2490} 2491 2492void __init fib_proc_exit(void) 2493{ 2494 proc_net_remove("fib_trie"); 2495} 2496 2497#endif /* CONFIG_PROC_FS */ 2498