fix_node.c revision cf776a7a4dafa330dd371a6a301ddf9e38747d93
1/* 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 3 */ 4 5#include <linux/time.h> 6#include <linux/slab.h> 7#include <linux/string.h> 8#include "reiserfs.h" 9#include <linux/buffer_head.h> 10 11/* 12 * To make any changes in the tree we find a node that contains item 13 * to be changed/deleted or position in the node we insert a new item 14 * to. We call this node S. To do balancing we need to decide what we 15 * will shift to left/right neighbor, or to a new node, where new item 16 * will be etc. To make this analysis simpler we build virtual 17 * node. Virtual node is an array of items, that will replace items of 18 * node S. (For instance if we are going to delete an item, virtual 19 * node does not contain it). Virtual node keeps information about 20 * item sizes and types, mergeability of first and last items, sizes 21 * of all entries in directory item. We use this array of items when 22 * calculating what we can shift to neighbors and how many nodes we 23 * have to have if we do not any shiftings, if we shift to left/right 24 * neighbor or to both. 25 */ 26 27/* 28 * Takes item number in virtual node, returns number of item 29 * that it has in source buffer 30 */ 31static inline int old_item_num(int new_num, int affected_item_num, int mode) 32{ 33 if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) 34 return new_num; 35 36 if (mode == M_INSERT) { 37 38 RFALSE(new_num == 0, 39 "vs-8005: for INSERT mode and item number of inserted item"); 40 41 return new_num - 1; 42 } 43 44 RFALSE(mode != M_DELETE, 45 "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'", 46 mode); 47 /* delete mode */ 48 return new_num + 1; 49} 50 51static void create_virtual_node(struct tree_balance *tb, int h) 52{ 53 struct item_head *ih; 54 struct virtual_node *vn = tb->tb_vn; 55 int new_num; 56 struct buffer_head *Sh; /* this comes from tb->S[h] */ 57 58 Sh = PATH_H_PBUFFER(tb->tb_path, h); 59 60 /* size of changed node */ 61 vn->vn_size = 62 MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h]; 63 64 /* for internal nodes array if virtual items is not created */ 65 if (h) { 66 vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE); 67 return; 68 } 69 70 /* number of items in virtual node */ 71 vn->vn_nr_item = 72 B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) - 73 ((vn->vn_mode == M_DELETE) ? 1 : 0); 74 75 /* first virtual item */ 76 vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1); 77 memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item)); 78 vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item); 79 80 /* first item in the node */ 81 ih = item_head(Sh, 0); 82 83 /* define the mergeability for 0-th item (if it is not being deleted) */ 84 if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size) 85 && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num)) 86 vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE; 87 88 /* 89 * go through all items that remain in the virtual 90 * node (except for the new (inserted) one) 91 */ 92 for (new_num = 0; new_num < vn->vn_nr_item; new_num++) { 93 int j; 94 struct virtual_item *vi = vn->vn_vi + new_num; 95 int is_affected = 96 ((new_num != vn->vn_affected_item_num) ? 0 : 1); 97 98 if (is_affected && vn->vn_mode == M_INSERT) 99 continue; 100 101 /* get item number in source node */ 102 j = old_item_num(new_num, vn->vn_affected_item_num, 103 vn->vn_mode); 104 105 vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE; 106 vi->vi_ih = ih + j; 107 vi->vi_item = ih_item_body(Sh, ih + j); 108 vi->vi_uarea = vn->vn_free_ptr; 109 110 /* 111 * FIXME: there is no check that item operation did not 112 * consume too much memory 113 */ 114 vn->vn_free_ptr += 115 op_create_vi(vn, vi, is_affected, tb->insert_size[0]); 116 if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr) 117 reiserfs_panic(tb->tb_sb, "vs-8030", 118 "virtual node space consumed"); 119 120 if (!is_affected) 121 /* this is not being changed */ 122 continue; 123 124 if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) { 125 vn->vn_vi[new_num].vi_item_len += tb->insert_size[0]; 126 /* pointer to data which is going to be pasted */ 127 vi->vi_new_data = vn->vn_data; 128 } 129 } 130 131 /* virtual inserted item is not defined yet */ 132 if (vn->vn_mode == M_INSERT) { 133 struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num; 134 135 RFALSE(vn->vn_ins_ih == NULL, 136 "vs-8040: item header of inserted item is not specified"); 137 vi->vi_item_len = tb->insert_size[0]; 138 vi->vi_ih = vn->vn_ins_ih; 139 vi->vi_item = vn->vn_data; 140 vi->vi_uarea = vn->vn_free_ptr; 141 142 op_create_vi(vn, vi, 0 /*not pasted or cut */ , 143 tb->insert_size[0]); 144 } 145 146 /* 147 * set right merge flag we take right delimiting key and 148 * check whether it is a mergeable item 149 */ 150 if (tb->CFR[0]) { 151 struct reiserfs_key *key; 152 153 key = internal_key(tb->CFR[0], tb->rkey[0]); 154 if (op_is_left_mergeable(key, Sh->b_size) 155 && (vn->vn_mode != M_DELETE 156 || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) 157 vn->vn_vi[vn->vn_nr_item - 1].vi_type |= 158 VI_TYPE_RIGHT_MERGEABLE; 159 160#ifdef CONFIG_REISERFS_CHECK 161 if (op_is_left_mergeable(key, Sh->b_size) && 162 !(vn->vn_mode != M_DELETE 163 || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) { 164 /* 165 * we delete last item and it could be merged 166 * with right neighbor's first item 167 */ 168 if (! 169 (B_NR_ITEMS(Sh) == 1 170 && is_direntry_le_ih(item_head(Sh, 0)) 171 && ih_entry_count(item_head(Sh, 0)) == 1)) { 172 /* 173 * node contains more than 1 item, or item 174 * is not directory item, or this item 175 * contains more than 1 entry 176 */ 177 print_block(Sh, 0, -1, -1); 178 reiserfs_panic(tb->tb_sb, "vs-8045", 179 "rdkey %k, affected item==%d " 180 "(mode==%c) Must be %c", 181 key, vn->vn_affected_item_num, 182 vn->vn_mode, M_DELETE); 183 } 184 } 185#endif 186 187 } 188} 189 190/* 191 * Using virtual node check, how many items can be 192 * shifted to left neighbor 193 */ 194static void check_left(struct tree_balance *tb, int h, int cur_free) 195{ 196 int i; 197 struct virtual_node *vn = tb->tb_vn; 198 struct virtual_item *vi; 199 int d_size, ih_size; 200 201 RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free); 202 203 /* internal level */ 204 if (h > 0) { 205 tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE); 206 return; 207 } 208 209 /* leaf level */ 210 211 if (!cur_free || !vn->vn_nr_item) { 212 /* no free space or nothing to move */ 213 tb->lnum[h] = 0; 214 tb->lbytes = -1; 215 return; 216 } 217 218 RFALSE(!PATH_H_PPARENT(tb->tb_path, 0), 219 "vs-8055: parent does not exist or invalid"); 220 221 vi = vn->vn_vi; 222 if ((unsigned int)cur_free >= 223 (vn->vn_size - 224 ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) { 225 /* all contents of S[0] fits into L[0] */ 226 227 RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE, 228 "vs-8055: invalid mode or balance condition failed"); 229 230 tb->lnum[0] = vn->vn_nr_item; 231 tb->lbytes = -1; 232 return; 233 } 234 235 d_size = 0, ih_size = IH_SIZE; 236 237 /* first item may be merge with last item in left neighbor */ 238 if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE) 239 d_size = -((int)IH_SIZE), ih_size = 0; 240 241 tb->lnum[0] = 0; 242 for (i = 0; i < vn->vn_nr_item; 243 i++, ih_size = IH_SIZE, d_size = 0, vi++) { 244 d_size += vi->vi_item_len; 245 if (cur_free >= d_size) { 246 /* the item can be shifted entirely */ 247 cur_free -= d_size; 248 tb->lnum[0]++; 249 continue; 250 } 251 252 /* the item cannot be shifted entirely, try to split it */ 253 /* 254 * check whether L[0] can hold ih and at least one byte 255 * of the item body 256 */ 257 258 /* cannot shift even a part of the current item */ 259 if (cur_free <= ih_size) { 260 tb->lbytes = -1; 261 return; 262 } 263 cur_free -= ih_size; 264 265 tb->lbytes = op_check_left(vi, cur_free, 0, 0); 266 if (tb->lbytes != -1) 267 /* count partially shifted item */ 268 tb->lnum[0]++; 269 270 break; 271 } 272 273 return; 274} 275 276/* 277 * Using virtual node check, how many items can be 278 * shifted to right neighbor 279 */ 280static void check_right(struct tree_balance *tb, int h, int cur_free) 281{ 282 int i; 283 struct virtual_node *vn = tb->tb_vn; 284 struct virtual_item *vi; 285 int d_size, ih_size; 286 287 RFALSE(cur_free < 0, "vs-8070: cur_free < 0"); 288 289 /* internal level */ 290 if (h > 0) { 291 tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE); 292 return; 293 } 294 295 /* leaf level */ 296 297 if (!cur_free || !vn->vn_nr_item) { 298 /* no free space */ 299 tb->rnum[h] = 0; 300 tb->rbytes = -1; 301 return; 302 } 303 304 RFALSE(!PATH_H_PPARENT(tb->tb_path, 0), 305 "vs-8075: parent does not exist or invalid"); 306 307 vi = vn->vn_vi + vn->vn_nr_item - 1; 308 if ((unsigned int)cur_free >= 309 (vn->vn_size - 310 ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) { 311 /* all contents of S[0] fits into R[0] */ 312 313 RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE, 314 "vs-8080: invalid mode or balance condition failed"); 315 316 tb->rnum[h] = vn->vn_nr_item; 317 tb->rbytes = -1; 318 return; 319 } 320 321 d_size = 0, ih_size = IH_SIZE; 322 323 /* last item may be merge with first item in right neighbor */ 324 if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) 325 d_size = -(int)IH_SIZE, ih_size = 0; 326 327 tb->rnum[0] = 0; 328 for (i = vn->vn_nr_item - 1; i >= 0; 329 i--, d_size = 0, ih_size = IH_SIZE, vi--) { 330 d_size += vi->vi_item_len; 331 if (cur_free >= d_size) { 332 /* the item can be shifted entirely */ 333 cur_free -= d_size; 334 tb->rnum[0]++; 335 continue; 336 } 337 338 /* 339 * check whether R[0] can hold ih and at least one 340 * byte of the item body 341 */ 342 343 /* cannot shift even a part of the current item */ 344 if (cur_free <= ih_size) { 345 tb->rbytes = -1; 346 return; 347 } 348 349 /* 350 * R[0] can hold the header of the item and at least 351 * one byte of its body 352 */ 353 cur_free -= ih_size; /* cur_free is still > 0 */ 354 355 tb->rbytes = op_check_right(vi, cur_free); 356 if (tb->rbytes != -1) 357 /* count partially shifted item */ 358 tb->rnum[0]++; 359 360 break; 361 } 362 363 return; 364} 365 366/* 367 * from - number of items, which are shifted to left neighbor entirely 368 * to - number of item, which are shifted to right neighbor entirely 369 * from_bytes - number of bytes of boundary item (or directory entries) 370 * which are shifted to left neighbor 371 * to_bytes - number of bytes of boundary item (or directory entries) 372 * which are shifted to right neighbor 373 */ 374static int get_num_ver(int mode, struct tree_balance *tb, int h, 375 int from, int from_bytes, 376 int to, int to_bytes, short *snum012, int flow) 377{ 378 int i; 379 int cur_free; 380 int units; 381 struct virtual_node *vn = tb->tb_vn; 382 int total_node_size, max_node_size, current_item_size; 383 int needed_nodes; 384 385 /* position of item we start filling node from */ 386 int start_item; 387 388 /* position of item we finish filling node by */ 389 int end_item; 390 391 /* 392 * number of first bytes (entries for directory) of start_item-th item 393 * we do not include into node that is being filled 394 */ 395 int start_bytes; 396 397 /* 398 * number of last bytes (entries for directory) of end_item-th item 399 * we do node include into node that is being filled 400 */ 401 int end_bytes; 402 403 /* 404 * these are positions in virtual item of items, that are split 405 * between S[0] and S1new and S1new and S2new 406 */ 407 int split_item_positions[2]; 408 409 split_item_positions[0] = -1; 410 split_item_positions[1] = -1; 411 412 /* 413 * We only create additional nodes if we are in insert or paste mode 414 * or we are in replace mode at the internal level. If h is 0 and 415 * the mode is M_REPLACE then in fix_nodes we change the mode to 416 * paste or insert before we get here in the code. 417 */ 418 RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE), 419 "vs-8100: insert_size < 0 in overflow"); 420 421 max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h)); 422 423 /* 424 * snum012 [0-2] - number of items, that lay 425 * to S[0], first new node and second new node 426 */ 427 snum012[3] = -1; /* s1bytes */ 428 snum012[4] = -1; /* s2bytes */ 429 430 /* internal level */ 431 if (h > 0) { 432 i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE); 433 if (i == max_node_size) 434 return 1; 435 return (i / max_node_size + 1); 436 } 437 438 /* leaf level */ 439 needed_nodes = 1; 440 total_node_size = 0; 441 cur_free = max_node_size; 442 443 /* start from 'from'-th item */ 444 start_item = from; 445 /* skip its first 'start_bytes' units */ 446 start_bytes = ((from_bytes != -1) ? from_bytes : 0); 447 448 /* last included item is the 'end_item'-th one */ 449 end_item = vn->vn_nr_item - to - 1; 450 /* do not count last 'end_bytes' units of 'end_item'-th item */ 451 end_bytes = (to_bytes != -1) ? to_bytes : 0; 452 453 /* 454 * go through all item beginning from the start_item-th item 455 * and ending by the end_item-th item. Do not count first 456 * 'start_bytes' units of 'start_item'-th item and last 457 * 'end_bytes' of 'end_item'-th item 458 */ 459 for (i = start_item; i <= end_item; i++) { 460 struct virtual_item *vi = vn->vn_vi + i; 461 int skip_from_end = ((i == end_item) ? end_bytes : 0); 462 463 RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed"); 464 465 /* get size of current item */ 466 current_item_size = vi->vi_item_len; 467 468 /* 469 * do not take in calculation head part (from_bytes) 470 * of from-th item 471 */ 472 current_item_size -= 473 op_part_size(vi, 0 /*from start */ , start_bytes); 474 475 /* do not take in calculation tail part of last item */ 476 current_item_size -= 477 op_part_size(vi, 1 /*from end */ , skip_from_end); 478 479 /* if item fits into current node entierly */ 480 if (total_node_size + current_item_size <= max_node_size) { 481 snum012[needed_nodes - 1]++; 482 total_node_size += current_item_size; 483 start_bytes = 0; 484 continue; 485 } 486 487 /* 488 * virtual item length is longer, than max size of item in 489 * a node. It is impossible for direct item 490 */ 491 if (current_item_size > max_node_size) { 492 RFALSE(is_direct_le_ih(vi->vi_ih), 493 "vs-8110: " 494 "direct item length is %d. It can not be longer than %d", 495 current_item_size, max_node_size); 496 /* we will try to split it */ 497 flow = 1; 498 } 499 500 /* as we do not split items, take new node and continue */ 501 if (!flow) { 502 needed_nodes++; 503 i--; 504 total_node_size = 0; 505 continue; 506 } 507 508 /* 509 * calculate number of item units which fit into node being 510 * filled 511 */ 512 { 513 int free_space; 514 515 free_space = max_node_size - total_node_size - IH_SIZE; 516 units = 517 op_check_left(vi, free_space, start_bytes, 518 skip_from_end); 519 /* 520 * nothing fits into current node, take new 521 * node and continue 522 */ 523 if (units == -1) { 524 needed_nodes++, i--, total_node_size = 0; 525 continue; 526 } 527 } 528 529 /* something fits into the current node */ 530 start_bytes += units; 531 snum012[needed_nodes - 1 + 3] = units; 532 533 if (needed_nodes > 2) 534 reiserfs_warning(tb->tb_sb, "vs-8111", 535 "split_item_position is out of range"); 536 snum012[needed_nodes - 1]++; 537 split_item_positions[needed_nodes - 1] = i; 538 needed_nodes++; 539 /* continue from the same item with start_bytes != -1 */ 540 start_item = i; 541 i--; 542 total_node_size = 0; 543 } 544 545 /* 546 * sum012[4] (if it is not -1) contains number of units of which 547 * are to be in S1new, snum012[3] - to be in S0. They are supposed 548 * to be S1bytes and S2bytes correspondingly, so recalculate 549 */ 550 if (snum012[4] > 0) { 551 int split_item_num; 552 int bytes_to_r, bytes_to_l; 553 int bytes_to_S1new; 554 555 split_item_num = split_item_positions[1]; 556 bytes_to_l = 557 ((from == split_item_num 558 && from_bytes != -1) ? from_bytes : 0); 559 bytes_to_r = 560 ((end_item == split_item_num 561 && end_bytes != -1) ? end_bytes : 0); 562 bytes_to_S1new = 563 ((split_item_positions[0] == 564 split_item_positions[1]) ? snum012[3] : 0); 565 566 /* s2bytes */ 567 snum012[4] = 568 op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] - 569 bytes_to_r - bytes_to_l - bytes_to_S1new; 570 571 if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY && 572 vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT) 573 reiserfs_warning(tb->tb_sb, "vs-8115", 574 "not directory or indirect item"); 575 } 576 577 /* now we know S2bytes, calculate S1bytes */ 578 if (snum012[3] > 0) { 579 int split_item_num; 580 int bytes_to_r, bytes_to_l; 581 int bytes_to_S2new; 582 583 split_item_num = split_item_positions[0]; 584 bytes_to_l = 585 ((from == split_item_num 586 && from_bytes != -1) ? from_bytes : 0); 587 bytes_to_r = 588 ((end_item == split_item_num 589 && end_bytes != -1) ? end_bytes : 0); 590 bytes_to_S2new = 591 ((split_item_positions[0] == split_item_positions[1] 592 && snum012[4] != -1) ? snum012[4] : 0); 593 594 /* s1bytes */ 595 snum012[3] = 596 op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] - 597 bytes_to_r - bytes_to_l - bytes_to_S2new; 598 } 599 600 return needed_nodes; 601} 602 603 604/* 605 * Set parameters for balancing. 606 * Performs write of results of analysis of balancing into structure tb, 607 * where it will later be used by the functions that actually do the balancing. 608 * Parameters: 609 * tb tree_balance structure; 610 * h current level of the node; 611 * lnum number of items from S[h] that must be shifted to L[h]; 612 * rnum number of items from S[h] that must be shifted to R[h]; 613 * blk_num number of blocks that S[h] will be splitted into; 614 * s012 number of items that fall into splitted nodes. 615 * lbytes number of bytes which flow to the left neighbor from the 616 * item that is not not shifted entirely 617 * rbytes number of bytes which flow to the right neighbor from the 618 * item that is not not shifted entirely 619 * s1bytes number of bytes which flow to the first new node when 620 * S[0] splits (this number is contained in s012 array) 621 */ 622 623static void set_parameters(struct tree_balance *tb, int h, int lnum, 624 int rnum, int blk_num, short *s012, int lb, int rb) 625{ 626 627 tb->lnum[h] = lnum; 628 tb->rnum[h] = rnum; 629 tb->blknum[h] = blk_num; 630 631 /* only for leaf level */ 632 if (h == 0) { 633 if (s012 != NULL) { 634 tb->s0num = *s012++, 635 tb->s1num = *s012++, tb->s2num = *s012++; 636 tb->s1bytes = *s012++; 637 tb->s2bytes = *s012; 638 } 639 tb->lbytes = lb; 640 tb->rbytes = rb; 641 } 642 PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum); 643 PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum); 644 645 PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb); 646 PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb); 647} 648 649/* 650 * check if node disappears if we shift tb->lnum[0] items to left 651 * neighbor and tb->rnum[0] to the right one. 652 */ 653static int is_leaf_removable(struct tree_balance *tb) 654{ 655 struct virtual_node *vn = tb->tb_vn; 656 int to_left, to_right; 657 int size; 658 int remain_items; 659 660 /* 661 * number of items that will be shifted to left (right) neighbor 662 * entirely 663 */ 664 to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); 665 to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0); 666 remain_items = vn->vn_nr_item; 667 668 /* how many items remain in S[0] after shiftings to neighbors */ 669 remain_items -= (to_left + to_right); 670 671 /* all content of node can be shifted to neighbors */ 672 if (remain_items < 1) { 673 set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0, 674 NULL, -1, -1); 675 return 1; 676 } 677 678 /* S[0] is not removable */ 679 if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1) 680 return 0; 681 682 /* check whether we can divide 1 remaining item between neighbors */ 683 684 /* get size of remaining item (in item units) */ 685 size = op_unit_num(&(vn->vn_vi[to_left])); 686 687 if (tb->lbytes + tb->rbytes >= size) { 688 set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL, 689 tb->lbytes, -1); 690 return 1; 691 } 692 693 return 0; 694} 695 696/* check whether L, S, R can be joined in one node */ 697static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree) 698{ 699 struct virtual_node *vn = tb->tb_vn; 700 int ih_size; 701 struct buffer_head *S0; 702 703 S0 = PATH_H_PBUFFER(tb->tb_path, 0); 704 705 ih_size = 0; 706 if (vn->vn_nr_item) { 707 if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE) 708 ih_size += IH_SIZE; 709 710 if (vn->vn_vi[vn->vn_nr_item - 1]. 711 vi_type & VI_TYPE_RIGHT_MERGEABLE) 712 ih_size += IH_SIZE; 713 } else { 714 /* there was only one item and it will be deleted */ 715 struct item_head *ih; 716 717 RFALSE(B_NR_ITEMS(S0) != 1, 718 "vs-8125: item number must be 1: it is %d", 719 B_NR_ITEMS(S0)); 720 721 ih = item_head(S0, 0); 722 if (tb->CFR[0] 723 && !comp_short_le_keys(&(ih->ih_key), 724 internal_key(tb->CFR[0], 725 tb->rkey[0]))) 726 /* 727 * Directory must be in correct state here: that is 728 * somewhere at the left side should exist first 729 * directory item. But the item being deleted can 730 * not be that first one because its right neighbor 731 * is item of the same directory. (But first item 732 * always gets deleted in last turn). So, neighbors 733 * of deleted item can be merged, so we can save 734 * ih_size 735 */ 736 if (is_direntry_le_ih(ih)) { 737 ih_size = IH_SIZE; 738 739 /* 740 * we might check that left neighbor exists 741 * and is of the same directory 742 */ 743 RFALSE(le_ih_k_offset(ih) == DOT_OFFSET, 744 "vs-8130: first directory item can not be removed until directory is not empty"); 745 } 746 747 } 748 749 if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) { 750 set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1); 751 PROC_INFO_INC(tb->tb_sb, leaves_removable); 752 return 1; 753 } 754 return 0; 755 756} 757 758/* when we do not split item, lnum and rnum are numbers of entire items */ 759#define SET_PAR_SHIFT_LEFT \ 760if (h)\ 761{\ 762 int to_l;\ 763 \ 764 to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\ 765 (MAX_NR_KEY(Sh) + 1 - lpar);\ 766 \ 767 set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\ 768}\ 769else \ 770{\ 771 if (lset==LEFT_SHIFT_FLOW)\ 772 set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\ 773 tb->lbytes, -1);\ 774 else\ 775 set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\ 776 -1, -1);\ 777} 778 779#define SET_PAR_SHIFT_RIGHT \ 780if (h)\ 781{\ 782 int to_r;\ 783 \ 784 to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\ 785 \ 786 set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\ 787}\ 788else \ 789{\ 790 if (rset==RIGHT_SHIFT_FLOW)\ 791 set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\ 792 -1, tb->rbytes);\ 793 else\ 794 set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\ 795 -1, -1);\ 796} 797 798static void free_buffers_in_tb(struct tree_balance *tb) 799{ 800 int i; 801 802 pathrelse(tb->tb_path); 803 804 for (i = 0; i < MAX_HEIGHT; i++) { 805 brelse(tb->L[i]); 806 brelse(tb->R[i]); 807 brelse(tb->FL[i]); 808 brelse(tb->FR[i]); 809 brelse(tb->CFL[i]); 810 brelse(tb->CFR[i]); 811 812 tb->L[i] = NULL; 813 tb->R[i] = NULL; 814 tb->FL[i] = NULL; 815 tb->FR[i] = NULL; 816 tb->CFL[i] = NULL; 817 tb->CFR[i] = NULL; 818 } 819} 820 821/* 822 * Get new buffers for storing new nodes that are created while balancing. 823 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 824 * CARRY_ON - schedule didn't occur while the function worked; 825 * NO_DISK_SPACE - no disk space. 826 */ 827/* The function is NOT SCHEDULE-SAFE! */ 828static int get_empty_nodes(struct tree_balance *tb, int h) 829{ 830 struct buffer_head *new_bh, *Sh = PATH_H_PBUFFER(tb->tb_path, h); 831 b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; 832 int counter, number_of_freeblk; 833 int amount_needed; /* number of needed empty blocks */ 834 int retval = CARRY_ON; 835 struct super_block *sb = tb->tb_sb; 836 837 /* 838 * number_of_freeblk is the number of empty blocks which have been 839 * acquired for use by the balancing algorithm minus the number of 840 * empty blocks used in the previous levels of the analysis, 841 * number_of_freeblk = tb->cur_blknum can be non-zero if a schedule 842 * occurs after empty blocks are acquired, and the balancing analysis 843 * is then restarted, amount_needed is the number needed by this 844 * level (h) of the balancing analysis. 845 * 846 * Note that for systems with many processes writing, it would be 847 * more layout optimal to calculate the total number needed by all 848 * levels and then to run reiserfs_new_blocks to get all of them at 849 * once. 850 */ 851 852 /* 853 * Initiate number_of_freeblk to the amount acquired prior to the 854 * restart of the analysis or 0 if not restarted, then subtract the 855 * amount needed by all of the levels of the tree below h. 856 */ 857 /* blknum includes S[h], so we subtract 1 in this calculation */ 858 for (counter = 0, number_of_freeblk = tb->cur_blknum; 859 counter < h; counter++) 860 number_of_freeblk -= 861 (tb->blknum[counter]) ? (tb->blknum[counter] - 862 1) : 0; 863 864 /* Allocate missing empty blocks. */ 865 /* if Sh == 0 then we are getting a new root */ 866 amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1; 867 /* 868 * Amount_needed = the amount that we need more than the 869 * amount that we have. 870 */ 871 if (amount_needed > number_of_freeblk) 872 amount_needed -= number_of_freeblk; 873 else /* If we have enough already then there is nothing to do. */ 874 return CARRY_ON; 875 876 /* 877 * No need to check quota - is not allocated for blocks used 878 * for formatted nodes 879 */ 880 if (reiserfs_new_form_blocknrs(tb, blocknrs, 881 amount_needed) == NO_DISK_SPACE) 882 return NO_DISK_SPACE; 883 884 /* for each blocknumber we just got, get a buffer and stick it on FEB */ 885 for (blocknr = blocknrs, counter = 0; 886 counter < amount_needed; blocknr++, counter++) { 887 888 RFALSE(!*blocknr, 889 "PAP-8135: reiserfs_new_blocknrs failed when got new blocks"); 890 891 new_bh = sb_getblk(sb, *blocknr); 892 RFALSE(buffer_dirty(new_bh) || 893 buffer_journaled(new_bh) || 894 buffer_journal_dirty(new_bh), 895 "PAP-8140: journaled or dirty buffer %b for the new block", 896 new_bh); 897 898 /* Put empty buffers into the array. */ 899 RFALSE(tb->FEB[tb->cur_blknum], 900 "PAP-8141: busy slot for new buffer"); 901 902 set_buffer_journal_new(new_bh); 903 tb->FEB[tb->cur_blknum++] = new_bh; 904 } 905 906 if (retval == CARRY_ON && FILESYSTEM_CHANGED_TB(tb)) 907 retval = REPEAT_SEARCH; 908 909 return retval; 910} 911 912/* 913 * Get free space of the left neighbor, which is stored in the parent 914 * node of the left neighbor. 915 */ 916static int get_lfree(struct tree_balance *tb, int h) 917{ 918 struct buffer_head *l, *f; 919 int order; 920 921 if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL || 922 (l = tb->FL[h]) == NULL) 923 return 0; 924 925 if (f == l) 926 order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1; 927 else { 928 order = B_NR_ITEMS(l); 929 f = l; 930 } 931 932 return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); 933} 934 935/* 936 * Get free space of the right neighbor, 937 * which is stored in the parent node of the right neighbor. 938 */ 939static int get_rfree(struct tree_balance *tb, int h) 940{ 941 struct buffer_head *r, *f; 942 int order; 943 944 if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL || 945 (r = tb->FR[h]) == NULL) 946 return 0; 947 948 if (f == r) 949 order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1; 950 else { 951 order = 0; 952 f = r; 953 } 954 955 return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); 956 957} 958 959/* Check whether left neighbor is in memory. */ 960static int is_left_neighbor_in_cache(struct tree_balance *tb, int h) 961{ 962 struct buffer_head *father, *left; 963 struct super_block *sb = tb->tb_sb; 964 b_blocknr_t left_neighbor_blocknr; 965 int left_neighbor_position; 966 967 /* Father of the left neighbor does not exist. */ 968 if (!tb->FL[h]) 969 return 0; 970 971 /* Calculate father of the node to be balanced. */ 972 father = PATH_H_PBUFFER(tb->tb_path, h + 1); 973 974 RFALSE(!father || 975 !B_IS_IN_TREE(father) || 976 !B_IS_IN_TREE(tb->FL[h]) || 977 !buffer_uptodate(father) || 978 !buffer_uptodate(tb->FL[h]), 979 "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", 980 father, tb->FL[h]); 981 982 /* 983 * Get position of the pointer to the left neighbor 984 * into the left father. 985 */ 986 left_neighbor_position = (father == tb->FL[h]) ? 987 tb->lkey[h] : B_NR_ITEMS(tb->FL[h]); 988 /* Get left neighbor block number. */ 989 left_neighbor_blocknr = 990 B_N_CHILD_NUM(tb->FL[h], left_neighbor_position); 991 /* Look for the left neighbor in the cache. */ 992 if ((left = sb_find_get_block(sb, left_neighbor_blocknr))) { 993 994 RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left), 995 "vs-8170: left neighbor (%b %z) is not in the tree", 996 left, left); 997 put_bh(left); 998 return 1; 999 } 1000 1001 return 0; 1002} 1003 1004#define LEFT_PARENTS 'l' 1005#define RIGHT_PARENTS 'r' 1006 1007static void decrement_key(struct cpu_key *key) 1008{ 1009 /* call item specific function for this key */ 1010 item_ops[cpu_key_k_type(key)]->decrement_key(key); 1011} 1012 1013/* 1014 * Calculate far left/right parent of the left/right neighbor of the 1015 * current node, that is calculate the left/right (FL[h]/FR[h]) neighbor 1016 * of the parent F[h]. 1017 * Calculate left/right common parent of the current node and L[h]/R[h]. 1018 * Calculate left/right delimiting key position. 1019 * Returns: PATH_INCORRECT - path in the tree is not correct 1020 * SCHEDULE_OCCURRED - schedule occurred while the function worked 1021 * CARRY_ON - schedule didn't occur while the function 1022 * worked 1023 */ 1024static int get_far_parent(struct tree_balance *tb, 1025 int h, 1026 struct buffer_head **pfather, 1027 struct buffer_head **pcom_father, char c_lr_par) 1028{ 1029 struct buffer_head *parent; 1030 INITIALIZE_PATH(s_path_to_neighbor_father); 1031 struct treepath *path = tb->tb_path; 1032 struct cpu_key s_lr_father_key; 1033 int counter, 1034 position = INT_MAX, 1035 first_last_position = 0, 1036 path_offset = PATH_H_PATH_OFFSET(path, h); 1037 1038 /* 1039 * Starting from F[h] go upwards in the tree, and look for the common 1040 * ancestor of F[h], and its neighbor l/r, that should be obtained. 1041 */ 1042 1043 counter = path_offset; 1044 1045 RFALSE(counter < FIRST_PATH_ELEMENT_OFFSET, 1046 "PAP-8180: invalid path length"); 1047 1048 for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) { 1049 /* 1050 * Check whether parent of the current buffer in the path 1051 * is really parent in the tree. 1052 */ 1053 if (!B_IS_IN_TREE 1054 (parent = PATH_OFFSET_PBUFFER(path, counter - 1))) 1055 return REPEAT_SEARCH; 1056 1057 /* Check whether position in the parent is correct. */ 1058 if ((position = 1059 PATH_OFFSET_POSITION(path, 1060 counter - 1)) > 1061 B_NR_ITEMS(parent)) 1062 return REPEAT_SEARCH; 1063 1064 /* 1065 * Check whether parent at the path really points 1066 * to the child. 1067 */ 1068 if (B_N_CHILD_NUM(parent, position) != 1069 PATH_OFFSET_PBUFFER(path, counter)->b_blocknr) 1070 return REPEAT_SEARCH; 1071 1072 /* 1073 * Return delimiting key if position in the parent is not 1074 * equal to first/last one. 1075 */ 1076 if (c_lr_par == RIGHT_PARENTS) 1077 first_last_position = B_NR_ITEMS(parent); 1078 if (position != first_last_position) { 1079 *pcom_father = parent; 1080 get_bh(*pcom_father); 1081 /*(*pcom_father = parent)->b_count++; */ 1082 break; 1083 } 1084 } 1085 1086 /* if we are in the root of the tree, then there is no common father */ 1087 if (counter == FIRST_PATH_ELEMENT_OFFSET) { 1088 /* 1089 * Check whether first buffer in the path is the 1090 * root of the tree. 1091 */ 1092 if (PATH_OFFSET_PBUFFER 1093 (tb->tb_path, 1094 FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == 1095 SB_ROOT_BLOCK(tb->tb_sb)) { 1096 *pfather = *pcom_father = NULL; 1097 return CARRY_ON; 1098 } 1099 return REPEAT_SEARCH; 1100 } 1101 1102 RFALSE(B_LEVEL(*pcom_father) <= DISK_LEAF_NODE_LEVEL, 1103 "PAP-8185: (%b %z) level too small", 1104 *pcom_father, *pcom_father); 1105 1106 /* Check whether the common parent is locked. */ 1107 1108 if (buffer_locked(*pcom_father)) { 1109 1110 /* Release the write lock while the buffer is busy */ 1111 int depth = reiserfs_write_unlock_nested(tb->tb_sb); 1112 __wait_on_buffer(*pcom_father); 1113 reiserfs_write_lock_nested(tb->tb_sb, depth); 1114 if (FILESYSTEM_CHANGED_TB(tb)) { 1115 brelse(*pcom_father); 1116 return REPEAT_SEARCH; 1117 } 1118 } 1119 1120 /* 1121 * So, we got common parent of the current node and its 1122 * left/right neighbor. Now we are getting the parent of the 1123 * left/right neighbor. 1124 */ 1125 1126 /* Form key to get parent of the left/right neighbor. */ 1127 le_key2cpu_key(&s_lr_father_key, 1128 internal_key(*pcom_father, 1129 (c_lr_par == 1130 LEFT_PARENTS) ? (tb->lkey[h - 1] = 1131 position - 1132 1) : (tb->rkey[h - 1133 1] = 1134 position))); 1135 1136 if (c_lr_par == LEFT_PARENTS) 1137 decrement_key(&s_lr_father_key); 1138 1139 if (search_by_key 1140 (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, 1141 h + 1) == IO_ERROR) 1142 /* path is released */ 1143 return IO_ERROR; 1144 1145 if (FILESYSTEM_CHANGED_TB(tb)) { 1146 pathrelse(&s_path_to_neighbor_father); 1147 brelse(*pcom_father); 1148 return REPEAT_SEARCH; 1149 } 1150 1151 *pfather = PATH_PLAST_BUFFER(&s_path_to_neighbor_father); 1152 1153 RFALSE(B_LEVEL(*pfather) != h + 1, 1154 "PAP-8190: (%b %z) level too small", *pfather, *pfather); 1155 RFALSE(s_path_to_neighbor_father.path_length < 1156 FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small"); 1157 1158 s_path_to_neighbor_father.path_length--; 1159 pathrelse(&s_path_to_neighbor_father); 1160 return CARRY_ON; 1161} 1162 1163/* 1164 * Get parents of neighbors of node in the path(S[path_offset]) and 1165 * common parents of S[path_offset] and L[path_offset]/R[path_offset]: 1166 * F[path_offset], FL[path_offset], FR[path_offset], CFL[path_offset], 1167 * CFR[path_offset]. 1168 * Calculate numbers of left and right delimiting keys position: 1169 * lkey[path_offset], rkey[path_offset]. 1170 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked 1171 * CARRY_ON - schedule didn't occur while the function worked 1172 */ 1173static int get_parents(struct tree_balance *tb, int h) 1174{ 1175 struct treepath *path = tb->tb_path; 1176 int position, 1177 ret, 1178 path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h); 1179 struct buffer_head *curf, *curcf; 1180 1181 /* Current node is the root of the tree or will be root of the tree */ 1182 if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { 1183 /* 1184 * The root can not have parents. 1185 * Release nodes which previously were obtained as 1186 * parents of the current node neighbors. 1187 */ 1188 brelse(tb->FL[h]); 1189 brelse(tb->CFL[h]); 1190 brelse(tb->FR[h]); 1191 brelse(tb->CFR[h]); 1192 tb->FL[h] = NULL; 1193 tb->CFL[h] = NULL; 1194 tb->FR[h] = NULL; 1195 tb->CFR[h] = NULL; 1196 return CARRY_ON; 1197 } 1198 1199 /* Get parent FL[path_offset] of L[path_offset]. */ 1200 position = PATH_OFFSET_POSITION(path, path_offset - 1); 1201 if (position) { 1202 /* Current node is not the first child of its parent. */ 1203 curf = PATH_OFFSET_PBUFFER(path, path_offset - 1); 1204 curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1); 1205 get_bh(curf); 1206 get_bh(curf); 1207 tb->lkey[h] = position - 1; 1208 } else { 1209 /* 1210 * Calculate current parent of L[path_offset], which is the 1211 * left neighbor of the current node. Calculate current 1212 * common parent of L[path_offset] and the current node. 1213 * Note that CFL[path_offset] not equal FL[path_offset] and 1214 * CFL[path_offset] not equal F[path_offset]. 1215 * Calculate lkey[path_offset]. 1216 */ 1217 if ((ret = get_far_parent(tb, h + 1, &curf, 1218 &curcf, 1219 LEFT_PARENTS)) != CARRY_ON) 1220 return ret; 1221 } 1222 1223 brelse(tb->FL[h]); 1224 tb->FL[h] = curf; /* New initialization of FL[h]. */ 1225 brelse(tb->CFL[h]); 1226 tb->CFL[h] = curcf; /* New initialization of CFL[h]. */ 1227 1228 RFALSE((curf && !B_IS_IN_TREE(curf)) || 1229 (curcf && !B_IS_IN_TREE(curcf)), 1230 "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf); 1231 1232 /* Get parent FR[h] of R[h]. */ 1233 1234 /* Current node is the last child of F[h]. FR[h] != F[h]. */ 1235 if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) { 1236 /* 1237 * Calculate current parent of R[h], which is the right 1238 * neighbor of F[h]. Calculate current common parent of 1239 * R[h] and current node. Note that CFR[h] not equal 1240 * FR[path_offset] and CFR[h] not equal F[h]. 1241 */ 1242 if ((ret = 1243 get_far_parent(tb, h + 1, &curf, &curcf, 1244 RIGHT_PARENTS)) != CARRY_ON) 1245 return ret; 1246 } else { 1247 /* Current node is not the last child of its parent F[h]. */ 1248 curf = PATH_OFFSET_PBUFFER(path, path_offset - 1); 1249 curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1); 1250 get_bh(curf); 1251 get_bh(curf); 1252 tb->rkey[h] = position; 1253 } 1254 1255 brelse(tb->FR[h]); 1256 /* New initialization of FR[path_offset]. */ 1257 tb->FR[h] = curf; 1258 1259 brelse(tb->CFR[h]); 1260 /* New initialization of CFR[path_offset]. */ 1261 tb->CFR[h] = curcf; 1262 1263 RFALSE((curf && !B_IS_IN_TREE(curf)) || 1264 (curcf && !B_IS_IN_TREE(curcf)), 1265 "PAP-8205: FR (%b) or CFR (%b) is invalid", curf, curcf); 1266 1267 return CARRY_ON; 1268} 1269 1270/* 1271 * it is possible to remove node as result of shiftings to 1272 * neighbors even when we insert or paste item. 1273 */ 1274static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, 1275 struct tree_balance *tb, int h) 1276{ 1277 struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h); 1278 int levbytes = tb->insert_size[h]; 1279 struct item_head *ih; 1280 struct reiserfs_key *r_key = NULL; 1281 1282 ih = item_head(Sh, 0); 1283 if (tb->CFR[h]) 1284 r_key = internal_key(tb->CFR[h], tb->rkey[h]); 1285 1286 if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes 1287 /* shifting may merge items which might save space */ 1288 - 1289 ((!h 1290 && op_is_left_mergeable(&(ih->ih_key), Sh->b_size)) ? IH_SIZE : 0) 1291 - 1292 ((!h && r_key 1293 && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0) 1294 + ((h) ? KEY_SIZE : 0)) { 1295 /* node can not be removed */ 1296 if (sfree >= levbytes) { 1297 /* new item fits into node S[h] without any shifting */ 1298 if (!h) 1299 tb->s0num = 1300 B_NR_ITEMS(Sh) + 1301 ((mode == M_INSERT) ? 1 : 0); 1302 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1303 return NO_BALANCING_NEEDED; 1304 } 1305 } 1306 PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]); 1307 return !NO_BALANCING_NEEDED; 1308} 1309 1310/* 1311 * Check whether current node S[h] is balanced when increasing its size by 1312 * Inserting or Pasting. 1313 * Calculate parameters for balancing for current level h. 1314 * Parameters: 1315 * tb tree_balance structure; 1316 * h current level of the node; 1317 * inum item number in S[h]; 1318 * mode i - insert, p - paste; 1319 * Returns: 1 - schedule occurred; 1320 * 0 - balancing for higher levels needed; 1321 * -1 - no balancing for higher levels needed; 1322 * -2 - no disk space. 1323 */ 1324/* ip means Inserting or Pasting */ 1325static int ip_check_balance(struct tree_balance *tb, int h) 1326{ 1327 struct virtual_node *vn = tb->tb_vn; 1328 /* 1329 * Number of bytes that must be inserted into (value is negative 1330 * if bytes are deleted) buffer which contains node being balanced. 1331 * The mnemonic is that the attempted change in node space used 1332 * level is levbytes bytes. 1333 */ 1334 int levbytes; 1335 int ret; 1336 1337 int lfree, sfree, rfree /* free space in L, S and R */ ; 1338 1339 /* 1340 * nver is short for number of vertixes, and lnver is the number if 1341 * we shift to the left, rnver is the number if we shift to the 1342 * right, and lrnver is the number if we shift in both directions. 1343 * The goal is to minimize first the number of vertixes, and second, 1344 * the number of vertixes whose contents are changed by shifting, 1345 * and third the number of uncached vertixes whose contents are 1346 * changed by shifting and must be read from disk. 1347 */ 1348 int nver, lnver, rnver, lrnver; 1349 1350 /* 1351 * used at leaf level only, S0 = S[0] is the node being balanced, 1352 * sInum [ I = 0,1,2 ] is the number of items that will 1353 * remain in node SI after balancing. S1 and S2 are new 1354 * nodes that might be created. 1355 */ 1356 1357 /* 1358 * we perform 8 calls to get_num_ver(). For each call we 1359 * calculate five parameters. where 4th parameter is s1bytes 1360 * and 5th - s2bytes 1361 * 1362 * s0num, s1num, s2num for 8 cases 1363 * 0,1 - do not shift and do not shift but bottle 1364 * 2 - shift only whole item to left 1365 * 3 - shift to left and bottle as much as possible 1366 * 4,5 - shift to right (whole items and as much as possible 1367 * 6,7 - shift to both directions (whole items and as much as possible) 1368 */ 1369 short snum012[40] = { 0, }; 1370 1371 /* Sh is the node whose balance is currently being checked */ 1372 struct buffer_head *Sh; 1373 1374 Sh = PATH_H_PBUFFER(tb->tb_path, h); 1375 levbytes = tb->insert_size[h]; 1376 1377 /* Calculate balance parameters for creating new root. */ 1378 if (!Sh) { 1379 if (!h) 1380 reiserfs_panic(tb->tb_sb, "vs-8210", 1381 "S[0] can not be 0"); 1382 switch (ret = get_empty_nodes(tb, h)) { 1383 /* no balancing for higher levels needed */ 1384 case CARRY_ON: 1385 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1386 return NO_BALANCING_NEEDED; 1387 1388 case NO_DISK_SPACE: 1389 case REPEAT_SEARCH: 1390 return ret; 1391 default: 1392 reiserfs_panic(tb->tb_sb, "vs-8215", "incorrect " 1393 "return value of get_empty_nodes"); 1394 } 1395 } 1396 1397 /* get parents of S[h] neighbors. */ 1398 ret = get_parents(tb, h); 1399 if (ret != CARRY_ON) 1400 return ret; 1401 1402 sfree = B_FREE_SPACE(Sh); 1403 1404 /* get free space of neighbors */ 1405 rfree = get_rfree(tb, h); 1406 lfree = get_lfree(tb, h); 1407 1408 /* and new item fits into node S[h] without any shifting */ 1409 if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) == 1410 NO_BALANCING_NEEDED) 1411 return NO_BALANCING_NEEDED; 1412 1413 create_virtual_node(tb, h); 1414 1415 /* 1416 * determine maximal number of items we can shift to the left 1417 * neighbor (in tb structure) and the maximal number of bytes 1418 * that can flow to the left neighbor from the left most liquid 1419 * item that cannot be shifted from S[0] entirely (returned value) 1420 */ 1421 check_left(tb, h, lfree); 1422 1423 /* 1424 * determine maximal number of items we can shift to the right 1425 * neighbor (in tb structure) and the maximal number of bytes 1426 * that can flow to the right neighbor from the right most liquid 1427 * item that cannot be shifted from S[0] entirely (returned value) 1428 */ 1429 check_right(tb, h, rfree); 1430 1431 /* 1432 * all contents of internal node S[h] can be moved into its 1433 * neighbors, S[h] will be removed after balancing 1434 */ 1435 if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) { 1436 int to_r; 1437 1438 /* 1439 * Since we are working on internal nodes, and our internal 1440 * nodes have fixed size entries, then we can balance by the 1441 * number of items rather than the space they consume. In this 1442 * routine we set the left node equal to the right node, 1443 * allowing a difference of less than or equal to 1 child 1444 * pointer. 1445 */ 1446 to_r = 1447 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + 1448 vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - 1449 tb->rnum[h]); 1450 set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, 1451 -1, -1); 1452 return CARRY_ON; 1453 } 1454 1455 /* 1456 * this checks balance condition, that any two neighboring nodes 1457 * can not fit in one node 1458 */ 1459 RFALSE(h && 1460 (tb->lnum[h] >= vn->vn_nr_item + 1 || 1461 tb->rnum[h] >= vn->vn_nr_item + 1), 1462 "vs-8220: tree is not balanced on internal level"); 1463 RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) || 1464 (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))), 1465 "vs-8225: tree is not balanced on leaf level"); 1466 1467 /* 1468 * all contents of S[0] can be moved into its neighbors 1469 * S[0] will be removed after balancing. 1470 */ 1471 if (!h && is_leaf_removable(tb)) 1472 return CARRY_ON; 1473 1474 /* 1475 * why do we perform this check here rather than earlier?? 1476 * Answer: we can win 1 node in some cases above. Moreover we 1477 * checked it above, when we checked, that S[0] is not removable 1478 * in principle 1479 */ 1480 1481 /* new item fits into node S[h] without any shifting */ 1482 if (sfree >= levbytes) { 1483 if (!h) 1484 tb->s0num = vn->vn_nr_item; 1485 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1486 return NO_BALANCING_NEEDED; 1487 } 1488 1489 { 1490 int lpar, rpar, nset, lset, rset, lrset; 1491 /* regular overflowing of the node */ 1492 1493 /* 1494 * get_num_ver works in 2 modes (FLOW & NO_FLOW) 1495 * lpar, rpar - number of items we can shift to left/right 1496 * neighbor (including splitting item) 1497 * nset, lset, rset, lrset - shows, whether flowing items 1498 * give better packing 1499 */ 1500#define FLOW 1 1501#define NO_FLOW 0 /* do not any splitting */ 1502 1503 /* we choose one of the following */ 1504#define NOTHING_SHIFT_NO_FLOW 0 1505#define NOTHING_SHIFT_FLOW 5 1506#define LEFT_SHIFT_NO_FLOW 10 1507#define LEFT_SHIFT_FLOW 15 1508#define RIGHT_SHIFT_NO_FLOW 20 1509#define RIGHT_SHIFT_FLOW 25 1510#define LR_SHIFT_NO_FLOW 30 1511#define LR_SHIFT_FLOW 35 1512 1513 lpar = tb->lnum[h]; 1514 rpar = tb->rnum[h]; 1515 1516 /* 1517 * calculate number of blocks S[h] must be split into when 1518 * nothing is shifted to the neighbors, as well as number of 1519 * items in each part of the split node (s012 numbers), 1520 * and number of bytes (s1bytes) of the shared drop which 1521 * flow to S1 if any 1522 */ 1523 nset = NOTHING_SHIFT_NO_FLOW; 1524 nver = get_num_ver(vn->vn_mode, tb, h, 1525 0, -1, h ? vn->vn_nr_item : 0, -1, 1526 snum012, NO_FLOW); 1527 1528 if (!h) { 1529 int nver1; 1530 1531 /* 1532 * note, that in this case we try to bottle 1533 * between S[0] and S1 (S1 - the first new node) 1534 */ 1535 nver1 = get_num_ver(vn->vn_mode, tb, h, 1536 0, -1, 0, -1, 1537 snum012 + NOTHING_SHIFT_FLOW, FLOW); 1538 if (nver > nver1) 1539 nset = NOTHING_SHIFT_FLOW, nver = nver1; 1540 } 1541 1542 /* 1543 * calculate number of blocks S[h] must be split into when 1544 * l_shift_num first items and l_shift_bytes of the right 1545 * most liquid item to be shifted are shifted to the left 1546 * neighbor, as well as number of items in each part of the 1547 * splitted node (s012 numbers), and number of bytes 1548 * (s1bytes) of the shared drop which flow to S1 if any 1549 */ 1550 lset = LEFT_SHIFT_NO_FLOW; 1551 lnver = get_num_ver(vn->vn_mode, tb, h, 1552 lpar - ((h || tb->lbytes == -1) ? 0 : 1), 1553 -1, h ? vn->vn_nr_item : 0, -1, 1554 snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW); 1555 if (!h) { 1556 int lnver1; 1557 1558 lnver1 = get_num_ver(vn->vn_mode, tb, h, 1559 lpar - 1560 ((tb->lbytes != -1) ? 1 : 0), 1561 tb->lbytes, 0, -1, 1562 snum012 + LEFT_SHIFT_FLOW, FLOW); 1563 if (lnver > lnver1) 1564 lset = LEFT_SHIFT_FLOW, lnver = lnver1; 1565 } 1566 1567 /* 1568 * calculate number of blocks S[h] must be split into when 1569 * r_shift_num first items and r_shift_bytes of the left most 1570 * liquid item to be shifted are shifted to the right neighbor, 1571 * as well as number of items in each part of the splitted 1572 * node (s012 numbers), and number of bytes (s1bytes) of the 1573 * shared drop which flow to S1 if any 1574 */ 1575 rset = RIGHT_SHIFT_NO_FLOW; 1576 rnver = get_num_ver(vn->vn_mode, tb, h, 1577 0, -1, 1578 h ? (vn->vn_nr_item - rpar) : (rpar - 1579 ((tb-> 1580 rbytes != 1581 -1) ? 1 : 1582 0)), -1, 1583 snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW); 1584 if (!h) { 1585 int rnver1; 1586 1587 rnver1 = get_num_ver(vn->vn_mode, tb, h, 1588 0, -1, 1589 (rpar - 1590 ((tb->rbytes != -1) ? 1 : 0)), 1591 tb->rbytes, 1592 snum012 + RIGHT_SHIFT_FLOW, FLOW); 1593 1594 if (rnver > rnver1) 1595 rset = RIGHT_SHIFT_FLOW, rnver = rnver1; 1596 } 1597 1598 /* 1599 * calculate number of blocks S[h] must be split into when 1600 * items are shifted in both directions, as well as number 1601 * of items in each part of the splitted node (s012 numbers), 1602 * and number of bytes (s1bytes) of the shared drop which 1603 * flow to S1 if any 1604 */ 1605 lrset = LR_SHIFT_NO_FLOW; 1606 lrnver = get_num_ver(vn->vn_mode, tb, h, 1607 lpar - ((h || tb->lbytes == -1) ? 0 : 1), 1608 -1, 1609 h ? (vn->vn_nr_item - rpar) : (rpar - 1610 ((tb-> 1611 rbytes != 1612 -1) ? 1 : 1613 0)), -1, 1614 snum012 + LR_SHIFT_NO_FLOW, NO_FLOW); 1615 if (!h) { 1616 int lrnver1; 1617 1618 lrnver1 = get_num_ver(vn->vn_mode, tb, h, 1619 lpar - 1620 ((tb->lbytes != -1) ? 1 : 0), 1621 tb->lbytes, 1622 (rpar - 1623 ((tb->rbytes != -1) ? 1 : 0)), 1624 tb->rbytes, 1625 snum012 + LR_SHIFT_FLOW, FLOW); 1626 if (lrnver > lrnver1) 1627 lrset = LR_SHIFT_FLOW, lrnver = lrnver1; 1628 } 1629 1630 /* 1631 * Our general shifting strategy is: 1632 * 1) to minimized number of new nodes; 1633 * 2) to minimized number of neighbors involved in shifting; 1634 * 3) to minimized number of disk reads; 1635 */ 1636 1637 /* we can win TWO or ONE nodes by shifting in both directions */ 1638 if (lrnver < lnver && lrnver < rnver) { 1639 RFALSE(h && 1640 (tb->lnum[h] != 1 || 1641 tb->rnum[h] != 1 || 1642 lrnver != 1 || rnver != 2 || lnver != 2 1643 || h != 1), "vs-8230: bad h"); 1644 if (lrset == LR_SHIFT_FLOW) 1645 set_parameters(tb, h, tb->lnum[h], tb->rnum[h], 1646 lrnver, snum012 + lrset, 1647 tb->lbytes, tb->rbytes); 1648 else 1649 set_parameters(tb, h, 1650 tb->lnum[h] - 1651 ((tb->lbytes == -1) ? 0 : 1), 1652 tb->rnum[h] - 1653 ((tb->rbytes == -1) ? 0 : 1), 1654 lrnver, snum012 + lrset, -1, -1); 1655 1656 return CARRY_ON; 1657 } 1658 1659 /* 1660 * if shifting doesn't lead to better packing 1661 * then don't shift 1662 */ 1663 if (nver == lrnver) { 1664 set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1, 1665 -1); 1666 return CARRY_ON; 1667 } 1668 1669 /* 1670 * now we know that for better packing shifting in only one 1671 * direction either to the left or to the right is required 1672 */ 1673 1674 /* 1675 * if shifting to the left is better than 1676 * shifting to the right 1677 */ 1678 if (lnver < rnver) { 1679 SET_PAR_SHIFT_LEFT; 1680 return CARRY_ON; 1681 } 1682 1683 /* 1684 * if shifting to the right is better than 1685 * shifting to the left 1686 */ 1687 if (lnver > rnver) { 1688 SET_PAR_SHIFT_RIGHT; 1689 return CARRY_ON; 1690 } 1691 1692 /* 1693 * now shifting in either direction gives the same number 1694 * of nodes and we can make use of the cached neighbors 1695 */ 1696 if (is_left_neighbor_in_cache(tb, h)) { 1697 SET_PAR_SHIFT_LEFT; 1698 return CARRY_ON; 1699 } 1700 1701 /* 1702 * shift to the right independently on whether the 1703 * right neighbor in cache or not 1704 */ 1705 SET_PAR_SHIFT_RIGHT; 1706 return CARRY_ON; 1707 } 1708} 1709 1710/* 1711 * Check whether current node S[h] is balanced when Decreasing its size by 1712 * Deleting or Cutting for INTERNAL node of S+tree. 1713 * Calculate parameters for balancing for current level h. 1714 * Parameters: 1715 * tb tree_balance structure; 1716 * h current level of the node; 1717 * inum item number in S[h]; 1718 * mode i - insert, p - paste; 1719 * Returns: 1 - schedule occurred; 1720 * 0 - balancing for higher levels needed; 1721 * -1 - no balancing for higher levels needed; 1722 * -2 - no disk space. 1723 * 1724 * Note: Items of internal nodes have fixed size, so the balance condition for 1725 * the internal part of S+tree is as for the B-trees. 1726 */ 1727static int dc_check_balance_internal(struct tree_balance *tb, int h) 1728{ 1729 struct virtual_node *vn = tb->tb_vn; 1730 1731 /* 1732 * Sh is the node whose balance is currently being checked, 1733 * and Fh is its father. 1734 */ 1735 struct buffer_head *Sh, *Fh; 1736 int maxsize, ret; 1737 int lfree, rfree /* free space in L and R */ ; 1738 1739 Sh = PATH_H_PBUFFER(tb->tb_path, h); 1740 Fh = PATH_H_PPARENT(tb->tb_path, h); 1741 1742 maxsize = MAX_CHILD_SIZE(Sh); 1743 1744 /* 1745 * using tb->insert_size[h], which is negative in this case, 1746 * create_virtual_node calculates: 1747 * new_nr_item = number of items node would have if operation is 1748 * performed without balancing (new_nr_item); 1749 */ 1750 create_virtual_node(tb, h); 1751 1752 if (!Fh) { /* S[h] is the root. */ 1753 /* no balancing for higher levels needed */ 1754 if (vn->vn_nr_item > 0) { 1755 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1756 return NO_BALANCING_NEEDED; 1757 } 1758 /* 1759 * new_nr_item == 0. 1760 * Current root will be deleted resulting in 1761 * decrementing the tree height. 1762 */ 1763 set_parameters(tb, h, 0, 0, 0, NULL, -1, -1); 1764 return CARRY_ON; 1765 } 1766 1767 if ((ret = get_parents(tb, h)) != CARRY_ON) 1768 return ret; 1769 1770 /* get free space of neighbors */ 1771 rfree = get_rfree(tb, h); 1772 lfree = get_lfree(tb, h); 1773 1774 /* determine maximal number of items we can fit into neighbors */ 1775 check_left(tb, h, lfree); 1776 check_right(tb, h, rfree); 1777 1778 /* 1779 * Balance condition for the internal node is valid. 1780 * In this case we balance only if it leads to better packing. 1781 */ 1782 if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { 1783 /* 1784 * Here we join S[h] with one of its neighbors, 1785 * which is impossible with greater values of new_nr_item. 1786 */ 1787 if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { 1788 /* All contents of S[h] can be moved to L[h]. */ 1789 if (tb->lnum[h] >= vn->vn_nr_item + 1) { 1790 int n; 1791 int order_L; 1792 1793 order_L = 1794 ((n = 1795 PATH_H_B_ITEM_ORDER(tb->tb_path, 1796 h)) == 1797 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1; 1798 n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / 1799 (DC_SIZE + KEY_SIZE); 1800 set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, 1801 -1); 1802 return CARRY_ON; 1803 } 1804 1805 /* All contents of S[h] can be moved to R[h]. */ 1806 if (tb->rnum[h] >= vn->vn_nr_item + 1) { 1807 int n; 1808 int order_R; 1809 1810 order_R = 1811 ((n = 1812 PATH_H_B_ITEM_ORDER(tb->tb_path, 1813 h)) == 1814 B_NR_ITEMS(Fh)) ? 0 : n + 1; 1815 n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / 1816 (DC_SIZE + KEY_SIZE); 1817 set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, 1818 -1); 1819 return CARRY_ON; 1820 } 1821 } 1822 1823 /* 1824 * All contents of S[h] can be moved to the neighbors 1825 * (L[h] & R[h]). 1826 */ 1827 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { 1828 int to_r; 1829 1830 to_r = 1831 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - 1832 tb->rnum[h] + vn->vn_nr_item + 1) / 2 - 1833 (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); 1834 set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 1835 0, NULL, -1, -1); 1836 return CARRY_ON; 1837 } 1838 1839 /* Balancing does not lead to better packing. */ 1840 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1841 return NO_BALANCING_NEEDED; 1842 } 1843 1844 /* 1845 * Current node contain insufficient number of items. 1846 * Balancing is required. 1847 */ 1848 /* Check whether we can merge S[h] with left neighbor. */ 1849 if (tb->lnum[h] >= vn->vn_nr_item + 1) 1850 if (is_left_neighbor_in_cache(tb, h) 1851 || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) { 1852 int n; 1853 int order_L; 1854 1855 order_L = 1856 ((n = 1857 PATH_H_B_ITEM_ORDER(tb->tb_path, 1858 h)) == 1859 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1; 1860 n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE + 1861 KEY_SIZE); 1862 set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1); 1863 return CARRY_ON; 1864 } 1865 1866 /* Check whether we can merge S[h] with right neighbor. */ 1867 if (tb->rnum[h] >= vn->vn_nr_item + 1) { 1868 int n; 1869 int order_R; 1870 1871 order_R = 1872 ((n = 1873 PATH_H_B_ITEM_ORDER(tb->tb_path, 1874 h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1); 1875 n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE + 1876 KEY_SIZE); 1877 set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1); 1878 return CARRY_ON; 1879 } 1880 1881 /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ 1882 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { 1883 int to_r; 1884 1885 to_r = 1886 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + 1887 vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - 1888 tb->rnum[h]); 1889 set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, 1890 -1, -1); 1891 return CARRY_ON; 1892 } 1893 1894 /* For internal nodes try to borrow item from a neighbor */ 1895 RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root"); 1896 1897 /* Borrow one or two items from caching neighbor */ 1898 if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) { 1899 int from_l; 1900 1901 from_l = 1902 (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item + 1903 1) / 2 - (vn->vn_nr_item + 1); 1904 set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1); 1905 return CARRY_ON; 1906 } 1907 1908 set_parameters(tb, h, 0, 1909 -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item + 1910 1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1); 1911 return CARRY_ON; 1912} 1913 1914/* 1915 * Check whether current node S[h] is balanced when Decreasing its size by 1916 * Deleting or Truncating for LEAF node of S+tree. 1917 * Calculate parameters for balancing for current level h. 1918 * Parameters: 1919 * tb tree_balance structure; 1920 * h current level of the node; 1921 * inum item number in S[h]; 1922 * mode i - insert, p - paste; 1923 * Returns: 1 - schedule occurred; 1924 * 0 - balancing for higher levels needed; 1925 * -1 - no balancing for higher levels needed; 1926 * -2 - no disk space. 1927 */ 1928static int dc_check_balance_leaf(struct tree_balance *tb, int h) 1929{ 1930 struct virtual_node *vn = tb->tb_vn; 1931 1932 /* 1933 * Number of bytes that must be deleted from 1934 * (value is negative if bytes are deleted) buffer which 1935 * contains node being balanced. The mnemonic is that the 1936 * attempted change in node space used level is levbytes bytes. 1937 */ 1938 int levbytes; 1939 1940 /* the maximal item size */ 1941 int maxsize, ret; 1942 1943 /* 1944 * S0 is the node whose balance is currently being checked, 1945 * and F0 is its father. 1946 */ 1947 struct buffer_head *S0, *F0; 1948 int lfree, rfree /* free space in L and R */ ; 1949 1950 S0 = PATH_H_PBUFFER(tb->tb_path, 0); 1951 F0 = PATH_H_PPARENT(tb->tb_path, 0); 1952 1953 levbytes = tb->insert_size[h]; 1954 1955 maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */ 1956 1957 if (!F0) { /* S[0] is the root now. */ 1958 1959 RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0), 1960 "vs-8240: attempt to create empty buffer tree"); 1961 1962 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1963 return NO_BALANCING_NEEDED; 1964 } 1965 1966 if ((ret = get_parents(tb, h)) != CARRY_ON) 1967 return ret; 1968 1969 /* get free space of neighbors */ 1970 rfree = get_rfree(tb, h); 1971 lfree = get_lfree(tb, h); 1972 1973 create_virtual_node(tb, h); 1974 1975 /* if 3 leaves can be merge to one, set parameters and return */ 1976 if (are_leaves_removable(tb, lfree, rfree)) 1977 return CARRY_ON; 1978 1979 /* 1980 * determine maximal number of items we can shift to the left/right 1981 * neighbor and the maximal number of bytes that can flow to the 1982 * left/right neighbor from the left/right most liquid item that 1983 * cannot be shifted from S[0] entirely 1984 */ 1985 check_left(tb, h, lfree); 1986 check_right(tb, h, rfree); 1987 1988 /* check whether we can merge S with left neighbor. */ 1989 if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1) 1990 if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */ 1991 !tb->FR[h]) { 1992 1993 RFALSE(!tb->FL[h], 1994 "vs-8245: dc_check_balance_leaf: FL[h] must exist"); 1995 1996 /* set parameter to merge S[0] with its left neighbor */ 1997 set_parameters(tb, h, -1, 0, 0, NULL, -1, -1); 1998 return CARRY_ON; 1999 } 2000 2001 /* check whether we can merge S[0] with right neighbor. */ 2002 if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) { 2003 set_parameters(tb, h, 0, -1, 0, NULL, -1, -1); 2004 return CARRY_ON; 2005 } 2006 2007 /* 2008 * All contents of S[0] can be moved to the neighbors (L[0] & R[0]). 2009 * Set parameters and return 2010 */ 2011 if (is_leaf_removable(tb)) 2012 return CARRY_ON; 2013 2014 /* Balancing is not required. */ 2015 tb->s0num = vn->vn_nr_item; 2016 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 2017 return NO_BALANCING_NEEDED; 2018} 2019 2020/* 2021 * Check whether current node S[h] is balanced when Decreasing its size by 2022 * Deleting or Cutting. 2023 * Calculate parameters for balancing for current level h. 2024 * Parameters: 2025 * tb tree_balance structure; 2026 * h current level of the node; 2027 * inum item number in S[h]; 2028 * mode d - delete, c - cut. 2029 * Returns: 1 - schedule occurred; 2030 * 0 - balancing for higher levels needed; 2031 * -1 - no balancing for higher levels needed; 2032 * -2 - no disk space. 2033 */ 2034static int dc_check_balance(struct tree_balance *tb, int h) 2035{ 2036 RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)), 2037 "vs-8250: S is not initialized"); 2038 2039 if (h) 2040 return dc_check_balance_internal(tb, h); 2041 else 2042 return dc_check_balance_leaf(tb, h); 2043} 2044 2045/* 2046 * Check whether current node S[h] is balanced. 2047 * Calculate parameters for balancing for current level h. 2048 * Parameters: 2049 * 2050 * tb tree_balance structure: 2051 * 2052 * tb is a large structure that must be read about in the header 2053 * file at the same time as this procedure if the reader is 2054 * to successfully understand this procedure 2055 * 2056 * h current level of the node; 2057 * inum item number in S[h]; 2058 * mode i - insert, p - paste, d - delete, c - cut. 2059 * Returns: 1 - schedule occurred; 2060 * 0 - balancing for higher levels needed; 2061 * -1 - no balancing for higher levels needed; 2062 * -2 - no disk space. 2063 */ 2064static int check_balance(int mode, 2065 struct tree_balance *tb, 2066 int h, 2067 int inum, 2068 int pos_in_item, 2069 struct item_head *ins_ih, const void *data) 2070{ 2071 struct virtual_node *vn; 2072 2073 vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf); 2074 vn->vn_free_ptr = (char *)(tb->tb_vn + 1); 2075 vn->vn_mode = mode; 2076 vn->vn_affected_item_num = inum; 2077 vn->vn_pos_in_item = pos_in_item; 2078 vn->vn_ins_ih = ins_ih; 2079 vn->vn_data = data; 2080 2081 RFALSE(mode == M_INSERT && !vn->vn_ins_ih, 2082 "vs-8255: ins_ih can not be 0 in insert mode"); 2083 2084 /* Calculate balance parameters when size of node is increasing. */ 2085 if (tb->insert_size[h] > 0) 2086 return ip_check_balance(tb, h); 2087 2088 /* Calculate balance parameters when size of node is decreasing. */ 2089 return dc_check_balance(tb, h); 2090} 2091 2092/* Check whether parent at the path is the really parent of the current node.*/ 2093static int get_direct_parent(struct tree_balance *tb, int h) 2094{ 2095 struct buffer_head *bh; 2096 struct treepath *path = tb->tb_path; 2097 int position, 2098 path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h); 2099 2100 /* We are in the root or in the new root. */ 2101 if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { 2102 2103 RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET - 1, 2104 "PAP-8260: invalid offset in the path"); 2105 2106 if (PATH_OFFSET_PBUFFER(path, FIRST_PATH_ELEMENT_OFFSET)-> 2107 b_blocknr == SB_ROOT_BLOCK(tb->tb_sb)) { 2108 /* Root is not changed. */ 2109 PATH_OFFSET_PBUFFER(path, path_offset - 1) = NULL; 2110 PATH_OFFSET_POSITION(path, path_offset - 1) = 0; 2111 return CARRY_ON; 2112 } 2113 /* Root is changed and we must recalculate the path. */ 2114 return REPEAT_SEARCH; 2115 } 2116 2117 /* Parent in the path is not in the tree. */ 2118 if (!B_IS_IN_TREE 2119 (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1))) 2120 return REPEAT_SEARCH; 2121 2122 if ((position = 2123 PATH_OFFSET_POSITION(path, 2124 path_offset - 1)) > B_NR_ITEMS(bh)) 2125 return REPEAT_SEARCH; 2126 2127 /* Parent in the path is not parent of the current node in the tree. */ 2128 if (B_N_CHILD_NUM(bh, position) != 2129 PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr) 2130 return REPEAT_SEARCH; 2131 2132 if (buffer_locked(bh)) { 2133 int depth = reiserfs_write_unlock_nested(tb->tb_sb); 2134 __wait_on_buffer(bh); 2135 reiserfs_write_lock_nested(tb->tb_sb, depth); 2136 if (FILESYSTEM_CHANGED_TB(tb)) 2137 return REPEAT_SEARCH; 2138 } 2139 2140 /* 2141 * Parent in the path is unlocked and really parent 2142 * of the current node. 2143 */ 2144 return CARRY_ON; 2145} 2146 2147/* 2148 * Using lnum[h] and rnum[h] we should determine what neighbors 2149 * of S[h] we 2150 * need in order to balance S[h], and get them if necessary. 2151 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 2152 * CARRY_ON - schedule didn't occur while the function worked; 2153 */ 2154static int get_neighbors(struct tree_balance *tb, int h) 2155{ 2156 int child_position, 2157 path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h + 1); 2158 unsigned long son_number; 2159 struct super_block *sb = tb->tb_sb; 2160 struct buffer_head *bh; 2161 int depth; 2162 2163 PROC_INFO_INC(sb, get_neighbors[h]); 2164 2165 if (tb->lnum[h]) { 2166 /* We need left neighbor to balance S[h]. */ 2167 PROC_INFO_INC(sb, need_l_neighbor[h]); 2168 bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset); 2169 2170 RFALSE(bh == tb->FL[h] && 2171 !PATH_OFFSET_POSITION(tb->tb_path, path_offset), 2172 "PAP-8270: invalid position in the parent"); 2173 2174 child_position = 2175 (bh == 2176 tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb-> 2177 FL[h]); 2178 son_number = B_N_CHILD_NUM(tb->FL[h], child_position); 2179 depth = reiserfs_write_unlock_nested(tb->tb_sb); 2180 bh = sb_bread(sb, son_number); 2181 reiserfs_write_lock_nested(tb->tb_sb, depth); 2182 if (!bh) 2183 return IO_ERROR; 2184 if (FILESYSTEM_CHANGED_TB(tb)) { 2185 brelse(bh); 2186 PROC_INFO_INC(sb, get_neighbors_restart[h]); 2187 return REPEAT_SEARCH; 2188 } 2189 2190 RFALSE(!B_IS_IN_TREE(tb->FL[h]) || 2191 child_position > B_NR_ITEMS(tb->FL[h]) || 2192 B_N_CHILD_NUM(tb->FL[h], child_position) != 2193 bh->b_blocknr, "PAP-8275: invalid parent"); 2194 RFALSE(!B_IS_IN_TREE(bh), "PAP-8280: invalid child"); 2195 RFALSE(!h && 2196 B_FREE_SPACE(bh) != 2197 MAX_CHILD_SIZE(bh) - 2198 dc_size(B_N_CHILD(tb->FL[0], child_position)), 2199 "PAP-8290: invalid child size of left neighbor"); 2200 2201 brelse(tb->L[h]); 2202 tb->L[h] = bh; 2203 } 2204 2205 /* We need right neighbor to balance S[path_offset]. */ 2206 if (tb->rnum[h]) { 2207 PROC_INFO_INC(sb, need_r_neighbor[h]); 2208 bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset); 2209 2210 RFALSE(bh == tb->FR[h] && 2211 PATH_OFFSET_POSITION(tb->tb_path, 2212 path_offset) >= 2213 B_NR_ITEMS(bh), 2214 "PAP-8295: invalid position in the parent"); 2215 2216 child_position = 2217 (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0; 2218 son_number = B_N_CHILD_NUM(tb->FR[h], child_position); 2219 depth = reiserfs_write_unlock_nested(tb->tb_sb); 2220 bh = sb_bread(sb, son_number); 2221 reiserfs_write_lock_nested(tb->tb_sb, depth); 2222 if (!bh) 2223 return IO_ERROR; 2224 if (FILESYSTEM_CHANGED_TB(tb)) { 2225 brelse(bh); 2226 PROC_INFO_INC(sb, get_neighbors_restart[h]); 2227 return REPEAT_SEARCH; 2228 } 2229 brelse(tb->R[h]); 2230 tb->R[h] = bh; 2231 2232 RFALSE(!h 2233 && B_FREE_SPACE(bh) != 2234 MAX_CHILD_SIZE(bh) - 2235 dc_size(B_N_CHILD(tb->FR[0], child_position)), 2236 "PAP-8300: invalid child size of right neighbor (%d != %d - %d)", 2237 B_FREE_SPACE(bh), MAX_CHILD_SIZE(bh), 2238 dc_size(B_N_CHILD(tb->FR[0], child_position))); 2239 2240 } 2241 return CARRY_ON; 2242} 2243 2244static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh) 2245{ 2246 int max_num_of_items; 2247 int max_num_of_entries; 2248 unsigned long blocksize = sb->s_blocksize; 2249 2250#define MIN_NAME_LEN 1 2251 2252 max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN); 2253 max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) / 2254 (DEH_SIZE + MIN_NAME_LEN); 2255 2256 return sizeof(struct virtual_node) + 2257 max(max_num_of_items * sizeof(struct virtual_item), 2258 sizeof(struct virtual_item) + sizeof(struct direntry_uarea) + 2259 (max_num_of_entries - 1) * sizeof(__u16)); 2260} 2261 2262/* 2263 * maybe we should fail balancing we are going to perform when kmalloc 2264 * fails several times. But now it will loop until kmalloc gets 2265 * required memory 2266 */ 2267static int get_mem_for_virtual_node(struct tree_balance *tb) 2268{ 2269 int check_fs = 0; 2270 int size; 2271 char *buf; 2272 2273 size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path)); 2274 2275 /* we have to allocate more memory for virtual node */ 2276 if (size > tb->vn_buf_size) { 2277 if (tb->vn_buf) { 2278 /* free memory allocated before */ 2279 kfree(tb->vn_buf); 2280 /* this is not needed if kfree is atomic */ 2281 check_fs = 1; 2282 } 2283 2284 /* virtual node requires now more memory */ 2285 tb->vn_buf_size = size; 2286 2287 /* get memory for virtual item */ 2288 buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN); 2289 if (!buf) { 2290 /* 2291 * getting memory with GFP_KERNEL priority may involve 2292 * balancing now (due to indirect_to_direct conversion 2293 * on dcache shrinking). So, release path and collected 2294 * resources here 2295 */ 2296 free_buffers_in_tb(tb); 2297 buf = kmalloc(size, GFP_NOFS); 2298 if (!buf) { 2299 tb->vn_buf_size = 0; 2300 } 2301 tb->vn_buf = buf; 2302 schedule(); 2303 return REPEAT_SEARCH; 2304 } 2305 2306 tb->vn_buf = buf; 2307 } 2308 2309 if (check_fs && FILESYSTEM_CHANGED_TB(tb)) 2310 return REPEAT_SEARCH; 2311 2312 return CARRY_ON; 2313} 2314 2315#ifdef CONFIG_REISERFS_CHECK 2316static void tb_buffer_sanity_check(struct super_block *sb, 2317 struct buffer_head *bh, 2318 const char *descr, int level) 2319{ 2320 if (bh) { 2321 if (atomic_read(&(bh->b_count)) <= 0) 2322 2323 reiserfs_panic(sb, "jmacd-1", "negative or zero " 2324 "reference counter for buffer %s[%d] " 2325 "(%b)", descr, level, bh); 2326 2327 if (!buffer_uptodate(bh)) 2328 reiserfs_panic(sb, "jmacd-2", "buffer is not up " 2329 "to date %s[%d] (%b)", 2330 descr, level, bh); 2331 2332 if (!B_IS_IN_TREE(bh)) 2333 reiserfs_panic(sb, "jmacd-3", "buffer is not " 2334 "in tree %s[%d] (%b)", 2335 descr, level, bh); 2336 2337 if (bh->b_bdev != sb->s_bdev) 2338 reiserfs_panic(sb, "jmacd-4", "buffer has wrong " 2339 "device %s[%d] (%b)", 2340 descr, level, bh); 2341 2342 if (bh->b_size != sb->s_blocksize) 2343 reiserfs_panic(sb, "jmacd-5", "buffer has wrong " 2344 "blocksize %s[%d] (%b)", 2345 descr, level, bh); 2346 2347 if (bh->b_blocknr > SB_BLOCK_COUNT(sb)) 2348 reiserfs_panic(sb, "jmacd-6", "buffer block " 2349 "number too high %s[%d] (%b)", 2350 descr, level, bh); 2351 } 2352} 2353#else 2354static void tb_buffer_sanity_check(struct super_block *sb, 2355 struct buffer_head *bh, 2356 const char *descr, int level) 2357{; 2358} 2359#endif 2360 2361static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh) 2362{ 2363 return reiserfs_prepare_for_journal(s, bh, 0); 2364} 2365 2366static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) 2367{ 2368 struct buffer_head *locked; 2369#ifdef CONFIG_REISERFS_CHECK 2370 int repeat_counter = 0; 2371#endif 2372 int i; 2373 2374 do { 2375 2376 locked = NULL; 2377 2378 for (i = tb->tb_path->path_length; 2379 !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) { 2380 if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) { 2381 /* 2382 * if I understand correctly, we can only 2383 * be sure the last buffer in the path is 2384 * in the tree --clm 2385 */ 2386#ifdef CONFIG_REISERFS_CHECK 2387 if (PATH_PLAST_BUFFER(tb->tb_path) == 2388 PATH_OFFSET_PBUFFER(tb->tb_path, i)) 2389 tb_buffer_sanity_check(tb->tb_sb, 2390 PATH_OFFSET_PBUFFER 2391 (tb->tb_path, 2392 i), "S", 2393 tb->tb_path-> 2394 path_length - i); 2395#endif 2396 if (!clear_all_dirty_bits(tb->tb_sb, 2397 PATH_OFFSET_PBUFFER 2398 (tb->tb_path, 2399 i))) { 2400 locked = 2401 PATH_OFFSET_PBUFFER(tb->tb_path, 2402 i); 2403 } 2404 } 2405 } 2406 2407 for (i = 0; !locked && i < MAX_HEIGHT && tb->insert_size[i]; 2408 i++) { 2409 2410 if (tb->lnum[i]) { 2411 2412 if (tb->L[i]) { 2413 tb_buffer_sanity_check(tb->tb_sb, 2414 tb->L[i], 2415 "L", i); 2416 if (!clear_all_dirty_bits 2417 (tb->tb_sb, tb->L[i])) 2418 locked = tb->L[i]; 2419 } 2420 2421 if (!locked && tb->FL[i]) { 2422 tb_buffer_sanity_check(tb->tb_sb, 2423 tb->FL[i], 2424 "FL", i); 2425 if (!clear_all_dirty_bits 2426 (tb->tb_sb, tb->FL[i])) 2427 locked = tb->FL[i]; 2428 } 2429 2430 if (!locked && tb->CFL[i]) { 2431 tb_buffer_sanity_check(tb->tb_sb, 2432 tb->CFL[i], 2433 "CFL", i); 2434 if (!clear_all_dirty_bits 2435 (tb->tb_sb, tb->CFL[i])) 2436 locked = tb->CFL[i]; 2437 } 2438 2439 } 2440 2441 if (!locked && (tb->rnum[i])) { 2442 2443 if (tb->R[i]) { 2444 tb_buffer_sanity_check(tb->tb_sb, 2445 tb->R[i], 2446 "R", i); 2447 if (!clear_all_dirty_bits 2448 (tb->tb_sb, tb->R[i])) 2449 locked = tb->R[i]; 2450 } 2451 2452 if (!locked && tb->FR[i]) { 2453 tb_buffer_sanity_check(tb->tb_sb, 2454 tb->FR[i], 2455 "FR", i); 2456 if (!clear_all_dirty_bits 2457 (tb->tb_sb, tb->FR[i])) 2458 locked = tb->FR[i]; 2459 } 2460 2461 if (!locked && tb->CFR[i]) { 2462 tb_buffer_sanity_check(tb->tb_sb, 2463 tb->CFR[i], 2464 "CFR", i); 2465 if (!clear_all_dirty_bits 2466 (tb->tb_sb, tb->CFR[i])) 2467 locked = tb->CFR[i]; 2468 } 2469 } 2470 } 2471 2472 /* 2473 * as far as I can tell, this is not required. The FEB list 2474 * seems to be full of newly allocated nodes, which will 2475 * never be locked, dirty, or anything else. 2476 * To be safe, I'm putting in the checks and waits in. 2477 * For the moment, they are needed to keep the code in 2478 * journal.c from complaining about the buffer. 2479 * That code is inside CONFIG_REISERFS_CHECK as well. --clm 2480 */ 2481 for (i = 0; !locked && i < MAX_FEB_SIZE; i++) { 2482 if (tb->FEB[i]) { 2483 if (!clear_all_dirty_bits 2484 (tb->tb_sb, tb->FEB[i])) 2485 locked = tb->FEB[i]; 2486 } 2487 } 2488 2489 if (locked) { 2490 int depth; 2491#ifdef CONFIG_REISERFS_CHECK 2492 repeat_counter++; 2493 if ((repeat_counter % 10000) == 0) { 2494 reiserfs_warning(tb->tb_sb, "reiserfs-8200", 2495 "too many iterations waiting " 2496 "for buffer to unlock " 2497 "(%b)", locked); 2498 2499 /* Don't loop forever. Try to recover from possible error. */ 2500 2501 return (FILESYSTEM_CHANGED_TB(tb)) ? 2502 REPEAT_SEARCH : CARRY_ON; 2503 } 2504#endif 2505 depth = reiserfs_write_unlock_nested(tb->tb_sb); 2506 __wait_on_buffer(locked); 2507 reiserfs_write_lock_nested(tb->tb_sb, depth); 2508 if (FILESYSTEM_CHANGED_TB(tb)) 2509 return REPEAT_SEARCH; 2510 } 2511 2512 } while (locked); 2513 2514 return CARRY_ON; 2515} 2516 2517/* 2518 * Prepare for balancing, that is 2519 * get all necessary parents, and neighbors; 2520 * analyze what and where should be moved; 2521 * get sufficient number of new nodes; 2522 * Balancing will start only after all resources will be collected at a time. 2523 * 2524 * When ported to SMP kernels, only at the last moment after all needed nodes 2525 * are collected in cache, will the resources be locked using the usual 2526 * textbook ordered lock acquisition algorithms. Note that ensuring that 2527 * this code neither write locks what it does not need to write lock nor locks 2528 * out of order will be a pain in the butt that could have been avoided. 2529 * Grumble grumble. -Hans 2530 * 2531 * fix is meant in the sense of render unchanging 2532 * 2533 * Latency might be improved by first gathering a list of what buffers 2534 * are needed and then getting as many of them in parallel as possible? -Hans 2535 * 2536 * Parameters: 2537 * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append) 2538 * tb tree_balance structure; 2539 * inum item number in S[h]; 2540 * pos_in_item - comment this if you can 2541 * ins_ih item head of item being inserted 2542 * data inserted item or data to be pasted 2543 * Returns: 1 - schedule occurred while the function worked; 2544 * 0 - schedule didn't occur while the function worked; 2545 * -1 - if no_disk_space 2546 */ 2547 2548int fix_nodes(int op_mode, struct tree_balance *tb, 2549 struct item_head *ins_ih, const void *data) 2550{ 2551 int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path); 2552 int pos_in_item; 2553 2554 /* 2555 * we set wait_tb_buffers_run when we have to restore any dirty 2556 * bits cleared during wait_tb_buffers_run 2557 */ 2558 int wait_tb_buffers_run = 0; 2559 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 2560 2561 ++REISERFS_SB(tb->tb_sb)->s_fix_nodes; 2562 2563 pos_in_item = tb->tb_path->pos_in_item; 2564 2565 tb->fs_gen = get_generation(tb->tb_sb); 2566 2567 /* 2568 * we prepare and log the super here so it will already be in the 2569 * transaction when do_balance needs to change it. 2570 * This way do_balance won't have to schedule when trying to prepare 2571 * the super for logging 2572 */ 2573 reiserfs_prepare_for_journal(tb->tb_sb, 2574 SB_BUFFER_WITH_SB(tb->tb_sb), 1); 2575 journal_mark_dirty(tb->transaction_handle, 2576 SB_BUFFER_WITH_SB(tb->tb_sb)); 2577 if (FILESYSTEM_CHANGED_TB(tb)) 2578 return REPEAT_SEARCH; 2579 2580 /* if it possible in indirect_to_direct conversion */ 2581 if (buffer_locked(tbS0)) { 2582 int depth = reiserfs_write_unlock_nested(tb->tb_sb); 2583 __wait_on_buffer(tbS0); 2584 reiserfs_write_lock_nested(tb->tb_sb, depth); 2585 if (FILESYSTEM_CHANGED_TB(tb)) 2586 return REPEAT_SEARCH; 2587 } 2588#ifdef CONFIG_REISERFS_CHECK 2589 if (REISERFS_SB(tb->tb_sb)->cur_tb) { 2590 print_cur_tb("fix_nodes"); 2591 reiserfs_panic(tb->tb_sb, "PAP-8305", 2592 "there is pending do_balance"); 2593 } 2594 2595 if (!buffer_uptodate(tbS0) || !B_IS_IN_TREE(tbS0)) 2596 reiserfs_panic(tb->tb_sb, "PAP-8320", "S[0] (%b %z) is " 2597 "not uptodate at the beginning of fix_nodes " 2598 "or not in tree (mode %c)", 2599 tbS0, tbS0, op_mode); 2600 2601 /* Check parameters. */ 2602 switch (op_mode) { 2603 case M_INSERT: 2604 if (item_num <= 0 || item_num > B_NR_ITEMS(tbS0)) 2605 reiserfs_panic(tb->tb_sb, "PAP-8330", "Incorrect " 2606 "item number %d (in S0 - %d) in case " 2607 "of insert", item_num, 2608 B_NR_ITEMS(tbS0)); 2609 break; 2610 case M_PASTE: 2611 case M_DELETE: 2612 case M_CUT: 2613 if (item_num < 0 || item_num >= B_NR_ITEMS(tbS0)) { 2614 print_block(tbS0, 0, -1, -1); 2615 reiserfs_panic(tb->tb_sb, "PAP-8335", "Incorrect " 2616 "item number(%d); mode = %c " 2617 "insert_size = %d", 2618 item_num, op_mode, 2619 tb->insert_size[0]); 2620 } 2621 break; 2622 default: 2623 reiserfs_panic(tb->tb_sb, "PAP-8340", "Incorrect mode " 2624 "of operation"); 2625 } 2626#endif 2627 2628 if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH) 2629 /* FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat */ 2630 return REPEAT_SEARCH; 2631 2632 /* Starting from the leaf level; for all levels h of the tree. */ 2633 for (h = 0; h < MAX_HEIGHT && tb->insert_size[h]; h++) { 2634 ret = get_direct_parent(tb, h); 2635 if (ret != CARRY_ON) 2636 goto repeat; 2637 2638 ret = check_balance(op_mode, tb, h, item_num, 2639 pos_in_item, ins_ih, data); 2640 if (ret != CARRY_ON) { 2641 if (ret == NO_BALANCING_NEEDED) { 2642 /* No balancing for higher levels needed. */ 2643 ret = get_neighbors(tb, h); 2644 if (ret != CARRY_ON) 2645 goto repeat; 2646 if (h != MAX_HEIGHT - 1) 2647 tb->insert_size[h + 1] = 0; 2648 /* 2649 * ok, analysis and resource gathering 2650 * are complete 2651 */ 2652 break; 2653 } 2654 goto repeat; 2655 } 2656 2657 ret = get_neighbors(tb, h); 2658 if (ret != CARRY_ON) 2659 goto repeat; 2660 2661 /* 2662 * No disk space, or schedule occurred and analysis may be 2663 * invalid and needs to be redone. 2664 */ 2665 ret = get_empty_nodes(tb, h); 2666 if (ret != CARRY_ON) 2667 goto repeat; 2668 2669 /* 2670 * We have a positive insert size but no nodes exist on this 2671 * level, this means that we are creating a new root. 2672 */ 2673 if (!PATH_H_PBUFFER(tb->tb_path, h)) { 2674 2675 RFALSE(tb->blknum[h] != 1, 2676 "PAP-8350: creating new empty root"); 2677 2678 if (h < MAX_HEIGHT - 1) 2679 tb->insert_size[h + 1] = 0; 2680 } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) { 2681 /* 2682 * The tree needs to be grown, so this node S[h] 2683 * which is the root node is split into two nodes, 2684 * and a new node (S[h+1]) will be created to 2685 * become the root node. 2686 */ 2687 if (tb->blknum[h] > 1) { 2688 2689 RFALSE(h == MAX_HEIGHT - 1, 2690 "PAP-8355: attempt to create too high of a tree"); 2691 2692 tb->insert_size[h + 1] = 2693 (DC_SIZE + 2694 KEY_SIZE) * (tb->blknum[h] - 1) + 2695 DC_SIZE; 2696 } else if (h < MAX_HEIGHT - 1) 2697 tb->insert_size[h + 1] = 0; 2698 } else 2699 tb->insert_size[h + 1] = 2700 (DC_SIZE + KEY_SIZE) * (tb->blknum[h] - 1); 2701 } 2702 2703 ret = wait_tb_buffers_until_unlocked(tb); 2704 if (ret == CARRY_ON) { 2705 if (FILESYSTEM_CHANGED_TB(tb)) { 2706 wait_tb_buffers_run = 1; 2707 ret = REPEAT_SEARCH; 2708 goto repeat; 2709 } else { 2710 return CARRY_ON; 2711 } 2712 } else { 2713 wait_tb_buffers_run = 1; 2714 goto repeat; 2715 } 2716 2717repeat: 2718 /* 2719 * fix_nodes was unable to perform its calculation due to 2720 * filesystem got changed under us, lack of free disk space or i/o 2721 * failure. If the first is the case - the search will be 2722 * repeated. For now - free all resources acquired so far except 2723 * for the new allocated nodes 2724 */ 2725 { 2726 int i; 2727 2728 /* Release path buffers. */ 2729 if (wait_tb_buffers_run) { 2730 pathrelse_and_restore(tb->tb_sb, tb->tb_path); 2731 } else { 2732 pathrelse(tb->tb_path); 2733 } 2734 /* brelse all resources collected for balancing */ 2735 for (i = 0; i < MAX_HEIGHT; i++) { 2736 if (wait_tb_buffers_run) { 2737 reiserfs_restore_prepared_buffer(tb->tb_sb, 2738 tb->L[i]); 2739 reiserfs_restore_prepared_buffer(tb->tb_sb, 2740 tb->R[i]); 2741 reiserfs_restore_prepared_buffer(tb->tb_sb, 2742 tb->FL[i]); 2743 reiserfs_restore_prepared_buffer(tb->tb_sb, 2744 tb->FR[i]); 2745 reiserfs_restore_prepared_buffer(tb->tb_sb, 2746 tb-> 2747 CFL[i]); 2748 reiserfs_restore_prepared_buffer(tb->tb_sb, 2749 tb-> 2750 CFR[i]); 2751 } 2752 2753 brelse(tb->L[i]); 2754 brelse(tb->R[i]); 2755 brelse(tb->FL[i]); 2756 brelse(tb->FR[i]); 2757 brelse(tb->CFL[i]); 2758 brelse(tb->CFR[i]); 2759 2760 tb->L[i] = NULL; 2761 tb->R[i] = NULL; 2762 tb->FL[i] = NULL; 2763 tb->FR[i] = NULL; 2764 tb->CFL[i] = NULL; 2765 tb->CFR[i] = NULL; 2766 } 2767 2768 if (wait_tb_buffers_run) { 2769 for (i = 0; i < MAX_FEB_SIZE; i++) { 2770 if (tb->FEB[i]) 2771 reiserfs_restore_prepared_buffer 2772 (tb->tb_sb, tb->FEB[i]); 2773 } 2774 } 2775 return ret; 2776 } 2777 2778} 2779 2780void unfix_nodes(struct tree_balance *tb) 2781{ 2782 int i; 2783 2784 /* Release path buffers. */ 2785 pathrelse_and_restore(tb->tb_sb, tb->tb_path); 2786 2787 /* brelse all resources collected for balancing */ 2788 for (i = 0; i < MAX_HEIGHT; i++) { 2789 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]); 2790 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]); 2791 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]); 2792 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]); 2793 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]); 2794 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]); 2795 2796 brelse(tb->L[i]); 2797 brelse(tb->R[i]); 2798 brelse(tb->FL[i]); 2799 brelse(tb->FR[i]); 2800 brelse(tb->CFL[i]); 2801 brelse(tb->CFR[i]); 2802 } 2803 2804 /* deal with list of allocated (used and unused) nodes */ 2805 for (i = 0; i < MAX_FEB_SIZE; i++) { 2806 if (tb->FEB[i]) { 2807 b_blocknr_t blocknr = tb->FEB[i]->b_blocknr; 2808 /* 2809 * de-allocated block which was not used by 2810 * balancing and bforget about buffer for it 2811 */ 2812 brelse(tb->FEB[i]); 2813 reiserfs_free_block(tb->transaction_handle, NULL, 2814 blocknr, 0); 2815 } 2816 if (tb->used[i]) { 2817 /* release used as new nodes including a new root */ 2818 brelse(tb->used[i]); 2819 } 2820 } 2821 2822 kfree(tb->vn_buf); 2823 2824} 2825