do_balan.c revision 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2
/*
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */

/* Now we have all buffers that must be used in balancing of the tree */
/* Further calculations can not cause schedule(), and thus the buffer */
/* tree will be stable until the balancing is finished */
/* balance the tree according to the analysis made before, */
/* and using buffers obtained after all above. */


/**
 ** balance_leaf_when_delete
 ** balance_leaf
 ** do_balance
 **
 **/

#include <linux/config.h>
#include <asm/uaccess.h>
#include <linux/time.h>
#include <linux/reiserfs_fs.h>
#include <linux/buffer_head.h>

#ifdef CONFIG_REISERFS_CHECK

struct tree_balance * cur_tb = NULL;	/* detects whether more than one
					   copy of tb exists as a means
					   of checking whether schedule
					   is interrupting do_balance */
#endif

inline void do_balance_mark_leaf_dirty (struct tree_balance * tb,
					struct buffer_head * bh, int flag)
{
	journal_mark_dirty(tb->transaction_handle,
			   tb->transaction_handle->t_super, bh);
}

#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty


/* summary:
   if deleting something ( tb->insert_size[0] < 0 )
	return balance_leaf_when_delete(); (flag d handled here)
   else
	if lnum is larger than 0 we put items into the left node
	if rnum is larger than 0 we put items into the right node
	if snum1 is larger than 0 we put items into the new node s1
	if snum2 is larger than 0 we put items into the new node s2
   Note that all *num* count new items being created.

   It would be easier to read balance_leaf() if each of these summary
   lines was a separate procedure rather than being inlined.  I think
   that there are many passages here and in balance_leaf_when_delete()
   in which two calls to one procedure can replace two passages, and it
   might save cache space and improve software maintenance costs to do so.

   Vladimir made the perceptive comment that we should offload most of
   the decision making in this function into fix_nodes/check_balance, and
   then create some sort of structure in tb that says what actions should
   be performed by do_balance.

   -Hans */


/* Balance leaf node in case of delete or cut: insert_size[0] < 0
 *
 * lnum, rnum can have values >= -1
 *	-1 means that the neighbor must be joined with S
 *	 0 means that nothing should be done with the neighbor
 *	>0 means to shift entirely or partly the specified number of items
 *	   to the neighbor
 */
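/* For example, in this delete path lnum[0] == -1 together with rnum[0] == 0
 * means "join all of S[0] into L[0] and free S[0]", while lnum[0] == 2 with
 * lbytes == -1 means "shift two whole items from S[0] into L[0]". */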
static int balance_leaf_when_delete (struct tree_balance * tb, int flag)
{
	struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path);
	int item_pos = PATH_LAST_POSITION (tb->tb_path);
	int pos_in_item = tb->tb_path->pos_in_item;
	struct buffer_info bi;
	int n;
	struct item_head * ih;

	RFALSE( tb->FR[0] && B_LEVEL (tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
		"vs- 12000: level: wrong FR %z", tb->FR[0]);
	RFALSE( tb->blknum[0] > 1,
		"PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
	RFALSE( ! tb->blknum[0] && ! PATH_H_PPARENT(tb->tb_path, 0),
		"PAP-12010: tree can not be empty");

	ih = B_N_PITEM_HEAD (tbS0, item_pos);

	/* Delete or truncate the item */

	switch (flag) {
	case M_DELETE:	/* delete item in S[0] */

		RFALSE( ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
			"vs-12013: mode Delete, insert size %d, ih to be deleted %h",
			-tb->insert_size[0], ih);

		bi.tb = tb;
		bi.bi_bh = tbS0;
		bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
		bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
		leaf_delete_items (&bi, 0, item_pos, 1, -1);

		if ( ! item_pos && tb->CFL[0] ) {
			if ( B_NR_ITEMS(tbS0) ) {
				replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0);
			} else {
				if ( ! PATH_H_POSITION (tb->tb_path, 1) )
					replace_key(tb, tb->CFL[0],tb->lkey[0],PATH_H_PPARENT(tb->tb_path, 0),0);
			}
		}

		RFALSE( ! item_pos && !tb->CFL[0],
			"PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], tb->L[0]);

		break;

	case M_CUT: {	/* cut item in S[0] */
		bi.tb = tb;
		bi.bi_bh = tbS0;
		bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
		bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
		if (is_direntry_le_ih (ih)) {

			/* UFS unlink semantics are such that you can only
			   delete one directory entry at a time. */
			/* when we cut a directory tb->insert_size[0] means
			   number of entries to be cut (always 1) */
			tb->insert_size[0] = -1;
			leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);

			RFALSE( ! item_pos && ! pos_in_item && ! tb->CFL[0],
				"PAP-12030: can not change delimiting key. CFL[0]=%p",
				tb->CFL[0]);

			if ( ! item_pos && ! pos_in_item && tb->CFL[0] ) {
				replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0);
			}
		} else {
			leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);

			RFALSE( ! ih_item_len(ih),
				"PAP-12035: cut must leave non-zero dynamic length of item");
		}
		break;
	}

	default:
		print_cur_tb ("12040");
		reiserfs_panic (tb->tb_sb, "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)",
				(flag == M_PASTE) ? "PASTE" : ((flag == M_INSERT) ? "INSERT" : "UNKNOWN"), flag);
	}

	/* the rule is that no shifting occurs unless by shifting a node can be freed */
	n = B_NR_ITEMS(tbS0);
	if ( tb->lnum[0] ) {	/* L[0] takes part in balancing */
		if ( tb->lnum[0] == -1 ) {	/* L[0] must be joined with S[0] */
			if ( tb->rnum[0] == -1 ) {	/* R[0] must be also joined with S[0] */
				if ( tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0) ) {
					/* all contents of all the 3 buffers will be in L[0] */
					if ( PATH_H_POSITION (tb->tb_path, 1) == 0 && 1 < B_NR_ITEMS(tb->FR[0]) )
						replace_key(tb, tb->CFL[0],tb->lkey[0],tb->FR[0],1);

					leaf_move_items (LEAF_FROM_S_TO_L, tb, n, -1, NULL);
					leaf_move_items (LEAF_FROM_R_TO_L, tb, B_NR_ITEMS(tb->R[0]), -1, NULL);

					reiserfs_invalidate_buffer (tb, tbS0);
					reiserfs_invalidate_buffer (tb, tb->R[0]);

					return 0;
				}
				/* all contents of all the 3 buffers will be in R[0] */
				leaf_move_items (LEAF_FROM_S_TO_R, tb, n, -1, NULL);
				leaf_move_items (LEAF_FROM_L_TO_R, tb, B_NR_ITEMS(tb->L[0]), -1, NULL);

				/* right_delimiting_key is correct in R[0] */
				replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);

				reiserfs_invalidate_buffer (tb, tbS0);
				reiserfs_invalidate_buffer (tb, tb->L[0]);

				return -1;
			}

			RFALSE( tb->rnum[0] != 0,
				"PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
			/* all contents of L[0] and S[0] will be in L[0] */
			leaf_shift_left(tb, n, -1);

			reiserfs_invalidate_buffer (tb, tbS0);

			return 0;
		}
		/* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */

		RFALSE( ( tb->lnum[0] + tb->rnum[0] < n ) ||
			( tb->lnum[0] + tb->rnum[0] > n+1 ),
			"PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
			tb->rnum[0], tb->lnum[0], n);
		RFALSE( ( tb->lnum[0] + tb->rnum[0] == n ) &&
			(tb->lbytes != -1 || tb->rbytes != -1),
			"PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
			tb->rbytes, tb->lbytes);
		RFALSE( ( tb->lnum[0] + tb->rnum[0] == n + 1 ) &&
			(tb->lbytes < 1 || tb->rbytes != -1),
			"PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
			tb->rbytes, tb->lbytes);

		leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
		leaf_shift_right(tb, tb->rnum[0], tb->rbytes);

		reiserfs_invalidate_buffer (tb, tbS0);

		return 0;
	}

	if ( tb->rnum[0] == -1 ) {
		/* all contents of R[0] and S[0] will be in R[0] */
		leaf_shift_right(tb, n, -1);
		reiserfs_invalidate_buffer (tb, tbS0);
		return 0;
	}

	RFALSE( tb->rnum[0],
		"PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
	return 0;
}
static int balance_leaf (struct tree_balance * tb,
			 struct item_head * ih,		/* item header of inserted item (this is on little endian) */
			 const char * body,		/* body of inserted item or bytes to paste */
			 int flag,			/* i - insert, d - delete, c - cut, p - paste
							   (see comment to do_balance) */
			 struct item_head * insert_key,	/* in our processing of one level we sometimes determine what
							   must be inserted into the next higher level.  This insertion
							   consists of a key or two keys and their corresponding
							   pointers */
			 struct buffer_head ** insert_ptr /* inserted node-ptrs for the next level */
    )
{
	struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path);
	int item_pos = PATH_LAST_POSITION (tb->tb_path);	/* index into the array of item headers in S[0]
								   of the affected item */
	struct buffer_info bi;
	struct buffer_head *S_new[2];	/* new nodes allocated to hold what could not fit into S */
	int snum[2];		/* number of items that will be placed
				   into S_new (includes partially shifted
				   items) */
	int sbytes[2];		/* if an item is partially shifted into S_new then
				   if it is a directory item
					it is the number of entries from the item that are shifted into S_new
				   else
					it is the number of bytes from the item that are shifted into S_new */
	int n, i;
	int ret_val;
	int pos_in_item;
	int zeros_num;

	PROC_INFO_INC( tb->tb_sb, balance_at[ 0 ] );

	/* Make balance in case insert_size[0] < 0 */
	if ( tb->insert_size[0] < 0 )
		return balance_leaf_when_delete (tb, flag);

	zeros_num = 0;
	if (flag == M_INSERT && body == 0)
		zeros_num = ih_item_len( ih );

	pos_in_item = tb->tb_path->pos_in_item;
	/* for indirect item pos_in_item is measured in unformatted node
	   pointers. Recalculate to bytes */
	if (flag != M_INSERT && is_indirect_le_ih (B_N_PITEM_HEAD (tbS0, item_pos)))
		pos_in_item *= UNFM_P_SIZE;
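	/* Example of the conversion above: with 4-byte unformatted node
	   pointers (UNFM_P_SIZE == 4), a pos_in_item of 3 pointers becomes
	   a byte offset of 12 within the indirect item body, which is the
	   unit the leaf_* helpers below work in. */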
	if ( tb->lnum[0] > 0 ) {
		/* Shift lnum[0] items from S[0] to the left neighbor L[0] */
		if ( item_pos < tb->lnum[0] ) {
			/* new item or part of it falls to L[0], shift it too */
			n = B_NR_ITEMS(tb->L[0]);

			switch (flag) {
			case M_INSERT:	/* insert item into L[0] */

				if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) {
					/* part of new item falls into L[0] */
					int new_item_len;
					int version;

					ret_val = leaf_shift_left (tb, tb->lnum[0]-1, -1);

					/* Calculate item length to insert to S[0] */
					new_item_len = ih_item_len(ih) - tb->lbytes;
					/* Calculate and check item length to insert to L[0] */
					put_ih_item_len(ih, ih_item_len(ih) - new_item_len );

					RFALSE( ih_item_len(ih) <= 0,
						"PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
						ih_item_len(ih));

					/* Insert new item into L[0] */
					bi.tb = tb;
					bi.bi_bh = tb->L[0];
					bi.bi_parent = tb->FL[0];
					bi.bi_position = get_left_neighbor_position (tb, 0);
					leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body,
							      zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);

					version = ih_version (ih);
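					/* The key offset of the part that stays in S[0] must advance
					   by the amount of data that went to L[0].  For a direct item
					   that is simply tb->lbytes bytes; for an indirect item every
					   UNFM_P_SIZE bytes of item body stand for one block of file
					   data, so the offset grows by lbytes * (blocksize / UNFM_P_SIZE),
					   i.e. the shift by (s_blocksize_bits - UNFM_P_SHIFT) below
					   (a shift by 10 for 4K blocks and 4-byte pointers). */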
					/* Calculate key component, item length and body to insert into S[0] */
					set_le_ih_k_offset( ih, le_ih_k_offset( ih ) +
							    (tb->lbytes << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0)) );

					put_ih_item_len( ih, new_item_len );
					if ( tb->lbytes > zeros_num ) {
						body += (tb->lbytes - zeros_num);
						zeros_num = 0;
					} else
						zeros_num -= tb->lbytes;

					RFALSE( ih_item_len(ih) <= 0,
						"PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
						ih_item_len(ih));
				} else {
					/* new item in whole falls into L[0] */
					/* Shift lnum[0]-1 items to L[0] */
					ret_val = leaf_shift_left(tb, tb->lnum[0]-1, tb->lbytes);
					/* Insert new item into L[0] */
					bi.tb = tb;
					bi.bi_bh = tb->L[0];
					bi.bi_parent = tb->FL[0];
					bi.bi_position = get_left_neighbor_position (tb, 0);
					leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, zeros_num);
					tb->insert_size[0] = 0;
					zeros_num = 0;
				}
				break;

			case M_PASTE:	/* append item in L[0] */

				if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) {
					/* we must shift the part of the appended item */
					if ( is_direntry_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) {

						RFALSE( zeros_num,
							"PAP-12090: invalid parameter in case of a directory");
						/* directory item */
						if ( tb->lbytes > pos_in_item ) {
							/* new directory entry falls into L[0] */
							struct item_head * pasted;
							int l_pos_in_item = pos_in_item;

							/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
							ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1);
							if ( ret_val && ! item_pos ) {
								pasted = B_N_PITEM_HEAD(tb->L[0],B_NR_ITEMS(tb->L[0])-1);
								l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes-1);
							}

							/* Append given directory entry to directory item */
							bi.tb = tb;
							bi.bi_bh = tb->L[0];
							bi.bi_parent = tb->FL[0];
							bi.bi_position = get_left_neighbor_position (tb, 0);
							leaf_paste_in_buffer (&bi, n + item_pos - ret_val, l_pos_in_item,
									      tb->insert_size[0], body, zeros_num);

							/* previous call prepared space for pasting the new entry,
							   the following call pastes this entry */

							/* when we have merged the directory item, pos_in_item has been changed too */

							/* paste new directory entry. 1 is entry number */
							leaf_paste_entries (bi.bi_bh, n + item_pos - ret_val, l_pos_in_item, 1,
									    (struct reiserfs_de_head *)body,
									    body + DEH_SIZE, tb->insert_size[0]);
							tb->insert_size[0] = 0;
						} else {
							/* new directory item doesn't fall into L[0] */
							/* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
							leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
						}
						/* Calculate new position to append in item body */
						pos_in_item -= tb->lbytes;
					} else {
						/* regular object */
						RFALSE( tb->lbytes <= 0,
							"PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
							tb->lbytes);
						RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
							"PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
							ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)), pos_in_item);

						if ( tb->lbytes >= pos_in_item ) {
							/* appended item will be in L[0] in whole */
							int l_n;

							/* this number of bytes must be appended to the last item of L[0] */
							l_n = tb->lbytes - pos_in_item;

							/* Calculate new insert_size[0] */
							tb->insert_size[0] -= l_n;

							RFALSE( tb->insert_size[0] <= 0,
								"PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
								tb->insert_size[0]);
							ret_val = leaf_shift_left(tb,tb->lnum[0],
										  ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)));
							/* Append to body of item in L[0] */
							bi.tb = tb;
							bi.bi_bh = tb->L[0];
							bi.bi_parent = tb->FL[0];
							bi.bi_position = get_left_neighbor_position (tb, 0);
							leaf_paste_in_buffer(&bi, n + item_pos - ret_val,
									     ih_item_len( B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)),
									     l_n, body, zeros_num > l_n ? l_n : zeros_num);
							/* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
							{
								int version;
								int temp_l = l_n;

								RFALSE (ih_item_len (B_N_PITEM_HEAD (tbS0, 0)),
									"PAP-12106: item length must be 0");
								RFALSE (comp_short_le_keys (B_N_PKEY (tbS0, 0),
											    B_N_PKEY (tb->L[0], n + item_pos - ret_val)),
									"PAP-12107: items must be of the same file");
								if (is_indirect_le_ih(B_N_PITEM_HEAD (tb->L[0], n + item_pos - ret_val))) {
									temp_l = l_n << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
								}
								/* update key of first item in S0 */
								version = ih_version (B_N_PITEM_HEAD (tbS0, 0));
								set_le_key_k_offset (version, B_N_PKEY (tbS0, 0),
										     le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + temp_l);
								/* update left delimiting key */
								set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]),
										     le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + temp_l);
							}

							/* Calculate new body, position in item and insert_size[0] */
							if ( l_n > zeros_num ) {
								body += (l_n - zeros_num);
								zeros_num = 0;
							} else
								zeros_num -= l_n;
							pos_in_item = 0;

							RFALSE( comp_short_le_keys
								(B_N_PKEY(tbS0,0),
								 B_N_PKEY(tb->L[0],B_NR_ITEMS(tb->L[0])-1)) ||
								!op_is_left_mergeable
								(B_N_PKEY (tbS0, 0), tbS0->b_size) ||
								!op_is_left_mergeable
								(B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]),
								 tbS0->b_size),
								"PAP-12120: item must be merge-able with left neighboring item");
						} else {
							/* only part of the appended item will be in L[0] */

							/* Calculate position in item for append in S[0] */
							pos_in_item -= tb->lbytes;

							RFALSE( pos_in_item <= 0,
								"PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);

							/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
							leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
						}
					}
				} else {
					/* appended item will be in L[0] in whole */
					struct item_head * pasted;

					if ( ! item_pos && op_is_left_mergeable (B_N_PKEY (tbS0, 0), tbS0->b_size) ) {
						/* if we paste into first item of S[0] and it is left mergeable,
						   then increment pos_in_item by the size of the last item in L[0] */
						pasted = B_N_PITEM_HEAD(tb->L[0],n-1);
						if ( is_direntry_le_ih (pasted) )
							pos_in_item += ih_entry_count(pasted);
						else
							pos_in_item += ih_item_len(pasted);
					}

					/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
					ret_val = leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
					/* Append to body of item in L[0] */
					bi.tb = tb;
					bi.bi_bh = tb->L[0];
					bi.bi_parent = tb->FL[0];
					bi.bi_position = get_left_neighbor_position (tb, 0);
					leaf_paste_in_buffer (&bi, n + item_pos - ret_val, pos_in_item, tb->insert_size[0],
							      body, zeros_num);

					/* if appended item is directory, paste entry */
					pasted = B_N_PITEM_HEAD (tb->L[0], n + item_pos - ret_val);
					if (is_direntry_le_ih (pasted))
						leaf_paste_entries (bi.bi_bh, n + item_pos - ret_val, pos_in_item, 1,
								    (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]);
					/* if appended item is indirect item, put unformatted node into un list */
					if (is_indirect_le_ih (pasted))
						set_ih_free_space (pasted, 0);
					tb->insert_size[0] = 0;
					zeros_num = 0;
				}
				break;
			default:	/* cases d and t */
				reiserfs_panic (tb->tb_sb, "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
						(flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
			}
		} else {
			/* new item doesn't fall into L[0] */
			leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
		}
	}	/* tb->lnum[0] > 0 */
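	/* If the item at the boundary was split (tb->lbytes != -1) its remainder
	   still occupies a slot in S[0], so effectively only lnum[0] - 1 whole
	   items left the node; that is what the adjustment below accounts for. */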
	/* Calculate new item position */
	item_pos -= ( tb->lnum[0] - (( tb->lbytes != -1 ) ? 1 : 0));

	if ( tb->rnum[0] > 0 ) {
		/* shift rnum[0] items from S[0] to the right neighbor R[0] */
		n = B_NR_ITEMS(tbS0);
		switch ( flag ) {

		case M_INSERT:	/* insert item */
			if ( n - tb->rnum[0] < item_pos ) {
				/* new item or its part falls to R[0] */
				if ( item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1 ) {
					/* part of new item falls into R[0] */
					loff_t old_key_comp, old_len, r_zeros_number;
					const char * r_body;
					int version;
					loff_t offset;

					leaf_shift_right(tb,tb->rnum[0]-1,-1);
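					/* ih is reused here: it is first rewritten to describe the
					   right part of the new item (offset advanced, length = rbytes),
					   which is inserted into R[0], and is then restored below to
					   describe the left part that will later be inserted into S[0]. */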
					version = ih_version(ih);
					/* Remember key component and item length */
					old_key_comp = le_ih_k_offset( ih );
					old_len = ih_item_len(ih);

					/* Calculate key component and item length to insert into R[0] */
					offset = le_ih_k_offset( ih ) +
						((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0));
					set_le_ih_k_offset( ih, offset );
					put_ih_item_len( ih, tb->rbytes);
					/* Insert part of the item into R[0] */
					bi.tb = tb;
					bi.bi_bh = tb->R[0];
					bi.bi_parent = tb->FR[0];
					bi.bi_position = get_right_neighbor_position (tb, 0);
					if ( (old_len - tb->rbytes) > zeros_num ) {
						r_zeros_number = 0;
						r_body = body + (old_len - tb->rbytes) - zeros_num;
					} else {
						r_body = body;
						r_zeros_number = zeros_num - (old_len - tb->rbytes);
						zeros_num -= r_zeros_number;
					}

					leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number);

					/* Replace right delimiting key by first key in R[0] */
					replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);

					/* Calculate key component and item length to insert into S[0] */
					set_le_ih_k_offset( ih, old_key_comp );
					put_ih_item_len( ih, old_len - tb->rbytes );

					tb->insert_size[0] -= tb->rbytes;

				} else {
					/* whole new item falls into R[0] */

					/* Shift rnum[0]-1 items to R[0] */
					ret_val = leaf_shift_right(tb,tb->rnum[0]-1,tb->rbytes);
					/* Insert new item into R[0] */
					bi.tb = tb;
					bi.bi_bh = tb->R[0];
					bi.bi_parent = tb->FR[0];
					bi.bi_position = get_right_neighbor_position (tb, 0);
					leaf_insert_into_buf (&bi, item_pos - n + tb->rnum[0] - 1, ih, body, zeros_num);

					if ( item_pos - n + tb->rnum[0] - 1 == 0 ) {
						replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
					}
					zeros_num = tb->insert_size[0] = 0;
				}
			} else {
				/* new item or part of it doesn't fall into R[0] */
				leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
			}
			break;

		case M_PASTE:	/* append item */

			if ( n - tb->rnum[0] <= item_pos ) {
				/* pasted item or part of it falls to R[0] */
				if ( item_pos == n - tb->rnum[0] && tb->rbytes != -1 ) {
					/* we must shift the part of the appended item */
					if ( is_direntry_le_ih (B_N_PITEM_HEAD(tbS0, item_pos))) {
						/* we append to directory item */
						int entry_count;

						RFALSE( zeros_num,
							"PAP-12145: invalid parameter in case of a directory");
						entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD(tbS0, item_pos));
						if ( entry_count - tb->rbytes < pos_in_item ) {
							/* new directory entry falls into R[0] */
							int paste_entry_position;

							RFALSE( tb->rbytes - 1 >= entry_count || ! tb->insert_size[0],
								"PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
								tb->rbytes, entry_count);
							/* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
							leaf_shift_right(tb,tb->rnum[0],tb->rbytes - 1);
							/* Paste given directory entry to directory item */
							paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
							bi.tb = tb;
							bi.bi_bh = tb->R[0];
							bi.bi_parent = tb->FR[0];
							bi.bi_position = get_right_neighbor_position (tb, 0);
							leaf_paste_in_buffer (&bi, 0, paste_entry_position,
									      tb->insert_size[0],body,zeros_num);
							/* paste entry */
							leaf_paste_entries (bi.bi_bh, 0, paste_entry_position, 1,
									    (struct reiserfs_de_head *)body,
									    body + DEH_SIZE, tb->insert_size[0]);

							if ( paste_entry_position == 0 ) {
								/* change delimiting keys */
								replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
							}

							tb->insert_size[0] = 0;
							pos_in_item++;
						} else {
							/* new directory entry doesn't fall into R[0] */
							leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
						}
					} else {
						/* regular object */
						int n_shift, n_rem, r_zeros_number;
						const char * r_body;
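						/* n_shift is how many bytes of the existing item tail must
						   move into R[0] so that, together with part of the new data,
						   rbytes bytes end up there; n_rem is how many of the new
						   bytes do not fit into R[0] and will be pasted into S[0]
						   afterwards (insert_size[0] is reduced to n_rem below). */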
						/* Calculate number of bytes which must be shifted from appended item */
						if ( (n_shift = tb->rbytes - tb->insert_size[0]) < 0 )
							n_shift = 0;

						RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD (tbS0, item_pos)),
						       "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
						       ih_item_len( B_N_PITEM_HEAD(tbS0,item_pos)), pos_in_item);

						leaf_shift_right(tb,tb->rnum[0],n_shift);
						/* Calculate number of bytes which must remain in body after appending to R[0] */
						if ( (n_rem = tb->insert_size[0] - tb->rbytes) < 0 )
							n_rem = 0;

						{
							int version;
							unsigned long temp_rem = n_rem;

							version = ih_version (B_N_PITEM_HEAD (tb->R[0],0));
							if (is_indirect_le_key(version,B_N_PKEY(tb->R[0],0))) {
								temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits -
										     UNFM_P_SHIFT);
							}
							set_le_key_k_offset (version, B_N_PKEY(tb->R[0],0),
									     le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + temp_rem);
							set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0]),
									     le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + temp_rem);
						}
/*						k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
						k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
						do_balance_mark_internal_dirty (tb, tb->CFR[0], 0);

						/* Append part of body into R[0] */
						bi.tb = tb;
						bi.bi_bh = tb->R[0];
						bi.bi_parent = tb->FR[0];
						bi.bi_position = get_right_neighbor_position (tb, 0);
						if ( n_rem > zeros_num ) {
							r_zeros_number = 0;
							r_body = body + n_rem - zeros_num;
						} else {
							r_body = body;
							r_zeros_number = zeros_num - n_rem;
							zeros_num -= r_zeros_number;
						}

						leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeros_number);

						if (is_indirect_le_ih (B_N_PITEM_HEAD(tb->R[0],0))) {
#if 0
							RFALSE( n_rem,
								"PAP-12160: paste more than one unformatted node pointer");
#endif
							set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), 0);
						}
						tb->insert_size[0] = n_rem;
						if ( ! n_rem )
							pos_in_item ++;
					}
				} else {
					/* pasted item in whole falls into R[0] */
					struct item_head * pasted;

					ret_val = leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
					/* append item in R[0] */
					if ( pos_in_item >= 0 ) {
						bi.tb = tb;
						bi.bi_bh = tb->R[0];
						bi.bi_parent = tb->FR[0];
						bi.bi_position = get_right_neighbor_position (tb, 0);
						leaf_paste_in_buffer(&bi,item_pos - n + tb->rnum[0], pos_in_item,
								     tb->insert_size[0],body, zeros_num);
					}

					/* paste new entry, if item is directory item */
					pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
					if (is_direntry_le_ih (pasted) && pos_in_item >= 0 ) {
						leaf_paste_entries (bi.bi_bh, item_pos - n + tb->rnum[0], pos_in_item, 1,
								    (struct reiserfs_de_head *)body,
								    body + DEH_SIZE, tb->insert_size[0]);
						if ( ! pos_in_item ) {

							RFALSE( item_pos - n + tb->rnum[0],
								"PAP-12165: directory item must be first item of node when pasting is in 0th position");

							/* update delimiting keys */
							replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
						}
					}

					if (is_indirect_le_ih (pasted))
						set_ih_free_space (pasted, 0);
					zeros_num = tb->insert_size[0] = 0;
				}
			} else {
				/* new item doesn't fall into R[0] */
				leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
			}
			break;
		default:	/* cases d and t */
			reiserfs_panic (tb->tb_sb, "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
					(flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
		}

	}	/* tb->rnum[0] > 0 */
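	/* tb->blknum[0] is the number of leaf nodes that will hold the former
	   contents of S[0]: 0 means S[0] is emptied and freed, 1 means no new
	   nodes are needed, and 2 or 3 mean that one or two new nodes S_new
	   are allocated below. */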
	RFALSE( tb->blknum[0] > 3,
		"PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
	RFALSE( tb->blknum[0] < 0,
		"PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);

	/* if while adding to a node we discover that it is possible to split
	   it in two, and merge the left part into the left neighbor and the
	   right part into the right neighbor, eliminating the node */
	if ( tb->blknum[0] == 0 ) {	/* node S[0] is empty now */

		RFALSE( ! tb->lnum[0] || ! tb->rnum[0],
			"PAP-12190: lnum and rnum must not be zero");
		/* if insertion was done before 0-th position in R[0], right
		   delimiting key of the tb->L[0]'s and left delimiting key are
		   not set correctly */
		if (tb->CFL[0]) {
			if (!tb->CFR[0])
				reiserfs_panic (tb->tb_sb, "vs-12195: balance_leaf: CFR not initialized");
			copy_key (B_N_PDELIM_KEY (tb->CFL[0], tb->lkey[0]), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]));
			do_balance_mark_internal_dirty (tb, tb->CFL[0], 0);
		}

		reiserfs_invalidate_buffer(tb,tbS0);
		return 0;
	}


	/* Fill new nodes that appear in place of S[0] */

	/* I am told that this copying is because we need an array to enable
	   the looping code. -Hans */
	snum[0] = tb->s1num,
	snum[1] = tb->s2num;
	sbytes[0] = tb->s1bytes;
	sbytes[1] = tb->s2bytes;
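	/* One new node is created for each of the tb->blknum[0] - 1 extra leaves.
	   Items are taken from the right end of S[0], and the first key of every
	   S_new[i] is recorded in insert_key[]/insert_ptr[] at the bottom of the
	   loop so that it can be inserted into the parent level. */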
	for( i = tb->blknum[0] - 2; i >= 0; i-- ) {

		RFALSE( !snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i, snum[i]);

		/* here we shift from S to S_new nodes */

		S_new[i] = get_FEB(tb);

		/* initialize block type and tree level */
		set_blkh_level( B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL );

		n = B_NR_ITEMS(tbS0);

		switch (flag) {
		case M_INSERT:	/* insert item */

			if ( n - snum[i] < item_pos ) {
				/* new item or its part falls to first new node S_new[i] */
				if ( item_pos == n - snum[i] + 1 && sbytes[i] != -1 ) {
					/* part of new item falls into S_new[i] */
					int old_key_comp, old_len, r_zeros_number;
					const char * r_body;
					int version;

					/* Move snum[i]-1 items from S[0] to S_new[i] */
					leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, -1, S_new[i]);
					/* Remember key component and item length */
					version = ih_version (ih);
					old_key_comp = le_ih_k_offset( ih );
					old_len = ih_item_len(ih);

					/* Calculate key component and item length to insert into S_new[i] */
					set_le_ih_k_offset( ih,
							    le_ih_k_offset(ih) +
							    ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0)) );

					put_ih_item_len( ih, sbytes[i] );

					/* Insert part of the item into S_new[i] before 0-th item */
					bi.tb = tb;
					bi.bi_bh = S_new[i];
					bi.bi_parent = NULL;
					bi.bi_position = 0;

					if ( (old_len - sbytes[i]) > zeros_num ) {
						r_zeros_number = 0;
						r_body = body + (old_len - sbytes[i]) - zeros_num;
					} else {
						r_body = body;
						r_zeros_number = zeros_num - (old_len - sbytes[i]);
						zeros_num -= r_zeros_number;
					}

					leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number);

					/* Calculate key component and item length to insert into S[i] */
					set_le_ih_k_offset( ih, old_key_comp );
					put_ih_item_len( ih, old_len - sbytes[i] );
					tb->insert_size[0] -= sbytes[i];
				} else {
					/* whole new item falls into S_new[i] */

					/* Shift snum[i] - 1 items to S_new[i] (sbytes[i] of split item) */
					leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, sbytes[i], S_new[i]);

					/* Insert new item into S_new[i] */
					bi.tb = tb;
					bi.bi_bh = S_new[i];
					bi.bi_parent = NULL;
					bi.bi_position = 0;
					leaf_insert_into_buf (&bi, item_pos - n + snum[i] - 1, ih, body, zeros_num);

					zeros_num = tb->insert_size[0] = 0;
				}
			} else {
				/* new item or its part doesn't fall into S_new[i] */
				leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
			}
			break;

		case M_PASTE:	/* append item */

			if ( n - snum[i] <= item_pos ) {
				/* pasted item or part of it falls to S_new[i] */
				if ( item_pos == n - snum[i] && sbytes[i] != -1 ) {
					/* we must shift part of the appended item */
					struct item_head * aux_ih;

					RFALSE( ih, "PAP-12210: ih must be 0");

					if ( is_direntry_le_ih (aux_ih = B_N_PITEM_HEAD(tbS0,item_pos))) {
						/* we append to directory item */
						int entry_count;

						entry_count = ih_entry_count(aux_ih);

						if ( entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count ) {
							/* new directory entry falls into S_new[i] */

							RFALSE( ! tb->insert_size[0],
								"PAP-12215: insert_size is already 0");
							RFALSE( sbytes[i] - 1 >= entry_count,
								"PAP-12220: there are no so much entries (%d), only %d",
								sbytes[i] - 1, entry_count);

							/* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
							leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i]-1, S_new[i]);
							/* Paste given directory entry to directory item */
							bi.tb = tb;
							bi.bi_bh = S_new[i];
							bi.bi_parent = NULL;
							bi.bi_position = 0;
							leaf_paste_in_buffer (&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
									      tb->insert_size[0], body, zeros_num);
							/* paste new directory entry */
							leaf_paste_entries (bi.bi_bh, 0, pos_in_item - entry_count + sbytes[i] - 1,
									    1, (struct reiserfs_de_head *)body, body + DEH_SIZE,
									    tb->insert_size[0]);
							tb->insert_size[0] = 0;
							pos_in_item++;
						} else {
							/* new directory entry doesn't fall into S_new[i] */
							leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
						}
					} else {
						/* regular object */
						int n_shift, n_rem, r_zeros_number;
						const char * r_body;

						RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)) ||
							tb->insert_size[0] <= 0,
							"PAP-12225: item too short or insert_size <= 0");

						/* Calculate number of bytes which must be shifted from appended item */
						n_shift = sbytes[i] - tb->insert_size[0];
						if ( n_shift < 0 )
							n_shift = 0;
						leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);

						/* Calculate number of bytes which must remain in body after append to S_new[i] */
						n_rem = tb->insert_size[0] - sbytes[i];
						if ( n_rem < 0 )
							n_rem = 0;
						/* Append part of body into S_new[i] */
						bi.tb = tb;
						bi.bi_bh = S_new[i];
						bi.bi_parent = NULL;
						bi.bi_position = 0;

						if ( n_rem > zeros_num ) {
							r_zeros_number = 0;
							r_body = body + n_rem - zeros_num;
						} else {
							r_body = body;
							r_zeros_number = zeros_num - n_rem;
							zeros_num -= r_zeros_number;
						}

						leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0]-n_rem, r_body, r_zeros_number);
						{
							struct item_head * tmp;

							tmp = B_N_PITEM_HEAD(S_new[i],0);
							if (is_indirect_le_ih (tmp)) {
								set_ih_free_space (tmp, 0);
								set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
										    (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
							} else {
								set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
										    n_rem );
							}
						}
						tb->insert_size[0] = n_rem;
						if ( ! n_rem )
							pos_in_item++;
					}
				} else {
					/* item falls wholly into S_new[i] */
					int ret_val;
					struct item_head * pasted;

#ifdef CONFIG_REISERFS_CHECK
					struct item_head * ih = B_N_PITEM_HEAD(tbS0,item_pos);

					if ( ! is_direntry_le_ih(ih) && (pos_in_item != ih_item_len(ih) ||
									 tb->insert_size[0] <= 0) )
						reiserfs_panic (tb->tb_sb, "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
#endif /* CONFIG_REISERFS_CHECK */

					ret_val = leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);

					RFALSE( ret_val,
						"PAP-12240: unexpected value returned by leaf_move_items (%d)",
						ret_val);

					/* paste into item */
					bi.tb = tb;
					bi.bi_bh = S_new[i];
					bi.bi_parent = NULL;
					bi.bi_position = 0;
					leaf_paste_in_buffer(&bi, item_pos - n + snum[i], pos_in_item, tb->insert_size[0], body, zeros_num);

					pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
					if (is_direntry_le_ih (pasted)) {
						leaf_paste_entries (bi.bi_bh, item_pos - n + snum[i], pos_in_item, 1,
								    (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]);
					}

					/* if we paste to indirect item update ih_free_space */
					if (is_indirect_le_ih (pasted))
						set_ih_free_space (pasted, 0);
					zeros_num = tb->insert_size[0] = 0;
				}
			} else {
				/* pasted item doesn't fall into S_new[i] */
				leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
			}
			break;
		default:	/* cases d and t */
			reiserfs_panic (tb->tb_sb, "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
					(flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
		}

		memcpy (insert_key + i,B_N_PKEY(S_new[i],0),KEY_SIZE);
		insert_ptr[i] = S_new[i];

		RFALSE (!buffer_journaled (S_new [i]) || buffer_journal_dirty (S_new [i]) ||
			buffer_dirty (S_new [i]),
			"PAP-12247: S_new[%d] : (%b)", i, S_new[i]);
	}

	/* if the affected item was not wholly shifted then we perform all
	   necessary operations on that part or whole of the affected item
	   which remains in S */
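	/* tb->s0num is the number of items that are to remain in S[0] after
	   balancing, so the test below simply asks whether the affected item
	   is still one of them. */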
	if ( 0 <= item_pos && item_pos < tb->s0num ) {
		/* if we must insert or append into buffer S[0] */

		switch (flag) {
		case M_INSERT:	/* insert item into S[0] */
			bi.tb = tb;
			bi.bi_bh = tbS0;
			bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
			bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
			leaf_insert_into_buf (&bi, item_pos, ih, body, zeros_num);

			/* If we insert the first key, change the delimiting key */
			if( item_pos == 0 ) {
				if (tb->CFL[0])	/* can be 0 in reiserfsck */
					replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
			}
			break;

		case M_PASTE: {	/* append item in S[0] */
			struct item_head * pasted;

			pasted = B_N_PITEM_HEAD (tbS0, item_pos);
			/* if directory, the new entry may already have been pasted */
			if (is_direntry_le_ih (pasted)) {
				if ( pos_in_item >= 0 &&
				     pos_in_item <= ih_entry_count(pasted) ) {

					RFALSE( ! tb->insert_size[0],
						"PAP-12260: insert_size is 0 already");

					/* prepare space */
					bi.tb = tb;
					bi.bi_bh = tbS0;
					bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
					bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
					leaf_paste_in_buffer(&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);

					/* paste entry */
					leaf_paste_entries (bi.bi_bh, item_pos, pos_in_item, 1,
							    (struct reiserfs_de_head *)body,
							    body + DEH_SIZE, tb->insert_size[0]);
					if ( ! item_pos && ! pos_in_item ) {
						RFALSE( !tb->CFL[0] || !tb->L[0],
							"PAP-12270: CFL[0]/L[0] must be specified");
						if (tb->CFL[0]) {
							replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
						}
					}
					tb->insert_size[0] = 0;
				}
			} else {
				/* regular object */
				if ( pos_in_item == ih_item_len(pasted) ) {

					RFALSE( tb->insert_size[0] <= 0,
						"PAP-12275: insert size must not be %d",
						tb->insert_size[0]);
					bi.tb = tb;
					bi.bi_bh = tbS0;
					bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
					bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
					leaf_paste_in_buffer (&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);

					if (is_indirect_le_ih (pasted)) {
#if 0
						RFALSE( tb->insert_size[0] != UNFM_P_SIZE,
							"PAP-12280: insert_size for indirect item must be %d, not %d",
							UNFM_P_SIZE, tb->insert_size[0]);
#endif
						set_ih_free_space (pasted, 0);
					}
					tb->insert_size[0] = 0;
				}

#ifdef CONFIG_REISERFS_CHECK
				else {
					if ( tb->insert_size[0] ) {
						print_cur_tb ("12285");
						reiserfs_panic (tb->tb_sb, "PAP-12285: balance_leaf: insert_size must be 0 (%d)", tb->insert_size[0]);
					}
				}
#endif /* CONFIG_REISERFS_CHECK */

			}
		}	/* case M_PASTE: */
		}
	}

#ifdef CONFIG_REISERFS_CHECK
	if ( flag == M_PASTE && tb->insert_size[0] ) {
		print_cur_tb ("12290");
		reiserfs_panic (tb->tb_sb, "PAP-12290: balance_leaf: insert_size is still not 0 (%d)", tb->insert_size[0]);
	}
#endif /* CONFIG_REISERFS_CHECK */

	return 0;
}	/* Leaf level of the tree is balanced (end of balance_leaf) */



/* Make empty node */
void make_empty_node (struct buffer_info * bi)
{
	struct block_head * blkh;

	RFALSE( bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");

	blkh = B_BLK_HEAD(bi->bi_bh);
	set_blkh_nr_item( blkh, 0 );
	set_blkh_free_space( blkh, MAX_CHILD_SIZE(bi->bi_bh) );

	if (bi->bi_parent)
		B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size = 0;	/* Endian safe if 0 */
}
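/* get_FEB() hands out one of the free buffers that fix_nodes() preallocated
 * in tb->FEB[], formats it as an empty node and records it in tb->used[] so
 * that the balance teardown code can account for it later. */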
/* Get first empty buffer */
struct buffer_head * get_FEB (struct tree_balance * tb)
{
	int i;
	struct buffer_head * first_b;
	struct buffer_info bi;

	for (i = 0; i < MAX_FEB_SIZE; i ++)
		if (tb->FEB[i] != 0)
			break;

	if (i == MAX_FEB_SIZE)
		reiserfs_panic(tb->tb_sb, "vs-12300: get_FEB: FEB list is empty");

	bi.tb = tb;
	bi.bi_bh = first_b = tb->FEB[i];
	bi.bi_parent = NULL;
	bi.bi_position = 0;
	make_empty_node (&bi);
	set_buffer_uptodate(first_b);
	tb->FEB[i] = NULL;
	tb->used[i] = first_b;

	return(first_b);
}


/* This is now used because reiserfs_free_block has to be able to
** schedule.
*/
static void store_thrown (struct tree_balance * tb, struct buffer_head * bh)
{
	int i;

	if (buffer_dirty (bh))
		reiserfs_warning (tb->tb_sb, "store_thrown deals with dirty buffer");
	for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i ++)
		if (!tb->thrown[i]) {
			tb->thrown[i] = bh;
			get_bh(bh);	/* free_thrown puts this */
			return;
		}
	reiserfs_warning (tb->tb_sb, "store_thrown: too many thrown buffers");
}

static void free_thrown(struct tree_balance *tb)
{
	int i;
	b_blocknr_t blocknr;
	for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i++) {
		if (tb->thrown[i]) {
			blocknr = tb->thrown[i]->b_blocknr;
			if (buffer_dirty (tb->thrown[i]))
				reiserfs_warning (tb->tb_sb,
						  "free_thrown deals with dirty buffer %d",
						  blocknr);
			brelse(tb->thrown[i]);	/* incremented in store_thrown */
			reiserfs_free_block (tb->transaction_handle, NULL, blocknr, 0);
		}
	}
}

void reiserfs_invalidate_buffer (struct tree_balance * tb, struct buffer_head * bh)
{
	struct block_head *blkh;
	blkh = B_BLK_HEAD(bh);
	set_blkh_level( blkh, FREE_LEVEL );
	set_blkh_nr_item( blkh, 0 );

	clear_buffer_dirty(bh);
	store_thrown (tb, bh);
}

/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src. */
void replace_key (struct tree_balance * tb, struct buffer_head * dest, int n_dest,
		  struct buffer_head * src, int n_src)
{

	RFALSE( dest == NULL || src == NULL,
		"vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
		src, dest);
	RFALSE( ! B_IS_KEYS_LEVEL (dest),
		"vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
		dest);
	RFALSE( n_dest < 0 || n_src < 0,
		"vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
	RFALSE( n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
		"vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
		n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));

	if (B_IS_ITEMS_LEVEL (src))
		/* source buffer contains leaf node */
		memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PITEM_HEAD(src,n_src), KEY_SIZE);
	else
		memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PDELIM_KEY(src,n_src), KEY_SIZE);

	do_balance_mark_internal_dirty (tb, dest, 0);
}
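/* The two helpers below return the index of L[h]/R[h] within its own parent
 * (FL[h]/FR[h]).  When S[h] is the leftmost child of its parent, the left
 * neighbor hangs off a different parent, so the position wraps around to the
 * last child of FL[h]; the right-hand case is symmetric. */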
int get_left_neighbor_position (
				struct tree_balance * tb,
				int h
			       )
{
	int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1);

	RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FL[h] == 0,
		"vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
		h, tb->FL[h], h, PATH_H_PPARENT (tb->tb_path, h));

	if (Sh_position == 0)
		return B_NR_ITEMS (tb->FL[h]);
	else
		return Sh_position - 1;
}


int get_right_neighbor_position (struct tree_balance * tb, int h)
{
	int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1);

	RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FR[h] == 0,
		"vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
		h, PATH_H_PPARENT (tb->tb_path, h), h, tb->FR[h]);

	if (Sh_position == B_NR_ITEMS (PATH_H_PPARENT (tb->tb_path, h)))
		return 0;
	else
		return Sh_position + 1;
}


#ifdef CONFIG_REISERFS_CHECK

int is_reusable (struct super_block * s, b_blocknr_t block, int bit_value);
static void check_internal_node (struct super_block * s, struct buffer_head * bh, char * mes)
{
	struct disk_child * dc;
	int i;

	RFALSE( !bh, "PAP-12336: bh == 0");

	if (!bh || !B_IS_IN_TREE (bh))
		return;

	RFALSE( !buffer_dirty (bh) &&
		!(buffer_journaled(bh) || buffer_journal_dirty(bh)),
		"PAP-12337: buffer (%b) must be dirty", bh);
	dc = B_N_CHILD (bh, 0);

	for (i = 0; i <= B_NR_ITEMS (bh); i ++, dc ++) {
		if (!is_reusable (s, dc_block_number(dc), 1) ) {
			print_cur_tb (mes);
			reiserfs_panic (s, "PAP-12338: check_internal_node: invalid child pointer %y in %b", dc, bh);
		}
	}
}


static int locked_or_not_in_tree (struct buffer_head * bh, char * which)
{
	if ( (!buffer_journal_prepared (bh) && buffer_locked (bh)) ||
	     !B_IS_IN_TREE (bh) ) {
		reiserfs_warning (NULL, "vs-12339: locked_or_not_in_tree: %s (%b)",
				  which, bh);
		return 1;
	}
	return 0;
}


static int check_before_balancing (struct tree_balance * tb)
{
	int retval = 0;

	if ( cur_tb ) {
		reiserfs_panic (tb->tb_sb, "vs-12335: check_before_balancing: "
				"suspect that schedule occurred based on cur_tb not being null at this point in code. "
				"do_balance cannot properly handle schedule occurring while it runs.");
	}

	/* double check that buffers that we will modify are unlocked.
	   (fix_nodes should already have prepped all of these for us). */
	if ( tb->lnum[0] ) {
		retval |= locked_or_not_in_tree (tb->L[0], "L[0]");
		retval |= locked_or_not_in_tree (tb->FL[0], "FL[0]");
		retval |= locked_or_not_in_tree (tb->CFL[0], "CFL[0]");
		check_leaf (tb->L[0]);
	}
	if ( tb->rnum[0] ) {
		retval |= locked_or_not_in_tree (tb->R[0], "R[0]");
		retval |= locked_or_not_in_tree (tb->FR[0], "FR[0]");
		retval |= locked_or_not_in_tree (tb->CFR[0], "CFR[0]");
		check_leaf (tb->R[0]);
	}
	retval |= locked_or_not_in_tree (PATH_PLAST_BUFFER (tb->tb_path), "S[0]");
	check_leaf (PATH_PLAST_BUFFER (tb->tb_path));

	return retval;
}
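/* After a leaf-level balance the free space recorded in each modified leaf
 * must agree with the dc_size its parent stores for that child; a mismatch
 * means one of the shifts above updated the leaf but not the parent (or the
 * other way around), so the checks below panic on any disagreement. */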
static void check_after_balance_leaf (struct tree_balance * tb)
{
	if (tb->lnum[0]) {
		if (B_FREE_SPACE (tb->L[0]) !=
		    MAX_CHILD_SIZE (tb->L[0]) - dc_size(B_N_CHILD (tb->FL[0], get_left_neighbor_position (tb, 0)))) {
			print_cur_tb ("12221");
			reiserfs_panic (tb->tb_sb, "PAP-12355: check_after_balance_leaf: shift to left was incorrect");
		}
	}
	if (tb->rnum[0]) {
		if (B_FREE_SPACE (tb->R[0]) !=
		    MAX_CHILD_SIZE (tb->R[0]) - dc_size(B_N_CHILD (tb->FR[0], get_right_neighbor_position (tb, 0)))) {
			print_cur_tb ("12222");
			reiserfs_panic (tb->tb_sb, "PAP-12360: check_after_balance_leaf: shift to right was incorrect");
		}
	}
	if (PATH_H_PBUFFER(tb->tb_path,1) &&
	    (B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) !=
	     (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) -
	      dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1),
				 PATH_H_POSITION (tb->tb_path, 1)))) )) {
		int left = B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0));
		int right = (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) -
			     dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1),
						PATH_H_POSITION (tb->tb_path, 1))));
		print_cur_tb ("12223");
		reiserfs_warning (tb->tb_sb,
				  "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
				  "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
				  left,
				  MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)),
				  PATH_H_PBUFFER(tb->tb_path,1),
				  PATH_H_POSITION (tb->tb_path, 1),
				  dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1), PATH_H_POSITION (tb->tb_path, 1 )) ),
				  right );
		reiserfs_panic (tb->tb_sb, "PAP-12365: check_after_balance_leaf: S is incorrect");
	}
}


static void check_leaf_level (struct tree_balance * tb)
{
	check_leaf (tb->L[0]);
	check_leaf (tb->R[0]);
	check_leaf (PATH_PLAST_BUFFER (tb->tb_path));
}

static void check_internal_levels (struct tree_balance * tb)
{
	int h;

	/* check all internal nodes */
	for (h = 1; tb->insert_size[h]; h ++) {
		check_internal_node (tb->tb_sb, PATH_H_PBUFFER (tb->tb_path, h), "BAD BUFFER ON PATH");
		if (tb->lnum[h])
			check_internal_node (tb->tb_sb, tb->L[h], "BAD L");
		if (tb->rnum[h])
			check_internal_node (tb->tb_sb, tb->R[h], "BAD R");
	}

}

#endif




/* Now we have all of the buffers that must be used in balancing of
   the tree.  We rely on the assumption that schedule() will not occur
   while do_balance works. ( Only interrupt handlers are acceptable.)
   We balance the tree according to the analysis made before this,
   using buffers already obtained.  For SMP support it will someday be
   necessary to add ordered locking of tb. */

/* Some interesting rules of balancing:

   we delete a maximum of two nodes per level per balancing: we never
   delete R, when we delete two of three nodes L, S, R then we move
   them into R.

   we only delete L if we are deleting two nodes, if we delete only
   one node we delete S

   if we shift leaves then we shift as much as we can: this is a
   deliberate policy of extremism in node packing which results in
   higher average utilization after repeated random balance operations
   at the cost of more memory copies and more balancing as a result of
   small insertions to full nodes.

   if we shift internal nodes we try to evenly balance the node
   utilization, with consequent less balancing at the cost of lower
   utilization.

   one could argue that the policy for directories in leaves should be
   that of internal nodes, but we will wait until another day to
   evaluate this....  It would be nice to someday measure and prove
   these assumptions as to what is optimal....

*/

static inline void do_balance_starts (struct tree_balance *tb)
{
	/* use print_cur_tb() to see initial state of struct
	   tree_balance */

	/* store_print_tb (tb); */

	/* do not delete, just comment it out */
/*	print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
		 "check");*/
	RFALSE( check_before_balancing (tb), "PAP-12340: locked buffers in TB");
#ifdef CONFIG_REISERFS_CHECK
	cur_tb = tb;
#endif
}
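/* Under CONFIG_REISERFS_CHECK, do_balance_starts() publishes the tree_balance
 * being processed in cur_tb and do_balance_completed() clears it again; that
 * is how check_before_balancing() can detect a second balance starting (and
 * hence an unexpected schedule()) while this one is still in progress. */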
static inline void do_balance_completed (struct tree_balance * tb)
{

#ifdef CONFIG_REISERFS_CHECK
	check_leaf_level (tb);
	check_internal_levels (tb);
	cur_tb = NULL;
#endif

	/* reiserfs_free_block is no longer schedule safe.  So, we need to
	** put the buffers we want freed on the thrown list during do_balance,
	** and then free them now
	*/

	REISERFS_SB(tb->tb_sb)->s_do_balance ++;


	/* release all nodes held to perform the balancing */
	unfix_nodes(tb);

	free_thrown(tb);
}




void do_balance (struct tree_balance * tb,	/* tree_balance structure */
		 struct item_head * ih,		/* item header of inserted item */
		 const char * body,		/* body of inserted item or bytes to paste */
		 int flag)			/* i - insert, d - delete
						   c - cut, p - paste

						   Cut means delete part of an item
						   (includes removing an entry from a
						   directory).

						   Delete means delete whole item.

						   Insert means add a new item into the
						   tree.

						   Paste means to append to the end of an
						   existing file or to insert a directory
						   entry.  */
{
	int child_pos,			/* position of a child node in its parent */
	    h;				/* level of the tree being processed */
	struct item_head insert_key[2];	/* in our processing of one level
					   we sometimes determine what
					   must be inserted into the next
					   higher level.  This insertion
					   consists of a key or two keys
					   and their corresponding
					   pointers */
	struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
					      level */

	tb->tb_mode = flag;
	tb->need_balance_dirty = 0;

	if (FILESYSTEM_CHANGED_TB(tb)) {
		reiserfs_panic(tb->tb_sb, "clm-6000: do_balance, fs generation has changed\n");
	}
	/* if we have no real work to do  */
	if ( ! tb->insert_size[0] ) {
		reiserfs_warning (tb->tb_sb,
				  "PAP-12350: do_balance: insert_size == 0, mode == %c",
				  flag);
		unfix_nodes(tb);
		return;
	}

	atomic_inc (&(fs_generation (tb->tb_sb)));
	do_balance_starts (tb);

	/* balance leaf returns 0 except if combining L R and S into
	   one node.  see balance_internal() for explanation of this
	   line of code. */
	child_pos = PATH_H_B_ITEM_ORDER (tb->tb_path, 0) +
		balance_leaf (tb, ih, body, flag, insert_key, insert_ptr);

#ifdef CONFIG_REISERFS_CHECK
	check_after_balance_leaf (tb);
#endif

	/* Balance internal level of the tree. */
	for ( h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++ )
		child_pos = balance_internal (tb, h, child_pos, insert_key, insert_ptr);


	do_balance_completed (tb);

}