do_balan.c revision 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2
1/*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5/* Now we have all buffers that must be used in balancing of the tree 	*/
6/* Further calculations can not cause schedule(), and thus the buffer 	*/
7/* tree will be stable until the balancing will be finished 		*/
8/* balance the tree according to the analysis made before,		*/
9/* and using buffers obtained after all above.				*/
10
11
12/**
13 ** balance_leaf_when_delete
14 ** balance_leaf
15 ** do_balance
16 **
17 **/
18
19#include <linux/config.h>
20#include <asm/uaccess.h>
21#include <linux/time.h>
22#include <linux/reiserfs_fs.h>
23#include <linux/buffer_head.h>
24
25#ifdef CONFIG_REISERFS_CHECK
26
27struct tree_balance * cur_tb = NULL; /* detects whether more than one
28                                        copy of tb exists as a means
29                                        of checking whether schedule
30                                        is interrupting do_balance */
31#endif
32
33inline void do_balance_mark_leaf_dirty (struct tree_balance * tb,
34					struct buffer_head * bh, int flag)
35{
36    journal_mark_dirty(tb->transaction_handle,
37                       tb->transaction_handle->t_super, bh) ;
38}
39
40#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
41#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
42
43
44/* summary:
45 if deleting something ( tb->insert_size[0] < 0 )
46   return(balance_leaf_when_delete()); (flag d handled here)
47 else
48   if lnum is larger than 0 we put items into the left node
49   if rnum is larger than 0 we put items into the right node
50   if snum1 is larger than 0 we put items into the new node s1
51   if snum2 is larger than 0 we put items into the new node s2
52Note that all *num* count new items being created.
53
54It would be easier to read balance_leaf() if each of these summary
55lines was a separate procedure rather than being inlined.  I think
56that there are many passages here and in balance_leaf_when_delete() in
57which two calls to one procedure can replace two passages, and it
58might save cache space and improve software maintenance costs to do so.
59
60Vladimir made the perceptive comment that we should offload most of
61the decision making in this function into fix_nodes/check_balance, and
62then create some sort of structure in tb that says what actions should
63be performed by do_balance.
64
65-Hans */
66
67
68
69/* Balance leaf node in case of delete or cut: insert_size[0] < 0
70 *
71 * lnum, rnum can have values >= -1
72 *	-1 means that the neighbor must be joined with S
73 *	 0 means that nothing should be done with the neighbor
74 *	>0 means to shift entirely or partly the specified number of items to the neighbor
75 */
76static int balance_leaf_when_delete (struct tree_balance * tb, int flag)
77{
78    struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path);
79    int item_pos = PATH_LAST_POSITION (tb->tb_path);
80    int pos_in_item = tb->tb_path->pos_in_item;
81    struct buffer_info bi;
82    int n;
83    struct item_head * ih;
84
85    RFALSE( tb->FR[0] && B_LEVEL (tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
86	    "vs- 12000: level: wrong FR %z", tb->FR[0]);
87    RFALSE( tb->blknum[0] > 1,
88	    "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
89    RFALSE( ! tb->blknum[0] && ! PATH_H_PPARENT(tb->tb_path, 0),
90	    "PAP-12010: tree can not be empty");
91
92    ih = B_N_PITEM_HEAD (tbS0, item_pos);
93
94    /* Delete or truncate the item */
95
96    switch (flag) {
97    case M_DELETE:   /* delete item in S[0] */
98
99	RFALSE( ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
100	        "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
101 		 -tb->insert_size [0], ih);
102
103	bi.tb = tb;
104	bi.bi_bh = tbS0;
105	bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
106	bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
107	leaf_delete_items (&bi, 0, item_pos, 1, -1);
108
109	if ( ! item_pos && tb->CFL[0] ) {
110	    if ( B_NR_ITEMS(tbS0) ) {
111		replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0);
112	    }
113	    else {
114		if ( ! PATH_H_POSITION (tb->tb_path, 1) )
115		    replace_key(tb, tb->CFL[0],tb->lkey[0],PATH_H_PPARENT(tb->tb_path, 0),0);
116	    }
117	}
118
119	RFALSE( ! item_pos && !tb->CFL[0],
120		"PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], tb->L[0]);
121
122	break;
123
124    case M_CUT: {  /* cut item in S[0] */
125	bi.tb = tb;
126	bi.bi_bh = tbS0;
127	bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
128	bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
129	if (is_direntry_le_ih (ih)) {
130
131	    /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
132	    /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
133	    tb->insert_size[0] = -1;
134	    leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);
135
136	    RFALSE( ! item_pos && ! pos_in_item && ! tb->CFL[0],
137		    "PAP-12030: can not change delimiting key. CFL[0]=%p",
138		    tb->CFL[0]);
139
140	    if ( ! item_pos && ! pos_in_item && tb->CFL[0] ) {
141		replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0);
142	    }
143	} else {
144	    leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);
145
146	    RFALSE( ! ih_item_len(ih),
147		"PAP-12035: cut must leave non-zero dynamic length of item");
148	}
149	break;
150    }
151
152    default:
153	print_cur_tb ("12040");
154	reiserfs_panic (tb->tb_sb, "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)",
155			(flag == M_PASTE) ? "PASTE" : ((flag == M_INSERT) ? "INSERT" : "UNKNOWN"), flag);
156    }
157
158    /* the rule is that no shifting occurs unless by shifting a node can be freed */
159    n = B_NR_ITEMS(tbS0);
160    if ( tb->lnum[0] )     /* L[0] takes part in balancing */
161    {
162	if ( tb->lnum[0] == -1 )    /* L[0] must be joined with S[0] */
163	{
164	    if ( tb->rnum[0] == -1 )    /* R[0] must be also joined with S[0] */
165	    {
166		if ( tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0) )
167		{
168		    /* all contents of all the 3 buffers will be in L[0] */
169		    if ( PATH_H_POSITION (tb->tb_path, 1) == 0 && 1 < B_NR_ITEMS(tb->FR[0]) )
170			replace_key(tb, tb->CFL[0],tb->lkey[0],tb->FR[0],1);
171
172		    leaf_move_items (LEAF_FROM_S_TO_L, tb, n, -1, NULL);
173		    leaf_move_items (LEAF_FROM_R_TO_L, tb, B_NR_ITEMS(tb->R[0]), -1, NULL);
174
175		    reiserfs_invalidate_buffer (tb, tbS0);
176		    reiserfs_invalidate_buffer (tb, tb->R[0]);
177
178		    return 0;
179		}
180		/* all contents of all the 3 buffers will be in R[0] */
181		leaf_move_items (LEAF_FROM_S_TO_R, tb, n, -1, NULL);
182		leaf_move_items (LEAF_FROM_L_TO_R, tb, B_NR_ITEMS(tb->L[0]), -1, NULL);
183
184		/* right_delimiting_key is correct in R[0] */
185		replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
186
187		reiserfs_invalidate_buffer (tb, tbS0);
188		reiserfs_invalidate_buffer (tb, tb->L[0]);
189
190		return -1;
191	    }
192
193	    RFALSE( tb->rnum[0] != 0,
194		    "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
195	    /* all contents of L[0] and S[0] will be in L[0] */
196	    leaf_shift_left(tb, n, -1);
197
198	    reiserfs_invalidate_buffer (tb, tbS0);
199
200	    return 0;
201	}
202	/* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
203
204	RFALSE( ( tb->lnum[0] + tb->rnum[0] < n ) ||
205		( tb->lnum[0] + tb->rnum[0] > n+1 ),
206		"PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
207		tb->rnum[0], tb->lnum[0], n);
208	RFALSE( ( tb->lnum[0] + tb->rnum[0] == n ) &&
209		(tb->lbytes != -1 || tb->rbytes != -1),
210		"PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
211		tb->rbytes, tb->lbytes);
212	RFALSE( ( tb->lnum[0] + tb->rnum[0] == n + 1 ) &&
213		(tb->lbytes < 1 || tb->rbytes != -1),
214		"PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
215		tb->rbytes, tb->lbytes);
216
217	leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
218	leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
219
220	reiserfs_invalidate_buffer (tb, tbS0);
221
222	return 0;
223    }
224
225    if ( tb->rnum[0] == -1 ) {
226	/* all contents of R[0] and S[0] will be in R[0] */
227	leaf_shift_right(tb, n, -1);
228	reiserfs_invalidate_buffer (tb, tbS0);
229	return 0;
230    }
231
232    RFALSE( tb->rnum[0],
233	    "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
234    return 0;
235}
236
237
238static int balance_leaf (struct tree_balance * tb,
239			 struct item_head * ih,		/* item header of inserted item (this is on little endian) */
240			 const char * body,		/* body  of inserted item or bytes to paste */
241			 int flag,			/* i - insert, d - delete, c - cut, p - paste
242							   (see comment to do_balance) */
243			 struct item_head * insert_key,  /* in our processing of one level we sometimes determine what
244							    must be inserted into the next higher level.  This insertion
245							    consists of a key or two keys and their corresponding
246							    pointers */
247			 struct buffer_head ** insert_ptr /* inserted node-ptrs for the next level */
248    )
249{
250    struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path);
251    int item_pos = PATH_LAST_POSITION (tb->tb_path);	/*  index into the array of item headers in S[0]
252							    of the affected item */
253    struct buffer_info bi;
254    struct buffer_head *S_new[2];  /* new nodes allocated to hold what could not fit into S */
255    int snum[2];	    /* number of items that will be placed
256                               into S_new (includes partially shifted
257                               items) */
258    int sbytes[2];          /* if an item is partially shifted into S_new then
259			       if it is a directory item
260			       it is the number of entries from the item that are shifted into S_new
261			       else
262			       it is the number of bytes from the item that are shifted into S_new
263			    */
264    int n, i;
265    int ret_val;
266    int pos_in_item;
267    int zeros_num;
268
269    PROC_INFO_INC( tb -> tb_sb, balance_at[ 0 ] );
270
271    /* Make balance in case insert_size[0] < 0 */
272    if ( tb->insert_size[0] < 0 )
273	return balance_leaf_when_delete (tb, flag);
274
275    zeros_num = 0;
276    if (flag == M_INSERT && body == 0)
277	zeros_num = ih_item_len( ih );
278
279    pos_in_item = tb->tb_path->pos_in_item;
280    /* for indirect item pos_in_item is measured in unformatted node
281       pointers. Recalculate to bytes */
282    if (flag != M_INSERT && is_indirect_le_ih (B_N_PITEM_HEAD (tbS0, item_pos)))
283	pos_in_item *= UNFM_P_SIZE;
284
285    if ( tb->lnum[0] > 0 ) {
286	/* Shift lnum[0] items from S[0] to the left neighbor L[0] */
287	if ( item_pos < tb->lnum[0] ) {
288	    /* new item or it part falls to L[0], shift it too */
289	    n = B_NR_ITEMS(tb->L[0]);
290
291	    switch (flag) {
292	    case M_INSERT:   /* insert item into L[0] */
293
294		if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) {
295		    /* part of new item falls into L[0] */
296		    int new_item_len;
297		    int version;
298
299		    ret_val = leaf_shift_left (tb, tb->lnum[0]-1, -1);
300
301		    /* Calculate item length to insert to S[0] */
302		    new_item_len = ih_item_len(ih) - tb->lbytes;
303		    /* Calculate and check item length to insert to L[0] */
304		    put_ih_item_len(ih, ih_item_len(ih) - new_item_len );
305
306		    RFALSE( ih_item_len(ih) <= 0,
307			    "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
308                            ih_item_len(ih));
309
310		    /* Insert new item into L[0] */
311		    bi.tb = tb;
312		    bi.bi_bh = tb->L[0];
313		    bi.bi_parent = tb->FL[0];
314		    bi.bi_position = get_left_neighbor_position (tb, 0);
315		    leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body,
316					  zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
317
318		    version = ih_version (ih);
319
320		    /* Calculate key component, item length and body to insert into S[0] */
321                    set_le_ih_k_offset( ih, le_ih_k_offset( ih ) + (tb->lbytes << (is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)) );
322
323		    put_ih_item_len( ih, new_item_len );
324		    if ( tb->lbytes >  zeros_num ) {
325			body += (tb->lbytes - zeros_num);
326			zeros_num = 0;
327		    }
328		    else
329			zeros_num -= tb->lbytes;
330
331		    RFALSE( ih_item_len(ih) <= 0,
332			"PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
333			ih_item_len(ih));
334		} else {
335		    /* new item in whole falls into L[0] */
336		    /* Shift lnum[0]-1 items to L[0] */
337		    ret_val = leaf_shift_left(tb, tb->lnum[0]-1, tb->lbytes);
338		    /* Insert new item into L[0] */
339		    bi.tb = tb;
340		    bi.bi_bh = tb->L[0];
341		    bi.bi_parent = tb->FL[0];
342		    bi.bi_position = get_left_neighbor_position (tb, 0);
343		    leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, zeros_num);
344		    tb->insert_size[0] = 0;
345		    zeros_num = 0;
346		}
347		break;
348
349	    case M_PASTE:   /* append item in L[0] */
350
351		if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) {
352		    /* we must shift the part of the appended item */
353		    if ( is_direntry_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) {
354
355			RFALSE( zeros_num,
356				"PAP-12090: invalid parameter in case of a directory");
357			/* directory item */
358			if ( tb->lbytes > pos_in_item ) {
359			    /* new directory entry falls into L[0] */
360			    struct item_head * pasted;
361			    int l_pos_in_item = pos_in_item;
362
363			    /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
364			    ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1);
365			    if ( ret_val && ! item_pos ) {
366				pasted =  B_N_PITEM_HEAD(tb->L[0],B_NR_ITEMS(tb->L[0])-1);
367				l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes-1);
368			    }
369
370			    /* Append given directory entry to directory item */
371			    bi.tb = tb;
372			    bi.bi_bh = tb->L[0];
373			    bi.bi_parent = tb->FL[0];
374			    bi.bi_position = get_left_neighbor_position (tb, 0);
375			    leaf_paste_in_buffer (&bi, n + item_pos - ret_val, l_pos_in_item,
376						  tb->insert_size[0], body, zeros_num);
377
378			    /* previous string prepared space for pasting new entry, following string pastes this entry */
379
380			    /* when we have merge directory item, pos_in_item has been changed too */
381
382			    /* paste new directory entry. 1 is entry number */
383			    leaf_paste_entries (bi.bi_bh, n + item_pos - ret_val, l_pos_in_item, 1,
384						(struct reiserfs_de_head *)body,
385						body + DEH_SIZE, tb->insert_size[0]
386				);
387			    tb->insert_size[0] = 0;
388			} else {
389			    /* new directory item doesn't fall into L[0] */
390			    /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
391			    leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
392			}
393			/* Calculate new position to append in item body */
394			pos_in_item -= tb->lbytes;
395		    }
396		    else {
397			/* regular object */
398			RFALSE( tb->lbytes <= 0,
399			        "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
400				tb->lbytes);
401			RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
402                                "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
403				ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)), pos_in_item);
404
405			if ( tb->lbytes >= pos_in_item ) {
406			    /* appended item will be in L[0] in whole */
407			    int l_n;
408
409			    /* this bytes number must be appended to the last item of L[h] */
410			    l_n = tb->lbytes - pos_in_item;
411
412			    /* Calculate new insert_size[0] */
413			    tb->insert_size[0] -= l_n;
414
415			    RFALSE( tb->insert_size[0] <= 0,
416				    "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
417				    tb->insert_size[0]);
418			    ret_val =  leaf_shift_left(tb,tb->lnum[0],
419						       ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)));
420			    /* Append to body of item in L[0] */
421			    bi.tb = tb;
422			    bi.bi_bh = tb->L[0];
423			    bi.bi_parent = tb->FL[0];
424			    bi.bi_position = get_left_neighbor_position (tb, 0);
425			    leaf_paste_in_buffer(
426				&bi,n + item_pos - ret_val,
427				ih_item_len( B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)),
428				l_n,body, zeros_num > l_n ? l_n : zeros_num
429				);
430			    /* 0-th item in S0 can be only of DIRECT type when l_n != 0*/
431			    {
432				int version;
433				int temp_l = l_n;
434
435				RFALSE (ih_item_len (B_N_PITEM_HEAD (tbS0, 0)),
436					"PAP-12106: item length must be 0");
437				RFALSE (comp_short_le_keys (B_N_PKEY (tbS0, 0),
438							    B_N_PKEY (tb->L[0],
439									    n + item_pos - ret_val)),
440					"PAP-12107: items must be of the same file");
441				if (is_indirect_le_ih(B_N_PITEM_HEAD (tb->L[0],
442								      n + item_pos - ret_val)))	{
443				    temp_l = l_n << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
444				}
445				/* update key of first item in S0 */
446				version = ih_version (B_N_PITEM_HEAD (tbS0, 0));
447				set_le_key_k_offset (version, B_N_PKEY (tbS0, 0),
448						     le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + temp_l);
449				/* update left delimiting key */
450				set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]),
451						     le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + temp_l);
452			    }
453
454			    /* Calculate new body, position in item and insert_size[0] */
455			    if ( l_n > zeros_num ) {
456				body += (l_n - zeros_num);
457				zeros_num = 0;
458			    }
459			    else
460				zeros_num -= l_n;
461			    pos_in_item = 0;
462
463			    RFALSE( comp_short_le_keys
464				    (B_N_PKEY(tbS0,0),
465				     B_N_PKEY(tb->L[0],B_NR_ITEMS(tb->L[0])-1)) ||
466
467				    !op_is_left_mergeable
468				    (B_N_PKEY (tbS0, 0), tbS0->b_size) ||
469				    !op_is_left_mergeable
470				    (B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]),
471				     tbS0->b_size),
472				    "PAP-12120: item must be merge-able with left neighboring item");
473			}
474			else /* only part of the appended item will be in L[0] */
475			{
476			    /* Calculate position in item for append in S[0] */
477			    pos_in_item -= tb->lbytes;
478
479			    RFALSE( pos_in_item <= 0,
480				    "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
481
482			    /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
483			    leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
484			}
485		    }
486		}
487		else /* appended item will be in L[0] in whole */
488		{
489		    struct item_head * pasted;
490
491			if ( ! item_pos  && op_is_left_mergeable (B_N_PKEY (tbS0, 0), tbS0->b_size) )
492			{ /* if we paste into first item of S[0] and it is left mergable */
493			    /* then increment pos_in_item by the size of the last item in L[0] */
494			    pasted = B_N_PITEM_HEAD(tb->L[0],n-1);
495			    if ( is_direntry_le_ih (pasted) )
496				pos_in_item += ih_entry_count(pasted);
497			    else
498				pos_in_item += ih_item_len(pasted);
499			}
500
501		    /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
502		    ret_val = leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
503		    /* Append to body of item in L[0] */
504		    bi.tb = tb;
505		    bi.bi_bh = tb->L[0];
506		    bi.bi_parent = tb->FL[0];
507		    bi.bi_position = get_left_neighbor_position (tb, 0);
508		    leaf_paste_in_buffer (&bi, n + item_pos - ret_val, pos_in_item, tb->insert_size[0],
509					  body, zeros_num);
510
511		    /* if appended item is directory, paste entry */
512		    pasted = B_N_PITEM_HEAD (tb->L[0], n + item_pos - ret_val);
513		    if (is_direntry_le_ih (pasted))
514			leaf_paste_entries (
515			    bi.bi_bh, n + item_pos - ret_val, pos_in_item, 1,
516			    (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
517			    );
518		    /* if appended item is indirect item, put unformatted node into un list */
519		    if (is_indirect_le_ih (pasted))
520			set_ih_free_space (pasted, 0);
521		    tb->insert_size[0] = 0;
522		    zeros_num = 0;
523		}
524		break;
525	    default:    /* cases d and t */
526		reiserfs_panic (tb->tb_sb, "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
527				(flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
528	    }
529	} else {
530	    /* new item doesn't fall into L[0] */
531	    leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
532	}
533    }	/* tb->lnum[0] > 0 */
534
535    /* Calculate new item position */
536    item_pos -= ( tb->lnum[0] - (( tb->lbytes != -1 ) ? 1 : 0));
537
538    if ( tb->rnum[0] > 0 ) {
539	/* shift rnum[0] items from S[0] to the right neighbor R[0] */
540	n = B_NR_ITEMS(tbS0);
541	switch ( flag ) {
542
543	case M_INSERT:   /* insert item */
544	    if ( n - tb->rnum[0] < item_pos )
545	    { /* new item or its part falls to R[0] */
546		if ( item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1 )
547		{ /* part of new item falls into R[0] */
548		    loff_t old_key_comp, old_len, r_zeros_number;
549		    const char * r_body;
550		    int version;
551		    loff_t offset;
552
553		    leaf_shift_right(tb,tb->rnum[0]-1,-1);
554
555		    version = ih_version(ih);
556		    /* Remember key component and item length */
557                    old_key_comp = le_ih_k_offset( ih );
558		    old_len = ih_item_len(ih);
559
560		    /* Calculate key component and item length to insert into R[0] */
561                    offset = le_ih_k_offset( ih ) + ((old_len - tb->rbytes )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0));
562                    set_le_ih_k_offset( ih, offset );
563		    put_ih_item_len( ih, tb->rbytes);
564		    /* Insert part of the item into R[0] */
565		    bi.tb = tb;
566		    bi.bi_bh = tb->R[0];
567		    bi.bi_parent = tb->FR[0];
568		    bi.bi_position = get_right_neighbor_position (tb, 0);
569		    if ( (old_len - tb->rbytes) > zeros_num ) {
570			r_zeros_number = 0;
571			r_body = body + (old_len - tb->rbytes) - zeros_num;
572		    }
573		    else {
574			r_body = body;
575			r_zeros_number = zeros_num - (old_len - tb->rbytes);
576			zeros_num -= r_zeros_number;
577		    }
578
579		    leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number);
580
581		    /* Replace right delimiting key by first key in R[0] */
582		    replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
583
584		    /* Calculate key component and item length to insert into S[0] */
585                    set_le_ih_k_offset( ih, old_key_comp );
586		    put_ih_item_len( ih, old_len - tb->rbytes );
587
588		    tb->insert_size[0] -= tb->rbytes;
589
590		}
591		else /* whole new item falls into R[0] */
592		{
593		    /* Shift rnum[0]-1 items to R[0] */
594		    ret_val = leaf_shift_right(tb,tb->rnum[0]-1,tb->rbytes);
595		    /* Insert new item into R[0] */
596		    bi.tb = tb;
597		    bi.bi_bh = tb->R[0];
598		    bi.bi_parent = tb->FR[0];
599		    bi.bi_position = get_right_neighbor_position (tb, 0);
600		    leaf_insert_into_buf (&bi, item_pos - n + tb->rnum[0] - 1, ih, body, zeros_num);
601
602		    if ( item_pos - n + tb->rnum[0] - 1 == 0 ) {
603			replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
604
605		    }
606		    zeros_num = tb->insert_size[0] = 0;
607		}
608	    }
609	    else /* new item or part of it doesn't fall into R[0] */
610	    {
611		leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
612	    }
613	    break;
614
615	case M_PASTE:   /* append item */
616
617	    if ( n - tb->rnum[0] <= item_pos )  /* pasted item or part of it falls to R[0] */
618	    {
619		if ( item_pos == n - tb->rnum[0] && tb->rbytes != -1 )
620		{ /* we must shift the part of the appended item */
621		    if ( is_direntry_le_ih (B_N_PITEM_HEAD(tbS0, item_pos)))
622		    { /* we append to directory item */
623			int entry_count;
624
625			RFALSE( zeros_num,
626				"PAP-12145: invalid parameter in case of a directory");
627			entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD(tbS0, item_pos));
628			if ( entry_count - tb->rbytes < pos_in_item )
629			    /* new directory entry falls into R[0] */
630			{
631			    int paste_entry_position;
632
633			    RFALSE( tb->rbytes - 1 >= entry_count ||
634				    ! tb->insert_size[0],
635				    "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
636				    tb->rbytes, entry_count);
637			    /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
638			    leaf_shift_right(tb,tb->rnum[0],tb->rbytes - 1);
639			    /* Paste given directory entry to directory item */
640			    paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
641			    bi.tb = tb;
642			    bi.bi_bh = tb->R[0];
643			    bi.bi_parent = tb->FR[0];
644			    bi.bi_position = get_right_neighbor_position (tb, 0);
645			    leaf_paste_in_buffer (&bi, 0, paste_entry_position,
646						  tb->insert_size[0],body,zeros_num);
647			    /* paste entry */
648			    leaf_paste_entries (
649				bi.bi_bh, 0, paste_entry_position, 1, (struct reiserfs_de_head *)body,
650				body + DEH_SIZE, tb->insert_size[0]
651				);
652
653			    if ( paste_entry_position == 0 ) {
654				/* change delimiting keys */
655				replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
656			    }
657
658			    tb->insert_size[0] = 0;
659			    pos_in_item++;
660			}
661			else /* new directory entry doesn't fall into R[0] */
662			{
663			    leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
664			}
665		    }
666		    else /* regular object */
667		    {
668			int n_shift, n_rem, r_zeros_number;
669			const char * r_body;
670
671			/* Calculate number of bytes which must be shifted from appended item */
672			if ( (n_shift = tb->rbytes - tb->insert_size[0]) < 0 )
673			    n_shift = 0;
674
675			RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD (tbS0, item_pos)),
676			       "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
677                               pos_in_item, ih_item_len( B_N_PITEM_HEAD(tbS0,item_pos)));
678
679			leaf_shift_right(tb,tb->rnum[0],n_shift);
680			/* Calculate number of bytes which must remain in body after appending to R[0] */
681			if ( (n_rem = tb->insert_size[0] - tb->rbytes) < 0 )
682			    n_rem = 0;
683
684			{
685			  int version;
686			  unsigned long temp_rem = n_rem;
687
688			  version = ih_version (B_N_PITEM_HEAD (tb->R[0],0));
689			  if (is_indirect_le_key(version,B_N_PKEY(tb->R[0],0))){
690			      temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits -
691					 UNFM_P_SHIFT);
692			  }
693			  set_le_key_k_offset (version, B_N_PKEY(tb->R[0],0),
694					       le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + temp_rem);
695			  set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0]),
696					       le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + temp_rem);
697			}
698/*		  k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
699		  k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
700			do_balance_mark_internal_dirty (tb, tb->CFR[0], 0);
701
702			/* Append part of body into R[0] */
703			bi.tb = tb;
704			bi.bi_bh = tb->R[0];
705			bi.bi_parent = tb->FR[0];
706			bi.bi_position = get_right_neighbor_position (tb, 0);
707			if ( n_rem > zeros_num ) {
708			    r_zeros_number = 0;
709			    r_body = body + n_rem - zeros_num;
710			}
711			else {
712			    r_body = body;
713			    r_zeros_number = zeros_num - n_rem;
714			    zeros_num -= r_zeros_number;
715			}
716
717			leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeros_number);
718
719			if (is_indirect_le_ih (B_N_PITEM_HEAD(tb->R[0],0))) {
720#if 0
721			    RFALSE( n_rem,
722				    "PAP-12160: paste more than one unformatted node pointer");
723#endif
724			    set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), 0);
725			}
726			tb->insert_size[0] = n_rem;
727			if ( ! n_rem )
728			    pos_in_item ++;
729		    }
730		}
731		else /* pasted item in whole falls into R[0] */
732		{
733		    struct item_head * pasted;
734
735		    ret_val = leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
736		    /* append item in R[0] */
737		    if ( pos_in_item >= 0 ) {
738			bi.tb = tb;
739			bi.bi_bh = tb->R[0];
740			bi.bi_parent = tb->FR[0];
741			bi.bi_position = get_right_neighbor_position (tb, 0);
742			leaf_paste_in_buffer(&bi,item_pos - n + tb->rnum[0], pos_in_item,
743					     tb->insert_size[0],body, zeros_num);
744		    }
745
746		    /* paste new entry, if item is directory item */
747		    pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
748		    if (is_direntry_le_ih (pasted) && pos_in_item >= 0 ) {
749			leaf_paste_entries (
750			    bi.bi_bh, item_pos - n + tb->rnum[0], pos_in_item, 1,
751			    (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
752			    );
753			if ( ! pos_in_item ) {
754
755			    RFALSE( item_pos - n + tb->rnum[0],
756				    "PAP-12165: directory item must be first item of node when pasting is in 0th position");
757
758			    /* update delimiting keys */
759			    replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
760			}
761		    }
762
763		    if (is_indirect_le_ih (pasted))
764			set_ih_free_space (pasted, 0);
765		    zeros_num = tb->insert_size[0] = 0;
766		}
767	    }
768	    else /* new item doesn't fall into R[0] */
769	    {
770		leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
771	    }
772	    break;
773	default:    /* cases d and t */
774	    reiserfs_panic (tb->tb_sb, "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
775			    (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
776	}
777
778    }	/* tb->rnum[0] > 0 */
779
780
781    RFALSE( tb->blknum[0] > 3,
782	    "PAP-12180: blknum can not be %d. It must be <= 3",  tb->blknum[0]);
783    RFALSE( tb->blknum[0] < 0,
784	    "PAP-12185: blknum can not be %d. It must be >= 0",  tb->blknum[0]);
785
786    /* if while adding to a node we discover that it is possible to split
787       it in two, and merge the left part into the left neighbor and the
788       right part into the right neighbor, eliminating the node */
789    if ( tb->blknum[0] == 0 ) { /* node S[0] is empty now */
790
791	RFALSE( ! tb->lnum[0] || ! tb->rnum[0],
792	        "PAP-12190: lnum and rnum must not be zero");
793	/* if insertion was done before 0-th position in R[0], right
794	   delimiting key of the tb->L[0]'s and left delimiting key are
795	   not set correctly */
796	if (tb->CFL[0]) {
797	    if (!tb->CFR[0])
798		reiserfs_panic (tb->tb_sb, "vs-12195: balance_leaf: CFR not initialized");
799	    copy_key (B_N_PDELIM_KEY (tb->CFL[0], tb->lkey[0]), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]));
800	    do_balance_mark_internal_dirty (tb, tb->CFL[0], 0);
801	}
802
803	reiserfs_invalidate_buffer(tb,tbS0);
804	return 0;
805    }
806
807
808    /* Fill new nodes that appear in place of S[0] */
809
810    /* I am told that this copying is because we need an array to enable
811       the looping code. -Hans */
812    snum[0] = tb->s1num,
813	snum[1] = tb->s2num;
814    sbytes[0] = tb->s1bytes;
815    sbytes[1] = tb->s2bytes;
816    for( i = tb->blknum[0] - 2; i >= 0; i-- ) {
817
818	RFALSE( !snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i, snum[i]);
819
820	/* here we shift from S to S_new nodes */
821
822	S_new[i] = get_FEB(tb);
823
824	/* initialized block type and tree level */
825        set_blkh_level( B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL );
826
827
828	n = B_NR_ITEMS(tbS0);
829
830	switch (flag) {
831	case M_INSERT:   /* insert item */
832
833	    if ( n - snum[i] < item_pos )
834	    { /* new item or it's part falls to first new node S_new[i]*/
835		if ( item_pos == n - snum[i] + 1 && sbytes[i] != -1 )
836		{ /* part of new item falls into S_new[i] */
837		    int old_key_comp, old_len, r_zeros_number;
838		    const char * r_body;
839		    int version;
840
841		    /* Move snum[i]-1 items from S[0] to S_new[i] */
842		    leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, -1, S_new[i]);
843		    /* Remember key component and item length */
844		    version = ih_version (ih);
845                    old_key_comp = le_ih_k_offset( ih );
846		    old_len = ih_item_len(ih);
847
848		    /* Calculate key component and item length to insert into S_new[i] */
849                    set_le_ih_k_offset( ih,
850                                le_ih_k_offset(ih) + ((old_len - sbytes[i] )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT:0)) );
851
852		    put_ih_item_len( ih, sbytes[i] );
853
854		    /* Insert part of the item into S_new[i] before 0-th item */
855		    bi.tb = tb;
856		    bi.bi_bh = S_new[i];
857		    bi.bi_parent = NULL;
858		    bi.bi_position = 0;
859
860		    if ( (old_len - sbytes[i]) > zeros_num ) {
861			r_zeros_number = 0;
862			r_body = body + (old_len - sbytes[i]) - zeros_num;
863		    }
864		    else {
865			r_body = body;
866			r_zeros_number = zeros_num - (old_len - sbytes[i]);
867			zeros_num -= r_zeros_number;
868		    }
869
870		    leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number);
871
872		    /* Calculate key component and item length to insert into S[i] */
873                    set_le_ih_k_offset( ih, old_key_comp );
874		    put_ih_item_len( ih, old_len - sbytes[i] );
875		    tb->insert_size[0] -= sbytes[i];
876		}
877		else /* whole new item falls into S_new[i] */
878		{
879		    /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
880		    leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, sbytes[i], S_new[i]);
881
882		    /* Insert new item into S_new[i] */
883		    bi.tb = tb;
884		    bi.bi_bh = S_new[i];
885		    bi.bi_parent = NULL;
886		    bi.bi_position = 0;
887		    leaf_insert_into_buf (&bi, item_pos - n + snum[i] - 1, ih, body, zeros_num);
888
889		    zeros_num = tb->insert_size[0] = 0;
890		}
891	    }
892
893	    else /* new item or it part don't falls into S_new[i] */
894	    {
895		leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
896	    }
897	    break;
898
899	case M_PASTE:   /* append item */
900
901	    if ( n - snum[i] <= item_pos )  /* pasted item or part if it falls to S_new[i] */
902	    {
903		if ( item_pos == n - snum[i] && sbytes[i] != -1 )
904		{ /* we must shift part of the appended item */
905		    struct item_head * aux_ih;
906
907		    RFALSE( ih, "PAP-12210: ih must be 0");
908
909		    if ( is_direntry_le_ih (aux_ih = B_N_PITEM_HEAD(tbS0,item_pos))) {
910			/* we append to directory item */
911
912			int entry_count;
913
914			entry_count = ih_entry_count(aux_ih);
915
916			if ( entry_count - sbytes[i] < pos_in_item  && pos_in_item <= entry_count ) {
917			    /* new directory entry falls into S_new[i] */
918
919			    RFALSE( ! tb->insert_size[0],
920				    "PAP-12215: insert_size is already 0");
921			    RFALSE( sbytes[i] - 1 >= entry_count,
922				    "PAP-12220: there are no so much entries (%d), only %d",
923				    sbytes[i] - 1, entry_count);
924
925			    /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
926			    leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i]-1, S_new[i]);
927			    /* Paste given directory entry to directory item */
928			    bi.tb = tb;
929			    bi.bi_bh = S_new[i];
930			    bi.bi_parent = NULL;
931			    bi.bi_position = 0;
932			    leaf_paste_in_buffer (&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
933						  tb->insert_size[0], body,zeros_num);
934			    /* paste new directory entry */
935			    leaf_paste_entries (
936				bi.bi_bh, 0, pos_in_item - entry_count + sbytes[i] - 1,
937				1, (struct reiserfs_de_head *)body, body + DEH_SIZE,
938				tb->insert_size[0]
939				);
940			    tb->insert_size[0] = 0;
941			    pos_in_item++;
942			} else { /* new directory entry doesn't fall into S_new[i] */
943			    leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
944			}
945		    }
946		    else /* regular object */
947		    {
948			int n_shift, n_rem, r_zeros_number;
949			const char * r_body;
950
951			RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)) ||
952			        tb->insert_size[0] <= 0,
953			        "PAP-12225: item too short or insert_size <= 0");
954
955			/* Calculate number of bytes which must be shifted from appended item */
956			n_shift = sbytes[i] - tb->insert_size[0];
957			if ( n_shift < 0 )
958			    n_shift = 0;
959			leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
960
961			/* Calculate number of bytes which must remain in body after append to S_new[i] */
962			n_rem = tb->insert_size[0] - sbytes[i];
963			if ( n_rem < 0 )
964			    n_rem = 0;
965			/* Append part of body into S_new[0] */
966			bi.tb = tb;
967			bi.bi_bh = S_new[i];
968			bi.bi_parent = NULL;
969			bi.bi_position = 0;
970
971			if ( n_rem > zeros_num ) {
972			    r_zeros_number = 0;
973			    r_body = body + n_rem - zeros_num;
974			}
975			else {
976			    r_body = body;
977			    r_zeros_number = zeros_num - n_rem;
978			    zeros_num -= r_zeros_number;
979			}
980
981			leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0]-n_rem, r_body,r_zeros_number);
982			{
983			    struct item_head * tmp;
984
985			    tmp = B_N_PITEM_HEAD(S_new[i],0);
986			    if (is_indirect_le_ih (tmp)) {
987				set_ih_free_space (tmp, 0);
988				set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
989					            (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
990			    } else {
991				set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
992				                    n_rem );
993			    }
994			}
995
996			tb->insert_size[0] = n_rem;
997			if ( ! n_rem )
998			    pos_in_item++;
999		    }
1000		}
1001		else
1002		    /* item falls wholly into S_new[i] */
1003		{
1004		    int ret_val;
1005		    struct item_head * pasted;
1006
1007#ifdef CONFIG_REISERFS_CHECK
1008		    struct item_head * ih = B_N_PITEM_HEAD(tbS0,item_pos);
1009
1010		    if ( ! is_direntry_le_ih(ih) && (pos_in_item != ih_item_len(ih) ||
1011						     tb->insert_size[0] <= 0) )
1012			reiserfs_panic (tb->tb_sb, "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
1013#endif /* CONFIG_REISERFS_CHECK */
1014
1015		    ret_val = leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
1016
1017		    RFALSE( ret_val,
1018			    "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1019			    ret_val);
1020
1021		    /* paste into item */
1022		    bi.tb = tb;
1023		    bi.bi_bh = S_new[i];
1024		    bi.bi_parent = NULL;
1025		    bi.bi_position = 0;
1026		    leaf_paste_in_buffer(&bi, item_pos - n + snum[i], pos_in_item, tb->insert_size[0], body, zeros_num);
1027
1028		    pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
1029		    if (is_direntry_le_ih (pasted))
1030		    {
1031			leaf_paste_entries (
1032			    bi.bi_bh, item_pos - n + snum[i], pos_in_item, 1,
1033			    (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
1034			    );
1035		    }
1036
1037		    /* if we paste to indirect item update ih_free_space */
1038		    if (is_indirect_le_ih (pasted))
1039			set_ih_free_space (pasted, 0);
1040		    zeros_num = tb->insert_size[0] = 0;
1041		}
1042	    }
1043
1044	    else /* pasted item doesn't fall into S_new[i] */
1045	    {
1046		leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
1047	    }
1048	    break;
1049	default:    /* cases d and t */
1050	    reiserfs_panic (tb->tb_sb, "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
1051			    (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1052	}
1053
1054	memcpy (insert_key + i,B_N_PKEY(S_new[i],0),KEY_SIZE);
1055	insert_ptr[i] = S_new[i];
1056
1057	RFALSE (!buffer_journaled (S_new [i]) || buffer_journal_dirty (S_new [i]) ||
1058		buffer_dirty (S_new [i]),
1059		"PAP-12247: S_new[%d] : (%b)", i, S_new[i]);
1060    }
1061
1062    /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1063       affected item which remains in S */
1064    if ( 0 <= item_pos && item_pos < tb->s0num )
1065    { /* if we must insert or append into buffer S[0] */
1066
1067	switch (flag)
1068	{
1069	case M_INSERT:   /* insert item into S[0] */
1070	    bi.tb = tb;
1071	    bi.bi_bh = tbS0;
1072	    bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1073	    bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1074	    leaf_insert_into_buf (&bi, item_pos, ih, body, zeros_num);
1075
1076	    /* If we insert the first key change the delimiting key */
1077	    if( item_pos == 0 ) {
1078		if (tb->CFL[0]) /* can be 0 in reiserfsck */
1079		    replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
1080
1081	    }
1082	    break;
1083
1084	case M_PASTE: {  /* append item in S[0] */
1085	    struct item_head * pasted;
1086
1087	    pasted = B_N_PITEM_HEAD (tbS0, item_pos);
1088	    /* when directory, may be new entry already pasted */
1089	    if (is_direntry_le_ih (pasted)) {
1090		if ( pos_in_item >= 0 &&
1091		    pos_in_item <= ih_entry_count(pasted) ) {
1092
1093		    RFALSE( ! tb->insert_size[0],
1094			    "PAP-12260: insert_size is 0 already");
1095
1096		    /* prepare space */
1097		    bi.tb = tb;
1098		    bi.bi_bh = tbS0;
1099		    bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1100		    bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1101		    leaf_paste_in_buffer(&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);
1102
1103		    /* paste entry */
1104		    leaf_paste_entries (
1105			bi.bi_bh, item_pos, pos_in_item, 1, (struct reiserfs_de_head *)body,
1106			body + DEH_SIZE, tb->insert_size[0]
1107			);
1108		    if ( ! item_pos && ! pos_in_item ) {
1109			RFALSE( !tb->CFL[0] || !tb->L[0],
1110				"PAP-12270: CFL[0]/L[0] must be specified");
1111			if (tb->CFL[0]) {
1112			    replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
1113
1114			}
1115		    }
1116		    tb->insert_size[0] = 0;
1117		}
1118	    } else { /* regular object */
1119		if ( pos_in_item == ih_item_len(pasted) ) {
1120
1121		    RFALSE( tb->insert_size[0] <= 0,
1122			    "PAP-12275: insert size must not be %d",
1123                            tb->insert_size[0]);
1124		    bi.tb = tb;
1125		    bi.bi_bh = tbS0;
1126		    bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1127		    bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1128		    leaf_paste_in_buffer (&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);
1129
1130		    if (is_indirect_le_ih (pasted)) {
1131#if 0
1132			RFALSE( tb->insert_size[0] != UNFM_P_SIZE,
1133				"PAP-12280: insert_size for indirect item must be %d, not %d",
1134				UNFM_P_SIZE, tb->insert_size[0]);
1135#endif
1136			set_ih_free_space (pasted, 0);
1137		    }
1138		    tb->insert_size[0] = 0;
1139		}
1140
1141#ifdef CONFIG_REISERFS_CHECK
1142		else {
1143		    if ( tb->insert_size[0] ) {
1144			print_cur_tb ("12285");
1145			reiserfs_panic (tb->tb_sb, "PAP-12285: balance_leaf: insert_size must be 0 (%d)", tb->insert_size[0]);
1146		    }
1147		}
1148#endif /* CONFIG_REISERFS_CHECK */
1149
1150	    }
1151	} /* case M_PASTE: */
1152	}
1153    }
1154
1155#ifdef CONFIG_REISERFS_CHECK
1156    if ( flag == M_PASTE && tb->insert_size[0] ) {
1157	print_cur_tb ("12290");
1158	reiserfs_panic (tb->tb_sb, "PAP-12290: balance_leaf: insert_size is still not 0 (%d)", tb->insert_size[0]);
1159    }
1160#endif /* CONFIG_REISERFS_CHECK */
1161
1162    return 0;
1163} /* Leaf level of the tree is balanced (end of balance_leaf) */
1164
1165
1166
1167/* Make empty node */
1168void make_empty_node (struct buffer_info * bi)
1169{
1170    struct block_head * blkh;
1171
1172    RFALSE( bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1173
1174    blkh = B_BLK_HEAD(bi->bi_bh);
1175    set_blkh_nr_item( blkh, 0 );
1176    set_blkh_free_space( blkh, MAX_CHILD_SIZE(bi->bi_bh) );
1177
1178    if (bi->bi_parent)
1179	B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
1180}
1181
1182
1183/* Get first empty buffer */
1184struct buffer_head * get_FEB (struct tree_balance * tb)
1185{
1186    int i;
1187    struct buffer_head * first_b;
1188    struct buffer_info bi;
1189
1190    for (i = 0; i < MAX_FEB_SIZE; i ++)
1191	if (tb->FEB[i] != 0)
1192	    break;
1193
1194    if (i == MAX_FEB_SIZE)
1195	reiserfs_panic(tb->tb_sb, "vs-12300: get_FEB: FEB list is empty");
1196
1197    bi.tb = tb;
1198    bi.bi_bh = first_b = tb->FEB[i];
1199    bi.bi_parent = NULL;
1200    bi.bi_position = 0;
1201    make_empty_node (&bi);
1202    set_buffer_uptodate(first_b);
1203    tb->FEB[i] = NULL;
1204    tb->used[i] = first_b;
1205
1206    return(first_b);
1207}
1208
1209
1210/* This is now used because reiserfs_free_block has to be able to
1211** schedule.
1212*/
1213static void store_thrown (struct tree_balance * tb, struct buffer_head * bh)
1214{
1215    int i;
1216
1217    if (buffer_dirty (bh))
1218      reiserfs_warning (tb->tb_sb, "store_thrown deals with dirty buffer");
1219    for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i ++)
1220	if (!tb->thrown[i]) {
1221	    tb->thrown[i] = bh;
1222	    get_bh(bh) ; /* free_thrown puts this */
1223	    return;
1224	}
1225    reiserfs_warning (tb->tb_sb, "store_thrown: too many thrown buffers");
1226}
1227
1228static void free_thrown(struct tree_balance *tb) {
1229    int i ;
1230    b_blocknr_t blocknr ;
1231    for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i++) {
1232	if (tb->thrown[i]) {
1233	    blocknr = tb->thrown[i]->b_blocknr ;
1234	    if (buffer_dirty (tb->thrown[i]))
1235	      reiserfs_warning (tb->tb_sb,
1236				"free_thrown deals with dirty buffer %d",
1237				blocknr);
1238	    brelse(tb->thrown[i]) ; /* incremented in store_thrown */
1239	    reiserfs_free_block (tb->transaction_handle, NULL, blocknr, 0);
1240	}
1241    }
1242}
1243
1244void reiserfs_invalidate_buffer (struct tree_balance * tb, struct buffer_head * bh)
1245{
1246    struct block_head *blkh;
1247    blkh = B_BLK_HEAD(bh);
1248    set_blkh_level( blkh, FREE_LEVEL );
1249    set_blkh_nr_item( blkh, 0 );
1250
1251    clear_buffer_dirty(bh);
1252    store_thrown (tb, bh);
1253}
1254
1255/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1256void replace_key (struct tree_balance * tb, struct buffer_head * dest, int n_dest,
1257		  struct buffer_head * src, int n_src)
1258{
1259
1260    RFALSE( dest == NULL || src == NULL,
1261	    "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1262	    src, dest);
1263    RFALSE( ! B_IS_KEYS_LEVEL (dest),
1264	    "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1265	    dest);
1266    RFALSE( n_dest < 0 || n_src < 0,
1267	    "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1268    RFALSE( n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1269	    "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1270	    n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1271
1272    if (B_IS_ITEMS_LEVEL (src))
1273	/* source buffer contains leaf node */
1274	memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PITEM_HEAD(src,n_src), KEY_SIZE);
1275    else
1276	memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PDELIM_KEY(src,n_src), KEY_SIZE);
1277
1278    do_balance_mark_internal_dirty (tb, dest, 0);
1279}
1280
1281
1282int get_left_neighbor_position (
1283				struct tree_balance * tb,
1284				int h
1285				)
1286{
1287  int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1);
1288
1289  RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FL[h] == 0,
1290	  "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1291	  h, tb->FL[h], h, PATH_H_PPARENT (tb->tb_path, h));
1292
1293  if (Sh_position == 0)
1294    return B_NR_ITEMS (tb->FL[h]);
1295  else
1296    return Sh_position - 1;
1297}
1298
1299
1300int get_right_neighbor_position (struct tree_balance * tb, int h)
1301{
1302  int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1);
1303
1304  RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FR[h] == 0,
1305	  "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1306	  h, PATH_H_PPARENT (tb->tb_path, h), h, tb->FR[h]);
1307
1308  if (Sh_position == B_NR_ITEMS (PATH_H_PPARENT (tb->tb_path, h)))
1309    return 0;
1310  else
1311    return Sh_position + 1;
1312}
1313
1314
1315#ifdef CONFIG_REISERFS_CHECK
1316
1317int is_reusable (struct super_block * s, b_blocknr_t block, int bit_value);
1318static void check_internal_node (struct super_block * s, struct buffer_head * bh, char * mes)
1319{
1320  struct disk_child * dc;
1321  int i;
1322
1323  RFALSE( !bh, "PAP-12336: bh == 0");
1324
1325  if (!bh || !B_IS_IN_TREE (bh))
1326    return;
1327
1328  RFALSE( !buffer_dirty (bh) &&
1329	  !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1330	  "PAP-12337: buffer (%b) must be dirty", bh);
1331  dc = B_N_CHILD (bh, 0);
1332
1333  for (i = 0; i <= B_NR_ITEMS (bh); i ++, dc ++) {
1334    if (!is_reusable (s, dc_block_number(dc), 1) ) {
1335      print_cur_tb (mes);
1336      reiserfs_panic (s, "PAP-12338: check_internal_node: invalid child pointer %y in %b", dc, bh);
1337    }
1338  }
1339}
1340
1341
1342static int locked_or_not_in_tree (struct buffer_head * bh, char * which)
1343{
1344  if ( (!buffer_journal_prepared (bh) && buffer_locked (bh)) ||
1345        !B_IS_IN_TREE (bh) ) {
1346    reiserfs_warning (NULL, "vs-12339: locked_or_not_in_tree: %s (%b)",
1347                      which, bh);
1348    return 1;
1349  }
1350  return 0;
1351}
1352
1353
1354static int check_before_balancing (struct tree_balance * tb)
1355{
1356  int retval = 0;
1357
1358  if ( cur_tb ) {
1359    reiserfs_panic (tb->tb_sb, "vs-12335: check_before_balancing: "
1360		    "suspect that schedule occurred based on cur_tb not being null at this point in code. "
1361		    "do_balance cannot properly handle schedule occurring while it runs.");
1362  }
1363
1364  /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1365     prepped all of these for us). */
1366  if ( tb->lnum[0] ) {
1367    retval |= locked_or_not_in_tree (tb->L[0], "L[0]");
1368    retval |= locked_or_not_in_tree (tb->FL[0], "FL[0]");
1369    retval |= locked_or_not_in_tree (tb->CFL[0], "CFL[0]");
1370    check_leaf (tb->L[0]);
1371  }
1372  if ( tb->rnum[0] ) {
1373    retval |= locked_or_not_in_tree (tb->R[0], "R[0]");
1374    retval |= locked_or_not_in_tree (tb->FR[0], "FR[0]");
1375    retval |= locked_or_not_in_tree (tb->CFR[0], "CFR[0]");
1376    check_leaf (tb->R[0]);
1377  }
1378  retval |= locked_or_not_in_tree (PATH_PLAST_BUFFER (tb->tb_path), "S[0]");
1379  check_leaf (PATH_PLAST_BUFFER (tb->tb_path));
1380
1381  return retval;
1382}
1383
1384
1385static void check_after_balance_leaf (struct tree_balance * tb)
1386{
1387    if (tb->lnum[0]) {
1388	if (B_FREE_SPACE (tb->L[0]) !=
1389	    MAX_CHILD_SIZE (tb->L[0]) - dc_size(B_N_CHILD (tb->FL[0], get_left_neighbor_position (tb, 0)))) {
1390	    print_cur_tb ("12221");
1391	    reiserfs_panic (tb->tb_sb, "PAP-12355: check_after_balance_leaf: shift to left was incorrect");
1392	}
1393    }
1394    if (tb->rnum[0]) {
1395	if (B_FREE_SPACE (tb->R[0]) !=
1396	    MAX_CHILD_SIZE (tb->R[0]) - dc_size(B_N_CHILD (tb->FR[0], get_right_neighbor_position (tb, 0)))) {
1397	    print_cur_tb ("12222");
1398	    reiserfs_panic (tb->tb_sb, "PAP-12360: check_after_balance_leaf: shift to right was incorrect");
1399	}
1400    }
1401    if (PATH_H_PBUFFER(tb->tb_path,1) &&
1402	(B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) !=
1403		    (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) -
1404		    dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1),
1405		    PATH_H_POSITION (tb->tb_path, 1)))) )) {
1406	int left = B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0));
1407	int right = (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) -
1408		    dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1),
1409			PATH_H_POSITION (tb->tb_path, 1))));
1410	print_cur_tb ("12223");
1411	reiserfs_warning (tb->tb_sb,
1412	    "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1413    	    "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1414	    left,
1415	    MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)),
1416	    PATH_H_PBUFFER(tb->tb_path,1),
1417	    PATH_H_POSITION (tb->tb_path, 1),
1418	    dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1), PATH_H_POSITION (tb->tb_path, 1 )) ),
1419	    right );
1420	reiserfs_panic (tb->tb_sb, "PAP-12365: check_after_balance_leaf: S is incorrect");
1421    }
1422}
1423
1424
1425static void check_leaf_level (struct tree_balance * tb)
1426{
1427  check_leaf (tb->L[0]);
1428  check_leaf (tb->R[0]);
1429  check_leaf (PATH_PLAST_BUFFER (tb->tb_path));
1430}
1431
1432static void check_internal_levels (struct tree_balance * tb)
1433{
1434  int h;
1435
1436  /* check all internal nodes */
1437  for (h = 1; tb->insert_size[h]; h ++) {
1438    check_internal_node (tb->tb_sb, PATH_H_PBUFFER (tb->tb_path, h), "BAD BUFFER ON PATH");
1439    if (tb->lnum[h])
1440      check_internal_node (tb->tb_sb, tb->L[h], "BAD L");
1441    if (tb->rnum[h])
1442      check_internal_node (tb->tb_sb, tb->R[h], "BAD R");
1443  }
1444
1445}
1446
1447#endif
1448
1449
1450
1451
1452
1453
1454/* Now we have all of the buffers that must be used in balancing of
1455   the tree.  We rely on the assumption that schedule() will not occur
1456   while do_balance works. ( Only interrupt handlers are acceptable.)
1457   We balance the tree according to the analysis made before this,
1458   using buffers already obtained.  For SMP support it will someday be
1459   necessary to add ordered locking of tb. */
1460
1461/* Some interesting rules of balancing:
1462
1463   we delete a maximum of two nodes per level per balancing: we never
1464   delete R, when we delete two of three nodes L, S, R then we move
1465   them into R.
1466
1467   we only delete L if we are deleting two nodes, if we delete only
1468   one node we delete S
1469
1470   if we shift leaves then we shift as much as we can: this is a
1471   deliberate policy of extremism in node packing which results in
1472   higher average utilization after repeated random balance operations
1473   at the cost of more memory copies and more balancing as a result of
1474   small insertions to full nodes.
1475
1476   if we shift internal nodes we try to evenly balance the node
1477   utilization, with consequent less balancing at the cost of lower
1478   utilization.
1479
1480   one could argue that the policy for directories in leaves should be
1481   that of internal nodes, but we will wait until another day to
1482   evaluate this....  It would be nice to someday measure and prove
1483   these assumptions as to what is optimal....
1484
1485*/
1486
1487static inline void do_balance_starts (struct tree_balance *tb)
1488{
1489    /* use print_cur_tb() to see initial state of struct
1490       tree_balance */
1491
1492    /* store_print_tb (tb); */
1493
1494    /* do not delete, just comment it out */
1495/*    print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
1496	     "check");*/
1497    RFALSE( check_before_balancing (tb), "PAP-12340: locked buffers in TB");
1498#ifdef CONFIG_REISERFS_CHECK
1499    cur_tb = tb;
1500#endif
1501}
1502
1503
1504static inline void do_balance_completed (struct tree_balance * tb)
1505{
1506
1507#ifdef CONFIG_REISERFS_CHECK
1508    check_leaf_level (tb);
1509    check_internal_levels (tb);
1510    cur_tb = NULL;
1511#endif
1512
1513    /* reiserfs_free_block is no longer schedule safe.  So, we need to
1514    ** put the buffers we want freed on the thrown list during do_balance,
1515    ** and then free them now
1516    */
1517
1518    REISERFS_SB(tb->tb_sb)->s_do_balance ++;
1519
1520
1521    /* release all nodes hold to perform the balancing */
1522    unfix_nodes(tb);
1523
1524    free_thrown(tb) ;
1525}
1526
1527
1528
1529
1530
1531void do_balance (struct tree_balance * tb, /* tree_balance structure */
1532		 struct item_head * ih,	   /* item header of inserted item */
1533		 const char * body,  /* body  of inserted item or bytes to paste */
1534		 int flag)  /* i - insert, d - delete
1535			       c - cut, p - paste
1536
1537			       Cut means delete part of an item
1538			       (includes removing an entry from a
1539			       directory).
1540
1541			       Delete means delete whole item.
1542
1543			       Insert means add a new item into the
1544			       tree.
1545
1546			       Paste means to append to the end of an
1547			       existing file or to insert a directory
1548			       entry.  */
1549{
1550    int child_pos, /* position of a child node in its parent */
1551	h;	   /* level of the tree being processed */
1552    struct item_head insert_key[2]; /* in our processing of one level
1553				       we sometimes determine what
1554				       must be inserted into the next
1555				       higher level.  This insertion
1556				       consists of a key or two keys
1557				       and their corresponding
1558				       pointers */
1559    struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
1560					  level */
1561
1562    tb->tb_mode = flag;
1563    tb->need_balance_dirty = 0;
1564
1565    if (FILESYSTEM_CHANGED_TB(tb)) {
1566        reiserfs_panic(tb->tb_sb, "clm-6000: do_balance, fs generation has changed\n") ;
1567    }
1568    /* if we have no real work to do  */
1569    if ( ! tb->insert_size[0] ) {
1570	reiserfs_warning (tb->tb_sb,
1571			  "PAP-12350: do_balance: insert_size == 0, mode == %c",
1572			  flag);
1573	unfix_nodes(tb);
1574	return;
1575    }
1576
1577    atomic_inc (&(fs_generation (tb->tb_sb)));
1578    do_balance_starts (tb);
1579
1580	/* balance leaf returns 0 except if combining L R and S into
1581	   one node.  see balance_internal() for explanation of this
1582	   line of code.*/
1583	child_pos = PATH_H_B_ITEM_ORDER (tb->tb_path, 0) +
1584	  balance_leaf (tb, ih, body, flag, insert_key, insert_ptr);
1585
1586#ifdef CONFIG_REISERFS_CHECK
1587    check_after_balance_leaf (tb);
1588#endif
1589
1590    /* Balance internal level of the tree. */
1591    for ( h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++ )
1592	child_pos = balance_internal (tb, h, child_pos, insert_key, insert_ptr);
1593
1594
1595    do_balance_completed (tb);
1596
1597}
1598