do_balan.c revision 7f447ba825a0752c0450ebd129f187484e86fe8b
1/*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5/*
6 * Now we have all buffers that must be used in balancing of the tree
7 * Further calculations can not cause schedule(), and thus the buffer
8 * tree will be stable until the balancing will be finished
9 * balance the tree according to the analysis made before,
10 * and using buffers obtained after all above.
11 */
12
13#include <asm/uaccess.h>
14#include <linux/time.h>
15#include "reiserfs.h"
16#include <linux/buffer_head.h>
17#include <linux/kernel.h>
18
19static inline void buffer_info_init_left(struct tree_balance *tb,
20                                         struct buffer_info *bi)
21{
22	bi->tb          = tb;
23	bi->bi_bh       = tb->L[0];
24	bi->bi_parent   = tb->FL[0];
25	bi->bi_position = get_left_neighbor_position(tb, 0);
26}
27
28static inline void buffer_info_init_right(struct tree_balance *tb,
29                                          struct buffer_info *bi)
30{
31	bi->tb          = tb;
32	bi->bi_bh       = tb->R[0];
33	bi->bi_parent   = tb->FR[0];
34	bi->bi_position = get_right_neighbor_position(tb, 0);
35}
36
37static inline void buffer_info_init_tbS0(struct tree_balance *tb,
38                                         struct buffer_info *bi)
39{
40	bi->tb          = tb;
41	bi->bi_bh        = PATH_PLAST_BUFFER(tb->tb_path);
42	bi->bi_parent   = PATH_H_PPARENT(tb->tb_path, 0);
43	bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
44}
45
46static inline void buffer_info_init_bh(struct tree_balance *tb,
47                                       struct buffer_info *bi,
48                                       struct buffer_head *bh)
49{
50	bi->tb          = tb;
51	bi->bi_bh       = bh;
52	bi->bi_parent   = NULL;
53	bi->bi_position = 0;
54}
55
56inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
57				       struct buffer_head *bh, int flag)
58{
59	journal_mark_dirty(tb->transaction_handle, bh);
60}
61
62#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
63#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
64
65/*
66 * summary:
67 *  if deleting something ( tb->insert_size[0] < 0 )
68 *    return(balance_leaf_when_delete()); (flag d handled here)
69 *  else
70 *    if lnum is larger than 0 we put items into the left node
71 *    if rnum is larger than 0 we put items into the right node
72 *    if snum1 is larger than 0 we put items into the new node s1
73 *    if snum2 is larger than 0 we put items into the new node s2
74 * Note that all *num* count new items being created.
75 *
76 * It would be easier to read balance_leaf() if each of these summary
77 * lines was a separate procedure rather than being inlined.  I think
78 * that there are many passages here and in balance_leaf_when_delete() in
79 * which two calls to one procedure can replace two passages, and it
80 * might save cache space and improve software maintenance costs to do so.
81 *
82 * Vladimir made the perceptive comment that we should offload most of
83 * the decision making in this function into fix_nodes/check_balance, and
84 * then create some sort of structure in tb that says what actions should
85 * be performed by do_balance.
86 *
87 * -Hans
88 */
89
90/*
91 * Balance leaf node in case of delete or cut: insert_size[0] < 0
92 *
93 * lnum, rnum can have values >= -1
94 *	-1 means that the neighbor must be joined with S
95 *	 0 means that nothing should be done with the neighbor
96 *	>0 means to shift entirely or partly the specified number of items
97 *         to the neighbor
98 */
99static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
100{
101	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
102	int item_pos = PATH_LAST_POSITION(tb->tb_path);
103	int pos_in_item = tb->tb_path->pos_in_item;
104	struct buffer_info bi;
105	int n;
106	struct item_head *ih;
107
108	RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
109	       "vs- 12000: level: wrong FR %z", tb->FR[0]);
110	RFALSE(tb->blknum[0] > 1,
111	       "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
112	RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
113	       "PAP-12010: tree can not be empty");
114
115	ih = item_head(tbS0, item_pos);
116	buffer_info_init_tbS0(tb, &bi);
117
118	/* Delete or truncate the item */
119
120	switch (flag) {
121	case M_DELETE:		/* delete item in S[0] */
122
123		RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
124		       "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
125		       -tb->insert_size[0], ih);
126
127		leaf_delete_items(&bi, 0, item_pos, 1, -1);
128
129		if (!item_pos && tb->CFL[0]) {
130			if (B_NR_ITEMS(tbS0)) {
131				replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
132					    0);
133			} else {
134				if (!PATH_H_POSITION(tb->tb_path, 1))
135					replace_key(tb, tb->CFL[0], tb->lkey[0],
136						    PATH_H_PPARENT(tb->tb_path,
137								   0), 0);
138			}
139		}
140
141		RFALSE(!item_pos && !tb->CFL[0],
142		       "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
143		       tb->L[0]);
144
145		break;
146
147	case M_CUT:{		/* cut item in S[0] */
148			if (is_direntry_le_ih(ih)) {
149
150				/*
151				 * UFS unlink semantics are such that you
152				 * can only delete one directory entry at
153				 * a time.
154				 */
155
156				/*
157				 * when we cut a directory tb->insert_size[0]
158				 * means number of entries to be cut (always 1)
159				 */
160				tb->insert_size[0] = -1;
161				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
162						     -tb->insert_size[0]);
163
164				RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
165				       "PAP-12030: can not change delimiting key. CFL[0]=%p",
166				       tb->CFL[0]);
167
168				if (!item_pos && !pos_in_item && tb->CFL[0]) {
169					replace_key(tb, tb->CFL[0], tb->lkey[0],
170						    tbS0, 0);
171				}
172			} else {
173				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
174						     -tb->insert_size[0]);
175
176				RFALSE(!ih_item_len(ih),
177				       "PAP-12035: cut must leave non-zero dynamic length of item");
178			}
179			break;
180		}
181
182	default:
183		print_cur_tb("12040");
184		reiserfs_panic(tb->tb_sb, "PAP-12040",
185			       "unexpected mode: %s(%d)",
186			       (flag ==
187				M_PASTE) ? "PASTE" : ((flag ==
188						       M_INSERT) ? "INSERT" :
189						      "UNKNOWN"), flag);
190	}
191
192	/*
193	 * the rule is that no shifting occurs unless by shifting
194	 * a node can be freed
195	 */
196	n = B_NR_ITEMS(tbS0);
197	/* L[0] takes part in balancing */
198	if (tb->lnum[0]) {
199		/* L[0] must be joined with S[0] */
200		if (tb->lnum[0] == -1) {
201			/* R[0] must be also joined with S[0] */
202			if (tb->rnum[0] == -1) {
203				if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
204					/*
205					 * all contents of all the 3 buffers
206					 * will be in L[0]
207					 */
208					if (PATH_H_POSITION(tb->tb_path, 1) == 0
209					    && 1 < B_NR_ITEMS(tb->FR[0]))
210						replace_key(tb, tb->CFL[0],
211							    tb->lkey[0],
212							    tb->FR[0], 1);
213
214					leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
215							-1, NULL);
216					leaf_move_items(LEAF_FROM_R_TO_L, tb,
217							B_NR_ITEMS(tb->R[0]),
218							-1, NULL);
219
220					reiserfs_invalidate_buffer(tb, tbS0);
221					reiserfs_invalidate_buffer(tb,
222								   tb->R[0]);
223
224					return 0;
225				}
226				/*
227				 * all contents of all the 3 buffers will
228				 * be in R[0]
229				 */
230				leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
231						NULL);
232				leaf_move_items(LEAF_FROM_L_TO_R, tb,
233						B_NR_ITEMS(tb->L[0]), -1, NULL);
234
235				/* right_delimiting_key is correct in R[0] */
236				replace_key(tb, tb->CFR[0], tb->rkey[0],
237					    tb->R[0], 0);
238
239				reiserfs_invalidate_buffer(tb, tbS0);
240				reiserfs_invalidate_buffer(tb, tb->L[0]);
241
242				return -1;
243			}
244
245			RFALSE(tb->rnum[0] != 0,
246			       "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
247			/* all contents of L[0] and S[0] will be in L[0] */
248			leaf_shift_left(tb, n, -1);
249
250			reiserfs_invalidate_buffer(tb, tbS0);
251
252			return 0;
253		}
254
255		/*
256		 * a part of contents of S[0] will be in L[0] and the
257		 * rest part of S[0] will be in R[0]
258		 */
259
260		RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
261		       (tb->lnum[0] + tb->rnum[0] > n + 1),
262		       "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
263		       tb->rnum[0], tb->lnum[0], n);
264		RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
265		       (tb->lbytes != -1 || tb->rbytes != -1),
266		       "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
267		       tb->rbytes, tb->lbytes);
268		RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
269		       (tb->lbytes < 1 || tb->rbytes != -1),
270		       "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
271		       tb->rbytes, tb->lbytes);
272
273		leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
274		leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
275
276		reiserfs_invalidate_buffer(tb, tbS0);
277
278		return 0;
279	}
280
281	if (tb->rnum[0] == -1) {
282		/* all contents of R[0] and S[0] will be in R[0] */
283		leaf_shift_right(tb, n, -1);
284		reiserfs_invalidate_buffer(tb, tbS0);
285		return 0;
286	}
287
288	RFALSE(tb->rnum[0],
289	       "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
290	return 0;
291}
292
293static void balance_leaf_insert_left(struct tree_balance *tb,
294				     struct item_head *ih, const char *body)
295{
296	int ret_val;
297	struct buffer_info bi;
298	int n = B_NR_ITEMS(tb->L[0]);
299
300				if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
301					/* part of new item falls into L[0] */
302					int new_item_len;
303					int version;
304
305					ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
306
307					/* Calculate item length to insert to S[0] */
308					new_item_len = ih_item_len(ih) - tb->lbytes;
309					/* Calculate and check item length to insert to L[0] */
310					put_ih_item_len(ih, ih_item_len(ih) - new_item_len);
311
312					RFALSE(ih_item_len(ih) <= 0,
313					       "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
314					       ih_item_len(ih));
315
316					/* Insert new item into L[0] */
317					buffer_info_init_left(tb, &bi);
318					leaf_insert_into_buf(&bi,
319							n + tb->item_pos - ret_val, ih, body,
320							tb->zeroes_num > ih_item_len(ih) ? ih_item_len(ih) : tb->zeroes_num);
321
322					version = ih_version(ih);
323
324					/* Calculate key component, item length and body to insert into S[0] */
325					set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
326							(tb->lbytes << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0)));
327
328					put_ih_item_len(ih, new_item_len);
329					if (tb->lbytes > tb->zeroes_num) {
330						body += (tb->lbytes - tb->zeroes_num);
331						tb->zeroes_num = 0;
332					} else
333						tb->zeroes_num -= tb->lbytes;
334
335					RFALSE(ih_item_len(ih) <= 0,
336					       "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
337					       ih_item_len(ih));
338				} else {
339					/* new item in whole falls into L[0] */
340					/* Shift lnum[0]-1 items to L[0] */
341					ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes);
342					/* Insert new item into L[0] */
343					buffer_info_init_left(tb, &bi);
344					leaf_insert_into_buf(&bi, n + tb->item_pos - ret_val, ih, body, tb->zeroes_num);
345					tb->insert_size[0] = 0;
346					tb->zeroes_num = 0;
347				}
348
349}
350
351static void balance_leaf_paste_left(struct tree_balance *tb,
352				    struct item_head *ih, const char *body)
353{
354	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
355	int ret_val;
356	struct buffer_info bi;
357	int n = B_NR_ITEMS(tb->L[0]);
358
359				if (tb->item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
360					/* we must shift the part of the appended item */
361					if (is_direntry_le_ih(item_head(tbS0, tb->item_pos))) {
362
363						RFALSE(tb->zeroes_num,
364						       "PAP-12090: invalid parameter in case of a directory");
365						/* directory item */
366						if (tb->lbytes > tb->pos_in_item) {
367							/* new directory entry falls into L[0] */
368							struct item_head *pasted;
369							int l_pos_in_item = tb->pos_in_item;
370
371							/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
372							ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1);
373							if (ret_val && !tb->item_pos) {
374								pasted = item_head(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1);
375								l_pos_in_item += ih_entry_count(pasted) - (tb->lbytes -1);
376							}
377
378							/* Append given directory entry to directory item */
379							buffer_info_init_left(tb, &bi);
380							leaf_paste_in_buffer(&bi, n + tb->item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, tb->zeroes_num);
381
382							/* previous string prepared space for pasting new entry, following string pastes this entry */
383
384							/* when we have merge directory item, pos_in_item has been changed too */
385
386							/* paste new directory entry. 1 is entry number */
387							leaf_paste_entries(&bi, n + tb->item_pos - ret_val, l_pos_in_item,
388									   1, (struct reiserfs_de_head *) body,
389									   body + DEH_SIZE, tb->insert_size[0]);
390							tb->insert_size[0] = 0;
391						} else {
392							/* new directory item doesn't fall into L[0] */
393							/* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
394							leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
395						}
396						/* Calculate new position to append in item body */
397						tb->pos_in_item -= tb->lbytes;
398					} else {
399						/* regular object */
400						RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes);
401						RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, tb->item_pos)),
402						       "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
403						       ih_item_len(item_head(tbS0, tb->item_pos)), tb->pos_in_item);
404
405						if (tb->lbytes >= tb->pos_in_item) {
406							/* appended item will be in L[0] in whole */
407							int l_n;
408
409							/* this bytes number must be appended to the last item of L[h] */
410							l_n = tb->lbytes - tb->pos_in_item;
411
412							/* Calculate new insert_size[0] */
413							tb->insert_size[0] -= l_n;
414
415							RFALSE(tb->insert_size[0] <= 0,
416							       "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
417							       tb->insert_size[0]);
418							ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len
419									    (item_head(tbS0, tb->item_pos)));
420							/* Append to body of item in L[0] */
421							buffer_info_init_left(tb, &bi);
422							leaf_paste_in_buffer
423							    (&bi, n + tb->item_pos - ret_val, ih_item_len
424							     (item_head(tb->L[0], n + tb->item_pos - ret_val)),
425							     l_n, body,
426							     tb->zeroes_num > l_n ? l_n : tb->zeroes_num);
427							/* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
428							{
429								int version;
430								int temp_l = l_n;
431
432								RFALSE(ih_item_len(item_head(tbS0, 0)),
433								     "PAP-12106: item length must be 0");
434								RFALSE(comp_short_le_keys(leaf_key(tbS0, 0), leaf_key
435								      (tb->L[0], n + tb->item_pos - ret_val)),
436								     "PAP-12107: items must be of the same file");
437								if (is_indirect_le_ih(item_head(tb->L[0], n + tb->item_pos - ret_val))) {
438									temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT);
439								}
440								/* update key of first item in S0 */
441								version = ih_version(item_head(tbS0, 0));
442								set_le_key_k_offset(version, leaf_key(tbS0, 0),
443								     le_key_k_offset(version,leaf_key(tbS0, 0)) + temp_l);
444								/* update left delimiting key */
445								set_le_key_k_offset(version, internal_key(tb->CFL[0], tb->lkey[0]),
446								     le_key_k_offset(version, internal_key(tb->CFL[0], tb->lkey[0])) + temp_l);
447							}
448
449							/* Calculate new body, position in item and insert_size[0] */
450							if (l_n > tb->zeroes_num) {
451								body += (l_n - tb->zeroes_num);
452								tb->zeroes_num = 0;
453							} else
454								tb->zeroes_num -= l_n;
455							tb->pos_in_item = 0;
456
457							RFALSE(comp_short_le_keys(leaf_key(tbS0, 0), leaf_key(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1))
458							     || !op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size)
459							     || !op_is_left_mergeable(internal_key(tb->CFL[0], tb->lkey[0]), tbS0->b_size),
460							     "PAP-12120: item must be merge-able with left neighboring item");
461						} else {	/* only part of the appended item will be in L[0] */
462
463							/* Calculate position in item for append in S[0] */
464							tb->pos_in_item -= tb->lbytes;
465
466							RFALSE(tb->pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", tb->pos_in_item);
467
468							/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
469							leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
470						}
471					}
472				} else {	/* appended item will be in L[0] in whole */
473
474					struct item_head *pasted;
475
476					if (!tb->item_pos && op_is_left_mergeable(leaf_key(tbS0, 0), tbS0->b_size)) {	/* if we paste into first item of S[0] and it is left mergable */
477						/* then increment pos_in_item by the size of the last item in L[0] */
478						pasted = item_head(tb->L[0], n - 1);
479						if (is_direntry_le_ih(pasted))
480							tb->pos_in_item += ih_entry_count(pasted);
481						else
482							tb->pos_in_item += ih_item_len(pasted);
483					}
484
485					/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
486					ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
487					/* Append to body of item in L[0] */
488					buffer_info_init_left(tb, &bi);
489					leaf_paste_in_buffer(&bi, n + tb->item_pos - ret_val,
490							     tb->pos_in_item,
491							     tb->insert_size[0],
492							     body, tb->zeroes_num);
493
494					/* if appended item is directory, paste entry */
495					pasted = item_head(tb->L[0], n + tb->item_pos - ret_val);
496					if (is_direntry_le_ih(pasted))
497						leaf_paste_entries(&bi, n + tb->item_pos - ret_val,
498								   tb->pos_in_item, 1,
499								   (struct reiserfs_de_head *) body,
500								   body + DEH_SIZE,
501								   tb->insert_size[0]);
502					/* if appended item is indirect item, put unformatted node into un list */
503					if (is_indirect_le_ih(pasted))
504						set_ih_free_space(pasted, 0);
505					tb->insert_size[0] = 0;
506					tb->zeroes_num = 0;
507				}
508
509}
510
511static void balance_leaf_insert_right(struct tree_balance *tb,
512				      struct item_head *ih, const char *body)
513{
514
515	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
516	int n = B_NR_ITEMS(tbS0);
517	struct buffer_info bi;
518	int ret_val;
519			if (n - tb->rnum[0] < tb->item_pos) {	/* new item or its part falls to R[0] */
520				if (tb->item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) {	/* part of new item falls into R[0] */
521					loff_t old_key_comp, old_len, r_zeroes_number;
522					const char *r_body;
523					int version;
524					loff_t offset;
525
526					leaf_shift_right(tb, tb->rnum[0] - 1, -1);
527
528					version = ih_version(ih);
529					/* Remember key component and item length */
530					old_key_comp = le_ih_k_offset(ih);
531					old_len = ih_item_len(ih);
532
533					/* Calculate key component and item length to insert into R[0] */
534					offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0));
535					set_le_ih_k_offset(ih, offset);
536					put_ih_item_len(ih, tb->rbytes);
537					/* Insert part of the item into R[0] */
538					buffer_info_init_right(tb, &bi);
539					if ((old_len - tb->rbytes) > tb->zeroes_num) {
540						r_zeroes_number = 0;
541						r_body = body + (old_len - tb->rbytes) - tb->zeroes_num;
542					} else {
543						r_body = body;
544						r_zeroes_number = tb->zeroes_num - (old_len - tb->rbytes);
545						tb->zeroes_num -= r_zeroes_number;
546					}
547
548					leaf_insert_into_buf(&bi, 0, ih, r_body,
549							     r_zeroes_number);
550
551					/* Replace right delimiting key by first key in R[0] */
552					replace_key(tb, tb->CFR[0], tb->rkey[0],
553						    tb->R[0], 0);
554
555					/* Calculate key component and item length to insert into S[0] */
556					set_le_ih_k_offset(ih, old_key_comp);
557					put_ih_item_len(ih, old_len - tb->rbytes);
558
559					tb->insert_size[0] -= tb->rbytes;
560
561				} else {	/* whole new item falls into R[0] */
562
563					/* Shift rnum[0]-1 items to R[0] */
564					ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
565					/* Insert new item into R[0] */
566					buffer_info_init_right(tb, &bi);
567					leaf_insert_into_buf(&bi, tb->item_pos - n + tb->rnum[0] - 1,
568							     ih, body, tb->zeroes_num);
569
570					if (tb->item_pos - n + tb->rnum[0] - 1 == 0) {
571						replace_key(tb, tb->CFR[0],
572							    tb->rkey[0],
573							    tb->R[0], 0);
574
575					}
576					tb->zeroes_num = tb->insert_size[0] = 0;
577				}
578			} else {	/* new item or part of it doesn't fall into R[0] */
579
580				leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
581			}
582
583}
584
585static void balance_leaf_paste_right(struct tree_balance *tb,
586				     struct item_head *ih, const char *body)
587{
588	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
589	int n = B_NR_ITEMS(tbS0);
590	struct buffer_info bi;
591	int ret_val;
592
593			if (n - tb->rnum[0] <= tb->item_pos) {	/* pasted item or part of it falls to R[0] */
594				if (tb->item_pos == n - tb->rnum[0] && tb->rbytes != -1) {	/* we must shift the part of the appended item */
595					if (is_direntry_le_ih(item_head(tbS0, tb->item_pos))) {	/* we append to directory item */
596						int entry_count;
597
598						RFALSE(tb->zeroes_num,
599						       "PAP-12145: invalid parameter in case of a directory");
600						entry_count = ih_entry_count(item_head
601								  (tbS0, tb->item_pos));
602						if (entry_count - tb->rbytes <
603						    tb->pos_in_item)
604							/* new directory entry falls into R[0] */
605						{
606							int paste_entry_position;
607
608							RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0],
609							       "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
610							       tb->rbytes, entry_count);
611							/* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
612							leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1);
613							/* Paste given directory entry to directory item */
614							paste_entry_position = tb->pos_in_item - entry_count + tb->rbytes - 1;
615							buffer_info_init_right(tb, &bi);
616							leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, tb->zeroes_num);
617							/* paste entry */
618							leaf_paste_entries(&bi, 0, paste_entry_position, 1,
619									   (struct reiserfs_de_head *) body,
620									   body + DEH_SIZE, tb->insert_size[0]);
621
622							if (paste_entry_position == 0) {
623								/* change delimiting keys */
624								replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0);
625							}
626
627							tb->insert_size[0] = 0;
628							tb->pos_in_item++;
629						} else {	/* new directory entry doesn't fall into R[0] */
630
631							leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
632						}
633					} else {	/* regular object */
634
635						int n_shift, n_rem, r_zeroes_number;
636						const char *r_body;
637
638						/* Calculate number of bytes which must be shifted from appended item */
639						if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0)
640							n_shift = 0;
641
642						RFALSE(tb->pos_in_item != ih_item_len
643						       (item_head(tbS0, tb->item_pos)),
644						       "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
645						       tb->pos_in_item, ih_item_len
646						       (item_head(tbS0, tb->item_pos)));
647
648						leaf_shift_right(tb, tb->rnum[0], n_shift);
649						/* Calculate number of bytes which must remain in body after appending to R[0] */
650						if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0)
651							n_rem = 0;
652
653						{
654							int version;
655							unsigned long temp_rem = n_rem;
656
657							version = ih_version(item_head(tb->R[0], 0));
658							if (is_indirect_le_key(version, leaf_key(tb->R[0], 0))) {
659								temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
660							}
661							set_le_key_k_offset(version, leaf_key(tb->R[0], 0),
662							     le_key_k_offset(version, leaf_key(tb->R[0], 0)) + temp_rem);
663							set_le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0]),
664							     le_key_k_offset(version, internal_key(tb->CFR[0], tb->rkey[0])) + temp_rem);
665						}
666/*		  k_offset (leaf_key(tb->R[0],0)) += n_rem;
667		  k_offset (internal_key(tb->CFR[0],tb->rkey[0])) += n_rem;*/
668						do_balance_mark_internal_dirty(tb, tb->CFR[0], 0);
669
670						/* Append part of body into R[0] */
671						buffer_info_init_right(tb, &bi);
672						if (n_rem > tb->zeroes_num) {
673							r_zeroes_number = 0;
674							r_body = body + n_rem - tb->zeroes_num;
675						} else {
676							r_body = body;
677							r_zeroes_number = tb->zeroes_num - n_rem;
678							tb->zeroes_num -= r_zeroes_number;
679						}
680
681						leaf_paste_in_buffer(&bi, 0, n_shift,
682								     tb->insert_size[0] - n_rem,
683								     r_body, r_zeroes_number);
684
685						if (is_indirect_le_ih(item_head(tb->R[0], 0))) {
686#if 0
687							RFALSE(n_rem,
688							       "PAP-12160: paste more than one unformatted node pointer");
689#endif
690							set_ih_free_space(item_head(tb->R[0], 0), 0);
691						}
692						tb->insert_size[0] = n_rem;
693						if (!n_rem)
694							tb->pos_in_item++;
695					}
696				} else {	/* pasted item in whole falls into R[0] */
697
698					struct item_head *pasted;
699
700					ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
701					/* append item in R[0] */
702					if (tb->pos_in_item >= 0) {
703						buffer_info_init_right(tb, &bi);
704						leaf_paste_in_buffer(&bi, tb->item_pos - n + tb->rnum[0], tb->pos_in_item,
705								     tb->insert_size[0], body, tb->zeroes_num);
706					}
707
708					/* paste new entry, if item is directory item */
709					pasted = item_head(tb->R[0], tb->item_pos - n + tb->rnum[0]);
710					if (is_direntry_le_ih(pasted) && tb->pos_in_item >= 0) {
711						leaf_paste_entries(&bi, tb->item_pos - n + tb->rnum[0],
712								   tb->pos_in_item, 1,
713								   (struct reiserfs_de_head *) body,
714								   body + DEH_SIZE, tb->insert_size[0]);
715						if (!tb->pos_in_item) {
716
717							RFALSE(tb->item_pos - n + tb->rnum[0],
718							       "PAP-12165: directory item must be first item of node when pasting is in 0th position");
719
720							/* update delimiting keys */
721							replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
722						}
723					}
724
725					if (is_indirect_le_ih(pasted))
726						set_ih_free_space(pasted, 0);
727					tb->zeroes_num = tb->insert_size[0] = 0;
728				}
729			} else {	/* new item doesn't fall into R[0] */
730
731				leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
732			}
733
734}
735
736static void balance_leaf_new_nodes_insert(struct tree_balance *tb,
737					  struct item_head *ih,
738					  const char *body,
739					  struct item_head *insert_key,
740					  struct buffer_head **insert_ptr,
741					  int i)
742{
743	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
744	int n = B_NR_ITEMS(tbS0);
745	struct buffer_info bi;
746			if (n - tb->snum[i] < tb->item_pos) {	/* new item or it's part falls to first new node S_new[i] */
747				if (tb->item_pos == n - tb->snum[i] + 1 && tb->sbytes[i] != -1) {	/* part of new item falls into S_new[i] */
748					int old_key_comp, old_len, r_zeroes_number;
749					const char *r_body;
750					int version;
751
752					/* Move snum[i]-1 items from S[0] to S_new[i] */
753					leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
754							tb->snum[i] - 1, -1,
755							tb->S_new[i]);
756					/* Remember key component and item length */
757					version = ih_version(ih);
758					old_key_comp = le_ih_k_offset(ih);
759					old_len = ih_item_len(ih);
760
761					/* Calculate key component and item length to insert into S_new[i] */
762					set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
763							   ((old_len - tb->sbytes[i]) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0)));
764
765					put_ih_item_len(ih, tb->sbytes[i]);
766
767					/* Insert part of the item into S_new[i] before 0-th item */
768					buffer_info_init_bh(tb, &bi, tb->S_new[i]);
769
770					if ((old_len - tb->sbytes[i]) > tb->zeroes_num) {
771						r_zeroes_number = 0;
772						r_body = body + (old_len - tb->sbytes[i]) - tb->zeroes_num;
773					} else {
774						r_body = body;
775						r_zeroes_number = tb->zeroes_num - (old_len - tb->sbytes[i]);
776						tb->zeroes_num -= r_zeroes_number;
777					}
778
779					leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeroes_number);
780
781					/* Calculate key component and item length to insert into S[i] */
782					set_le_ih_k_offset(ih, old_key_comp);
783					put_ih_item_len(ih, old_len - tb->sbytes[i]);
784					tb->insert_size[0] -= tb->sbytes[i];
785				} else {	/* whole new item falls into S_new[i] */
786
787					/* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
788					leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
789							tb->snum[i] - 1, tb->sbytes[i], tb->S_new[i]);
790
791					/* Insert new item into S_new[i] */
792					buffer_info_init_bh(tb, &bi, tb->S_new[i]);
793					leaf_insert_into_buf(&bi, tb->item_pos - n + tb->snum[i] - 1,
794							     ih, body, tb->zeroes_num);
795
796					tb->zeroes_num = tb->insert_size[0] = 0;
797				}
798			}
799
800			else {	/* new item or it part don't falls into S_new[i] */
801
802				leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
803						tb->snum[i], tb->sbytes[i], tb->S_new[i]);
804			}
805}
806
807static void balance_leaf_new_nodes_paste(struct tree_balance *tb,
808					 struct item_head *ih,
809					 const char *body,
810					 struct item_head *insert_key,
811					 struct buffer_head **insert_ptr,
812					 int i)
813{
814	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
815	int n = B_NR_ITEMS(tbS0);
816	struct buffer_info bi;
817			if (n - tb->snum[i] <= tb->item_pos) {	/* pasted item or part if it falls to S_new[i] */
818				if (tb->item_pos == n - tb->snum[i] && tb->sbytes[i] != -1) {	/* we must shift part of the appended item */
819					struct item_head *aux_ih;
820
821					RFALSE(ih, "PAP-12210: ih must be 0");
822
823					aux_ih = item_head(tbS0, tb->item_pos);
824					if (is_direntry_le_ih(aux_ih)) {
825						/* we append to directory item */
826
827						int entry_count;
828
829						entry_count = ih_entry_count(aux_ih);
830
831						if (entry_count - tb->sbytes[i] < tb->pos_in_item && tb->pos_in_item <= entry_count) {
832							/* new directory entry falls into S_new[i] */
833
834							RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0");
835							RFALSE(tb->sbytes[i] - 1 >= entry_count,
836							       "PAP-12220: there are no so much entries (%d), only %d",
837							       tb->sbytes[i] - 1, entry_count);
838
839							/* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
840							leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], tb->sbytes[i] - 1, tb->S_new[i]);
841							/* Paste given directory entry to directory item */
842							buffer_info_init_bh(tb, &bi, tb->S_new[i]);
843							leaf_paste_in_buffer(&bi, 0, tb->pos_in_item - entry_count + tb->sbytes[i] - 1,
844							     tb->insert_size[0], body, tb->zeroes_num);
845							/* paste new directory entry */
846							leaf_paste_entries(&bi, 0, tb->pos_in_item - entry_count + tb->sbytes[i] - 1, 1,
847									   (struct reiserfs_de_head *) body,
848									   body + DEH_SIZE, tb->insert_size[0]);
849							tb->insert_size[0] = 0;
850							tb->pos_in_item++;
851						} else {	/* new directory entry doesn't fall into S_new[i] */
852							leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], tb->sbytes[i], tb->S_new[i]);
853						}
854					} else {	/* regular object */
855
856						int n_shift, n_rem, r_zeroes_number;
857						const char *r_body;
858
859						RFALSE(tb->pos_in_item != ih_item_len(item_head(tbS0, tb->item_pos)) || tb->insert_size[0] <= 0,
860						       "PAP-12225: item too short or insert_size <= 0");
861
862						/* Calculate number of bytes which must be shifted from appended item */
863						n_shift = tb->sbytes[i] - tb->insert_size[0];
864						if (n_shift < 0)
865							n_shift = 0;
866						leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, tb->snum[i], n_shift, tb->S_new[i]);
867
868						/* Calculate number of bytes which must remain in body after append to S_new[i] */
869						n_rem = tb->insert_size[0] - tb->sbytes[i];
870						if (n_rem < 0)
871							n_rem = 0;
872						/* Append part of body into S_new[0] */
873						buffer_info_init_bh(tb, &bi, tb->S_new[i]);
874						if (n_rem > tb->zeroes_num) {
875							r_zeroes_number = 0;
876							r_body = body + n_rem - tb->zeroes_num;
877						} else {
878							r_body = body;
879							r_zeroes_number = tb->zeroes_num - n_rem;
880							tb->zeroes_num -= r_zeroes_number;
881						}
882
883						leaf_paste_in_buffer(&bi, 0, n_shift,
884								     tb->insert_size[0] - n_rem,
885								     r_body, r_zeroes_number);
886						{
887							struct item_head *tmp;
888
889							tmp = item_head(tb->S_new[i], 0);
890							if (is_indirect_le_ih
891							    (tmp)) {
892								set_ih_free_space(tmp, 0);
893								set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
894							} else {
895								set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem);
896							}
897						}
898
899						tb->insert_size[0] = n_rem;
900						if (!n_rem)
901							tb->pos_in_item++;
902					}
903				} else
904					/* item falls wholly into S_new[i] */
905				{
906					int leaf_mi;
907					struct item_head *pasted;
908
909#ifdef CONFIG_REISERFS_CHECK
910					struct item_head *ih_check = item_head(tbS0, tb->item_pos);
911
912					if (!is_direntry_le_ih(ih_check)
913					    && (tb->pos_in_item != ih_item_len(ih_check)
914						|| tb->insert_size[0] <= 0))
915						reiserfs_panic(tb->tb_sb,
916							     "PAP-12235",
917							     "pos_in_item "
918							     "must be equal "
919							     "to ih_item_len");
920#endif				/* CONFIG_REISERFS_CHECK */
921
922					leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW,
923							    tb, tb->snum[i],
924							    tb->sbytes[i],
925							    tb->S_new[i]);
926
927					RFALSE(leaf_mi,
928					       "PAP-12240: unexpected value returned by leaf_move_items (%d)",
929					       leaf_mi);
930
931					/* paste into item */
932					buffer_info_init_bh(tb, &bi, tb->S_new[i]);
933					leaf_paste_in_buffer(&bi,
934							     tb->item_pos - n + tb->snum[i],
935							     tb->pos_in_item,
936							     tb->insert_size[0],
937							     body, tb->zeroes_num);
938
939					pasted = item_head(tb->S_new[i], tb->item_pos - n + tb->snum[i]);
940					if (is_direntry_le_ih(pasted)) {
941						leaf_paste_entries(&bi,
942								   tb->item_pos - n + tb->snum[i],
943								   tb->pos_in_item, 1,
944								   (struct reiserfs_de_head *)body,
945								   body + DEH_SIZE,
946								   tb->insert_size[0]
947						    );
948					}
949
950					/* if we paste to indirect item update ih_free_space */
951					if (is_indirect_le_ih(pasted))
952						set_ih_free_space(pasted, 0);
953					tb->zeroes_num = tb->insert_size[0] = 0;
954				}
955			}
956
957			else {	/* pasted item doesn't fall into S_new[i] */
958
959				leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
960						tb->snum[i], tb->sbytes[i], tb->S_new[i]);
961			}
962
963}
964
965static void balance_leaf_finish_node_insert(struct tree_balance *tb,
966					    struct item_head *ih,
967					    const char *body)
968{
969	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
970	struct buffer_info bi;
971			buffer_info_init_tbS0(tb, &bi);
972			leaf_insert_into_buf(&bi, tb->item_pos, ih,
973					     body, tb->zeroes_num);
974
975			/*
976			 * If we insert the first key
977			 * change the delimiting key
978			 */
979			if (tb->item_pos == 0) {
980				if (tb->CFL[0])	/* can be 0 in reiserfsck */
981					replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
982			}
983}
984
985static void balance_leaf_finish_node_paste(struct tree_balance *tb,
986					   struct item_head *ih,
987					   const char *body)
988{
989	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
990	struct buffer_info bi;
991				struct item_head *pasted;
992
993				pasted = item_head(tbS0, tb->item_pos);
994				/* when directory, may be new entry already pasted */
995				if (is_direntry_le_ih(pasted)) {
996					if (tb->pos_in_item >= 0 && tb->pos_in_item <= ih_entry_count(pasted)) {
997
998						RFALSE(!tb->insert_size[0],
999						       "PAP-12260: insert_size is 0 already");
1000
1001						/* prepare space */
1002						buffer_info_init_tbS0(tb, &bi);
1003						leaf_paste_in_buffer(&bi, tb->item_pos, tb->pos_in_item,
1004								     tb->insert_size[0], body,
1005								     tb->zeroes_num);
1006
1007						/* paste entry */
1008						leaf_paste_entries(&bi, tb->item_pos, tb->pos_in_item, 1,
1009								   (struct reiserfs_de_head *)body,
1010								   body + DEH_SIZE,
1011								   tb->insert_size[0]);
1012						if (!tb->item_pos && !tb->pos_in_item) {
1013							RFALSE(!tb->CFL[0] || !tb->L[0],
1014							       "PAP-12270: CFL[0]/L[0] must be specified");
1015							if (tb->CFL[0])
1016								replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1017						}
1018						tb->insert_size[0] = 0;
1019					}
1020				} else {	/* regular object */
1021					if (tb->pos_in_item == ih_item_len(pasted)) {
1022
1023						RFALSE(tb->insert_size[0] <= 0,
1024						       "PAP-12275: insert size must not be %d",
1025						       tb->insert_size[0]);
1026						buffer_info_init_tbS0(tb, &bi);
1027						leaf_paste_in_buffer(&bi, tb->item_pos, tb->pos_in_item,
1028								     tb->insert_size[0], body, tb->zeroes_num);
1029
1030						if (is_indirect_le_ih(pasted)) {
1031#if 0
1032							RFALSE(tb->
1033							       insert_size[0] !=
1034							       UNFM_P_SIZE,
1035							       "PAP-12280: insert_size for indirect item must be %d, not %d",
1036							       UNFM_P_SIZE,
1037							       tb->
1038							       insert_size[0]);
1039#endif
1040							set_ih_free_space(pasted, 0);
1041						}
1042						tb->insert_size[0] = 0;
1043					}
1044#ifdef CONFIG_REISERFS_CHECK
1045					else {
1046						if (tb->insert_size[0]) {
1047							print_cur_tb("12285");
1048							reiserfs_panic(tb->tb_sb,
1049							    "PAP-12285",
1050							    "insert_size "
1051							    "must be 0 "
1052							    "(%d)",
1053							    tb->insert_size[0]);
1054						}
1055					}
1056#endif				/* CONFIG_REISERFS_CHECK */
1057
1058				}
1059}
1060
1061/**
1062 * balance_leaf - reiserfs tree balancing algorithm
1063 * @tb: tree balance state
1064 * @ih: item header of inserted item (little endian)
1065 * @body: body of inserted item or bytes to paste
1066 * @flag: i - insert, d - delete, c - cut, p - paste (see do_balance)
1067 * passed back:
1068 * @insert_key: key to insert new nodes
1069 * @insert_ptr: array of nodes to insert at the next level
1070 *
1071 * In our processing of one level we sometimes determine what must be
1072 * inserted into the next higher level.  This insertion consists of a
1073 * key or two keys and their corresponding pointers.
1074 */
1075static int balance_leaf(struct tree_balance *tb, struct item_head *ih,
1076			const char *body, int flag,
1077			struct item_head *insert_key,
1078			struct buffer_head **insert_ptr)
1079{
1080	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
1081	int n, i;
1082
1083	PROC_INFO_INC(tb->tb_sb, balance_at[0]);
1084
1085	/* Make balance in case insert_size[0] < 0 */
1086	if (tb->insert_size[0] < 0)
1087		return balance_leaf_when_delete(tb, flag);
1088
1089	tb->item_pos = PATH_LAST_POSITION(tb->tb_path),
1090	tb->pos_in_item = tb->tb_path->pos_in_item,
1091	tb->zeroes_num = 0;
1092	if (flag == M_INSERT && !body)
1093		tb->zeroes_num = ih_item_len(ih);
1094
1095	/*
1096	 * for indirect item pos_in_item is measured in unformatted node
1097	 * pointers. Recalculate to bytes
1098	 */
1099	if (flag != M_INSERT
1100	    && is_indirect_le_ih(item_head(tbS0, tb->item_pos)))
1101		tb->pos_in_item *= UNFM_P_SIZE;
1102
1103	if (tb->lnum[0] > 0) {
1104		/* Shift lnum[0] items from S[0] to the left neighbor L[0] */
1105		if (tb->item_pos < tb->lnum[0]) {
1106			/* new item or it part falls to L[0], shift it too */
1107			n = B_NR_ITEMS(tb->L[0]);
1108
1109			switch (flag) {
1110			case M_INSERT:	/* insert item into L[0] */
1111				balance_leaf_insert_left(tb, ih, body);
1112				break;
1113
1114			case M_PASTE:	/* append item in L[0] */
1115				balance_leaf_paste_left(tb, ih, body);
1116				break;
1117			default:	/* cases d and t */
1118				reiserfs_panic(tb->tb_sb, "PAP-12130",
1119					       "lnum > 0: unexpected mode: "
1120					       " %s(%d)",
1121					       (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1122			}
1123		} else {
1124			/* new item doesn't fall into L[0] */
1125			leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
1126		}
1127	}
1128
1129	/* tb->lnum[0] > 0 */
1130	/* Calculate new item position */
1131	tb->item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
1132
1133	if (tb->rnum[0] > 0) {
1134		/* shift rnum[0] items from S[0] to the right neighbor R[0] */
1135		n = B_NR_ITEMS(tbS0);
1136		switch (flag) {
1137
1138		case M_INSERT:	/* insert item */
1139			balance_leaf_insert_right(tb, ih, body);
1140			break;
1141
1142		case M_PASTE:	/* append item */
1143			balance_leaf_paste_right(tb, ih, body);
1144			break;
1145		default:	/* cases d and t */
1146			reiserfs_panic(tb->tb_sb, "PAP-12175",
1147				       "rnum > 0: unexpected mode: %s(%d)",
1148				       (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1149		}
1150
1151	}
1152
1153	/* tb->rnum[0] > 0 */
1154	RFALSE(tb->blknum[0] > 3,
1155	       "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
1156	RFALSE(tb->blknum[0] < 0,
1157	       "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
1158
1159	/*
1160	 * if while adding to a node we discover that it is possible to split
1161	 * it in two, and merge the left part into the left neighbor and the
1162	 * right part into the right neighbor, eliminating the node
1163	 */
1164	if (tb->blknum[0] == 0) {	/* node S[0] is empty now */
1165
1166		RFALSE(!tb->lnum[0] || !tb->rnum[0],
1167		       "PAP-12190: lnum and rnum must not be zero");
1168		/*
1169		 * if insertion was done before 0-th position in R[0], right
1170		 * delimiting key of the tb->L[0]'s and left delimiting key are
1171		 * not set correctly
1172		 */
1173		if (tb->CFL[0]) {
1174			if (!tb->CFR[0])
1175				reiserfs_panic(tb->tb_sb, "vs-12195",
1176					       "CFR not initialized");
1177			copy_key(internal_key(tb->CFL[0], tb->lkey[0]),
1178				 internal_key(tb->CFR[0], tb->rkey[0]));
1179			do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
1180		}
1181
1182		reiserfs_invalidate_buffer(tb, tbS0);
1183		return 0;
1184	}
1185
1186	/* Fill new nodes that appear in place of S[0] */
1187	for (i = tb->blknum[0] - 2; i >= 0; i--) {
1188
1189		RFALSE(!tb->snum[i],
1190		       "PAP-12200: snum[%d] == %d. Must be > 0", i,
1191		       tb->snum[i]);
1192
1193		/* here we shift from S to S_new nodes */
1194
1195		tb->S_new[i] = get_FEB(tb);
1196
1197		/* initialized block type and tree level */
1198		set_blkh_level(B_BLK_HEAD(tb->S_new[i]), DISK_LEAF_NODE_LEVEL);
1199
1200		n = B_NR_ITEMS(tbS0);
1201
1202		switch (flag) {
1203		case M_INSERT:	/* insert item */
1204			balance_leaf_new_nodes_insert(tb, ih, body, insert_key,
1205						      insert_ptr, i);
1206			break;
1207
1208		case M_PASTE:	/* append item */
1209			balance_leaf_new_nodes_paste(tb, ih, body, insert_key,
1210						     insert_ptr, i);
1211			break;
1212		default:	/* cases d and t */
1213			reiserfs_panic(tb->tb_sb, "PAP-12245",
1214				       "blknum > 2: unexpected mode: %s(%d)",
1215				       (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1216		}
1217
1218		memcpy(insert_key + i, leaf_key(tb->S_new[i], 0), KEY_SIZE);
1219		insert_ptr[i] = tb->S_new[i];
1220
1221		RFALSE(!buffer_journaled(tb->S_new[i])
1222		       || buffer_journal_dirty(tb->S_new[i])
1223		       || buffer_dirty(tb->S_new[i]),
1224		       "PAP-12247: S_new[%d] : (%b)",
1225		       i, tb->S_new[i]);
1226	}
1227
1228	/* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1229	   affected item which remains in S */
1230	if (0 <= tb->item_pos && tb->item_pos < tb->s0num) {	/* if we must insert or append into buffer S[0] */
1231
1232		switch (flag) {
1233		case M_INSERT:	/* insert item into S[0] */
1234			balance_leaf_finish_node_insert(tb, ih, body);
1235			break;
1236
1237		case M_PASTE:	/* append item in S[0] */
1238			balance_leaf_finish_node_paste(tb, ih, body);
1239			break;
1240		}
1241	}
1242#ifdef CONFIG_REISERFS_CHECK
1243	if (flag == M_PASTE && tb->insert_size[0]) {
1244		print_cur_tb("12290");
1245		reiserfs_panic(tb->tb_sb,
1246			       "PAP-12290", "insert_size is still not 0 (%d)",
1247			       tb->insert_size[0]);
1248	}
1249#endif
1250
1251	/* Leaf level of the tree is balanced (end of balance_leaf) */
1252	return 0;
1253}
1254
1255/* Make empty node */
1256void make_empty_node(struct buffer_info *bi)
1257{
1258	struct block_head *blkh;
1259
1260	RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1261
1262	blkh = B_BLK_HEAD(bi->bi_bh);
1263	set_blkh_nr_item(blkh, 0);
1264	set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
1265
1266	if (bi->bi_parent)
1267		B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0;	/* Endian safe if 0 */
1268}
1269
1270/* Get first empty buffer */
1271struct buffer_head *get_FEB(struct tree_balance *tb)
1272{
1273	int i;
1274	struct buffer_info bi;
1275
1276	for (i = 0; i < MAX_FEB_SIZE; i++)
1277		if (tb->FEB[i] != NULL)
1278			break;
1279
1280	if (i == MAX_FEB_SIZE)
1281		reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");
1282
1283	buffer_info_init_bh(tb, &bi, tb->FEB[i]);
1284	make_empty_node(&bi);
1285	set_buffer_uptodate(tb->FEB[i]);
1286	tb->used[i] = tb->FEB[i];
1287	tb->FEB[i] = NULL;
1288
1289	return tb->used[i];
1290}
1291
1292/* This is now used because reiserfs_free_block has to be able to schedule. */
1293static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1294{
1295	int i;
1296
1297	if (buffer_dirty(bh))
1298		reiserfs_warning(tb->tb_sb, "reiserfs-12320",
1299				 "called with dirty buffer");
1300	for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
1301		if (!tb->thrown[i]) {
1302			tb->thrown[i] = bh;
1303			get_bh(bh);	/* free_thrown puts this */
1304			return;
1305		}
1306	reiserfs_warning(tb->tb_sb, "reiserfs-12321",
1307			 "too many thrown buffers");
1308}
1309
1310static void free_thrown(struct tree_balance *tb)
1311{
1312	int i;
1313	b_blocknr_t blocknr;
1314	for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
1315		if (tb->thrown[i]) {
1316			blocknr = tb->thrown[i]->b_blocknr;
1317			if (buffer_dirty(tb->thrown[i]))
1318				reiserfs_warning(tb->tb_sb, "reiserfs-12322",
1319						 "called with dirty buffer %d",
1320						 blocknr);
1321			brelse(tb->thrown[i]);	/* incremented in store_thrown */
1322			reiserfs_free_block(tb->transaction_handle, NULL,
1323					    blocknr, 0);
1324		}
1325	}
1326}
1327
1328void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
1329{
1330	struct block_head *blkh;
1331	blkh = B_BLK_HEAD(bh);
1332	set_blkh_level(blkh, FREE_LEVEL);
1333	set_blkh_nr_item(blkh, 0);
1334
1335	clear_buffer_dirty(bh);
1336	store_thrown(tb, bh);
1337}
1338
1339/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1340void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1341		 struct buffer_head *src, int n_src)
1342{
1343
1344	RFALSE(dest == NULL || src == NULL,
1345	       "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1346	       src, dest);
1347	RFALSE(!B_IS_KEYS_LEVEL(dest),
1348	       "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1349	       dest);
1350	RFALSE(n_dest < 0 || n_src < 0,
1351	       "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1352	RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1353	       "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1354	       n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1355
1356	if (B_IS_ITEMS_LEVEL(src))
1357		/* source buffer contains leaf node */
1358		memcpy(internal_key(dest, n_dest), item_head(src, n_src),
1359		       KEY_SIZE);
1360	else
1361		memcpy(internal_key(dest, n_dest), internal_key(src, n_src),
1362		       KEY_SIZE);
1363
1364	do_balance_mark_internal_dirty(tb, dest, 0);
1365}
1366
1367int get_left_neighbor_position(struct tree_balance *tb, int h)
1368{
1369	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1370
1371	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
1372	       "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1373	       h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
1374
1375	if (Sh_position == 0)
1376		return B_NR_ITEMS(tb->FL[h]);
1377	else
1378		return Sh_position - 1;
1379}
1380
1381int get_right_neighbor_position(struct tree_balance *tb, int h)
1382{
1383	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1384
1385	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
1386	       "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1387	       h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
1388
1389	if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1390		return 0;
1391	else
1392		return Sh_position + 1;
1393}
1394
1395#ifdef CONFIG_REISERFS_CHECK
1396
1397int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1398static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1399				char *mes)
1400{
1401	struct disk_child *dc;
1402	int i;
1403
1404	RFALSE(!bh, "PAP-12336: bh == 0");
1405
1406	if (!bh || !B_IS_IN_TREE(bh))
1407		return;
1408
1409	RFALSE(!buffer_dirty(bh) &&
1410	       !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1411	       "PAP-12337: buffer (%b) must be dirty", bh);
1412	dc = B_N_CHILD(bh, 0);
1413
1414	for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1415		if (!is_reusable(s, dc_block_number(dc), 1)) {
1416			print_cur_tb(mes);
1417			reiserfs_panic(s, "PAP-12338",
1418				       "invalid child pointer %y in %b",
1419				       dc, bh);
1420		}
1421	}
1422}
1423
1424static int locked_or_not_in_tree(struct tree_balance *tb,
1425				  struct buffer_head *bh, char *which)
1426{
1427	if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1428	    !B_IS_IN_TREE(bh)) {
1429		reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
1430		return 1;
1431	}
1432	return 0;
1433}
1434
1435static int check_before_balancing(struct tree_balance *tb)
1436{
1437	int retval = 0;
1438
1439	if (REISERFS_SB(tb->tb_sb)->cur_tb) {
1440		reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
1441			       "occurred based on cur_tb not being null at "
1442			       "this point in code. do_balance cannot properly "
1443			       "handle concurrent tree accesses on a same "
1444			       "mount point.");
1445	}
1446
1447	/*
1448	 * double check that buffers that we will modify are unlocked.
1449	 * (fix_nodes should already have prepped all of these for us).
1450	 */
1451	if (tb->lnum[0]) {
1452		retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
1453		retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
1454		retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
1455		check_leaf(tb->L[0]);
1456	}
1457	if (tb->rnum[0]) {
1458		retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
1459		retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
1460		retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
1461		check_leaf(tb->R[0]);
1462	}
1463	retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
1464					"S[0]");
1465	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1466
1467	return retval;
1468}
1469
1470static void check_after_balance_leaf(struct tree_balance *tb)
1471{
1472	if (tb->lnum[0]) {
1473		if (B_FREE_SPACE(tb->L[0]) !=
1474		    MAX_CHILD_SIZE(tb->L[0]) -
1475		    dc_size(B_N_CHILD
1476			    (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
1477			print_cur_tb("12221");
1478			reiserfs_panic(tb->tb_sb, "PAP-12355",
1479				       "shift to left was incorrect");
1480		}
1481	}
1482	if (tb->rnum[0]) {
1483		if (B_FREE_SPACE(tb->R[0]) !=
1484		    MAX_CHILD_SIZE(tb->R[0]) -
1485		    dc_size(B_N_CHILD
1486			    (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
1487			print_cur_tb("12222");
1488			reiserfs_panic(tb->tb_sb, "PAP-12360",
1489				       "shift to right was incorrect");
1490		}
1491	}
1492	if (PATH_H_PBUFFER(tb->tb_path, 1) &&
1493	    (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
1494	     (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1495	      dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1496				PATH_H_POSITION(tb->tb_path, 1)))))) {
1497		int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
1498		int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1499			     dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1500					       PATH_H_POSITION(tb->tb_path,
1501							       1))));
1502		print_cur_tb("12223");
1503		reiserfs_warning(tb->tb_sb, "reiserfs-12363",
1504				 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1505				 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1506				 left,
1507				 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
1508				 PATH_H_PBUFFER(tb->tb_path, 1),
1509				 PATH_H_POSITION(tb->tb_path, 1),
1510				 dc_size(B_N_CHILD
1511					 (PATH_H_PBUFFER(tb->tb_path, 1),
1512					  PATH_H_POSITION(tb->tb_path, 1))),
1513				 right);
1514		reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
1515	}
1516}
1517
1518static void check_leaf_level(struct tree_balance *tb)
1519{
1520	check_leaf(tb->L[0]);
1521	check_leaf(tb->R[0]);
1522	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1523}
1524
1525static void check_internal_levels(struct tree_balance *tb)
1526{
1527	int h;
1528
1529	/* check all internal nodes */
1530	for (h = 1; tb->insert_size[h]; h++) {
1531		check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1532				    "BAD BUFFER ON PATH");
1533		if (tb->lnum[h])
1534			check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1535		if (tb->rnum[h])
1536			check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1537	}
1538
1539}
1540
1541#endif
1542
1543/*
1544 * Now we have all of the buffers that must be used in balancing of
1545 * the tree.  We rely on the assumption that schedule() will not occur
1546 * while do_balance works. ( Only interrupt handlers are acceptable.)
1547 * We balance the tree according to the analysis made before this,
1548 * using buffers already obtained.  For SMP support it will someday be
1549 * necessary to add ordered locking of tb.
1550 */
1551
1552/*
1553 * Some interesting rules of balancing:
1554 * we delete a maximum of two nodes per level per balancing: we never
1555 * delete R, when we delete two of three nodes L, S, R then we move
1556 * them into R.
1557 *
1558 * we only delete L if we are deleting two nodes, if we delete only
1559 * one node we delete S
1560 *
1561 * if we shift leaves then we shift as much as we can: this is a
1562 * deliberate policy of extremism in node packing which results in
1563 * higher average utilization after repeated random balance operations
1564 * at the cost of more memory copies and more balancing as a result of
1565 * small insertions to full nodes.
1566 *
1567 * if we shift internal nodes we try to evenly balance the node
1568 * utilization, with consequent less balancing at the cost of lower
1569 * utilization.
1570 *
1571 * one could argue that the policy for directories in leaves should be
1572 * that of internal nodes, but we will wait until another day to
1573 * evaluate this....  It would be nice to someday measure and prove
1574 * these assumptions as to what is optimal....
1575 */
1576
1577static inline void do_balance_starts(struct tree_balance *tb)
1578{
1579	/* use print_cur_tb() to see initial state of struct tree_balance */
1580
1581	/* store_print_tb (tb); */
1582
1583	/* do not delete, just comment it out */
1584	/*
1585	print_tb(flag, PATH_LAST_POSITION(tb->tb_path),
1586		 tb->tb_path->pos_in_item, tb, "check");
1587	*/
1588	RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
1589#ifdef CONFIG_REISERFS_CHECK
1590	REISERFS_SB(tb->tb_sb)->cur_tb = tb;
1591#endif
1592}
1593
1594static inline void do_balance_completed(struct tree_balance *tb)
1595{
1596
1597#ifdef CONFIG_REISERFS_CHECK
1598	check_leaf_level(tb);
1599	check_internal_levels(tb);
1600	REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
1601#endif
1602
1603	/*
1604	 * reiserfs_free_block is no longer schedule safe.  So, we need to
1605	 * put the buffers we want freed on the thrown list during do_balance,
1606	 * and then free them now
1607	 */
1608
1609	REISERFS_SB(tb->tb_sb)->s_do_balance++;
1610
1611	/* release all nodes hold to perform the balancing */
1612	unfix_nodes(tb);
1613
1614	free_thrown(tb);
1615}
1616
1617/*
1618 * do_balance - balance the tree
1619 *
1620 * @tb: tree_balance structure
1621 * @ih: item header of inserted item
1622 * @body: body of inserted item or bytes to paste
1623 * @flag: 'i' - insert, 'd' - delete, 'c' - cut, 'p' paste
1624 *
1625 * Cut means delete part of an item (includes removing an entry from a
1626 * directory).
1627 *
1628 * Delete means delete whole item.
1629 *
1630 * Insert means add a new item into the tree.
1631 *
1632 * Paste means to append to the end of an existing file or to
1633 * insert a directory entry.
1634 */
1635void do_balance(struct tree_balance *tb, struct item_head *ih,
1636		const char *body, int flag)
1637{
1638	int child_pos;		/* position of a child node in its parent */
1639	int h;			/* level of the tree being processed */
1640
1641	/*
1642	 * in our processing of one level we sometimes determine what
1643	 * must be inserted into the next higher level.  This insertion
1644	 * consists of a key or two keys and their corresponding
1645	 * pointers
1646	 */
1647	struct item_head insert_key[2];
1648
1649	/* inserted node-ptrs for the next level */
1650	struct buffer_head *insert_ptr[2];
1651
1652	tb->tb_mode = flag;
1653	tb->need_balance_dirty = 0;
1654
1655	if (FILESYSTEM_CHANGED_TB(tb)) {
1656		reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
1657			       "changed");
1658	}
1659	/* if we have no real work to do  */
1660	if (!tb->insert_size[0]) {
1661		reiserfs_warning(tb->tb_sb, "PAP-12350",
1662				 "insert_size == 0, mode == %c", flag);
1663		unfix_nodes(tb);
1664		return;
1665	}
1666
1667	atomic_inc(&fs_generation(tb->tb_sb));
1668	do_balance_starts(tb);
1669
1670	/*
1671	 * balance_leaf returns 0 except if combining L R and S into
1672	 * one node.  see balance_internal() for explanation of this
1673	 * line of code.
1674	 */
1675	child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
1676	    balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
1677
1678#ifdef CONFIG_REISERFS_CHECK
1679	check_after_balance_leaf(tb);
1680#endif
1681
1682	/* Balance internal level of the tree. */
1683	for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
1684		child_pos =
1685		    balance_internal(tb, h, child_pos, insert_key, insert_ptr);
1686
1687	do_balance_completed(tb);
1688
1689}
1690