stree.c revision 14a61442c2203d2a49f2f954bfa9259c0ddac1aa
1/*
2 *  Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5/*
6 *  Written by Anatoly P. Pinchuk pap@namesys.botik.ru
7 *  Programm System Institute
8 *  Pereslavl-Zalessky Russia
9 */
10
11/*
12 *  This file contains functions dealing with S+tree
13 *
14 * B_IS_IN_TREE
15 * copy_item_head
16 * comp_short_keys
17 * comp_keys
18 * comp_short_le_keys
19 * le_key2cpu_key
20 * comp_le_keys
21 * bin_search
22 * get_lkey
23 * get_rkey
24 * key_in_buffer
25 * decrement_bcount
26 * decrement_counters_in_path
27 * reiserfs_check_path
28 * pathrelse_and_restore
29 * pathrelse
30 * search_by_key_reada
31 * search_by_key
32 * search_for_position_by_key
33 * comp_items
34 * prepare_for_direct_item
35 * prepare_for_direntry_item
36 * prepare_for_delete_or_cut
37 * calc_deleted_bytes_number
38 * init_tb_struct
39 * padd_item
40 * reiserfs_delete_item
41 * reiserfs_delete_solid_item
42 * reiserfs_delete_object
43 * maybe_indirect_to_direct
44 * indirect_to_direct_roll_back
45 * reiserfs_cut_from_item
46 * truncate_directory
47 * reiserfs_do_truncate
48 * reiserfs_paste_into_item
49 * reiserfs_insert_item
50 */
51
52#include <linux/time.h>
53#include <linux/string.h>
54#include <linux/pagemap.h>
55#include <linux/reiserfs_fs.h>
56#include <linux/smp_lock.h>
57#include <linux/buffer_head.h>
58#include <linux/quotaops.h>
59
60/* Does the buffer contain a disk block which is in the tree. */
61inline int B_IS_IN_TREE(const struct buffer_head *p_s_bh)
62{
63
64	RFALSE(B_LEVEL(p_s_bh) > MAX_HEIGHT,
65	       "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh);
66
67	return (B_LEVEL(p_s_bh) != FREE_LEVEL);
68}
69
70//
71// to gets item head in le form
72//
73inline void copy_item_head(struct item_head *p_v_to,
74			   const struct item_head *p_v_from)
75{
76	memcpy(p_v_to, p_v_from, IH_SIZE);
77}
78
79/* k1 is pointer to on-disk structure which is stored in little-endian
80   form. k2 is pointer to cpu variable. For key of items of the same
81   object this returns 0.
82   Returns: -1 if key1 < key2
83   0 if key1 == key2
84   1 if key1 > key2 */
85inline int comp_short_keys(const struct reiserfs_key *le_key,
86			   const struct cpu_key *cpu_key)
87{
88	__u32 n;
89	n = le32_to_cpu(le_key->k_dir_id);
90	if (n < cpu_key->on_disk_key.k_dir_id)
91		return -1;
92	if (n > cpu_key->on_disk_key.k_dir_id)
93		return 1;
94	n = le32_to_cpu(le_key->k_objectid);
95	if (n < cpu_key->on_disk_key.k_objectid)
96		return -1;
97	if (n > cpu_key->on_disk_key.k_objectid)
98		return 1;
99	return 0;
100}
101
102/* k1 is pointer to on-disk structure which is stored in little-endian
103   form. k2 is pointer to cpu variable.
104   Compare keys using all 4 key fields.
105   Returns: -1 if key1 < key2 0
106   if key1 = key2 1 if key1 > key2 */
107static inline int comp_keys(const struct reiserfs_key *le_key,
108			    const struct cpu_key *cpu_key)
109{
110	int retval;
111
112	retval = comp_short_keys(le_key, cpu_key);
113	if (retval)
114		return retval;
115	if (le_key_k_offset(le_key_version(le_key), le_key) <
116	    cpu_key_k_offset(cpu_key))
117		return -1;
118	if (le_key_k_offset(le_key_version(le_key), le_key) >
119	    cpu_key_k_offset(cpu_key))
120		return 1;
121
122	if (cpu_key->key_length == 3)
123		return 0;
124
125	/* this part is needed only when tail conversion is in progress */
126	if (le_key_k_type(le_key_version(le_key), le_key) <
127	    cpu_key_k_type(cpu_key))
128		return -1;
129
130	if (le_key_k_type(le_key_version(le_key), le_key) >
131	    cpu_key_k_type(cpu_key))
132		return 1;
133
134	return 0;
135}
136
137inline int comp_short_le_keys(const struct reiserfs_key *key1,
138			      const struct reiserfs_key *key2)
139{
140	__u32 *p_s_1_u32, *p_s_2_u32;
141	int n_key_length = REISERFS_SHORT_KEY_LEN;
142
143	p_s_1_u32 = (__u32 *) key1;
144	p_s_2_u32 = (__u32 *) key2;
145	for (; n_key_length--; ++p_s_1_u32, ++p_s_2_u32) {
146		if (le32_to_cpu(*p_s_1_u32) < le32_to_cpu(*p_s_2_u32))
147			return -1;
148		if (le32_to_cpu(*p_s_1_u32) > le32_to_cpu(*p_s_2_u32))
149			return 1;
150	}
151	return 0;
152}
153
154inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
155{
156	int version;
157	to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
158	to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
159
160	// find out version of the key
161	version = le_key_version(from);
162	to->version = version;
163	to->on_disk_key.k_offset = le_key_k_offset(version, from);
164	to->on_disk_key.k_type = le_key_k_type(version, from);
165}
166
167// this does not say which one is bigger, it only returns 1 if keys
168// are not equal, 0 otherwise
169inline int comp_le_keys(const struct reiserfs_key *k1,
170			const struct reiserfs_key *k2)
171{
172	return memcmp(k1, k2, sizeof(struct reiserfs_key));
173}
174
175/**************************************************************************
176 *  Binary search toolkit function                                        *
177 *  Search for an item in the array by the item key                       *
178 *  Returns:    1 if found,  0 if not found;                              *
179 *        *p_n_pos = number of the searched element if found, else the    *
180 *        number of the first element that is larger than p_v_key.        *
181 **************************************************************************/
182/* For those not familiar with binary search: n_lbound is the leftmost item that it
183 could be, n_rbound the rightmost item that it could be.  We examine the item
184 halfway between n_lbound and n_rbound, and that tells us either that we can increase
185 n_lbound, or decrease n_rbound, or that we have found it, or if n_lbound <= n_rbound that
186 there are no possible items, and we have not found it. With each examination we
187 cut the number of possible items it could be by one more than half rounded down,
188 or we find it. */
189static inline int bin_search(const void *p_v_key,	/* Key to search for.                   */
190			     const void *p_v_base,	/* First item in the array.             */
191			     int p_n_num,	/* Number of items in the array.        */
192			     int p_n_width,	/* Item size in the array.
193						   searched. Lest the reader be
194						   confused, note that this is crafted
195						   as a general function, and when it
196						   is applied specifically to the array
197						   of item headers in a node, p_n_width
198						   is actually the item header size not
199						   the item size.                      */
200			     int *p_n_pos	/* Number of the searched for element. */
201    )
202{
203	int n_rbound, n_lbound, n_j;
204
205	for (n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0)) / 2;
206	     n_lbound <= n_rbound; n_j = (n_rbound + n_lbound) / 2)
207		switch (comp_keys
208			((struct reiserfs_key *)((char *)p_v_base +
209						 n_j * p_n_width),
210			 (struct cpu_key *)p_v_key)) {
211		case -1:
212			n_lbound = n_j + 1;
213			continue;
214		case 1:
215			n_rbound = n_j - 1;
216			continue;
217		case 0:
218			*p_n_pos = n_j;
219			return ITEM_FOUND;	/* Key found in the array.  */
220		}
221
222	/* bin_search did not find given key, it returns position of key,
223	   that is minimal and greater than the given one. */
224	*p_n_pos = n_lbound;
225	return ITEM_NOT_FOUND;
226}
227
228#ifdef CONFIG_REISERFS_CHECK
229extern struct tree_balance *cur_tb;
230#endif
231
232/* Minimal possible key. It is never in the tree. */
233const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
234
235/* Maximal possible key. It is never in the tree. */
236static const struct reiserfs_key MAX_KEY = {
237	__constant_cpu_to_le32(0xffffffff),
238	__constant_cpu_to_le32(0xffffffff),
239	{{__constant_cpu_to_le32(0xffffffff),
240	  __constant_cpu_to_le32(0xffffffff)},}
241};
242
243/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
244   of the path, and going upwards.  We must check the path's validity at each step.  If the key is not in
245   the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
246   case we return a special key, either MIN_KEY or MAX_KEY. */
247static inline const struct reiserfs_key *get_lkey(const struct path
248						  *p_s_chk_path,
249						  const struct super_block
250						  *p_s_sb)
251{
252	int n_position, n_path_offset = p_s_chk_path->path_length;
253	struct buffer_head *p_s_parent;
254
255	RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
256	       "PAP-5010: invalid offset in the path");
257
258	/* While not higher in path than first element. */
259	while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
260
261		RFALSE(!buffer_uptodate
262		       (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
263		       "PAP-5020: parent is not uptodate");
264
265		/* Parent at the path is not in the tree now. */
266		if (!B_IS_IN_TREE
267		    (p_s_parent =
268		     PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
269			return &MAX_KEY;
270		/* Check whether position in the parent is correct. */
271		if ((n_position =
272		     PATH_OFFSET_POSITION(p_s_chk_path,
273					  n_path_offset)) >
274		    B_NR_ITEMS(p_s_parent))
275			return &MAX_KEY;
276		/* Check whether parent at the path really points to the child. */
277		if (B_N_CHILD_NUM(p_s_parent, n_position) !=
278		    PATH_OFFSET_PBUFFER(p_s_chk_path,
279					n_path_offset + 1)->b_blocknr)
280			return &MAX_KEY;
281		/* Return delimiting key if position in the parent is not equal to zero. */
282		if (n_position)
283			return B_N_PDELIM_KEY(p_s_parent, n_position - 1);
284	}
285	/* Return MIN_KEY if we are in the root of the buffer tree. */
286	if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->
287	    b_blocknr == SB_ROOT_BLOCK(p_s_sb))
288		return &MIN_KEY;
289	return &MAX_KEY;
290}
291
292/* Get delimiting key of the buffer at the path and its right neighbor. */
293inline const struct reiserfs_key *get_rkey(const struct path *p_s_chk_path,
294					   const struct super_block *p_s_sb)
295{
296	int n_position, n_path_offset = p_s_chk_path->path_length;
297	struct buffer_head *p_s_parent;
298
299	RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
300	       "PAP-5030: invalid offset in the path");
301
302	while (n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
303
304		RFALSE(!buffer_uptodate
305		       (PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
306		       "PAP-5040: parent is not uptodate");
307
308		/* Parent at the path is not in the tree now. */
309		if (!B_IS_IN_TREE
310		    (p_s_parent =
311		     PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)))
312			return &MIN_KEY;
313		/* Check whether position in the parent is correct. */
314		if ((n_position =
315		     PATH_OFFSET_POSITION(p_s_chk_path,
316					  n_path_offset)) >
317		    B_NR_ITEMS(p_s_parent))
318			return &MIN_KEY;
319		/* Check whether parent at the path really points to the child. */
320		if (B_N_CHILD_NUM(p_s_parent, n_position) !=
321		    PATH_OFFSET_PBUFFER(p_s_chk_path,
322					n_path_offset + 1)->b_blocknr)
323			return &MIN_KEY;
324		/* Return delimiting key if position in the parent is not the last one. */
325		if (n_position != B_NR_ITEMS(p_s_parent))
326			return B_N_PDELIM_KEY(p_s_parent, n_position);
327	}
328	/* Return MAX_KEY if we are in the root of the buffer tree. */
329	if (PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->
330	    b_blocknr == SB_ROOT_BLOCK(p_s_sb))
331		return &MAX_KEY;
332	return &MIN_KEY;
333}
334
335/* Check whether a key is contained in the tree rooted from a buffer at a path. */
336/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
337   the path.  These delimiting keys are stored at least one level above that buffer in the tree. If the
338   buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
339   this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
340static inline int key_in_buffer(struct path *p_s_chk_path,	/* Path which should be checked.  */
341				const struct cpu_key *p_s_key,	/* Key which should be checked.   */
342				struct super_block *p_s_sb	/* Super block pointer.           */
343    )
344{
345
346	RFALSE(!p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
347	       || p_s_chk_path->path_length > MAX_HEIGHT,
348	       "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
349	       p_s_key, p_s_chk_path->path_length);
350	RFALSE(!PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev,
351	       "PAP-5060: device must not be NODEV");
352
353	if (comp_keys(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1)
354		/* left delimiting key is bigger, that the key we look for */
355		return 0;
356	//  if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 )
357	if (comp_keys(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1)
358		/* p_s_key must be less than right delimitiing key */
359		return 0;
360	return 1;
361}
362
363inline void decrement_bcount(struct buffer_head *p_s_bh)
364{
365	if (p_s_bh) {
366		if (atomic_read(&(p_s_bh->b_count))) {
367			put_bh(p_s_bh);
368			return;
369		}
370		reiserfs_panic(NULL,
371			       "PAP-5070: decrement_bcount: trying to free free buffer %b",
372			       p_s_bh);
373	}
374}
375
376/* Decrement b_count field of the all buffers in the path. */
377void decrement_counters_in_path(struct path *p_s_search_path)
378{
379	int n_path_offset = p_s_search_path->path_length;
380
381	RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET ||
382	       n_path_offset > EXTENDED_MAX_HEIGHT - 1,
383	       "PAP-5080: invalid path offset of %d", n_path_offset);
384
385	while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
386		struct buffer_head *bh;
387
388		bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
389		decrement_bcount(bh);
390	}
391	p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
392}
393
394int reiserfs_check_path(struct path *p)
395{
396	RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
397	       "path not properly relsed");
398	return 0;
399}
400
401/* Release all buffers in the path. Restore dirty bits clean
402** when preparing the buffer for the log
403**
404** only called from fix_nodes()
405*/
406void pathrelse_and_restore(struct super_block *s, struct path *p_s_search_path)
407{
408	int n_path_offset = p_s_search_path->path_length;
409
410	RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
411	       "clm-4000: invalid path offset");
412
413	while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
414		reiserfs_restore_prepared_buffer(s,
415						 PATH_OFFSET_PBUFFER
416						 (p_s_search_path,
417						  n_path_offset));
418		brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
419	}
420	p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
421}
422
423/* Release all buffers in the path. */
424void pathrelse(struct path *p_s_search_path)
425{
426	int n_path_offset = p_s_search_path->path_length;
427
428	RFALSE(n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
429	       "PAP-5090: invalid path offset");
430
431	while (n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
432		brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
433
434	p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
435}
436
437static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
438{
439	struct block_head *blkh;
440	struct item_head *ih;
441	int used_space;
442	int prev_location;
443	int i;
444	int nr;
445
446	blkh = (struct block_head *)buf;
447	if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
448		reiserfs_warning(NULL,
449				 "is_leaf: this should be caught earlier");
450		return 0;
451	}
452
453	nr = blkh_nr_item(blkh);
454	if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
455		/* item number is too big or too small */
456		reiserfs_warning(NULL, "is_leaf: nr_item seems wrong: %z", bh);
457		return 0;
458	}
459	ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
460	used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
461	if (used_space != blocksize - blkh_free_space(blkh)) {
462		/* free space does not match to calculated amount of use space */
463		reiserfs_warning(NULL, "is_leaf: free space seems wrong: %z",
464				 bh);
465		return 0;
466	}
467	// FIXME: it is_leaf will hit performance too much - we may have
468	// return 1 here
469
470	/* check tables of item heads */
471	ih = (struct item_head *)(buf + BLKH_SIZE);
472	prev_location = blocksize;
473	for (i = 0; i < nr; i++, ih++) {
474		if (le_ih_k_type(ih) == TYPE_ANY) {
475			reiserfs_warning(NULL,
476					 "is_leaf: wrong item type for item %h",
477					 ih);
478			return 0;
479		}
480		if (ih_location(ih) >= blocksize
481		    || ih_location(ih) < IH_SIZE * nr) {
482			reiserfs_warning(NULL,
483					 "is_leaf: item location seems wrong: %h",
484					 ih);
485			return 0;
486		}
487		if (ih_item_len(ih) < 1
488		    || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
489			reiserfs_warning(NULL,
490					 "is_leaf: item length seems wrong: %h",
491					 ih);
492			return 0;
493		}
494		if (prev_location - ih_location(ih) != ih_item_len(ih)) {
495			reiserfs_warning(NULL,
496					 "is_leaf: item location seems wrong (second one): %h",
497					 ih);
498			return 0;
499		}
500		prev_location = ih_location(ih);
501	}
502
503	// one may imagine much more checks
504	return 1;
505}
506
507/* returns 1 if buf looks like an internal node, 0 otherwise */
508static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
509{
510	struct block_head *blkh;
511	int nr;
512	int used_space;
513
514	blkh = (struct block_head *)buf;
515	nr = blkh_level(blkh);
516	if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
517		/* this level is not possible for internal nodes */
518		reiserfs_warning(NULL,
519				 "is_internal: this should be caught earlier");
520		return 0;
521	}
522
523	nr = blkh_nr_item(blkh);
524	if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
525		/* for internal which is not root we might check min number of keys */
526		reiserfs_warning(NULL,
527				 "is_internal: number of key seems wrong: %z",
528				 bh);
529		return 0;
530	}
531
532	used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
533	if (used_space != blocksize - blkh_free_space(blkh)) {
534		reiserfs_warning(NULL,
535				 "is_internal: free space seems wrong: %z", bh);
536		return 0;
537	}
538	// one may imagine much more checks
539	return 1;
540}
541
542// make sure that bh contains formatted node of reiserfs tree of
543// 'level'-th level
544static int is_tree_node(struct buffer_head *bh, int level)
545{
546	if (B_LEVEL(bh) != level) {
547		reiserfs_warning(NULL,
548				 "is_tree_node: node level %d does not match to the expected one %d",
549				 B_LEVEL(bh), level);
550		return 0;
551	}
552	if (level == DISK_LEAF_NODE_LEVEL)
553		return is_leaf(bh->b_data, bh->b_size, bh);
554
555	return is_internal(bh->b_data, bh->b_size, bh);
556}
557
558#define SEARCH_BY_KEY_READA 16
559
560/* The function is NOT SCHEDULE-SAFE! */
561static void search_by_key_reada(struct super_block *s,
562				struct buffer_head **bh,
563				unsigned long *b, int num)
564{
565	int i, j;
566
567	for (i = 0; i < num; i++) {
568		bh[i] = sb_getblk(s, b[i]);
569	}
570	for (j = 0; j < i; j++) {
571		/*
572		 * note, this needs attention if we are getting rid of the BKL
573		 * you have to make sure the prepared bit isn't set on this buffer
574		 */
575		if (!buffer_uptodate(bh[j]))
576			ll_rw_block(READA, 1, bh + j);
577		brelse(bh[j]);
578	}
579}
580
581/**************************************************************************
582 * Algorithm   SearchByKey                                                *
583 *             look for item in the Disk S+Tree by its key                *
584 * Input:  p_s_sb   -  super block                                        *
585 *         p_s_key  - pointer to the key to search                        *
586 * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR                         *
587 *         p_s_search_path - path from the root to the needed leaf        *
588 **************************************************************************/
589
590/* This function fills up the path from the root to the leaf as it
591   descends the tree looking for the key.  It uses reiserfs_bread to
592   try to find buffers in the cache given their block number.  If it
593   does not find them in the cache it reads them from disk.  For each
594   node search_by_key finds using reiserfs_bread it then uses
595   bin_search to look through that node.  bin_search will find the
596   position of the block_number of the next node if it is looking
597   through an internal node.  If it is looking through a leaf node
598   bin_search will find the position of the item which has key either
599   equal to given key, or which is the maximal key less than the given
600   key.  search_by_key returns a path that must be checked for the
601   correctness of the top of the path but need not be checked for the
602   correctness of the bottom of the path */
603/* The function is NOT SCHEDULE-SAFE! */
604int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key,	/* Key to search. */
605		  struct path *p_s_search_path,	/* This structure was
606						   allocated and initialized
607						   by the calling
608						   function. It is filled up
609						   by this function.  */
610		  int n_stop_level	/* How far down the tree to search. To
611					   stop at leaf level - set to
612					   DISK_LEAF_NODE_LEVEL */
613    )
614{
615	int n_block_number;
616	int expected_level;
617	struct buffer_head *p_s_bh;
618	struct path_element *p_s_last_element;
619	int n_node_level, n_retval;
620	int right_neighbor_of_leaf_node;
621	int fs_gen;
622	struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
623	unsigned long reada_blocks[SEARCH_BY_KEY_READA];
624	int reada_count = 0;
625
626#ifdef CONFIG_REISERFS_CHECK
627	int n_repeat_counter = 0;
628#endif
629
630	PROC_INFO_INC(p_s_sb, search_by_key);
631
632	/* As we add each node to a path we increase its count.  This means that
633	   we must be careful to release all nodes in a path before we either
634	   discard the path struct or re-use the path struct, as we do here. */
635
636	decrement_counters_in_path(p_s_search_path);
637
638	right_neighbor_of_leaf_node = 0;
639
640	/* With each iteration of this loop we search through the items in the
641	   current node, and calculate the next current node(next path element)
642	   for the next iteration of this loop.. */
643	n_block_number = SB_ROOT_BLOCK(p_s_sb);
644	expected_level = -1;
645	while (1) {
646
647#ifdef CONFIG_REISERFS_CHECK
648		if (!(++n_repeat_counter % 50000))
649			reiserfs_warning(p_s_sb, "PAP-5100: search_by_key: %s:"
650					 "there were %d iterations of while loop "
651					 "looking for key %K",
652					 current->comm, n_repeat_counter,
653					 p_s_key);
654#endif
655
656		/* prep path to have another element added to it. */
657		p_s_last_element =
658		    PATH_OFFSET_PELEMENT(p_s_search_path,
659					 ++p_s_search_path->path_length);
660		fs_gen = get_generation(p_s_sb);
661
662		/* Read the next tree node, and set the last element in the path to
663		   have a pointer to it. */
664		if ((p_s_bh = p_s_last_element->pe_buffer =
665		     sb_getblk(p_s_sb, n_block_number))) {
666			if (!buffer_uptodate(p_s_bh) && reada_count > 1) {
667				search_by_key_reada(p_s_sb, reada_bh,
668						    reada_blocks, reada_count);
669			}
670			ll_rw_block(READ, 1, &p_s_bh);
671			wait_on_buffer(p_s_bh);
672			if (!buffer_uptodate(p_s_bh))
673				goto io_error;
674		} else {
675		      io_error:
676			p_s_search_path->path_length--;
677			pathrelse(p_s_search_path);
678			return IO_ERROR;
679		}
680		reada_count = 0;
681		if (expected_level == -1)
682			expected_level = SB_TREE_HEIGHT(p_s_sb);
683		expected_level--;
684
685		/* It is possible that schedule occurred. We must check whether the key
686		   to search is still in the tree rooted from the current buffer. If
687		   not then repeat search from the root. */
688		if (fs_changed(fs_gen, p_s_sb) &&
689		    (!B_IS_IN_TREE(p_s_bh) ||
690		     B_LEVEL(p_s_bh) != expected_level ||
691		     !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) {
692			PROC_INFO_INC(p_s_sb, search_by_key_fs_changed);
693			PROC_INFO_INC(p_s_sb, search_by_key_restarted);
694			PROC_INFO_INC(p_s_sb,
695				      sbk_restarted[expected_level - 1]);
696			decrement_counters_in_path(p_s_search_path);
697
698			/* Get the root block number so that we can repeat the search
699			   starting from the root. */
700			n_block_number = SB_ROOT_BLOCK(p_s_sb);
701			expected_level = -1;
702			right_neighbor_of_leaf_node = 0;
703
704			/* repeat search from the root */
705			continue;
706		}
707
708		/* only check that the key is in the buffer if p_s_key is not
709		   equal to the MAX_KEY. Latter case is only possible in
710		   "finish_unfinished()" processing during mount. */
711		RFALSE(comp_keys(&MAX_KEY, p_s_key) &&
712		       !key_in_buffer(p_s_search_path, p_s_key, p_s_sb),
713		       "PAP-5130: key is not in the buffer");
714#ifdef CONFIG_REISERFS_CHECK
715		if (cur_tb) {
716			print_cur_tb("5140");
717			reiserfs_panic(p_s_sb,
718				       "PAP-5140: search_by_key: schedule occurred in do_balance!");
719		}
720#endif
721
722		// make sure, that the node contents look like a node of
723		// certain level
724		if (!is_tree_node(p_s_bh, expected_level)) {
725			reiserfs_warning(p_s_sb, "vs-5150: search_by_key: "
726					 "invalid format found in block %ld. Fsck?",
727					 p_s_bh->b_blocknr);
728			pathrelse(p_s_search_path);
729			return IO_ERROR;
730		}
731
732		/* ok, we have acquired next formatted node in the tree */
733		n_node_level = B_LEVEL(p_s_bh);
734
735		PROC_INFO_BH_STAT(p_s_sb, p_s_bh, n_node_level - 1);
736
737		RFALSE(n_node_level < n_stop_level,
738		       "vs-5152: tree level (%d) is less than stop level (%d)",
739		       n_node_level, n_stop_level);
740
741		n_retval = bin_search(p_s_key, B_N_PITEM_HEAD(p_s_bh, 0),
742				      B_NR_ITEMS(p_s_bh),
743				      (n_node_level ==
744				       DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
745				      KEY_SIZE,
746				      &(p_s_last_element->pe_position));
747		if (n_node_level == n_stop_level) {
748			return n_retval;
749		}
750
751		/* we are not in the stop level */
752		if (n_retval == ITEM_FOUND)
753			/* item has been found, so we choose the pointer which is to the right of the found one */
754			p_s_last_element->pe_position++;
755
756		/* if item was not found we choose the position which is to
757		   the left of the found item. This requires no code,
758		   bin_search did it already. */
759
760		/* So we have chosen a position in the current node which is
761		   an internal node.  Now we calculate child block number by
762		   position in the node. */
763		n_block_number =
764		    B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position);
765
766		/* if we are going to read leaf nodes, try for read ahead as well */
767		if ((p_s_search_path->reada & PATH_READA) &&
768		    n_node_level == DISK_LEAF_NODE_LEVEL + 1) {
769			int pos = p_s_last_element->pe_position;
770			int limit = B_NR_ITEMS(p_s_bh);
771			struct reiserfs_key *le_key;
772
773			if (p_s_search_path->reada & PATH_READA_BACK)
774				limit = 0;
775			while (reada_count < SEARCH_BY_KEY_READA) {
776				if (pos == limit)
777					break;
778				reada_blocks[reada_count++] =
779				    B_N_CHILD_NUM(p_s_bh, pos);
780				if (p_s_search_path->reada & PATH_READA_BACK)
781					pos--;
782				else
783					pos++;
784
785				/*
786				 * check to make sure we're in the same object
787				 */
788				le_key = B_N_PDELIM_KEY(p_s_bh, pos);
789				if (le32_to_cpu(le_key->k_objectid) !=
790				    p_s_key->on_disk_key.k_objectid) {
791					break;
792				}
793			}
794		}
795	}
796}
797
798/* Form the path to an item and position in this item which contains
799   file byte defined by p_s_key. If there is no such item
800   corresponding to the key, we point the path to the item with
801   maximal key less than p_s_key, and *p_n_pos_in_item is set to one
802   past the last entry/byte in the item.  If searching for entry in a
803   directory item, and it is not found, *p_n_pos_in_item is set to one
804   entry more than the entry with maximal key which is less than the
805   sought key.
806
807   Note that if there is no entry in this same node which is one more,
808   then we point to an imaginary entry.  for direct items, the
809   position is in units of bytes, for indirect items the position is
810   in units of blocknr entries, for directory items the position is in
811   units of directory entries.  */
812
813/* The function is NOT SCHEDULE-SAFE! */
814int search_for_position_by_key(struct super_block *p_s_sb,	/* Pointer to the super block.          */
815			       const struct cpu_key *p_cpu_key,	/* Key to search (cpu variable)         */
816			       struct path *p_s_search_path	/* Filled up by this function.          */
817    )
818{
819	struct item_head *p_le_ih;	/* pointer to on-disk structure */
820	int n_blk_size;
821	loff_t item_offset, offset;
822	struct reiserfs_dir_entry de;
823	int retval;
824
825	/* If searching for directory entry. */
826	if (is_direntry_cpu_key(p_cpu_key))
827		return search_by_entry_key(p_s_sb, p_cpu_key, p_s_search_path,
828					   &de);
829
830	/* If not searching for directory entry. */
831
832	/* If item is found. */
833	retval = search_item(p_s_sb, p_cpu_key, p_s_search_path);
834	if (retval == IO_ERROR)
835		return retval;
836	if (retval == ITEM_FOUND) {
837
838		RFALSE(!ih_item_len
839		       (B_N_PITEM_HEAD
840			(PATH_PLAST_BUFFER(p_s_search_path),
841			 PATH_LAST_POSITION(p_s_search_path))),
842		       "PAP-5165: item length equals zero");
843
844		pos_in_item(p_s_search_path) = 0;
845		return POSITION_FOUND;
846	}
847
848	RFALSE(!PATH_LAST_POSITION(p_s_search_path),
849	       "PAP-5170: position equals zero");
850
851	/* Item is not found. Set path to the previous item. */
852	p_le_ih =
853	    B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
854			   --PATH_LAST_POSITION(p_s_search_path));
855	n_blk_size = p_s_sb->s_blocksize;
856
857	if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
858		return FILE_NOT_FOUND;
859	}
860	// FIXME: quite ugly this far
861
862	item_offset = le_ih_k_offset(p_le_ih);
863	offset = cpu_key_k_offset(p_cpu_key);
864
865	/* Needed byte is contained in the item pointed to by the path. */
866	if (item_offset <= offset &&
867	    item_offset + op_bytes_number(p_le_ih, n_blk_size) > offset) {
868		pos_in_item(p_s_search_path) = offset - item_offset;
869		if (is_indirect_le_ih(p_le_ih)) {
870			pos_in_item(p_s_search_path) /= n_blk_size;
871		}
872		return POSITION_FOUND;
873	}
874
875	/* Needed byte is not contained in the item pointed to by the
876	   path. Set pos_in_item out of the item. */
877	if (is_indirect_le_ih(p_le_ih))
878		pos_in_item(p_s_search_path) =
879		    ih_item_len(p_le_ih) / UNFM_P_SIZE;
880	else
881		pos_in_item(p_s_search_path) = ih_item_len(p_le_ih);
882
883	return POSITION_NOT_FOUND;
884}
885
886/* Compare given item and item pointed to by the path. */
887int comp_items(const struct item_head *stored_ih, const struct path *p_s_path)
888{
889	struct buffer_head *p_s_bh;
890	struct item_head *ih;
891
892	/* Last buffer at the path is not in the tree. */
893	if (!B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path)))
894		return 1;
895
896	/* Last path position is invalid. */
897	if (PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh))
898		return 1;
899
900	/* we need only to know, whether it is the same item */
901	ih = get_ih(p_s_path);
902	return memcmp(stored_ih, ih, IH_SIZE);
903}
904
905/* unformatted nodes are not logged anymore, ever.  This is safe
906** now
907*/
908#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
909
910// block can not be forgotten as it is in I/O or held by someone
911#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
912
913// prepare for delete or cut of direct item
914static inline int prepare_for_direct_item(struct path *path,
915					  struct item_head *le_ih,
916					  struct inode *inode,
917					  loff_t new_file_length, int *cut_size)
918{
919	loff_t round_len;
920
921	if (new_file_length == max_reiserfs_offset(inode)) {
922		/* item has to be deleted */
923		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
924		return M_DELETE;
925	}
926	// new file gets truncated
927	if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
928		//
929		round_len = ROUND_UP(new_file_length);
930		/* this was n_new_file_length < le_ih ... */
931		if (round_len < le_ih_k_offset(le_ih)) {
932			*cut_size = -(IH_SIZE + ih_item_len(le_ih));
933			return M_DELETE;	/* Delete this item. */
934		}
935		/* Calculate first position and size for cutting from item. */
936		pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
937		*cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
938
939		return M_CUT;	/* Cut from this item. */
940	}
941
942	// old file: items may have any length
943
944	if (new_file_length < le_ih_k_offset(le_ih)) {
945		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
946		return M_DELETE;	/* Delete this item. */
947	}
948	/* Calculate first position and size for cutting from item. */
949	*cut_size = -(ih_item_len(le_ih) -
950		      (pos_in_item(path) =
951		       new_file_length + 1 - le_ih_k_offset(le_ih)));
952	return M_CUT;		/* Cut from this item. */
953}
954
955static inline int prepare_for_direntry_item(struct path *path,
956					    struct item_head *le_ih,
957					    struct inode *inode,
958					    loff_t new_file_length,
959					    int *cut_size)
960{
961	if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
962	    new_file_length == max_reiserfs_offset(inode)) {
963		RFALSE(ih_entry_count(le_ih) != 2,
964		       "PAP-5220: incorrect empty directory item (%h)", le_ih);
965		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
966		return M_DELETE;	/* Delete the directory item containing "." and ".." entry. */
967	}
968
969	if (ih_entry_count(le_ih) == 1) {
970		/* Delete the directory item such as there is one record only
971		   in this item */
972		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
973		return M_DELETE;
974	}
975
976	/* Cut one record from the directory item. */
977	*cut_size =
978	    -(DEH_SIZE +
979	      entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
980	return M_CUT;
981}
982
983#define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
984
985/*  If the path points to a directory or direct item, calculate mode and the size cut, for balance.
986    If the path points to an indirect item, remove some number of its unformatted nodes.
987    In case of file truncate calculate whether this item must be deleted/truncated or last
988    unformatted node of this item will be converted to a direct item.
989    This function returns a determination of what balance mode the calling function should employ. */
990static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct path *p_s_path, const struct cpu_key *p_s_item_key, int *p_n_removed,	/* Number of unformatted nodes which were removed
991																						   from end of the file. */
992				      int *p_n_cut_size, unsigned long long n_new_file_length	/* MAX_KEY_OFFSET in case of delete. */
993    )
994{
995	struct super_block *p_s_sb = inode->i_sb;
996	struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_path);
997	struct buffer_head *p_s_bh = PATH_PLAST_BUFFER(p_s_path);
998
999	BUG_ON(!th->t_trans_id);
1000
1001	/* Stat_data item. */
1002	if (is_statdata_le_ih(p_le_ih)) {
1003
1004		RFALSE(n_new_file_length != max_reiserfs_offset(inode),
1005		       "PAP-5210: mode must be M_DELETE");
1006
1007		*p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
1008		return M_DELETE;
1009	}
1010
1011	/* Directory item. */
1012	if (is_direntry_le_ih(p_le_ih))
1013		return prepare_for_direntry_item(p_s_path, p_le_ih, inode,
1014						 n_new_file_length,
1015						 p_n_cut_size);
1016
1017	/* Direct item. */
1018	if (is_direct_le_ih(p_le_ih))
1019		return prepare_for_direct_item(p_s_path, p_le_ih, inode,
1020					       n_new_file_length, p_n_cut_size);
1021
1022	/* Case of an indirect item. */
1023	{
1024	    int blk_size = p_s_sb->s_blocksize;
1025	    struct item_head s_ih;
1026	    int need_re_search;
1027	    int delete = 0;
1028	    int result = M_CUT;
1029	    int pos = 0;
1030
1031	    if ( n_new_file_length == max_reiserfs_offset (inode) ) {
1032		/* prepare_for_delete_or_cut() is called by
1033		 * reiserfs_delete_item() */
1034		n_new_file_length = 0;
1035		delete = 1;
1036	    }
1037
1038	    do {
1039		need_re_search = 0;
1040		*p_n_cut_size = 0;
1041		p_s_bh = PATH_PLAST_BUFFER(p_s_path);
1042		copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1043		pos = I_UNFM_NUM(&s_ih);
1044
1045		while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > n_new_file_length) {
1046		    __u32 *unfm, block;
1047
1048		    /* Each unformatted block deletion may involve one additional
1049		     * bitmap block into the transaction, thereby the initial
1050		     * journal space reservation might not be enough. */
1051		    if (!delete && (*p_n_cut_size) != 0 &&
1052			reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
1053			break;
1054		    }
1055
1056		    unfm = (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + pos - 1;
1057		    block = get_block_num(unfm, 0);
1058
1059		    if (block != 0) {
1060			reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1);
1061			put_block_num(unfm, 0, 0);
1062			journal_mark_dirty (th, p_s_sb, p_s_bh);
1063			reiserfs_free_block(th, inode, block, 1);
1064		    }
1065
1066		    cond_resched();
1067
1068		    if (item_moved (&s_ih, p_s_path))  {
1069			need_re_search = 1;
1070			break;
1071		    }
1072
1073		    pos --;
1074		    (*p_n_removed) ++;
1075		    (*p_n_cut_size) -= UNFM_P_SIZE;
1076
1077		    if (pos == 0) {
1078			(*p_n_cut_size) -= IH_SIZE;
1079			result = M_DELETE;
1080			break;
1081		    }
1082		}
1083		/* a trick.  If the buffer has been logged, this will do nothing.  If
1084		** we've broken the loop without logging it, it will restore the
1085		** buffer */
1086		reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh);
1087	    } while (need_re_search &&
1088		     search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND);
1089	    pos_in_item(p_s_path) = pos * UNFM_P_SIZE;
1090
1091	    if (*p_n_cut_size == 0) {
1092		/* Nothing were cut. maybe convert last unformatted node to the
1093		 * direct item? */
1094		result = M_CONVERT;
1095	    }
1096	    return result;
1097	}
1098}
1099
1100/* Calculate number of bytes which will be deleted or cut during balance */
1101static int calc_deleted_bytes_number(struct tree_balance *p_s_tb, char c_mode)
1102{
1103	int n_del_size;
1104	struct item_head *p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path);
1105
1106	if (is_statdata_le_ih(p_le_ih))
1107		return 0;
1108
1109	n_del_size =
1110	    (c_mode ==
1111	     M_DELETE) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0];
1112	if (is_direntry_le_ih(p_le_ih)) {
1113		// return EMPTY_DIR_SIZE; /* We delete emty directoris only. */
1114		// we can't use EMPTY_DIR_SIZE, as old format dirs have a different
1115		// empty size.  ick. FIXME, is this right?
1116		//
1117		return n_del_size;
1118	}
1119
1120	if (is_indirect_le_ih(p_le_ih))
1121		n_del_size = (n_del_size / UNFM_P_SIZE) * (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size);	// - get_ih_free_space (p_le_ih);
1122	return n_del_size;
1123}
1124
1125static void init_tb_struct(struct reiserfs_transaction_handle *th,
1126			   struct tree_balance *p_s_tb,
1127			   struct super_block *p_s_sb,
1128			   struct path *p_s_path, int n_size)
1129{
1130
1131	BUG_ON(!th->t_trans_id);
1132
1133	memset(p_s_tb, '\0', sizeof(struct tree_balance));
1134	p_s_tb->transaction_handle = th;
1135	p_s_tb->tb_sb = p_s_sb;
1136	p_s_tb->tb_path = p_s_path;
1137	PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1138	PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1139	p_s_tb->insert_size[0] = n_size;
1140}
1141
1142void padd_item(char *item, int total_length, int length)
1143{
1144	int i;
1145
1146	for (i = total_length; i > length;)
1147		item[--i] = 0;
1148}
1149
1150#ifdef REISERQUOTA_DEBUG
1151char key2type(struct reiserfs_key *ih)
1152{
1153	if (is_direntry_le_key(2, ih))
1154		return 'd';
1155	if (is_direct_le_key(2, ih))
1156		return 'D';
1157	if (is_indirect_le_key(2, ih))
1158		return 'i';
1159	if (is_statdata_le_key(2, ih))
1160		return 's';
1161	return 'u';
1162}
1163
1164char head2type(struct item_head *ih)
1165{
1166	if (is_direntry_le_ih(ih))
1167		return 'd';
1168	if (is_direct_le_ih(ih))
1169		return 'D';
1170	if (is_indirect_le_ih(ih))
1171		return 'i';
1172	if (is_statdata_le_ih(ih))
1173		return 's';
1174	return 'u';
1175}
1176#endif
1177
1178/* Delete object item. */
1179int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct path *p_s_path,	/* Path to the deleted item. */
1180			 const struct cpu_key *p_s_item_key,	/* Key to search for the deleted item.  */
1181			 struct inode *p_s_inode,	/* inode is here just to update i_blocks and quotas */
1182			 struct buffer_head *p_s_un_bh)
1183{				/* NULL or unformatted node pointer.    */
1184	struct super_block *p_s_sb = p_s_inode->i_sb;
1185	struct tree_balance s_del_balance;
1186	struct item_head s_ih;
1187	struct item_head *q_ih;
1188	int quota_cut_bytes;
1189	int n_ret_value, n_del_size, n_removed;
1190
1191#ifdef CONFIG_REISERFS_CHECK
1192	char c_mode;
1193	int n_iter = 0;
1194#endif
1195
1196	BUG_ON(!th->t_trans_id);
1197
1198	init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path,
1199		       0 /*size is unknown */ );
1200
1201	while (1) {
1202		n_removed = 0;
1203
1204#ifdef CONFIG_REISERFS_CHECK
1205		n_iter++;
1206		c_mode =
1207#endif
1208		    prepare_for_delete_or_cut(th, p_s_inode, p_s_path,
1209					      p_s_item_key, &n_removed,
1210					      &n_del_size,
1211					      max_reiserfs_offset(p_s_inode));
1212
1213		RFALSE(c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1214
1215		copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1216		s_del_balance.insert_size[0] = n_del_size;
1217
1218		n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1219		if (n_ret_value != REPEAT_SEARCH)
1220			break;
1221
1222		PROC_INFO_INC(p_s_sb, delete_item_restarted);
1223
1224		// file system changed, repeat search
1225		n_ret_value =
1226		    search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
1227		if (n_ret_value == IO_ERROR)
1228			break;
1229		if (n_ret_value == FILE_NOT_FOUND) {
1230			reiserfs_warning(p_s_sb,
1231					 "vs-5340: reiserfs_delete_item: "
1232					 "no items of the file %K found",
1233					 p_s_item_key);
1234			break;
1235		}
1236	}			/* while (1) */
1237
1238	if (n_ret_value != CARRY_ON) {
1239		unfix_nodes(&s_del_balance);
1240		return 0;
1241	}
1242	// reiserfs_delete_item returns item length when success
1243	n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1244	q_ih = get_ih(p_s_path);
1245	quota_cut_bytes = ih_item_len(q_ih);
1246
1247	/* hack so the quota code doesn't have to guess if the file
1248	 ** has a tail.  On tail insert, we allocate quota for 1 unformatted node.
1249	 ** We test the offset because the tail might have been
1250	 ** split into multiple items, and we only want to decrement for
1251	 ** the unfm node once
1252	 */
1253	if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(q_ih)) {
1254		if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) {
1255			quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE;
1256		} else {
1257			quota_cut_bytes = 0;
1258		}
1259	}
1260
1261	if (p_s_un_bh) {
1262		int off;
1263		char *data;
1264
1265		/* We are in direct2indirect conversion, so move tail contents
1266		   to the unformatted node */
1267		/* note, we do the copy before preparing the buffer because we
1268		 ** don't care about the contents of the unformatted node yet.
1269		 ** the only thing we really care about is the direct item's data
1270		 ** is in the unformatted node.
1271		 **
1272		 ** Otherwise, we would have to call reiserfs_prepare_for_journal on
1273		 ** the unformatted node, which might schedule, meaning we'd have to
1274		 ** loop all the way back up to the start of the while loop.
1275		 **
1276		 ** The unformatted node must be dirtied later on.  We can't be
1277		 ** sure here if the entire tail has been deleted yet.
1278		 **
1279		 ** p_s_un_bh is from the page cache (all unformatted nodes are
1280		 ** from the page cache) and might be a highmem page.  So, we
1281		 ** can't use p_s_un_bh->b_data.
1282		 ** -clm
1283		 */
1284
1285		data = kmap_atomic(p_s_un_bh->b_page, KM_USER0);
1286		off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1287		memcpy(data + off,
1288		       B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih),
1289		       n_ret_value);
1290		kunmap_atomic(data, KM_USER0);
1291	}
1292	/* Perform balancing after all resources have been collected at once. */
1293	do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1294
1295#ifdef REISERQUOTA_DEBUG
1296	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
1297		       "reiserquota delete_item(): freeing %u, id=%u type=%c",
1298		       quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih));
1299#endif
1300	DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1301
1302	/* Return deleted body length */
1303	return n_ret_value;
1304}
1305
1306/* Summary Of Mechanisms For Handling Collisions Between Processes:
1307
1308 deletion of the body of the object is performed by iput(), with the
1309 result that if multiple processes are operating on a file, the
1310 deletion of the body of the file is deferred until the last process
1311 that has an open inode performs its iput().
1312
1313 writes and truncates are protected from collisions by use of
1314 semaphores.
1315
1316 creates, linking, and mknod are protected from collisions with other
1317 processes by making the reiserfs_add_entry() the last step in the
1318 creation, and then rolling back all changes if there was a collision.
1319 - Hans
1320*/
1321
1322/* this deletes item which never gets split */
1323void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1324				struct inode *inode, struct reiserfs_key *key)
1325{
1326	struct tree_balance tb;
1327	INITIALIZE_PATH(path);
1328	int item_len = 0;
1329	int tb_init = 0;
1330	struct cpu_key cpu_key;
1331	int retval;
1332	int quota_cut_bytes = 0;
1333
1334	BUG_ON(!th->t_trans_id);
1335
1336	le_key2cpu_key(&cpu_key, key);
1337
1338	while (1) {
1339		retval = search_item(th->t_super, &cpu_key, &path);
1340		if (retval == IO_ERROR) {
1341			reiserfs_warning(th->t_super,
1342					 "vs-5350: reiserfs_delete_solid_item: "
1343					 "i/o failure occurred trying to delete %K",
1344					 &cpu_key);
1345			break;
1346		}
1347		if (retval != ITEM_FOUND) {
1348			pathrelse(&path);
1349			// No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir
1350			if (!
1351			    ((unsigned long long)
1352			     GET_HASH_VALUE(le_key_k_offset
1353					    (le_key_version(key), key)) == 0
1354			     && (unsigned long long)
1355			     GET_GENERATION_NUMBER(le_key_k_offset
1356						   (le_key_version(key),
1357						    key)) == 1))
1358				reiserfs_warning(th->t_super,
1359						 "vs-5355: reiserfs_delete_solid_item: %k not found",
1360						 key);
1361			break;
1362		}
1363		if (!tb_init) {
1364			tb_init = 1;
1365			item_len = ih_item_len(PATH_PITEM_HEAD(&path));
1366			init_tb_struct(th, &tb, th->t_super, &path,
1367				       -(IH_SIZE + item_len));
1368		}
1369		quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path));
1370
1371		retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
1372		if (retval == REPEAT_SEARCH) {
1373			PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
1374			continue;
1375		}
1376
1377		if (retval == CARRY_ON) {
1378			do_balance(&tb, NULL, NULL, M_DELETE);
1379			if (inode) {	/* Should we count quota for item? (we don't count quotas for save-links) */
1380#ifdef REISERQUOTA_DEBUG
1381				reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
1382					       "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
1383					       quota_cut_bytes, inode->i_uid,
1384					       key2type(key));
1385#endif
1386				DQUOT_FREE_SPACE_NODIRTY(inode,
1387							 quota_cut_bytes);
1388			}
1389			break;
1390		}
1391		// IO_ERROR, NO_DISK_SPACE, etc
1392		reiserfs_warning(th->t_super,
1393				 "vs-5360: reiserfs_delete_solid_item: "
1394				 "could not delete %K due to fix_nodes failure",
1395				 &cpu_key);
1396		unfix_nodes(&tb);
1397		break;
1398	}
1399
1400	reiserfs_check_path(&path);
1401}
1402
1403int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
1404			   struct inode *inode)
1405{
1406	int err;
1407	inode->i_size = 0;
1408	BUG_ON(!th->t_trans_id);
1409
1410	/* for directory this deletes item containing "." and ".." */
1411	err =
1412	    reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
1413	if (err)
1414		return err;
1415
1416#if defined( USE_INODE_GENERATION_COUNTER )
1417	if (!old_format_only(th->t_super)) {
1418		__le32 *inode_generation;
1419
1420		inode_generation =
1421		    &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
1422		*inode_generation =
1423		    cpu_to_le32(le32_to_cpu(*inode_generation) + 1);
1424	}
1425/* USE_INODE_GENERATION_COUNTER */
1426#endif
1427	reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1428
1429	return err;
1430}
1431
1432static void unmap_buffers(struct page *page, loff_t pos)
1433{
1434	struct buffer_head *bh;
1435	struct buffer_head *head;
1436	struct buffer_head *next;
1437	unsigned long tail_index;
1438	unsigned long cur_index;
1439
1440	if (page) {
1441		if (page_has_buffers(page)) {
1442			tail_index = pos & (PAGE_CACHE_SIZE - 1);
1443			cur_index = 0;
1444			head = page_buffers(page);
1445			bh = head;
1446			do {
1447				next = bh->b_this_page;
1448
1449				/* we want to unmap the buffers that contain the tail, and
1450				 ** all the buffers after it (since the tail must be at the
1451				 ** end of the file).  We don't want to unmap file data
1452				 ** before the tail, since it might be dirty and waiting to
1453				 ** reach disk
1454				 */
1455				cur_index += bh->b_size;
1456				if (cur_index > tail_index) {
1457					reiserfs_unmap_buffer(bh);
1458				}
1459				bh = next;
1460			} while (bh != head);
1461			if (PAGE_SIZE == bh->b_size) {
1462				clear_page_dirty(page);
1463			}
1464		}
1465	}
1466}
1467
1468static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1469				    struct inode *p_s_inode,
1470				    struct page *page,
1471				    struct path *p_s_path,
1472				    const struct cpu_key *p_s_item_key,
1473				    loff_t n_new_file_size, char *p_c_mode)
1474{
1475	struct super_block *p_s_sb = p_s_inode->i_sb;
1476	int n_block_size = p_s_sb->s_blocksize;
1477	int cut_bytes;
1478	BUG_ON(!th->t_trans_id);
1479	BUG_ON(n_new_file_size != p_s_inode->i_size);
1480
1481	/* the page being sent in could be NULL if there was an i/o error
1482	 ** reading in the last block.  The user will hit problems trying to
1483	 ** read the file, but for now we just skip the indirect2direct
1484	 */
1485	if (atomic_read(&p_s_inode->i_count) > 1 ||
1486	    !tail_has_to_be_packed(p_s_inode) ||
1487	    !page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) {
1488		// leave tail in an unformatted node
1489		*p_c_mode = M_SKIP_BALANCING;
1490		cut_bytes =
1491		    n_block_size - (n_new_file_size & (n_block_size - 1));
1492		pathrelse(p_s_path);
1493		return cut_bytes;
1494	}
1495	/* Permorm the conversion to a direct_item. */
1496	/*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode); */
1497	return indirect2direct(th, p_s_inode, page, p_s_path, p_s_item_key,
1498			       n_new_file_size, p_c_mode);
1499}
1500
1501/* we did indirect_to_direct conversion. And we have inserted direct
1502   item successesfully, but there were no disk space to cut unfm
1503   pointer being converted. Therefore we have to delete inserted
1504   direct item(s) */
1505static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1506					 struct inode *inode, struct path *path)
1507{
1508	struct cpu_key tail_key;
1509	int tail_len;
1510	int removed;
1511	BUG_ON(!th->t_trans_id);
1512
1513	make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);	// !!!!
1514	tail_key.key_length = 4;
1515
1516	tail_len =
1517	    (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1518	while (tail_len) {
1519		/* look for the last byte of the tail */
1520		if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
1521		    POSITION_NOT_FOUND)
1522			reiserfs_panic(inode->i_sb,
1523				       "vs-5615: indirect_to_direct_roll_back: found invalid item");
1524		RFALSE(path->pos_in_item !=
1525		       ih_item_len(PATH_PITEM_HEAD(path)) - 1,
1526		       "vs-5616: appended bytes found");
1527		PATH_LAST_POSITION(path)--;
1528
1529		removed =
1530		    reiserfs_delete_item(th, path, &tail_key, inode,
1531					 NULL /*unbh not needed */ );
1532		RFALSE(removed <= 0
1533		       || removed > tail_len,
1534		       "vs-5617: there was tail %d bytes, removed item length %d bytes",
1535		       tail_len, removed);
1536		tail_len -= removed;
1537		set_cpu_key_k_offset(&tail_key,
1538				     cpu_key_k_offset(&tail_key) - removed);
1539	}
1540	reiserfs_warning(inode->i_sb,
1541			 "indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space");
1542	//mark_file_without_tail (inode);
1543	mark_inode_dirty(inode);
1544}
1545
1546/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1547int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1548			   struct path *p_s_path,
1549			   struct cpu_key *p_s_item_key,
1550			   struct inode *p_s_inode,
1551			   struct page *page, loff_t n_new_file_size)
1552{
1553	struct super_block *p_s_sb = p_s_inode->i_sb;
1554	/* Every function which is going to call do_balance must first
1555	   create a tree_balance structure.  Then it must fill up this
1556	   structure by using the init_tb_struct and fix_nodes functions.
1557	   After that we can make tree balancing. */
1558	struct tree_balance s_cut_balance;
1559	struct item_head *p_le_ih;
1560	int n_cut_size = 0,	/* Amount to be cut. */
1561	    n_ret_value = CARRY_ON, n_removed = 0,	/* Number of the removed unformatted nodes. */
1562	    n_is_inode_locked = 0;
1563	char c_mode;		/* Mode of the balance. */
1564	int retval2 = -1;
1565	int quota_cut_bytes;
1566	loff_t tail_pos = 0;
1567
1568	BUG_ON(!th->t_trans_id);
1569
1570	init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path,
1571		       n_cut_size);
1572
1573	/* Repeat this loop until we either cut the item without needing
1574	   to balance, or we fix_nodes without schedule occurring */
1575	while (1) {
1576		/* Determine the balance mode, position of the first byte to
1577		   be cut, and size to be cut.  In case of the indirect item
1578		   free unformatted nodes which are pointed to by the cut
1579		   pointers. */
1580
1581		c_mode =
1582		    prepare_for_delete_or_cut(th, p_s_inode, p_s_path,
1583					      p_s_item_key, &n_removed,
1584					      &n_cut_size, n_new_file_size);
1585		if (c_mode == M_CONVERT) {
1586			/* convert last unformatted node to direct item or leave
1587			   tail in the unformatted node */
1588			RFALSE(n_ret_value != CARRY_ON,
1589			       "PAP-5570: can not convert twice");
1590
1591			n_ret_value =
1592			    maybe_indirect_to_direct(th, p_s_inode, page,
1593						     p_s_path, p_s_item_key,
1594						     n_new_file_size, &c_mode);
1595			if (c_mode == M_SKIP_BALANCING)
1596				/* tail has been left in the unformatted node */
1597				return n_ret_value;
1598
1599			n_is_inode_locked = 1;
1600
1601			/* removing of last unformatted node will change value we
1602			   have to return to truncate. Save it */
1603			retval2 = n_ret_value;
1604			/*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1)); */
1605
1606			/* So, we have performed the first part of the conversion:
1607			   inserting the new direct item.  Now we are removing the
1608			   last unformatted node pointer. Set key to search for
1609			   it. */
1610			set_cpu_key_k_type(p_s_item_key, TYPE_INDIRECT);
1611			p_s_item_key->key_length = 4;
1612			n_new_file_size -=
1613			    (n_new_file_size & (p_s_sb->s_blocksize - 1));
1614			tail_pos = n_new_file_size;
1615			set_cpu_key_k_offset(p_s_item_key, n_new_file_size + 1);
1616			if (search_for_position_by_key
1617			    (p_s_sb, p_s_item_key,
1618			     p_s_path) == POSITION_NOT_FOUND) {
1619				print_block(PATH_PLAST_BUFFER(p_s_path), 3,
1620					    PATH_LAST_POSITION(p_s_path) - 1,
1621					    PATH_LAST_POSITION(p_s_path) + 1);
1622				reiserfs_panic(p_s_sb,
1623					       "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%K)",
1624					       p_s_item_key);
1625			}
1626			continue;
1627		}
1628		if (n_cut_size == 0) {
1629			pathrelse(p_s_path);
1630			return 0;
1631		}
1632
1633		s_cut_balance.insert_size[0] = n_cut_size;
1634
1635		n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL);
1636		if (n_ret_value != REPEAT_SEARCH)
1637			break;
1638
1639		PROC_INFO_INC(p_s_sb, cut_from_item_restarted);
1640
1641		n_ret_value =
1642		    search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
1643		if (n_ret_value == POSITION_FOUND)
1644			continue;
1645
1646		reiserfs_warning(p_s_sb,
1647				 "PAP-5610: reiserfs_cut_from_item: item %K not found",
1648				 p_s_item_key);
1649		unfix_nodes(&s_cut_balance);
1650		return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT;
1651	}			/* while */
1652
1653	// check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
1654	if (n_ret_value != CARRY_ON) {
1655		if (n_is_inode_locked) {
1656			// FIXME: this seems to be not needed: we are always able
1657			// to cut item
1658			indirect_to_direct_roll_back(th, p_s_inode, p_s_path);
1659		}
1660		if (n_ret_value == NO_DISK_SPACE)
1661			reiserfs_warning(p_s_sb, "NO_DISK_SPACE");
1662		unfix_nodes(&s_cut_balance);
1663		return -EIO;
1664	}
1665
1666	/* go ahead and perform balancing */
1667
1668	RFALSE(c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode");
1669
1670	/* Calculate number of bytes that need to be cut from the item. */
1671	quota_cut_bytes =
1672	    (c_mode ==
1673	     M_DELETE) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance.
1674	    insert_size[0];
1675	if (retval2 == -1)
1676		n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode);
1677	else
1678		n_ret_value = retval2;
1679
1680	/* For direct items, we only change the quota when deleting the last
1681	 ** item.
1682	 */
1683	p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path);
1684	if (!S_ISLNK(p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1685		if (c_mode == M_DELETE &&
1686		    (le_ih_k_offset(p_le_ih) & (p_s_sb->s_blocksize - 1)) ==
1687		    1) {
1688			// FIXME: this is to keep 3.5 happy
1689			REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX;
1690			quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE;
1691		} else {
1692			quota_cut_bytes = 0;
1693		}
1694	}
1695#ifdef CONFIG_REISERFS_CHECK
1696	if (n_is_inode_locked) {
1697		struct item_head *le_ih =
1698		    PATH_PITEM_HEAD(s_cut_balance.tb_path);
1699		/* we are going to complete indirect2direct conversion. Make
1700		   sure, that we exactly remove last unformatted node pointer
1701		   of the item */
1702		if (!is_indirect_le_ih(le_ih))
1703			reiserfs_panic(p_s_sb,
1704				       "vs-5652: reiserfs_cut_from_item: "
1705				       "item must be indirect %h", le_ih);
1706
1707		if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1708			reiserfs_panic(p_s_sb,
1709				       "vs-5653: reiserfs_cut_from_item: "
1710				       "completing indirect2direct conversion indirect item %h "
1711				       "being deleted must be of 4 byte long",
1712				       le_ih);
1713
1714		if (c_mode == M_CUT
1715		    && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1716			reiserfs_panic(p_s_sb,
1717				       "vs-5654: reiserfs_cut_from_item: "
1718				       "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)",
1719				       le_ih, s_cut_balance.insert_size[0]);
1720		}
1721		/* it would be useful to make sure, that right neighboring
1722		   item is direct item of this file */
1723	}
1724#endif
1725
1726	do_balance(&s_cut_balance, NULL, NULL, c_mode);
1727	if (n_is_inode_locked) {
1728		/* we've done an indirect->direct conversion.  when the data block
1729		 ** was freed, it was removed from the list of blocks that must
1730		 ** be flushed before the transaction commits, make sure to
1731		 ** unmap and invalidate it
1732		 */
1733		unmap_buffers(page, tail_pos);
1734		REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask;
1735	}
1736#ifdef REISERQUOTA_DEBUG
1737	reiserfs_debug(p_s_inode->i_sb, REISERFS_DEBUG_CODE,
1738		       "reiserquota cut_from_item(): freeing %u id=%u type=%c",
1739		       quota_cut_bytes, p_s_inode->i_uid, '?');
1740#endif
1741	DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1742	return n_ret_value;
1743}
1744
1745static void truncate_directory(struct reiserfs_transaction_handle *th,
1746			       struct inode *inode)
1747{
1748	BUG_ON(!th->t_trans_id);
1749	if (inode->i_nlink)
1750		reiserfs_warning(inode->i_sb,
1751				 "vs-5655: truncate_directory: link count != 0");
1752
1753	set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
1754	set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
1755	reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1756	reiserfs_update_sd(th, inode);
1757	set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
1758	set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
1759}
1760
1761/* Truncate file to the new size. Note, this must be called with a transaction
1762   already started */
1763int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *p_s_inode,	/* ->i_size contains new
1764												   size */
1765			 struct page *page,	/* up to date for last block */
1766			 int update_timestamps	/* when it is called by
1767						   file_release to convert
1768						   the tail - no timestamps
1769						   should be updated */
1770    )
1771{
1772	INITIALIZE_PATH(s_search_path);	/* Path to the current object item. */
1773	struct item_head *p_le_ih;	/* Pointer to an item header. */
1774	struct cpu_key s_item_key;	/* Key to search for a previous file item. */
1775	loff_t n_file_size,	/* Old file size. */
1776	 n_new_file_size;	/* New file size. */
1777	int n_deleted;		/* Number of deleted or truncated bytes. */
1778	int retval;
1779	int err = 0;
1780
1781	BUG_ON(!th->t_trans_id);
1782	if (!
1783	    (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode)
1784	     || S_ISLNK(p_s_inode->i_mode)))
1785		return 0;
1786
1787	if (S_ISDIR(p_s_inode->i_mode)) {
1788		// deletion of directory - no need to update timestamps
1789		truncate_directory(th, p_s_inode);
1790		return 0;
1791	}
1792
1793	/* Get new file size. */
1794	n_new_file_size = p_s_inode->i_size;
1795
1796	// FIXME: note, that key type is unimportant here
1797	make_cpu_key(&s_item_key, p_s_inode, max_reiserfs_offset(p_s_inode),
1798		     TYPE_DIRECT, 3);
1799
1800	retval =
1801	    search_for_position_by_key(p_s_inode->i_sb, &s_item_key,
1802				       &s_search_path);
1803	if (retval == IO_ERROR) {
1804		reiserfs_warning(p_s_inode->i_sb,
1805				 "vs-5657: reiserfs_do_truncate: "
1806				 "i/o failure occurred trying to truncate %K",
1807				 &s_item_key);
1808		err = -EIO;
1809		goto out;
1810	}
1811	if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1812		reiserfs_warning(p_s_inode->i_sb,
1813				 "PAP-5660: reiserfs_do_truncate: "
1814				 "wrong result %d of search for %K", retval,
1815				 &s_item_key);
1816
1817		err = -EIO;
1818		goto out;
1819	}
1820
1821	s_search_path.pos_in_item--;
1822
1823	/* Get real file size (total length of all file items) */
1824	p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1825	if (is_statdata_le_ih(p_le_ih))
1826		n_file_size = 0;
1827	else {
1828		loff_t offset = le_ih_k_offset(p_le_ih);
1829		int bytes =
1830		    op_bytes_number(p_le_ih, p_s_inode->i_sb->s_blocksize);
1831
1832		/* this may mismatch with real file size: if last direct item
1833		   had no padding zeros and last unformatted node had no free
1834		   space, this file would have this file size */
1835		n_file_size = offset + bytes - 1;
1836	}
1837	/*
1838	 * are we doing a full truncate or delete, if so
1839	 * kick in the reada code
1840	 */
1841	if (n_new_file_size == 0)
1842		s_search_path.reada = PATH_READA | PATH_READA_BACK;
1843
1844	if (n_file_size == 0 || n_file_size < n_new_file_size) {
1845		goto update_and_out;
1846	}
1847
1848	/* Update key to search for the last file item. */
1849	set_cpu_key_k_offset(&s_item_key, n_file_size);
1850
1851	do {
1852		/* Cut or delete file item. */
1853		n_deleted =
1854		    reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
1855					   p_s_inode, page, n_new_file_size);
1856		if (n_deleted < 0) {
1857			reiserfs_warning(p_s_inode->i_sb,
1858					 "vs-5665: reiserfs_do_truncate: reiserfs_cut_from_item failed");
1859			reiserfs_check_path(&s_search_path);
1860			return 0;
1861		}
1862
1863		RFALSE(n_deleted > n_file_size,
1864		       "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1865		       n_deleted, n_file_size, &s_item_key);
1866
1867		/* Change key to search the last file item. */
1868		n_file_size -= n_deleted;
1869
1870		set_cpu_key_k_offset(&s_item_key, n_file_size);
1871
1872		/* While there are bytes to truncate and previous file item is presented in the tree. */
1873
1874		/*
1875		 ** This loop could take a really long time, and could log
1876		 ** many more blocks than a transaction can hold.  So, we do a polite
1877		 ** journal end here, and if the transaction needs ending, we make
1878		 ** sure the file is consistent before ending the current trans
1879		 ** and starting a new one
1880		 */
1881		if (journal_transaction_should_end(th, 0) ||
1882		    reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
1883			int orig_len_alloc = th->t_blocks_allocated;
1884			decrement_counters_in_path(&s_search_path);
1885
1886			if (update_timestamps) {
1887				p_s_inode->i_mtime = p_s_inode->i_ctime =
1888				    CURRENT_TIME_SEC;
1889			}
1890			reiserfs_update_sd(th, p_s_inode);
1891
1892			err = journal_end(th, p_s_inode->i_sb, orig_len_alloc);
1893			if (err)
1894				goto out;
1895			err = journal_begin(th, p_s_inode->i_sb,
1896					    JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
1897			if (err)
1898				goto out;
1899			reiserfs_update_inode_transaction(p_s_inode);
1900		}
1901	} while (n_file_size > ROUND_UP(n_new_file_size) &&
1902		 search_for_position_by_key(p_s_inode->i_sb, &s_item_key,
1903					    &s_search_path) == POSITION_FOUND);
1904
1905	RFALSE(n_file_size > ROUND_UP(n_new_file_size),
1906	       "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1907	       n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid);
1908
1909      update_and_out:
1910	if (update_timestamps) {
1911		// this is truncate, not file closing
1912		p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC;
1913	}
1914	reiserfs_update_sd(th, p_s_inode);
1915
1916      out:
1917	pathrelse(&s_search_path);
1918	return err;
1919}
1920
1921#ifdef CONFIG_REISERFS_CHECK
1922// this makes sure, that we __append__, not overwrite or add holes
1923static void check_research_for_paste(struct path *path,
1924				     const struct cpu_key *p_s_key)
1925{
1926	struct item_head *found_ih = get_ih(path);
1927
1928	if (is_direct_le_ih(found_ih)) {
1929		if (le_ih_k_offset(found_ih) +
1930		    op_bytes_number(found_ih,
1931				    get_last_bh(path)->b_size) !=
1932		    cpu_key_k_offset(p_s_key)
1933		    || op_bytes_number(found_ih,
1934				       get_last_bh(path)->b_size) !=
1935		    pos_in_item(path))
1936			reiserfs_panic(NULL,
1937				       "PAP-5720: check_research_for_paste: "
1938				       "found direct item %h or position (%d) does not match to key %K",
1939				       found_ih, pos_in_item(path), p_s_key);
1940	}
1941	if (is_indirect_le_ih(found_ih)) {
1942		if (le_ih_k_offset(found_ih) +
1943		    op_bytes_number(found_ih,
1944				    get_last_bh(path)->b_size) !=
1945		    cpu_key_k_offset(p_s_key)
1946		    || I_UNFM_NUM(found_ih) != pos_in_item(path)
1947		    || get_ih_free_space(found_ih) != 0)
1948			reiserfs_panic(NULL,
1949				       "PAP-5730: check_research_for_paste: "
1950				       "found indirect item (%h) or position (%d) does not match to key (%K)",
1951				       found_ih, pos_in_item(path), p_s_key);
1952	}
1953}
1954#endif				/* config reiserfs check */
1955
1956/* Paste bytes to the existing item. Returns bytes number pasted into the item. */
1957int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct path *p_s_search_path,	/* Path to the pasted item.          */
1958			     const struct cpu_key *p_s_key,	/* Key to search for the needed item. */
1959			     struct inode *inode,	/* Inode item belongs to */
1960			     const char *p_c_body,	/* Pointer to the bytes to paste.    */
1961			     int n_pasted_size)
1962{				/* Size of pasted bytes.             */
1963	struct tree_balance s_paste_balance;
1964	int retval;
1965	int fs_gen;
1966
1967	BUG_ON(!th->t_trans_id);
1968
1969	fs_gen = get_generation(inode->i_sb);
1970
1971#ifdef REISERQUOTA_DEBUG
1972	reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1973		       "reiserquota paste_into_item(): allocating %u id=%u type=%c",
1974		       n_pasted_size, inode->i_uid,
1975		       key2type(&(p_s_key->on_disk_key)));
1976#endif
1977
1978	if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) {
1979		pathrelse(p_s_search_path);
1980		return -EDQUOT;
1981	}
1982	init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path,
1983		       n_pasted_size);
1984#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1985	s_paste_balance.key = p_s_key->on_disk_key;
1986#endif
1987
1988	/* DQUOT_* can schedule, must check before the fix_nodes */
1989	if (fs_changed(fs_gen, inode->i_sb)) {
1990		goto search_again;
1991	}
1992
1993	while ((retval =
1994		fix_nodes(M_PASTE, &s_paste_balance, NULL,
1995			  p_c_body)) == REPEAT_SEARCH) {
1996	      search_again:
1997		/* file system changed while we were in the fix_nodes */
1998		PROC_INFO_INC(th->t_super, paste_into_item_restarted);
1999		retval =
2000		    search_for_position_by_key(th->t_super, p_s_key,
2001					       p_s_search_path);
2002		if (retval == IO_ERROR) {
2003			retval = -EIO;
2004			goto error_out;
2005		}
2006		if (retval == POSITION_FOUND) {
2007			reiserfs_warning(inode->i_sb,
2008					 "PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists",
2009					 p_s_key);
2010			retval = -EEXIST;
2011			goto error_out;
2012		}
2013#ifdef CONFIG_REISERFS_CHECK
2014		check_research_for_paste(p_s_search_path, p_s_key);
2015#endif
2016	}
2017
2018	/* Perform balancing after all resources are collected by fix_nodes, and
2019	   accessing them will not risk triggering schedule. */
2020	if (retval == CARRY_ON) {
2021		do_balance(&s_paste_balance, NULL /*ih */ , p_c_body, M_PASTE);
2022		return 0;
2023	}
2024	retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2025      error_out:
2026	/* this also releases the path */
2027	unfix_nodes(&s_paste_balance);
2028#ifdef REISERQUOTA_DEBUG
2029	reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2030		       "reiserquota paste_into_item(): freeing %u id=%u type=%c",
2031		       n_pasted_size, inode->i_uid,
2032		       key2type(&(p_s_key->on_disk_key)));
2033#endif
2034	DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
2035	return retval;
2036}
2037
2038/* Insert new item into the buffer at the path. */
2039int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct path *p_s_path,	/* Path to the inserteded item.         */
2040			 const struct cpu_key *key, struct item_head *p_s_ih,	/* Pointer to the item header to insert. */
2041			 struct inode *inode, const char *p_c_body)
2042{				/* Pointer to the bytes to insert.      */
2043	struct tree_balance s_ins_balance;
2044	int retval;
2045	int fs_gen = 0;
2046	int quota_bytes = 0;
2047
2048	BUG_ON(!th->t_trans_id);
2049
2050	if (inode) {		/* Do we count quotas for item? */
2051		fs_gen = get_generation(inode->i_sb);
2052		quota_bytes = ih_item_len(p_s_ih);
2053
2054		/* hack so the quota code doesn't have to guess if the file has
2055		 ** a tail, links are always tails, so there's no guessing needed
2056		 */
2057		if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_s_ih)) {
2058			quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
2059		}
2060#ifdef REISERQUOTA_DEBUG
2061		reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2062			       "reiserquota insert_item(): allocating %u id=%u type=%c",
2063			       quota_bytes, inode->i_uid, head2type(p_s_ih));
2064#endif
2065		/* We can't dirty inode here. It would be immediately written but
2066		 * appropriate stat item isn't inserted yet... */
2067		if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) {
2068			pathrelse(p_s_path);
2069			return -EDQUOT;
2070		}
2071	}
2072	init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path,
2073		       IH_SIZE + ih_item_len(p_s_ih));
2074#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2075	s_ins_balance.key = key->on_disk_key;
2076#endif
2077	/* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
2078	if (inode && fs_changed(fs_gen, inode->i_sb)) {
2079		goto search_again;
2080	}
2081
2082	while ((retval =
2083		fix_nodes(M_INSERT, &s_ins_balance, p_s_ih,
2084			  p_c_body)) == REPEAT_SEARCH) {
2085	      search_again:
2086		/* file system changed while we were in the fix_nodes */
2087		PROC_INFO_INC(th->t_super, insert_item_restarted);
2088		retval = search_item(th->t_super, key, p_s_path);
2089		if (retval == IO_ERROR) {
2090			retval = -EIO;
2091			goto error_out;
2092		}
2093		if (retval == ITEM_FOUND) {
2094			reiserfs_warning(th->t_super,
2095					 "PAP-5760: reiserfs_insert_item: "
2096					 "key %K already exists in the tree",
2097					 key);
2098			retval = -EEXIST;
2099			goto error_out;
2100		}
2101	}
2102
2103	/* make balancing after all resources will be collected at a time */
2104	if (retval == CARRY_ON) {
2105		do_balance(&s_ins_balance, p_s_ih, p_c_body, M_INSERT);
2106		return 0;
2107	}
2108
2109	retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2110      error_out:
2111	/* also releases the path */
2112	unfix_nodes(&s_ins_balance);
2113#ifdef REISERQUOTA_DEBUG
2114	reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
2115		       "reiserquota insert_item(): freeing %u id=%u type=%c",
2116		       quota_bytes, inode->i_uid, head2type(p_s_ih));
2117#endif
2118	if (inode)
2119		DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes);
2120	return retval;
2121}
2122