btree.c revision 41c88bd74d372db5102996a4ea6167a725c24b5e
1/*
2 * btree.c - NILFS B-tree.
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19 *
20 * Written by Koji Sato <koji@osrg.net>.
21 */
22
23#include <linux/slab.h>
24#include <linux/string.h>
25#include <linux/errno.h>
26#include <linux/pagevec.h>
27#include "nilfs.h"
28#include "page.h"
29#include "btnode.h"
30#include "btree.h"
31#include "alloc.h"
32#include "dat.h"
33
34static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
35{
36	struct nilfs_btree_path *path;
37	int level = NILFS_BTREE_LEVEL_DATA;
38
39	path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
40	if (path == NULL)
41		goto out;
42
43	for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
44		path[level].bp_bh = NULL;
45		path[level].bp_sib_bh = NULL;
46		path[level].bp_index = 0;
47		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
48		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
49		path[level].bp_op = NULL;
50	}
51
52out:
53	return path;
54}
55
56static void nilfs_btree_free_path(struct nilfs_btree_path *path)
57{
58	int level = NILFS_BTREE_LEVEL_DATA;
59
60	for (; level < NILFS_BTREE_LEVEL_MAX; level++)
61		brelse(path[level].bp_bh);
62
63	kmem_cache_free(nilfs_btree_path_cache, path);
64}
65
66/*
67 * B-tree node operations
68 */
69static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
70				 struct buffer_head **bhp)
71{
72	struct address_space *btnc =
73		&NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
74	int err;
75
76	err = nilfs_btnode_submit_block(btnc, ptr, 0, bhp);
77	if (err)
78		return err == -EEXIST ? 0 : err;
79
80	wait_on_buffer(*bhp);
81	if (!buffer_uptodate(*bhp)) {
82		brelse(*bhp);
83		return -EIO;
84	}
85	return 0;
86}
87
88static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
89				     __u64 ptr, struct buffer_head **bhp)
90{
91	struct address_space *btnc =
92		&NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
93	struct buffer_head *bh;
94
95	bh = nilfs_btnode_create_block(btnc, ptr);
96	if (!bh)
97		return -ENOMEM;
98
99	set_buffer_nilfs_volatile(bh);
100	*bhp = bh;
101	return 0;
102}
103
104static inline int
105nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
106{
107	return node->bn_flags;
108}
109
110static inline void
111nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
112{
113	node->bn_flags = flags;
114}
115
116static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
117{
118	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
119}
120
121static inline int
122nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
123{
124	return node->bn_level;
125}
126
127static inline void
128nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
129{
130	node->bn_level = level;
131}
132
133static inline int
134nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
135{
136	return le16_to_cpu(node->bn_nchildren);
137}
138
139static inline void
140nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
141{
142	node->bn_nchildren = cpu_to_le16(nchildren);
143}
144
145static inline int nilfs_btree_node_size(const struct nilfs_btree *btree)
146{
147	return 1 << btree->bt_bmap.b_inode->i_blkbits;
148}
149
150static inline int
151nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
152			       const struct nilfs_btree *btree)
153{
154	return nilfs_btree_node_root(node) ?
155		NILFS_BTREE_ROOT_NCHILDREN_MIN :
156		NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
157}
158
159static inline int
160nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
161			       const struct nilfs_btree *btree)
162{
163	return nilfs_btree_node_root(node) ?
164		NILFS_BTREE_ROOT_NCHILDREN_MAX :
165		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
166}
167
168static inline __le64 *
169nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
170{
171	return (__le64 *)((char *)(node + 1) +
172			  (nilfs_btree_node_root(node) ?
173			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
174}
175
176static inline __le64 *
177nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
178		       const struct nilfs_btree *btree)
179{
180	return (__le64 *)(nilfs_btree_node_dkeys(node) +
181			  nilfs_btree_node_nchildren_max(node, btree));
182}
183
184static inline __u64
185nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
186{
187	return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index));
188}
189
190static inline void
191nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
192{
193	*(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key);
194}
195
196static inline __u64
197nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
198			 const struct nilfs_btree_node *node, int index)
199{
200	return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) +
201					index));
202}
203
204static inline void
205nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
206			 struct nilfs_btree_node *node, int index, __u64 ptr)
207{
208	*(nilfs_btree_node_dptrs(node, btree) + index) =
209		nilfs_bmap_ptr_to_dptr(ptr);
210}
211
212static void nilfs_btree_node_init(struct nilfs_btree *btree,
213				  struct nilfs_btree_node *node,
214				  int flags, int level, int nchildren,
215				  const __u64 *keys, const __u64 *ptrs)
216{
217	__le64 *dkeys;
218	__le64 *dptrs;
219	int i;
220
221	nilfs_btree_node_set_flags(node, flags);
222	nilfs_btree_node_set_level(node, level);
223	nilfs_btree_node_set_nchildren(node, nchildren);
224
225	dkeys = nilfs_btree_node_dkeys(node);
226	dptrs = nilfs_btree_node_dptrs(node, btree);
227	for (i = 0; i < nchildren; i++) {
228		dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
229		dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
230	}
231}
232
233/* Assume the buffer heads corresponding to left and right are locked. */
234static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
235				       struct nilfs_btree_node *left,
236				       struct nilfs_btree_node *right,
237				       int n)
238{
239	__le64 *ldkeys, *rdkeys;
240	__le64 *ldptrs, *rdptrs;
241	int lnchildren, rnchildren;
242
243	ldkeys = nilfs_btree_node_dkeys(left);
244	ldptrs = nilfs_btree_node_dptrs(left, btree);
245	lnchildren = nilfs_btree_node_get_nchildren(left);
246
247	rdkeys = nilfs_btree_node_dkeys(right);
248	rdptrs = nilfs_btree_node_dptrs(right, btree);
249	rnchildren = nilfs_btree_node_get_nchildren(right);
250
251	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
252	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
253	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
254	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
255
256	lnchildren += n;
257	rnchildren -= n;
258	nilfs_btree_node_set_nchildren(left, lnchildren);
259	nilfs_btree_node_set_nchildren(right, rnchildren);
260}
261
262/* Assume that the buffer heads corresponding to left and right are locked. */
263static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
264					struct nilfs_btree_node *left,
265					struct nilfs_btree_node *right,
266					int n)
267{
268	__le64 *ldkeys, *rdkeys;
269	__le64 *ldptrs, *rdptrs;
270	int lnchildren, rnchildren;
271
272	ldkeys = nilfs_btree_node_dkeys(left);
273	ldptrs = nilfs_btree_node_dptrs(left, btree);
274	lnchildren = nilfs_btree_node_get_nchildren(left);
275
276	rdkeys = nilfs_btree_node_dkeys(right);
277	rdptrs = nilfs_btree_node_dptrs(right, btree);
278	rnchildren = nilfs_btree_node_get_nchildren(right);
279
280	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
281	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
282	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
283	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
284
285	lnchildren -= n;
286	rnchildren += n;
287	nilfs_btree_node_set_nchildren(left, lnchildren);
288	nilfs_btree_node_set_nchildren(right, rnchildren);
289}
290
291/* Assume that the buffer head corresponding to node is locked. */
292static void nilfs_btree_node_insert(struct nilfs_btree *btree,
293				    struct nilfs_btree_node *node,
294				    __u64 key, __u64 ptr, int index)
295{
296	__le64 *dkeys;
297	__le64 *dptrs;
298	int nchildren;
299
300	dkeys = nilfs_btree_node_dkeys(node);
301	dptrs = nilfs_btree_node_dptrs(node, btree);
302	nchildren = nilfs_btree_node_get_nchildren(node);
303	if (index < nchildren) {
304		memmove(dkeys + index + 1, dkeys + index,
305			(nchildren - index) * sizeof(*dkeys));
306		memmove(dptrs + index + 1, dptrs + index,
307			(nchildren - index) * sizeof(*dptrs));
308	}
309	dkeys[index] = nilfs_bmap_key_to_dkey(key);
310	dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
311	nchildren++;
312	nilfs_btree_node_set_nchildren(node, nchildren);
313}
314
315/* Assume that the buffer head corresponding to node is locked. */
316static void nilfs_btree_node_delete(struct nilfs_btree *btree,
317				    struct nilfs_btree_node *node,
318				    __u64 *keyp, __u64 *ptrp, int index)
319{
320	__u64 key;
321	__u64 ptr;
322	__le64 *dkeys;
323	__le64 *dptrs;
324	int nchildren;
325
326	dkeys = nilfs_btree_node_dkeys(node);
327	dptrs = nilfs_btree_node_dptrs(node, btree);
328	key = nilfs_bmap_dkey_to_key(dkeys[index]);
329	ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
330	nchildren = nilfs_btree_node_get_nchildren(node);
331	if (keyp != NULL)
332		*keyp = key;
333	if (ptrp != NULL)
334		*ptrp = ptr;
335
336	if (index < nchildren - 1) {
337		memmove(dkeys + index, dkeys + index + 1,
338			(nchildren - index - 1) * sizeof(*dkeys));
339		memmove(dptrs + index, dptrs + index + 1,
340			(nchildren - index - 1) * sizeof(*dptrs));
341	}
342	nchildren--;
343	nilfs_btree_node_set_nchildren(node, nchildren);
344}
345
346static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
347				   __u64 key, int *indexp)
348{
349	__u64 nkey;
350	int index, low, high, s;
351
352	/* binary search */
353	low = 0;
354	high = nilfs_btree_node_get_nchildren(node) - 1;
355	index = 0;
356	s = 0;
357	while (low <= high) {
358		index = (low + high) / 2;
359		nkey = nilfs_btree_node_get_key(node, index);
360		if (nkey == key) {
361			s = 0;
362			goto out;
363		} else if (nkey < key) {
364			low = index + 1;
365			s = -1;
366		} else {
367			high = index - 1;
368			s = 1;
369		}
370	}
371
372	/* adjust index */
373	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
374		if (s > 0 && index > 0)
375			index--;
376	} else if (s < 0)
377		index++;
378
379 out:
380	*indexp = index;
381
382	return s == 0;
383}
384
385static inline struct nilfs_btree_node *
386nilfs_btree_get_root(const struct nilfs_btree *btree)
387{
388	return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
389}
390
391static inline struct nilfs_btree_node *
392nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
393{
394	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
395}
396
397static inline struct nilfs_btree_node *
398nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
399{
400	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
401}
402
403static inline int nilfs_btree_height(const struct nilfs_btree *btree)
404{
405	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
406}
407
408static inline struct nilfs_btree_node *
409nilfs_btree_get_node(const struct nilfs_btree *btree,
410		     const struct nilfs_btree_path *path,
411		     int level)
412{
413	return (level == nilfs_btree_height(btree) - 1) ?
414		nilfs_btree_get_root(btree) :
415		nilfs_btree_get_nonroot_node(path, level);
416}
417
418static inline int
419nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
420{
421	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
422		dump_stack();
423		printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
424		       nilfs_btree_node_get_level(node), level);
425		return 1;
426	}
427	return 0;
428}
429
430static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
431				 struct nilfs_btree_path *path,
432				 __u64 key, __u64 *ptrp, int minlevel)
433{
434	struct nilfs_btree_node *node;
435	__u64 ptr;
436	int level, index, found, ret;
437
438	node = nilfs_btree_get_root(btree);
439	level = nilfs_btree_node_get_level(node);
440	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
441		return -ENOENT;
442
443	found = nilfs_btree_node_lookup(node, key, &index);
444	ptr = nilfs_btree_node_get_ptr(btree, node, index);
445	path[level].bp_bh = NULL;
446	path[level].bp_index = index;
447
448	for (level--; level >= minlevel; level--) {
449		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
450		if (ret < 0)
451			return ret;
452		node = nilfs_btree_get_nonroot_node(path, level);
453		if (nilfs_btree_bad_node(node, level))
454			return -EINVAL;
455		if (!found)
456			found = nilfs_btree_node_lookup(node, key, &index);
457		else
458			index = 0;
459		if (index < nilfs_btree_node_nchildren_max(node, btree))
460			ptr = nilfs_btree_node_get_ptr(btree, node, index);
461		else {
462			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
463			/* insert */
464			ptr = NILFS_BMAP_INVALID_PTR;
465		}
466		path[level].bp_index = index;
467	}
468	if (!found)
469		return -ENOENT;
470
471	if (ptrp != NULL)
472		*ptrp = ptr;
473
474	return 0;
475}
476
477static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
478				      struct nilfs_btree_path *path,
479				      __u64 *keyp, __u64 *ptrp)
480{
481	struct nilfs_btree_node *node;
482	__u64 ptr;
483	int index, level, ret;
484
485	node = nilfs_btree_get_root(btree);
486	index = nilfs_btree_node_get_nchildren(node) - 1;
487	if (index < 0)
488		return -ENOENT;
489	level = nilfs_btree_node_get_level(node);
490	ptr = nilfs_btree_node_get_ptr(btree, node, index);
491	path[level].bp_bh = NULL;
492	path[level].bp_index = index;
493
494	for (level--; level > 0; level--) {
495		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
496		if (ret < 0)
497			return ret;
498		node = nilfs_btree_get_nonroot_node(path, level);
499		if (nilfs_btree_bad_node(node, level))
500			return -EINVAL;
501		index = nilfs_btree_node_get_nchildren(node) - 1;
502		ptr = nilfs_btree_node_get_ptr(btree, node, index);
503		path[level].bp_index = index;
504	}
505
506	if (keyp != NULL)
507		*keyp = nilfs_btree_node_get_key(node, index);
508	if (ptrp != NULL)
509		*ptrp = ptr;
510
511	return 0;
512}
513
514static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
515			      __u64 key, int level, __u64 *ptrp)
516{
517	struct nilfs_btree *btree;
518	struct nilfs_btree_path *path;
519	__u64 ptr;
520	int ret;
521
522	btree = (struct nilfs_btree *)bmap;
523	path = nilfs_btree_alloc_path();
524	if (path == NULL)
525		return -ENOMEM;
526
527	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
528
529	if (ptrp != NULL)
530		*ptrp = ptr;
531
532	nilfs_btree_free_path(path);
533
534	return ret;
535}
536
537static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
538				     __u64 key, __u64 *ptrp, unsigned maxblocks)
539{
540	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
541	struct nilfs_btree_path *path;
542	struct nilfs_btree_node *node;
543	struct inode *dat = NULL;
544	__u64 ptr, ptr2;
545	sector_t blocknr;
546	int level = NILFS_BTREE_LEVEL_NODE_MIN;
547	int ret, cnt, index, maxlevel;
548
549	path = nilfs_btree_alloc_path();
550	if (path == NULL)
551		return -ENOMEM;
552
553	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
554	if (ret < 0)
555		goto out;
556
557	if (NILFS_BMAP_USE_VBN(bmap)) {
558		dat = nilfs_bmap_get_dat(bmap);
559		ret = nilfs_dat_translate(dat, ptr, &blocknr);
560		if (ret < 0)
561			goto out;
562		ptr = blocknr;
563	}
564	cnt = 1;
565	if (cnt == maxblocks)
566		goto end;
567
568	maxlevel = nilfs_btree_height(btree) - 1;
569	node = nilfs_btree_get_node(btree, path, level);
570	index = path[level].bp_index + 1;
571	for (;;) {
572		while (index < nilfs_btree_node_get_nchildren(node)) {
573			if (nilfs_btree_node_get_key(node, index) !=
574			    key + cnt)
575				goto end;
576			ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
577			if (dat) {
578				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
579				if (ret < 0)
580					goto out;
581				ptr2 = blocknr;
582			}
583			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
584				goto end;
585			index++;
586			continue;
587		}
588		if (level == maxlevel)
589			break;
590
591		/* look-up right sibling node */
592		node = nilfs_btree_get_node(btree, path, level + 1);
593		index = path[level + 1].bp_index + 1;
594		if (index >= nilfs_btree_node_get_nchildren(node) ||
595		    nilfs_btree_node_get_key(node, index) != key + cnt)
596			break;
597		ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
598		path[level + 1].bp_index = index;
599
600		brelse(path[level].bp_bh);
601		path[level].bp_bh = NULL;
602		ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
603		if (ret < 0)
604			goto out;
605		node = nilfs_btree_get_nonroot_node(path, level);
606		index = 0;
607		path[level].bp_index = index;
608	}
609 end:
610	*ptrp = ptr;
611	ret = cnt;
612 out:
613	nilfs_btree_free_path(path);
614	return ret;
615}
616
617static void nilfs_btree_promote_key(struct nilfs_btree *btree,
618				    struct nilfs_btree_path *path,
619				    int level, __u64 key)
620{
621	if (level < nilfs_btree_height(btree) - 1) {
622		do {
623			nilfs_btree_node_set_key(
624				nilfs_btree_get_nonroot_node(path, level),
625				path[level].bp_index, key);
626			if (!buffer_dirty(path[level].bp_bh))
627				nilfs_btnode_mark_dirty(path[level].bp_bh);
628		} while ((path[level].bp_index == 0) &&
629			 (++level < nilfs_btree_height(btree) - 1));
630	}
631
632	/* root */
633	if (level == nilfs_btree_height(btree) - 1) {
634		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
635					 path[level].bp_index, key);
636	}
637}
638
639static void nilfs_btree_do_insert(struct nilfs_btree *btree,
640				  struct nilfs_btree_path *path,
641				  int level, __u64 *keyp, __u64 *ptrp)
642{
643	struct nilfs_btree_node *node;
644
645	if (level < nilfs_btree_height(btree) - 1) {
646		node = nilfs_btree_get_nonroot_node(path, level);
647		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
648					path[level].bp_index);
649		if (!buffer_dirty(path[level].bp_bh))
650			nilfs_btnode_mark_dirty(path[level].bp_bh);
651
652		if (path[level].bp_index == 0)
653			nilfs_btree_promote_key(btree, path, level + 1,
654						nilfs_btree_node_get_key(node,
655									 0));
656	} else {
657		node = nilfs_btree_get_root(btree);
658		nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
659					path[level].bp_index);
660	}
661}
662
663static void nilfs_btree_carry_left(struct nilfs_btree *btree,
664				   struct nilfs_btree_path *path,
665				   int level, __u64 *keyp, __u64 *ptrp)
666{
667	struct nilfs_btree_node *node, *left;
668	int nchildren, lnchildren, n, move;
669
670	node = nilfs_btree_get_nonroot_node(path, level);
671	left = nilfs_btree_get_sib_node(path, level);
672	nchildren = nilfs_btree_node_get_nchildren(node);
673	lnchildren = nilfs_btree_node_get_nchildren(left);
674	move = 0;
675
676	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
677	if (n > path[level].bp_index) {
678		/* move insert point */
679		n--;
680		move = 1;
681	}
682
683	nilfs_btree_node_move_left(btree, left, node, n);
684
685	if (!buffer_dirty(path[level].bp_bh))
686		nilfs_btnode_mark_dirty(path[level].bp_bh);
687	if (!buffer_dirty(path[level].bp_sib_bh))
688		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
689
690	nilfs_btree_promote_key(btree, path, level + 1,
691				nilfs_btree_node_get_key(node, 0));
692
693	if (move) {
694		brelse(path[level].bp_bh);
695		path[level].bp_bh = path[level].bp_sib_bh;
696		path[level].bp_sib_bh = NULL;
697		path[level].bp_index += lnchildren;
698		path[level + 1].bp_index--;
699	} else {
700		brelse(path[level].bp_sib_bh);
701		path[level].bp_sib_bh = NULL;
702		path[level].bp_index -= n;
703	}
704
705	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
706}
707
708static void nilfs_btree_carry_right(struct nilfs_btree *btree,
709				    struct nilfs_btree_path *path,
710				    int level, __u64 *keyp, __u64 *ptrp)
711{
712	struct nilfs_btree_node *node, *right;
713	int nchildren, rnchildren, n, move;
714
715	node = nilfs_btree_get_nonroot_node(path, level);
716	right = nilfs_btree_get_sib_node(path, level);
717	nchildren = nilfs_btree_node_get_nchildren(node);
718	rnchildren = nilfs_btree_node_get_nchildren(right);
719	move = 0;
720
721	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
722	if (n > nchildren - path[level].bp_index) {
723		/* move insert point */
724		n--;
725		move = 1;
726	}
727
728	nilfs_btree_node_move_right(btree, node, right, n);
729
730	if (!buffer_dirty(path[level].bp_bh))
731		nilfs_btnode_mark_dirty(path[level].bp_bh);
732	if (!buffer_dirty(path[level].bp_sib_bh))
733		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
734
735	path[level + 1].bp_index++;
736	nilfs_btree_promote_key(btree, path, level + 1,
737				nilfs_btree_node_get_key(right, 0));
738	path[level + 1].bp_index--;
739
740	if (move) {
741		brelse(path[level].bp_bh);
742		path[level].bp_bh = path[level].bp_sib_bh;
743		path[level].bp_sib_bh = NULL;
744		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
745		path[level + 1].bp_index++;
746	} else {
747		brelse(path[level].bp_sib_bh);
748		path[level].bp_sib_bh = NULL;
749	}
750
751	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
752}
753
754static void nilfs_btree_split(struct nilfs_btree *btree,
755			      struct nilfs_btree_path *path,
756			      int level, __u64 *keyp, __u64 *ptrp)
757{
758	struct nilfs_btree_node *node, *right;
759	__u64 newkey;
760	__u64 newptr;
761	int nchildren, n, move;
762
763	node = nilfs_btree_get_nonroot_node(path, level);
764	right = nilfs_btree_get_sib_node(path, level);
765	nchildren = nilfs_btree_node_get_nchildren(node);
766	move = 0;
767
768	n = (nchildren + 1) / 2;
769	if (n > nchildren - path[level].bp_index) {
770		n--;
771		move = 1;
772	}
773
774	nilfs_btree_node_move_right(btree, node, right, n);
775
776	if (!buffer_dirty(path[level].bp_bh))
777		nilfs_btnode_mark_dirty(path[level].bp_bh);
778	if (!buffer_dirty(path[level].bp_sib_bh))
779		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
780
781	newkey = nilfs_btree_node_get_key(right, 0);
782	newptr = path[level].bp_newreq.bpr_ptr;
783
784	if (move) {
785		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
786		nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
787					path[level].bp_index);
788
789		*keyp = nilfs_btree_node_get_key(right, 0);
790		*ptrp = path[level].bp_newreq.bpr_ptr;
791
792		brelse(path[level].bp_bh);
793		path[level].bp_bh = path[level].bp_sib_bh;
794		path[level].bp_sib_bh = NULL;
795	} else {
796		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
797
798		*keyp = nilfs_btree_node_get_key(right, 0);
799		*ptrp = path[level].bp_newreq.bpr_ptr;
800
801		brelse(path[level].bp_sib_bh);
802		path[level].bp_sib_bh = NULL;
803	}
804
805	path[level + 1].bp_index++;
806}
807
808static void nilfs_btree_grow(struct nilfs_btree *btree,
809			     struct nilfs_btree_path *path,
810			     int level, __u64 *keyp, __u64 *ptrp)
811{
812	struct nilfs_btree_node *root, *child;
813	int n;
814
815	root = nilfs_btree_get_root(btree);
816	child = nilfs_btree_get_sib_node(path, level);
817
818	n = nilfs_btree_node_get_nchildren(root);
819
820	nilfs_btree_node_move_right(btree, root, child, n);
821	nilfs_btree_node_set_level(root, level + 1);
822
823	if (!buffer_dirty(path[level].bp_sib_bh))
824		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
825
826	path[level].bp_bh = path[level].bp_sib_bh;
827	path[level].bp_sib_bh = NULL;
828
829	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
830
831	*keyp = nilfs_btree_node_get_key(child, 0);
832	*ptrp = path[level].bp_newreq.bpr_ptr;
833}
834
835static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
836				   const struct nilfs_btree_path *path)
837{
838	struct nilfs_btree_node *node;
839	int level;
840
841	if (path == NULL)
842		return NILFS_BMAP_INVALID_PTR;
843
844	/* left sibling */
845	level = NILFS_BTREE_LEVEL_NODE_MIN;
846	if (path[level].bp_index > 0) {
847		node = nilfs_btree_get_node(btree, path, level);
848		return nilfs_btree_node_get_ptr(btree, node,
849						path[level].bp_index - 1);
850	}
851
852	/* parent */
853	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
854	if (level <= nilfs_btree_height(btree) - 1) {
855		node = nilfs_btree_get_node(btree, path, level);
856		return nilfs_btree_node_get_ptr(btree, node,
857						path[level].bp_index);
858	}
859
860	return NILFS_BMAP_INVALID_PTR;
861}
862
863static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
864				       const struct nilfs_btree_path *path,
865				       __u64 key)
866{
867	__u64 ptr;
868
869	ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
870	if (ptr != NILFS_BMAP_INVALID_PTR)
871		/* sequential access */
872		return ptr;
873	else {
874		ptr = nilfs_btree_find_near(btree, path);
875		if (ptr != NILFS_BMAP_INVALID_PTR)
876			/* near */
877			return ptr;
878	}
879	/* block group */
880	return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
881}
882
883static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
884				     __u64 ptr)
885{
886	btree->bt_bmap.b_last_allocated_key = key;
887	btree->bt_bmap.b_last_allocated_ptr = ptr;
888}
889
890static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
891				      struct nilfs_btree_path *path,
892				      int *levelp, __u64 key, __u64 ptr,
893				      struct nilfs_bmap_stats *stats)
894{
895	struct buffer_head *bh;
896	struct nilfs_btree_node *node, *parent, *sib;
897	__u64 sibptr;
898	int pindex, level, ret;
899	struct inode *dat = NULL;
900
901	stats->bs_nblocks = 0;
902	level = NILFS_BTREE_LEVEL_DATA;
903
904	/* allocate a new ptr for data block */
905	if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
906		path[level].bp_newreq.bpr_ptr =
907			nilfs_btree_find_target_v(btree, path, key);
908		dat = nilfs_bmap_get_dat(&btree->bt_bmap);
909	}
910
911	ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
912					   &path[level].bp_newreq, dat);
913	if (ret < 0)
914		goto err_out_data;
915
916	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
917	     level < nilfs_btree_height(btree) - 1;
918	     level++) {
919		node = nilfs_btree_get_nonroot_node(path, level);
920		if (nilfs_btree_node_get_nchildren(node) <
921		    nilfs_btree_node_nchildren_max(node, btree)) {
922			path[level].bp_op = nilfs_btree_do_insert;
923			stats->bs_nblocks++;
924			goto out;
925		}
926
927		parent = nilfs_btree_get_node(btree, path, level + 1);
928		pindex = path[level + 1].bp_index;
929
930		/* left sibling */
931		if (pindex > 0) {
932			sibptr = nilfs_btree_node_get_ptr(btree, parent,
933							  pindex - 1);
934			ret = nilfs_btree_get_block(btree, sibptr, &bh);
935			if (ret < 0)
936				goto err_out_child_node;
937			sib = (struct nilfs_btree_node *)bh->b_data;
938			if (nilfs_btree_node_get_nchildren(sib) <
939			    nilfs_btree_node_nchildren_max(sib, btree)) {
940				path[level].bp_sib_bh = bh;
941				path[level].bp_op = nilfs_btree_carry_left;
942				stats->bs_nblocks++;
943				goto out;
944			} else
945				brelse(bh);
946		}
947
948		/* right sibling */
949		if (pindex <
950		    nilfs_btree_node_get_nchildren(parent) - 1) {
951			sibptr = nilfs_btree_node_get_ptr(btree, parent,
952							  pindex + 1);
953			ret = nilfs_btree_get_block(btree, sibptr, &bh);
954			if (ret < 0)
955				goto err_out_child_node;
956			sib = (struct nilfs_btree_node *)bh->b_data;
957			if (nilfs_btree_node_get_nchildren(sib) <
958			    nilfs_btree_node_nchildren_max(sib, btree)) {
959				path[level].bp_sib_bh = bh;
960				path[level].bp_op = nilfs_btree_carry_right;
961				stats->bs_nblocks++;
962				goto out;
963			} else
964				brelse(bh);
965		}
966
967		/* split */
968		path[level].bp_newreq.bpr_ptr =
969			path[level - 1].bp_newreq.bpr_ptr + 1;
970		ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
971						   &path[level].bp_newreq, dat);
972		if (ret < 0)
973			goto err_out_child_node;
974		ret = nilfs_btree_get_new_block(btree,
975						path[level].bp_newreq.bpr_ptr,
976						&bh);
977		if (ret < 0)
978			goto err_out_curr_node;
979
980		stats->bs_nblocks++;
981
982		nilfs_btree_node_init(btree,
983				      (struct nilfs_btree_node *)bh->b_data,
984				      0, level, 0, NULL, NULL);
985		path[level].bp_sib_bh = bh;
986		path[level].bp_op = nilfs_btree_split;
987	}
988
989	/* root */
990	node = nilfs_btree_get_root(btree);
991	if (nilfs_btree_node_get_nchildren(node) <
992	    nilfs_btree_node_nchildren_max(node, btree)) {
993		path[level].bp_op = nilfs_btree_do_insert;
994		stats->bs_nblocks++;
995		goto out;
996	}
997
998	/* grow */
999	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1000	ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1001					   &path[level].bp_newreq, dat);
1002	if (ret < 0)
1003		goto err_out_child_node;
1004	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1005					&bh);
1006	if (ret < 0)
1007		goto err_out_curr_node;
1008
1009	nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1010			      0, level, 0, NULL, NULL);
1011	path[level].bp_sib_bh = bh;
1012	path[level].bp_op = nilfs_btree_grow;
1013
1014	level++;
1015	path[level].bp_op = nilfs_btree_do_insert;
1016
1017	/* a newly-created node block and a data block are added */
1018	stats->bs_nblocks += 2;
1019
1020	/* success */
1021 out:
1022	*levelp = level;
1023	return ret;
1024
1025	/* error */
1026 err_out_curr_node:
1027	nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1028				   dat);
1029 err_out_child_node:
1030	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1031		nilfs_btnode_delete(path[level].bp_sib_bh);
1032		nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1033					   &path[level].bp_newreq, dat);
1034
1035	}
1036
1037	nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1038				   dat);
1039 err_out_data:
1040	*levelp = level;
1041	stats->bs_nblocks = 0;
1042	return ret;
1043}
1044
1045static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1046				      struct nilfs_btree_path *path,
1047				      int maxlevel, __u64 key, __u64 ptr)
1048{
1049	struct inode *dat = NULL;
1050	int level;
1051
1052	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1053	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1054	if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
1055		nilfs_btree_set_target_v(btree, key, ptr);
1056		dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1057	}
1058
1059	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1060		nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1061					    &path[level - 1].bp_newreq, dat);
1062		path[level].bp_op(btree, path, level, &key, &ptr);
1063	}
1064
1065	if (!nilfs_bmap_dirty(&btree->bt_bmap))
1066		nilfs_bmap_set_dirty(&btree->bt_bmap);
1067}
1068
1069static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1070{
1071	struct nilfs_btree *btree;
1072	struct nilfs_btree_path *path;
1073	struct nilfs_bmap_stats stats;
1074	int level, ret;
1075
1076	btree = (struct nilfs_btree *)bmap;
1077	path = nilfs_btree_alloc_path();
1078	if (path == NULL)
1079		return -ENOMEM;
1080
1081	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1082				    NILFS_BTREE_LEVEL_NODE_MIN);
1083	if (ret != -ENOENT) {
1084		if (ret == 0)
1085			ret = -EEXIST;
1086		goto out;
1087	}
1088
1089	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1090	if (ret < 0)
1091		goto out;
1092	nilfs_btree_commit_insert(btree, path, level, key, ptr);
1093	nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1094
1095 out:
1096	nilfs_btree_free_path(path);
1097	return ret;
1098}
1099
1100static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1101				  struct nilfs_btree_path *path,
1102				  int level, __u64 *keyp, __u64 *ptrp)
1103{
1104	struct nilfs_btree_node *node;
1105
1106	if (level < nilfs_btree_height(btree) - 1) {
1107		node = nilfs_btree_get_nonroot_node(path, level);
1108		nilfs_btree_node_delete(btree, node, keyp, ptrp,
1109					path[level].bp_index);
1110		if (!buffer_dirty(path[level].bp_bh))
1111			nilfs_btnode_mark_dirty(path[level].bp_bh);
1112		if (path[level].bp_index == 0)
1113			nilfs_btree_promote_key(btree, path, level + 1,
1114				nilfs_btree_node_get_key(node, 0));
1115	} else {
1116		node = nilfs_btree_get_root(btree);
1117		nilfs_btree_node_delete(btree, node, keyp, ptrp,
1118					path[level].bp_index);
1119	}
1120}
1121
1122static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1123				    struct nilfs_btree_path *path,
1124				    int level, __u64 *keyp, __u64 *ptrp)
1125{
1126	struct nilfs_btree_node *node, *left;
1127	int nchildren, lnchildren, n;
1128
1129	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1130
1131	node = nilfs_btree_get_nonroot_node(path, level);
1132	left = nilfs_btree_get_sib_node(path, level);
1133	nchildren = nilfs_btree_node_get_nchildren(node);
1134	lnchildren = nilfs_btree_node_get_nchildren(left);
1135
1136	n = (nchildren + lnchildren) / 2 - nchildren;
1137
1138	nilfs_btree_node_move_right(btree, left, node, n);
1139
1140	if (!buffer_dirty(path[level].bp_bh))
1141		nilfs_btnode_mark_dirty(path[level].bp_bh);
1142	if (!buffer_dirty(path[level].bp_sib_bh))
1143		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1144
1145	nilfs_btree_promote_key(btree, path, level + 1,
1146				nilfs_btree_node_get_key(node, 0));
1147
1148	brelse(path[level].bp_sib_bh);
1149	path[level].bp_sib_bh = NULL;
1150	path[level].bp_index += n;
1151}
1152
1153static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1154				     struct nilfs_btree_path *path,
1155				     int level, __u64 *keyp, __u64 *ptrp)
1156{
1157	struct nilfs_btree_node *node, *right;
1158	int nchildren, rnchildren, n;
1159
1160	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1161
1162	node = nilfs_btree_get_nonroot_node(path, level);
1163	right = nilfs_btree_get_sib_node(path, level);
1164	nchildren = nilfs_btree_node_get_nchildren(node);
1165	rnchildren = nilfs_btree_node_get_nchildren(right);
1166
1167	n = (nchildren + rnchildren) / 2 - nchildren;
1168
1169	nilfs_btree_node_move_left(btree, node, right, n);
1170
1171	if (!buffer_dirty(path[level].bp_bh))
1172		nilfs_btnode_mark_dirty(path[level].bp_bh);
1173	if (!buffer_dirty(path[level].bp_sib_bh))
1174		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1175
1176	path[level + 1].bp_index++;
1177	nilfs_btree_promote_key(btree, path, level + 1,
1178				nilfs_btree_node_get_key(right, 0));
1179	path[level + 1].bp_index--;
1180
1181	brelse(path[level].bp_sib_bh);
1182	path[level].bp_sib_bh = NULL;
1183}
1184
1185static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1186				    struct nilfs_btree_path *path,
1187				    int level, __u64 *keyp, __u64 *ptrp)
1188{
1189	struct nilfs_btree_node *node, *left;
1190	int n;
1191
1192	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1193
1194	node = nilfs_btree_get_nonroot_node(path, level);
1195	left = nilfs_btree_get_sib_node(path, level);
1196
1197	n = nilfs_btree_node_get_nchildren(node);
1198
1199	nilfs_btree_node_move_left(btree, left, node, n);
1200
1201	if (!buffer_dirty(path[level].bp_sib_bh))
1202		nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1203
1204	nilfs_btnode_delete(path[level].bp_bh);
1205	path[level].bp_bh = path[level].bp_sib_bh;
1206	path[level].bp_sib_bh = NULL;
1207	path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1208}
1209
1210static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1211				     struct nilfs_btree_path *path,
1212				     int level, __u64 *keyp, __u64 *ptrp)
1213{
1214	struct nilfs_btree_node *node, *right;
1215	int n;
1216
1217	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1218
1219	node = nilfs_btree_get_nonroot_node(path, level);
1220	right = nilfs_btree_get_sib_node(path, level);
1221
1222	n = nilfs_btree_node_get_nchildren(right);
1223
1224	nilfs_btree_node_move_left(btree, node, right, n);
1225
1226	if (!buffer_dirty(path[level].bp_bh))
1227		nilfs_btnode_mark_dirty(path[level].bp_bh);
1228
1229	nilfs_btnode_delete(path[level].bp_sib_bh);
1230	path[level].bp_sib_bh = NULL;
1231	path[level + 1].bp_index++;
1232}
1233
1234static void nilfs_btree_shrink(struct nilfs_btree *btree,
1235			       struct nilfs_btree_path *path,
1236			       int level, __u64 *keyp, __u64 *ptrp)
1237{
1238	struct nilfs_btree_node *root, *child;
1239	int n;
1240
1241	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1242
1243	root = nilfs_btree_get_root(btree);
1244	child = nilfs_btree_get_nonroot_node(path, level);
1245
1246	nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1247	nilfs_btree_node_set_level(root, level);
1248	n = nilfs_btree_node_get_nchildren(child);
1249	nilfs_btree_node_move_left(btree, root, child, n);
1250
1251	nilfs_btnode_delete(path[level].bp_bh);
1252	path[level].bp_bh = NULL;
1253}
1254
1255
1256static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1257				      struct nilfs_btree_path *path,
1258				      int *levelp,
1259				      struct nilfs_bmap_stats *stats,
1260				      struct inode *dat)
1261{
1262	struct buffer_head *bh;
1263	struct nilfs_btree_node *node, *parent, *sib;
1264	__u64 sibptr;
1265	int pindex, level, ret;
1266
1267	ret = 0;
1268	stats->bs_nblocks = 0;
1269	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1270	     level < nilfs_btree_height(btree) - 1;
1271	     level++) {
1272		node = nilfs_btree_get_nonroot_node(path, level);
1273		path[level].bp_oldreq.bpr_ptr =
1274			nilfs_btree_node_get_ptr(btree, node,
1275						 path[level].bp_index);
1276		ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1277						 &path[level].bp_oldreq, dat);
1278		if (ret < 0)
1279			goto err_out_child_node;
1280
1281		if (nilfs_btree_node_get_nchildren(node) >
1282		    nilfs_btree_node_nchildren_min(node, btree)) {
1283			path[level].bp_op = nilfs_btree_do_delete;
1284			stats->bs_nblocks++;
1285			goto out;
1286		}
1287
1288		parent = nilfs_btree_get_node(btree, path, level + 1);
1289		pindex = path[level + 1].bp_index;
1290
1291		if (pindex > 0) {
1292			/* left sibling */
1293			sibptr = nilfs_btree_node_get_ptr(btree, parent,
1294							  pindex - 1);
1295			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1296			if (ret < 0)
1297				goto err_out_curr_node;
1298			sib = (struct nilfs_btree_node *)bh->b_data;
1299			if (nilfs_btree_node_get_nchildren(sib) >
1300			    nilfs_btree_node_nchildren_min(sib, btree)) {
1301				path[level].bp_sib_bh = bh;
1302				path[level].bp_op = nilfs_btree_borrow_left;
1303				stats->bs_nblocks++;
1304				goto out;
1305			} else {
1306				path[level].bp_sib_bh = bh;
1307				path[level].bp_op = nilfs_btree_concat_left;
1308				stats->bs_nblocks++;
1309				/* continue; */
1310			}
1311		} else if (pindex <
1312			   nilfs_btree_node_get_nchildren(parent) - 1) {
1313			/* right sibling */
1314			sibptr = nilfs_btree_node_get_ptr(btree, parent,
1315							  pindex + 1);
1316			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1317			if (ret < 0)
1318				goto err_out_curr_node;
1319			sib = (struct nilfs_btree_node *)bh->b_data;
1320			if (nilfs_btree_node_get_nchildren(sib) >
1321			    nilfs_btree_node_nchildren_min(sib, btree)) {
1322				path[level].bp_sib_bh = bh;
1323				path[level].bp_op = nilfs_btree_borrow_right;
1324				stats->bs_nblocks++;
1325				goto out;
1326			} else {
1327				path[level].bp_sib_bh = bh;
1328				path[level].bp_op = nilfs_btree_concat_right;
1329				stats->bs_nblocks++;
1330				/* continue; */
1331			}
1332		} else {
1333			/* no siblings */
1334			/* the only child of the root node */
1335			WARN_ON(level != nilfs_btree_height(btree) - 2);
1336			if (nilfs_btree_node_get_nchildren(node) - 1 <=
1337			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1338				path[level].bp_op = nilfs_btree_shrink;
1339				stats->bs_nblocks += 2;
1340			} else {
1341				path[level].bp_op = nilfs_btree_do_delete;
1342				stats->bs_nblocks++;
1343			}
1344
1345			goto out;
1346
1347		}
1348	}
1349
1350	node = nilfs_btree_get_root(btree);
1351	path[level].bp_oldreq.bpr_ptr =
1352		nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1353
1354	ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1355					 &path[level].bp_oldreq, dat);
1356	if (ret < 0)
1357		goto err_out_child_node;
1358
1359	/* child of the root node is deleted */
1360	path[level].bp_op = nilfs_btree_do_delete;
1361	stats->bs_nblocks++;
1362
1363	/* success */
1364 out:
1365	*levelp = level;
1366	return ret;
1367
1368	/* error */
1369 err_out_curr_node:
1370	nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat);
1371 err_out_child_node:
1372	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1373		brelse(path[level].bp_sib_bh);
1374		nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1375					 &path[level].bp_oldreq, dat);
1376	}
1377	*levelp = level;
1378	stats->bs_nblocks = 0;
1379	return ret;
1380}
1381
1382static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1383				      struct nilfs_btree_path *path,
1384				      int maxlevel, struct inode *dat)
1385{
1386	int level;
1387
1388	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1389		nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1390					  &path[level].bp_oldreq, dat);
1391		path[level].bp_op(btree, path, level, NULL, NULL);
1392	}
1393
1394	if (!nilfs_bmap_dirty(&btree->bt_bmap))
1395		nilfs_bmap_set_dirty(&btree->bt_bmap);
1396}
1397
1398static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1399
1400{
1401	struct nilfs_btree *btree;
1402	struct nilfs_btree_path *path;
1403	struct nilfs_bmap_stats stats;
1404	struct inode *dat;
1405	int level, ret;
1406
1407	btree = (struct nilfs_btree *)bmap;
1408	path = nilfs_btree_alloc_path();
1409	if (path == NULL)
1410		return -ENOMEM;
1411
1412	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1413				    NILFS_BTREE_LEVEL_NODE_MIN);
1414	if (ret < 0)
1415		goto out;
1416
1417
1418	dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ?
1419		nilfs_bmap_get_dat(&btree->bt_bmap) : NULL;
1420
1421	ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1422	if (ret < 0)
1423		goto out;
1424	nilfs_btree_commit_delete(btree, path, level, dat);
1425	nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1426
1427out:
1428	nilfs_btree_free_path(path);
1429	return ret;
1430}
1431
1432static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1433{
1434	struct nilfs_btree *btree;
1435	struct nilfs_btree_path *path;
1436	int ret;
1437
1438	btree = (struct nilfs_btree *)bmap;
1439	path = nilfs_btree_alloc_path();
1440	if (path == NULL)
1441		return -ENOMEM;
1442
1443	ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1444
1445	nilfs_btree_free_path(path);
1446
1447	return ret;
1448}
1449
1450static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1451{
1452	struct buffer_head *bh;
1453	struct nilfs_btree *btree;
1454	struct nilfs_btree_node *root, *node;
1455	__u64 maxkey, nextmaxkey;
1456	__u64 ptr;
1457	int nchildren, ret;
1458
1459	btree = (struct nilfs_btree *)bmap;
1460	root = nilfs_btree_get_root(btree);
1461	switch (nilfs_btree_height(btree)) {
1462	case 2:
1463		bh = NULL;
1464		node = root;
1465		break;
1466	case 3:
1467		nchildren = nilfs_btree_node_get_nchildren(root);
1468		if (nchildren > 1)
1469			return 0;
1470		ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1471		ret = nilfs_btree_get_block(btree, ptr, &bh);
1472		if (ret < 0)
1473			return ret;
1474		node = (struct nilfs_btree_node *)bh->b_data;
1475		break;
1476	default:
1477		return 0;
1478	}
1479
1480	nchildren = nilfs_btree_node_get_nchildren(node);
1481	maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1482	nextmaxkey = (nchildren > 1) ?
1483		nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1484	if (bh != NULL)
1485		brelse(bh);
1486
1487	return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1488}
1489
1490static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1491				   __u64 *keys, __u64 *ptrs, int nitems)
1492{
1493	struct buffer_head *bh;
1494	struct nilfs_btree *btree;
1495	struct nilfs_btree_node *node, *root;
1496	__le64 *dkeys;
1497	__le64 *dptrs;
1498	__u64 ptr;
1499	int nchildren, i, ret;
1500
1501	btree = (struct nilfs_btree *)bmap;
1502	root = nilfs_btree_get_root(btree);
1503	switch (nilfs_btree_height(btree)) {
1504	case 2:
1505		bh = NULL;
1506		node = root;
1507		break;
1508	case 3:
1509		nchildren = nilfs_btree_node_get_nchildren(root);
1510		WARN_ON(nchildren > 1);
1511		ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1512		ret = nilfs_btree_get_block(btree, ptr, &bh);
1513		if (ret < 0)
1514			return ret;
1515		node = (struct nilfs_btree_node *)bh->b_data;
1516		break;
1517	default:
1518		node = NULL;
1519		return -EINVAL;
1520	}
1521
1522	nchildren = nilfs_btree_node_get_nchildren(node);
1523	if (nchildren < nitems)
1524		nitems = nchildren;
1525	dkeys = nilfs_btree_node_dkeys(node);
1526	dptrs = nilfs_btree_node_dptrs(node, btree);
1527	for (i = 0; i < nitems; i++) {
1528		keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1529		ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1530	}
1531
1532	if (bh != NULL)
1533		brelse(bh);
1534
1535	return nitems;
1536}
1537
1538static int
1539nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1540				       union nilfs_bmap_ptr_req *dreq,
1541				       union nilfs_bmap_ptr_req *nreq,
1542				       struct buffer_head **bhp,
1543				       struct nilfs_bmap_stats *stats)
1544{
1545	struct buffer_head *bh;
1546	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1547	struct inode *dat = NULL;
1548	int ret;
1549
1550	stats->bs_nblocks = 0;
1551
1552	/* for data */
1553	/* cannot find near ptr */
1554	if (NILFS_BMAP_USE_VBN(bmap)) {
1555		dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1556		dat = nilfs_bmap_get_dat(bmap);
1557	}
1558
1559	ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat);
1560	if (ret < 0)
1561		return ret;
1562
1563	*bhp = NULL;
1564	stats->bs_nblocks++;
1565	if (nreq != NULL) {
1566		nreq->bpr_ptr = dreq->bpr_ptr + 1;
1567		ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat);
1568		if (ret < 0)
1569			goto err_out_dreq;
1570
1571		ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1572		if (ret < 0)
1573			goto err_out_nreq;
1574
1575		*bhp = bh;
1576		stats->bs_nblocks++;
1577	}
1578
1579	/* success */
1580	return 0;
1581
1582	/* error */
1583 err_out_nreq:
1584	nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat);
1585 err_out_dreq:
1586	nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat);
1587	stats->bs_nblocks = 0;
1588	return ret;
1589
1590}
1591
1592static void
1593nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1594				      __u64 key, __u64 ptr,
1595				      const __u64 *keys, const __u64 *ptrs,
1596				      int n,
1597				      union nilfs_bmap_ptr_req *dreq,
1598				      union nilfs_bmap_ptr_req *nreq,
1599				      struct buffer_head *bh)
1600{
1601	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1602	struct nilfs_btree_node *node;
1603	struct inode *dat;
1604	__u64 tmpptr;
1605
1606	/* free resources */
1607	if (bmap->b_ops->bop_clear != NULL)
1608		bmap->b_ops->bop_clear(bmap);
1609
1610	/* ptr must be a pointer to a buffer head. */
1611	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1612
1613	/* convert and insert */
1614	dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
1615	nilfs_btree_init(bmap);
1616	if (nreq != NULL) {
1617		nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1618		nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
1619
1620		/* create child node at level 1 */
1621		node = (struct nilfs_btree_node *)bh->b_data;
1622		nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1623		nilfs_btree_node_insert(btree, node,
1624					key, dreq->bpr_ptr, n);
1625		if (!buffer_dirty(bh))
1626			nilfs_btnode_mark_dirty(bh);
1627		if (!nilfs_bmap_dirty(bmap))
1628			nilfs_bmap_set_dirty(bmap);
1629
1630		brelse(bh);
1631
1632		/* create root node at level 2 */
1633		node = nilfs_btree_get_root(btree);
1634		tmpptr = nreq->bpr_ptr;
1635		nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1636				      2, 1, &keys[0], &tmpptr);
1637	} else {
1638		nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1639
1640		/* create root node at level 1 */
1641		node = nilfs_btree_get_root(btree);
1642		nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1643				      1, n, keys, ptrs);
1644		nilfs_btree_node_insert(btree, node,
1645					key, dreq->bpr_ptr, n);
1646		if (!nilfs_bmap_dirty(bmap))
1647			nilfs_bmap_set_dirty(bmap);
1648	}
1649
1650	if (NILFS_BMAP_USE_VBN(bmap))
1651		nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
1652}
1653
1654/**
1655 * nilfs_btree_convert_and_insert -
1656 * @bmap:
1657 * @key:
1658 * @ptr:
1659 * @keys:
1660 * @ptrs:
1661 * @n:
1662 */
1663int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1664				   __u64 key, __u64 ptr,
1665				   const __u64 *keys, const __u64 *ptrs, int n)
1666{
1667	struct buffer_head *bh;
1668	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1669	struct nilfs_bmap_stats stats;
1670	int ret;
1671
1672	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1673		di = &dreq;
1674		ni = NULL;
1675	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1676			   1 << bmap->b_inode->i_blkbits)) {
1677		di = &dreq;
1678		ni = &nreq;
1679	} else {
1680		di = NULL;
1681		ni = NULL;
1682		BUG();
1683	}
1684
1685	ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1686						     &stats);
1687	if (ret < 0)
1688		return ret;
1689	nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1690					      di, ni, bh);
1691	nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1692	return 0;
1693}
1694
1695static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1696				   struct nilfs_btree_path *path,
1697				   int level,
1698				   struct buffer_head *bh)
1699{
1700	while ((++level < nilfs_btree_height(btree) - 1) &&
1701	       !buffer_dirty(path[level].bp_bh))
1702		nilfs_btnode_mark_dirty(path[level].bp_bh);
1703
1704	return 0;
1705}
1706
1707static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1708					struct nilfs_btree_path *path,
1709					int level, struct inode *dat)
1710{
1711	struct nilfs_btree_node *parent;
1712	int ret;
1713
1714	parent = nilfs_btree_get_node(btree, path, level + 1);
1715	path[level].bp_oldreq.bpr_ptr =
1716		nilfs_btree_node_get_ptr(btree, parent,
1717					 path[level + 1].bp_index);
1718	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1719	ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1720				       &path[level].bp_newreq.bpr_req);
1721	if (ret < 0)
1722		return ret;
1723
1724	if (buffer_nilfs_node(path[level].bp_bh)) {
1725		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1726		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1727		path[level].bp_ctxt.bh = path[level].bp_bh;
1728		ret = nilfs_btnode_prepare_change_key(
1729			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1730			&path[level].bp_ctxt);
1731		if (ret < 0) {
1732			nilfs_dat_abort_update(dat,
1733					       &path[level].bp_oldreq.bpr_req,
1734					       &path[level].bp_newreq.bpr_req);
1735			return ret;
1736		}
1737	}
1738
1739	return 0;
1740}
1741
1742static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1743					struct nilfs_btree_path *path,
1744					int level, struct inode *dat)
1745{
1746	struct nilfs_btree_node *parent;
1747
1748	nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1749				&path[level].bp_newreq.bpr_req,
1750				btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS);
1751
1752	if (buffer_nilfs_node(path[level].bp_bh)) {
1753		nilfs_btnode_commit_change_key(
1754			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1755			&path[level].bp_ctxt);
1756		path[level].bp_bh = path[level].bp_ctxt.bh;
1757	}
1758	set_buffer_nilfs_volatile(path[level].bp_bh);
1759
1760	parent = nilfs_btree_get_node(btree, path, level + 1);
1761	nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1762				 path[level].bp_newreq.bpr_ptr);
1763}
1764
1765static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1766				       struct nilfs_btree_path *path,
1767				       int level, struct inode *dat)
1768{
1769	nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1770			       &path[level].bp_newreq.bpr_req);
1771	if (buffer_nilfs_node(path[level].bp_bh))
1772		nilfs_btnode_abort_change_key(
1773			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1774			&path[level].bp_ctxt);
1775}
1776
1777static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1778					   struct nilfs_btree_path *path,
1779					   int minlevel, int *maxlevelp,
1780					   struct inode *dat)
1781{
1782	int level, ret;
1783
1784	level = minlevel;
1785	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1786		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1787		if (ret < 0)
1788			return ret;
1789	}
1790	while ((++level < nilfs_btree_height(btree) - 1) &&
1791	       !buffer_dirty(path[level].bp_bh)) {
1792
1793		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1794		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1795		if (ret < 0)
1796			goto out;
1797	}
1798
1799	/* success */
1800	*maxlevelp = level - 1;
1801	return 0;
1802
1803	/* error */
1804 out:
1805	while (--level > minlevel)
1806		nilfs_btree_abort_update_v(btree, path, level, dat);
1807	if (!buffer_nilfs_volatile(path[level].bp_bh))
1808		nilfs_btree_abort_update_v(btree, path, level, dat);
1809	return ret;
1810}
1811
1812static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1813					   struct nilfs_btree_path *path,
1814					   int minlevel, int maxlevel,
1815					   struct buffer_head *bh,
1816					   struct inode *dat)
1817{
1818	int level;
1819
1820	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1821		nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1822
1823	for (level = minlevel + 1; level <= maxlevel; level++)
1824		nilfs_btree_commit_update_v(btree, path, level, dat);
1825}
1826
1827static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1828				   struct nilfs_btree_path *path,
1829				   int level, struct buffer_head *bh)
1830{
1831	int maxlevel = 0, ret;
1832	struct nilfs_btree_node *parent;
1833	struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1834	__u64 ptr;
1835
1836	get_bh(bh);
1837	path[level].bp_bh = bh;
1838	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1839					      dat);
1840	if (ret < 0)
1841		goto out;
1842
1843	if (buffer_nilfs_volatile(path[level].bp_bh)) {
1844		parent = nilfs_btree_get_node(btree, path, level + 1);
1845		ptr = nilfs_btree_node_get_ptr(btree, parent,
1846					       path[level + 1].bp_index);
1847		ret = nilfs_dat_mark_dirty(dat, ptr);
1848		if (ret < 0)
1849			goto out;
1850	}
1851
1852	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1853
1854 out:
1855	brelse(path[level].bp_bh);
1856	path[level].bp_bh = NULL;
1857	return ret;
1858}
1859
1860static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1861				 struct buffer_head *bh)
1862{
1863	struct nilfs_btree *btree;
1864	struct nilfs_btree_path *path;
1865	struct nilfs_btree_node *node;
1866	__u64 key;
1867	int level, ret;
1868
1869	WARN_ON(!buffer_dirty(bh));
1870
1871	btree = (struct nilfs_btree *)bmap;
1872	path = nilfs_btree_alloc_path();
1873	if (path == NULL)
1874		return -ENOMEM;
1875
1876	if (buffer_nilfs_node(bh)) {
1877		node = (struct nilfs_btree_node *)bh->b_data;
1878		key = nilfs_btree_node_get_key(node, 0);
1879		level = nilfs_btree_node_get_level(node);
1880	} else {
1881		key = nilfs_bmap_data_get_key(bmap, bh);
1882		level = NILFS_BTREE_LEVEL_DATA;
1883	}
1884
1885	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1886	if (ret < 0) {
1887		if (unlikely(ret == -ENOENT))
1888			printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1889			       __func__, (unsigned long long)key, level);
1890		goto out;
1891	}
1892
1893	ret = NILFS_BMAP_USE_VBN(bmap) ?
1894		nilfs_btree_propagate_v(btree, path, level, bh) :
1895		nilfs_btree_propagate_p(btree, path, level, bh);
1896
1897 out:
1898	nilfs_btree_free_path(path);
1899
1900	return ret;
1901}
1902
1903static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1904				    struct buffer_head *bh)
1905{
1906	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr);
1907}
1908
1909static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1910					 struct list_head *lists,
1911					 struct buffer_head *bh)
1912{
1913	struct list_head *head;
1914	struct buffer_head *cbh;
1915	struct nilfs_btree_node *node, *cnode;
1916	__u64 key, ckey;
1917	int level;
1918
1919	get_bh(bh);
1920	node = (struct nilfs_btree_node *)bh->b_data;
1921	key = nilfs_btree_node_get_key(node, 0);
1922	level = nilfs_btree_node_get_level(node);
1923	list_for_each(head, &lists[level]) {
1924		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1925		cnode = (struct nilfs_btree_node *)cbh->b_data;
1926		ckey = nilfs_btree_node_get_key(cnode, 0);
1927		if (key < ckey)
1928			break;
1929	}
1930	list_add_tail(&bh->b_assoc_buffers, head);
1931}
1932
1933static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1934					     struct list_head *listp)
1935{
1936	struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1937	struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1938	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1939	struct pagevec pvec;
1940	struct buffer_head *bh, *head;
1941	pgoff_t index = 0;
1942	int level, i;
1943
1944	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1945	     level < NILFS_BTREE_LEVEL_MAX;
1946	     level++)
1947		INIT_LIST_HEAD(&lists[level]);
1948
1949	pagevec_init(&pvec, 0);
1950
1951	while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
1952				  PAGEVEC_SIZE)) {
1953		for (i = 0; i < pagevec_count(&pvec); i++) {
1954			bh = head = page_buffers(pvec.pages[i]);
1955			do {
1956				if (buffer_dirty(bh))
1957					nilfs_btree_add_dirty_buffer(btree,
1958								     lists, bh);
1959			} while ((bh = bh->b_this_page) != head);
1960		}
1961		pagevec_release(&pvec);
1962		cond_resched();
1963	}
1964
1965	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1966	     level < NILFS_BTREE_LEVEL_MAX;
1967	     level++)
1968		list_splice_tail(&lists[level], listp);
1969}
1970
1971static int nilfs_btree_assign_p(struct nilfs_btree *btree,
1972				struct nilfs_btree_path *path,
1973				int level,
1974				struct buffer_head **bh,
1975				sector_t blocknr,
1976				union nilfs_binfo *binfo)
1977{
1978	struct nilfs_btree_node *parent;
1979	__u64 key;
1980	__u64 ptr;
1981	int ret;
1982
1983	parent = nilfs_btree_get_node(btree, path, level + 1);
1984	ptr = nilfs_btree_node_get_ptr(btree, parent,
1985				       path[level + 1].bp_index);
1986	if (buffer_nilfs_node(*bh)) {
1987		path[level].bp_ctxt.oldkey = ptr;
1988		path[level].bp_ctxt.newkey = blocknr;
1989		path[level].bp_ctxt.bh = *bh;
1990		ret = nilfs_btnode_prepare_change_key(
1991			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1992			&path[level].bp_ctxt);
1993		if (ret < 0)
1994			return ret;
1995		nilfs_btnode_commit_change_key(
1996			&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1997			&path[level].bp_ctxt);
1998		*bh = path[level].bp_ctxt.bh;
1999	}
2000
2001	nilfs_btree_node_set_ptr(btree, parent,
2002				 path[level + 1].bp_index, blocknr);
2003
2004	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2005	/* on-disk format */
2006	binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2007	binfo->bi_dat.bi_level = level;
2008
2009	return 0;
2010}
2011
2012static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2013				struct nilfs_btree_path *path,
2014				int level,
2015				struct buffer_head **bh,
2016				sector_t blocknr,
2017				union nilfs_binfo *binfo)
2018{
2019	struct nilfs_btree_node *parent;
2020	struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
2021	__u64 key;
2022	__u64 ptr;
2023	union nilfs_bmap_ptr_req req;
2024	int ret;
2025
2026	parent = nilfs_btree_get_node(btree, path, level + 1);
2027	ptr = nilfs_btree_node_get_ptr(btree, parent,
2028				       path[level + 1].bp_index);
2029	req.bpr_ptr = ptr;
2030	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2031	if (ret < 0)
2032		return ret;
2033	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2034
2035	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2036	/* on-disk format */
2037	binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2038	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2039
2040	return 0;
2041}
2042
2043static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2044			      struct buffer_head **bh,
2045			      sector_t blocknr,
2046			      union nilfs_binfo *binfo)
2047{
2048	struct nilfs_btree *btree;
2049	struct nilfs_btree_path *path;
2050	struct nilfs_btree_node *node;
2051	__u64 key;
2052	int level, ret;
2053
2054	btree = (struct nilfs_btree *)bmap;
2055	path = nilfs_btree_alloc_path();
2056	if (path == NULL)
2057		return -ENOMEM;
2058
2059	if (buffer_nilfs_node(*bh)) {
2060		node = (struct nilfs_btree_node *)(*bh)->b_data;
2061		key = nilfs_btree_node_get_key(node, 0);
2062		level = nilfs_btree_node_get_level(node);
2063	} else {
2064		key = nilfs_bmap_data_get_key(bmap, *bh);
2065		level = NILFS_BTREE_LEVEL_DATA;
2066	}
2067
2068	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2069	if (ret < 0) {
2070		WARN_ON(ret == -ENOENT);
2071		goto out;
2072	}
2073
2074	ret = NILFS_BMAP_USE_VBN(bmap) ?
2075		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2076		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2077
2078 out:
2079	nilfs_btree_free_path(path);
2080
2081	return ret;
2082}
2083
2084static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2085				 struct buffer_head **bh,
2086				 sector_t blocknr,
2087				 union nilfs_binfo *binfo)
2088{
2089	struct nilfs_btree_node *node;
2090	__u64 key;
2091	int ret;
2092
2093	ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr,
2094			     blocknr);
2095	if (ret < 0)
2096		return ret;
2097
2098	if (buffer_nilfs_node(*bh)) {
2099		node = (struct nilfs_btree_node *)(*bh)->b_data;
2100		key = nilfs_btree_node_get_key(node, 0);
2101	} else
2102		key = nilfs_bmap_data_get_key(bmap, *bh);
2103
2104	/* on-disk format */
2105	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2106	binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2107
2108	return 0;
2109}
2110
2111static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2112{
2113	struct buffer_head *bh;
2114	struct nilfs_btree *btree;
2115	struct nilfs_btree_path *path;
2116	__u64 ptr;
2117	int ret;
2118
2119	btree = (struct nilfs_btree *)bmap;
2120	path = nilfs_btree_alloc_path();
2121	if (path == NULL)
2122		return -ENOMEM;
2123
2124	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2125	if (ret < 0) {
2126		WARN_ON(ret == -ENOENT);
2127		goto out;
2128	}
2129	ret = nilfs_btree_get_block(btree, ptr, &bh);
2130	if (ret < 0) {
2131		WARN_ON(ret == -ENOENT);
2132		goto out;
2133	}
2134
2135	if (!buffer_dirty(bh))
2136		nilfs_btnode_mark_dirty(bh);
2137	brelse(bh);
2138	if (!nilfs_bmap_dirty(&btree->bt_bmap))
2139		nilfs_bmap_set_dirty(&btree->bt_bmap);
2140
2141 out:
2142	nilfs_btree_free_path(path);
2143	return ret;
2144}
2145
2146static const struct nilfs_bmap_operations nilfs_btree_ops = {
2147	.bop_lookup		=	nilfs_btree_lookup,
2148	.bop_lookup_contig	=	nilfs_btree_lookup_contig,
2149	.bop_insert		=	nilfs_btree_insert,
2150	.bop_delete		=	nilfs_btree_delete,
2151	.bop_clear		=	NULL,
2152
2153	.bop_propagate		=	nilfs_btree_propagate,
2154
2155	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2156
2157	.bop_assign		=	nilfs_btree_assign,
2158	.bop_mark		=	nilfs_btree_mark,
2159
2160	.bop_last_key		=	nilfs_btree_last_key,
2161	.bop_check_insert	=	NULL,
2162	.bop_check_delete	=	nilfs_btree_check_delete,
2163	.bop_gather_data	=	nilfs_btree_gather_data,
2164};
2165
2166static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2167	.bop_lookup		=	NULL,
2168	.bop_lookup_contig	=	NULL,
2169	.bop_insert		=	NULL,
2170	.bop_delete		=	NULL,
2171	.bop_clear		=	NULL,
2172
2173	.bop_propagate		=	nilfs_btree_propagate_gc,
2174
2175	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2176
2177	.bop_assign		=	nilfs_btree_assign_gc,
2178	.bop_mark		=	NULL,
2179
2180	.bop_last_key		=	NULL,
2181	.bop_check_insert	=	NULL,
2182	.bop_check_delete	=	NULL,
2183	.bop_gather_data	=	NULL,
2184};
2185
2186int nilfs_btree_init(struct nilfs_bmap *bmap)
2187{
2188	bmap->b_ops = &nilfs_btree_ops;
2189	return 0;
2190}
2191
2192void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2193{
2194	bmap->b_ops = &nilfs_btree_ops_gc;
2195}
2196