check-integrity.c revision e06baab4184509bdfddd294efc6cae7a410c6f07
1/*
2 * Copyright (C) STRATO AG 2011.  All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19/*
20 * This module can be used to catch cases when the btrfs kernel
21 * code executes write requests to the disk that bring the file
22 * system in an inconsistent state. In such a state, a power-loss
23 * or kernel panic event would cause that the data on disk is
24 * lost or at least damaged.
25 *
26 * Code is added that examines all block write requests during
27 * runtime (including writes of the super block). Three rules
28 * are verified and an error is printed on violation of the
29 * rules:
30 * 1. It is not allowed to write a disk block which is
31 *    currently referenced by the super block (either directly
32 *    or indirectly).
33 * 2. When a super block is written, it is verified that all
34 *    referenced (directly or indirectly) blocks fulfill the
35 *    following requirements:
36 *    2a. All referenced blocks have either been present when
37 *        the file system was mounted, (i.e., they have been
38 *        referenced by the super block) or they have been
39 *        written since then and the write completion callback
40 *        was called and a FLUSH request to the device where
41 *        these blocks are located was received and completed.
42 *    2b. All referenced blocks need to have a generation
43 *        number which is equal to the parent's number.
44 *
45 * One issue that was found using this module was that the log
46 * tree on disk became temporarily corrupted because disk blocks
47 * that had been in use for the log tree had been freed and
48 * reused too early, while being referenced by the written super
49 * block.
50 *
51 * The search term in the kernel log that can be used to filter
52 * on the existence of detected integrity issues is
53 * "btrfs: attempt".
54 *
55 * The integrity check is enabled via mount options. These
56 * mount options are only supported if the integrity check
57 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
58 *
59 * Example #1, apply integrity checks to all metadata:
60 * mount /dev/sdb1 /mnt -o check_int
61 *
62 * Example #2, apply integrity checks to all metadata and
63 * to data extents:
64 * mount /dev/sdb1 /mnt -o check_int_data
65 *
66 * Example #3, apply integrity checks to all metadata and dump
67 * the tree that the super block references to kernel messages
68 * each time after a super block was written:
69 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
70 *
71 * If the integrity check tool is included and activated in
72 * the mount options, plenty of kernel memory is used, and
73 * plenty of additional CPU cycles are spent. Enabling this
74 * functionality is not intended for normal use. In most
75 * cases, unless you are a btrfs developer who needs to verify
76 * the integrity of (super)-block write requests, do not
77 * enable the config option BTRFS_FS_CHECK_INTEGRITY to
78 * include and compile the integrity check tool.
79 */
80
81#include <linux/sched.h>
82#include <linux/slab.h>
83#include <linux/buffer_head.h>
84#include <linux/mutex.h>
85#include <linux/crc32c.h>
86#include <linux/genhd.h>
87#include <linux/blkdev.h>
88#include "ctree.h"
89#include "disk-io.h"
90#include "transaction.h"
91#include "extent_io.h"
92#include "volumes.h"
93#include "print-tree.h"
94#include "locking.h"
95#include "check-integrity.h"
96
97#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
98#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
99#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
100#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
101#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
102#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
103#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
104#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6)	/* in characters,
105							 * excluding " [...]" */
106#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
107
108/*
109 * The definition of the bitmask fields for the print_mask.
110 * They are specified with the mount option check_integrity_print_mask.
111 */
112#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE			0x00000001
113#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION		0x00000002
114#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE			0x00000004
115#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE			0x00000008
116#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH			0x00000010
117#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH			0x00000020
118#define BTRFSIC_PRINT_MASK_VERBOSE				0x00000040
119#define BTRFSIC_PRINT_MASK_VERY_VERBOSE				0x00000080
120#define BTRFSIC_PRINT_MASK_INITIAL_TREE				0x00000100
121#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES			0x00000200
122#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE			0x00000400
123#define BTRFSIC_PRINT_MASK_NUM_COPIES				0x00000800
124#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS		0x00001000
125
126struct btrfsic_dev_state;
127struct btrfsic_state;
128
/*
 * Per-disk-block state tracked by the integrity checker. One object
 * exists for each block that was either found to be referenced by a
 * superblock or observed being written at runtime. Blocks live in
 * the (bdev, dev_bytenr)-keyed block_hashtable and on the global
 * all_blocks_list of the btrfsic_state.
 */
struct btrfsic_block {
	u32 magic_num;		/* only used for debug purposes */
	unsigned int is_metadata:1;	/* if it is meta-data, not data-data */
	unsigned int is_superblock:1;	/* if it is one of the superblocks */
	unsigned int is_iodone:1;	/* if is done by lower subsystem */
	unsigned int iodone_w_error:1;	/* error was indicated to endio */
	unsigned int never_written:1;	/* block was added because it was
					 * referenced, not because it was
					 * written */
	unsigned int mirror_num:2;	/* large enough to hold
					 * BTRFS_SUPER_MIRROR_MAX */
	struct btrfsic_dev_state *dev_state;
	u64 dev_bytenr;		/* key, physical byte num on disk */
	u64 logical_bytenr;	/* logical byte num on disk */
	u64 generation;
	struct btrfs_disk_key disk_key;	/* extra info to print in case of
					 * issues, will not always be correct */
	struct list_head collision_resolving_node;	/* list node */
	struct list_head all_blocks_node;	/* list node */

	/* the following two lists contain block_link items */
	struct list_head ref_to_list;	/* list */
	struct list_head ref_from_list;	/* list */
	struct btrfsic_block *next_in_same_bio;
	void *orig_bio_bh_private;
	union {
		bio_end_io_t *bio;
		bh_end_io_t *bh;
	} orig_bio_bh_end_io;	/* saved completion callback of the original
				 * bio/bh that this block's endio hook wraps */
	int submit_bio_bh_rw;
	u64 flush_gen; /* only valid if !never_written */
};
161
162/*
163 * Elements of this type are allocated dynamically and required because
164 * each block object can refer to and can be ref from multiple blocks.
165 * The key to lookup them in the hashtable is the dev_bytenr of
 * the block referred to plus the one of the block it is referred from.
167 * The fact that they are searchable via a hashtable and that a
168 * ref_cnt is maintained is not required for the btrfs integrity
169 * check algorithm itself, it is only used to make the output more
170 * beautiful in case that an error is detected (an error is defined
171 * as a write operation to a block while that block is still referenced).
172 */
struct btrfsic_block_link {
	u32 magic_num;		/* only used for debug purposes */
	u32 ref_cnt;		/* how often this from->to reference exists */
	/* NOTE(review): presumably node_ref_to links into the from-block's
	 * ref_to_list and node_ref_from into the to-block's ref_from_list
	 * (cf. the two lists in struct btrfsic_block) — confirm at add site */
	struct list_head node_ref_to;	/* list node */
	struct list_head node_ref_from;	/* list node */
	struct list_head collision_resolving_node;	/* list node */
	struct btrfsic_block *block_ref_to;	/* the referenced block */
	struct btrfsic_block *block_ref_from;	/* the referencing block */
	u64 parent_generation;	/* generation recorded in the referencing
				 * parent (checked against rule 2b) */
};
183
/* Per block-device state, looked up via the bdev pointer. */
struct btrfsic_dev_state {
	u32 magic_num;		/* only used for debug purposes */
	struct block_device *bdev;	/* hash key of the dev2state table */
	struct btrfsic_state *state;
	struct list_head collision_resolving_node;	/* list node */
	/* stand-in block used when a flush bio/bh is submitted for this
	 * device (always is_iodone, points back at this dev_state) */
	struct btrfsic_block dummy_block_for_bio_bh_flush;
	u64 last_flush_gen;	/* generation of the most recently completed
				 * FLUSH on this device — see rule 2a in the
				 * header comment */
	char name[BDEVNAME_SIZE];	/* device name for log messages */
};
193
/*
 * Fixed-size hashtables with chained collision resolution. All table
 * sizes are powers of two, so lookups mask the hash with SIZE - 1.
 */

/* keyed by (bdev, dev_bytenr) of a block */
struct btrfsic_block_hashtable {
	struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
};

/* keyed by the (bdev, dev_bytenr) pairs of both ends of a link */
struct btrfsic_block_link_hashtable {
	struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
};

/* keyed by the bdev pointer */
struct btrfsic_dev_state_hashtable {
	struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
};
205
/*
 * Result of mapping a logical byte range to one physical copy on one
 * device (see btrfsic_map_block()), plus the buffers that hold the
 * block's contents after btrfsic_read_block().
 */
struct btrfsic_block_data_ctx {
	u64 start;		/* virtual bytenr */
	u64 dev_bytenr;		/* physical bytenr on device */
	u32 len;		/* length of the mapped range in bytes */
	struct btrfsic_dev_state *dev;	/* device holding this copy */
	char **datav;		/* data pointers — presumably one per page,
				 * filled by btrfsic_read_block(); confirm */
	struct page **pagev;	/* pages backing datav */
	void *mem_to_free;	/* released by btrfsic_release_block_ctx() */
};
215
/*
 * This structure is used to implement recursion without occupying
 * any kernel stack space; refer to btrfsic_process_metablock().
 * Frames form a chain through ->prev, each one holding the loop
 * state (copy/mirror counters, current and next block) of one
 * nesting level of the explicit "recursion".
 */
struct btrfsic_stack_frame {
	u32 magic;		/* BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER */
	u32 nr;
	int error;
	int i;
	int limit_nesting;	/* remaining allowed nesting depth */
	int num_copies;
	int mirror_num;
	struct btrfsic_block *block;
	struct btrfsic_block_data_ctx *block_ctx;
	struct btrfsic_block *next_block;
	struct btrfsic_block_data_ctx next_block_ctx;
	struct btrfs_header *hdr;
	struct btrfsic_stack_frame *prev;	/* caller frame */
};
233
/* Some state per mounted filesystem */
struct btrfsic_state {
	u32 print_mask;		/* BTRFSIC_PRINT_MASK_* verbosity bits */
	int include_extent_data;	/* nonzero if data extents are checked
					 * too (check_int_data mount option) */
	int csum_size;		/* taken from the selected superblock */
	struct list_head all_blocks_list;	/* every btrfsic_block known */
	struct btrfsic_block_hashtable block_hashtable;
	struct btrfsic_block_link_hashtable block_link_hashtable;
	struct btrfs_root *root;
	u64 max_superblock_generation;	/* highest sb generation seen */
	struct btrfsic_block *latest_superblock;	/* sb block carrying
							 * that generation */
	u32 metablock_size;	/* must equal super node/leaf size */
	u32 datablock_size;	/* must equal super sector size */
};
248
249static void btrfsic_block_init(struct btrfsic_block *b);
250static struct btrfsic_block *btrfsic_block_alloc(void);
251static void btrfsic_block_free(struct btrfsic_block *b);
252static void btrfsic_block_link_init(struct btrfsic_block_link *n);
253static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
254static void btrfsic_block_link_free(struct btrfsic_block_link *n);
255static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
256static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
257static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
258static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
259static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
260					struct btrfsic_block_hashtable *h);
261static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
262static struct btrfsic_block *btrfsic_block_hashtable_lookup(
263		struct block_device *bdev,
264		u64 dev_bytenr,
265		struct btrfsic_block_hashtable *h);
266static void btrfsic_block_link_hashtable_init(
267		struct btrfsic_block_link_hashtable *h);
268static void btrfsic_block_link_hashtable_add(
269		struct btrfsic_block_link *l,
270		struct btrfsic_block_link_hashtable *h);
271static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
272static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
273		struct block_device *bdev_ref_to,
274		u64 dev_bytenr_ref_to,
275		struct block_device *bdev_ref_from,
276		u64 dev_bytenr_ref_from,
277		struct btrfsic_block_link_hashtable *h);
278static void btrfsic_dev_state_hashtable_init(
279		struct btrfsic_dev_state_hashtable *h);
280static void btrfsic_dev_state_hashtable_add(
281		struct btrfsic_dev_state *ds,
282		struct btrfsic_dev_state_hashtable *h);
283static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
284static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
285		struct block_device *bdev,
286		struct btrfsic_dev_state_hashtable *h);
287static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
288static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
289static int btrfsic_process_superblock(struct btrfsic_state *state,
290				      struct btrfs_fs_devices *fs_devices);
291static int btrfsic_process_metablock(struct btrfsic_state *state,
292				     struct btrfsic_block *block,
293				     struct btrfsic_block_data_ctx *block_ctx,
294				     int limit_nesting, int force_iodone_flag);
295static void btrfsic_read_from_block_data(
296	struct btrfsic_block_data_ctx *block_ctx,
297	void *dst, u32 offset, size_t len);
298static int btrfsic_create_link_to_next_block(
299		struct btrfsic_state *state,
300		struct btrfsic_block *block,
301		struct btrfsic_block_data_ctx
302		*block_ctx, u64 next_bytenr,
303		int limit_nesting,
304		struct btrfsic_block_data_ctx *next_block_ctx,
305		struct btrfsic_block **next_blockp,
306		int force_iodone_flag,
307		int *num_copiesp, int *mirror_nump,
308		struct btrfs_disk_key *disk_key,
309		u64 parent_generation);
310static int btrfsic_handle_extent_data(struct btrfsic_state *state,
311				      struct btrfsic_block *block,
312				      struct btrfsic_block_data_ctx *block_ctx,
313				      u32 item_offset, int force_iodone_flag);
314static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
315			     struct btrfsic_block_data_ctx *block_ctx_out,
316			     int mirror_num);
317static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
318				  u32 len, struct block_device *bdev,
319				  struct btrfsic_block_data_ctx *block_ctx_out);
320static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
321static int btrfsic_read_block(struct btrfsic_state *state,
322			      struct btrfsic_block_data_ctx *block_ctx);
323static void btrfsic_dump_database(struct btrfsic_state *state);
324static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
325static int btrfsic_test_for_metadata(struct btrfsic_state *state,
326				     char **datav, unsigned int num_pages);
327static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
328					  u64 dev_bytenr, char **mapped_datav,
329					  unsigned int num_pages,
330					  struct bio *bio, int *bio_is_patched,
331					  struct buffer_head *bh,
332					  int submit_bio_bh_rw);
333static int btrfsic_process_written_superblock(
334		struct btrfsic_state *state,
335		struct btrfsic_block *const block,
336		struct btrfs_super_block *const super_hdr);
337static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status);
338static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
339static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
340					      const struct btrfsic_block *block,
341					      int recursion_level);
342static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
343					struct btrfsic_block *const block,
344					int recursion_level);
345static void btrfsic_print_add_link(const struct btrfsic_state *state,
346				   const struct btrfsic_block_link *l);
347static void btrfsic_print_rem_link(const struct btrfsic_state *state,
348				   const struct btrfsic_block_link *l);
349static char btrfsic_get_block_type(const struct btrfsic_state *state,
350				   const struct btrfsic_block *block);
351static void btrfsic_dump_tree(const struct btrfsic_state *state);
352static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
353				  const struct btrfsic_block *block,
354				  int indent_level);
355static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
356		struct btrfsic_state *state,
357		struct btrfsic_block_data_ctx *next_block_ctx,
358		struct btrfsic_block *next_block,
359		struct btrfsic_block *from_block,
360		u64 parent_generation);
361static struct btrfsic_block *btrfsic_block_lookup_or_add(
362		struct btrfsic_state *state,
363		struct btrfsic_block_data_ctx *block_ctx,
364		const char *additional_string,
365		int is_metadata,
366		int is_iodone,
367		int never_written,
368		int mirror_num,
369		int *was_created);
370static int btrfsic_process_superblock_dev_mirror(
371		struct btrfsic_state *state,
372		struct btrfsic_dev_state *dev_state,
373		struct btrfs_device *device,
374		int superblock_mirror_num,
375		struct btrfsic_dev_state **selected_dev_state,
376		struct btrfs_super_block *selected_super);
377static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
378		struct block_device *bdev);
379static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
380					   u64 bytenr,
381					   struct btrfsic_dev_state *dev_state,
382					   u64 dev_bytenr);
383
384static struct mutex btrfsic_mutex;
385static int btrfsic_is_initialized;
386static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
387
388
389static void btrfsic_block_init(struct btrfsic_block *b)
390{
391	b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
392	b->dev_state = NULL;
393	b->dev_bytenr = 0;
394	b->logical_bytenr = 0;
395	b->generation = BTRFSIC_GENERATION_UNKNOWN;
396	b->disk_key.objectid = 0;
397	b->disk_key.type = 0;
398	b->disk_key.offset = 0;
399	b->is_metadata = 0;
400	b->is_superblock = 0;
401	b->is_iodone = 0;
402	b->iodone_w_error = 0;
403	b->never_written = 0;
404	b->mirror_num = 0;
405	b->next_in_same_bio = NULL;
406	b->orig_bio_bh_private = NULL;
407	b->orig_bio_bh_end_io.bio = NULL;
408	INIT_LIST_HEAD(&b->collision_resolving_node);
409	INIT_LIST_HEAD(&b->all_blocks_node);
410	INIT_LIST_HEAD(&b->ref_to_list);
411	INIT_LIST_HEAD(&b->ref_from_list);
412	b->submit_bio_bh_rw = 0;
413	b->flush_gen = 0;
414}
415
416static struct btrfsic_block *btrfsic_block_alloc(void)
417{
418	struct btrfsic_block *b;
419
420	b = kzalloc(sizeof(*b), GFP_NOFS);
421	if (NULL != b)
422		btrfsic_block_init(b);
423
424	return b;
425}
426
427static void btrfsic_block_free(struct btrfsic_block *b)
428{
429	BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
430	kfree(b);
431}
432
433static void btrfsic_block_link_init(struct btrfsic_block_link *l)
434{
435	l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
436	l->ref_cnt = 1;
437	INIT_LIST_HEAD(&l->node_ref_to);
438	INIT_LIST_HEAD(&l->node_ref_from);
439	INIT_LIST_HEAD(&l->collision_resolving_node);
440	l->block_ref_to = NULL;
441	l->block_ref_from = NULL;
442}
443
444static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
445{
446	struct btrfsic_block_link *l;
447
448	l = kzalloc(sizeof(*l), GFP_NOFS);
449	if (NULL != l)
450		btrfsic_block_link_init(l);
451
452	return l;
453}
454
455static void btrfsic_block_link_free(struct btrfsic_block_link *l)
456{
457	BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
458	kfree(l);
459}
460
461static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
462{
463	ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
464	ds->bdev = NULL;
465	ds->state = NULL;
466	ds->name[0] = '\0';
467	INIT_LIST_HEAD(&ds->collision_resolving_node);
468	ds->last_flush_gen = 0;
469	btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
470	ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
471	ds->dummy_block_for_bio_bh_flush.dev_state = ds;
472}
473
474static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
475{
476	struct btrfsic_dev_state *ds;
477
478	ds = kzalloc(sizeof(*ds), GFP_NOFS);
479	if (NULL != ds)
480		btrfsic_dev_state_init(ds);
481
482	return ds;
483}
484
485static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
486{
487	BUG_ON(!(NULL == ds ||
488		 BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
489	kfree(ds);
490}
491
492static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
493{
494	int i;
495
496	for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
497		INIT_LIST_HEAD(h->table + i);
498}
499
500static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
501					struct btrfsic_block_hashtable *h)
502{
503	const unsigned int hashval =
504	    (((unsigned int)(b->dev_bytenr >> 16)) ^
505	     ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
506	     (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
507
508	list_add(&b->collision_resolving_node, h->table + hashval);
509}
510
/* Unlink a block from its hash collision chain. */
static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
{
	list_del(&b->collision_resolving_node);
}
515
516static struct btrfsic_block *btrfsic_block_hashtable_lookup(
517		struct block_device *bdev,
518		u64 dev_bytenr,
519		struct btrfsic_block_hashtable *h)
520{
521	const unsigned int hashval =
522	    (((unsigned int)(dev_bytenr >> 16)) ^
523	     ((unsigned int)((uintptr_t)bdev))) &
524	     (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
525	struct list_head *elem;
526
527	list_for_each(elem, h->table + hashval) {
528		struct btrfsic_block *const b =
529		    list_entry(elem, struct btrfsic_block,
530			       collision_resolving_node);
531
532		if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
533			return b;
534	}
535
536	return NULL;
537}
538
539static void btrfsic_block_link_hashtable_init(
540		struct btrfsic_block_link_hashtable *h)
541{
542	int i;
543
544	for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
545		INIT_LIST_HEAD(h->table + i);
546}
547
548static void btrfsic_block_link_hashtable_add(
549		struct btrfsic_block_link *l,
550		struct btrfsic_block_link_hashtable *h)
551{
552	const unsigned int hashval =
553	    (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
554	     ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
555	     ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
556	     ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
557	     & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
558
559	BUG_ON(NULL == l->block_ref_to);
560	BUG_ON(NULL == l->block_ref_from);
561	list_add(&l->collision_resolving_node, h->table + hashval);
562}
563
/* Unlink a block link from its hash collision chain. */
static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
{
	list_del(&l->collision_resolving_node);
}
568
569static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
570		struct block_device *bdev_ref_to,
571		u64 dev_bytenr_ref_to,
572		struct block_device *bdev_ref_from,
573		u64 dev_bytenr_ref_from,
574		struct btrfsic_block_link_hashtable *h)
575{
576	const unsigned int hashval =
577	    (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
578	     ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
579	     ((unsigned int)((uintptr_t)bdev_ref_to)) ^
580	     ((unsigned int)((uintptr_t)bdev_ref_from))) &
581	     (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
582	struct list_head *elem;
583
584	list_for_each(elem, h->table + hashval) {
585		struct btrfsic_block_link *const l =
586		    list_entry(elem, struct btrfsic_block_link,
587			       collision_resolving_node);
588
589		BUG_ON(NULL == l->block_ref_to);
590		BUG_ON(NULL == l->block_ref_from);
591		if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
592		    l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
593		    l->block_ref_from->dev_state->bdev == bdev_ref_from &&
594		    l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
595			return l;
596	}
597
598	return NULL;
599}
600
601static void btrfsic_dev_state_hashtable_init(
602		struct btrfsic_dev_state_hashtable *h)
603{
604	int i;
605
606	for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
607		INIT_LIST_HEAD(h->table + i);
608}
609
610static void btrfsic_dev_state_hashtable_add(
611		struct btrfsic_dev_state *ds,
612		struct btrfsic_dev_state_hashtable *h)
613{
614	const unsigned int hashval =
615	    (((unsigned int)((uintptr_t)ds->bdev)) &
616	     (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
617
618	list_add(&ds->collision_resolving_node, h->table + hashval);
619}
620
/* Unlink a device state from its hash collision chain. */
static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
{
	list_del(&ds->collision_resolving_node);
}
625
626static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
627		struct block_device *bdev,
628		struct btrfsic_dev_state_hashtable *h)
629{
630	const unsigned int hashval =
631	    (((unsigned int)((uintptr_t)bdev)) &
632	     (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
633	struct list_head *elem;
634
635	list_for_each(elem, h->table + hashval) {
636		struct btrfsic_dev_state *const ds =
637		    list_entry(elem, struct btrfsic_dev_state,
638			       collision_resolving_node);
639
640		if (ds->bdev == bdev)
641			return ds;
642	}
643
644	return NULL;
645}
646
/*
 * Build the initial block database at mount time: scan every
 * superblock mirror of every device, pick the superblock with the
 * highest generation, and then walk the root, chunk and log trees
 * it references via btrfsic_process_metablock().
 *
 * Returns 0 on success, a negative value on error. The caller owns
 * state; selected_super is a temporary copy freed on all paths.
 */
static int btrfsic_process_superblock(struct btrfsic_state *state,
				      struct btrfs_fs_devices *fs_devices)
{
	int ret = 0;
	struct btrfs_super_block *selected_super;
	struct list_head *dev_head = &fs_devices->devices;
	struct btrfs_device *device;
	struct btrfsic_dev_state *selected_dev_state = NULL;
	int pass;

	BUG_ON(NULL == state);
	selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
	if (NULL == selected_super) {
		printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
		return -1;
	}

	/*
	 * Scan all superblock mirrors of all devices; the mirror with
	 * the highest generation is copied into selected_super.
	 */
	list_for_each_entry(device, dev_head, dev_list) {
		int i;
		struct btrfsic_dev_state *dev_state;

		if (!device->bdev || !device->name)
			continue;

		dev_state = btrfsic_dev_state_lookup(device->bdev);
		BUG_ON(NULL == dev_state);
		for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
			ret = btrfsic_process_superblock_dev_mirror(
					state, dev_state, device, i,
					&selected_dev_state, selected_super);
			/* only a failure on mirror 0 is fatal */
			if (0 != ret && 0 == i) {
				kfree(selected_super);
				return ret;
			}
		}
	}

	if (NULL == state->latest_superblock) {
		printk(KERN_INFO "btrfsic: no superblock found!\n");
		kfree(selected_super);
		return -1;
	}

	state->csum_size = btrfs_super_csum_size(selected_super);

	/* pass 0: root tree, pass 1: chunk tree, pass 2: log tree */
	for (pass = 0; pass < 3; pass++) {
		int num_copies;
		int mirror_num;
		u64 next_bytenr;

		switch (pass) {
		case 0:
			next_bytenr = btrfs_super_root(selected_super);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "root@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		case 1:
			next_bytenr = btrfs_super_chunk_root(selected_super);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "chunk@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		case 2:
			next_bytenr = btrfs_super_log_root(selected_super);
			/* a zero log root means there is no log tree */
			if (0 == next_bytenr)
				continue;
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "log@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		}

		num_copies =
		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, num_copies);

		/* process every physical copy of this tree root */
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			struct btrfsic_block *next_block;
			struct btrfsic_block_data_ctx tmp_next_block_ctx;
			struct btrfsic_block_link *l;

			ret = btrfsic_map_block(state, next_bytenr,
						state->metablock_size,
						&tmp_next_block_ctx,
						mirror_num);
			if (ret) {
				printk(KERN_INFO "btrfsic:"
				       " btrfsic_map_block(root @%llu,"
				       " mirror %d) failed!\n",
				       (unsigned long long)next_bytenr,
				       mirror_num);
				kfree(selected_super);
				return -1;
			}

			/*
			 * block and link objects are expected to have been
			 * created during the per-mirror superblock scan
			 * above, hence the BUG_ONs
			 */
			next_block = btrfsic_block_hashtable_lookup(
					tmp_next_block_ctx.dev->bdev,
					tmp_next_block_ctx.dev_bytenr,
					&state->block_hashtable);
			BUG_ON(NULL == next_block);

			l = btrfsic_block_link_hashtable_lookup(
					tmp_next_block_ctx.dev->bdev,
					tmp_next_block_ctx.dev_bytenr,
					state->latest_superblock->dev_state->
					bdev,
					state->latest_superblock->dev_bytenr,
					&state->block_link_hashtable);
			BUG_ON(NULL == l);

			ret = btrfsic_read_block(state, &tmp_next_block_ctx);
			if (ret < (int)PAGE_CACHE_SIZE) {
				printk(KERN_INFO
				       "btrfsic: read @logical %llu failed!\n",
				       (unsigned long long)
				       tmp_next_block_ctx.start);
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				kfree(selected_super);
				return -1;
			}

			ret = btrfsic_process_metablock(state,
							next_block,
							&tmp_next_block_ctx,
							BTRFS_MAX_LEVEL + 3, 1);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
		}
	}

	kfree(selected_super);
	return ret;
}
786
787static int btrfsic_process_superblock_dev_mirror(
788		struct btrfsic_state *state,
789		struct btrfsic_dev_state *dev_state,
790		struct btrfs_device *device,
791		int superblock_mirror_num,
792		struct btrfsic_dev_state **selected_dev_state,
793		struct btrfs_super_block *selected_super)
794{
795	struct btrfs_super_block *super_tmp;
796	u64 dev_bytenr;
797	struct buffer_head *bh;
798	struct btrfsic_block *superblock_tmp;
799	int pass;
800	struct block_device *const superblock_bdev = device->bdev;
801
802	/* super block bytenr is always the unmapped device bytenr */
803	dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
804	if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
805		return -1;
806	bh = __bread(superblock_bdev, dev_bytenr / 4096,
807		     BTRFS_SUPER_INFO_SIZE);
808	if (NULL == bh)
809		return -1;
810	super_tmp = (struct btrfs_super_block *)
811	    (bh->b_data + (dev_bytenr & 4095));
812
813	if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
814	    strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
815		    sizeof(super_tmp->magic)) ||
816	    memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
817	    btrfs_super_nodesize(super_tmp) != state->metablock_size ||
818	    btrfs_super_leafsize(super_tmp) != state->metablock_size ||
819	    btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
820		brelse(bh);
821		return 0;
822	}
823
824	superblock_tmp =
825	    btrfsic_block_hashtable_lookup(superblock_bdev,
826					   dev_bytenr,
827					   &state->block_hashtable);
828	if (NULL == superblock_tmp) {
829		superblock_tmp = btrfsic_block_alloc();
830		if (NULL == superblock_tmp) {
831			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
832			brelse(bh);
833			return -1;
834		}
835		/* for superblock, only the dev_bytenr makes sense */
836		superblock_tmp->dev_bytenr = dev_bytenr;
837		superblock_tmp->dev_state = dev_state;
838		superblock_tmp->logical_bytenr = dev_bytenr;
839		superblock_tmp->generation = btrfs_super_generation(super_tmp);
840		superblock_tmp->is_metadata = 1;
841		superblock_tmp->is_superblock = 1;
842		superblock_tmp->is_iodone = 1;
843		superblock_tmp->never_written = 0;
844		superblock_tmp->mirror_num = 1 + superblock_mirror_num;
845		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
846			printk(KERN_INFO "New initial S-block (bdev %p, %s)"
847			       " @%llu (%s/%llu/%d)\n",
848			       superblock_bdev, device->name,
849			       (unsigned long long)dev_bytenr,
850			       dev_state->name,
851			       (unsigned long long)dev_bytenr,
852			       superblock_mirror_num);
853		list_add(&superblock_tmp->all_blocks_node,
854			 &state->all_blocks_list);
855		btrfsic_block_hashtable_add(superblock_tmp,
856					    &state->block_hashtable);
857	}
858
859	/* select the one with the highest generation field */
860	if (btrfs_super_generation(super_tmp) >
861	    state->max_superblock_generation ||
862	    0 == state->max_superblock_generation) {
863		memcpy(selected_super, super_tmp, sizeof(*selected_super));
864		*selected_dev_state = dev_state;
865		state->max_superblock_generation =
866		    btrfs_super_generation(super_tmp);
867		state->latest_superblock = superblock_tmp;
868	}
869
870	for (pass = 0; pass < 3; pass++) {
871		u64 next_bytenr;
872		int num_copies;
873		int mirror_num;
874		const char *additional_string = NULL;
875		struct btrfs_disk_key tmp_disk_key;
876
877		tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
878		tmp_disk_key.offset = 0;
879		switch (pass) {
880		case 0:
881			tmp_disk_key.objectid =
882			    cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
883			additional_string = "initial root ";
884			next_bytenr = btrfs_super_root(super_tmp);
885			break;
886		case 1:
887			tmp_disk_key.objectid =
888			    cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
889			additional_string = "initial chunk ";
890			next_bytenr = btrfs_super_chunk_root(super_tmp);
891			break;
892		case 2:
893			tmp_disk_key.objectid =
894			    cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
895			additional_string = "initial log ";
896			next_bytenr = btrfs_super_log_root(super_tmp);
897			if (0 == next_bytenr)
898				continue;
899			break;
900		}
901
902		num_copies =
903		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
904				     next_bytenr, state->metablock_size);
905		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
906			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
907			       (unsigned long long)next_bytenr, num_copies);
908		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
909			struct btrfsic_block *next_block;
910			struct btrfsic_block_data_ctx tmp_next_block_ctx;
911			struct btrfsic_block_link *l;
912
913			if (btrfsic_map_block(state, next_bytenr,
914					      state->metablock_size,
915					      &tmp_next_block_ctx,
916					      mirror_num)) {
917				printk(KERN_INFO "btrfsic: btrfsic_map_block("
918				       "bytenr @%llu, mirror %d) failed!\n",
919				       (unsigned long long)next_bytenr,
920				       mirror_num);
921				brelse(bh);
922				return -1;
923			}
924
925			next_block = btrfsic_block_lookup_or_add(
926					state, &tmp_next_block_ctx,
927					additional_string, 1, 1, 0,
928					mirror_num, NULL);
929			if (NULL == next_block) {
930				btrfsic_release_block_ctx(&tmp_next_block_ctx);
931				brelse(bh);
932				return -1;
933			}
934
935			next_block->disk_key = tmp_disk_key;
936			next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
937			l = btrfsic_block_link_lookup_or_add(
938					state, &tmp_next_block_ctx,
939					next_block, superblock_tmp,
940					BTRFSIC_GENERATION_UNKNOWN);
941			btrfsic_release_block_ctx(&tmp_next_block_ctx);
942			if (NULL == l) {
943				brelse(bh);
944				return -1;
945			}
946		}
947	}
948	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
949		btrfsic_dump_tree_sub(state, superblock_tmp, 0);
950
951	brelse(bh);
952	return 0;
953}
954
955static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
956{
957	struct btrfsic_stack_frame *sf;
958
959	sf = kzalloc(sizeof(*sf), GFP_NOFS);
960	if (NULL == sf)
961		printk(KERN_INFO "btrfsic: alloc memory failed!\n");
962	else
963		sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
964	return sf;
965}
966
967static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
968{
969	BUG_ON(!(NULL == sf ||
970		 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
971	kfree(sf);
972}
973
/*
 * Walk a tree block (node or leaf) and follow every block pointer it
 * contains, creating verification links from each block to the blocks
 * it references.  Recursion is implemented with an explicit stack of
 * heap-allocated frames (to keep kernel stack usage flat); the frame
 * for the initial block lives on the stack and is never freed here.
 *
 * @state:               global btrfsic state
 * @first_block:         the block where the walk starts
 * @first_block_ctx:     mapped data of @first_block; released by the caller
 * @first_limit_nesting: descent budget; when a link's budget reaches zero
 *                       the referenced block is linked but not entered
 * @force_iodone_flag:   passed through when blocks are added to the database
 *
 * Returns 0 on success or the first error propagated up through the
 * frame chain.
 */
static int btrfsic_process_metablock(
		struct btrfsic_state *state,
		struct btrfsic_block *const first_block,
		struct btrfsic_block_data_ctx *const first_block_ctx,
		int first_limit_nesting, int force_iodone_flag)
{
	struct btrfsic_stack_frame initial_stack_frame = { 0 };
	struct btrfsic_stack_frame *sf;
	struct btrfsic_stack_frame *next_stack;
	struct btrfs_header *const first_hdr =
		(struct btrfs_header *)first_block_ctx->datav[0];

	BUG_ON(!first_hdr);
	sf = &initial_stack_frame;
	sf->error = 0;
	/* i == -1 marks a frame that has not been entered yet */
	sf->i = -1;
	sf->limit_nesting = first_limit_nesting;
	sf->block = first_block;
	sf->block_ctx = first_block_ctx;
	sf->next_block = NULL;
	sf->hdr = first_hdr;
	sf->prev = NULL;

	/* descend: (re)enter the block the current frame points to */
continue_with_new_stack_frame:
	sf->block->generation = le64_to_cpu(sf->hdr->generation);
	if (0 == sf->hdr->level) {
		/* level 0: leaf block, iterate over its items */
		struct btrfs_leaf *const leafhdr =
		    (struct btrfs_leaf *)sf->hdr;

		if (-1 == sf->i) {
			sf->nr = le32_to_cpu(leafhdr->header.nritems);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO
				       "leaf %llu items %d generation %llu"
				       " owner %llu\n",
				       (unsigned long long)
				       sf->block_ctx->start,
				       sf->nr,
				       (unsigned long long)
				       le64_to_cpu(leafhdr->header.generation),
				       (unsigned long long)
				       le64_to_cpu(leafhdr->header.owner));
		}

continue_with_current_leaf_stack_frame:
		/* advance to the next item once all mirrors are handled */
		if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
			sf->i++;
			sf->num_copies = 0;
		}

		if (sf->i < sf->nr) {
			struct btrfs_item disk_item;
			u32 disk_item_offset =
				(uintptr_t)(leafhdr->items + sf->i) -
				(uintptr_t)leafhdr;
			struct btrfs_disk_key *disk_key;
			u8 type;
			u32 item_offset;

			if (disk_item_offset + sizeof(struct btrfs_item) >
			    sf->block_ctx->len) {
leaf_item_out_of_bounce_error:
				printk(KERN_INFO
				       "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
				       sf->block_ctx->start,
				       sf->block_ctx->dev->name);
				goto one_stack_frame_backwards;
			}
			btrfsic_read_from_block_data(sf->block_ctx,
						     &disk_item,
						     disk_item_offset,
						     sizeof(struct btrfs_item));
			item_offset = le32_to_cpu(disk_item.offset);
			disk_key = &disk_item.key;
			type = disk_key->type;

			if (BTRFS_ROOT_ITEM_KEY == type) {
				/* root item: follow the pointer to the
				 * referenced tree root block */
				struct btrfs_root_item root_item;
				u32 root_item_offset;
				u64 next_bytenr;

				root_item_offset = item_offset +
					offsetof(struct btrfs_leaf, items);
				if (root_item_offset +
				    sizeof(struct btrfs_root_item) >
				    sf->block_ctx->len)
					goto leaf_item_out_of_bounce_error;
				btrfsic_read_from_block_data(
					sf->block_ctx, &root_item,
					root_item_offset,
					sizeof(struct btrfs_root_item));
				next_bytenr = le64_to_cpu(root_item.bytenr);

				sf->error =
				    btrfsic_create_link_to_next_block(
						state,
						sf->block,
						sf->block_ctx,
						next_bytenr,
						sf->limit_nesting,
						&sf->next_block_ctx,
						&sf->next_block,
						force_iodone_flag,
						&sf->num_copies,
						&sf->mirror_num,
						disk_key,
						le64_to_cpu(root_item.
						generation));
				if (sf->error)
					goto one_stack_frame_backwards;

				if (NULL != sf->next_block) {
					/* descend into the referenced block
					 * with a fresh stack frame */
					struct btrfs_header *const next_hdr =
					    (struct btrfs_header *)
					    sf->next_block_ctx.datav[0];

					next_stack =
					    btrfsic_stack_frame_alloc();
					if (NULL == next_stack) {
						btrfsic_release_block_ctx(
								&sf->
								next_block_ctx);
						goto one_stack_frame_backwards;
					}

					next_stack->i = -1;
					next_stack->block = sf->next_block;
					next_stack->block_ctx =
					    &sf->next_block_ctx;
					next_stack->next_block = NULL;
					next_stack->hdr = next_hdr;
					next_stack->limit_nesting =
					    sf->limit_nesting - 1;
					next_stack->prev = sf;
					sf = next_stack;
					goto continue_with_new_stack_frame;
				}
			} else if (BTRFS_EXTENT_DATA_KEY == type &&
				   state->include_extent_data) {
				/* data extent: link the referenced data
				 * blocks (no descent, data is not a tree) */
				sf->error = btrfsic_handle_extent_data(
						state,
						sf->block,
						sf->block_ctx,
						item_offset,
						force_iodone_flag);
				if (sf->error)
					goto one_stack_frame_backwards;
			}

			goto continue_with_current_leaf_stack_frame;
		}
	} else {
		/* level > 0: interior node, iterate over its key pointers */
		struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;

		if (-1 == sf->i) {
			sf->nr = le32_to_cpu(nodehdr->header.nritems);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO "node %llu level %d items %d"
				       " generation %llu owner %llu\n",
				       (unsigned long long)
				       sf->block_ctx->start,
				       nodehdr->header.level, sf->nr,
				       (unsigned long long)
				       le64_to_cpu(nodehdr->header.generation),
				       (unsigned long long)
				       le64_to_cpu(nodehdr->header.owner));
		}

continue_with_current_node_stack_frame:
		/* advance to the next key pointer once all mirrors are done */
		if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
			sf->i++;
			sf->num_copies = 0;
		}

		if (sf->i < sf->nr) {
			struct btrfs_key_ptr key_ptr;
			u32 key_ptr_offset;
			u64 next_bytenr;

			key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
					  (uintptr_t)nodehdr;
			if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
			    sf->block_ctx->len) {
				printk(KERN_INFO
				       "btrfsic: node item out of bounce at logical %llu, dev %s\n",
				       sf->block_ctx->start,
				       sf->block_ctx->dev->name);
				goto one_stack_frame_backwards;
			}
			btrfsic_read_from_block_data(
				sf->block_ctx, &key_ptr, key_ptr_offset,
				sizeof(struct btrfs_key_ptr));
			next_bytenr = le64_to_cpu(key_ptr.blockptr);

			sf->error = btrfsic_create_link_to_next_block(
					state,
					sf->block,
					sf->block_ctx,
					next_bytenr,
					sf->limit_nesting,
					&sf->next_block_ctx,
					&sf->next_block,
					force_iodone_flag,
					&sf->num_copies,
					&sf->mirror_num,
					&key_ptr.key,
					le64_to_cpu(key_ptr.generation));
			if (sf->error)
				goto one_stack_frame_backwards;

			if (NULL != sf->next_block) {
				/* descend into the child block */
				struct btrfs_header *const next_hdr =
				    (struct btrfs_header *)
				    sf->next_block_ctx.datav[0];

				next_stack = btrfsic_stack_frame_alloc();
				if (NULL == next_stack)
					goto one_stack_frame_backwards;

				next_stack->i = -1;
				next_stack->block = sf->next_block;
				next_stack->block_ctx = &sf->next_block_ctx;
				next_stack->next_block = NULL;
				next_stack->hdr = next_hdr;
				next_stack->limit_nesting =
				    sf->limit_nesting - 1;
				next_stack->prev = sf;
				sf = next_stack;
				goto continue_with_new_stack_frame;
			}

			goto continue_with_current_node_stack_frame;
		}
	}

	/* ascend: current block fully processed (or failed), pop the frame */
one_stack_frame_backwards:
	if (NULL != sf->prev) {
		struct btrfsic_stack_frame *const prev = sf->prev;

		/* the one for the initial block is freed in the caller */
		btrfsic_release_block_ctx(sf->block_ctx);

		if (sf->error) {
			/* propagate the error; unwind without resuming */
			prev->error = sf->error;
			btrfsic_stack_frame_free(sf);
			sf = prev;
			goto one_stack_frame_backwards;
		}

		btrfsic_stack_frame_free(sf);
		sf = prev;
		goto continue_with_new_stack_frame;
	} else {
		BUG_ON(&initial_stack_frame != sf);
	}

	return sf->error;
}
1234
1235static void btrfsic_read_from_block_data(
1236	struct btrfsic_block_data_ctx *block_ctx,
1237	void *dstv, u32 offset, size_t len)
1238{
1239	size_t cur;
1240	size_t offset_in_page;
1241	char *kaddr;
1242	char *dst = (char *)dstv;
1243	size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
1244	unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
1245
1246	WARN_ON(offset + len > block_ctx->len);
1247	offset_in_page = (start_offset + offset) &
1248			 ((unsigned long)PAGE_CACHE_SIZE - 1);
1249
1250	while (len > 0) {
1251		cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
1252		BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
1253			    PAGE_CACHE_SHIFT);
1254		kaddr = block_ctx->datav[i];
1255		memcpy(dst, kaddr + offset_in_page, cur);
1256
1257		dst += cur;
1258		len -= cur;
1259		offset_in_page = 0;
1260		i++;
1261	}
1262}
1263
/*
 * Create (or refresh) a verification link from @block to the tree block
 * at logical address @next_bytenr for the mirror *mirror_nump, and read
 * that block's data so the caller can descend into it.
 *
 * On the first call for a given pointer (*num_copiesp == 0), the mirror
 * count is computed and *mirror_nump is reset to 1; the caller invokes
 * this function repeatedly and *mirror_nump advances by one per call
 * until all mirrors are handled.
 *
 * *next_blockp is set to the referenced block when the caller should
 * descend into it (a new link was allocated and the nesting budget is
 * not yet exhausted), otherwise to NULL.  On success @next_block_ctx
 * holds the mapped (and, when descending, read) block; the caller is
 * responsible for releasing it.
 *
 * Returns 0 on success (also when all mirrors are already processed),
 * -1 on mapping, allocation or read failure.
 */
static int btrfsic_create_link_to_next_block(
		struct btrfsic_state *state,
		struct btrfsic_block *block,
		struct btrfsic_block_data_ctx *block_ctx,
		u64 next_bytenr,
		int limit_nesting,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block **next_blockp,
		int force_iodone_flag,
		int *num_copiesp, int *mirror_nump,
		struct btrfs_disk_key *disk_key,
		u64 parent_generation)
{
	struct btrfsic_block *next_block = NULL;
	int ret;
	struct btrfsic_block_link *l;
	int did_alloc_block_link;
	int block_was_created;

	*next_blockp = NULL;
	if (0 == *num_copiesp) {
		/* first mirror: determine how many copies exist */
		*num_copiesp =
		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, *num_copiesp);
		*mirror_nump = 1;
	}

	/* all mirrors processed; caller moves on to the next pointer */
	if (*mirror_nump > *num_copiesp)
		return 0;

	if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
		printk(KERN_INFO
		       "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
		       *mirror_nump);
	ret = btrfsic_map_block(state, next_bytenr,
				state->metablock_size,
				next_block_ctx, *mirror_nump);
	if (ret) {
		printk(KERN_INFO
		       "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
		       (unsigned long long)next_bytenr, *mirror_nump);
		btrfsic_release_block_ctx(next_block_ctx);
		*next_blockp = NULL;
		return -1;
	}

	next_block = btrfsic_block_lookup_or_add(state,
						 next_block_ctx, "referenced ",
						 1, force_iodone_flag,
						 !force_iodone_flag,
						 *mirror_nump,
						 &block_was_created);
	if (NULL == next_block) {
		btrfsic_release_block_ctx(next_block_ctx);
		*next_blockp = NULL;
		return -1;
	}
	if (block_was_created) {
		/* brand new block: no existing link can exist yet */
		l = NULL;
		next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
	} else {
		/* known block: warn on logical bytenr mismatches and look
		 * for an already existing link from @block */
		if (next_block->logical_bytenr != next_bytenr &&
		    !(!next_block->is_metadata &&
		      0 == next_block->logical_bytenr)) {
			printk(KERN_INFO
			       "Referenced block @%llu (%s/%llu/%d)"
			       " found in hash table, %c,"
			       " bytenr mismatch (!= stored %llu).\n",
			       (unsigned long long)next_bytenr,
			       next_block_ctx->dev->name,
			       (unsigned long long)next_block_ctx->dev_bytenr,
			       *mirror_nump,
			       btrfsic_get_block_type(state, next_block),
			       (unsigned long long)next_block->logical_bytenr);
		} else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "Referenced block @%llu (%s/%llu/%d)"
			       " found in hash table, %c.\n",
			       (unsigned long long)next_bytenr,
			       next_block_ctx->dev->name,
			       (unsigned long long)next_block_ctx->dev_bytenr,
			       *mirror_nump,
			       btrfsic_get_block_type(state, next_block));
		next_block->logical_bytenr = next_bytenr;

		next_block->mirror_num = *mirror_nump;
		l = btrfsic_block_link_hashtable_lookup(
				next_block_ctx->dev->bdev,
				next_block_ctx->dev_bytenr,
				block_ctx->dev->bdev,
				block_ctx->dev_bytenr,
				&state->block_link_hashtable);
	}

	next_block->disk_key = *disk_key;
	if (NULL == l) {
		/* no link yet: allocate one and register it */
		l = btrfsic_block_link_alloc();
		if (NULL == l) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			btrfsic_release_block_ctx(next_block_ctx);
			*next_blockp = NULL;
			return -1;
		}

		did_alloc_block_link = 1;
		l->block_ref_to = next_block;
		l->block_ref_from = block;
		l->ref_cnt = 1;
		l->parent_generation = parent_generation;

		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			btrfsic_print_add_link(state, l);

		list_add(&l->node_ref_to, &block->ref_to_list);
		list_add(&l->node_ref_from, &next_block->ref_from_list);

		btrfsic_block_link_hashtable_add(l,
						 &state->block_link_hashtable);
	} else {
		did_alloc_block_link = 0;
		if (0 == limit_nesting) {
			l->ref_cnt++;
			l->parent_generation = parent_generation;
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				btrfsic_print_add_link(state, l);
		}
	}

	/* only freshly linked blocks with remaining budget are descended
	 * into; read their data now */
	if (limit_nesting > 0 && did_alloc_block_link) {
		ret = btrfsic_read_block(state, next_block_ctx);
		if (ret < (int)next_block_ctx->len) {
			printk(KERN_INFO
			       "btrfsic: read block @logical %llu failed!\n",
			       (unsigned long long)next_bytenr);
			btrfsic_release_block_ctx(next_block_ctx);
			*next_blockp = NULL;
			return -1;
		}

		*next_blockp = next_block;
	} else {
		*next_blockp = NULL;
	}
	(*mirror_nump)++;

	return 0;
}
1414
1415static int btrfsic_handle_extent_data(
1416		struct btrfsic_state *state,
1417		struct btrfsic_block *block,
1418		struct btrfsic_block_data_ctx *block_ctx,
1419		u32 item_offset, int force_iodone_flag)
1420{
1421	int ret;
1422	struct btrfs_file_extent_item file_extent_item;
1423	u64 file_extent_item_offset;
1424	u64 next_bytenr;
1425	u64 num_bytes;
1426	u64 generation;
1427	struct btrfsic_block_link *l;
1428
1429	file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
1430				  item_offset;
1431	if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
1432	    block_ctx->len) {
1433		printk(KERN_INFO
1434		       "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1435		       block_ctx->start, block_ctx->dev->name);
1436		return -1;
1437	}
1438	btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1439				     file_extent_item_offset,
1440				     sizeof(struct btrfs_file_extent_item));
1441	next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) +
1442		      le64_to_cpu(file_extent_item.offset);
1443	generation = le64_to_cpu(file_extent_item.generation);
1444	num_bytes = le64_to_cpu(file_extent_item.num_bytes);
1445	generation = le64_to_cpu(file_extent_item.generation);
1446
1447	if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1448		printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
1449		       " offset = %llu, num_bytes = %llu\n",
1450		       file_extent_item.type,
1451		       (unsigned long long)
1452		       le64_to_cpu(file_extent_item.disk_bytenr),
1453		       (unsigned long long)le64_to_cpu(file_extent_item.offset),
1454		       (unsigned long long)num_bytes);
1455	if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
1456	    ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr))
1457		return 0;
1458	while (num_bytes > 0) {
1459		u32 chunk_len;
1460		int num_copies;
1461		int mirror_num;
1462
1463		if (num_bytes > state->datablock_size)
1464			chunk_len = state->datablock_size;
1465		else
1466			chunk_len = num_bytes;
1467
1468		num_copies =
1469		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
1470				     next_bytenr, state->datablock_size);
1471		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1472			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1473			       (unsigned long long)next_bytenr, num_copies);
1474		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
1475			struct btrfsic_block_data_ctx next_block_ctx;
1476			struct btrfsic_block *next_block;
1477			int block_was_created;
1478
1479			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1480				printk(KERN_INFO "btrfsic_handle_extent_data("
1481				       "mirror_num=%d)\n", mirror_num);
1482			if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1483				printk(KERN_INFO
1484				       "\tdisk_bytenr = %llu, num_bytes %u\n",
1485				       (unsigned long long)next_bytenr,
1486				       chunk_len);
1487			ret = btrfsic_map_block(state, next_bytenr,
1488						chunk_len, &next_block_ctx,
1489						mirror_num);
1490			if (ret) {
1491				printk(KERN_INFO
1492				       "btrfsic: btrfsic_map_block(@%llu,"
1493				       " mirror=%d) failed!\n",
1494				       (unsigned long long)next_bytenr,
1495				       mirror_num);
1496				return -1;
1497			}
1498
1499			next_block = btrfsic_block_lookup_or_add(
1500					state,
1501					&next_block_ctx,
1502					"referenced ",
1503					0,
1504					force_iodone_flag,
1505					!force_iodone_flag,
1506					mirror_num,
1507					&block_was_created);
1508			if (NULL == next_block) {
1509				printk(KERN_INFO
1510				       "btrfsic: error, kmalloc failed!\n");
1511				btrfsic_release_block_ctx(&next_block_ctx);
1512				return -1;
1513			}
1514			if (!block_was_created) {
1515				if (next_block->logical_bytenr != next_bytenr &&
1516				    !(!next_block->is_metadata &&
1517				      0 == next_block->logical_bytenr)) {
1518					printk(KERN_INFO
1519					       "Referenced block"
1520					       " @%llu (%s/%llu/%d)"
1521					       " found in hash table, D,"
1522					       " bytenr mismatch"
1523					       " (!= stored %llu).\n",
1524					       (unsigned long long)next_bytenr,
1525					       next_block_ctx.dev->name,
1526					       (unsigned long long)
1527					       next_block_ctx.dev_bytenr,
1528					       mirror_num,
1529					       (unsigned long long)
1530					       next_block->logical_bytenr);
1531				}
1532				next_block->logical_bytenr = next_bytenr;
1533				next_block->mirror_num = mirror_num;
1534			}
1535
1536			l = btrfsic_block_link_lookup_or_add(state,
1537							     &next_block_ctx,
1538							     next_block, block,
1539							     generation);
1540			btrfsic_release_block_ctx(&next_block_ctx);
1541			if (NULL == l)
1542				return -1;
1543		}
1544
1545		next_bytenr += chunk_len;
1546		num_bytes -= chunk_len;
1547	}
1548
1549	return 0;
1550}
1551
1552static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1553			     struct btrfsic_block_data_ctx *block_ctx_out,
1554			     int mirror_num)
1555{
1556	int ret;
1557	u64 length;
1558	struct btrfs_bio *multi = NULL;
1559	struct btrfs_device *device;
1560
1561	length = len;
1562	ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ,
1563			      bytenr, &length, &multi, mirror_num);
1564
1565	device = multi->stripes[0].dev;
1566	block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
1567	block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1568	block_ctx_out->start = bytenr;
1569	block_ctx_out->len = len;
1570	block_ctx_out->datav = NULL;
1571	block_ctx_out->pagev = NULL;
1572	block_ctx_out->mem_to_free = NULL;
1573
1574	if (0 == ret)
1575		kfree(multi);
1576	if (NULL == block_ctx_out->dev) {
1577		ret = -ENXIO;
1578		printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
1579	}
1580
1581	return ret;
1582}
1583
1584static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
1585				  u32 len, struct block_device *bdev,
1586				  struct btrfsic_block_data_ctx *block_ctx_out)
1587{
1588	block_ctx_out->dev = btrfsic_dev_state_lookup(bdev);
1589	block_ctx_out->dev_bytenr = bytenr;
1590	block_ctx_out->start = bytenr;
1591	block_ctx_out->len = len;
1592	block_ctx_out->datav = NULL;
1593	block_ctx_out->pagev = NULL;
1594	block_ctx_out->mem_to_free = NULL;
1595	if (NULL != block_ctx_out->dev) {
1596		return 0;
1597	} else {
1598		printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n");
1599		return -ENXIO;
1600	}
1601}
1602
1603static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
1604{
1605	if (block_ctx->mem_to_free) {
1606		unsigned int num_pages;
1607
1608		BUG_ON(!block_ctx->datav);
1609		BUG_ON(!block_ctx->pagev);
1610		num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1611			    PAGE_CACHE_SHIFT;
1612		while (num_pages > 0) {
1613			num_pages--;
1614			if (block_ctx->datav[num_pages]) {
1615				kunmap(block_ctx->pagev[num_pages]);
1616				block_ctx->datav[num_pages] = NULL;
1617			}
1618			if (block_ctx->pagev[num_pages]) {
1619				__free_page(block_ctx->pagev[num_pages]);
1620				block_ctx->pagev[num_pages] = NULL;
1621			}
1622		}
1623
1624		kfree(block_ctx->mem_to_free);
1625		block_ctx->mem_to_free = NULL;
1626		block_ctx->pagev = NULL;
1627		block_ctx->datav = NULL;
1628	}
1629}
1630
1631static int btrfsic_read_block(struct btrfsic_state *state,
1632			      struct btrfsic_block_data_ctx *block_ctx)
1633{
1634	unsigned int num_pages;
1635	unsigned int i;
1636	u64 dev_bytenr;
1637	int ret;
1638
1639	BUG_ON(block_ctx->datav);
1640	BUG_ON(block_ctx->pagev);
1641	BUG_ON(block_ctx->mem_to_free);
1642	if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
1643		printk(KERN_INFO
1644		       "btrfsic: read_block() with unaligned bytenr %llu\n",
1645		       (unsigned long long)block_ctx->dev_bytenr);
1646		return -1;
1647	}
1648
1649	num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1650		    PAGE_CACHE_SHIFT;
1651	block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
1652					  sizeof(*block_ctx->pagev)) *
1653					 num_pages, GFP_NOFS);
1654	if (!block_ctx->mem_to_free)
1655		return -1;
1656	block_ctx->datav = block_ctx->mem_to_free;
1657	block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
1658	for (i = 0; i < num_pages; i++) {
1659		block_ctx->pagev[i] = alloc_page(GFP_NOFS);
1660		if (!block_ctx->pagev[i])
1661			return -1;
1662	}
1663
1664	dev_bytenr = block_ctx->dev_bytenr;
1665	for (i = 0; i < num_pages;) {
1666		struct bio *bio;
1667		unsigned int j;
1668		DECLARE_COMPLETION_ONSTACK(complete);
1669
1670		bio = bio_alloc(GFP_NOFS, num_pages - i);
1671		if (!bio) {
1672			printk(KERN_INFO
1673			       "btrfsic: bio_alloc() for %u pages failed!\n",
1674			       num_pages - i);
1675			return -1;
1676		}
1677		bio->bi_bdev = block_ctx->dev->bdev;
1678		bio->bi_sector = dev_bytenr >> 9;
1679		bio->bi_end_io = btrfsic_complete_bio_end_io;
1680		bio->bi_private = &complete;
1681
1682		for (j = i; j < num_pages; j++) {
1683			ret = bio_add_page(bio, block_ctx->pagev[j],
1684					   PAGE_CACHE_SIZE, 0);
1685			if (PAGE_CACHE_SIZE != ret)
1686				break;
1687		}
1688		if (j == i) {
1689			printk(KERN_INFO
1690			       "btrfsic: error, failed to add a single page!\n");
1691			return -1;
1692		}
1693		submit_bio(READ, bio);
1694
1695		/* this will also unplug the queue */
1696		wait_for_completion(&complete);
1697
1698		if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
1699			printk(KERN_INFO
1700			       "btrfsic: read error at logical %llu dev %s!\n",
1701			       block_ctx->start, block_ctx->dev->name);
1702			bio_put(bio);
1703			return -1;
1704		}
1705		bio_put(bio);
1706		dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
1707		i = j;
1708	}
1709	for (i = 0; i < num_pages; i++) {
1710		block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
1711		if (!block_ctx->datav[i]) {
1712			printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
1713			       block_ctx->dev->name);
1714			return -1;
1715		}
1716	}
1717
1718	return block_ctx->len;
1719}
1720
1721static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
1722{
1723	complete((struct completion *)bio->bi_private);
1724}
1725
/*
 * Debug helper: dump the complete block database to the kernel log.
 * For every known block, print its identity followed by all links to
 * the blocks it references (ref_to_list) and all links from the blocks
 * that reference it (ref_from_list).
 */
static void btrfsic_dump_database(struct btrfsic_state *state)
{
	struct list_head *elem_all;

	BUG_ON(NULL == state);

	printk(KERN_INFO "all_blocks_list:\n");
	list_for_each(elem_all, &state->all_blocks_list) {
		const struct btrfsic_block *const b_all =
		    list_entry(elem_all, struct btrfsic_block,
			       all_blocks_node);
		struct list_head *elem_ref_to;
		struct list_head *elem_ref_from;

		printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
		       btrfsic_get_block_type(state, b_all),
		       (unsigned long long)b_all->logical_bytenr,
		       b_all->dev_state->name,
		       (unsigned long long)b_all->dev_bytenr,
		       b_all->mirror_num);

		/* outgoing links: blocks that b_all references */
		list_for_each(elem_ref_to, &b_all->ref_to_list) {
			const struct btrfsic_block_link *const l =
			    list_entry(elem_ref_to,
				       struct btrfsic_block_link,
				       node_ref_to);

			printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
			       " refers %u* to"
			       " %c @%llu (%s/%llu/%d)\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_to),
			       (unsigned long long)
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       (unsigned long long)l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num);
		}

		/* incoming links: blocks that reference b_all */
		list_for_each(elem_ref_from, &b_all->ref_from_list) {
			const struct btrfsic_block_link *const l =
			    list_entry(elem_ref_from,
				       struct btrfsic_block_link,
				       node_ref_from);

			printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
			       " is ref %u* from"
			       " %c @%llu (%s/%llu/%d)\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_from),
			       (unsigned long long)
			       l->block_ref_from->logical_bytenr,
			       l->block_ref_from->dev_state->name,
			       (unsigned long long)
			       l->block_ref_from->dev_bytenr,
			       l->block_ref_from->mirror_num);
		}

		printk(KERN_INFO "\n");
	}
}
1797
1798/*
1799 * Test whether the disk block contains a tree block (leaf or node)
1800 * (note that this test fails for the super block)
1801 */
1802static int btrfsic_test_for_metadata(struct btrfsic_state *state,
1803				     char **datav, unsigned int num_pages)
1804{
1805	struct btrfs_header *h;
1806	u8 csum[BTRFS_CSUM_SIZE];
1807	u32 crc = ~(u32)0;
1808	unsigned int i;
1809
1810	if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
1811		return 1; /* not metadata */
1812	num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
1813	h = (struct btrfs_header *)datav[0];
1814
1815	if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
1816		return 1;
1817
1818	for (i = 0; i < num_pages; i++) {
1819		u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1820		size_t sublen = i ? PAGE_CACHE_SIZE :
1821				    (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
1822
1823		crc = crc32c(crc, data, sublen);
1824	}
1825	btrfs_csum_final(crc, csum);
1826	if (memcmp(csum, h->csum, state->csum_size))
1827		return 1;
1828
1829	return 0; /* is metadata */
1830}
1831
/*
 * Handle one intercepted block write (bio- or buffer_head-based).
 *
 * A single submitted bio/bh may cover several btrfsic blocks; the
 * function consumes processed_len bytes per iteration and loops via
 * the "again" label until all num_pages pages are handled. For each
 * covered block it either updates the already-known block state from
 * the hash table (verifying the overwrite rules) or creates and links
 * a new block entry. The bio/bh completion callback is patched so
 * that btrfsic_bio_end_io()/btrfsic_bh_end_io() can record IO
 * completion; *bio_is_patched tracks whether the bio was already
 * patched so additional blocks of the same bio are chained via
 * next_in_same_bio instead.
 */
static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
					  u64 dev_bytenr, char **mapped_datav,
					  unsigned int num_pages,
					  struct bio *bio, int *bio_is_patched,
					  struct buffer_head *bh,
					  int submit_bio_bh_rw)
{
	int is_metadata;
	struct btrfsic_block *block;
	struct btrfsic_block_data_ctx block_ctx;
	int ret;
	struct btrfsic_state *state = dev_state->state;
	struct block_device *bdev = dev_state->bdev;
	unsigned int processed_len;

	if (NULL != bio_is_patched)
		*bio_is_patched = 0;

again:
	if (num_pages == 0)
		return;

	processed_len = 0;
	is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
						      num_pages));

	block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
					       &state->block_hashtable);
	if (NULL != block) {
		/* the block is already known: this is an overwrite */
		u64 bytenr = 0;
		struct list_head *elem_ref_to;
		struct list_head *tmp_ref_to;

		if (block->is_superblock) {
			bytenr = le64_to_cpu(((struct btrfs_super_block *)
					      mapped_datav[0])->bytenr);
			if (num_pages * PAGE_CACHE_SIZE <
			    BTRFS_SUPER_INFO_SIZE) {
				printk(KERN_INFO
				       "btrfsic: cannot work with too short bios!\n");
				return;
			}
			/* the checksum test above fails for superblocks */
			is_metadata = 1;
			BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
			processed_len = BTRFS_SUPER_INFO_SIZE;
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
				printk(KERN_INFO
				       "[before new superblock is written]:\n");
				btrfsic_dump_tree_sub(state, block, 0);
			}
		}
		if (is_metadata) {
			if (!block->is_superblock) {
				if (num_pages * PAGE_CACHE_SIZE <
				    state->metablock_size) {
					printk(KERN_INFO
					       "btrfsic: cannot work with too short bios!\n");
					return;
				}
				processed_len = state->metablock_size;
				bytenr = le64_to_cpu(((struct btrfs_header *)
						      mapped_datav[0])->bytenr);
				btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
							       dev_state,
							       dev_bytenr);
			}
			if (block->logical_bytenr != bytenr) {
				/* same physical location now carries data of
				 * a different logical bytenr; remember it */
				printk(KERN_INFO
				       "Written block @%llu (%s/%llu/%d)"
				       " found in hash table, %c,"
				       " bytenr mismatch"
				       " (!= stored %llu).\n",
				       (unsigned long long)bytenr,
				       dev_state->name,
				       (unsigned long long)dev_bytenr,
				       block->mirror_num,
				       btrfsic_get_block_type(state, block),
				       (unsigned long long)
				       block->logical_bytenr);
				block->logical_bytenr = bytenr;
			} else if (state->print_mask &
				   BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO
				       "Written block @%llu (%s/%llu/%d)"
				       " found in hash table, %c.\n",
				       (unsigned long long)bytenr,
				       dev_state->name,
				       (unsigned long long)dev_bytenr,
				       block->mirror_num,
				       btrfsic_get_block_type(state, block));
		} else {
			if (num_pages * PAGE_CACHE_SIZE <
			    state->datablock_size) {
				printk(KERN_INFO
				       "btrfsic: cannot work with too short bios!\n");
				return;
			}
			processed_len = state->datablock_size;
			bytenr = block->logical_bytenr;
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO
				       "Written block @%llu (%s/%llu/%d)"
				       " found in hash table, %c.\n",
				       (unsigned long long)bytenr,
				       dev_state->name,
				       (unsigned long long)dev_bytenr,
				       block->mirror_num,
				       btrfsic_get_block_type(state, block));
		}

		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "ref_to_list: %cE, ref_from_list: %cE\n",
			       list_empty(&block->ref_to_list) ? ' ' : '!',
			       list_empty(&block->ref_from_list) ? ' ' : '!');
		/* rule 1: never overwrite a block that the most recent
		 * superblock still references */
		if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
			printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
			       " @%llu (%s/%llu/%d), old(gen=%llu,"
			       " objectid=%llu, type=%d, offset=%llu),"
			       " new(gen=%llu),"
			       " which is referenced by most recent superblock"
			       " (superblockgen=%llu)!\n",
			       btrfsic_get_block_type(state, block),
			       (unsigned long long)bytenr,
			       dev_state->name,
			       (unsigned long long)dev_bytenr,
			       block->mirror_num,
			       (unsigned long long)block->generation,
			       (unsigned long long)
			       le64_to_cpu(block->disk_key.objectid),
			       block->disk_key.type,
			       (unsigned long long)
			       le64_to_cpu(block->disk_key.offset),
			       (unsigned long long)
			       le64_to_cpu(((struct btrfs_header *)
					    mapped_datav[0])->generation),
			       (unsigned long long)
			       state->max_superblock_generation);
			btrfsic_dump_tree(state);
		}

		if (!block->is_iodone && !block->never_written) {
			printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
			       " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
			       " which is not yet iodone!\n",
			       btrfsic_get_block_type(state, block),
			       (unsigned long long)bytenr,
			       dev_state->name,
			       (unsigned long long)dev_bytenr,
			       block->mirror_num,
			       (unsigned long long)block->generation,
			       (unsigned long long)
			       le64_to_cpu(((struct btrfs_header *)
					    mapped_datav[0])->generation));
			/* it would not be safe to go on */
			btrfsic_dump_tree(state);
			goto continue_loop;
		}

		/*
		 * Clear all references of this block. Do not free
		 * the block itself even if is not referenced anymore
		 * because it still carries valueable information
		 * like whether it was ever written and IO completed.
		 */
		list_for_each_safe(elem_ref_to, tmp_ref_to,
				   &block->ref_to_list) {
			struct btrfsic_block_link *const l =
			    list_entry(elem_ref_to,
				       struct btrfsic_block_link,
				       node_ref_to);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				btrfsic_print_rem_link(state, l);
			l->ref_cnt--;
			if (0 == l->ref_cnt) {
				list_del(&l->node_ref_to);
				list_del(&l->node_ref_from);
				btrfsic_block_link_hashtable_remove(l);
				btrfsic_block_link_free(l);
			}
		}

		if (block->is_superblock)
			ret = btrfsic_map_superblock(state, bytenr,
						     processed_len,
						     bdev, &block_ctx);
		else
			ret = btrfsic_map_block(state, bytenr, processed_len,
						&block_ctx, 0);
		if (ret) {
			printk(KERN_INFO
			       "btrfsic: btrfsic_map_block(root @%llu)"
			       " failed!\n", (unsigned long long)bytenr);
			goto continue_loop;
		}
		block_ctx.datav = mapped_datav;
		/* the following is required in case of writes to mirrors,
		 * use the same that was used for the lookup */
		block_ctx.dev = dev_state;
		block_ctx.dev_bytenr = dev_bytenr;

		/* patch the completion callback so IO completion of this
		 * block can be recorded */
		if (is_metadata || state->include_extent_data) {
			block->never_written = 0;
			block->iodone_w_error = 0;
			if (NULL != bio) {
				block->is_iodone = 0;
				BUG_ON(NULL == bio_is_patched);
				if (!*bio_is_patched) {
					block->orig_bio_bh_private =
					    bio->bi_private;
					block->orig_bio_bh_end_io.bio =
					    bio->bi_end_io;
					block->next_in_same_bio = NULL;
					bio->bi_private = block;
					bio->bi_end_io = btrfsic_bio_end_io;
					*bio_is_patched = 1;
				} else {
					/* bio already patched for an earlier
					 * block: chain this block in front */
					struct btrfsic_block *chained_block =
					    (struct btrfsic_block *)
					    bio->bi_private;

					BUG_ON(NULL == chained_block);
					block->orig_bio_bh_private =
					    chained_block->orig_bio_bh_private;
					block->orig_bio_bh_end_io.bio =
					    chained_block->orig_bio_bh_end_io.
					    bio;
					block->next_in_same_bio = chained_block;
					bio->bi_private = block;
				}
			} else if (NULL != bh) {
				block->is_iodone = 0;
				block->orig_bio_bh_private = bh->b_private;
				block->orig_bio_bh_end_io.bh = bh->b_end_io;
				block->next_in_same_bio = NULL;
				bh->b_private = block;
				bh->b_end_io = btrfsic_bh_end_io;
			} else {
				/* no bio and no bh: treat as already done */
				block->is_iodone = 1;
				block->orig_bio_bh_private = NULL;
				block->orig_bio_bh_end_io.bio = NULL;
				block->next_in_same_bio = NULL;
			}
		}

		block->flush_gen = dev_state->last_flush_gen + 1;
		block->submit_bio_bh_rw = submit_bio_bh_rw;
		if (is_metadata) {
			block->logical_bytenr = bytenr;
			block->is_metadata = 1;
			if (block->is_superblock) {
				BUG_ON(PAGE_CACHE_SIZE !=
				       BTRFS_SUPER_INFO_SIZE);
				ret = btrfsic_process_written_superblock(
						state,
						block,
						(struct btrfs_super_block *)
						mapped_datav[0]);
				if (state->print_mask &
				    BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
					printk(KERN_INFO
					"[after new superblock is written]:\n");
					btrfsic_dump_tree_sub(state, block, 0);
				}
			} else {
				block->mirror_num = 0;	/* unknown */
				ret = btrfsic_process_metablock(
						state,
						block,
						&block_ctx,
						0, 0);
			}
			if (ret)
				printk(KERN_INFO
				       "btrfsic: btrfsic_process_metablock"
				       "(root @%llu) failed!\n",
				       (unsigned long long)dev_bytenr);
		} else {
			block->is_metadata = 0;
			block->mirror_num = 0;	/* unknown */
			block->generation = BTRFSIC_GENERATION_UNKNOWN;
			if (!state->include_extent_data
			    && list_empty(&block->ref_from_list)) {
				/*
				 * disk block is overwritten with extent
				 * data (not meta data) and we are configured
				 * to not include extent data: take the
				 * chance and free the block's memory
				 */
				btrfsic_block_hashtable_remove(block);
				list_del(&block->all_blocks_node);
				btrfsic_block_free(block);
			}
		}
		btrfsic_release_block_ctx(&block_ctx);
	} else {
		/* block has not been found in hash table */
		u64 bytenr;

		if (!is_metadata) {
			processed_len = state->datablock_size;
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO "Written block (%s/%llu/?)"
				       " !found in hash table, D.\n",
				       dev_state->name,
				       (unsigned long long)dev_bytenr);
			if (!state->include_extent_data) {
				/* ignore that written D block */
				goto continue_loop;
			}

			/* this is getting ugly for the
			 * include_extent_data case... */
			bytenr = 0;	/* unknown */
			block_ctx.start = bytenr;
			block_ctx.len = processed_len;
			block_ctx.mem_to_free = NULL;
			block_ctx.pagev = NULL;
		} else {
			processed_len = state->metablock_size;
			bytenr = le64_to_cpu(((struct btrfs_header *)
					      mapped_datav[0])->bytenr);
			btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
						       dev_bytenr);
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO
				       "Written block @%llu (%s/%llu/?)"
				       " !found in hash table, M.\n",
				       (unsigned long long)bytenr,
				       dev_state->name,
				       (unsigned long long)dev_bytenr);

			ret = btrfsic_map_block(state, bytenr, processed_len,
						&block_ctx, 0);
			if (ret) {
				printk(KERN_INFO
				       "btrfsic: btrfsic_map_block(root @%llu)"
				       " failed!\n",
				       (unsigned long long)dev_bytenr);
				goto continue_loop;
			}
		}
		block_ctx.datav = mapped_datav;
		/* the following is required in case of writes to mirrors,
		 * use the same that was used for the lookup */
		block_ctx.dev = dev_state;
		block_ctx.dev_bytenr = dev_bytenr;

		/* create and initialize a new block entry */
		block = btrfsic_block_alloc();
		if (NULL == block) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			btrfsic_release_block_ctx(&block_ctx);
			goto continue_loop;
		}
		block->dev_state = dev_state;
		block->dev_bytenr = dev_bytenr;
		block->logical_bytenr = bytenr;
		block->is_metadata = is_metadata;
		block->never_written = 0;
		block->iodone_w_error = 0;
		block->mirror_num = 0;	/* unknown */
		block->flush_gen = dev_state->last_flush_gen + 1;
		block->submit_bio_bh_rw = submit_bio_bh_rw;
		if (NULL != bio) {
			block->is_iodone = 0;
			BUG_ON(NULL == bio_is_patched);
			if (!*bio_is_patched) {
				block->orig_bio_bh_private = bio->bi_private;
				block->orig_bio_bh_end_io.bio = bio->bi_end_io;
				block->next_in_same_bio = NULL;
				bio->bi_private = block;
				bio->bi_end_io = btrfsic_bio_end_io;
				*bio_is_patched = 1;
			} else {
				/* bio already patched: chain this block */
				struct btrfsic_block *chained_block =
				    (struct btrfsic_block *)
				    bio->bi_private;

				BUG_ON(NULL == chained_block);
				block->orig_bio_bh_private =
				    chained_block->orig_bio_bh_private;
				block->orig_bio_bh_end_io.bio =
				    chained_block->orig_bio_bh_end_io.bio;
				block->next_in_same_bio = chained_block;
				bio->bi_private = block;
			}
		} else if (NULL != bh) {
			block->is_iodone = 0;
			block->orig_bio_bh_private = bh->b_private;
			block->orig_bio_bh_end_io.bh = bh->b_end_io;
			block->next_in_same_bio = NULL;
			bh->b_private = block;
			bh->b_end_io = btrfsic_bh_end_io;
		} else {
			block->is_iodone = 1;
			block->orig_bio_bh_private = NULL;
			block->orig_bio_bh_end_io.bio = NULL;
			block->next_in_same_bio = NULL;
		}
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "New written %c-block @%llu (%s/%llu/%d)\n",
			       is_metadata ? 'M' : 'D',
			       (unsigned long long)block->logical_bytenr,
			       block->dev_state->name,
			       (unsigned long long)block->dev_bytenr,
			       block->mirror_num);
		list_add(&block->all_blocks_node, &state->all_blocks_list);
		btrfsic_block_hashtable_add(block, &state->block_hashtable);

		if (is_metadata) {
			ret = btrfsic_process_metablock(state, block,
							&block_ctx, 0, 0);
			if (ret)
				printk(KERN_INFO
				       "btrfsic: process_metablock(root @%llu)"
				       " failed!\n",
				       (unsigned long long)dev_bytenr);
		}
		btrfsic_release_block_ctx(&block_ctx);
	}

continue_loop:
	/* advance past the bytes consumed in this iteration */
	BUG_ON(!processed_len);
	dev_bytenr += processed_len;
	mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
	num_pages -= processed_len >> PAGE_CACHE_SHIFT;
	goto again;
}
2263
/*
 * Private bio completion handler. The submit path patched the bio so
 * that bi_private points to the first btrfsic_block of a chain (one
 * entry per block covered by the bio, linked via next_in_same_bio) and
 * bi_end_io points here. Restores the saved bi_private/bi_end_io,
 * updates the iodone/flush bookkeeping for every block in the chain
 * and finally calls the original completion routine.
 */
static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
{
	struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
	int iodone_w_error;

	/* mutex is not held! This is not safe if IO is not yet completed
	 * on umount */
	iodone_w_error = 0;
	if (bio_error_status)
		iodone_w_error = 1;

	BUG_ON(NULL == block);
	/* unpatch the bio before handing it back */
	bp->bi_private = block->orig_bio_bh_private;
	bp->bi_end_io = block->orig_bio_bh_end_io.bio;

	do {
		struct btrfsic_block *next_block;
		struct btrfsic_dev_state *const dev_state = block->dev_state;

		if ((dev_state->state->print_mask &
		     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
			printk(KERN_INFO
			       "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
			       bio_error_status,
			       btrfsic_get_block_type(dev_state->state, block),
			       (unsigned long long)block->logical_bytenr,
			       dev_state->name,
			       (unsigned long long)block->dev_bytenr,
			       block->mirror_num);
		/* read the chain link before setting is_iodone below,
		 * which may release the block */
		next_block = block->next_in_same_bio;
		block->iodone_w_error = iodone_w_error;
		if (block->submit_bio_bh_rw & REQ_FLUSH) {
			/* a completed FLUSH advances the device's flush
			 * generation; blocks written before it are now
			 * known to be on stable storage */
			dev_state->last_flush_gen++;
			if ((dev_state->state->print_mask &
			     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
				printk(KERN_INFO
				       "bio_end_io() new %s flush_gen=%llu\n",
				       dev_state->name,
				       (unsigned long long)
				       dev_state->last_flush_gen);
		}
		if (block->submit_bio_bh_rw & REQ_FUA)
			block->flush_gen = 0; /* FUA completed means block is
					       * on disk */
		block->is_iodone = 1; /* for FLUSH, this releases the block */
		block = next_block;
	} while (NULL != block);

	bp->bi_end_io(bp, bio_error_status);
}
2314
2315static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
2316{
2317	struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
2318	int iodone_w_error = !uptodate;
2319	struct btrfsic_dev_state *dev_state;
2320
2321	BUG_ON(NULL == block);
2322	dev_state = block->dev_state;
2323	if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2324		printk(KERN_INFO
2325		       "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
2326		       iodone_w_error,
2327		       btrfsic_get_block_type(dev_state->state, block),
2328		       (unsigned long long)block->logical_bytenr,
2329		       block->dev_state->name,
2330		       (unsigned long long)block->dev_bytenr,
2331		       block->mirror_num);
2332
2333	block->iodone_w_error = iodone_w_error;
2334	if (block->submit_bio_bh_rw & REQ_FLUSH) {
2335		dev_state->last_flush_gen++;
2336		if ((dev_state->state->print_mask &
2337		     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2338			printk(KERN_INFO
2339			       "bh_end_io() new %s flush_gen=%llu\n",
2340			       dev_state->name,
2341			       (unsigned long long)dev_state->last_flush_gen);
2342	}
2343	if (block->submit_bio_bh_rw & REQ_FUA)
2344		block->flush_gen = 0; /* FUA completed means block is on disk */
2345
2346	bh->b_private = block->orig_bio_bh_private;
2347	bh->b_end_io = block->orig_bio_bh_end_io.bh;
2348	block->is_iodone = 1; /* for FLUSH, this releases the block */
2349	bh->b_end_io(bh, uptodate);
2350}
2351
/*
 * Called after a superblock write has been intercepted. Records the
 * new superblock as the latest one if its generation is the highest
 * seen so far, (re-)creates the linkage from the superblock to the
 * root, chunk and log tree roots on all mirrors, and finally verifies
 * rule 2: everything referenced by the new superblock must be safely
 * on disk (written, iodone, generation-consistent, flushed).
 *
 * Returns 0 on success, -1 when block mapping or allocation fails.
 */
static int btrfsic_process_written_superblock(
		struct btrfsic_state *state,
		struct btrfsic_block *const superblock,
		struct btrfs_super_block *const super_hdr)
{
	int pass;

	superblock->generation = btrfs_super_generation(super_hdr);
	if (!(superblock->generation > state->max_superblock_generation ||
	      0 == state->max_superblock_generation)) {
		/* older (or equal) generation: keep the current latest */
		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
			printk(KERN_INFO
			       "btrfsic: superblock @%llu (%s/%llu/%d)"
			       " with old gen %llu <= %llu\n",
			       (unsigned long long)superblock->logical_bytenr,
			       superblock->dev_state->name,
			       (unsigned long long)superblock->dev_bytenr,
			       superblock->mirror_num,
			       (unsigned long long)
			       btrfs_super_generation(super_hdr),
			       (unsigned long long)
			       state->max_superblock_generation);
	} else {
		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
			printk(KERN_INFO
			       "btrfsic: got new superblock @%llu (%s/%llu/%d)"
			       " with new gen %llu > %llu\n",
			       (unsigned long long)superblock->logical_bytenr,
			       superblock->dev_state->name,
			       (unsigned long long)superblock->dev_bytenr,
			       superblock->mirror_num,
			       (unsigned long long)
			       btrfs_super_generation(super_hdr),
			       (unsigned long long)
			       state->max_superblock_generation);

		/* this superblock becomes the reference for rule checks */
		state->max_superblock_generation =
		    btrfs_super_generation(super_hdr);
		state->latest_superblock = superblock;
	}

	/* pass 0: root tree, pass 1: chunk tree, pass 2: log tree root */
	for (pass = 0; pass < 3; pass++) {
		int ret;
		u64 next_bytenr;
		struct btrfsic_block *next_block;
		struct btrfsic_block_data_ctx tmp_next_block_ctx;
		struct btrfsic_block_link *l;
		int num_copies;
		int mirror_num;
		const char *additional_string = NULL;
		struct btrfs_disk_key tmp_disk_key;

		tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
		tmp_disk_key.offset = 0;

		switch (pass) {
		case 0:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
			additional_string = "root ";
			next_bytenr = btrfs_super_root(super_hdr);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "root@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		case 1:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
			additional_string = "chunk ";
			next_bytenr = btrfs_super_chunk_root(super_hdr);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "chunk@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		case 2:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
			additional_string = "log ";
			next_bytenr = btrfs_super_log_root(super_hdr);
			/* a zero log root means there is no log tree */
			if (0 == next_bytenr)
				continue;
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "log@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		}

		num_copies =
		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
				     next_bytenr, BTRFS_SUPER_INFO_SIZE);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, num_copies);
		/* link the superblock to the tree root on every mirror */
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			int was_created;

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO
				       "btrfsic_process_written_superblock("
				       "mirror_num=%d)\n", mirror_num);
			ret = btrfsic_map_block(state, next_bytenr,
						BTRFS_SUPER_INFO_SIZE,
						&tmp_next_block_ctx,
						mirror_num);
			if (ret) {
				printk(KERN_INFO
				       "btrfsic: btrfsic_map_block(@%llu,"
				       " mirror=%d) failed!\n",
				       (unsigned long long)next_bytenr,
				       mirror_num);
				return -1;
			}

			next_block = btrfsic_block_lookup_or_add(
					state,
					&tmp_next_block_ctx,
					additional_string,
					1, 0, 1,
					mirror_num,
					&was_created);
			if (NULL == next_block) {
				printk(KERN_INFO
				       "btrfsic: error, kmalloc failed!\n");
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				return -1;
			}

			next_block->disk_key = tmp_disk_key;
			if (was_created)
				next_block->generation =
				    BTRFSIC_GENERATION_UNKNOWN;
			l = btrfsic_block_link_lookup_or_add(
					state,
					&tmp_next_block_ctx,
					next_block,
					superblock,
					BTRFSIC_GENERATION_UNKNOWN);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
			if (NULL == l)
				return -1;
		}
	}

	/* rule 2: everything the new superblock references must be safe */
	if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) {
		WARN_ON(1);
		btrfsic_dump_tree(state);
	}

	return 0;
}
2505
2506static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2507					struct btrfsic_block *const block,
2508					int recursion_level)
2509{
2510	struct list_head *elem_ref_to;
2511	int ret = 0;
2512
2513	if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2514		/*
2515		 * Note that this situation can happen and does not
2516		 * indicate an error in regular cases. It happens
2517		 * when disk blocks are freed and later reused.
2518		 * The check-integrity module is not aware of any
2519		 * block free operations, it just recognizes block
2520		 * write operations. Therefore it keeps the linkage
2521		 * information for a block until a block is
2522		 * rewritten. This can temporarily cause incorrect
2523		 * and even circular linkage informations. This
2524		 * causes no harm unless such blocks are referenced
2525		 * by the most recent super block.
2526		 */
2527		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2528			printk(KERN_INFO
2529			       "btrfsic: abort cyclic linkage (case 1).\n");
2530
2531		return ret;
2532	}
2533
2534	/*
2535	 * This algorithm is recursive because the amount of used stack
2536	 * space is very small and the max recursion depth is limited.
2537	 */
2538	list_for_each(elem_ref_to, &block->ref_to_list) {
2539		const struct btrfsic_block_link *const l =
2540		    list_entry(elem_ref_to, struct btrfsic_block_link,
2541			       node_ref_to);
2542
2543		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2544			printk(KERN_INFO
2545			       "rl=%d, %c @%llu (%s/%llu/%d)"
2546			       " %u* refers to %c @%llu (%s/%llu/%d)\n",
2547			       recursion_level,
2548			       btrfsic_get_block_type(state, block),
2549			       (unsigned long long)block->logical_bytenr,
2550			       block->dev_state->name,
2551			       (unsigned long long)block->dev_bytenr,
2552			       block->mirror_num,
2553			       l->ref_cnt,
2554			       btrfsic_get_block_type(state, l->block_ref_to),
2555			       (unsigned long long)
2556			       l->block_ref_to->logical_bytenr,
2557			       l->block_ref_to->dev_state->name,
2558			       (unsigned long long)l->block_ref_to->dev_bytenr,
2559			       l->block_ref_to->mirror_num);
2560		if (l->block_ref_to->never_written) {
2561			printk(KERN_INFO "btrfs: attempt to write superblock"
2562			       " which references block %c @%llu (%s/%llu/%d)"
2563			       " which is never written!\n",
2564			       btrfsic_get_block_type(state, l->block_ref_to),
2565			       (unsigned long long)
2566			       l->block_ref_to->logical_bytenr,
2567			       l->block_ref_to->dev_state->name,
2568			       (unsigned long long)l->block_ref_to->dev_bytenr,
2569			       l->block_ref_to->mirror_num);
2570			ret = -1;
2571		} else if (!l->block_ref_to->is_iodone) {
2572			printk(KERN_INFO "btrfs: attempt to write superblock"
2573			       " which references block %c @%llu (%s/%llu/%d)"
2574			       " which is not yet iodone!\n",
2575			       btrfsic_get_block_type(state, l->block_ref_to),
2576			       (unsigned long long)
2577			       l->block_ref_to->logical_bytenr,
2578			       l->block_ref_to->dev_state->name,
2579			       (unsigned long long)l->block_ref_to->dev_bytenr,
2580			       l->block_ref_to->mirror_num);
2581			ret = -1;
2582		} else if (l->parent_generation !=
2583			   l->block_ref_to->generation &&
2584			   BTRFSIC_GENERATION_UNKNOWN !=
2585			   l->parent_generation &&
2586			   BTRFSIC_GENERATION_UNKNOWN !=
2587			   l->block_ref_to->generation) {
2588			printk(KERN_INFO "btrfs: attempt to write superblock"
2589			       " which references block %c @%llu (%s/%llu/%d)"
2590			       " with generation %llu !="
2591			       " parent generation %llu!\n",
2592			       btrfsic_get_block_type(state, l->block_ref_to),
2593			       (unsigned long long)
2594			       l->block_ref_to->logical_bytenr,
2595			       l->block_ref_to->dev_state->name,
2596			       (unsigned long long)l->block_ref_to->dev_bytenr,
2597			       l->block_ref_to->mirror_num,
2598			       (unsigned long long)l->block_ref_to->generation,
2599			       (unsigned long long)l->parent_generation);
2600			ret = -1;
2601		} else if (l->block_ref_to->flush_gen >
2602			   l->block_ref_to->dev_state->last_flush_gen) {
2603			printk(KERN_INFO "btrfs: attempt to write superblock"
2604			       " which references block %c @%llu (%s/%llu/%d)"
2605			       " which is not flushed out of disk's write cache"
2606			       " (block flush_gen=%llu,"
2607			       " dev->flush_gen=%llu)!\n",
2608			       btrfsic_get_block_type(state, l->block_ref_to),
2609			       (unsigned long long)
2610			       l->block_ref_to->logical_bytenr,
2611			       l->block_ref_to->dev_state->name,
2612			       (unsigned long long)l->block_ref_to->dev_bytenr,
2613			       l->block_ref_to->mirror_num,
2614			       (unsigned long long)block->flush_gen,
2615			       (unsigned long long)
2616			       l->block_ref_to->dev_state->last_flush_gen);
2617			ret = -1;
2618		} else if (-1 == btrfsic_check_all_ref_blocks(state,
2619							      l->block_ref_to,
2620							      recursion_level +
2621							      1)) {
2622			ret = -1;
2623		}
2624	}
2625
2626	return ret;
2627}
2628
2629static int btrfsic_is_block_ref_by_superblock(
2630		const struct btrfsic_state *state,
2631		const struct btrfsic_block *block,
2632		int recursion_level)
2633{
2634	struct list_head *elem_ref_from;
2635
2636	if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2637		/* refer to comment at "abort cyclic linkage (case 1)" */
2638		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2639			printk(KERN_INFO
2640			       "btrfsic: abort cyclic linkage (case 2).\n");
2641
2642		return 0;
2643	}
2644
2645	/*
2646	 * This algorithm is recursive because the amount of used stack space
2647	 * is very small and the max recursion depth is limited.
2648	 */
2649	list_for_each(elem_ref_from, &block->ref_from_list) {
2650		const struct btrfsic_block_link *const l =
2651		    list_entry(elem_ref_from, struct btrfsic_block_link,
2652			       node_ref_from);
2653
2654		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2655			printk(KERN_INFO
2656			       "rl=%d, %c @%llu (%s/%llu/%d)"
2657			       " is ref %u* from %c @%llu (%s/%llu/%d)\n",
2658			       recursion_level,
2659			       btrfsic_get_block_type(state, block),
2660			       (unsigned long long)block->logical_bytenr,
2661			       block->dev_state->name,
2662			       (unsigned long long)block->dev_bytenr,
2663			       block->mirror_num,
2664			       l->ref_cnt,
2665			       btrfsic_get_block_type(state, l->block_ref_from),
2666			       (unsigned long long)
2667			       l->block_ref_from->logical_bytenr,
2668			       l->block_ref_from->dev_state->name,
2669			       (unsigned long long)
2670			       l->block_ref_from->dev_bytenr,
2671			       l->block_ref_from->mirror_num);
2672		if (l->block_ref_from->is_superblock &&
2673		    state->latest_superblock->dev_bytenr ==
2674		    l->block_ref_from->dev_bytenr &&
2675		    state->latest_superblock->dev_state->bdev ==
2676		    l->block_ref_from->dev_state->bdev)
2677			return 1;
2678		else if (btrfsic_is_block_ref_by_superblock(state,
2679							    l->block_ref_from,
2680							    recursion_level +
2681							    1))
2682			return 1;
2683	}
2684
2685	return 0;
2686}
2687
2688static void btrfsic_print_add_link(const struct btrfsic_state *state,
2689				   const struct btrfsic_block_link *l)
2690{
2691	printk(KERN_INFO
2692	       "Add %u* link from %c @%llu (%s/%llu/%d)"
2693	       " to %c @%llu (%s/%llu/%d).\n",
2694	       l->ref_cnt,
2695	       btrfsic_get_block_type(state, l->block_ref_from),
2696	       (unsigned long long)l->block_ref_from->logical_bytenr,
2697	       l->block_ref_from->dev_state->name,
2698	       (unsigned long long)l->block_ref_from->dev_bytenr,
2699	       l->block_ref_from->mirror_num,
2700	       btrfsic_get_block_type(state, l->block_ref_to),
2701	       (unsigned long long)l->block_ref_to->logical_bytenr,
2702	       l->block_ref_to->dev_state->name,
2703	       (unsigned long long)l->block_ref_to->dev_bytenr,
2704	       l->block_ref_to->mirror_num);
2705}
2706
2707static void btrfsic_print_rem_link(const struct btrfsic_state *state,
2708				   const struct btrfsic_block_link *l)
2709{
2710	printk(KERN_INFO
2711	       "Rem %u* link from %c @%llu (%s/%llu/%d)"
2712	       " to %c @%llu (%s/%llu/%d).\n",
2713	       l->ref_cnt,
2714	       btrfsic_get_block_type(state, l->block_ref_from),
2715	       (unsigned long long)l->block_ref_from->logical_bytenr,
2716	       l->block_ref_from->dev_state->name,
2717	       (unsigned long long)l->block_ref_from->dev_bytenr,
2718	       l->block_ref_from->mirror_num,
2719	       btrfsic_get_block_type(state, l->block_ref_to),
2720	       (unsigned long long)l->block_ref_to->logical_bytenr,
2721	       l->block_ref_to->dev_state->name,
2722	       (unsigned long long)l->block_ref_to->dev_bytenr,
2723	       l->block_ref_to->mirror_num);
2724}
2725
2726static char btrfsic_get_block_type(const struct btrfsic_state *state,
2727				   const struct btrfsic_block *block)
2728{
2729	if (block->is_superblock &&
2730	    state->latest_superblock->dev_bytenr == block->dev_bytenr &&
2731	    state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
2732		return 'S';
2733	else if (block->is_superblock)
2734		return 's';
2735	else if (block->is_metadata)
2736		return 'M';
2737	else
2738		return 'D';
2739}
2740
/*
 * Dump the whole block reference tree, rooted at the most recently
 * written superblock.  Called from btrfsic_mount() with btrfsic_mutex
 * held when BTRFSIC_PRINT_MASK_INITIAL_TREE is set in the print mask.
 */
static void btrfsic_dump_tree(const struct btrfsic_state *state)
{
	btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
}
2745
2746static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
2747				  const struct btrfsic_block *block,
2748				  int indent_level)
2749{
2750	struct list_head *elem_ref_to;
2751	int indent_add;
2752	static char buf[80];
2753	int cursor_position;
2754
2755	/*
2756	 * Should better fill an on-stack buffer with a complete line and
2757	 * dump it at once when it is time to print a newline character.
2758	 */
2759
2760	/*
2761	 * This algorithm is recursive because the amount of used stack space
2762	 * is very small and the max recursion depth is limited.
2763	 */
2764	indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
2765			     btrfsic_get_block_type(state, block),
2766			     (unsigned long long)block->logical_bytenr,
2767			     block->dev_state->name,
2768			     (unsigned long long)block->dev_bytenr,
2769			     block->mirror_num);
2770	if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2771		printk("[...]\n");
2772		return;
2773	}
2774	printk(buf);
2775	indent_level += indent_add;
2776	if (list_empty(&block->ref_to_list)) {
2777		printk("\n");
2778		return;
2779	}
2780	if (block->mirror_num > 1 &&
2781	    !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
2782		printk(" [...]\n");
2783		return;
2784	}
2785
2786	cursor_position = indent_level;
2787	list_for_each(elem_ref_to, &block->ref_to_list) {
2788		const struct btrfsic_block_link *const l =
2789		    list_entry(elem_ref_to, struct btrfsic_block_link,
2790			       node_ref_to);
2791
2792		while (cursor_position < indent_level) {
2793			printk(" ");
2794			cursor_position++;
2795		}
2796		if (l->ref_cnt > 1)
2797			indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
2798		else
2799			indent_add = sprintf(buf, " --> ");
2800		if (indent_level + indent_add >
2801		    BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2802			printk("[...]\n");
2803			cursor_position = 0;
2804			continue;
2805		}
2806
2807		printk(buf);
2808
2809		btrfsic_dump_tree_sub(state, l->block_ref_to,
2810				      indent_level + indent_add);
2811		cursor_position = 0;
2812	}
2813}
2814
2815static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
2816		struct btrfsic_state *state,
2817		struct btrfsic_block_data_ctx *next_block_ctx,
2818		struct btrfsic_block *next_block,
2819		struct btrfsic_block *from_block,
2820		u64 parent_generation)
2821{
2822	struct btrfsic_block_link *l;
2823
2824	l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
2825						next_block_ctx->dev_bytenr,
2826						from_block->dev_state->bdev,
2827						from_block->dev_bytenr,
2828						&state->block_link_hashtable);
2829	if (NULL == l) {
2830		l = btrfsic_block_link_alloc();
2831		if (NULL == l) {
2832			printk(KERN_INFO
2833			       "btrfsic: error, kmalloc" " failed!\n");
2834			return NULL;
2835		}
2836
2837		l->block_ref_to = next_block;
2838		l->block_ref_from = from_block;
2839		l->ref_cnt = 1;
2840		l->parent_generation = parent_generation;
2841
2842		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2843			btrfsic_print_add_link(state, l);
2844
2845		list_add(&l->node_ref_to, &from_block->ref_to_list);
2846		list_add(&l->node_ref_from, &next_block->ref_from_list);
2847
2848		btrfsic_block_link_hashtable_add(l,
2849						 &state->block_link_hashtable);
2850	} else {
2851		l->ref_cnt++;
2852		l->parent_generation = parent_generation;
2853		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2854			btrfsic_print_add_link(state, l);
2855	}
2856
2857	return l;
2858}
2859
2860static struct btrfsic_block *btrfsic_block_lookup_or_add(
2861		struct btrfsic_state *state,
2862		struct btrfsic_block_data_ctx *block_ctx,
2863		const char *additional_string,
2864		int is_metadata,
2865		int is_iodone,
2866		int never_written,
2867		int mirror_num,
2868		int *was_created)
2869{
2870	struct btrfsic_block *block;
2871
2872	block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
2873					       block_ctx->dev_bytenr,
2874					       &state->block_hashtable);
2875	if (NULL == block) {
2876		struct btrfsic_dev_state *dev_state;
2877
2878		block = btrfsic_block_alloc();
2879		if (NULL == block) {
2880			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2881			return NULL;
2882		}
2883		dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
2884		if (NULL == dev_state) {
2885			printk(KERN_INFO
2886			       "btrfsic: error, lookup dev_state failed!\n");
2887			btrfsic_block_free(block);
2888			return NULL;
2889		}
2890		block->dev_state = dev_state;
2891		block->dev_bytenr = block_ctx->dev_bytenr;
2892		block->logical_bytenr = block_ctx->start;
2893		block->is_metadata = is_metadata;
2894		block->is_iodone = is_iodone;
2895		block->never_written = never_written;
2896		block->mirror_num = mirror_num;
2897		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2898			printk(KERN_INFO
2899			       "New %s%c-block @%llu (%s/%llu/%d)\n",
2900			       additional_string,
2901			       btrfsic_get_block_type(state, block),
2902			       (unsigned long long)block->logical_bytenr,
2903			       dev_state->name,
2904			       (unsigned long long)block->dev_bytenr,
2905			       mirror_num);
2906		list_add(&block->all_blocks_node, &state->all_blocks_list);
2907		btrfsic_block_hashtable_add(block, &state->block_hashtable);
2908		if (NULL != was_created)
2909			*was_created = 1;
2910	} else {
2911		if (NULL != was_created)
2912			*was_created = 0;
2913	}
2914
2915	return block;
2916}
2917
2918static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2919					   u64 bytenr,
2920					   struct btrfsic_dev_state *dev_state,
2921					   u64 dev_bytenr)
2922{
2923	int num_copies;
2924	int mirror_num;
2925	int ret;
2926	struct btrfsic_block_data_ctx block_ctx;
2927	int match = 0;
2928
2929	num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree,
2930				      bytenr, state->metablock_size);
2931
2932	for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2933		ret = btrfsic_map_block(state, bytenr, state->metablock_size,
2934					&block_ctx, mirror_num);
2935		if (ret) {
2936			printk(KERN_INFO "btrfsic:"
2937			       " btrfsic_map_block(logical @%llu,"
2938			       " mirror %d) failed!\n",
2939			       (unsigned long long)bytenr, mirror_num);
2940			continue;
2941		}
2942
2943		if (dev_state->bdev == block_ctx.dev->bdev &&
2944		    dev_bytenr == block_ctx.dev_bytenr) {
2945			match++;
2946			btrfsic_release_block_ctx(&block_ctx);
2947			break;
2948		}
2949		btrfsic_release_block_ctx(&block_ctx);
2950	}
2951
2952	if (!match) {
2953		printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
2954		       " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
2955		       " phys_bytenr=%llu)!\n",
2956		       (unsigned long long)bytenr, dev_state->name,
2957		       (unsigned long long)dev_bytenr);
2958		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2959			ret = btrfsic_map_block(state, bytenr,
2960						state->metablock_size,
2961						&block_ctx, mirror_num);
2962			if (ret)
2963				continue;
2964
2965			printk(KERN_INFO "Read logical bytenr @%llu maps to"
2966			       " (%s/%llu/%d)\n",
2967			       (unsigned long long)bytenr,
2968			       block_ctx.dev->name,
2969			       (unsigned long long)block_ctx.dev_bytenr,
2970			       mirror_num);
2971		}
2972		WARN_ON(1);
2973	}
2974}
2975
2976static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
2977		struct block_device *bdev)
2978{
2979	struct btrfsic_dev_state *ds;
2980
2981	ds = btrfsic_dev_state_hashtable_lookup(bdev,
2982						&btrfsic_dev_state_hashtable);
2983	return ds;
2984}
2985
/*
 * Intercepting replacement for submit_bh(): records buffer_head based
 * writes (btrfs only uses these for the superblock) and FLUSH
 * requests in the integrity-checker state before forwarding the
 * request to the real submit_bh().
 */
int btrfsic_submit_bh(int rw, struct buffer_head *bh)
{
	struct btrfsic_dev_state *dev_state;

	/* checker not active: pass straight through */
	if (!btrfsic_is_initialized)
		return submit_bh(rw, bh);

	mutex_lock(&btrfsic_mutex);
	/* since btrfsic_submit_bh() might also be called before
	 * btrfsic_mount(), this might return NULL */
	dev_state = btrfsic_dev_state_lookup(bh->b_bdev);

	/* Only called to write the superblock (incl. FLUSH/FUA) */
	if (NULL != dev_state &&
	    (rw & WRITE) && bh->b_size > 0) {
		u64 dev_bytenr;

		/* NOTE(review): assumes a fixed 4096 byte block size when
		 * converting b_blocknr to a byte offset — confirm this
		 * matches the bh block size used by the superblock writer */
		dev_bytenr = 4096 * bh->b_blocknr;
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
			       " size=%lu, data=%p, bdev=%p)\n",
			       rw, (unsigned long)bh->b_blocknr,
			       (unsigned long long)dev_bytenr,
			       (unsigned long)bh->b_size, bh->b_data,
			       bh->b_bdev);
		/* verify/record the written block (may patch bh->b_end_io) */
		btrfsic_process_written_block(dev_state, dev_bytenr,
					      &bh->b_data, 1, NULL,
					      NULL, bh, rw);
	} else if (NULL != dev_state && (rw & REQ_FLUSH)) {
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
			       rw, bh->b_bdev);
		if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
			/* only one FLUSH can be tracked per device at a time;
			 * a second one while the dummy block is in flight is
			 * forwarded without being recorded */
			if ((dev_state->state->print_mask &
			     (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			      BTRFSIC_PRINT_MASK_VERBOSE)))
				printk(KERN_INFO
				       "btrfsic_submit_bh(%s) with FLUSH"
				       " but dummy block already in use"
				       " (ignored)!\n",
				       dev_state->name);
		} else {
			/* track the FLUSH via the per-device dummy block and
			 * hook the bh completion so the flush generation is
			 * advanced when the request finishes */
			struct btrfsic_block *const block =
				&dev_state->dummy_block_for_bio_bh_flush;

			block->is_iodone = 0;
			block->never_written = 0;
			block->iodone_w_error = 0;
			block->flush_gen = dev_state->last_flush_gen + 1;
			block->submit_bio_bh_rw = rw;
			/* save the original completion so it can be chained
			 * from btrfsic_bh_end_io() */
			block->orig_bio_bh_private = bh->b_private;
			block->orig_bio_bh_end_io.bh = bh->b_end_io;
			block->next_in_same_bio = NULL;
			bh->b_private = block;
			bh->b_end_io = btrfsic_bh_end_io;
		}
	}
	mutex_unlock(&btrfsic_mutex);
	return submit_bh(rw, bh);
}
3050
/*
 * Intercepting replacement for submit_bio(): maps and inspects the
 * pages of every write bio (and records FLUSH requests) in the
 * integrity-checker state, then forwards the bio to the real
 * submit_bio().  On internal allocation/map failure the check is
 * silently skipped and the bio is still submitted.
 */
void btrfsic_submit_bio(int rw, struct bio *bio)
{
	struct btrfsic_dev_state *dev_state;

	/* checker not active: pass straight through */
	if (!btrfsic_is_initialized) {
		submit_bio(rw, bio);
		return;
	}

	mutex_lock(&btrfsic_mutex);
	/* since btrfsic_submit_bio() is also called before
	 * btrfsic_mount(), this might return NULL */
	dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
	if (NULL != dev_state &&
	    (rw & WRITE) && NULL != bio->bi_io_vec) {
		unsigned int i;
		u64 dev_bytenr;
		int bio_is_patched;
		char **mapped_datav;

		/* byte offset on the device of the first sector */
		dev_bytenr = 512 * bio->bi_sector;
		bio_is_patched = 0;
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bio(rw=0x%x, bi_vcnt=%u,"
			       " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n",
			       rw, bio->bi_vcnt, (unsigned long)bio->bi_sector,
			       (unsigned long long)dev_bytenr,
			       bio->bi_bdev);

		/* temporarily kmap every page of the bio so the written
		 * data can be examined as one virtual-address array */
		mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
				       GFP_NOFS);
		if (!mapped_datav)
			goto leave;
		for (i = 0; i < bio->bi_vcnt; i++) {
			BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
			mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
			if (!mapped_datav[i]) {
				/* unwind the pages mapped so far */
				while (i > 0) {
					i--;
					kunmap(bio->bi_io_vec[i].bv_page);
				}
				kfree(mapped_datav);
				goto leave;
			}
			if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			     BTRFSIC_PRINT_MASK_VERBOSE) ==
			    (dev_state->state->print_mask &
			     (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			      BTRFSIC_PRINT_MASK_VERBOSE)))
				printk(KERN_INFO
				       "#%u: page=%p, len=%u, offset=%u\n",
				       i, bio->bi_io_vec[i].bv_page,
				       bio->bi_io_vec[i].bv_len,
				       bio->bi_io_vec[i].bv_offset);
		}
		/* verify/record the written blocks (may patch the bio's
		 * end_io, reported via bio_is_patched) */
		btrfsic_process_written_block(dev_state, dev_bytenr,
					      mapped_datav, bio->bi_vcnt,
					      bio, &bio_is_patched,
					      NULL, rw);
		/* i == bi_vcnt here; unmap all pages in reverse order */
		while (i > 0) {
			i--;
			kunmap(bio->bi_io_vec[i].bv_page);
		}
		kfree(mapped_datav);
	} else if (NULL != dev_state && (rw & REQ_FLUSH)) {
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
			       rw, bio->bi_bdev);
		if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
			/* only one FLUSH can be tracked per device at a time;
			 * a second one while the dummy block is in flight is
			 * forwarded without being recorded */
			if ((dev_state->state->print_mask &
			     (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			      BTRFSIC_PRINT_MASK_VERBOSE)))
				printk(KERN_INFO
				       "btrfsic_submit_bio(%s) with FLUSH"
				       " but dummy block already in use"
				       " (ignored)!\n",
				       dev_state->name);
		} else {
			/* track the FLUSH via the per-device dummy block and
			 * hook the bio completion so the flush generation is
			 * advanced when the request finishes */
			struct btrfsic_block *const block =
				&dev_state->dummy_block_for_bio_bh_flush;

			block->is_iodone = 0;
			block->never_written = 0;
			block->iodone_w_error = 0;
			block->flush_gen = dev_state->last_flush_gen + 1;
			block->submit_bio_bh_rw = rw;
			/* save the original completion so it can be chained
			 * from btrfsic_bio_end_io() */
			block->orig_bio_bh_private = bio->bi_private;
			block->orig_bio_bh_end_io.bio = bio->bi_end_io;
			block->next_in_same_bio = NULL;
			bio->bi_private = block;
			bio->bi_end_io = btrfsic_bio_end_io;
		}
	}
leave:
	mutex_unlock(&btrfsic_mutex);

	submit_bio(rw, bio);
}
3153
3154int btrfsic_mount(struct btrfs_root *root,
3155		  struct btrfs_fs_devices *fs_devices,
3156		  int including_extent_data, u32 print_mask)
3157{
3158	int ret;
3159	struct btrfsic_state *state;
3160	struct list_head *dev_head = &fs_devices->devices;
3161	struct btrfs_device *device;
3162
3163	if (root->nodesize != root->leafsize) {
3164		printk(KERN_INFO
3165		       "btrfsic: cannot handle nodesize %d != leafsize %d!\n",
3166		       root->nodesize, root->leafsize);
3167		return -1;
3168	}
3169	if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
3170		printk(KERN_INFO
3171		       "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3172		       root->nodesize, (unsigned long)PAGE_CACHE_SIZE);
3173		return -1;
3174	}
3175	if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3176		printk(KERN_INFO
3177		       "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3178		       root->leafsize, (unsigned long)PAGE_CACHE_SIZE);
3179		return -1;
3180	}
3181	if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3182		printk(KERN_INFO
3183		       "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3184		       root->sectorsize, (unsigned long)PAGE_CACHE_SIZE);
3185		return -1;
3186	}
3187	state = kzalloc(sizeof(*state), GFP_NOFS);
3188	if (NULL == state) {
3189		printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n");
3190		return -1;
3191	}
3192
3193	if (!btrfsic_is_initialized) {
3194		mutex_init(&btrfsic_mutex);
3195		btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
3196		btrfsic_is_initialized = 1;
3197	}
3198	mutex_lock(&btrfsic_mutex);
3199	state->root = root;
3200	state->print_mask = print_mask;
3201	state->include_extent_data = including_extent_data;
3202	state->csum_size = 0;
3203	state->metablock_size = root->nodesize;
3204	state->datablock_size = root->sectorsize;
3205	INIT_LIST_HEAD(&state->all_blocks_list);
3206	btrfsic_block_hashtable_init(&state->block_hashtable);
3207	btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
3208	state->max_superblock_generation = 0;
3209	state->latest_superblock = NULL;
3210
3211	list_for_each_entry(device, dev_head, dev_list) {
3212		struct btrfsic_dev_state *ds;
3213		char *p;
3214
3215		if (!device->bdev || !device->name)
3216			continue;
3217
3218		ds = btrfsic_dev_state_alloc();
3219		if (NULL == ds) {
3220			printk(KERN_INFO
3221			       "btrfs check-integrity: kmalloc() failed!\n");
3222			mutex_unlock(&btrfsic_mutex);
3223			return -1;
3224		}
3225		ds->bdev = device->bdev;
3226		ds->state = state;
3227		bdevname(ds->bdev, ds->name);
3228		ds->name[BDEVNAME_SIZE - 1] = '\0';
3229		for (p = ds->name; *p != '\0'; p++);
3230		while (p > ds->name && *p != '/')
3231			p--;
3232		if (*p == '/')
3233			p++;
3234		strlcpy(ds->name, p, sizeof(ds->name));
3235		btrfsic_dev_state_hashtable_add(ds,
3236						&btrfsic_dev_state_hashtable);
3237	}
3238
3239	ret = btrfsic_process_superblock(state, fs_devices);
3240	if (0 != ret) {
3241		mutex_unlock(&btrfsic_mutex);
3242		btrfsic_unmount(root, fs_devices);
3243		return ret;
3244	}
3245
3246	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
3247		btrfsic_dump_database(state);
3248	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
3249		btrfsic_dump_tree(state);
3250
3251	mutex_unlock(&btrfsic_mutex);
3252	return 0;
3253}
3254
/*
 * Disable integrity checking for a filesystem: remove and free the
 * btrfsic_dev_state of every device, then free all tracked blocks and
 * block links and the btrfsic_state itself.  Also used by
 * btrfsic_mount() for error-path cleanup.
 */
void btrfsic_unmount(struct btrfs_root *root,
		     struct btrfs_fs_devices *fs_devices)
{
	struct list_head *elem_all;
	struct list_head *tmp_all;
	struct btrfsic_state *state;
	struct list_head *dev_head = &fs_devices->devices;
	struct btrfs_device *device;

	if (!btrfsic_is_initialized)
		return;

	mutex_lock(&btrfsic_mutex);

	/* recover the state pointer via any of this fs's dev_states while
	 * removing them from the global hashtable */
	state = NULL;
	list_for_each_entry(device, dev_head, dev_list) {
		struct btrfsic_dev_state *ds;

		if (!device->bdev || !device->name)
			continue;

		ds = btrfsic_dev_state_hashtable_lookup(
				device->bdev,
				&btrfsic_dev_state_hashtable);
		if (NULL != ds) {
			state = ds->state;
			btrfsic_dev_state_hashtable_remove(ds);
			btrfsic_dev_state_free(ds);
		}
	}

	if (NULL == state) {
		printk(KERN_INFO
		       "btrfsic: error, cannot find state information"
		       " on umount!\n");
		mutex_unlock(&btrfsic_mutex);
		return;
	}

	/*
	 * Don't care about keeping the lists' state up to date,
	 * just free all memory that was allocated dynamically.
	 * Free the blocks and the block_links.
	 */
	list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) {
		struct btrfsic_block *const b_all =
		    list_entry(elem_all, struct btrfsic_block,
			       all_blocks_node);
		struct list_head *elem_ref_to;
		struct list_head *tmp_ref_to;

		/* drop every outgoing link of this block */
		list_for_each_safe(elem_ref_to, tmp_ref_to,
				   &b_all->ref_to_list) {
			struct btrfsic_block_link *const l =
			    list_entry(elem_ref_to,
				       struct btrfsic_block_link,
				       node_ref_to);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				btrfsic_print_rem_link(state, l);

			l->ref_cnt--;
			if (0 == l->ref_cnt)
				btrfsic_block_link_free(l);
		}

		/* a block with I/O still in flight must not be freed (its
		 * completion handler would touch freed memory); leak it and
		 * complain instead */
		if (b_all->is_iodone)
			btrfsic_block_free(b_all);
		else
			printk(KERN_INFO "btrfs: attempt to free %c-block"
			       " @%llu (%s/%llu/%d) on umount which is"
			       " not yet iodone!\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num);
	}

	mutex_unlock(&btrfsic_mutex);

	kfree(state);
}
3338