/* check-integrity.c revision e06baab4184509bdfddd294efc6cae7a410c6f07 */
1/* 2 * Copyright (C) STRATO AG 2011. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19/* 20 * This module can be used to catch cases when the btrfs kernel 21 * code executes write requests to the disk that bring the file 22 * system in an inconsistent state. In such a state, a power-loss 23 * or kernel panic event would cause that the data on disk is 24 * lost or at least damaged. 25 * 26 * Code is added that examines all block write requests during 27 * runtime (including writes of the super block). Three rules 28 * are verified and an error is printed on violation of the 29 * rules: 30 * 1. It is not allowed to write a disk block which is 31 * currently referenced by the super block (either directly 32 * or indirectly). 33 * 2. When a super block is written, it is verified that all 34 * referenced (directly or indirectly) blocks fulfill the 35 * following requirements: 36 * 2a. All referenced blocks have either been present when 37 * the file system was mounted, (i.e., they have been 38 * referenced by the super block) or they have been 39 * written since then and the write completion callback 40 * was called and a FLUSH request to the device where 41 * these blocks are located was received and completed. 42 * 2b. All referenced blocks need to have a generation 43 * number which is equal to the parent's number. 
44 * 45 * One issue that was found using this module was that the log 46 * tree on disk became temporarily corrupted because disk blocks 47 * that had been in use for the log tree had been freed and 48 * reused too early, while being referenced by the written super 49 * block. 50 * 51 * The search term in the kernel log that can be used to filter 52 * on the existence of detected integrity issues is 53 * "btrfs: attempt". 54 * 55 * The integrity check is enabled via mount options. These 56 * mount options are only supported if the integrity check 57 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY. 58 * 59 * Example #1, apply integrity checks to all metadata: 60 * mount /dev/sdb1 /mnt -o check_int 61 * 62 * Example #2, apply integrity checks to all metadata and 63 * to data extents: 64 * mount /dev/sdb1 /mnt -o check_int_data 65 * 66 * Example #3, apply integrity checks to all metadata and dump 67 * the tree that the super block references to kernel messages 68 * each time after a super block was written: 69 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263 70 * 71 * If the integrity check tool is included and activated in 72 * the mount options, plenty of kernel memory is used, and 73 * plenty of additional CPU cycles are spent. Enabling this 74 * functionality is not intended for normal use. In most 75 * cases, unless you are a btrfs developer who needs to verify 76 * the integrity of (super)-block write requests, do not 77 * enable the config option BTRFS_FS_CHECK_INTEGRITY to 78 * include and compile the integrity check tool. 
79 */ 80 81#include <linux/sched.h> 82#include <linux/slab.h> 83#include <linux/buffer_head.h> 84#include <linux/mutex.h> 85#include <linux/crc32c.h> 86#include <linux/genhd.h> 87#include <linux/blkdev.h> 88#include "ctree.h" 89#include "disk-io.h" 90#include "transaction.h" 91#include "extent_io.h" 92#include "volumes.h" 93#include "print-tree.h" 94#include "locking.h" 95#include "check-integrity.h" 96 97#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 98#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 99#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100 100#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051 101#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807 102#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530 103#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300 104#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters, 105 * excluding " [...]" */ 106#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1) 107 108/* 109 * The definition of the bitmask fields for the print_mask. 110 * They are specified with the mount option check_integrity_print_mask. 
*/ 112#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE 0x00000001 113#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION 0x00000002 114#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE 0x00000004 115#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE 0x00000008 116#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH 0x00000010 117#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH 0x00000020 118#define BTRFSIC_PRINT_MASK_VERBOSE 0x00000040 119#define BTRFSIC_PRINT_MASK_VERY_VERBOSE 0x00000080 120#define BTRFSIC_PRINT_MASK_INITIAL_TREE 0x00000100 121#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES 0x00000200 122#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400 123#define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800 124#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000 125 126struct btrfsic_dev_state; 127struct btrfsic_state; 128 /* One tracked disk block (metadata, data or superblock), keyed by (bdev, dev_bytenr). */ 129struct btrfsic_block { 130 u32 magic_num; /* only used for debug purposes */ 131 unsigned int is_metadata:1; /* if it is meta-data, not data-data */ 132 unsigned int is_superblock:1; /* if it is one of the superblocks */ 133 unsigned int is_iodone:1; /* if is done by lower subsystem */ 134 unsigned int iodone_w_error:1; /* error was indicated to endio */ 135 unsigned int never_written:1; /* block was added because it was 136 * referenced, not because it was 137 * written */ 138 unsigned int mirror_num:2; /* large enough to hold 139 * BTRFS_SUPER_MIRROR_MAX */ 140 struct btrfsic_dev_state *dev_state; 141 u64 dev_bytenr; /* key, physical byte num on disk */ 142 u64 logical_bytenr; /* logical byte num on disk */ 143 u64 generation; 144 struct btrfs_disk_key disk_key; /* extra info to print in case of 145 * issues, will not always be correct */ 146 struct list_head collision_resolving_node; /* list node */ 147 struct list_head all_blocks_node; /* list node */ 148 149 /* the following two lists contain block_link items */ 150 struct list_head ref_to_list; /* list */ 151 struct list_head ref_from_list; /* list */ 152 struct btrfsic_block *next_in_same_bio; 153 void
*orig_bio_bh_private; 154 union { 155 bio_end_io_t *bio; 156 bh_end_io_t *bh; 157 } orig_bio_bh_end_io; 158 int submit_bio_bh_rw; 159 u64 flush_gen; /* only valid if !never_written */ 160}; 161 162/* 163 * Elements of this type are allocated dynamically and required because 164 * each block object can refer to and can be ref from multiple blocks. 165 * The key to lookup them in the hashtable is the dev_bytenr of 166 * the block ref to plus the one from the block refered from. 167 * The fact that they are searchable via a hashtable and that a 168 * ref_cnt is maintained is not required for the btrfs integrity 169 * check algorithm itself, it is only used to make the output more 170 * beautiful in case that an error is detected (an error is defined 171 * as a write operation to a block while that block is still referenced). 172 */ 173struct btrfsic_block_link { 174 u32 magic_num; /* only used for debug purposes */ 175 u32 ref_cnt; 176 struct list_head node_ref_to; /* list node */ 177 struct list_head node_ref_from; /* list node */ 178 struct list_head collision_resolving_node; /* list node */ 179 struct btrfsic_block *block_ref_to; 180 struct btrfsic_block *block_ref_from; 181 u64 parent_generation; 182}; 183 /* Per-device state; found via its bdev pointer in a global hashtable. */ 184struct btrfsic_dev_state { 185 u32 magic_num; /* only used for debug purposes */ 186 struct block_device *bdev; 187 struct btrfsic_state *state; 188 struct list_head collision_resolving_node; /* list node */ 189 struct btrfsic_block dummy_block_for_bio_bh_flush; 190 u64 last_flush_gen; 191 char name[BDEVNAME_SIZE]; 192}; 193 194struct btrfsic_block_hashtable { 195 struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE]; 196}; 197 198struct btrfsic_block_link_hashtable { 199 struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE]; 200}; 201 202struct btrfsic_dev_state_hashtable { 203 struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE]; 204}; 205 /* Result of mapping one logical block: device, physical offset, pages and data. */ 206struct btrfsic_block_data_ctx { 207 u64 start; /* virtual bytenr */ 208 u64 dev_bytenr; /* physical bytenr on
device */ 209 u32 len; 210 struct btrfsic_dev_state *dev; 211 char **datav; 212 struct page **pagev; 213 void *mem_to_free; 214}; 215 216/* This structure is used to implement recursion without occupying 217 * any stack space, refer to btrfsic_process_metablock() */ 218struct btrfsic_stack_frame { 219 u32 magic; 220 u32 nr; 221 int error; 222 int i; 223 int limit_nesting; 224 int num_copies; 225 int mirror_num; 226 struct btrfsic_block *block; 227 struct btrfsic_block_data_ctx *block_ctx; 228 struct btrfsic_block *next_block; 229 struct btrfsic_block_data_ctx next_block_ctx; 230 struct btrfs_header *hdr; 231 struct btrfsic_stack_frame *prev; 232}; 233 234/* Some state per mounted filesystem */ 235struct btrfsic_state { 236 u32 print_mask; 237 int include_extent_data; 238 int csum_size; 239 struct list_head all_blocks_list; 240 struct btrfsic_block_hashtable block_hashtable; 241 struct btrfsic_block_link_hashtable block_link_hashtable; 242 struct btrfs_root *root; 243 u64 max_superblock_generation; 244 struct btrfsic_block *latest_superblock; 245 u32 metablock_size; 246 u32 datablock_size; 247}; 248 /* Forward declarations for every internal helper in this file. */ 249static void btrfsic_block_init(struct btrfsic_block *b); 250static struct btrfsic_block *btrfsic_block_alloc(void); 251static void btrfsic_block_free(struct btrfsic_block *b); 252static void btrfsic_block_link_init(struct btrfsic_block_link *n); 253static struct btrfsic_block_link *btrfsic_block_link_alloc(void); 254static void btrfsic_block_link_free(struct btrfsic_block_link *n); 255static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds); 256static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void); 257static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds); 258static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h); 259static void btrfsic_block_hashtable_add(struct btrfsic_block *b, 260 struct btrfsic_block_hashtable *h); 261static void btrfsic_block_hashtable_remove(struct btrfsic_block *b); 262static struct btrfsic_block
*btrfsic_block_hashtable_lookup( 263 struct block_device *bdev, 264 u64 dev_bytenr, 265 struct btrfsic_block_hashtable *h); 266static void btrfsic_block_link_hashtable_init( 267 struct btrfsic_block_link_hashtable *h); 268static void btrfsic_block_link_hashtable_add( 269 struct btrfsic_block_link *l, 270 struct btrfsic_block_link_hashtable *h); 271static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l); 272static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( 273 struct block_device *bdev_ref_to, 274 u64 dev_bytenr_ref_to, 275 struct block_device *bdev_ref_from, 276 u64 dev_bytenr_ref_from, 277 struct btrfsic_block_link_hashtable *h); 278static void btrfsic_dev_state_hashtable_init( 279 struct btrfsic_dev_state_hashtable *h); 280static void btrfsic_dev_state_hashtable_add( 281 struct btrfsic_dev_state *ds, 282 struct btrfsic_dev_state_hashtable *h); 283static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds); 284static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup( 285 struct block_device *bdev, 286 struct btrfsic_dev_state_hashtable *h); 287static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void); 288static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf); 289static int btrfsic_process_superblock(struct btrfsic_state *state, 290 struct btrfs_fs_devices *fs_devices); 291static int btrfsic_process_metablock(struct btrfsic_state *state, 292 struct btrfsic_block *block, 293 struct btrfsic_block_data_ctx *block_ctx, 294 int limit_nesting, int force_iodone_flag); 295static void btrfsic_read_from_block_data( 296 struct btrfsic_block_data_ctx *block_ctx, 297 void *dst, u32 offset, size_t len); 298static int btrfsic_create_link_to_next_block( 299 struct btrfsic_state *state, 300 struct btrfsic_block *block, 301 struct btrfsic_block_data_ctx 302 *block_ctx, u64 next_bytenr, 303 int limit_nesting, 304 struct btrfsic_block_data_ctx *next_block_ctx, 305 struct btrfsic_block
**next_blockp, 306 int force_iodone_flag, 307 int *num_copiesp, int *mirror_nump, 308 struct btrfs_disk_key *disk_key, 309 u64 parent_generation); 310static int btrfsic_handle_extent_data(struct btrfsic_state *state, 311 struct btrfsic_block *block, 312 struct btrfsic_block_data_ctx *block_ctx, 313 u32 item_offset, int force_iodone_flag); 314static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, 315 struct btrfsic_block_data_ctx *block_ctx_out, 316 int mirror_num); 317static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, 318 u32 len, struct block_device *bdev, 319 struct btrfsic_block_data_ctx *block_ctx_out); 320static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); 321static int btrfsic_read_block(struct btrfsic_state *state, 322 struct btrfsic_block_data_ctx *block_ctx); 323static void btrfsic_dump_database(struct btrfsic_state *state); 324static void btrfsic_complete_bio_end_io(struct bio *bio, int err); 325static int btrfsic_test_for_metadata(struct btrfsic_state *state, 326 char **datav, unsigned int num_pages); 327static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, 328 u64 dev_bytenr, char **mapped_datav, 329 unsigned int num_pages, 330 struct bio *bio, int *bio_is_patched, 331 struct buffer_head *bh, 332 int submit_bio_bh_rw); 333static int btrfsic_process_written_superblock( 334 struct btrfsic_state *state, 335 struct btrfsic_block *const block, 336 struct btrfs_super_block *const super_hdr); 337static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status); 338static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate); 339static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state, 340 const struct btrfsic_block *block, 341 int recursion_level); 342static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, 343 struct btrfsic_block *const block, 344 int recursion_level); 345static void btrfsic_print_add_link(const
struct btrfsic_state *state, 346 const struct btrfsic_block_link *l); 347static void btrfsic_print_rem_link(const struct btrfsic_state *state, 348 const struct btrfsic_block_link *l); 349static char btrfsic_get_block_type(const struct btrfsic_state *state, 350 const struct btrfsic_block *block); 351static void btrfsic_dump_tree(const struct btrfsic_state *state); 352static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, 353 const struct btrfsic_block *block, 354 int indent_level); 355static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add( 356 struct btrfsic_state *state, 357 struct btrfsic_block_data_ctx *next_block_ctx, 358 struct btrfsic_block *next_block, 359 struct btrfsic_block *from_block, 360 u64 parent_generation); 361static struct btrfsic_block *btrfsic_block_lookup_or_add( 362 struct btrfsic_state *state, 363 struct btrfsic_block_data_ctx *block_ctx, 364 const char *additional_string, 365 int is_metadata, 366 int is_iodone, 367 int never_written, 368 int mirror_num, 369 int *was_created); 370static int btrfsic_process_superblock_dev_mirror( 371 struct btrfsic_state *state, 372 struct btrfsic_dev_state *dev_state, 373 struct btrfs_device *device, 374 int superblock_mirror_num, 375 struct btrfsic_dev_state **selected_dev_state, 376 struct btrfs_super_block *selected_super); 377static struct btrfsic_dev_state *btrfsic_dev_state_lookup( 378 struct block_device *bdev); 379static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, 380 u64 bytenr, 381 struct btrfsic_dev_state *dev_state, 382 u64 dev_bytenr); 383 /* File-scope globals shared by all mounts using the checker. */ 384static struct mutex btrfsic_mutex; 385static int btrfsic_is_initialized; 386static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable; 387 388 /* Reset every field of *b to its default/empty value. */ 389static void btrfsic_block_init(struct btrfsic_block *b) 390{ 391 b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER; 392 b->dev_state = NULL; 393 b->dev_bytenr = 0; 394 b->logical_bytenr = 0; 395 b->generation = BTRFSIC_GENERATION_UNKNOWN; 396 b->disk_key.objectid =
0; 397 b->disk_key.type = 0; 398 b->disk_key.offset = 0; 399 b->is_metadata = 0; 400 b->is_superblock = 0; 401 b->is_iodone = 0; 402 b->iodone_w_error = 0; 403 b->never_written = 0; 404 b->mirror_num = 0; 405 b->next_in_same_bio = NULL; 406 b->orig_bio_bh_private = NULL; 407 b->orig_bio_bh_end_io.bio = NULL; 408 INIT_LIST_HEAD(&b->collision_resolving_node); 409 INIT_LIST_HEAD(&b->all_blocks_node); 410 INIT_LIST_HEAD(&b->ref_to_list); 411 INIT_LIST_HEAD(&b->ref_from_list); 412 b->submit_bio_bh_rw = 0; 413 b->flush_gen = 0; 414} 415 /* kzalloc a block and initialize it; returns NULL on OOM. */ 416static struct btrfsic_block *btrfsic_block_alloc(void) 417{ 418 struct btrfsic_block *b; 419 420 b = kzalloc(sizeof(*b), GFP_NOFS); 421 if (NULL != b) 422 btrfsic_block_init(b); 423 424 return b; 425} 426 427static void btrfsic_block_free(struct btrfsic_block *b) 428{ 429 BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num)); 430 kfree(b); 431} 432 433static void btrfsic_block_link_init(struct btrfsic_block_link *l) 434{ 435 l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER; 436 l->ref_cnt = 1; 437 INIT_LIST_HEAD(&l->node_ref_to); 438 INIT_LIST_HEAD(&l->node_ref_from); 439 INIT_LIST_HEAD(&l->collision_resolving_node); 440 l->block_ref_to = NULL; 441 l->block_ref_from = NULL; 442} 443 444static struct btrfsic_block_link *btrfsic_block_link_alloc(void) 445{ 446 struct btrfsic_block_link *l; 447 448 l = kzalloc(sizeof(*l), GFP_NOFS); 449 if (NULL != l) 450 btrfsic_block_link_init(l); 451 452 return l; 453} 454 455static void btrfsic_block_link_free(struct btrfsic_block_link *l) 456{ 457 BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num)); 458 kfree(l); 459} 460 /* Reset per-device state; the embedded dummy block stands in for FLUSH requests. */ 461static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds) 462{ 463 ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER; 464 ds->bdev = NULL; 465 ds->state = NULL; 466 ds->name[0] = '\0'; 467 INIT_LIST_HEAD(&ds->collision_resolving_node); 468 ds->last_flush_gen = 0; 469 btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush); 470
ds->dummy_block_for_bio_bh_flush.is_iodone = 1; 471 ds->dummy_block_for_bio_bh_flush.dev_state = ds; 472} 473 474static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void) 475{ 476 struct btrfsic_dev_state *ds; 477 478 ds = kzalloc(sizeof(*ds), GFP_NOFS); 479 if (NULL != ds) 480 btrfsic_dev_state_init(ds); 481 482 return ds; 483} 484 485static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds) 486{ 487 BUG_ON(!(NULL == ds || 488 BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num)); 489 kfree(ds); 490} 491 /* Block hashtable: chained buckets hashed on (dev_bytenr >> 16) ^ bdev pointer. */ 492static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h) 493{ 494 int i; 495 496 for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++) 497 INIT_LIST_HEAD(h->table + i); 498} 499 500static void btrfsic_block_hashtable_add(struct btrfsic_block *b, 501 struct btrfsic_block_hashtable *h) 502{ 503 const unsigned int hashval = 504 (((unsigned int)(b->dev_bytenr >> 16)) ^ 505 ((unsigned int)((uintptr_t)b->dev_state->bdev))) & 506 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); 507 508 list_add(&b->collision_resolving_node, h->table + hashval); 509} 510 511static void btrfsic_block_hashtable_remove(struct btrfsic_block *b) 512{ 513 list_del(&b->collision_resolving_node); 514} 515 516static struct btrfsic_block *btrfsic_block_hashtable_lookup( 517 struct block_device *bdev, 518 u64 dev_bytenr, 519 struct btrfsic_block_hashtable *h) 520{ 521 const unsigned int hashval = 522 (((unsigned int)(dev_bytenr >> 16)) ^ 523 ((unsigned int)((uintptr_t)bdev))) & 524 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); 525 struct list_head *elem; 526 527 list_for_each(elem, h->table + hashval) { 528 struct btrfsic_block *const b = 529 list_entry(elem, struct btrfsic_block, 530 collision_resolving_node); 531 532 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr) 533 return b; 534 } 535 536 return NULL; 537} 538 539static void btrfsic_block_link_hashtable_init( 540 struct btrfsic_block_link_hashtable *h) 541{ 542 int i; 543 544 for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE;
i++) 545 INIT_LIST_HEAD(h->table + i); 546} 547 /* NOTE(review): the hashval initializer below dereferences l->block_ref_to and l->block_ref_from before the BUG_ON(NULL == ...) checks run, so those checks fire too late to catch a NULL pointer. */ 548static void btrfsic_block_link_hashtable_add( 549 struct btrfsic_block_link *l, 550 struct btrfsic_block_link_hashtable *h) 551{ 552 const unsigned int hashval = 553 (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^ 554 ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^ 555 ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^ 556 ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev))) 557 & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); 558 559 BUG_ON(NULL == l->block_ref_to); 560 BUG_ON(NULL == l->block_ref_from); 561 list_add(&l->collision_resolving_node, h->table + hashval); 562} 563 564static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l) 565{ 566 list_del(&l->collision_resolving_node); 567} 568 569static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( 570 struct block_device *bdev_ref_to, 571 u64 dev_bytenr_ref_to, 572 struct block_device *bdev_ref_from, 573 u64 dev_bytenr_ref_from, 574 struct btrfsic_block_link_hashtable *h) 575{ 576 const unsigned int hashval = 577 (((unsigned int)(dev_bytenr_ref_to >> 16)) ^ 578 ((unsigned int)(dev_bytenr_ref_from >> 16)) ^ 579 ((unsigned int)((uintptr_t)bdev_ref_to)) ^ 580 ((unsigned int)((uintptr_t)bdev_ref_from))) & 581 (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); 582 struct list_head *elem; 583 584 list_for_each(elem, h->table + hashval) { 585 struct btrfsic_block_link *const l = 586 list_entry(elem, struct btrfsic_block_link, 587 collision_resolving_node); 588 589 BUG_ON(NULL == l->block_ref_to); 590 BUG_ON(NULL == l->block_ref_from); 591 if (l->block_ref_to->dev_state->bdev == bdev_ref_to && 592 l->block_ref_to->dev_bytenr == dev_bytenr_ref_to && 593 l->block_ref_from->dev_state->bdev == bdev_ref_from && 594 l->block_ref_from->dev_bytenr == dev_bytenr_ref_from) 595 return l; 596 } 597 598 return NULL; 599} 600 601static void btrfsic_dev_state_hashtable_init( 602 struct btrfsic_dev_state_hashtable *h) 603{ 604
int i; 605 606 for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++) 607 INIT_LIST_HEAD(h->table + i); 608} 609 610static void btrfsic_dev_state_hashtable_add( 611 struct btrfsic_dev_state *ds, 612 struct btrfsic_dev_state_hashtable *h) 613{ 614 const unsigned int hashval = 615 (((unsigned int)((uintptr_t)ds->bdev)) & 616 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); 617 618 list_add(&ds->collision_resolving_node, h->table + hashval); 619} 620 621static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds) 622{ 623 list_del(&ds->collision_resolving_node); 624} 625 626static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup( 627 struct block_device *bdev, 628 struct btrfsic_dev_state_hashtable *h) 629{ 630 const unsigned int hashval = 631 (((unsigned int)((uintptr_t)bdev)) & 632 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); 633 struct list_head *elem; 634 635 list_for_each(elem, h->table + hashval) { 636 struct btrfsic_dev_state *const ds = 637 list_entry(elem, struct btrfsic_dev_state, 638 collision_resolving_node); 639 640 if (ds->bdev == bdev) 641 return ds; 642 } 643 644 return NULL; 645} 646 /* Scan every superblock mirror on every device, select the one with the highest generation, and walk the three trees (root, chunk, log) it references to build the initial block database. Returns 0 on success, negative on error. */ 647static int btrfsic_process_superblock(struct btrfsic_state *state, 648 struct btrfs_fs_devices *fs_devices) 649{ 650 int ret = 0; 651 struct btrfs_super_block *selected_super; 652 struct list_head *dev_head = &fs_devices->devices; 653 struct btrfs_device *device; 654 struct btrfsic_dev_state *selected_dev_state = NULL; 655 int pass; 656 657 BUG_ON(NULL == state); 658 selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS); 659 if (NULL == selected_super) { 660 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 661 return -1; 662 } 663 664 list_for_each_entry(device, dev_head, dev_list) { 665 int i; 666 struct btrfsic_dev_state *dev_state; 667 668 if (!device->bdev || !device->name) 669 continue; 670 671 dev_state = btrfsic_dev_state_lookup(device->bdev); 672 BUG_ON(NULL == dev_state); 673 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 674 ret =
btrfsic_process_superblock_dev_mirror( 675 state, dev_state, device, i, 676 &selected_dev_state, selected_super); 677 if (0 != ret && 0 == i) { 678 kfree(selected_super); 679 return ret; 680 } 681 } 682 } 683 684 if (NULL == state->latest_superblock) { 685 printk(KERN_INFO "btrfsic: no superblock found!\n"); 686 kfree(selected_super); 687 return -1; 688 } 689 690 state->csum_size = btrfs_super_csum_size(selected_super); 691 692 for (pass = 0; pass < 3; pass++) { 693 int num_copies; 694 int mirror_num; 695 u64 next_bytenr; 696 /* pass 0 = root tree, 1 = chunk tree, 2 = log tree */ 697 switch (pass) { 698 case 0: 699 next_bytenr = btrfs_super_root(selected_super); 700 if (state->print_mask & 701 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 702 printk(KERN_INFO "root@%llu\n", 703 (unsigned long long)next_bytenr); 704 break; 705 case 1: 706 next_bytenr = btrfs_super_chunk_root(selected_super); 707 if (state->print_mask & 708 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 709 printk(KERN_INFO "chunk@%llu\n", 710 (unsigned long long)next_bytenr); 711 break; 712 case 2: 713 next_bytenr = btrfs_super_log_root(selected_super); 714 if (0 == next_bytenr) 715 continue; 716 if (state->print_mask & 717 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 718 printk(KERN_INFO "log@%llu\n", 719 (unsigned long long)next_bytenr); 720 break; 721 } 722 723 num_copies = 724 btrfs_num_copies(&state->root->fs_info->mapping_tree, 725 next_bytenr, state->metablock_size); 726 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 727 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 728 (unsigned long long)next_bytenr, num_copies); 729 730 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 731 struct btrfsic_block *next_block; 732 struct btrfsic_block_data_ctx tmp_next_block_ctx; 733 struct btrfsic_block_link *l; 734 735 ret = btrfsic_map_block(state, next_bytenr, 736 state->metablock_size, 737 &tmp_next_block_ctx, 738 mirror_num); 739 if (ret) { 740 printk(KERN_INFO "btrfsic:" 741 " btrfsic_map_block(root @%llu," 742 "
 mirror %d) failed!\n", 743 (unsigned long long)next_bytenr, 744 mirror_num); 745 kfree(selected_super); 746 return -1; 747 } 748 749 next_block = btrfsic_block_hashtable_lookup( 750 tmp_next_block_ctx.dev->bdev, 751 tmp_next_block_ctx.dev_bytenr, 752 &state->block_hashtable); 753 BUG_ON(NULL == next_block); 754 755 l = btrfsic_block_link_hashtable_lookup( 756 tmp_next_block_ctx.dev->bdev, 757 tmp_next_block_ctx.dev_bytenr, 758 state->latest_superblock->dev_state-> 759 bdev, 760 state->latest_superblock->dev_bytenr, 761 &state->block_link_hashtable); 762 BUG_ON(NULL == l); 763 764 ret = btrfsic_read_block(state, &tmp_next_block_ctx); 765 if (ret < (int)PAGE_CACHE_SIZE) { 766 printk(KERN_INFO 767 "btrfsic: read @logical %llu failed!\n", 768 (unsigned long long) 769 tmp_next_block_ctx.start); 770 btrfsic_release_block_ctx(&tmp_next_block_ctx); 771 kfree(selected_super); 772 return -1; 773 } 774 775 ret = btrfsic_process_metablock(state, 776 next_block, 777 &tmp_next_block_ctx, 778 BTRFS_MAX_LEVEL + 3, 1); 779 btrfsic_release_block_ctx(&tmp_next_block_ctx); 780 } 781 } 782 783 kfree(selected_super); 784 return ret; 785} 786 /* Read one superblock mirror from one device, register it in the block database, and remember it in *selected_dev_state/selected_super if its generation is the highest seen so far. Returns 0 on success or mismatch, -1 on error. */ 787static int btrfsic_process_superblock_dev_mirror( 788 struct btrfsic_state *state, 789 struct btrfsic_dev_state *dev_state, 790 struct btrfs_device *device, 791 int superblock_mirror_num, 792 struct btrfsic_dev_state **selected_dev_state, 793 struct btrfs_super_block *selected_super) 794{ 795 struct btrfs_super_block *super_tmp; 796 u64 dev_bytenr; 797 struct buffer_head *bh; 798 struct btrfsic_block *superblock_tmp; 799 int pass; 800 struct block_device *const superblock_bdev = device->bdev; 801 802 /* super block bytenr is always the unmapped device bytenr */ 803 dev_bytenr = btrfs_sb_offset(superblock_mirror_num); 804 if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes) 805 return -1; 806 bh = __bread(superblock_bdev, dev_bytenr / 4096, 807 BTRFS_SUPER_INFO_SIZE); 808 if (NULL == bh) 809 return -1; 810 super_tmp = (struct
btrfs_super_block *) 811 (bh->b_data + (dev_bytenr & 4095)); 812 813 if (btrfs_super_bytenr(super_tmp) != dev_bytenr || 814 strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, 815 sizeof(super_tmp->magic)) || 816 memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || 817 btrfs_super_nodesize(super_tmp) != state->metablock_size || 818 btrfs_super_leafsize(super_tmp) != state->metablock_size || 819 btrfs_super_sectorsize(super_tmp) != state->datablock_size) { 820 brelse(bh); 821 return 0; 822 } 823 824 superblock_tmp = 825 btrfsic_block_hashtable_lookup(superblock_bdev, 826 dev_bytenr, 827 &state->block_hashtable); 828 if (NULL == superblock_tmp) { 829 superblock_tmp = btrfsic_block_alloc(); 830 if (NULL == superblock_tmp) { 831 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 832 brelse(bh); 833 return -1; 834 } 835 /* for superblock, only the dev_bytenr makes sense */ 836 superblock_tmp->dev_bytenr = dev_bytenr; 837 superblock_tmp->dev_state = dev_state; 838 superblock_tmp->logical_bytenr = dev_bytenr; 839 superblock_tmp->generation = btrfs_super_generation(super_tmp); 840 superblock_tmp->is_metadata = 1; 841 superblock_tmp->is_superblock = 1; 842 superblock_tmp->is_iodone = 1; 843 superblock_tmp->never_written = 0; 844 superblock_tmp->mirror_num = 1 + superblock_mirror_num; 845 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 846 printk(KERN_INFO "New initial S-block (bdev %p, %s)" 847 " @%llu (%s/%llu/%d)\n", 848 superblock_bdev, device->name, 849 (unsigned long long)dev_bytenr, 850 dev_state->name, 851 (unsigned long long)dev_bytenr, 852 superblock_mirror_num); 853 list_add(&superblock_tmp->all_blocks_node, 854 &state->all_blocks_list); 855 btrfsic_block_hashtable_add(superblock_tmp, 856 &state->block_hashtable); 857 } 858 859 /* select the one with the highest generation field */ 860 if (btrfs_super_generation(super_tmp) > 861 state->max_superblock_generation || 862 0 == state->max_superblock_generation) { 863 
memcpy(selected_super, super_tmp, sizeof(*selected_super)); 864 *selected_dev_state = dev_state; 865 state->max_superblock_generation = 866 btrfs_super_generation(super_tmp); 867 state->latest_superblock = superblock_tmp; 868 } 869 870 for (pass = 0; pass < 3; pass++) { 871 u64 next_bytenr; 872 int num_copies; 873 int mirror_num; 874 const char *additional_string = NULL; 875 struct btrfs_disk_key tmp_disk_key; 876 877 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; 878 tmp_disk_key.offset = 0; 879 switch (pass) { 880 case 0: 881 tmp_disk_key.objectid = 882 cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); 883 additional_string = "initial root "; 884 next_bytenr = btrfs_super_root(super_tmp); 885 break; 886 case 1: 887 tmp_disk_key.objectid = 888 cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); 889 additional_string = "initial chunk "; 890 next_bytenr = btrfs_super_chunk_root(super_tmp); 891 break; 892 case 2: 893 tmp_disk_key.objectid = 894 cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); 895 additional_string = "initial log "; 896 next_bytenr = btrfs_super_log_root(super_tmp); 897 if (0 == next_bytenr) 898 continue; 899 break; 900 } 901 902 num_copies = 903 btrfs_num_copies(&state->root->fs_info->mapping_tree, 904 next_bytenr, state->metablock_size); 905 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 906 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 907 (unsigned long long)next_bytenr, num_copies); 908 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 909 struct btrfsic_block *next_block; 910 struct btrfsic_block_data_ctx tmp_next_block_ctx; 911 struct btrfsic_block_link *l; 912 913 if (btrfsic_map_block(state, next_bytenr, 914 state->metablock_size, 915 &tmp_next_block_ctx, 916 mirror_num)) { 917 printk(KERN_INFO "btrfsic: btrfsic_map_block(" 918 "bytenr @%llu, mirror %d) failed!\n", 919 (unsigned long long)next_bytenr, 920 mirror_num); 921 brelse(bh); 922 return -1; 923 } 924 925 next_block = btrfsic_block_lookup_or_add( 926 state, &tmp_next_block_ctx, 927 
					additional_string, 1, 1, 0,
					mirror_num, NULL);
			if (NULL == next_block) {
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				brelse(bh);
				return -1;
			}

			next_block->disk_key = tmp_disk_key;
			/* generation is unknown until the link is verified */
			next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
			l = btrfsic_block_link_lookup_or_add(
					state, &tmp_next_block_ctx,
					next_block, superblock_tmp,
					BTRFSIC_GENERATION_UNKNOWN);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
			if (NULL == l) {
				brelse(bh);
				return -1;
			}
		}
	}
	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
		btrfsic_dump_tree_sub(state, superblock_tmp, 0);

	brelse(bh);
	return 0;
}

/*
 * Allocate one stack frame for the iterative tree walk in
 * btrfsic_process_metablock() and tag it with the magic number that
 * btrfsic_stack_frame_free() later checks.
 *
 * Returns the zero-initialized frame, or NULL on allocation failure
 * (a message is printed in that case).
 */
static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
{
	struct btrfsic_stack_frame *sf;

	sf = kzalloc(sizeof(*sf), GFP_NOFS);
	if (NULL == sf)
		printk(KERN_INFO "btrfsic: alloc memory failed!\n");
	else
		sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
	return sf;
}

/*
 * Free a frame obtained from btrfsic_stack_frame_alloc(). NULL is
 * allowed (kfree(NULL) is a no-op); a non-NULL frame whose magic does
 * not match indicates corruption and triggers the BUG_ON.
 */
static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
{
	BUG_ON(!(NULL == sf ||
		 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
	kfree(sf);
}

/*
 * Walk the metadata tree rooted at first_block/first_block_ctx and record
 * a btrfsic link for every block it references (root items in leaves,
 * key pointers in nodes, and optionally extent data).
 *
 * The recursion over tree levels is implemented iteratively: one
 * btrfsic_stack_frame per nesting level, chained via ->prev. The frame
 * for the initial block lives on the kernel stack; deeper frames come
 * from btrfsic_stack_frame_alloc(). Within a frame, sf->i iterates over
 * the items/pointers of the current block and sf->mirror_num iterates
 * over the sf->num_copies mirrors of the currently referenced block.
 *
 * Returns 0 on success; otherwise the first error encountered, which is
 * propagated backwards through the frames at one_stack_frame_backwards.
 * The block_ctx of the initial block is NOT released here (caller owns
 * it); the contexts of deeper frames are released on the way back.
 */
static int btrfsic_process_metablock(
		struct btrfsic_state *state,
		struct btrfsic_block *const first_block,
		struct btrfsic_block_data_ctx *const first_block_ctx,
		int first_limit_nesting, int force_iodone_flag)
{
	struct btrfsic_stack_frame initial_stack_frame = { 0 };
	struct btrfsic_stack_frame *sf;
	struct btrfsic_stack_frame *next_stack;
	struct btrfs_header *const first_hdr =
		(struct btrfs_header *)first_block_ctx->datav[0];

	BUG_ON(!first_hdr);
	sf = &initial_stack_frame;
	sf->error = 0;
	sf->i = -1;	/* -1 means "frame not yet initialized" below */
	sf->limit_nesting = first_limit_nesting;
	sf->block = first_block;
	sf->block_ctx = first_block_ctx;
	sf->next_block = NULL;
	sf->hdr = first_hdr;
	sf->prev = NULL;

continue_with_new_stack_frame:
	sf->block->generation = le64_to_cpu(sf->hdr->generation);
	if (0 == sf->hdr->level) {
		/* level 0: a leaf; scan its items */
		struct btrfs_leaf *const leafhdr =
		    (struct btrfs_leaf *)sf->hdr;

		if (-1 == sf->i) {
			/* first visit of this frame: latch the item count */
			sf->nr = le32_to_cpu(leafhdr->header.nritems);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO
				       "leaf %llu items %d generation %llu"
				       " owner %llu\n",
				       (unsigned long long)
				       sf->block_ctx->start,
				       sf->nr,
				       (unsigned long long)
				       le64_to_cpu(leafhdr->header.generation),
				       (unsigned long long)
				       le64_to_cpu(leafhdr->header.owner));
		}

continue_with_current_leaf_stack_frame:
		/* advance to the next item once all mirrors are done */
		if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
			sf->i++;
			sf->num_copies = 0;
		}

		if (sf->i < sf->nr) {
			struct btrfs_item disk_item;
			u32 disk_item_offset =
			    (uintptr_t)(leafhdr->items + sf->i) -
			    (uintptr_t)leafhdr;
			struct btrfs_disk_key *disk_key;
			u8 type;
			u32 item_offset;

			if (disk_item_offset + sizeof(struct btrfs_item) >
			    sf->block_ctx->len) {
leaf_item_out_of_bounce_error:
				printk(KERN_INFO
				       "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
				       sf->block_ctx->start,
				       sf->block_ctx->dev->name);
				goto one_stack_frame_backwards;
			}
			btrfsic_read_from_block_data(sf->block_ctx,
						     &disk_item,
						     disk_item_offset,
						     sizeof(struct btrfs_item));
			item_offset = le32_to_cpu(disk_item.offset);
			disk_key = &disk_item.key;
			type = disk_key->type;

			if (BTRFS_ROOT_ITEM_KEY == type) {
				struct btrfs_root_item root_item;
				u32 root_item_offset;
				u64 next_bytenr;

				root_item_offset = item_offset +
					offsetof(struct btrfs_leaf, items);
				if (root_item_offset +
				    sizeof(struct btrfs_root_item) >
				    sf->block_ctx->len)
					goto leaf_item_out_of_bounce_error;
				btrfsic_read_from_block_data(
					sf->block_ctx, &root_item,
					root_item_offset,
					sizeof(struct btrfs_root_item));
				next_bytenr = le64_to_cpu(root_item.bytenr);

				sf->error =
				    btrfsic_create_link_to_next_block(
						state,
						sf->block,
						sf->block_ctx,
						next_bytenr,
						sf->limit_nesting,
						&sf->next_block_ctx,
						&sf->next_block,
						force_iodone_flag,
						&sf->num_copies,
						&sf->mirror_num,
						disk_key,
						le64_to_cpu(root_item.
						generation));
				if (sf->error)
					goto one_stack_frame_backwards;

				if (NULL != sf->next_block) {
					/* descend into the referenced block */
					struct btrfs_header *const next_hdr =
					    (struct btrfs_header *)
					    sf->next_block_ctx.datav[0];

					next_stack =
					    btrfsic_stack_frame_alloc();
					if (NULL == next_stack) {
						btrfsic_release_block_ctx(
								&sf->
								next_block_ctx);
						goto one_stack_frame_backwards;
					}

					next_stack->i = -1;
					next_stack->block = sf->next_block;
					next_stack->block_ctx =
					    &sf->next_block_ctx;
					next_stack->next_block = NULL;
					next_stack->hdr = next_hdr;
					next_stack->limit_nesting =
					    sf->limit_nesting - 1;
					next_stack->prev = sf;
					sf = next_stack;
					goto continue_with_new_stack_frame;
				}
			} else if (BTRFS_EXTENT_DATA_KEY == type &&
				   state->include_extent_data) {
				sf->error = btrfsic_handle_extent_data(
						state,
						sf->block,
						sf->block_ctx,
						item_offset,
						force_iodone_flag);
				if (sf->error)
					goto one_stack_frame_backwards;
			}

			goto continue_with_current_leaf_stack_frame;
		}
	} else {
		/* level > 0: an internal node; scan its key pointers */
		struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;

		if (-1 == sf->i) {
			sf->nr = le32_to_cpu(nodehdr->header.nritems);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO "node %llu level %d items %d"
				       " generation %llu owner %llu\n",
				       (unsigned long long)
				       sf->block_ctx->start,
				       nodehdr->header.level, sf->nr,
				       (unsigned long long)
				       le64_to_cpu(nodehdr->header.generation),
				       (unsigned long long)
				       le64_to_cpu(nodehdr->header.owner));
		}

continue_with_current_node_stack_frame:
		if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
			sf->i++;
			sf->num_copies = 0;
		}

		if (sf->i < sf->nr) {
			struct btrfs_key_ptr key_ptr;
			u32 key_ptr_offset;
			u64 next_bytenr;

			key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
					 (uintptr_t)nodehdr;
			if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
			    sf->block_ctx->len) {
				printk(KERN_INFO
				       "btrfsic: node item out of bounce at logical %llu, dev %s\n",
				       sf->block_ctx->start,
				       sf->block_ctx->dev->name);
				goto one_stack_frame_backwards;
			}
			btrfsic_read_from_block_data(
				sf->block_ctx, &key_ptr, key_ptr_offset,
				sizeof(struct btrfs_key_ptr));
			next_bytenr = le64_to_cpu(key_ptr.blockptr);

			sf->error = btrfsic_create_link_to_next_block(
					state,
					sf->block,
					sf->block_ctx,
					next_bytenr,
					sf->limit_nesting,
					&sf->next_block_ctx,
					&sf->next_block,
					force_iodone_flag,
					&sf->num_copies,
					&sf->mirror_num,
					&key_ptr.key,
					le64_to_cpu(key_ptr.generation));
			if (sf->error)
				goto one_stack_frame_backwards;

			if (NULL != sf->next_block) {
				struct btrfs_header *const next_hdr =
				    (struct btrfs_header *)
				    sf->next_block_ctx.datav[0];

				next_stack = btrfsic_stack_frame_alloc();
				if (NULL == next_stack)
					goto one_stack_frame_backwards;

				next_stack->i = -1;
				next_stack->block = sf->next_block;
				next_stack->block_ctx = &sf->next_block_ctx;
				next_stack->next_block = NULL;
				next_stack->hdr = next_hdr;
				next_stack->limit_nesting =
				    sf->limit_nesting - 1;
				next_stack->prev = sf;
				sf = next_stack;
				goto continue_with_new_stack_frame;
			}

			goto continue_with_current_node_stack_frame;
		}
	}

one_stack_frame_backwards:
	/* current block fully processed; pop back to the parent frame */
	if (NULL != sf->prev) {
		struct btrfsic_stack_frame *const prev = sf->prev;

		/* the one for the initial block is freed in the caller */
		btrfsic_release_block_ctx(sf->block_ctx);

		if (sf->error) {
			prev->error = sf->error;
			btrfsic_stack_frame_free(sf);
			sf = prev;
			goto one_stack_frame_backwards;
		}

		btrfsic_stack_frame_free(sf);
		sf = prev;
		goto continue_with_new_stack_frame;
	} else {
		BUG_ON(&initial_stack_frame != sf);
	}

	return sf->error;
}

/*
 * Copy len bytes starting at byte 'offset' of the block described by
 * block_ctx into dstv. The block's payload is spread over the kmap'ed
 * pages in block_ctx->datav[], so the copy is done page fragment by
 * page fragment.
 */
static void btrfsic_read_from_block_data(
	struct btrfsic_block_data_ctx *block_ctx,
	void *dstv, u32 offset, size_t len)
{
	size_t cur;
	size_t offset_in_page;
	char *kaddr;
	char *dst = (char *)dstv;
	/* offset of the block start within its first page */
	size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;

	WARN_ON(offset + len > block_ctx->len);
	offset_in_page = (start_offset + offset) &
			 ((unsigned long)PAGE_CACHE_SIZE - 1);

	while (len > 0) {
		cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
		BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
			    PAGE_CACHE_SHIFT);
		kaddr = block_ctx->datav[i];
		memcpy(dst, kaddr + offset_in_page, cur);

		dst += cur;
		len -= cur;
		offset_in_page = 0;	/* subsequent pages copied from 0 */
		i++;
	}
}

/*
 * Create (or reference) the link block -> next_bytenr for the mirror
 * *mirror_nump, loading the referenced block's data into *next_block_ctx
 * when the walk should descend into it (*next_blockp != NULL on return
 * in that case). *num_copiesp/*mirror_nump carry the mirror iteration
 * state across calls. Returns 0 on success, -1 on error.
 */
static int btrfsic_create_link_to_next_block(
		struct btrfsic_state *state,
		struct btrfsic_block *block,
		struct btrfsic_block_data_ctx *block_ctx,
		u64 next_bytenr,
		int limit_nesting,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block **next_blockp,
		int force_iodone_flag,
		int *num_copiesp, int *mirror_nump,
		struct btrfs_disk_key *disk_key,
		u64 parent_generation)
{
	struct btrfsic_block *next_block = NULL;
	int ret;
	struct btrfsic_block_link *l;
	int did_alloc_block_link;
	int block_was_created;
1282 1283 *next_blockp = NULL; 1284 if (0 == *num_copiesp) { 1285 *num_copiesp = 1286 btrfs_num_copies(&state->root->fs_info->mapping_tree, 1287 next_bytenr, state->metablock_size); 1288 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1289 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 1290 (unsigned long long)next_bytenr, *num_copiesp); 1291 *mirror_nump = 1; 1292 } 1293 1294 if (*mirror_nump > *num_copiesp) 1295 return 0; 1296 1297 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1298 printk(KERN_INFO 1299 "btrfsic_create_link_to_next_block(mirror_num=%d)\n", 1300 *mirror_nump); 1301 ret = btrfsic_map_block(state, next_bytenr, 1302 state->metablock_size, 1303 next_block_ctx, *mirror_nump); 1304 if (ret) { 1305 printk(KERN_INFO 1306 "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", 1307 (unsigned long long)next_bytenr, *mirror_nump); 1308 btrfsic_release_block_ctx(next_block_ctx); 1309 *next_blockp = NULL; 1310 return -1; 1311 } 1312 1313 next_block = btrfsic_block_lookup_or_add(state, 1314 next_block_ctx, "referenced ", 1315 1, force_iodone_flag, 1316 !force_iodone_flag, 1317 *mirror_nump, 1318 &block_was_created); 1319 if (NULL == next_block) { 1320 btrfsic_release_block_ctx(next_block_ctx); 1321 *next_blockp = NULL; 1322 return -1; 1323 } 1324 if (block_was_created) { 1325 l = NULL; 1326 next_block->generation = BTRFSIC_GENERATION_UNKNOWN; 1327 } else { 1328 if (next_block->logical_bytenr != next_bytenr && 1329 !(!next_block->is_metadata && 1330 0 == next_block->logical_bytenr)) { 1331 printk(KERN_INFO 1332 "Referenced block @%llu (%s/%llu/%d)" 1333 " found in hash table, %c," 1334 " bytenr mismatch (!= stored %llu).\n", 1335 (unsigned long long)next_bytenr, 1336 next_block_ctx->dev->name, 1337 (unsigned long long)next_block_ctx->dev_bytenr, 1338 *mirror_nump, 1339 btrfsic_get_block_type(state, next_block), 1340 (unsigned long long)next_block->logical_bytenr); 1341 } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1342 
printk(KERN_INFO 1343 "Referenced block @%llu (%s/%llu/%d)" 1344 " found in hash table, %c.\n", 1345 (unsigned long long)next_bytenr, 1346 next_block_ctx->dev->name, 1347 (unsigned long long)next_block_ctx->dev_bytenr, 1348 *mirror_nump, 1349 btrfsic_get_block_type(state, next_block)); 1350 next_block->logical_bytenr = next_bytenr; 1351 1352 next_block->mirror_num = *mirror_nump; 1353 l = btrfsic_block_link_hashtable_lookup( 1354 next_block_ctx->dev->bdev, 1355 next_block_ctx->dev_bytenr, 1356 block_ctx->dev->bdev, 1357 block_ctx->dev_bytenr, 1358 &state->block_link_hashtable); 1359 } 1360 1361 next_block->disk_key = *disk_key; 1362 if (NULL == l) { 1363 l = btrfsic_block_link_alloc(); 1364 if (NULL == l) { 1365 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 1366 btrfsic_release_block_ctx(next_block_ctx); 1367 *next_blockp = NULL; 1368 return -1; 1369 } 1370 1371 did_alloc_block_link = 1; 1372 l->block_ref_to = next_block; 1373 l->block_ref_from = block; 1374 l->ref_cnt = 1; 1375 l->parent_generation = parent_generation; 1376 1377 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1378 btrfsic_print_add_link(state, l); 1379 1380 list_add(&l->node_ref_to, &block->ref_to_list); 1381 list_add(&l->node_ref_from, &next_block->ref_from_list); 1382 1383 btrfsic_block_link_hashtable_add(l, 1384 &state->block_link_hashtable); 1385 } else { 1386 did_alloc_block_link = 0; 1387 if (0 == limit_nesting) { 1388 l->ref_cnt++; 1389 l->parent_generation = parent_generation; 1390 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1391 btrfsic_print_add_link(state, l); 1392 } 1393 } 1394 1395 if (limit_nesting > 0 && did_alloc_block_link) { 1396 ret = btrfsic_read_block(state, next_block_ctx); 1397 if (ret < (int)next_block_ctx->len) { 1398 printk(KERN_INFO 1399 "btrfsic: read block @logical %llu failed!\n", 1400 (unsigned long long)next_bytenr); 1401 btrfsic_release_block_ctx(next_block_ctx); 1402 *next_blockp = NULL; 1403 return -1; 1404 } 1405 1406 *next_blockp = 
next_block; 1407 } else { 1408 *next_blockp = NULL; 1409 } 1410 (*mirror_nump)++; 1411 1412 return 0; 1413} 1414 1415static int btrfsic_handle_extent_data( 1416 struct btrfsic_state *state, 1417 struct btrfsic_block *block, 1418 struct btrfsic_block_data_ctx *block_ctx, 1419 u32 item_offset, int force_iodone_flag) 1420{ 1421 int ret; 1422 struct btrfs_file_extent_item file_extent_item; 1423 u64 file_extent_item_offset; 1424 u64 next_bytenr; 1425 u64 num_bytes; 1426 u64 generation; 1427 struct btrfsic_block_link *l; 1428 1429 file_extent_item_offset = offsetof(struct btrfs_leaf, items) + 1430 item_offset; 1431 if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) > 1432 block_ctx->len) { 1433 printk(KERN_INFO 1434 "btrfsic: file item out of bounce at logical %llu, dev %s\n", 1435 block_ctx->start, block_ctx->dev->name); 1436 return -1; 1437 } 1438 btrfsic_read_from_block_data(block_ctx, &file_extent_item, 1439 file_extent_item_offset, 1440 sizeof(struct btrfs_file_extent_item)); 1441 next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) + 1442 le64_to_cpu(file_extent_item.offset); 1443 generation = le64_to_cpu(file_extent_item.generation); 1444 num_bytes = le64_to_cpu(file_extent_item.num_bytes); 1445 generation = le64_to_cpu(file_extent_item.generation); 1446 1447 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1448 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," 1449 " offset = %llu, num_bytes = %llu\n", 1450 file_extent_item.type, 1451 (unsigned long long) 1452 le64_to_cpu(file_extent_item.disk_bytenr), 1453 (unsigned long long)le64_to_cpu(file_extent_item.offset), 1454 (unsigned long long)num_bytes); 1455 if (BTRFS_FILE_EXTENT_REG != file_extent_item.type || 1456 ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) 1457 return 0; 1458 while (num_bytes > 0) { 1459 u32 chunk_len; 1460 int num_copies; 1461 int mirror_num; 1462 1463 if (num_bytes > state->datablock_size) 1464 chunk_len = state->datablock_size; 1465 else 1466 
chunk_len = num_bytes; 1467 1468 num_copies = 1469 btrfs_num_copies(&state->root->fs_info->mapping_tree, 1470 next_bytenr, state->datablock_size); 1471 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1472 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 1473 (unsigned long long)next_bytenr, num_copies); 1474 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 1475 struct btrfsic_block_data_ctx next_block_ctx; 1476 struct btrfsic_block *next_block; 1477 int block_was_created; 1478 1479 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1480 printk(KERN_INFO "btrfsic_handle_extent_data(" 1481 "mirror_num=%d)\n", mirror_num); 1482 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1483 printk(KERN_INFO 1484 "\tdisk_bytenr = %llu, num_bytes %u\n", 1485 (unsigned long long)next_bytenr, 1486 chunk_len); 1487 ret = btrfsic_map_block(state, next_bytenr, 1488 chunk_len, &next_block_ctx, 1489 mirror_num); 1490 if (ret) { 1491 printk(KERN_INFO 1492 "btrfsic: btrfsic_map_block(@%llu," 1493 " mirror=%d) failed!\n", 1494 (unsigned long long)next_bytenr, 1495 mirror_num); 1496 return -1; 1497 } 1498 1499 next_block = btrfsic_block_lookup_or_add( 1500 state, 1501 &next_block_ctx, 1502 "referenced ", 1503 0, 1504 force_iodone_flag, 1505 !force_iodone_flag, 1506 mirror_num, 1507 &block_was_created); 1508 if (NULL == next_block) { 1509 printk(KERN_INFO 1510 "btrfsic: error, kmalloc failed!\n"); 1511 btrfsic_release_block_ctx(&next_block_ctx); 1512 return -1; 1513 } 1514 if (!block_was_created) { 1515 if (next_block->logical_bytenr != next_bytenr && 1516 !(!next_block->is_metadata && 1517 0 == next_block->logical_bytenr)) { 1518 printk(KERN_INFO 1519 "Referenced block" 1520 " @%llu (%s/%llu/%d)" 1521 " found in hash table, D," 1522 " bytenr mismatch" 1523 " (!= stored %llu).\n", 1524 (unsigned long long)next_bytenr, 1525 next_block_ctx.dev->name, 1526 (unsigned long long) 1527 next_block_ctx.dev_bytenr, 1528 mirror_num, 1529 (unsigned long long) 1530 
next_block->logical_bytenr); 1531 } 1532 next_block->logical_bytenr = next_bytenr; 1533 next_block->mirror_num = mirror_num; 1534 } 1535 1536 l = btrfsic_block_link_lookup_or_add(state, 1537 &next_block_ctx, 1538 next_block, block, 1539 generation); 1540 btrfsic_release_block_ctx(&next_block_ctx); 1541 if (NULL == l) 1542 return -1; 1543 } 1544 1545 next_bytenr += chunk_len; 1546 num_bytes -= chunk_len; 1547 } 1548 1549 return 0; 1550} 1551 1552static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, 1553 struct btrfsic_block_data_ctx *block_ctx_out, 1554 int mirror_num) 1555{ 1556 int ret; 1557 u64 length; 1558 struct btrfs_bio *multi = NULL; 1559 struct btrfs_device *device; 1560 1561 length = len; 1562 ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ, 1563 bytenr, &length, &multi, mirror_num); 1564 1565 device = multi->stripes[0].dev; 1566 block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev); 1567 block_ctx_out->dev_bytenr = multi->stripes[0].physical; 1568 block_ctx_out->start = bytenr; 1569 block_ctx_out->len = len; 1570 block_ctx_out->datav = NULL; 1571 block_ctx_out->pagev = NULL; 1572 block_ctx_out->mem_to_free = NULL; 1573 1574 if (0 == ret) 1575 kfree(multi); 1576 if (NULL == block_ctx_out->dev) { 1577 ret = -ENXIO; 1578 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n"); 1579 } 1580 1581 return ret; 1582} 1583 1584static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, 1585 u32 len, struct block_device *bdev, 1586 struct btrfsic_block_data_ctx *block_ctx_out) 1587{ 1588 block_ctx_out->dev = btrfsic_dev_state_lookup(bdev); 1589 block_ctx_out->dev_bytenr = bytenr; 1590 block_ctx_out->start = bytenr; 1591 block_ctx_out->len = len; 1592 block_ctx_out->datav = NULL; 1593 block_ctx_out->pagev = NULL; 1594 block_ctx_out->mem_to_free = NULL; 1595 if (NULL != block_ctx_out->dev) { 1596 return 0; 1597 } else { 1598 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n"); 1599 return 
		       -ENXIO;
	}
}

/*
 * Release the page/data arrays attached to block_ctx by
 * btrfsic_read_block(): kunmap each mapped page, free each page, then
 * free the combined datav/pagev allocation. Safe to call on a context
 * that was never filled (mem_to_free == NULL) and safe to call after a
 * partially failed read (NULL entries are skipped).
 */
static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
{
	if (block_ctx->mem_to_free) {
		unsigned int num_pages;

		BUG_ON(!block_ctx->datav);
		BUG_ON(!block_ctx->pagev);
		num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
			    PAGE_CACHE_SHIFT;
		while (num_pages > 0) {
			num_pages--;
			if (block_ctx->datav[num_pages]) {
				kunmap(block_ctx->pagev[num_pages]);
				block_ctx->datav[num_pages] = NULL;
			}
			if (block_ctx->pagev[num_pages]) {
				__free_page(block_ctx->pagev[num_pages]);
				block_ctx->pagev[num_pages] = NULL;
			}
		}

		kfree(block_ctx->mem_to_free);
		block_ctx->mem_to_free = NULL;
		block_ctx->pagev = NULL;
		block_ctx->datav = NULL;
	}
}

/*
 * Synchronously read block_ctx->len bytes from block_ctx->dev at
 * block_ctx->dev_bytenr into freshly allocated pages and kmap them into
 * block_ctx->datav[]. Returns block_ctx->len on success, -1 on error.
 *
 * On error the partially set up context is intentionally NOT torn down
 * here: mem_to_free is already set, so the caller's
 * btrfsic_release_block_ctx() reclaims pages and mappings.
 */
static int btrfsic_read_block(struct btrfsic_state *state,
			      struct btrfsic_block_data_ctx *block_ctx)
{
	unsigned int num_pages;
	unsigned int i;
	u64 dev_bytenr;
	int ret;

	BUG_ON(block_ctx->datav);
	BUG_ON(block_ctx->pagev);
	BUG_ON(block_ctx->mem_to_free);
	if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
		printk(KERN_INFO
		       "btrfsic: read_block() with unaligned bytenr %llu\n",
		       (unsigned long long)block_ctx->dev_bytenr);
		return -1;
	}

	num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
		    PAGE_CACHE_SHIFT;
	/* one allocation holds both the datav and the pagev array */
	block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
					  sizeof(*block_ctx->pagev)) *
					 num_pages, GFP_NOFS);
	if (!block_ctx->mem_to_free)
		return -1;
	block_ctx->datav = block_ctx->mem_to_free;
	block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
	for (i = 0; i < num_pages; i++) {
		block_ctx->pagev[i] = alloc_page(GFP_NOFS);
		if (!block_ctx->pagev[i])
			return -1;
	}

	dev_bytenr = block_ctx->dev_bytenr;
	for (i = 0; i < num_pages;) {
		struct bio *bio;
		unsigned int j;
		DECLARE_COMPLETION_ONSTACK(complete);

		bio = bio_alloc(GFP_NOFS, num_pages - i);
		if (!bio) {
			printk(KERN_INFO
			       "btrfsic: bio_alloc() for %u pages failed!\n",
			       num_pages - i);
			return -1;
		}
		bio->bi_bdev = block_ctx->dev->bdev;
		bio->bi_sector = dev_bytenr >> 9;
		bio->bi_end_io = btrfsic_complete_bio_end_io;
		bio->bi_private = &complete;

		/* add as many pages as the bio accepts; retry rest in
		 * the next iteration with a fresh bio */
		for (j = i; j < num_pages; j++) {
			ret = bio_add_page(bio, block_ctx->pagev[j],
					   PAGE_CACHE_SIZE, 0);
			if (PAGE_CACHE_SIZE != ret)
				break;
		}
		if (j == i) {
			printk(KERN_INFO
			       "btrfsic: error, failed to add a single page!\n");
			return -1;
		}
		submit_bio(READ, bio);

		/* this will also unplug the queue */
		wait_for_completion(&complete);

		if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
			printk(KERN_INFO
			       "btrfsic: read error at logical %llu dev %s!\n",
			       block_ctx->start, block_ctx->dev->name);
			bio_put(bio);
			return -1;
		}
		bio_put(bio);
		dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
		i = j;
	}
	for (i = 0; i < num_pages; i++) {
		block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
		if (!block_ctx->datav[i]) {
			printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
			       block_ctx->dev->name);
			return -1;
		}
	}

	return block_ctx->len;
}

/* bio completion callback: wake up the waiter in btrfsic_read_block() */
static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
{
	complete((struct completion *)bio->bi_private);
}

/*
 * Debug helper: print every block btrfsic knows about, together with
 * all of its outgoing (ref_to) and incoming (ref_from) links.
 */
static void btrfsic_dump_database(struct btrfsic_state *state)
{
	struct list_head *elem_all;

	BUG_ON(NULL == state);

	printk(KERN_INFO "all_blocks_list:\n");
	list_for_each(elem_all, &state->all_blocks_list) {
		const struct btrfsic_block *const b_all =
		    list_entry(elem_all, struct btrfsic_block,
			       all_blocks_node);
		struct list_head *elem_ref_to;
		struct list_head *elem_ref_from;

		printk(KERN_INFO
		       "%c-block @%llu (%s/%llu/%d)\n",
		       btrfsic_get_block_type(state, b_all),
		       (unsigned long long)b_all->logical_bytenr,
		       b_all->dev_state->name,
		       (unsigned long long)b_all->dev_bytenr,
		       b_all->mirror_num);

		list_for_each(elem_ref_to, &b_all->ref_to_list) {
			const struct btrfsic_block_link *const l =
			    list_entry(elem_ref_to,
				       struct btrfsic_block_link,
				       node_ref_to);

			printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
			       " refers %u* to"
			       " %c @%llu (%s/%llu/%d)\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_to),
			       (unsigned long long)
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       (unsigned long long)l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num);
		}

		list_for_each(elem_ref_from, &b_all->ref_from_list) {
			const struct btrfsic_block_link *const l =
			    list_entry(elem_ref_from,
				       struct btrfsic_block_link,
				       node_ref_from);

			printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
			       " is ref %u* from"
			       " %c @%llu (%s/%llu/%d)\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_from),
			       (unsigned long long)
			       l->block_ref_from->logical_bytenr,
			       l->block_ref_from->dev_state->name,
			       (unsigned long long)
			       l->block_ref_from->dev_bytenr,
			       l->block_ref_from->mirror_num);
		}

		printk(KERN_INFO "\n");
	}
}

/*
 * Test whether the disk block contains a tree block (leaf or node)
 * (note that this test fails for the super block).
 *
 * Returns 0 if the buffer looks like metadata of this filesystem
 * (matching fsid and a valid header checksum), 1 otherwise.
 */
static int btrfsic_test_for_metadata(struct btrfsic_state *state,
				     char **datav, unsigned int num_pages)
{
	struct btrfs_header *h;
	u8 csum[BTRFS_CSUM_SIZE];
	u32 crc = ~(u32)0;
	unsigned int i;

	if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
		return 1; /* not metadata */
	/* only the first metablock_size bytes are relevant */
	num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
	h = (struct btrfs_header *)datav[0];

	if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
		return 1;

	for (i = 0; i < num_pages; i++) {
		/* the checksum bytes themselves are excluded from the crc */
		u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
		size_t sublen = i ? PAGE_CACHE_SIZE :
				    (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);

		crc = crc32c(crc, data, sublen);
	}
	btrfs_csum_final(crc, csum);
	if (memcmp(csum, h->csum, state->csum_size))
		return 1;

	return 0; /* is metadata */
}

/*
 * Examine a block that is about to be written: verify the integrity
 * rules, take over the bio/bh completion callback so the write can be
 * tracked, and re-scan the new content if it is metadata.
 */
static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
					  u64 dev_bytenr, char **mapped_datav,
					  unsigned int num_pages,
					  struct bio *bio, int *bio_is_patched,
					  struct buffer_head *bh,
					  int submit_bio_bh_rw)
{
	int is_metadata;
	struct btrfsic_block *block;
	struct btrfsic_block_data_ctx block_ctx;
	int ret;
	struct btrfsic_state *state = dev_state->state;
	struct block_device *bdev = dev_state->bdev;
	unsigned int processed_len;

	if (NULL != bio_is_patched)
		*bio_is_patched = 0;

again:
	if (num_pages == 0)
		return;

	processed_len = 0;
	is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
						      num_pages));

	block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
					       &state->block_hashtable);
	if (NULL != block) {
		u64 bytenr = 0;
		struct list_head *elem_ref_to;
		struct list_head *tmp_ref_to;

		if (block->is_superblock) {
			bytenr = le64_to_cpu(((struct btrfs_super_block *)
					      mapped_datav[0])->bytenr);
			if (num_pages * PAGE_CACHE_SIZE <
BTRFS_SUPER_INFO_SIZE) { 1870 printk(KERN_INFO 1871 "btrfsic: cannot work with too short bios!\n"); 1872 return; 1873 } 1874 is_metadata = 1; 1875 BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1)); 1876 processed_len = BTRFS_SUPER_INFO_SIZE; 1877 if (state->print_mask & 1878 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { 1879 printk(KERN_INFO 1880 "[before new superblock is written]:\n"); 1881 btrfsic_dump_tree_sub(state, block, 0); 1882 } 1883 } 1884 if (is_metadata) { 1885 if (!block->is_superblock) { 1886 if (num_pages * PAGE_CACHE_SIZE < 1887 state->metablock_size) { 1888 printk(KERN_INFO 1889 "btrfsic: cannot work with too short bios!\n"); 1890 return; 1891 } 1892 processed_len = state->metablock_size; 1893 bytenr = le64_to_cpu(((struct btrfs_header *) 1894 mapped_datav[0])->bytenr); 1895 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, 1896 dev_state, 1897 dev_bytenr); 1898 } 1899 if (block->logical_bytenr != bytenr) { 1900 printk(KERN_INFO 1901 "Written block @%llu (%s/%llu/%d)" 1902 " found in hash table, %c," 1903 " bytenr mismatch" 1904 " (!= stored %llu).\n", 1905 (unsigned long long)bytenr, 1906 dev_state->name, 1907 (unsigned long long)dev_bytenr, 1908 block->mirror_num, 1909 btrfsic_get_block_type(state, block), 1910 (unsigned long long) 1911 block->logical_bytenr); 1912 block->logical_bytenr = bytenr; 1913 } else if (state->print_mask & 1914 BTRFSIC_PRINT_MASK_VERBOSE) 1915 printk(KERN_INFO 1916 "Written block @%llu (%s/%llu/%d)" 1917 " found in hash table, %c.\n", 1918 (unsigned long long)bytenr, 1919 dev_state->name, 1920 (unsigned long long)dev_bytenr, 1921 block->mirror_num, 1922 btrfsic_get_block_type(state, block)); 1923 } else { 1924 if (num_pages * PAGE_CACHE_SIZE < 1925 state->datablock_size) { 1926 printk(KERN_INFO 1927 "btrfsic: cannot work with too short bios!\n"); 1928 return; 1929 } 1930 processed_len = state->datablock_size; 1931 bytenr = block->logical_bytenr; 1932 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1933 printk(KERN_INFO 
			       "Written block @%llu (%s/%llu/%d)"
			       " found in hash table, %c.\n",
			       (unsigned long long)bytenr,
			       dev_state->name,
			       (unsigned long long)dev_bytenr,
			       block->mirror_num,
			       btrfsic_get_block_type(state, block));
		}

		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "ref_to_list: %cE, ref_from_list: %cE\n",
			       list_empty(&block->ref_to_list) ? ' ' : '!',
			       list_empty(&block->ref_from_list) ? ' ' : '!');
		/* rule 1: it is an error to overwrite a block that is still
		 * reachable (directly or indirectly) from the most recent
		 * superblock */
		if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
			printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
			       " @%llu (%s/%llu/%d), old(gen=%llu,"
			       " objectid=%llu, type=%d, offset=%llu),"
			       " new(gen=%llu),"
			       " which is referenced by most recent superblock"
			       " (superblockgen=%llu)!\n",
			       btrfsic_get_block_type(state, block),
			       (unsigned long long)bytenr,
			       dev_state->name,
			       (unsigned long long)dev_bytenr,
			       block->mirror_num,
			       (unsigned long long)block->generation,
			       (unsigned long long)
			       le64_to_cpu(block->disk_key.objectid),
			       block->disk_key.type,
			       (unsigned long long)
			       le64_to_cpu(block->disk_key.offset),
			       (unsigned long long)
			       le64_to_cpu(((struct btrfs_header *)
					    mapped_datav[0])->generation),
			       (unsigned long long)
			       state->max_superblock_generation);
			btrfsic_dump_tree(state);
		}

		/* overwriting a block whose previous write has not completed
		 * yet would make the on-disk content unpredictable */
		if (!block->is_iodone && !block->never_written) {
			printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
			       " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
			       " which is not yet iodone!\n",
			       btrfsic_get_block_type(state, block),
			       (unsigned long long)bytenr,
			       dev_state->name,
			       (unsigned long long)dev_bytenr,
			       block->mirror_num,
			       (unsigned long long)block->generation,
			       (unsigned long long)
			       le64_to_cpu(((struct btrfs_header *)
					    mapped_datav[0])->generation));
			/* it would not be safe to go on */
			btrfsic_dump_tree(state);
			goto continue_loop;
		}

		/*
		 * Clear all references of this block. Do not free
		 * the block itself even if is not referenced anymore
		 * because it still carries valuable information
		 * like whether it was ever written and IO completed.
		 */
		list_for_each_safe(elem_ref_to, tmp_ref_to,
				   &block->ref_to_list) {
			struct btrfsic_block_link *const l =
			    list_entry(elem_ref_to,
				       struct btrfsic_block_link,
				       node_ref_to);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				btrfsic_print_rem_link(state, l);
			l->ref_cnt--;
			if (0 == l->ref_cnt) {
				list_del(&l->node_ref_to);
				list_del(&l->node_ref_from);
				btrfsic_block_link_hashtable_remove(l);
				btrfsic_block_link_free(l);
			}
		}

		if (block->is_superblock)
			ret = btrfsic_map_superblock(state, bytenr,
						     processed_len,
						     bdev, &block_ctx);
		else
			ret = btrfsic_map_block(state, bytenr, processed_len,
						&block_ctx, 0);
		if (ret) {
			printk(KERN_INFO
			       "btrfsic: btrfsic_map_block(root @%llu)"
			       " failed!\n", (unsigned long long)bytenr);
			goto continue_loop;
		}
		block_ctx.datav = mapped_datav;
		/* the following is required in case of writes to mirrors,
		 * use the same that was used for the lookup */
		block_ctx.dev = dev_state;
		block_ctx.dev_bytenr = dev_bytenr;

		if (is_metadata || state->include_extent_data) {
			block->never_written = 0;
			block->iodone_w_error = 0;
			if (NULL != bio) {
				/* patch the bio so our end_io callback runs
				 * first; the original private data/callback
				 * are saved in the block and restored in
				 * btrfsic_bio_end_io() */
				block->is_iodone = 0;
				BUG_ON(NULL == bio_is_patched);
				if (!*bio_is_patched) {
					block->orig_bio_bh_private =
					    bio->bi_private;
					block->orig_bio_bh_end_io.bio =
					    bio->bi_end_io;
					block->next_in_same_bio = NULL;
					bio->bi_private = block;
					bio->bi_end_io = btrfsic_bio_end_io;
					*bio_is_patched = 1;
				} else {
					/* bio already patched for a previous
					 * block of the same bio: chain this
					 * block in front of it */
					struct btrfsic_block *chained_block =
					    (struct btrfsic_block *)
					    bio->bi_private;

					BUG_ON(NULL == chained_block);
					block->orig_bio_bh_private =
					    chained_block->orig_bio_bh_private;
					block->orig_bio_bh_end_io.bio =
					    chained_block->orig_bio_bh_end_io.
					    bio;
					block->next_in_same_bio = chained_block;
					bio->bi_private = block;
				}
			} else if (NULL != bh) {
				block->is_iodone = 0;
				block->orig_bio_bh_private = bh->b_private;
				block->orig_bio_bh_end_io.bh = bh->b_end_io;
				block->next_in_same_bio = NULL;
				bh->b_private = block;
				bh->b_end_io = btrfsic_bh_end_io;
			} else {
				/* no bio and no bh: nothing to wait for */
				block->is_iodone = 1;
				block->orig_bio_bh_private = NULL;
				block->orig_bio_bh_end_io.bio = NULL;
				block->next_in_same_bio = NULL;
			}
		}

		block->flush_gen = dev_state->last_flush_gen + 1;
		block->submit_bio_bh_rw = submit_bio_bh_rw;
		if (is_metadata) {
			block->logical_bytenr = bytenr;
			block->is_metadata = 1;
			if (block->is_superblock) {
				BUG_ON(PAGE_CACHE_SIZE !=
				       BTRFS_SUPER_INFO_SIZE);
				ret = btrfsic_process_written_superblock(
						state,
						block,
						(struct btrfs_super_block *)
						mapped_datav[0]);
				if (state->print_mask &
				    BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
					printk(KERN_INFO
					"[after new superblock is written]:\n");
					btrfsic_dump_tree_sub(state, block, 0);
				}
			} else {
				block->mirror_num = 0;	/* unknown */
				ret = btrfsic_process_metablock(
						state,
						block,
						&block_ctx,
						0, 0);
			}
			if (ret)
				printk(KERN_INFO
				       "btrfsic: btrfsic_process_metablock"
				       "(root @%llu) failed!\n",
				       (unsigned long long)dev_bytenr);
		} else {
			block->is_metadata = 0;
			block->mirror_num = 0;	/* unknown */
			block->generation = BTRFSIC_GENERATION_UNKNOWN;
			if (!state->include_extent_data
			    && list_empty(&block->ref_from_list)) {
				/*
				 * disk block is overwritten with extent
				 * data (not meta data) and we are configured
				 * to not include extent data: take the
				 * chance and free the block's memory
				 */
				btrfsic_block_hashtable_remove(block);
				list_del(&block->all_blocks_node);
				btrfsic_block_free(block);
			}
		}
		btrfsic_release_block_ctx(&block_ctx);
	} else {
		/* block has not been found in hash table */
		u64 bytenr;

		if (!is_metadata) {
			processed_len = state->datablock_size;
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO "Written block (%s/%llu/?)"
				       " !found in hash table, D.\n",
				       dev_state->name,
				       (unsigned long long)dev_bytenr);
			if (!state->include_extent_data) {
				/* ignore that written D block */
				goto continue_loop;
			}

			/* this is getting ugly for the
			 * include_extent_data case... */
			bytenr = 0;	/* unknown */
			block_ctx.start = bytenr;
			block_ctx.len = processed_len;
			block_ctx.mem_to_free = NULL;
			block_ctx.pagev = NULL;
		} else {
			processed_len = state->metablock_size;
			/* trust the bytenr stored in the written header and
			 * cross-check it against the submitted position */
			bytenr = le64_to_cpu(((struct btrfs_header *)
					      mapped_datav[0])->bytenr);
			btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
						       dev_bytenr);
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO
				       "Written block @%llu (%s/%llu/?)"
				       " !found in hash table, M.\n",
				       (unsigned long long)bytenr,
				       dev_state->name,
				       (unsigned long long)dev_bytenr);

			ret = btrfsic_map_block(state, bytenr, processed_len,
						&block_ctx, 0);
			if (ret) {
				printk(KERN_INFO
				       "btrfsic: btrfsic_map_block(root @%llu)"
				       " failed!\n",
				       (unsigned long long)dev_bytenr);
				goto continue_loop;
			}
		}
		block_ctx.datav = mapped_datav;
		/* the following is required in case of writes to mirrors,
		 * use the same that was used for the lookup */
		block_ctx.dev = dev_state;
		block_ctx.dev_bytenr = dev_bytenr;

		block = btrfsic_block_alloc();
		if (NULL == block) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			btrfsic_release_block_ctx(&block_ctx);
			goto continue_loop;
		}
		block->dev_state = dev_state;
		block->dev_bytenr = dev_bytenr;
		block->logical_bytenr = bytenr;
		block->is_metadata = is_metadata;
		block->never_written = 0;
		block->iodone_w_error = 0;
		block->mirror_num = 0;	/* unknown */
		block->flush_gen = dev_state->last_flush_gen + 1;
		block->submit_bio_bh_rw = submit_bio_bh_rw;
		if (NULL != bio) {
			/* same bio/bh patching scheme as in the
			 * found-in-hash-table branch above */
			block->is_iodone = 0;
			BUG_ON(NULL == bio_is_patched);
			if (!*bio_is_patched) {
				block->orig_bio_bh_private = bio->bi_private;
				block->orig_bio_bh_end_io.bio = bio->bi_end_io;
				block->next_in_same_bio = NULL;
				bio->bi_private = block;
				bio->bi_end_io = btrfsic_bio_end_io;
				*bio_is_patched = 1;
			} else {
				struct btrfsic_block *chained_block =
				    (struct btrfsic_block *)
				    bio->bi_private;

				BUG_ON(NULL == chained_block);
				block->orig_bio_bh_private =
				    chained_block->orig_bio_bh_private;
				block->orig_bio_bh_end_io.bio =
				    chained_block->orig_bio_bh_end_io.bio;
				block->next_in_same_bio = chained_block;
				bio->bi_private = block;
			}
		} else if (NULL != bh) {
			block->is_iodone = 0;
			block->orig_bio_bh_private = bh->b_private;
			block->orig_bio_bh_end_io.bh = bh->b_end_io;
			block->next_in_same_bio = NULL;
			bh->b_private = block;
			bh->b_end_io = btrfsic_bh_end_io;
		} else {
			block->is_iodone = 1;
			block->orig_bio_bh_private = NULL;
			block->orig_bio_bh_end_io.bio = NULL;
			block->next_in_same_bio = NULL;
		}
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "New written %c-block @%llu (%s/%llu/%d)\n",
			       is_metadata ? 'M' : 'D',
			       (unsigned long long)block->logical_bytenr,
			       block->dev_state->name,
			       (unsigned long long)block->dev_bytenr,
			       block->mirror_num);
		list_add(&block->all_blocks_node, &state->all_blocks_list);
		btrfsic_block_hashtable_add(block, &state->block_hashtable);

		if (is_metadata) {
			ret = btrfsic_process_metablock(state, block,
							&block_ctx, 0, 0);
			if (ret)
				printk(KERN_INFO
				       "btrfsic: process_metablock(root @%llu)"
				       " failed!\n",
				       (unsigned long long)dev_bytenr);
		}
		btrfsic_release_block_ctx(&block_ctx);
	}

continue_loop:
	/* advance to the next block covered by this submission */
	BUG_ON(!processed_len);
	dev_bytenr += processed_len;
	mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
	num_pages -= processed_len >> PAGE_CACHE_SHIFT;
	goto again;
}

/*
 * bio write-completion hook installed by the block patching above:
 * restores the original completion callback, records the I/O status for
 * every block chained to this bio and tracks FLUSH/FUA generations.
 */
static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
{
	struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
	int iodone_w_error;

	/* mutex is not held!
This is not save if IO is not yet completed 2270 * on umount */ 2271 iodone_w_error = 0; 2272 if (bio_error_status) 2273 iodone_w_error = 1; 2274 2275 BUG_ON(NULL == block); 2276 bp->bi_private = block->orig_bio_bh_private; 2277 bp->bi_end_io = block->orig_bio_bh_end_io.bio; 2278 2279 do { 2280 struct btrfsic_block *next_block; 2281 struct btrfsic_dev_state *const dev_state = block->dev_state; 2282 2283 if ((dev_state->state->print_mask & 2284 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2285 printk(KERN_INFO 2286 "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", 2287 bio_error_status, 2288 btrfsic_get_block_type(dev_state->state, block), 2289 (unsigned long long)block->logical_bytenr, 2290 dev_state->name, 2291 (unsigned long long)block->dev_bytenr, 2292 block->mirror_num); 2293 next_block = block->next_in_same_bio; 2294 block->iodone_w_error = iodone_w_error; 2295 if (block->submit_bio_bh_rw & REQ_FLUSH) { 2296 dev_state->last_flush_gen++; 2297 if ((dev_state->state->print_mask & 2298 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2299 printk(KERN_INFO 2300 "bio_end_io() new %s flush_gen=%llu\n", 2301 dev_state->name, 2302 (unsigned long long) 2303 dev_state->last_flush_gen); 2304 } 2305 if (block->submit_bio_bh_rw & REQ_FUA) 2306 block->flush_gen = 0; /* FUA completed means block is 2307 * on disk */ 2308 block->is_iodone = 1; /* for FLUSH, this releases the block */ 2309 block = next_block; 2310 } while (NULL != block); 2311 2312 bp->bi_end_io(bp, bio_error_status); 2313} 2314 2315static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate) 2316{ 2317 struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private; 2318 int iodone_w_error = !uptodate; 2319 struct btrfsic_dev_state *dev_state; 2320 2321 BUG_ON(NULL == block); 2322 dev_state = block->dev_state; 2323 if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2324 printk(KERN_INFO 2325 "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n", 2326 iodone_w_error, 2327 
btrfsic_get_block_type(dev_state->state, block), 2328 (unsigned long long)block->logical_bytenr, 2329 block->dev_state->name, 2330 (unsigned long long)block->dev_bytenr, 2331 block->mirror_num); 2332 2333 block->iodone_w_error = iodone_w_error; 2334 if (block->submit_bio_bh_rw & REQ_FLUSH) { 2335 dev_state->last_flush_gen++; 2336 if ((dev_state->state->print_mask & 2337 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2338 printk(KERN_INFO 2339 "bh_end_io() new %s flush_gen=%llu\n", 2340 dev_state->name, 2341 (unsigned long long)dev_state->last_flush_gen); 2342 } 2343 if (block->submit_bio_bh_rw & REQ_FUA) 2344 block->flush_gen = 0; /* FUA completed means block is on disk */ 2345 2346 bh->b_private = block->orig_bio_bh_private; 2347 bh->b_end_io = block->orig_bio_bh_end_io.bh; 2348 block->is_iodone = 1; /* for FLUSH, this releases the block */ 2349 bh->b_end_io(bh, uptodate); 2350} 2351 2352static int btrfsic_process_written_superblock( 2353 struct btrfsic_state *state, 2354 struct btrfsic_block *const superblock, 2355 struct btrfs_super_block *const super_hdr) 2356{ 2357 int pass; 2358 2359 superblock->generation = btrfs_super_generation(super_hdr); 2360 if (!(superblock->generation > state->max_superblock_generation || 2361 0 == state->max_superblock_generation)) { 2362 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 2363 printk(KERN_INFO 2364 "btrfsic: superblock @%llu (%s/%llu/%d)" 2365 " with old gen %llu <= %llu\n", 2366 (unsigned long long)superblock->logical_bytenr, 2367 superblock->dev_state->name, 2368 (unsigned long long)superblock->dev_bytenr, 2369 superblock->mirror_num, 2370 (unsigned long long) 2371 btrfs_super_generation(super_hdr), 2372 (unsigned long long) 2373 state->max_superblock_generation); 2374 } else { 2375 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 2376 printk(KERN_INFO 2377 "btrfsic: got new superblock @%llu (%s/%llu/%d)" 2378 " with new gen %llu > %llu\n", 2379 (unsigned long long)superblock->logical_bytenr, 2380 
superblock->dev_state->name, 2381 (unsigned long long)superblock->dev_bytenr, 2382 superblock->mirror_num, 2383 (unsigned long long) 2384 btrfs_super_generation(super_hdr), 2385 (unsigned long long) 2386 state->max_superblock_generation); 2387 2388 state->max_superblock_generation = 2389 btrfs_super_generation(super_hdr); 2390 state->latest_superblock = superblock; 2391 } 2392 2393 for (pass = 0; pass < 3; pass++) { 2394 int ret; 2395 u64 next_bytenr; 2396 struct btrfsic_block *next_block; 2397 struct btrfsic_block_data_ctx tmp_next_block_ctx; 2398 struct btrfsic_block_link *l; 2399 int num_copies; 2400 int mirror_num; 2401 const char *additional_string = NULL; 2402 struct btrfs_disk_key tmp_disk_key; 2403 2404 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; 2405 tmp_disk_key.offset = 0; 2406 2407 switch (pass) { 2408 case 0: 2409 tmp_disk_key.objectid = 2410 cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); 2411 additional_string = "root "; 2412 next_bytenr = btrfs_super_root(super_hdr); 2413 if (state->print_mask & 2414 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2415 printk(KERN_INFO "root@%llu\n", 2416 (unsigned long long)next_bytenr); 2417 break; 2418 case 1: 2419 tmp_disk_key.objectid = 2420 cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); 2421 additional_string = "chunk "; 2422 next_bytenr = btrfs_super_chunk_root(super_hdr); 2423 if (state->print_mask & 2424 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2425 printk(KERN_INFO "chunk@%llu\n", 2426 (unsigned long long)next_bytenr); 2427 break; 2428 case 2: 2429 tmp_disk_key.objectid = 2430 cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); 2431 additional_string = "log "; 2432 next_bytenr = btrfs_super_log_root(super_hdr); 2433 if (0 == next_bytenr) 2434 continue; 2435 if (state->print_mask & 2436 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2437 printk(KERN_INFO "log@%llu\n", 2438 (unsigned long long)next_bytenr); 2439 break; 2440 } 2441 2442 num_copies = 2443 btrfs_num_copies(&state->root->fs_info->mapping_tree, 2444 next_bytenr, 
BTRFS_SUPER_INFO_SIZE); 2445 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 2446 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 2447 (unsigned long long)next_bytenr, num_copies); 2448 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2449 int was_created; 2450 2451 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2452 printk(KERN_INFO 2453 "btrfsic_process_written_superblock(" 2454 "mirror_num=%d)\n", mirror_num); 2455 ret = btrfsic_map_block(state, next_bytenr, 2456 BTRFS_SUPER_INFO_SIZE, 2457 &tmp_next_block_ctx, 2458 mirror_num); 2459 if (ret) { 2460 printk(KERN_INFO 2461 "btrfsic: btrfsic_map_block(@%llu," 2462 " mirror=%d) failed!\n", 2463 (unsigned long long)next_bytenr, 2464 mirror_num); 2465 return -1; 2466 } 2467 2468 next_block = btrfsic_block_lookup_or_add( 2469 state, 2470 &tmp_next_block_ctx, 2471 additional_string, 2472 1, 0, 1, 2473 mirror_num, 2474 &was_created); 2475 if (NULL == next_block) { 2476 printk(KERN_INFO 2477 "btrfsic: error, kmalloc failed!\n"); 2478 btrfsic_release_block_ctx(&tmp_next_block_ctx); 2479 return -1; 2480 } 2481 2482 next_block->disk_key = tmp_disk_key; 2483 if (was_created) 2484 next_block->generation = 2485 BTRFSIC_GENERATION_UNKNOWN; 2486 l = btrfsic_block_link_lookup_or_add( 2487 state, 2488 &tmp_next_block_ctx, 2489 next_block, 2490 superblock, 2491 BTRFSIC_GENERATION_UNKNOWN); 2492 btrfsic_release_block_ctx(&tmp_next_block_ctx); 2493 if (NULL == l) 2494 return -1; 2495 } 2496 } 2497 2498 if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) { 2499 WARN_ON(1); 2500 btrfsic_dump_tree(state); 2501 } 2502 2503 return 0; 2504} 2505 2506static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, 2507 struct btrfsic_block *const block, 2508 int recursion_level) 2509{ 2510 struct list_head *elem_ref_to; 2511 int ret = 0; 2512 2513 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { 2514 /* 2515 * Note that this situation can happen and does not 2516 * indicate an error in regular 
cases. It happens 2517 * when disk blocks are freed and later reused. 2518 * The check-integrity module is not aware of any 2519 * block free operations, it just recognizes block 2520 * write operations. Therefore it keeps the linkage 2521 * information for a block until a block is 2522 * rewritten. This can temporarily cause incorrect 2523 * and even circular linkage informations. This 2524 * causes no harm unless such blocks are referenced 2525 * by the most recent super block. 2526 */ 2527 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2528 printk(KERN_INFO 2529 "btrfsic: abort cyclic linkage (case 1).\n"); 2530 2531 return ret; 2532 } 2533 2534 /* 2535 * This algorithm is recursive because the amount of used stack 2536 * space is very small and the max recursion depth is limited. 2537 */ 2538 list_for_each(elem_ref_to, &block->ref_to_list) { 2539 const struct btrfsic_block_link *const l = 2540 list_entry(elem_ref_to, struct btrfsic_block_link, 2541 node_ref_to); 2542 2543 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2544 printk(KERN_INFO 2545 "rl=%d, %c @%llu (%s/%llu/%d)" 2546 " %u* refers to %c @%llu (%s/%llu/%d)\n", 2547 recursion_level, 2548 btrfsic_get_block_type(state, block), 2549 (unsigned long long)block->logical_bytenr, 2550 block->dev_state->name, 2551 (unsigned long long)block->dev_bytenr, 2552 block->mirror_num, 2553 l->ref_cnt, 2554 btrfsic_get_block_type(state, l->block_ref_to), 2555 (unsigned long long) 2556 l->block_ref_to->logical_bytenr, 2557 l->block_ref_to->dev_state->name, 2558 (unsigned long long)l->block_ref_to->dev_bytenr, 2559 l->block_ref_to->mirror_num); 2560 if (l->block_ref_to->never_written) { 2561 printk(KERN_INFO "btrfs: attempt to write superblock" 2562 " which references block %c @%llu (%s/%llu/%d)" 2563 " which is never written!\n", 2564 btrfsic_get_block_type(state, l->block_ref_to), 2565 (unsigned long long) 2566 l->block_ref_to->logical_bytenr, 2567 l->block_ref_to->dev_state->name, 2568 (unsigned long 
long)l->block_ref_to->dev_bytenr, 2569 l->block_ref_to->mirror_num); 2570 ret = -1; 2571 } else if (!l->block_ref_to->is_iodone) { 2572 printk(KERN_INFO "btrfs: attempt to write superblock" 2573 " which references block %c @%llu (%s/%llu/%d)" 2574 " which is not yet iodone!\n", 2575 btrfsic_get_block_type(state, l->block_ref_to), 2576 (unsigned long long) 2577 l->block_ref_to->logical_bytenr, 2578 l->block_ref_to->dev_state->name, 2579 (unsigned long long)l->block_ref_to->dev_bytenr, 2580 l->block_ref_to->mirror_num); 2581 ret = -1; 2582 } else if (l->parent_generation != 2583 l->block_ref_to->generation && 2584 BTRFSIC_GENERATION_UNKNOWN != 2585 l->parent_generation && 2586 BTRFSIC_GENERATION_UNKNOWN != 2587 l->block_ref_to->generation) { 2588 printk(KERN_INFO "btrfs: attempt to write superblock" 2589 " which references block %c @%llu (%s/%llu/%d)" 2590 " with generation %llu !=" 2591 " parent generation %llu!\n", 2592 btrfsic_get_block_type(state, l->block_ref_to), 2593 (unsigned long long) 2594 l->block_ref_to->logical_bytenr, 2595 l->block_ref_to->dev_state->name, 2596 (unsigned long long)l->block_ref_to->dev_bytenr, 2597 l->block_ref_to->mirror_num, 2598 (unsigned long long)l->block_ref_to->generation, 2599 (unsigned long long)l->parent_generation); 2600 ret = -1; 2601 } else if (l->block_ref_to->flush_gen > 2602 l->block_ref_to->dev_state->last_flush_gen) { 2603 printk(KERN_INFO "btrfs: attempt to write superblock" 2604 " which references block %c @%llu (%s/%llu/%d)" 2605 " which is not flushed out of disk's write cache" 2606 " (block flush_gen=%llu," 2607 " dev->flush_gen=%llu)!\n", 2608 btrfsic_get_block_type(state, l->block_ref_to), 2609 (unsigned long long) 2610 l->block_ref_to->logical_bytenr, 2611 l->block_ref_to->dev_state->name, 2612 (unsigned long long)l->block_ref_to->dev_bytenr, 2613 l->block_ref_to->mirror_num, 2614 (unsigned long long)block->flush_gen, 2615 (unsigned long long) 2616 l->block_ref_to->dev_state->last_flush_gen); 2617 ret = -1; 2618 
		} else if (-1 == btrfsic_check_all_ref_blocks(state,
							      l->block_ref_to,
							      recursion_level +
							      1)) {
			ret = -1;
		}
	}

	return ret;
}

/*
 * Return 1 if @block is reachable (directly or indirectly) from the most
 * recent superblock, 0 otherwise. Walks the ref_from_list upwards with a
 * bounded recursion depth.
 */
static int btrfsic_is_block_ref_by_superblock(
		const struct btrfsic_state *state,
		const struct btrfsic_block *block,
		int recursion_level)
{
	struct list_head *elem_ref_from;

	if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
		/* refer to comment at "abort cyclic linkage (case 1)" */
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "btrfsic: abort cyclic linkage (case 2).\n");

		return 0;
	}

	/*
	 * This algorithm is recursive because the amount of used stack space
	 * is very small and the max recursion depth is limited.
	 */
	list_for_each(elem_ref_from, &block->ref_from_list) {
		const struct btrfsic_block_link *const l =
		    list_entry(elem_ref_from, struct btrfsic_block_link,
			       node_ref_from);

		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "rl=%d, %c @%llu (%s/%llu/%d)"
			       " is ref %u* from %c @%llu (%s/%llu/%d)\n",
			       recursion_level,
			       btrfsic_get_block_type(state, block),
			       (unsigned long long)block->logical_bytenr,
			       block->dev_state->name,
			       (unsigned long long)block->dev_bytenr,
			       block->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_from),
			       (unsigned long long)
			       l->block_ref_from->logical_bytenr,
			       l->block_ref_from->dev_state->name,
			       (unsigned long long)
			       l->block_ref_from->dev_bytenr,
			       l->block_ref_from->mirror_num);
		/* direct reference from the latest superblock? */
		if (l->block_ref_from->is_superblock &&
		    state->latest_superblock->dev_bytenr ==
		    l->block_ref_from->dev_bytenr &&
		    state->latest_superblock->dev_state->bdev ==
		    l->block_ref_from->dev_state->bdev)
			return 1;
		else if (btrfsic_is_block_ref_by_superblock(state,
							    l->block_ref_from,
							    recursion_level +
							    1))
			return 1;
	}

	return 0;
}

/* Log the creation (or refcount bump) of a block link. */
static void btrfsic_print_add_link(const struct btrfsic_state *state,
				   const struct btrfsic_block_link *l)
{
	printk(KERN_INFO
	       "Add %u* link from %c @%llu (%s/%llu/%d)"
	       " to %c @%llu (%s/%llu/%d).\n",
	       l->ref_cnt,
	       btrfsic_get_block_type(state, l->block_ref_from),
	       (unsigned long long)l->block_ref_from->logical_bytenr,
	       l->block_ref_from->dev_state->name,
	       (unsigned long long)l->block_ref_from->dev_bytenr,
	       l->block_ref_from->mirror_num,
	       btrfsic_get_block_type(state, l->block_ref_to),
	       (unsigned long long)l->block_ref_to->logical_bytenr,
	       l->block_ref_to->dev_state->name,
	       (unsigned long long)l->block_ref_to->dev_bytenr,
	       l->block_ref_to->mirror_num);
}

/* Log the removal (or refcount drop) of a block link. */
static void btrfsic_print_rem_link(const struct btrfsic_state *state,
				   const struct btrfsic_block_link *l)
{
	printk(KERN_INFO
	       "Rem %u* link from %c @%llu (%s/%llu/%d)"
	       " to %c @%llu (%s/%llu/%d).\n",
	       l->ref_cnt,
	       btrfsic_get_block_type(state, l->block_ref_from),
	       (unsigned long long)l->block_ref_from->logical_bytenr,
	       l->block_ref_from->dev_state->name,
	       (unsigned long long)l->block_ref_from->dev_bytenr,
	       l->block_ref_from->mirror_num,
	       btrfsic_get_block_type(state, l->block_ref_to),
	       (unsigned long long)l->block_ref_to->logical_bytenr,
	       l->block_ref_to->dev_state->name,
	       (unsigned long long)l->block_ref_to->dev_bytenr,
	       l->block_ref_to->mirror_num);
}

/*
 * Classify a block for log output: 'S' = the latest superblock, 's' =
 * any other superblock, 'M' = metadata, 'D' = data.
 */
static char btrfsic_get_block_type(const struct btrfsic_state *state,
				   const struct btrfsic_block *block)
{
	if (block->is_superblock &&
	    state->latest_superblock->dev_bytenr == block->dev_bytenr &&
	    state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
		return 'S';
	else if (block->is_superblock)
		return 's';
	else if (block->is_metadata)
		return 'M';
	else
		return 'D';
}

/* Dump the whole reference tree, starting at the latest superblock. */
static void btrfsic_dump_tree(const struct btrfsic_state *state)
{
	btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
}

/*
 * Print the subtree of blocks referenced by @block, indented by
 * @indent_level columns; output is truncated with "[...]" once
 * BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL is exceeded.
 */
static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
				  const struct btrfsic_block *block,
				  int indent_level)
{
	struct list_head *elem_ref_to;
	int indent_add;
	static char buf[80];
	int cursor_position;

	/*
	 * Should better fill an on-stack buffer with a complete line and
	 * dump it at once when it is time to print a newline character.
	 */

	/*
	 * This algorithm is recursive because the amount of used stack space
	 * is very small and the max recursion depth is limited.
	 */
	indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
			     btrfsic_get_block_type(state, block),
			     (unsigned long long)block->logical_bytenr,
			     block->dev_state->name,
			     (unsigned long long)block->dev_bytenr,
			     block->mirror_num);
	if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
		printk("[...]\n");
		return;
	}
	printk(buf);
	indent_level += indent_add;
	if (list_empty(&block->ref_to_list)) {
		printk("\n");
		return;
	}
	if (block->mirror_num > 1 &&
	    !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
		printk(" [...]\n");
		return;
	}

	cursor_position = indent_level;
	list_for_each(elem_ref_to, &block->ref_to_list) {
		const struct btrfsic_block_link *const l =
		    list_entry(elem_ref_to, struct btrfsic_block_link,
			       node_ref_to);

		/* re-indent continuation lines to the current level */
		while (cursor_position < indent_level) {
			printk(" ");
			cursor_position++;
		}
		if (l->ref_cnt > 1)
			indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
		else
			indent_add = sprintf(buf, " --> ");
		if (indent_level + indent_add >
		    BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
			printk("[...]\n");
			cursor_position = 0;
			continue;
		}

		printk(buf);

		btrfsic_dump_tree_sub(state, l->block_ref_to,
				      indent_level + indent_add);
		cursor_position = 0;
	}
}

/*
 * Look up the link from @from_block to @next_block; create it if it does
 * not exist yet, otherwise bump its refcount. Returns NULL on allocation
 * failure.
 */
static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
		struct btrfsic_state *state,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block *next_block,
		struct btrfsic_block *from_block,
		u64 parent_generation)
{
	struct btrfsic_block_link *l;

	l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
						next_block_ctx->dev_bytenr,
						from_block->dev_state->bdev,
						from_block->dev_bytenr,
						&state->block_link_hashtable);
	if (NULL == l) {
		l = btrfsic_block_link_alloc();
		if (NULL == l) {
			printk(KERN_INFO
			       "btrfsic: error, kmalloc" " failed!\n");
			return NULL;
		}

		l->block_ref_to = next_block;
		l->block_ref_from = from_block;
		l->ref_cnt = 1;
		l->parent_generation = parent_generation;

		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			btrfsic_print_add_link(state, l);

		list_add(&l->node_ref_to, &from_block->ref_to_list);
		list_add(&l->node_ref_from, &next_block->ref_from_list);

		btrfsic_block_link_hashtable_add(l,
						 &state->block_link_hashtable);
	} else {
		l->ref_cnt++;
		l->parent_generation = parent_generation;
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			btrfsic_print_add_link(state, l);
	}

	return l;
}

/*
 * Look up the block at (dev, dev_bytenr); create and register it if it
 * is not known yet. *was_created (if non-NULL) tells the caller which
 * case occurred. Returns NULL on allocation or dev_state lookup failure.
 */
static struct btrfsic_block *btrfsic_block_lookup_or_add(
		struct btrfsic_state *state,
		struct btrfsic_block_data_ctx *block_ctx,
		const char *additional_string,
		int is_metadata,
		int is_iodone,
		int never_written,
		int mirror_num,
		int *was_created)
{
	struct btrfsic_block *block;

	block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
					       block_ctx->dev_bytenr,
					       &state->block_hashtable);
	if (NULL == block) {
		struct btrfsic_dev_state *dev_state;

		block = btrfsic_block_alloc();
		if (NULL == block) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			return NULL;
		}
		dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
		if (NULL == dev_state) {
			printk(KERN_INFO
			       "btrfsic: error, lookup dev_state failed!\n");
			btrfsic_block_free(block);
			return NULL;
		}
		block->dev_state = dev_state;
		block->dev_bytenr = block_ctx->dev_bytenr;
		block->logical_bytenr = block_ctx->start;
		block->is_metadata = is_metadata;
		block->is_iodone = is_iodone;
		block->never_written = never_written;
		block->mirror_num = mirror_num;
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "New %s%c-block @%llu (%s/%llu/%d)\n",
			       additional_string,
			       btrfsic_get_block_type(state, block),
			       (unsigned long long)block->logical_bytenr,
			       dev_state->name,
			       (unsigned long long)block->dev_bytenr,
			       mirror_num);
		list_add(&block->all_blocks_node, &state->all_blocks_list);
		btrfsic_block_hashtable_add(block, &state->block_hashtable);
		if (NULL != was_created)
			*was_created = 1;
	} else {
		if (NULL != was_created)
			*was_created = 0;
	}

	return block;
}

/*
 * Verify that the logical bytenr stored in a written metadata block maps
 * (on some mirror) to the device+offset the write was actually submitted
 * to; complain loudly (WARN_ON) otherwise.
 */
static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
					   u64 bytenr,
					   struct btrfsic_dev_state *dev_state,
					   u64 dev_bytenr)
{
	int num_copies;
	int mirror_num;
	int ret;
	struct btrfsic_block_data_ctx block_ctx;
	int match = 0;

	num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree,
				      bytenr, state->metablock_size);

	for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
		ret = btrfsic_map_block(state, bytenr, state->metablock_size,
					&block_ctx, mirror_num);
		if (ret) {
			printk(KERN_INFO "btrfsic:"
			       " btrfsic_map_block(logical @%llu,"
			       " mirror %d) failed!\n",
			       (unsigned long long)bytenr, mirror_num);
			continue;
		}

		if (dev_state->bdev == block_ctx.dev->bdev &&
		    dev_bytenr == block_ctx.dev_bytenr) {
			match++;
			btrfsic_release_block_ctx(&block_ctx);
			break;
		}
		btrfsic_release_block_ctx(&block_ctx);
	}

	if (!match) {
		printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
		       " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
		       " phys_bytenr=%llu)!\n",
		       (unsigned long long)bytenr, dev_state->name,
		       (unsigned long long)dev_bytenr);
		/* show where each mirror actually maps to, as a debug aid */
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			ret = btrfsic_map_block(state, bytenr,
						state->metablock_size,
						&block_ctx, mirror_num);
			if (ret)
				continue;

			printk(KERN_INFO "Read logical bytenr @%llu maps to"
			       " (%s/%llu/%d)\n",
			       (unsigned long long)bytenr,
			       block_ctx.dev->name,
			       (unsigned long long)block_ctx.dev_bytenr,
			       mirror_num);
		}
		WARN_ON(1);
	}
}

/* Map a block_device to its btrfsic_dev_state (NULL if not monitored). */
static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
		struct block_device *bdev)
{
	struct btrfsic_dev_state *ds;

	ds = btrfsic_dev_state_hashtable_lookup(bdev,
						&btrfsic_dev_state_hashtable);
	return ds;
}

/*
 * Replacement for submit_bh(): intercept superblock writes and FLUSH
 * requests for monitored devices, then forward to the real submit_bh().
 */
int btrfsic_submit_bh(int rw, struct buffer_head *bh)
{
	struct btrfsic_dev_state *dev_state;

	if (!btrfsic_is_initialized)
		return submit_bh(rw, bh);

	mutex_lock(&btrfsic_mutex);
	/* since btrfsic_submit_bh() might also be called before
	 * btrfsic_mount(), this might return NULL */
	dev_state = btrfsic_dev_state_lookup(bh->b_bdev);

	/* Only called to write the superblock (incl.
	   FLUSH/FUA) */
	if (NULL != dev_state &&
	    (rw & WRITE) && bh->b_size > 0) {
		u64 dev_bytenr;

		/* NOTE(review): assumes a 4096 byte block size for the
		 * superblock buffer_head; ((u64)bh->b_blocknr) * bh->b_size
		 * would avoid the hard-coded constant -- confirm */
		dev_bytenr = 4096 * bh->b_blocknr;
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
			       " size=%lu, data=%p, bdev=%p)\n",
			       rw, (unsigned long)bh->b_blocknr,
			       (unsigned long long)dev_bytenr,
			       (unsigned long)bh->b_size, bh->b_data,
			       bh->b_bdev);
		btrfsic_process_written_block(dev_state, dev_bytenr,
					      &bh->b_data, 1, NULL,
					      NULL, bh, rw);
	} else if (NULL != dev_state && (rw & REQ_FLUSH)) {
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
			       rw, bh->b_bdev);
		if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
			/* only one FLUSH per device is tracked at a time; a
			 * second one while the dummy block is still in
			 * flight is passed through unmonitored */
			if ((dev_state->state->print_mask &
			     (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			      BTRFSIC_PRINT_MASK_VERBOSE)))
				printk(KERN_INFO
				       "btrfsic_submit_bh(%s) with FLUSH"
				       " but dummy block already in use"
				       " (ignored)!\n",
				       dev_state->name);
		} else {
			struct btrfsic_block *const block =
			    &dev_state->dummy_block_for_bio_bh_flush;

			/* track the FLUSH via the per-device dummy block so
			 * btrfsic_bh_end_io() advances last_flush_gen */
			block->is_iodone = 0;
			block->never_written = 0;
			block->iodone_w_error = 0;
			block->flush_gen = dev_state->last_flush_gen + 1;
			block->submit_bio_bh_rw = rw;
			block->orig_bio_bh_private = bh->b_private;
			block->orig_bio_bh_end_io.bh = bh->b_end_io;
			block->next_in_same_bio = NULL;
			bh->b_private = block;
			bh->b_end_io = btrfsic_bh_end_io;
		}
	}
	mutex_unlock(&btrfsic_mutex);
	return submit_bh(rw, bh);
}

/*
 * Replacement for submit_bio(): intercept writes and FLUSH requests for
 * monitored devices, then forward to the real submit_bio().
 */
void btrfsic_submit_bio(int rw, struct bio *bio)
{
	struct btrfsic_dev_state *dev_state;

	if (!btrfsic_is_initialized) {
		submit_bio(rw, bio);
		return;
	}

	mutex_lock(&btrfsic_mutex);
	/* since btrfsic_submit_bio() is also called before
	 * btrfsic_mount(), this might return NULL */
	dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
	if (NULL != dev_state &&
	    (rw & WRITE) && NULL != bio->bi_io_vec) {
		unsigned int i;
		u64 dev_bytenr;
		int bio_is_patched;
		char **mapped_datav;

		dev_bytenr = 512 * bio->bi_sector;
		bio_is_patched = 0;
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bio(rw=0x%x, bi_vcnt=%u,"
			       " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n",
			       rw, bio->bi_vcnt, (unsigned long)bio->bi_sector,
			       (unsigned long long)dev_bytenr,
			       bio->bi_bdev);

		/* map every page of the bio so the checker can inspect the
		 * payload */
		mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
				       GFP_NOFS);
		if (!mapped_datav)
			goto leave;
		for (i = 0; i < bio->bi_vcnt; i++) {
			BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
			mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
			if (!mapped_datav[i]) {
				/* unmap what was mapped so far and bail out */
				while (i > 0) {
					i--;
					kunmap(bio->bi_io_vec[i].bv_page);
				}
				kfree(mapped_datav);
				goto leave;
			}
			if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			     BTRFSIC_PRINT_MASK_VERBOSE) ==
			    (dev_state->state->print_mask &
			     (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			      BTRFSIC_PRINT_MASK_VERBOSE)))
				printk(KERN_INFO
				       "#%u: page=%p, len=%u, offset=%u\n",
				       i, bio->bi_io_vec[i].bv_page,
				       bio->bi_io_vec[i].bv_len,
				       bio->bi_io_vec[i].bv_offset);
		}
		btrfsic_process_written_block(dev_state, dev_bytenr,
					      mapped_datav, bio->bi_vcnt,
					      bio, &bio_is_patched,
					      NULL, rw);
		while (i > 0) {
			i--;
			kunmap(bio->bi_io_vec[i].bv_page);
		}
		kfree(mapped_datav);
	} else if (NULL != dev_state && (rw & REQ_FLUSH)) {
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
			       rw, bio->bi_bdev);
		if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
			if ((dev_state->state->print_mask &
			     (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			      BTRFSIC_PRINT_MASK_VERBOSE)))
				printk(KERN_INFO
				       "btrfsic_submit_bio(%s) with FLUSH"
				       " but dummy block already in use"
				       " (ignored)!\n",
				       dev_state->name);
		} else {
			struct btrfsic_block *const block =
			    &dev_state->dummy_block_for_bio_bh_flush;

			block->is_iodone = 0;
			block->never_written = 0;
			block->iodone_w_error = 0;
			block->flush_gen = dev_state->last_flush_gen + 1;
			block->submit_bio_bh_rw = rw;
			block->orig_bio_bh_private = bio->bi_private;
			block->orig_bio_bh_end_io.bio = bio->bi_end_io;
			block->next_in_same_bio = NULL;
			bio->bi_private = block;
			bio->bi_end_io = btrfsic_bio_end_io;
		}
	}
leave:
	mutex_unlock(&btrfsic_mutex);

	submit_bio(rw, bio);
}

/*
 * Attach the integrity checker to a freshly mounted filesystem. First
 * rejects block size configurations the checker cannot handle.
 */
int btrfsic_mount(struct btrfs_root *root,
		  struct btrfs_fs_devices *fs_devices,
		  int including_extent_data, u32 print_mask)
{
	int ret;
	struct btrfsic_state *state;
	struct list_head *dev_head = &fs_devices->devices;
	struct btrfs_device *device;

	if (root->nodesize != root->leafsize) {
		printk(KERN_INFO
		       "btrfsic: cannot handle nodesize %d != leafsize %d!\n",
		       root->nodesize, root->leafsize);
		return -1;
	}
	if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
		printk(KERN_INFO
		       "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
		       root->nodesize, (unsigned long)PAGE_CACHE_SIZE);
		return -1;
	}
	if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
		printk(KERN_INFO
		       "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
		       root->leafsize, (unsigned long)PAGE_CACHE_SIZE);
		return -1;
	}
	if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
		printk(KERN_INFO
		       "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
root->sectorsize, (unsigned long)PAGE_CACHE_SIZE); 3185 return -1; 3186 } 3187 state = kzalloc(sizeof(*state), GFP_NOFS); 3188 if (NULL == state) { 3189 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n"); 3190 return -1; 3191 } 3192 3193 if (!btrfsic_is_initialized) { 3194 mutex_init(&btrfsic_mutex); 3195 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable); 3196 btrfsic_is_initialized = 1; 3197 } 3198 mutex_lock(&btrfsic_mutex); 3199 state->root = root; 3200 state->print_mask = print_mask; 3201 state->include_extent_data = including_extent_data; 3202 state->csum_size = 0; 3203 state->metablock_size = root->nodesize; 3204 state->datablock_size = root->sectorsize; 3205 INIT_LIST_HEAD(&state->all_blocks_list); 3206 btrfsic_block_hashtable_init(&state->block_hashtable); 3207 btrfsic_block_link_hashtable_init(&state->block_link_hashtable); 3208 state->max_superblock_generation = 0; 3209 state->latest_superblock = NULL; 3210 3211 list_for_each_entry(device, dev_head, dev_list) { 3212 struct btrfsic_dev_state *ds; 3213 char *p; 3214 3215 if (!device->bdev || !device->name) 3216 continue; 3217 3218 ds = btrfsic_dev_state_alloc(); 3219 if (NULL == ds) { 3220 printk(KERN_INFO 3221 "btrfs check-integrity: kmalloc() failed!\n"); 3222 mutex_unlock(&btrfsic_mutex); 3223 return -1; 3224 } 3225 ds->bdev = device->bdev; 3226 ds->state = state; 3227 bdevname(ds->bdev, ds->name); 3228 ds->name[BDEVNAME_SIZE - 1] = '\0'; 3229 for (p = ds->name; *p != '\0'; p++); 3230 while (p > ds->name && *p != '/') 3231 p--; 3232 if (*p == '/') 3233 p++; 3234 strlcpy(ds->name, p, sizeof(ds->name)); 3235 btrfsic_dev_state_hashtable_add(ds, 3236 &btrfsic_dev_state_hashtable); 3237 } 3238 3239 ret = btrfsic_process_superblock(state, fs_devices); 3240 if (0 != ret) { 3241 mutex_unlock(&btrfsic_mutex); 3242 btrfsic_unmount(root, fs_devices); 3243 return ret; 3244 } 3245 3246 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE) 3247 btrfsic_dump_database(state); 3248 if 
(state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE) 3249 btrfsic_dump_tree(state); 3250 3251 mutex_unlock(&btrfsic_mutex); 3252 return 0; 3253} 3254 3255void btrfsic_unmount(struct btrfs_root *root, 3256 struct btrfs_fs_devices *fs_devices) 3257{ 3258 struct list_head *elem_all; 3259 struct list_head *tmp_all; 3260 struct btrfsic_state *state; 3261 struct list_head *dev_head = &fs_devices->devices; 3262 struct btrfs_device *device; 3263 3264 if (!btrfsic_is_initialized) 3265 return; 3266 3267 mutex_lock(&btrfsic_mutex); 3268 3269 state = NULL; 3270 list_for_each_entry(device, dev_head, dev_list) { 3271 struct btrfsic_dev_state *ds; 3272 3273 if (!device->bdev || !device->name) 3274 continue; 3275 3276 ds = btrfsic_dev_state_hashtable_lookup( 3277 device->bdev, 3278 &btrfsic_dev_state_hashtable); 3279 if (NULL != ds) { 3280 state = ds->state; 3281 btrfsic_dev_state_hashtable_remove(ds); 3282 btrfsic_dev_state_free(ds); 3283 } 3284 } 3285 3286 if (NULL == state) { 3287 printk(KERN_INFO 3288 "btrfsic: error, cannot find state information" 3289 " on umount!\n"); 3290 mutex_unlock(&btrfsic_mutex); 3291 return; 3292 } 3293 3294 /* 3295 * Don't care about keeping the lists' state up to date, 3296 * just free all memory that was allocated dynamically. 3297 * Free the blocks and the block_links. 
3298 */ 3299 list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) { 3300 struct btrfsic_block *const b_all = 3301 list_entry(elem_all, struct btrfsic_block, 3302 all_blocks_node); 3303 struct list_head *elem_ref_to; 3304 struct list_head *tmp_ref_to; 3305 3306 list_for_each_safe(elem_ref_to, tmp_ref_to, 3307 &b_all->ref_to_list) { 3308 struct btrfsic_block_link *const l = 3309 list_entry(elem_ref_to, 3310 struct btrfsic_block_link, 3311 node_ref_to); 3312 3313 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 3314 btrfsic_print_rem_link(state, l); 3315 3316 l->ref_cnt--; 3317 if (0 == l->ref_cnt) 3318 btrfsic_block_link_free(l); 3319 } 3320 3321 if (b_all->is_iodone) 3322 btrfsic_block_free(b_all); 3323 else 3324 printk(KERN_INFO "btrfs: attempt to free %c-block" 3325 " @%llu (%s/%llu/%d) on umount which is" 3326 " not yet iodone!\n", 3327 btrfsic_get_block_type(state, b_all), 3328 (unsigned long long)b_all->logical_bytenr, 3329 b_all->dev_state->name, 3330 (unsigned long long)b_all->dev_bytenr, 3331 b_all->mirror_num); 3332 } 3333 3334 mutex_unlock(&btrfsic_mutex); 3335 3336 kfree(state); 3337} 3338