check-integrity.c revision 5d9640517d92d05843711ea982cbeff42d7ed32d
1/* 2 * Copyright (C) STRATO AG 2011. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19/* 20 * This module can be used to catch cases when the btrfs kernel 21 * code executes write requests to the disk that bring the file 22 * system in an inconsistent state. In such a state, a power-loss 23 * or kernel panic event would cause that the data on disk is 24 * lost or at least damaged. 25 * 26 * Code is added that examines all block write requests during 27 * runtime (including writes of the super block). Three rules 28 * are verified and an error is printed on violation of the 29 * rules: 30 * 1. It is not allowed to write a disk block which is 31 * currently referenced by the super block (either directly 32 * or indirectly). 33 * 2. When a super block is written, it is verified that all 34 * referenced (directly or indirectly) blocks fulfill the 35 * following requirements: 36 * 2a. All referenced blocks have either been present when 37 * the file system was mounted, (i.e., they have been 38 * referenced by the super block) or they have been 39 * written since then and the write completion callback 40 * was called and no write error was indicated and a 41 * FLUSH request to the device where these blocks are 42 * located was received and completed. 43 * 2b. 
All referenced blocks need to have a generation 44 * number which is equal to the parent's number. 45 * 46 * One issue that was found using this module was that the log 47 * tree on disk became temporarily corrupted because disk blocks 48 * that had been in use for the log tree had been freed and 49 * reused too early, while being referenced by the written super 50 * block. 51 * 52 * The search term in the kernel log that can be used to filter 53 * on the existence of detected integrity issues is 54 * "btrfs: attempt". 55 * 56 * The integrity check is enabled via mount options. These 57 * mount options are only supported if the integrity check 58 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY. 59 * 60 * Example #1, apply integrity checks to all metadata: 61 * mount /dev/sdb1 /mnt -o check_int 62 * 63 * Example #2, apply integrity checks to all metadata and 64 * to data extents: 65 * mount /dev/sdb1 /mnt -o check_int_data 66 * 67 * Example #3, apply integrity checks to all metadata and dump 68 * the tree that the super block references to kernel messages 69 * each time after a super block was written: 70 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263 71 * 72 * If the integrity check tool is included and activated in 73 * the mount options, plenty of kernel memory is used, and 74 * plenty of additional CPU cycles are spent. Enabling this 75 * functionality is not intended for normal use. In most 76 * cases, unless you are a btrfs developer who needs to verify 77 * the integrity of (super)-block write requests, do not 78 * enable the config option BTRFS_FS_CHECK_INTEGRITY to 79 * include and compile the integrity check tool. 
80 */ 81 82#include <linux/sched.h> 83#include <linux/slab.h> 84#include <linux/buffer_head.h> 85#include <linux/mutex.h> 86#include <linux/crc32c.h> 87#include <linux/genhd.h> 88#include <linux/blkdev.h> 89#include "ctree.h" 90#include "disk-io.h" 91#include "transaction.h" 92#include "extent_io.h" 93#include "volumes.h" 94#include "print-tree.h" 95#include "locking.h" 96#include "check-integrity.h" 97#include "rcu-string.h" 98 99#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 100#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 101#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100 102#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051 103#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807 104#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530 105#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300 106#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters, 107 * excluding " [...]" */ 108#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1) 109 110/* 111 * The definition of the bitmask fields for the print_mask. 112 * They are specified with the mount option check_integrity_print_mask. 
 */
#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE			0x00000001
#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION		0x00000002
#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE			0x00000004
#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE			0x00000008
#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH			0x00000010
#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH			0x00000020
#define BTRFSIC_PRINT_MASK_VERBOSE				0x00000040
#define BTRFSIC_PRINT_MASK_VERY_VERBOSE				0x00000080
#define BTRFSIC_PRINT_MASK_INITIAL_TREE				0x00000100
#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES			0x00000200
#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE			0x00000400
#define BTRFSIC_PRINT_MASK_NUM_COPIES				0x00000800
#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS		0x00001000

struct btrfsic_dev_state;
struct btrfsic_state;

/*
 * One btrfsic_block object tracks the verifier's knowledge about a single
 * on-disk block (metadata, data or superblock). Blocks are kept in
 * state->all_blocks_list and are additionally hashed by (bdev, dev_bytenr).
 */
struct btrfsic_block {
	u32 magic_num;		/* only used for debug purposes */
	unsigned int is_metadata:1;	/* if it is meta-data, not data-data */
	unsigned int is_superblock:1;	/* if it is one of the superblocks */
	unsigned int is_iodone:1;	/* if is done by lower subsystem */
	unsigned int iodone_w_error:1;	/* error was indicated to endio */
	unsigned int never_written:1;	/* block was added because it was
					 * referenced, not because it was
					 * written */
	unsigned int mirror_num:2;	/* large enough to hold
					 * BTRFS_SUPER_MIRROR_MAX */
	struct btrfsic_dev_state *dev_state;
	u64 dev_bytenr;		/* key, physical byte num on disk */
	u64 logical_bytenr;	/* logical byte num on disk */
	u64 generation;
	struct btrfs_disk_key disk_key;	/* extra info to print in case of
					 * issues, will not always be correct */
	struct list_head collision_resolving_node;	/* list node */
	struct list_head all_blocks_node;	/* list node */

	/* the following two lists contain block_link items */
	struct list_head ref_to_list;	/* list */
	struct list_head ref_from_list;	/* list */
	struct btrfsic_block *next_in_same_bio;
	void *orig_bio_bh_private;	/* saved private of intercepted bio/bh */
	union {
		bio_end_io_t *bio;	/* saved original bio end_io callback */
		bh_end_io_t *bh;	/* saved original bh end_io callback */
	} orig_bio_bh_end_io;
	int submit_bio_bh_rw;
	u64 flush_gen; /* only valid if !never_written */
};

/*
 * Elements of this type are allocated dynamically and are required because
 * each block object can refer to and can be referred from multiple blocks.
 * The key to look them up in the hashtable is the dev_bytenr of the block
 * referred to plus the one of the block referred from.
 * The fact that they are searchable via a hashtable and that a ref_cnt is
 * maintained is not required for the btrfs integrity check algorithm
 * itself, it is only used to make the output more beautiful in case an
 * error is detected (an error is defined as a write operation to a block
 * while that block is still referenced).
 */
struct btrfsic_block_link {
	u32 magic_num;		/* only used for debug purposes */
	u32 ref_cnt;
	struct list_head node_ref_to;	/* list node */
	struct list_head node_ref_from;	/* list node */
	struct list_head collision_resolving_node;	/* list node */
	struct btrfsic_block *block_ref_to;
	struct btrfsic_block *block_ref_from;
	u64 parent_generation;
};

/* Per-device verifier state, hashed by the block_device pointer. */
struct btrfsic_dev_state {
	u32 magic_num;		/* only used for debug purposes */
	struct block_device *bdev;
	struct btrfsic_state *state;
	struct list_head collision_resolving_node;	/* list node */
	struct btrfsic_block dummy_block_for_bio_bh_flush;
	u64 last_flush_gen;
	char name[BDEVNAME_SIZE];
};

struct btrfsic_block_hashtable {
	struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
};

struct btrfsic_block_link_hashtable {
	struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
};

struct btrfsic_dev_state_hashtable {
	struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
};

/* Mapping of one logical block to its physical location and mapped data. */
struct btrfsic_block_data_ctx {
	u64 start;	/* virtual bytenr */
	u64 dev_bytenr;	/* physical bytenr on device */
	u32 len;
	struct btrfsic_dev_state *dev;
	char **datav;
	struct page **pagev;
	void *mem_to_free;
};

/* This structure is used to implement recursion without occupying
 * any stack space, refer to btrfsic_process_metablock() */
struct btrfsic_stack_frame {
	u32 magic;
	u32 nr;
	int error;
	int i;
	int limit_nesting;
	int num_copies;
	int mirror_num;
	struct btrfsic_block *block;
	struct btrfsic_block_data_ctx *block_ctx;
	struct btrfsic_block *next_block;
	struct btrfsic_block_data_ctx next_block_ctx;
	struct btrfs_header *hdr;
	struct btrfsic_stack_frame *prev;
};

/* Some state per mounted filesystem */
struct btrfsic_state {
	u32 print_mask;
	int include_extent_data;
	int csum_size;
	struct list_head all_blocks_list;
	struct btrfsic_block_hashtable block_hashtable;
	struct btrfsic_block_link_hashtable block_link_hashtable;
	struct btrfs_root *root;
	u64 max_superblock_generation;
	struct btrfsic_block *latest_superblock;
	u32 metablock_size;
	u32 datablock_size;
};

static void btrfsic_block_init(struct btrfsic_block *b);
static struct btrfsic_block *btrfsic_block_alloc(void);
static void btrfsic_block_free(struct btrfsic_block *b);
static void btrfsic_block_link_init(struct btrfsic_block_link *n);
static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
static void btrfsic_block_link_free(struct btrfsic_block_link *n);
static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
					struct btrfsic_block_hashtable *h);
static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
static struct btrfsic_block
*btrfsic_block_hashtable_lookup( 265 struct block_device *bdev, 266 u64 dev_bytenr, 267 struct btrfsic_block_hashtable *h); 268static void btrfsic_block_link_hashtable_init( 269 struct btrfsic_block_link_hashtable *h); 270static void btrfsic_block_link_hashtable_add( 271 struct btrfsic_block_link *l, 272 struct btrfsic_block_link_hashtable *h); 273static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l); 274static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( 275 struct block_device *bdev_ref_to, 276 u64 dev_bytenr_ref_to, 277 struct block_device *bdev_ref_from, 278 u64 dev_bytenr_ref_from, 279 struct btrfsic_block_link_hashtable *h); 280static void btrfsic_dev_state_hashtable_init( 281 struct btrfsic_dev_state_hashtable *h); 282static void btrfsic_dev_state_hashtable_add( 283 struct btrfsic_dev_state *ds, 284 struct btrfsic_dev_state_hashtable *h); 285static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds); 286static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup( 287 struct block_device *bdev, 288 struct btrfsic_dev_state_hashtable *h); 289static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void); 290static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf); 291static int btrfsic_process_superblock(struct btrfsic_state *state, 292 struct btrfs_fs_devices *fs_devices); 293static int btrfsic_process_metablock(struct btrfsic_state *state, 294 struct btrfsic_block *block, 295 struct btrfsic_block_data_ctx *block_ctx, 296 int limit_nesting, int force_iodone_flag); 297static void btrfsic_read_from_block_data( 298 struct btrfsic_block_data_ctx *block_ctx, 299 void *dst, u32 offset, size_t len); 300static int btrfsic_create_link_to_next_block( 301 struct btrfsic_state *state, 302 struct btrfsic_block *block, 303 struct btrfsic_block_data_ctx 304 *block_ctx, u64 next_bytenr, 305 int limit_nesting, 306 struct btrfsic_block_data_ctx *next_block_ctx, 307 struct btrfsic_block 
**next_blockp, 308 int force_iodone_flag, 309 int *num_copiesp, int *mirror_nump, 310 struct btrfs_disk_key *disk_key, 311 u64 parent_generation); 312static int btrfsic_handle_extent_data(struct btrfsic_state *state, 313 struct btrfsic_block *block, 314 struct btrfsic_block_data_ctx *block_ctx, 315 u32 item_offset, int force_iodone_flag); 316static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, 317 struct btrfsic_block_data_ctx *block_ctx_out, 318 int mirror_num); 319static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, 320 u32 len, struct block_device *bdev, 321 struct btrfsic_block_data_ctx *block_ctx_out); 322static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); 323static int btrfsic_read_block(struct btrfsic_state *state, 324 struct btrfsic_block_data_ctx *block_ctx); 325static void btrfsic_dump_database(struct btrfsic_state *state); 326static void btrfsic_complete_bio_end_io(struct bio *bio, int err); 327static int btrfsic_test_for_metadata(struct btrfsic_state *state, 328 char **datav, unsigned int num_pages); 329static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, 330 u64 dev_bytenr, char **mapped_datav, 331 unsigned int num_pages, 332 struct bio *bio, int *bio_is_patched, 333 struct buffer_head *bh, 334 int submit_bio_bh_rw); 335static int btrfsic_process_written_superblock( 336 struct btrfsic_state *state, 337 struct btrfsic_block *const block, 338 struct btrfs_super_block *const super_hdr); 339static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status); 340static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate); 341static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state, 342 const struct btrfsic_block *block, 343 int recursion_level); 344static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, 345 struct btrfsic_block *const block, 346 int recursion_level); 347static void btrfsic_print_add_link(const 
struct btrfsic_state *state, 348 const struct btrfsic_block_link *l); 349static void btrfsic_print_rem_link(const struct btrfsic_state *state, 350 const struct btrfsic_block_link *l); 351static char btrfsic_get_block_type(const struct btrfsic_state *state, 352 const struct btrfsic_block *block); 353static void btrfsic_dump_tree(const struct btrfsic_state *state); 354static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, 355 const struct btrfsic_block *block, 356 int indent_level); 357static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add( 358 struct btrfsic_state *state, 359 struct btrfsic_block_data_ctx *next_block_ctx, 360 struct btrfsic_block *next_block, 361 struct btrfsic_block *from_block, 362 u64 parent_generation); 363static struct btrfsic_block *btrfsic_block_lookup_or_add( 364 struct btrfsic_state *state, 365 struct btrfsic_block_data_ctx *block_ctx, 366 const char *additional_string, 367 int is_metadata, 368 int is_iodone, 369 int never_written, 370 int mirror_num, 371 int *was_created); 372static int btrfsic_process_superblock_dev_mirror( 373 struct btrfsic_state *state, 374 struct btrfsic_dev_state *dev_state, 375 struct btrfs_device *device, 376 int superblock_mirror_num, 377 struct btrfsic_dev_state **selected_dev_state, 378 struct btrfs_super_block *selected_super); 379static struct btrfsic_dev_state *btrfsic_dev_state_lookup( 380 struct block_device *bdev); 381static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, 382 u64 bytenr, 383 struct btrfsic_dev_state *dev_state, 384 u64 dev_bytenr); 385 386static struct mutex btrfsic_mutex; 387static int btrfsic_is_initialized; 388static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable; 389 390 391static void btrfsic_block_init(struct btrfsic_block *b) 392{ 393 b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER; 394 b->dev_state = NULL; 395 b->dev_bytenr = 0; 396 b->logical_bytenr = 0; 397 b->generation = BTRFSIC_GENERATION_UNKNOWN; 398 b->disk_key.objectid = 
0; 399 b->disk_key.type = 0; 400 b->disk_key.offset = 0; 401 b->is_metadata = 0; 402 b->is_superblock = 0; 403 b->is_iodone = 0; 404 b->iodone_w_error = 0; 405 b->never_written = 0; 406 b->mirror_num = 0; 407 b->next_in_same_bio = NULL; 408 b->orig_bio_bh_private = NULL; 409 b->orig_bio_bh_end_io.bio = NULL; 410 INIT_LIST_HEAD(&b->collision_resolving_node); 411 INIT_LIST_HEAD(&b->all_blocks_node); 412 INIT_LIST_HEAD(&b->ref_to_list); 413 INIT_LIST_HEAD(&b->ref_from_list); 414 b->submit_bio_bh_rw = 0; 415 b->flush_gen = 0; 416} 417 418static struct btrfsic_block *btrfsic_block_alloc(void) 419{ 420 struct btrfsic_block *b; 421 422 b = kzalloc(sizeof(*b), GFP_NOFS); 423 if (NULL != b) 424 btrfsic_block_init(b); 425 426 return b; 427} 428 429static void btrfsic_block_free(struct btrfsic_block *b) 430{ 431 BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num)); 432 kfree(b); 433} 434 435static void btrfsic_block_link_init(struct btrfsic_block_link *l) 436{ 437 l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER; 438 l->ref_cnt = 1; 439 INIT_LIST_HEAD(&l->node_ref_to); 440 INIT_LIST_HEAD(&l->node_ref_from); 441 INIT_LIST_HEAD(&l->collision_resolving_node); 442 l->block_ref_to = NULL; 443 l->block_ref_from = NULL; 444} 445 446static struct btrfsic_block_link *btrfsic_block_link_alloc(void) 447{ 448 struct btrfsic_block_link *l; 449 450 l = kzalloc(sizeof(*l), GFP_NOFS); 451 if (NULL != l) 452 btrfsic_block_link_init(l); 453 454 return l; 455} 456 457static void btrfsic_block_link_free(struct btrfsic_block_link *l) 458{ 459 BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num)); 460 kfree(l); 461} 462 463static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds) 464{ 465 ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER; 466 ds->bdev = NULL; 467 ds->state = NULL; 468 ds->name[0] = '\0'; 469 INIT_LIST_HEAD(&ds->collision_resolving_node); 470 ds->last_flush_gen = 0; 471 btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush); 472 
ds->dummy_block_for_bio_bh_flush.is_iodone = 1; 473 ds->dummy_block_for_bio_bh_flush.dev_state = ds; 474} 475 476static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void) 477{ 478 struct btrfsic_dev_state *ds; 479 480 ds = kzalloc(sizeof(*ds), GFP_NOFS); 481 if (NULL != ds) 482 btrfsic_dev_state_init(ds); 483 484 return ds; 485} 486 487static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds) 488{ 489 BUG_ON(!(NULL == ds || 490 BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num)); 491 kfree(ds); 492} 493 494static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h) 495{ 496 int i; 497 498 for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++) 499 INIT_LIST_HEAD(h->table + i); 500} 501 502static void btrfsic_block_hashtable_add(struct btrfsic_block *b, 503 struct btrfsic_block_hashtable *h) 504{ 505 const unsigned int hashval = 506 (((unsigned int)(b->dev_bytenr >> 16)) ^ 507 ((unsigned int)((uintptr_t)b->dev_state->bdev))) & 508 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); 509 510 list_add(&b->collision_resolving_node, h->table + hashval); 511} 512 513static void btrfsic_block_hashtable_remove(struct btrfsic_block *b) 514{ 515 list_del(&b->collision_resolving_node); 516} 517 518static struct btrfsic_block *btrfsic_block_hashtable_lookup( 519 struct block_device *bdev, 520 u64 dev_bytenr, 521 struct btrfsic_block_hashtable *h) 522{ 523 const unsigned int hashval = 524 (((unsigned int)(dev_bytenr >> 16)) ^ 525 ((unsigned int)((uintptr_t)bdev))) & 526 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); 527 struct list_head *elem; 528 529 list_for_each(elem, h->table + hashval) { 530 struct btrfsic_block *const b = 531 list_entry(elem, struct btrfsic_block, 532 collision_resolving_node); 533 534 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr) 535 return b; 536 } 537 538 return NULL; 539} 540 541static void btrfsic_block_link_hashtable_init( 542 struct btrfsic_block_link_hashtable *h) 543{ 544 int i; 545 546 for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; 
i++) 547 INIT_LIST_HEAD(h->table + i); 548} 549 550static void btrfsic_block_link_hashtable_add( 551 struct btrfsic_block_link *l, 552 struct btrfsic_block_link_hashtable *h) 553{ 554 const unsigned int hashval = 555 (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^ 556 ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^ 557 ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^ 558 ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev))) 559 & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); 560 561 BUG_ON(NULL == l->block_ref_to); 562 BUG_ON(NULL == l->block_ref_from); 563 list_add(&l->collision_resolving_node, h->table + hashval); 564} 565 566static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l) 567{ 568 list_del(&l->collision_resolving_node); 569} 570 571static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( 572 struct block_device *bdev_ref_to, 573 u64 dev_bytenr_ref_to, 574 struct block_device *bdev_ref_from, 575 u64 dev_bytenr_ref_from, 576 struct btrfsic_block_link_hashtable *h) 577{ 578 const unsigned int hashval = 579 (((unsigned int)(dev_bytenr_ref_to >> 16)) ^ 580 ((unsigned int)(dev_bytenr_ref_from >> 16)) ^ 581 ((unsigned int)((uintptr_t)bdev_ref_to)) ^ 582 ((unsigned int)((uintptr_t)bdev_ref_from))) & 583 (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); 584 struct list_head *elem; 585 586 list_for_each(elem, h->table + hashval) { 587 struct btrfsic_block_link *const l = 588 list_entry(elem, struct btrfsic_block_link, 589 collision_resolving_node); 590 591 BUG_ON(NULL == l->block_ref_to); 592 BUG_ON(NULL == l->block_ref_from); 593 if (l->block_ref_to->dev_state->bdev == bdev_ref_to && 594 l->block_ref_to->dev_bytenr == dev_bytenr_ref_to && 595 l->block_ref_from->dev_state->bdev == bdev_ref_from && 596 l->block_ref_from->dev_bytenr == dev_bytenr_ref_from) 597 return l; 598 } 599 600 return NULL; 601} 602 603static void btrfsic_dev_state_hashtable_init( 604 struct btrfsic_dev_state_hashtable *h) 605{ 606 
int i; 607 608 for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++) 609 INIT_LIST_HEAD(h->table + i); 610} 611 612static void btrfsic_dev_state_hashtable_add( 613 struct btrfsic_dev_state *ds, 614 struct btrfsic_dev_state_hashtable *h) 615{ 616 const unsigned int hashval = 617 (((unsigned int)((uintptr_t)ds->bdev)) & 618 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); 619 620 list_add(&ds->collision_resolving_node, h->table + hashval); 621} 622 623static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds) 624{ 625 list_del(&ds->collision_resolving_node); 626} 627 628static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup( 629 struct block_device *bdev, 630 struct btrfsic_dev_state_hashtable *h) 631{ 632 const unsigned int hashval = 633 (((unsigned int)((uintptr_t)bdev)) & 634 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); 635 struct list_head *elem; 636 637 list_for_each(elem, h->table + hashval) { 638 struct btrfsic_dev_state *const ds = 639 list_entry(elem, struct btrfsic_dev_state, 640 collision_resolving_node); 641 642 if (ds->bdev == bdev) 643 return ds; 644 } 645 646 return NULL; 647} 648 649static int btrfsic_process_superblock(struct btrfsic_state *state, 650 struct btrfs_fs_devices *fs_devices) 651{ 652 int ret = 0; 653 struct btrfs_super_block *selected_super; 654 struct list_head *dev_head = &fs_devices->devices; 655 struct btrfs_device *device; 656 struct btrfsic_dev_state *selected_dev_state = NULL; 657 int pass; 658 659 BUG_ON(NULL == state); 660 selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS); 661 if (NULL == selected_super) { 662 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 663 return -1; 664 } 665 666 list_for_each_entry(device, dev_head, dev_list) { 667 int i; 668 struct btrfsic_dev_state *dev_state; 669 670 if (!device->bdev || !device->name) 671 continue; 672 673 dev_state = btrfsic_dev_state_lookup(device->bdev); 674 BUG_ON(NULL == dev_state); 675 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 676 ret = 
			    btrfsic_process_superblock_dev_mirror(
				state, dev_state, device, i,
				&selected_dev_state, selected_super);
			/* only a failure of mirror 0 is fatal */
			if (0 != ret && 0 == i) {
				kfree(selected_super);
				return ret;
			}
		}
	}

	if (NULL == state->latest_superblock) {
		printk(KERN_INFO "btrfsic: no superblock found!\n");
		kfree(selected_super);
		return -1;
	}

	state->csum_size = btrfs_super_csum_size(selected_super);

	/* pass 0: root tree, pass 1: chunk tree, pass 2: log tree */
	for (pass = 0; pass < 3; pass++) {
		int num_copies;
		int mirror_num;
		u64 next_bytenr;

		switch (pass) {
		case 0:
			next_bytenr = btrfs_super_root(selected_super);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "root@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		case 1:
			next_bytenr = btrfs_super_chunk_root(selected_super);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "chunk@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		case 2:
			next_bytenr = btrfs_super_log_root(selected_super);
			/* the log tree may be empty */
			if (0 == next_bytenr)
				continue;
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "log@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		}

		num_copies =
		    btrfs_num_copies(state->root->fs_info,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, num_copies);

		/* process every mirror of the tree root block */
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			struct btrfsic_block *next_block;
			struct btrfsic_block_data_ctx tmp_next_block_ctx;
			struct btrfsic_block_link *l;

			ret = btrfsic_map_block(state, next_bytenr,
						state->metablock_size,
						&tmp_next_block_ctx,
						mirror_num);
			if (ret) {
				printk(KERN_INFO "btrfsic:"
				       " btrfsic_map_block(root @%llu,"
				       " mirror %d) failed!\n",
				       (unsigned long long)next_bytenr,
				       mirror_num);
				kfree(selected_super);
				return -1;
			}

			/*
			 * the block and its link from the superblock were
			 * already added by the dev_mirror pass above
			 */
			next_block = btrfsic_block_hashtable_lookup(
					tmp_next_block_ctx.dev->bdev,
					tmp_next_block_ctx.dev_bytenr,
					&state->block_hashtable);
			BUG_ON(NULL == next_block);

			l = btrfsic_block_link_hashtable_lookup(
					tmp_next_block_ctx.dev->bdev,
					tmp_next_block_ctx.dev_bytenr,
					state->latest_superblock->dev_state->
					bdev,
					state->latest_superblock->dev_bytenr,
					&state->block_link_hashtable);
			BUG_ON(NULL == l);

			ret = btrfsic_read_block(state, &tmp_next_block_ctx);
			if (ret < (int)PAGE_CACHE_SIZE) {
				printk(KERN_INFO
				       "btrfsic: read @logical %llu failed!\n",
				       (unsigned long long)
				       tmp_next_block_ctx.start);
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				kfree(selected_super);
				return -1;
			}

			ret = btrfsic_process_metablock(state,
							next_block,
							&tmp_next_block_ctx,
							BTRFS_MAX_LEVEL + 3, 1);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
		}
	}

	kfree(selected_super);
	return ret;
}

/*
 * Read and validate one superblock mirror of one device. A valid mirror
 * is registered as a block object, the superblock with the highest
 * generation seen so far is copied to *selected_super, and link objects
 * from this mirror to the root/chunk/log tree roots are recorded.
 * Returns 0 on success (including a skipped/invalid mirror), -1 on error.
 */
static int btrfsic_process_superblock_dev_mirror(
		struct btrfsic_state *state,
		struct btrfsic_dev_state *dev_state,
		struct btrfs_device *device,
		int superblock_mirror_num,
		struct btrfsic_dev_state **selected_dev_state,
		struct btrfs_super_block *selected_super)
{
	struct btrfs_super_block *super_tmp;
	u64 dev_bytenr;
	struct buffer_head *bh;
	struct btrfsic_block *superblock_tmp;
	int pass;
	struct block_device *const superblock_bdev = device->bdev;

	/* super block bytenr is always the unmapped device bytenr */
	dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
	if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
		return -1;
	/*
	 * NOTE(review): 4096 hard-codes the buffer_head blocksize here;
	 * this relies on the bdev blocksize being 4096 — confirm against
	 * the set_blocksize() call at mount time.
	 */
	bh = __bread(superblock_bdev, dev_bytenr / 4096,
		     BTRFS_SUPER_INFO_SIZE);
	if (NULL == bh)
		return -1;
	super_tmp = (struct btrfs_super_block *)
813 (bh->b_data + (dev_bytenr & 4095)); 814 815 if (btrfs_super_bytenr(super_tmp) != dev_bytenr || 816 strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, 817 sizeof(super_tmp->magic)) || 818 memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || 819 btrfs_super_nodesize(super_tmp) != state->metablock_size || 820 btrfs_super_leafsize(super_tmp) != state->metablock_size || 821 btrfs_super_sectorsize(super_tmp) != state->datablock_size) { 822 brelse(bh); 823 return 0; 824 } 825 826 superblock_tmp = 827 btrfsic_block_hashtable_lookup(superblock_bdev, 828 dev_bytenr, 829 &state->block_hashtable); 830 if (NULL == superblock_tmp) { 831 superblock_tmp = btrfsic_block_alloc(); 832 if (NULL == superblock_tmp) { 833 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 834 brelse(bh); 835 return -1; 836 } 837 /* for superblock, only the dev_bytenr makes sense */ 838 superblock_tmp->dev_bytenr = dev_bytenr; 839 superblock_tmp->dev_state = dev_state; 840 superblock_tmp->logical_bytenr = dev_bytenr; 841 superblock_tmp->generation = btrfs_super_generation(super_tmp); 842 superblock_tmp->is_metadata = 1; 843 superblock_tmp->is_superblock = 1; 844 superblock_tmp->is_iodone = 1; 845 superblock_tmp->never_written = 0; 846 superblock_tmp->mirror_num = 1 + superblock_mirror_num; 847 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 848 printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)" 849 " @%llu (%s/%llu/%d)\n", 850 superblock_bdev, 851 rcu_str_deref(device->name), 852 (unsigned long long)dev_bytenr, 853 dev_state->name, 854 (unsigned long long)dev_bytenr, 855 superblock_mirror_num); 856 list_add(&superblock_tmp->all_blocks_node, 857 &state->all_blocks_list); 858 btrfsic_block_hashtable_add(superblock_tmp, 859 &state->block_hashtable); 860 } 861 862 /* select the one with the highest generation field */ 863 if (btrfs_super_generation(super_tmp) > 864 state->max_superblock_generation || 865 0 == state->max_superblock_generation) { 866 
memcpy(selected_super, super_tmp, sizeof(*selected_super)); 867 *selected_dev_state = dev_state; 868 state->max_superblock_generation = 869 btrfs_super_generation(super_tmp); 870 state->latest_superblock = superblock_tmp; 871 } 872 873 for (pass = 0; pass < 3; pass++) { 874 u64 next_bytenr; 875 int num_copies; 876 int mirror_num; 877 const char *additional_string = NULL; 878 struct btrfs_disk_key tmp_disk_key; 879 880 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; 881 tmp_disk_key.offset = 0; 882 switch (pass) { 883 case 0: 884 tmp_disk_key.objectid = 885 cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); 886 additional_string = "initial root "; 887 next_bytenr = btrfs_super_root(super_tmp); 888 break; 889 case 1: 890 tmp_disk_key.objectid = 891 cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); 892 additional_string = "initial chunk "; 893 next_bytenr = btrfs_super_chunk_root(super_tmp); 894 break; 895 case 2: 896 tmp_disk_key.objectid = 897 cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); 898 additional_string = "initial log "; 899 next_bytenr = btrfs_super_log_root(super_tmp); 900 if (0 == next_bytenr) 901 continue; 902 break; 903 } 904 905 num_copies = 906 btrfs_num_copies(state->root->fs_info, 907 next_bytenr, state->metablock_size); 908 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 909 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 910 (unsigned long long)next_bytenr, num_copies); 911 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 912 struct btrfsic_block *next_block; 913 struct btrfsic_block_data_ctx tmp_next_block_ctx; 914 struct btrfsic_block_link *l; 915 916 if (btrfsic_map_block(state, next_bytenr, 917 state->metablock_size, 918 &tmp_next_block_ctx, 919 mirror_num)) { 920 printk(KERN_INFO "btrfsic: btrfsic_map_block(" 921 "bytenr @%llu, mirror %d) failed!\n", 922 (unsigned long long)next_bytenr, 923 mirror_num); 924 brelse(bh); 925 return -1; 926 } 927 928 next_block = btrfsic_block_lookup_or_add( 929 state, &tmp_next_block_ctx, 930 additional_string, 1, 1, 0, 
				mirror_num, NULL);
			if (NULL == next_block) {
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				brelse(bh);
				return -1;
			}

			next_block->disk_key = tmp_disk_key;
			/* generation is unknown until the block is actually
			 * verified against its parent */
			next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
			l = btrfsic_block_link_lookup_or_add(
					state, &tmp_next_block_ctx,
					next_block, superblock_tmp,
					BTRFSIC_GENERATION_UNKNOWN);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
			if (NULL == l) {
				brelse(bh);
				return -1;
			}
		}
	}
	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
		btrfsic_dump_tree_sub(state, superblock_tmp, 0);

	brelse(bh);
	return 0;
}

/*
 * Allocate one stack frame for the iterative (non-recursive) tree walk
 * in btrfsic_process_metablock() and stamp it with a magic number so
 * that btrfsic_stack_frame_free() can detect corrupted/foreign frames.
 *
 * Returns the zeroed frame, or NULL on allocation failure (a message
 * is printed; the caller handles the NULL).
 */
static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
{
	struct btrfsic_stack_frame *sf;

	sf = kzalloc(sizeof(*sf), GFP_NOFS);
	if (NULL == sf)
		printk(KERN_INFO "btrfsic: alloc memory failed!\n");
	else
		sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
	return sf;
}

/*
 * Free a stack frame previously obtained from btrfsic_stack_frame_alloc().
 * NULL is tolerated; a non-NULL frame must still carry the magic number
 * (guards against double free / stray pointers).
 */
static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
{
	BUG_ON(!(NULL == sf ||
		 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
	kfree(sf);
}

/*
 * Walk a metadata tree block (leaf or node) and record, in the
 * integrity-checker state, a link for every block it references:
 * root items in leaves, key pointers in nodes, and (optionally)
 * regular file extents.
 *
 * The walk is iterative: descending into a child block pushes a
 * heap-allocated btrfsic_stack_frame (linked via ->prev) instead of
 * recursing, so arbitrarily deep trees cannot overflow the kernel
 * stack. The initial frame lives on the stack of this function.
 *
 * @state:             global checker state
 * @first_block:       checker object of the block to process
 * @first_block_ctx:   mapped data of that block (datav[0] holds the header)
 * @first_limit_nesting: how many more levels may be descended into
 * @force_iodone_flag: passed through to newly created child blocks
 *
 * Returns 0 on success or the first error encountered while walking
 * (errors propagate outward through the prev-linked frames).
 */
static int btrfsic_process_metablock(
		struct btrfsic_state *state,
		struct btrfsic_block *const first_block,
		struct btrfsic_block_data_ctx *const first_block_ctx,
		int first_limit_nesting, int force_iodone_flag)
{
	struct btrfsic_stack_frame initial_stack_frame = { 0 };
	struct btrfsic_stack_frame *sf;
	struct btrfsic_stack_frame *next_stack;
	struct btrfs_header *const first_hdr =
		(struct btrfs_header *)first_block_ctx->datav[0];

	BUG_ON(!first_hdr);
	sf = &initial_stack_frame;
	sf->error = 0;
	sf->i = -1;	/* -1 = item counter not yet initialized */
	sf->limit_nesting = first_limit_nesting;
	sf->block = first_block;
	sf->block_ctx = first_block_ctx;
	sf->next_block = NULL;
	sf->hdr = first_hdr;
	sf->prev = NULL;

continue_with_new_stack_frame:
	sf->block->generation = le64_to_cpu(sf->hdr->generation);
	if (0 == sf->hdr->level) {
		/* level 0: leaf block, iterate over its items */
		struct btrfs_leaf *const leafhdr =
		    (struct btrfs_leaf *)sf->hdr;

		if (-1 == sf->i) {
			sf->nr = le32_to_cpu(leafhdr->header.nritems);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO
				       "leaf %llu items %d generation %llu"
				       " owner %llu\n",
				       (unsigned long long)
				       sf->block_ctx->start,
				       sf->nr,
				       (unsigned long long)
				       le64_to_cpu(leafhdr->header.generation),
				       (unsigned long long)
				       le64_to_cpu(leafhdr->header.owner));
		}

continue_with_current_leaf_stack_frame:
		/* advance to the next item once all mirrors of the
		 * current one have been handled */
		if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
			sf->i++;
			sf->num_copies = 0;
		}

		if (sf->i < sf->nr) {
			struct btrfs_item disk_item;
			u32 disk_item_offset =
			    (uintptr_t)(leafhdr->items + sf->i) -
			    (uintptr_t)leafhdr;
			struct btrfs_disk_key *disk_key;
			u8 type;
			u32 item_offset;
			u32 item_size;

			if (disk_item_offset + sizeof(struct btrfs_item) >
			    sf->block_ctx->len) {
leaf_item_out_of_bounce_error:
				printk(KERN_INFO
				       "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
				       sf->block_ctx->start,
				       sf->block_ctx->dev->name);
				goto one_stack_frame_backwards;
			}
			btrfsic_read_from_block_data(sf->block_ctx,
						     &disk_item,
						     disk_item_offset,
						     sizeof(struct btrfs_item));
			item_offset = le32_to_cpu(disk_item.offset);
			item_size = le32_to_cpu(disk_item.size);
			disk_key = &disk_item.key;
			type = disk_key->type;

			if (BTRFS_ROOT_ITEM_KEY == type) {
				struct btrfs_root_item root_item;
				u32 root_item_offset;
				u64 next_bytenr;

				/* item data offsets are relative to the
				 * start of the item area */
				root_item_offset = item_offset +
					offsetof(struct btrfs_leaf, items);
				if (root_item_offset + item_size >
				    sf->block_ctx->len)
					goto leaf_item_out_of_bounce_error;
				btrfsic_read_from_block_data(
					sf->block_ctx, &root_item,
					root_item_offset,
					item_size);
				next_bytenr = le64_to_cpu(root_item.bytenr);

				sf->error =
				    btrfsic_create_link_to_next_block(
						state,
						sf->block,
						sf->block_ctx,
						next_bytenr,
						sf->limit_nesting,
						&sf->next_block_ctx,
						&sf->next_block,
						force_iodone_flag,
						&sf->num_copies,
						&sf->mirror_num,
						disk_key,
						le64_to_cpu(root_item.
						generation));
				if (sf->error)
					goto one_stack_frame_backwards;

				if (NULL != sf->next_block) {
					/* descend: push a new stack frame
					 * for the referenced tree root */
					struct btrfs_header *const next_hdr =
					    (struct btrfs_header *)
					    sf->next_block_ctx.datav[0];

					next_stack =
					    btrfsic_stack_frame_alloc();
					if (NULL == next_stack) {
						btrfsic_release_block_ctx(
								&sf->
								next_block_ctx);
						goto one_stack_frame_backwards;
					}

					next_stack->i = -1;
					next_stack->block = sf->next_block;
					next_stack->block_ctx =
					    &sf->next_block_ctx;
					next_stack->next_block = NULL;
					next_stack->hdr = next_hdr;
					next_stack->limit_nesting =
					    sf->limit_nesting - 1;
					next_stack->prev = sf;
					sf = next_stack;
					goto continue_with_new_stack_frame;
				}
			} else if (BTRFS_EXTENT_DATA_KEY == type &&
				   state->include_extent_data) {
				sf->error = btrfsic_handle_extent_data(
						state,
						sf->block,
						sf->block_ctx,
						item_offset,
						force_iodone_flag);
				if (sf->error)
					goto one_stack_frame_backwards;
			}

			goto continue_with_current_leaf_stack_frame;
		}
	} else {
		/* level > 0: interior node, iterate over key pointers */
		struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;

		if (-1 == sf->i) {
			sf->nr = le32_to_cpu(nodehdr->header.nritems);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO "node %llu level %d items %d"
				       " generation %llu owner %llu\n",
				       (unsigned long long)
				       sf->block_ctx->start,
				       nodehdr->header.level, sf->nr,
				       (unsigned long long)
				       le64_to_cpu(nodehdr->header.generation),
				       (unsigned long long)
				       le64_to_cpu(nodehdr->header.owner));
		}

continue_with_current_node_stack_frame:
		if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
			sf->i++;
			sf->num_copies = 0;
		}

		if (sf->i < sf->nr) {
			struct btrfs_key_ptr key_ptr;
			u32 key_ptr_offset;
			u64 next_bytenr;

			key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
					 (uintptr_t)nodehdr;
			if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
			    sf->block_ctx->len) {
				printk(KERN_INFO
				       "btrfsic: node item out of bounce at logical %llu, dev %s\n",
				       sf->block_ctx->start,
				       sf->block_ctx->dev->name);
				goto one_stack_frame_backwards;
			}
			btrfsic_read_from_block_data(
				sf->block_ctx, &key_ptr, key_ptr_offset,
				sizeof(struct btrfs_key_ptr));
			next_bytenr = le64_to_cpu(key_ptr.blockptr);

			sf->error = btrfsic_create_link_to_next_block(
					state,
					sf->block,
					sf->block_ctx,
					next_bytenr,
					sf->limit_nesting,
					&sf->next_block_ctx,
					&sf->next_block,
					force_iodone_flag,
					&sf->num_copies,
					&sf->mirror_num,
					&key_ptr.key,
					le64_to_cpu(key_ptr.generation));
			if (sf->error)
				goto one_stack_frame_backwards;

			if (NULL != sf->next_block) {
				/* descend into the child tree block */
				struct btrfs_header *const next_hdr =
				    (struct btrfs_header *)
				    sf->next_block_ctx.datav[0];

				next_stack = btrfsic_stack_frame_alloc();
				if (NULL == next_stack)
					goto one_stack_frame_backwards;

				next_stack->i = -1;
				next_stack->block = sf->next_block;
				next_stack->block_ctx = &sf->next_block_ctx;
				next_stack->next_block = NULL;
				next_stack->hdr = next_hdr;
				next_stack->limit_nesting =
				    sf->limit_nesting - 1;
				next_stack->prev = sf;
				sf = next_stack;
				goto continue_with_new_stack_frame;
			}

			goto continue_with_current_node_stack_frame;
		}
	}

one_stack_frame_backwards:
	/* done with this block: pop back to the parent frame, propagating
	 * any error outward */
	if (NULL != sf->prev) {
		struct btrfsic_stack_frame *const prev = sf->prev;

		/* the one for the initial block is freed in the caller */
		btrfsic_release_block_ctx(sf->block_ctx);

		if (sf->error) {
			prev->error = sf->error;
			btrfsic_stack_frame_free(sf);
			sf = prev;
			goto one_stack_frame_backwards;
		}

		btrfsic_stack_frame_free(sf);
		sf = prev;
		goto continue_with_new_stack_frame;
	} else {
		BUG_ON(&initial_stack_frame != sf);
	}

	return sf->error;
}

/*
 * Copy @len bytes at logical @offset out of a (possibly multi-page)
 * mapped block into @dstv, crossing page boundaries as needed.
 * The block's pages are already kmap()ed in block_ctx->datav[].
 */
static void btrfsic_read_from_block_data(
	struct btrfsic_block_data_ctx *block_ctx,
	void *dstv, u32 offset, size_t len)
{
	size_t cur;
	size_t offset_in_page;
	char *kaddr;
	char *dst = (char *)dstv;
	size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;

	WARN_ON(offset + len > block_ctx->len);
	offset_in_page = (start_offset + offset) &
			 ((unsigned long)PAGE_CACHE_SIZE - 1);

	while (len > 0) {
		cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
		BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
			    PAGE_CACHE_SHIFT);
		kaddr = block_ctx->datav[i];
		memcpy(dst, kaddr + offset_in_page, cur);

		dst += cur;
		len -= cur;
		offset_in_page = 0;	/* subsequent pages start at 0 */
		i++;
	}
}

static int btrfsic_create_link_to_next_block(
		struct btrfsic_state *state,
		struct btrfsic_block *block,
		struct btrfsic_block_data_ctx *block_ctx,
		u64 next_bytenr,
		int limit_nesting,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block **next_blockp,
		int force_iodone_flag,
		int *num_copiesp, int *mirror_nump,
		struct btrfs_disk_key *disk_key,
		u64 parent_generation)
{
	struct btrfsic_block *next_block = NULL;
	int ret;
	struct btrfsic_block_link *l;
	int did_alloc_block_link;
	int block_was_created;

	*next_blockp = NULL;
	/* first call for this item: determine the mirror count and start
	 * with mirror 1; subsequent calls step through the mirrors */
	if (0 == *num_copiesp) {
		*num_copiesp =
		    btrfs_num_copies(state->root->fs_info,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, *num_copiesp);
		*mirror_nump = 1;
	}

	if (*mirror_nump > *num_copiesp)
		return 0;

	if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
		printk(KERN_INFO
		       "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
		       *mirror_nump);
	ret = btrfsic_map_block(state, next_bytenr,
				state->metablock_size,
				next_block_ctx, *mirror_nump);
	if (ret) {
		printk(KERN_INFO
		       "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
		       (unsigned long long)next_bytenr, *mirror_nump);
		btrfsic_release_block_ctx(next_block_ctx);
		*next_blockp = NULL;
		return -1;
	}

	next_block = btrfsic_block_lookup_or_add(state,
						 next_block_ctx, "referenced ",
						 1, force_iodone_flag,
						 !force_iodone_flag,
						 *mirror_nump,
						 &block_was_created);
	if (NULL == next_block) {
		btrfsic_release_block_ctx(next_block_ctx);
		*next_blockp = NULL;
		return -1;
	}
	if (block_was_created) {
		l = NULL;
		next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
	} else {
		/* the block was already known; warn when the stored logical
		 * bytenr disagrees (unless it is a data block that never had
		 * a logical bytenr assigned) */
		if (next_block->logical_bytenr != next_bytenr &&
		    !(!next_block->is_metadata &&
		      0 == next_block->logical_bytenr)) {
			printk(KERN_INFO
			       "Referenced block @%llu (%s/%llu/%d)"
			       " found in hash table, %c,"
			       " bytenr mismatch (!= stored %llu).\n",
			       (unsigned long long)next_bytenr,
			       next_block_ctx->dev->name,
			       (unsigned long long)next_block_ctx->dev_bytenr,
			       *mirror_nump,
			       btrfsic_get_block_type(state, next_block),
			       (unsigned long long)next_block->logical_bytenr);
		} else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "Referenced block @%llu (%s/%llu/%d)"
			       " found in hash table, %c.\n",
			       (unsigned long long)next_bytenr,
			       next_block_ctx->dev->name,
			       (unsigned long long)next_block_ctx->dev_bytenr,
			       *mirror_nump,
			       btrfsic_get_block_type(state, next_block));
		next_block->logical_bytenr = next_bytenr;

		next_block->mirror_num = *mirror_nump;
		/* a link between these two blocks may already exist */
		l = btrfsic_block_link_hashtable_lookup(
				next_block_ctx->dev->bdev,
				next_block_ctx->dev_bytenr,
				block_ctx->dev->bdev,
				block_ctx->dev_bytenr,
				&state->block_link_hashtable);
	}

	next_block->disk_key = *disk_key;
	if (NULL == l) {
		/* no existing link: allocate one, wire it into both blocks'
		 * ref lists and the link hash table */
		l = btrfsic_block_link_alloc();
		if (NULL == l) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			btrfsic_release_block_ctx(next_block_ctx);
			*next_blockp = NULL;
			return -1;
		}

		did_alloc_block_link = 1;
		l->block_ref_to = next_block;
		l->block_ref_from = block;
		l->ref_cnt = 1;
		l->parent_generation = parent_generation;

		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			btrfsic_print_add_link(state, l);

		list_add(&l->node_ref_to, &block->ref_to_list);
		list_add(&l->node_ref_from, &next_block->ref_from_list);

		btrfsic_block_link_hashtable_add(l,
						 &state->block_link_hashtable);
	} else {
		did_alloc_block_link = 0;
		if (0 == limit_nesting) {
			/* at the nesting limit, just bump the refcount on
			 * the existing link instead of descending */
			l->ref_cnt++;
			l->parent_generation = parent_generation;
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				btrfsic_print_add_link(state, l);
		}
	}

	if (limit_nesting > 0 && did_alloc_block_link) {
		/* freshly linked block: read its contents so the caller
		 * can descend into it; the ctx stays mapped on success */
		ret = btrfsic_read_block(state, next_block_ctx);
		if (ret < (int)next_block_ctx->len) {
			printk(KERN_INFO
			       "btrfsic: read block @logical %llu failed!\n",
			       (unsigned long long)next_bytenr);
			btrfsic_release_block_ctx(next_block_ctx);
			*next_blockp = NULL;
			return -1;
		}

		*next_blockp = next_block;
	} else {
		*next_blockp = NULL;
	}
	(*mirror_nump)++;

	return 0;
}

/*
 * Handle one EXTENT_DATA item of a leaf: for a regular (non-hole)
 * extent, create checker links from @block to every data block of
 * every mirror that the extent covers.
 *
 * @item_offset is the item's data offset relative to the leaf's item
 * area. Returns 0 on success, -1 on error.
 */
static int btrfsic_handle_extent_data(
		struct btrfsic_state *state,
		struct btrfsic_block *block,
		struct btrfsic_block_data_ctx *block_ctx,
		u32 item_offset, int force_iodone_flag)
{
	int ret;
	struct btrfs_file_extent_item file_extent_item;
	u64 file_extent_item_offset;
	u64 next_bytenr;
	u64 num_bytes;
	u64 generation;
	struct btrfsic_block_link *l;

	file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
				  item_offset;
	/* first read only up to disk_num_bytes: enough to decide whether
	 * this is a regular, non-hole extent at all */
	if (file_extent_item_offset +
	    offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
	    block_ctx->len) {
		printk(KERN_INFO
		       "btrfsic: file item out of bounce at logical %llu, dev %s\n",
		       block_ctx->start, block_ctx->dev->name);
		return -1;
	}

	btrfsic_read_from_block_data(block_ctx, &file_extent_item,
		file_extent_item_offset,
		offsetof(struct btrfs_file_extent_item, disk_num_bytes));
	if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
	    ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) {
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
			printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
			       file_extent_item.type,
			       (unsigned long long)
			       le64_to_cpu(file_extent_item.disk_bytenr));
		return 0;
	}

	/* regular extent: now read the full item */
	if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
	    block_ctx->len) {
		printk(KERN_INFO
		       "btrfsic: file item out of bounce at logical %llu, dev %s\n",
		       block_ctx->start, block_ctx->dev->name);
		return -1;
	}
	btrfsic_read_from_block_data(block_ctx, &file_extent_item,
				     file_extent_item_offset,
				     sizeof(struct btrfs_file_extent_item));
	next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) +
		      le64_to_cpu(file_extent_item.offset);
	generation = le64_to_cpu(file_extent_item.generation);
	num_bytes =
le64_to_cpu(file_extent_item.num_bytes); 1471 generation = le64_to_cpu(file_extent_item.generation); 1472 1473 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1474 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," 1475 " offset = %llu, num_bytes = %llu\n", 1476 file_extent_item.type, 1477 (unsigned long long) 1478 le64_to_cpu(file_extent_item.disk_bytenr), 1479 (unsigned long long)le64_to_cpu(file_extent_item.offset), 1480 (unsigned long long)num_bytes); 1481 while (num_bytes > 0) { 1482 u32 chunk_len; 1483 int num_copies; 1484 int mirror_num; 1485 1486 if (num_bytes > state->datablock_size) 1487 chunk_len = state->datablock_size; 1488 else 1489 chunk_len = num_bytes; 1490 1491 num_copies = 1492 btrfs_num_copies(state->root->fs_info, 1493 next_bytenr, state->datablock_size); 1494 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1495 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 1496 (unsigned long long)next_bytenr, num_copies); 1497 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 1498 struct btrfsic_block_data_ctx next_block_ctx; 1499 struct btrfsic_block *next_block; 1500 int block_was_created; 1501 1502 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1503 printk(KERN_INFO "btrfsic_handle_extent_data(" 1504 "mirror_num=%d)\n", mirror_num); 1505 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1506 printk(KERN_INFO 1507 "\tdisk_bytenr = %llu, num_bytes %u\n", 1508 (unsigned long long)next_bytenr, 1509 chunk_len); 1510 ret = btrfsic_map_block(state, next_bytenr, 1511 chunk_len, &next_block_ctx, 1512 mirror_num); 1513 if (ret) { 1514 printk(KERN_INFO 1515 "btrfsic: btrfsic_map_block(@%llu," 1516 " mirror=%d) failed!\n", 1517 (unsigned long long)next_bytenr, 1518 mirror_num); 1519 return -1; 1520 } 1521 1522 next_block = btrfsic_block_lookup_or_add( 1523 state, 1524 &next_block_ctx, 1525 "referenced ", 1526 0, 1527 force_iodone_flag, 1528 !force_iodone_flag, 1529 mirror_num, 1530 &block_was_created); 1531 
if (NULL == next_block) { 1532 printk(KERN_INFO 1533 "btrfsic: error, kmalloc failed!\n"); 1534 btrfsic_release_block_ctx(&next_block_ctx); 1535 return -1; 1536 } 1537 if (!block_was_created) { 1538 if (next_block->logical_bytenr != next_bytenr && 1539 !(!next_block->is_metadata && 1540 0 == next_block->logical_bytenr)) { 1541 printk(KERN_INFO 1542 "Referenced block" 1543 " @%llu (%s/%llu/%d)" 1544 " found in hash table, D," 1545 " bytenr mismatch" 1546 " (!= stored %llu).\n", 1547 (unsigned long long)next_bytenr, 1548 next_block_ctx.dev->name, 1549 (unsigned long long) 1550 next_block_ctx.dev_bytenr, 1551 mirror_num, 1552 (unsigned long long) 1553 next_block->logical_bytenr); 1554 } 1555 next_block->logical_bytenr = next_bytenr; 1556 next_block->mirror_num = mirror_num; 1557 } 1558 1559 l = btrfsic_block_link_lookup_or_add(state, 1560 &next_block_ctx, 1561 next_block, block, 1562 generation); 1563 btrfsic_release_block_ctx(&next_block_ctx); 1564 if (NULL == l) 1565 return -1; 1566 } 1567 1568 next_bytenr += chunk_len; 1569 num_bytes -= chunk_len; 1570 } 1571 1572 return 0; 1573} 1574 1575static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, 1576 struct btrfsic_block_data_ctx *block_ctx_out, 1577 int mirror_num) 1578{ 1579 int ret; 1580 u64 length; 1581 struct btrfs_bio *multi = NULL; 1582 struct btrfs_device *device; 1583 1584 length = len; 1585 ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ, 1586 bytenr, &length, &multi, mirror_num); 1587 1588 device = multi->stripes[0].dev; 1589 block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev); 1590 block_ctx_out->dev_bytenr = multi->stripes[0].physical; 1591 block_ctx_out->start = bytenr; 1592 block_ctx_out->len = len; 1593 block_ctx_out->datav = NULL; 1594 block_ctx_out->pagev = NULL; 1595 block_ctx_out->mem_to_free = NULL; 1596 1597 if (0 == ret) 1598 kfree(multi); 1599 if (NULL == block_ctx_out->dev) { 1600 ret = -ENXIO; 1601 printk(KERN_INFO "btrfsic: error, cannot lookup 
dev (#1)!\n"); 1602 } 1603 1604 return ret; 1605} 1606 1607static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, 1608 u32 len, struct block_device *bdev, 1609 struct btrfsic_block_data_ctx *block_ctx_out) 1610{ 1611 block_ctx_out->dev = btrfsic_dev_state_lookup(bdev); 1612 block_ctx_out->dev_bytenr = bytenr; 1613 block_ctx_out->start = bytenr; 1614 block_ctx_out->len = len; 1615 block_ctx_out->datav = NULL; 1616 block_ctx_out->pagev = NULL; 1617 block_ctx_out->mem_to_free = NULL; 1618 if (NULL != block_ctx_out->dev) { 1619 return 0; 1620 } else { 1621 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n"); 1622 return -ENXIO; 1623 } 1624} 1625 1626static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) 1627{ 1628 if (block_ctx->mem_to_free) { 1629 unsigned int num_pages; 1630 1631 BUG_ON(!block_ctx->datav); 1632 BUG_ON(!block_ctx->pagev); 1633 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> 1634 PAGE_CACHE_SHIFT; 1635 while (num_pages > 0) { 1636 num_pages--; 1637 if (block_ctx->datav[num_pages]) { 1638 kunmap(block_ctx->pagev[num_pages]); 1639 block_ctx->datav[num_pages] = NULL; 1640 } 1641 if (block_ctx->pagev[num_pages]) { 1642 __free_page(block_ctx->pagev[num_pages]); 1643 block_ctx->pagev[num_pages] = NULL; 1644 } 1645 } 1646 1647 kfree(block_ctx->mem_to_free); 1648 block_ctx->mem_to_free = NULL; 1649 block_ctx->pagev = NULL; 1650 block_ctx->datav = NULL; 1651 } 1652} 1653 1654static int btrfsic_read_block(struct btrfsic_state *state, 1655 struct btrfsic_block_data_ctx *block_ctx) 1656{ 1657 unsigned int num_pages; 1658 unsigned int i; 1659 u64 dev_bytenr; 1660 int ret; 1661 1662 BUG_ON(block_ctx->datav); 1663 BUG_ON(block_ctx->pagev); 1664 BUG_ON(block_ctx->mem_to_free); 1665 if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) { 1666 printk(KERN_INFO 1667 "btrfsic: read_block() with unaligned bytenr %llu\n", 1668 (unsigned long long)block_ctx->dev_bytenr); 1669 return -1; 1670 } 1671 1672 
num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> 1673 PAGE_CACHE_SHIFT; 1674 block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) + 1675 sizeof(*block_ctx->pagev)) * 1676 num_pages, GFP_NOFS); 1677 if (!block_ctx->mem_to_free) 1678 return -1; 1679 block_ctx->datav = block_ctx->mem_to_free; 1680 block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages); 1681 for (i = 0; i < num_pages; i++) { 1682 block_ctx->pagev[i] = alloc_page(GFP_NOFS); 1683 if (!block_ctx->pagev[i]) 1684 return -1; 1685 } 1686 1687 dev_bytenr = block_ctx->dev_bytenr; 1688 for (i = 0; i < num_pages;) { 1689 struct bio *bio; 1690 unsigned int j; 1691 DECLARE_COMPLETION_ONSTACK(complete); 1692 1693 bio = bio_alloc(GFP_NOFS, num_pages - i); 1694 if (!bio) { 1695 printk(KERN_INFO 1696 "btrfsic: bio_alloc() for %u pages failed!\n", 1697 num_pages - i); 1698 return -1; 1699 } 1700 bio->bi_bdev = block_ctx->dev->bdev; 1701 bio->bi_sector = dev_bytenr >> 9; 1702 bio->bi_end_io = btrfsic_complete_bio_end_io; 1703 bio->bi_private = &complete; 1704 1705 for (j = i; j < num_pages; j++) { 1706 ret = bio_add_page(bio, block_ctx->pagev[j], 1707 PAGE_CACHE_SIZE, 0); 1708 if (PAGE_CACHE_SIZE != ret) 1709 break; 1710 } 1711 if (j == i) { 1712 printk(KERN_INFO 1713 "btrfsic: error, failed to add a single page!\n"); 1714 return -1; 1715 } 1716 submit_bio(READ, bio); 1717 1718 /* this will also unplug the queue */ 1719 wait_for_completion(&complete); 1720 1721 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { 1722 printk(KERN_INFO 1723 "btrfsic: read error at logical %llu dev %s!\n", 1724 block_ctx->start, block_ctx->dev->name); 1725 bio_put(bio); 1726 return -1; 1727 } 1728 bio_put(bio); 1729 dev_bytenr += (j - i) * PAGE_CACHE_SIZE; 1730 i = j; 1731 } 1732 for (i = 0; i < num_pages; i++) { 1733 block_ctx->datav[i] = kmap(block_ctx->pagev[i]); 1734 if (!block_ctx->datav[i]) { 1735 printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n", 1736 block_ctx->dev->name); 1737 return -1; 1738 } 1739 } 

	return block_ctx->len;
}

/*
 * Completion callback for the synchronous reads issued by
 * btrfsic_read_block(): just wake up the waiting submitter.
 */
static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
{
	complete((struct completion *)bio->bi_private);
}

/*
 * Debugging aid: dump every block the integrity checker knows about,
 * together with its outgoing (ref_to) and incoming (ref_from) links.
 */
static void btrfsic_dump_database(struct btrfsic_state *state)
{
	struct list_head *elem_all;

	BUG_ON(NULL == state);

	printk(KERN_INFO "all_blocks_list:\n");
	list_for_each(elem_all, &state->all_blocks_list) {
		const struct btrfsic_block *const b_all =
			list_entry(elem_all, struct btrfsic_block,
				   all_blocks_node);
		struct list_head *elem_ref_to;
		struct list_head *elem_ref_from;

		printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
		       btrfsic_get_block_type(state, b_all),
		       (unsigned long long)b_all->logical_bytenr,
		       b_all->dev_state->name,
		       (unsigned long long)b_all->dev_bytenr,
		       b_all->mirror_num);

		list_for_each(elem_ref_to, &b_all->ref_to_list) {
			const struct btrfsic_block_link *const l =
				list_entry(elem_ref_to,
					   struct btrfsic_block_link,
					   node_ref_to);

			printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
			       " refers %u* to"
			       " %c @%llu (%s/%llu/%d)\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_to),
			       (unsigned long long)
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       (unsigned long long)l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num);
		}

		list_for_each(elem_ref_from, &b_all->ref_from_list) {
			const struct btrfsic_block_link *const l =
				list_entry(elem_ref_from,
					   struct btrfsic_block_link,
					   node_ref_from);

			printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
			       " is ref %u* from"
			       " %c @%llu (%s/%llu/%d)\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_from),
			       (unsigned long long)
			       l->block_ref_from->logical_bytenr,
			       l->block_ref_from->dev_state->name,
			       (unsigned long long)
			       l->block_ref_from->dev_bytenr,
			       l->block_ref_from->mirror_num);
		}

		printk(KERN_INFO "\n");
	}
}

/*
 * Test whether the disk block contains a tree block (leaf or node)
 * (note that this test fails for the super block)
 */
static int btrfsic_test_for_metadata(struct btrfsic_state *state,
				     char **datav, unsigned int num_pages)
{
	struct btrfs_header *h;
	u8 csum[BTRFS_CSUM_SIZE];
	u32 crc = ~(u32)0;
	unsigned int i;

	if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
		return 1; /* not metadata */
	num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
	h = (struct btrfs_header *)datav[0];

	if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
		return 1;

	/* checksum the block, skipping the stored csum in the first page */
	for (i = 0; i < num_pages; i++) {
		u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
		size_t sublen = i ?
PAGE_CACHE_SIZE : 1844 (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE); 1845 1846 crc = crc32c(crc, data, sublen); 1847 } 1848 btrfs_csum_final(crc, csum); 1849 if (memcmp(csum, h->csum, state->csum_size)) 1850 return 1; 1851 1852 return 0; /* is metadata */ 1853} 1854 1855static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, 1856 u64 dev_bytenr, char **mapped_datav, 1857 unsigned int num_pages, 1858 struct bio *bio, int *bio_is_patched, 1859 struct buffer_head *bh, 1860 int submit_bio_bh_rw) 1861{ 1862 int is_metadata; 1863 struct btrfsic_block *block; 1864 struct btrfsic_block_data_ctx block_ctx; 1865 int ret; 1866 struct btrfsic_state *state = dev_state->state; 1867 struct block_device *bdev = dev_state->bdev; 1868 unsigned int processed_len; 1869 1870 if (NULL != bio_is_patched) 1871 *bio_is_patched = 0; 1872 1873again: 1874 if (num_pages == 0) 1875 return; 1876 1877 processed_len = 0; 1878 is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav, 1879 num_pages)); 1880 1881 block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, 1882 &state->block_hashtable); 1883 if (NULL != block) { 1884 u64 bytenr = 0; 1885 struct list_head *elem_ref_to; 1886 struct list_head *tmp_ref_to; 1887 1888 if (block->is_superblock) { 1889 bytenr = le64_to_cpu(((struct btrfs_super_block *) 1890 mapped_datav[0])->bytenr); 1891 if (num_pages * PAGE_CACHE_SIZE < 1892 BTRFS_SUPER_INFO_SIZE) { 1893 printk(KERN_INFO 1894 "btrfsic: cannot work with too short bios!\n"); 1895 return; 1896 } 1897 is_metadata = 1; 1898 BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1)); 1899 processed_len = BTRFS_SUPER_INFO_SIZE; 1900 if (state->print_mask & 1901 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { 1902 printk(KERN_INFO 1903 "[before new superblock is written]:\n"); 1904 btrfsic_dump_tree_sub(state, block, 0); 1905 } 1906 } 1907 if (is_metadata) { 1908 if (!block->is_superblock) { 1909 if (num_pages * PAGE_CACHE_SIZE < 1910 state->metablock_size) { 1911 printk(KERN_INFO 1912 
"btrfsic: cannot work with too short bios!\n"); 1913 return; 1914 } 1915 processed_len = state->metablock_size; 1916 bytenr = le64_to_cpu(((struct btrfs_header *) 1917 mapped_datav[0])->bytenr); 1918 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, 1919 dev_state, 1920 dev_bytenr); 1921 } 1922 if (block->logical_bytenr != bytenr) { 1923 printk(KERN_INFO 1924 "Written block @%llu (%s/%llu/%d)" 1925 " found in hash table, %c," 1926 " bytenr mismatch" 1927 " (!= stored %llu).\n", 1928 (unsigned long long)bytenr, 1929 dev_state->name, 1930 (unsigned long long)dev_bytenr, 1931 block->mirror_num, 1932 btrfsic_get_block_type(state, block), 1933 (unsigned long long) 1934 block->logical_bytenr); 1935 block->logical_bytenr = bytenr; 1936 } else if (state->print_mask & 1937 BTRFSIC_PRINT_MASK_VERBOSE) 1938 printk(KERN_INFO 1939 "Written block @%llu (%s/%llu/%d)" 1940 " found in hash table, %c.\n", 1941 (unsigned long long)bytenr, 1942 dev_state->name, 1943 (unsigned long long)dev_bytenr, 1944 block->mirror_num, 1945 btrfsic_get_block_type(state, block)); 1946 } else { 1947 if (num_pages * PAGE_CACHE_SIZE < 1948 state->datablock_size) { 1949 printk(KERN_INFO 1950 "btrfsic: cannot work with too short bios!\n"); 1951 return; 1952 } 1953 processed_len = state->datablock_size; 1954 bytenr = block->logical_bytenr; 1955 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1956 printk(KERN_INFO 1957 "Written block @%llu (%s/%llu/%d)" 1958 " found in hash table, %c.\n", 1959 (unsigned long long)bytenr, 1960 dev_state->name, 1961 (unsigned long long)dev_bytenr, 1962 block->mirror_num, 1963 btrfsic_get_block_type(state, block)); 1964 } 1965 1966 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1967 printk(KERN_INFO 1968 "ref_to_list: %cE, ref_from_list: %cE\n", 1969 list_empty(&block->ref_to_list) ? ' ' : '!', 1970 list_empty(&block->ref_from_list) ? 
' ' : '!'); 1971 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) { 1972 printk(KERN_INFO "btrfs: attempt to overwrite %c-block" 1973 " @%llu (%s/%llu/%d), old(gen=%llu," 1974 " objectid=%llu, type=%d, offset=%llu)," 1975 " new(gen=%llu)," 1976 " which is referenced by most recent superblock" 1977 " (superblockgen=%llu)!\n", 1978 btrfsic_get_block_type(state, block), 1979 (unsigned long long)bytenr, 1980 dev_state->name, 1981 (unsigned long long)dev_bytenr, 1982 block->mirror_num, 1983 (unsigned long long)block->generation, 1984 (unsigned long long) 1985 le64_to_cpu(block->disk_key.objectid), 1986 block->disk_key.type, 1987 (unsigned long long) 1988 le64_to_cpu(block->disk_key.offset), 1989 (unsigned long long) 1990 le64_to_cpu(((struct btrfs_header *) 1991 mapped_datav[0])->generation), 1992 (unsigned long long) 1993 state->max_superblock_generation); 1994 btrfsic_dump_tree(state); 1995 } 1996 1997 if (!block->is_iodone && !block->never_written) { 1998 printk(KERN_INFO "btrfs: attempt to overwrite %c-block" 1999 " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu," 2000 " which is not yet iodone!\n", 2001 btrfsic_get_block_type(state, block), 2002 (unsigned long long)bytenr, 2003 dev_state->name, 2004 (unsigned long long)dev_bytenr, 2005 block->mirror_num, 2006 (unsigned long long)block->generation, 2007 (unsigned long long) 2008 le64_to_cpu(((struct btrfs_header *) 2009 mapped_datav[0])->generation)); 2010 /* it would not be safe to go on */ 2011 btrfsic_dump_tree(state); 2012 goto continue_loop; 2013 } 2014 2015 /* 2016 * Clear all references of this block. Do not free 2017 * the block itself even if is not referenced anymore 2018 * because it still carries valueable information 2019 * like whether it was ever written and IO completed. 
2020 */ 2021 list_for_each_safe(elem_ref_to, tmp_ref_to, 2022 &block->ref_to_list) { 2023 struct btrfsic_block_link *const l = 2024 list_entry(elem_ref_to, 2025 struct btrfsic_block_link, 2026 node_ref_to); 2027 2028 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2029 btrfsic_print_rem_link(state, l); 2030 l->ref_cnt--; 2031 if (0 == l->ref_cnt) { 2032 list_del(&l->node_ref_to); 2033 list_del(&l->node_ref_from); 2034 btrfsic_block_link_hashtable_remove(l); 2035 btrfsic_block_link_free(l); 2036 } 2037 } 2038 2039 if (block->is_superblock) 2040 ret = btrfsic_map_superblock(state, bytenr, 2041 processed_len, 2042 bdev, &block_ctx); 2043 else 2044 ret = btrfsic_map_block(state, bytenr, processed_len, 2045 &block_ctx, 0); 2046 if (ret) { 2047 printk(KERN_INFO 2048 "btrfsic: btrfsic_map_block(root @%llu)" 2049 " failed!\n", (unsigned long long)bytenr); 2050 goto continue_loop; 2051 } 2052 block_ctx.datav = mapped_datav; 2053 /* the following is required in case of writes to mirrors, 2054 * use the same that was used for the lookup */ 2055 block_ctx.dev = dev_state; 2056 block_ctx.dev_bytenr = dev_bytenr; 2057 2058 if (is_metadata || state->include_extent_data) { 2059 block->never_written = 0; 2060 block->iodone_w_error = 0; 2061 if (NULL != bio) { 2062 block->is_iodone = 0; 2063 BUG_ON(NULL == bio_is_patched); 2064 if (!*bio_is_patched) { 2065 block->orig_bio_bh_private = 2066 bio->bi_private; 2067 block->orig_bio_bh_end_io.bio = 2068 bio->bi_end_io; 2069 block->next_in_same_bio = NULL; 2070 bio->bi_private = block; 2071 bio->bi_end_io = btrfsic_bio_end_io; 2072 *bio_is_patched = 1; 2073 } else { 2074 struct btrfsic_block *chained_block = 2075 (struct btrfsic_block *) 2076 bio->bi_private; 2077 2078 BUG_ON(NULL == chained_block); 2079 block->orig_bio_bh_private = 2080 chained_block->orig_bio_bh_private; 2081 block->orig_bio_bh_end_io.bio = 2082 chained_block->orig_bio_bh_end_io. 
2083 bio; 2084 block->next_in_same_bio = chained_block; 2085 bio->bi_private = block; 2086 } 2087 } else if (NULL != bh) { 2088 block->is_iodone = 0; 2089 block->orig_bio_bh_private = bh->b_private; 2090 block->orig_bio_bh_end_io.bh = bh->b_end_io; 2091 block->next_in_same_bio = NULL; 2092 bh->b_private = block; 2093 bh->b_end_io = btrfsic_bh_end_io; 2094 } else { 2095 block->is_iodone = 1; 2096 block->orig_bio_bh_private = NULL; 2097 block->orig_bio_bh_end_io.bio = NULL; 2098 block->next_in_same_bio = NULL; 2099 } 2100 } 2101 2102 block->flush_gen = dev_state->last_flush_gen + 1; 2103 block->submit_bio_bh_rw = submit_bio_bh_rw; 2104 if (is_metadata) { 2105 block->logical_bytenr = bytenr; 2106 block->is_metadata = 1; 2107 if (block->is_superblock) { 2108 BUG_ON(PAGE_CACHE_SIZE != 2109 BTRFS_SUPER_INFO_SIZE); 2110 ret = btrfsic_process_written_superblock( 2111 state, 2112 block, 2113 (struct btrfs_super_block *) 2114 mapped_datav[0]); 2115 if (state->print_mask & 2116 BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { 2117 printk(KERN_INFO 2118 "[after new superblock is written]:\n"); 2119 btrfsic_dump_tree_sub(state, block, 0); 2120 } 2121 } else { 2122 block->mirror_num = 0; /* unknown */ 2123 ret = btrfsic_process_metablock( 2124 state, 2125 block, 2126 &block_ctx, 2127 0, 0); 2128 } 2129 if (ret) 2130 printk(KERN_INFO 2131 "btrfsic: btrfsic_process_metablock" 2132 "(root @%llu) failed!\n", 2133 (unsigned long long)dev_bytenr); 2134 } else { 2135 block->is_metadata = 0; 2136 block->mirror_num = 0; /* unknown */ 2137 block->generation = BTRFSIC_GENERATION_UNKNOWN; 2138 if (!state->include_extent_data 2139 && list_empty(&block->ref_from_list)) { 2140 /* 2141 * disk block is overwritten with extent 2142 * data (not meta data) and we are configured 2143 * to not include extent data: take the 2144 * chance and free the block's memory 2145 */ 2146 btrfsic_block_hashtable_remove(block); 2147 list_del(&block->all_blocks_node); 2148 btrfsic_block_free(block); 2149 } 2150 } 2151 
btrfsic_release_block_ctx(&block_ctx); 2152 } else { 2153 /* block has not been found in hash table */ 2154 u64 bytenr; 2155 2156 if (!is_metadata) { 2157 processed_len = state->datablock_size; 2158 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2159 printk(KERN_INFO "Written block (%s/%llu/?)" 2160 " !found in hash table, D.\n", 2161 dev_state->name, 2162 (unsigned long long)dev_bytenr); 2163 if (!state->include_extent_data) { 2164 /* ignore that written D block */ 2165 goto continue_loop; 2166 } 2167 2168 /* this is getting ugly for the 2169 * include_extent_data case... */ 2170 bytenr = 0; /* unknown */ 2171 block_ctx.start = bytenr; 2172 block_ctx.len = processed_len; 2173 block_ctx.mem_to_free = NULL; 2174 block_ctx.pagev = NULL; 2175 } else { 2176 processed_len = state->metablock_size; 2177 bytenr = le64_to_cpu(((struct btrfs_header *) 2178 mapped_datav[0])->bytenr); 2179 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, 2180 dev_bytenr); 2181 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2182 printk(KERN_INFO 2183 "Written block @%llu (%s/%llu/?)" 2184 " !found in hash table, M.\n", 2185 (unsigned long long)bytenr, 2186 dev_state->name, 2187 (unsigned long long)dev_bytenr); 2188 2189 ret = btrfsic_map_block(state, bytenr, processed_len, 2190 &block_ctx, 0); 2191 if (ret) { 2192 printk(KERN_INFO 2193 "btrfsic: btrfsic_map_block(root @%llu)" 2194 " failed!\n", 2195 (unsigned long long)dev_bytenr); 2196 goto continue_loop; 2197 } 2198 } 2199 block_ctx.datav = mapped_datav; 2200 /* the following is required in case of writes to mirrors, 2201 * use the same that was used for the lookup */ 2202 block_ctx.dev = dev_state; 2203 block_ctx.dev_bytenr = dev_bytenr; 2204 2205 block = btrfsic_block_alloc(); 2206 if (NULL == block) { 2207 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 2208 btrfsic_release_block_ctx(&block_ctx); 2209 goto continue_loop; 2210 } 2211 block->dev_state = dev_state; 2212 block->dev_bytenr = dev_bytenr; 2213 
block->logical_bytenr = bytenr; 2214 block->is_metadata = is_metadata; 2215 block->never_written = 0; 2216 block->iodone_w_error = 0; 2217 block->mirror_num = 0; /* unknown */ 2218 block->flush_gen = dev_state->last_flush_gen + 1; 2219 block->submit_bio_bh_rw = submit_bio_bh_rw; 2220 if (NULL != bio) { 2221 block->is_iodone = 0; 2222 BUG_ON(NULL == bio_is_patched); 2223 if (!*bio_is_patched) { 2224 block->orig_bio_bh_private = bio->bi_private; 2225 block->orig_bio_bh_end_io.bio = bio->bi_end_io; 2226 block->next_in_same_bio = NULL; 2227 bio->bi_private = block; 2228 bio->bi_end_io = btrfsic_bio_end_io; 2229 *bio_is_patched = 1; 2230 } else { 2231 struct btrfsic_block *chained_block = 2232 (struct btrfsic_block *) 2233 bio->bi_private; 2234 2235 BUG_ON(NULL == chained_block); 2236 block->orig_bio_bh_private = 2237 chained_block->orig_bio_bh_private; 2238 block->orig_bio_bh_end_io.bio = 2239 chained_block->orig_bio_bh_end_io.bio; 2240 block->next_in_same_bio = chained_block; 2241 bio->bi_private = block; 2242 } 2243 } else if (NULL != bh) { 2244 block->is_iodone = 0; 2245 block->orig_bio_bh_private = bh->b_private; 2246 block->orig_bio_bh_end_io.bh = bh->b_end_io; 2247 block->next_in_same_bio = NULL; 2248 bh->b_private = block; 2249 bh->b_end_io = btrfsic_bh_end_io; 2250 } else { 2251 block->is_iodone = 1; 2252 block->orig_bio_bh_private = NULL; 2253 block->orig_bio_bh_end_io.bio = NULL; 2254 block->next_in_same_bio = NULL; 2255 } 2256 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2257 printk(KERN_INFO 2258 "New written %c-block @%llu (%s/%llu/%d)\n", 2259 is_metadata ? 
			       'M' : 'D',
			       (unsigned long long)block->logical_bytenr,
			       block->dev_state->name,
			       (unsigned long long)block->dev_bytenr,
			       block->mirror_num);
		list_add(&block->all_blocks_node, &state->all_blocks_list);
		btrfsic_block_hashtable_add(block, &state->block_hashtable);

		if (is_metadata) {
			ret = btrfsic_process_metablock(state, block,
							&block_ctx, 0, 0);
			if (ret)
				printk(KERN_INFO
				       "btrfsic: process_metablock(root @%llu)"
				       " failed!\n",
				       (unsigned long long)dev_bytenr);
		}
		btrfsic_release_block_ctx(&block_ctx);
	}

continue_loop:
	/* advance past the bytes just handled and process the remainder
	 * of the submitted data, if any */
	BUG_ON(!processed_len);
	dev_bytenr += processed_len;
	mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
	num_pages -= processed_len >> PAGE_CACHE_SHIFT;
	goto again;
}

/*
 * I/O completion hook that is patched into a write bio in place of the
 * original bi_end_io (the original is saved in orig_bio_bh_end_io). Walks
 * the chain of btrfsic_blocks attached to this bio (next_in_same_bio),
 * marks each as iodone and records any I/O error, bumps the device's
 * flush generation when a FLUSH completes, and finally chains to the
 * original completion callback.
 */
static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
{
	struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
	int iodone_w_error;

	/* mutex is not held! This is not safe if IO is not yet completed
	 * on umount */
	iodone_w_error = 0;
	if (bio_error_status)
		iodone_w_error = 1;

	BUG_ON(NULL == block);
	/* restore the original completion context before chaining to it */
	bp->bi_private = block->orig_bio_bh_private;
	bp->bi_end_io = block->orig_bio_bh_end_io.bio;

	do {
		struct btrfsic_block *next_block;
		struct btrfsic_dev_state *const dev_state = block->dev_state;

		if ((dev_state->state->print_mask &
		     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
			printk(KERN_INFO
			       "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
			       bio_error_status,
			       btrfsic_get_block_type(dev_state->state, block),
			       (unsigned long long)block->logical_bytenr,
			       dev_state->name,
			       (unsigned long long)block->dev_bytenr,
			       block->mirror_num);
		/* fetch the successor before is_iodone may release block */
		next_block = block->next_in_same_bio;
		block->iodone_w_error = iodone_w_error;
		if (block->submit_bio_bh_rw & REQ_FLUSH) {
			/* a completed FLUSH opens a new flush generation */
			dev_state->last_flush_gen++;
			if ((dev_state->state->print_mask &
			     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
				printk(KERN_INFO
				       "bio_end_io() new %s flush_gen=%llu\n",
				       dev_state->name,
				       (unsigned long long)
				       dev_state->last_flush_gen);
		}
		if (block->submit_bio_bh_rw & REQ_FUA)
			block->flush_gen = 0; /* FUA completed means block is
					       * on disk */
		block->is_iodone = 1; /* for FLUSH, this releases the block */
		block = next_block;
	} while (NULL != block);

	bp->bi_end_io(bp, bio_error_status);
}

/*
 * buffer_head counterpart of btrfsic_bio_end_io(): accounts the write
 * completion, then restores the b_end_io/b_private that were saved when
 * the bh was submitted and chains to the original completion handler.
 */
static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
{
	struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
	int iodone_w_error = !uptodate;
	struct btrfsic_dev_state *dev_state;

	BUG_ON(NULL == block);
	dev_state = block->dev_state;
	if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
		printk(KERN_INFO
		       "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
		       iodone_w_error,
btrfsic_get_block_type(dev_state->state, block), 2351 (unsigned long long)block->logical_bytenr, 2352 block->dev_state->name, 2353 (unsigned long long)block->dev_bytenr, 2354 block->mirror_num); 2355 2356 block->iodone_w_error = iodone_w_error; 2357 if (block->submit_bio_bh_rw & REQ_FLUSH) { 2358 dev_state->last_flush_gen++; 2359 if ((dev_state->state->print_mask & 2360 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2361 printk(KERN_INFO 2362 "bh_end_io() new %s flush_gen=%llu\n", 2363 dev_state->name, 2364 (unsigned long long)dev_state->last_flush_gen); 2365 } 2366 if (block->submit_bio_bh_rw & REQ_FUA) 2367 block->flush_gen = 0; /* FUA completed means block is on disk */ 2368 2369 bh->b_private = block->orig_bio_bh_private; 2370 bh->b_end_io = block->orig_bio_bh_end_io.bh; 2371 block->is_iodone = 1; /* for FLUSH, this releases the block */ 2372 bh->b_end_io(bh, uptodate); 2373} 2374 2375static int btrfsic_process_written_superblock( 2376 struct btrfsic_state *state, 2377 struct btrfsic_block *const superblock, 2378 struct btrfs_super_block *const super_hdr) 2379{ 2380 int pass; 2381 2382 superblock->generation = btrfs_super_generation(super_hdr); 2383 if (!(superblock->generation > state->max_superblock_generation || 2384 0 == state->max_superblock_generation)) { 2385 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 2386 printk(KERN_INFO 2387 "btrfsic: superblock @%llu (%s/%llu/%d)" 2388 " with old gen %llu <= %llu\n", 2389 (unsigned long long)superblock->logical_bytenr, 2390 superblock->dev_state->name, 2391 (unsigned long long)superblock->dev_bytenr, 2392 superblock->mirror_num, 2393 (unsigned long long) 2394 btrfs_super_generation(super_hdr), 2395 (unsigned long long) 2396 state->max_superblock_generation); 2397 } else { 2398 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 2399 printk(KERN_INFO 2400 "btrfsic: got new superblock @%llu (%s/%llu/%d)" 2401 " with new gen %llu > %llu\n", 2402 (unsigned long long)superblock->logical_bytenr, 2403 
superblock->dev_state->name, 2404 (unsigned long long)superblock->dev_bytenr, 2405 superblock->mirror_num, 2406 (unsigned long long) 2407 btrfs_super_generation(super_hdr), 2408 (unsigned long long) 2409 state->max_superblock_generation); 2410 2411 state->max_superblock_generation = 2412 btrfs_super_generation(super_hdr); 2413 state->latest_superblock = superblock; 2414 } 2415 2416 for (pass = 0; pass < 3; pass++) { 2417 int ret; 2418 u64 next_bytenr; 2419 struct btrfsic_block *next_block; 2420 struct btrfsic_block_data_ctx tmp_next_block_ctx; 2421 struct btrfsic_block_link *l; 2422 int num_copies; 2423 int mirror_num; 2424 const char *additional_string = NULL; 2425 struct btrfs_disk_key tmp_disk_key; 2426 2427 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; 2428 tmp_disk_key.offset = 0; 2429 2430 switch (pass) { 2431 case 0: 2432 tmp_disk_key.objectid = 2433 cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); 2434 additional_string = "root "; 2435 next_bytenr = btrfs_super_root(super_hdr); 2436 if (state->print_mask & 2437 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2438 printk(KERN_INFO "root@%llu\n", 2439 (unsigned long long)next_bytenr); 2440 break; 2441 case 1: 2442 tmp_disk_key.objectid = 2443 cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); 2444 additional_string = "chunk "; 2445 next_bytenr = btrfs_super_chunk_root(super_hdr); 2446 if (state->print_mask & 2447 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2448 printk(KERN_INFO "chunk@%llu\n", 2449 (unsigned long long)next_bytenr); 2450 break; 2451 case 2: 2452 tmp_disk_key.objectid = 2453 cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); 2454 additional_string = "log "; 2455 next_bytenr = btrfs_super_log_root(super_hdr); 2456 if (0 == next_bytenr) 2457 continue; 2458 if (state->print_mask & 2459 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2460 printk(KERN_INFO "log@%llu\n", 2461 (unsigned long long)next_bytenr); 2462 break; 2463 } 2464 2465 num_copies = 2466 btrfs_num_copies(state->root->fs_info, 2467 next_bytenr, 
BTRFS_SUPER_INFO_SIZE); 2468 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 2469 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 2470 (unsigned long long)next_bytenr, num_copies); 2471 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2472 int was_created; 2473 2474 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2475 printk(KERN_INFO 2476 "btrfsic_process_written_superblock(" 2477 "mirror_num=%d)\n", mirror_num); 2478 ret = btrfsic_map_block(state, next_bytenr, 2479 BTRFS_SUPER_INFO_SIZE, 2480 &tmp_next_block_ctx, 2481 mirror_num); 2482 if (ret) { 2483 printk(KERN_INFO 2484 "btrfsic: btrfsic_map_block(@%llu," 2485 " mirror=%d) failed!\n", 2486 (unsigned long long)next_bytenr, 2487 mirror_num); 2488 return -1; 2489 } 2490 2491 next_block = btrfsic_block_lookup_or_add( 2492 state, 2493 &tmp_next_block_ctx, 2494 additional_string, 2495 1, 0, 1, 2496 mirror_num, 2497 &was_created); 2498 if (NULL == next_block) { 2499 printk(KERN_INFO 2500 "btrfsic: error, kmalloc failed!\n"); 2501 btrfsic_release_block_ctx(&tmp_next_block_ctx); 2502 return -1; 2503 } 2504 2505 next_block->disk_key = tmp_disk_key; 2506 if (was_created) 2507 next_block->generation = 2508 BTRFSIC_GENERATION_UNKNOWN; 2509 l = btrfsic_block_link_lookup_or_add( 2510 state, 2511 &tmp_next_block_ctx, 2512 next_block, 2513 superblock, 2514 BTRFSIC_GENERATION_UNKNOWN); 2515 btrfsic_release_block_ctx(&tmp_next_block_ctx); 2516 if (NULL == l) 2517 return -1; 2518 } 2519 } 2520 2521 if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) { 2522 WARN_ON(1); 2523 btrfsic_dump_tree(state); 2524 } 2525 2526 return 0; 2527} 2528 2529static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, 2530 struct btrfsic_block *const block, 2531 int recursion_level) 2532{ 2533 struct list_head *elem_ref_to; 2534 int ret = 0; 2535 2536 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { 2537 /* 2538 * Note that this situation can happen and does not 2539 * indicate an error in regular 
cases. It happens 2540 * when disk blocks are freed and later reused. 2541 * The check-integrity module is not aware of any 2542 * block free operations, it just recognizes block 2543 * write operations. Therefore it keeps the linkage 2544 * information for a block until a block is 2545 * rewritten. This can temporarily cause incorrect 2546 * and even circular linkage informations. This 2547 * causes no harm unless such blocks are referenced 2548 * by the most recent super block. 2549 */ 2550 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2551 printk(KERN_INFO 2552 "btrfsic: abort cyclic linkage (case 1).\n"); 2553 2554 return ret; 2555 } 2556 2557 /* 2558 * This algorithm is recursive because the amount of used stack 2559 * space is very small and the max recursion depth is limited. 2560 */ 2561 list_for_each(elem_ref_to, &block->ref_to_list) { 2562 const struct btrfsic_block_link *const l = 2563 list_entry(elem_ref_to, struct btrfsic_block_link, 2564 node_ref_to); 2565 2566 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2567 printk(KERN_INFO 2568 "rl=%d, %c @%llu (%s/%llu/%d)" 2569 " %u* refers to %c @%llu (%s/%llu/%d)\n", 2570 recursion_level, 2571 btrfsic_get_block_type(state, block), 2572 (unsigned long long)block->logical_bytenr, 2573 block->dev_state->name, 2574 (unsigned long long)block->dev_bytenr, 2575 block->mirror_num, 2576 l->ref_cnt, 2577 btrfsic_get_block_type(state, l->block_ref_to), 2578 (unsigned long long) 2579 l->block_ref_to->logical_bytenr, 2580 l->block_ref_to->dev_state->name, 2581 (unsigned long long)l->block_ref_to->dev_bytenr, 2582 l->block_ref_to->mirror_num); 2583 if (l->block_ref_to->never_written) { 2584 printk(KERN_INFO "btrfs: attempt to write superblock" 2585 " which references block %c @%llu (%s/%llu/%d)" 2586 " which is never written!\n", 2587 btrfsic_get_block_type(state, l->block_ref_to), 2588 (unsigned long long) 2589 l->block_ref_to->logical_bytenr, 2590 l->block_ref_to->dev_state->name, 2591 (unsigned long 
long)l->block_ref_to->dev_bytenr, 2592 l->block_ref_to->mirror_num); 2593 ret = -1; 2594 } else if (!l->block_ref_to->is_iodone) { 2595 printk(KERN_INFO "btrfs: attempt to write superblock" 2596 " which references block %c @%llu (%s/%llu/%d)" 2597 " which is not yet iodone!\n", 2598 btrfsic_get_block_type(state, l->block_ref_to), 2599 (unsigned long long) 2600 l->block_ref_to->logical_bytenr, 2601 l->block_ref_to->dev_state->name, 2602 (unsigned long long)l->block_ref_to->dev_bytenr, 2603 l->block_ref_to->mirror_num); 2604 ret = -1; 2605 } else if (l->block_ref_to->iodone_w_error) { 2606 printk(KERN_INFO "btrfs: attempt to write superblock" 2607 " which references block %c @%llu (%s/%llu/%d)" 2608 " which has write error!\n", 2609 btrfsic_get_block_type(state, l->block_ref_to), 2610 (unsigned long long) 2611 l->block_ref_to->logical_bytenr, 2612 l->block_ref_to->dev_state->name, 2613 (unsigned long long)l->block_ref_to->dev_bytenr, 2614 l->block_ref_to->mirror_num); 2615 ret = -1; 2616 } else if (l->parent_generation != 2617 l->block_ref_to->generation && 2618 BTRFSIC_GENERATION_UNKNOWN != 2619 l->parent_generation && 2620 BTRFSIC_GENERATION_UNKNOWN != 2621 l->block_ref_to->generation) { 2622 printk(KERN_INFO "btrfs: attempt to write superblock" 2623 " which references block %c @%llu (%s/%llu/%d)" 2624 " with generation %llu !=" 2625 " parent generation %llu!\n", 2626 btrfsic_get_block_type(state, l->block_ref_to), 2627 (unsigned long long) 2628 l->block_ref_to->logical_bytenr, 2629 l->block_ref_to->dev_state->name, 2630 (unsigned long long)l->block_ref_to->dev_bytenr, 2631 l->block_ref_to->mirror_num, 2632 (unsigned long long)l->block_ref_to->generation, 2633 (unsigned long long)l->parent_generation); 2634 ret = -1; 2635 } else if (l->block_ref_to->flush_gen > 2636 l->block_ref_to->dev_state->last_flush_gen) { 2637 printk(KERN_INFO "btrfs: attempt to write superblock" 2638 " which references block %c @%llu (%s/%llu/%d)" 2639 " which is not flushed out of disk's 
write cache" 2640 " (block flush_gen=%llu," 2641 " dev->flush_gen=%llu)!\n", 2642 btrfsic_get_block_type(state, l->block_ref_to), 2643 (unsigned long long) 2644 l->block_ref_to->logical_bytenr, 2645 l->block_ref_to->dev_state->name, 2646 (unsigned long long)l->block_ref_to->dev_bytenr, 2647 l->block_ref_to->mirror_num, 2648 (unsigned long long)block->flush_gen, 2649 (unsigned long long) 2650 l->block_ref_to->dev_state->last_flush_gen); 2651 ret = -1; 2652 } else if (-1 == btrfsic_check_all_ref_blocks(state, 2653 l->block_ref_to, 2654 recursion_level + 2655 1)) { 2656 ret = -1; 2657 } 2658 } 2659 2660 return ret; 2661} 2662 2663static int btrfsic_is_block_ref_by_superblock( 2664 const struct btrfsic_state *state, 2665 const struct btrfsic_block *block, 2666 int recursion_level) 2667{ 2668 struct list_head *elem_ref_from; 2669 2670 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { 2671 /* refer to comment at "abort cyclic linkage (case 1)" */ 2672 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2673 printk(KERN_INFO 2674 "btrfsic: abort cyclic linkage (case 2).\n"); 2675 2676 return 0; 2677 } 2678 2679 /* 2680 * This algorithm is recursive because the amount of used stack space 2681 * is very small and the max recursion depth is limited. 
2682 */ 2683 list_for_each(elem_ref_from, &block->ref_from_list) { 2684 const struct btrfsic_block_link *const l = 2685 list_entry(elem_ref_from, struct btrfsic_block_link, 2686 node_ref_from); 2687 2688 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2689 printk(KERN_INFO 2690 "rl=%d, %c @%llu (%s/%llu/%d)" 2691 " is ref %u* from %c @%llu (%s/%llu/%d)\n", 2692 recursion_level, 2693 btrfsic_get_block_type(state, block), 2694 (unsigned long long)block->logical_bytenr, 2695 block->dev_state->name, 2696 (unsigned long long)block->dev_bytenr, 2697 block->mirror_num, 2698 l->ref_cnt, 2699 btrfsic_get_block_type(state, l->block_ref_from), 2700 (unsigned long long) 2701 l->block_ref_from->logical_bytenr, 2702 l->block_ref_from->dev_state->name, 2703 (unsigned long long) 2704 l->block_ref_from->dev_bytenr, 2705 l->block_ref_from->mirror_num); 2706 if (l->block_ref_from->is_superblock && 2707 state->latest_superblock->dev_bytenr == 2708 l->block_ref_from->dev_bytenr && 2709 state->latest_superblock->dev_state->bdev == 2710 l->block_ref_from->dev_state->bdev) 2711 return 1; 2712 else if (btrfsic_is_block_ref_by_superblock(state, 2713 l->block_ref_from, 2714 recursion_level + 2715 1)) 2716 return 1; 2717 } 2718 2719 return 0; 2720} 2721 2722static void btrfsic_print_add_link(const struct btrfsic_state *state, 2723 const struct btrfsic_block_link *l) 2724{ 2725 printk(KERN_INFO 2726 "Add %u* link from %c @%llu (%s/%llu/%d)" 2727 " to %c @%llu (%s/%llu/%d).\n", 2728 l->ref_cnt, 2729 btrfsic_get_block_type(state, l->block_ref_from), 2730 (unsigned long long)l->block_ref_from->logical_bytenr, 2731 l->block_ref_from->dev_state->name, 2732 (unsigned long long)l->block_ref_from->dev_bytenr, 2733 l->block_ref_from->mirror_num, 2734 btrfsic_get_block_type(state, l->block_ref_to), 2735 (unsigned long long)l->block_ref_to->logical_bytenr, 2736 l->block_ref_to->dev_state->name, 2737 (unsigned long long)l->block_ref_to->dev_bytenr, 2738 l->block_ref_to->mirror_num); 2739} 2740 
2741static void btrfsic_print_rem_link(const struct btrfsic_state *state, 2742 const struct btrfsic_block_link *l) 2743{ 2744 printk(KERN_INFO 2745 "Rem %u* link from %c @%llu (%s/%llu/%d)" 2746 " to %c @%llu (%s/%llu/%d).\n", 2747 l->ref_cnt, 2748 btrfsic_get_block_type(state, l->block_ref_from), 2749 (unsigned long long)l->block_ref_from->logical_bytenr, 2750 l->block_ref_from->dev_state->name, 2751 (unsigned long long)l->block_ref_from->dev_bytenr, 2752 l->block_ref_from->mirror_num, 2753 btrfsic_get_block_type(state, l->block_ref_to), 2754 (unsigned long long)l->block_ref_to->logical_bytenr, 2755 l->block_ref_to->dev_state->name, 2756 (unsigned long long)l->block_ref_to->dev_bytenr, 2757 l->block_ref_to->mirror_num); 2758} 2759 2760static char btrfsic_get_block_type(const struct btrfsic_state *state, 2761 const struct btrfsic_block *block) 2762{ 2763 if (block->is_superblock && 2764 state->latest_superblock->dev_bytenr == block->dev_bytenr && 2765 state->latest_superblock->dev_state->bdev == block->dev_state->bdev) 2766 return 'S'; 2767 else if (block->is_superblock) 2768 return 's'; 2769 else if (block->is_metadata) 2770 return 'M'; 2771 else 2772 return 'D'; 2773} 2774 2775static void btrfsic_dump_tree(const struct btrfsic_state *state) 2776{ 2777 btrfsic_dump_tree_sub(state, state->latest_superblock, 0); 2778} 2779 2780static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, 2781 const struct btrfsic_block *block, 2782 int indent_level) 2783{ 2784 struct list_head *elem_ref_to; 2785 int indent_add; 2786 static char buf[80]; 2787 int cursor_position; 2788 2789 /* 2790 * Should better fill an on-stack buffer with a complete line and 2791 * dump it at once when it is time to print a newline character. 2792 */ 2793 2794 /* 2795 * This algorithm is recursive because the amount of used stack space 2796 * is very small and the max recursion depth is limited. 
2797 */ 2798 indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)", 2799 btrfsic_get_block_type(state, block), 2800 (unsigned long long)block->logical_bytenr, 2801 block->dev_state->name, 2802 (unsigned long long)block->dev_bytenr, 2803 block->mirror_num); 2804 if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { 2805 printk("[...]\n"); 2806 return; 2807 } 2808 printk(buf); 2809 indent_level += indent_add; 2810 if (list_empty(&block->ref_to_list)) { 2811 printk("\n"); 2812 return; 2813 } 2814 if (block->mirror_num > 1 && 2815 !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) { 2816 printk(" [...]\n"); 2817 return; 2818 } 2819 2820 cursor_position = indent_level; 2821 list_for_each(elem_ref_to, &block->ref_to_list) { 2822 const struct btrfsic_block_link *const l = 2823 list_entry(elem_ref_to, struct btrfsic_block_link, 2824 node_ref_to); 2825 2826 while (cursor_position < indent_level) { 2827 printk(" "); 2828 cursor_position++; 2829 } 2830 if (l->ref_cnt > 1) 2831 indent_add = sprintf(buf, " %d*--> ", l->ref_cnt); 2832 else 2833 indent_add = sprintf(buf, " --> "); 2834 if (indent_level + indent_add > 2835 BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { 2836 printk("[...]\n"); 2837 cursor_position = 0; 2838 continue; 2839 } 2840 2841 printk(buf); 2842 2843 btrfsic_dump_tree_sub(state, l->block_ref_to, 2844 indent_level + indent_add); 2845 cursor_position = 0; 2846 } 2847} 2848 2849static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add( 2850 struct btrfsic_state *state, 2851 struct btrfsic_block_data_ctx *next_block_ctx, 2852 struct btrfsic_block *next_block, 2853 struct btrfsic_block *from_block, 2854 u64 parent_generation) 2855{ 2856 struct btrfsic_block_link *l; 2857 2858 l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev, 2859 next_block_ctx->dev_bytenr, 2860 from_block->dev_state->bdev, 2861 from_block->dev_bytenr, 2862 &state->block_link_hashtable); 2863 if (NULL == l) { 2864 l = btrfsic_block_link_alloc(); 2865 if 
(NULL == l) { 2866 printk(KERN_INFO 2867 "btrfsic: error, kmalloc" " failed!\n"); 2868 return NULL; 2869 } 2870 2871 l->block_ref_to = next_block; 2872 l->block_ref_from = from_block; 2873 l->ref_cnt = 1; 2874 l->parent_generation = parent_generation; 2875 2876 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2877 btrfsic_print_add_link(state, l); 2878 2879 list_add(&l->node_ref_to, &from_block->ref_to_list); 2880 list_add(&l->node_ref_from, &next_block->ref_from_list); 2881 2882 btrfsic_block_link_hashtable_add(l, 2883 &state->block_link_hashtable); 2884 } else { 2885 l->ref_cnt++; 2886 l->parent_generation = parent_generation; 2887 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2888 btrfsic_print_add_link(state, l); 2889 } 2890 2891 return l; 2892} 2893 2894static struct btrfsic_block *btrfsic_block_lookup_or_add( 2895 struct btrfsic_state *state, 2896 struct btrfsic_block_data_ctx *block_ctx, 2897 const char *additional_string, 2898 int is_metadata, 2899 int is_iodone, 2900 int never_written, 2901 int mirror_num, 2902 int *was_created) 2903{ 2904 struct btrfsic_block *block; 2905 2906 block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev, 2907 block_ctx->dev_bytenr, 2908 &state->block_hashtable); 2909 if (NULL == block) { 2910 struct btrfsic_dev_state *dev_state; 2911 2912 block = btrfsic_block_alloc(); 2913 if (NULL == block) { 2914 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 2915 return NULL; 2916 } 2917 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev); 2918 if (NULL == dev_state) { 2919 printk(KERN_INFO 2920 "btrfsic: error, lookup dev_state failed!\n"); 2921 btrfsic_block_free(block); 2922 return NULL; 2923 } 2924 block->dev_state = dev_state; 2925 block->dev_bytenr = block_ctx->dev_bytenr; 2926 block->logical_bytenr = block_ctx->start; 2927 block->is_metadata = is_metadata; 2928 block->is_iodone = is_iodone; 2929 block->never_written = never_written; 2930 block->mirror_num = mirror_num; 2931 if (state->print_mask & 
BTRFSIC_PRINT_MASK_VERBOSE) 2932 printk(KERN_INFO 2933 "New %s%c-block @%llu (%s/%llu/%d)\n", 2934 additional_string, 2935 btrfsic_get_block_type(state, block), 2936 (unsigned long long)block->logical_bytenr, 2937 dev_state->name, 2938 (unsigned long long)block->dev_bytenr, 2939 mirror_num); 2940 list_add(&block->all_blocks_node, &state->all_blocks_list); 2941 btrfsic_block_hashtable_add(block, &state->block_hashtable); 2942 if (NULL != was_created) 2943 *was_created = 1; 2944 } else { 2945 if (NULL != was_created) 2946 *was_created = 0; 2947 } 2948 2949 return block; 2950} 2951 2952static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, 2953 u64 bytenr, 2954 struct btrfsic_dev_state *dev_state, 2955 u64 dev_bytenr) 2956{ 2957 int num_copies; 2958 int mirror_num; 2959 int ret; 2960 struct btrfsic_block_data_ctx block_ctx; 2961 int match = 0; 2962 2963 num_copies = btrfs_num_copies(state->root->fs_info, 2964 bytenr, state->metablock_size); 2965 2966 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2967 ret = btrfsic_map_block(state, bytenr, state->metablock_size, 2968 &block_ctx, mirror_num); 2969 if (ret) { 2970 printk(KERN_INFO "btrfsic:" 2971 " btrfsic_map_block(logical @%llu," 2972 " mirror %d) failed!\n", 2973 (unsigned long long)bytenr, mirror_num); 2974 continue; 2975 } 2976 2977 if (dev_state->bdev == block_ctx.dev->bdev && 2978 dev_bytenr == block_ctx.dev_bytenr) { 2979 match++; 2980 btrfsic_release_block_ctx(&block_ctx); 2981 break; 2982 } 2983 btrfsic_release_block_ctx(&block_ctx); 2984 } 2985 2986 if (!match) { 2987 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio," 2988 " buffer->log_bytenr=%llu, submit_bio(bdev=%s," 2989 " phys_bytenr=%llu)!\n", 2990 (unsigned long long)bytenr, dev_state->name, 2991 (unsigned long long)dev_bytenr); 2992 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2993 ret = btrfsic_map_block(state, 
						bytenr,
						state->metablock_size,
						&block_ctx, mirror_num);
			if (ret)
				continue;

			printk(KERN_INFO "Read logical bytenr @%llu maps to"
			       " (%s/%llu/%d)\n",
			       (unsigned long long)bytenr,
			       block_ctx.dev->name,
			       (unsigned long long)block_ctx.dev_bytenr,
			       mirror_num);
		}
		WARN_ON(1);
	}
}

/*
 * Map a block_device to its registered btrfsic_dev_state, or NULL when the
 * device is not (yet) known to the integrity checker.
 */
static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
		struct block_device *bdev)
{
	struct btrfsic_dev_state *ds;

	ds = btrfsic_dev_state_hashtable_lookup(bdev,
						&btrfsic_dev_state_hashtable);
	return ds;
}

/*
 * Wrapper around submit_bh() that, when the integrity checker is active
 * for the target device, first examines the written data (superblock
 * writes) or accounts a FLUSH request by patching the bh's completion
 * callback, and then submits the bh as usual.
 */
int btrfsic_submit_bh(int rw, struct buffer_head *bh)
{
	struct btrfsic_dev_state *dev_state;

	if (!btrfsic_is_initialized)
		return submit_bh(rw, bh);

	mutex_lock(&btrfsic_mutex);
	/* since btrfsic_submit_bh() might also be called before
	 * btrfsic_mount(), this might return NULL */
	dev_state = btrfsic_dev_state_lookup(bh->b_bdev);

	/* Only called to write the superblock (incl. FLUSH/FUA) */
	if (NULL != dev_state &&
	    (rw & WRITE) && bh->b_size > 0) {
		u64 dev_bytenr;

		/* NOTE(review): assumes b_blocknr is in units of 4096-byte
		 * blocks (the btrfs superblock device block size) — confirm */
		dev_bytenr = 4096 * bh->b_blocknr;
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
			       " size=%lu, data=%p, bdev=%p)\n",
			       rw, (unsigned long)bh->b_blocknr,
			       (unsigned long long)dev_bytenr,
			       (unsigned long)bh->b_size, bh->b_data,
			       bh->b_bdev);
		btrfsic_process_written_block(dev_state, dev_bytenr,
					      &bh->b_data, 1, NULL,
					      NULL, bh, rw);
	} else if (NULL != dev_state && (rw & REQ_FLUSH)) {
		if (dev_state->state->print_mask &
		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
			printk(KERN_INFO
			       "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
			       rw, bh->b_bdev);
		if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
			/* only one outstanding FLUSH per device is tracked;
			 * further ones are ignored until it completes */
			if ((dev_state->state->print_mask &
			     (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
			      BTRFSIC_PRINT_MASK_VERBOSE)))
				printk(KERN_INFO
				       "btrfsic_submit_bh(%s) with FLUSH"
				       " but dummy block already in use"
				       " (ignored)!\n",
				       dev_state->name);
		} else {
			struct btrfsic_block *const block =
				&dev_state->dummy_block_for_bio_bh_flush;

			/* patch the completion callback so that
			 * btrfsic_bh_end_io() can account the FLUSH */
			block->is_iodone = 0;
			block->never_written = 0;
			block->iodone_w_error = 0;
			block->flush_gen = dev_state->last_flush_gen + 1;
			block->submit_bio_bh_rw = rw;
			block->orig_bio_bh_private = bh->b_private;
			block->orig_bio_bh_end_io.bh = bh->b_end_io;
			block->next_in_same_bio = NULL;
			bh->b_private = block;
			bh->b_end_io = btrfsic_bh_end_io;
		}
	}
	mutex_unlock(&btrfsic_mutex);
	return submit_bh(rw, bh);
}

/*
 * Wrapper around submit_bio(), the bio counterpart of
 * btrfsic_submit_bh().
 */
void btrfsic_submit_bio(int rw, struct bio *bio)
{
	struct btrfsic_dev_state *dev_state;

	if (!btrfsic_is_initialized) {
		submit_bio(rw, bio);
		return;
	}

	mutex_lock(&btrfsic_mutex);
	/* since btrfsic_submit_bio() is also called before
3096 * btrfsic_mount(), this might return NULL */ 3097 dev_state = btrfsic_dev_state_lookup(bio->bi_bdev); 3098 if (NULL != dev_state && 3099 (rw & WRITE) && NULL != bio->bi_io_vec) { 3100 unsigned int i; 3101 u64 dev_bytenr; 3102 int bio_is_patched; 3103 char **mapped_datav; 3104 3105 dev_bytenr = 512 * bio->bi_sector; 3106 bio_is_patched = 0; 3107 if (dev_state->state->print_mask & 3108 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3109 printk(KERN_INFO 3110 "submit_bio(rw=0x%x, bi_vcnt=%u," 3111 " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n", 3112 rw, bio->bi_vcnt, (unsigned long)bio->bi_sector, 3113 (unsigned long long)dev_bytenr, 3114 bio->bi_bdev); 3115 3116 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, 3117 GFP_NOFS); 3118 if (!mapped_datav) 3119 goto leave; 3120 for (i = 0; i < bio->bi_vcnt; i++) { 3121 BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); 3122 mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); 3123 if (!mapped_datav[i]) { 3124 while (i > 0) { 3125 i--; 3126 kunmap(bio->bi_io_vec[i].bv_page); 3127 } 3128 kfree(mapped_datav); 3129 goto leave; 3130 } 3131 if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3132 BTRFSIC_PRINT_MASK_VERBOSE) == 3133 (dev_state->state->print_mask & 3134 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3135 BTRFSIC_PRINT_MASK_VERBOSE))) 3136 printk(KERN_INFO 3137 "#%u: page=%p, len=%u, offset=%u\n", 3138 i, bio->bi_io_vec[i].bv_page, 3139 bio->bi_io_vec[i].bv_len, 3140 bio->bi_io_vec[i].bv_offset); 3141 } 3142 btrfsic_process_written_block(dev_state, dev_bytenr, 3143 mapped_datav, bio->bi_vcnt, 3144 bio, &bio_is_patched, 3145 NULL, rw); 3146 while (i > 0) { 3147 i--; 3148 kunmap(bio->bi_io_vec[i].bv_page); 3149 } 3150 kfree(mapped_datav); 3151 } else if (NULL != dev_state && (rw & REQ_FLUSH)) { 3152 if (dev_state->state->print_mask & 3153 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3154 printk(KERN_INFO 3155 "submit_bio(rw=0x%x FLUSH, bdev=%p)\n", 3156 rw, bio->bi_bdev); 3157 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { 3158 if 
((dev_state->state->print_mask & 3159 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3160 BTRFSIC_PRINT_MASK_VERBOSE))) 3161 printk(KERN_INFO 3162 "btrfsic_submit_bio(%s) with FLUSH" 3163 " but dummy block already in use" 3164 " (ignored)!\n", 3165 dev_state->name); 3166 } else { 3167 struct btrfsic_block *const block = 3168 &dev_state->dummy_block_for_bio_bh_flush; 3169 3170 block->is_iodone = 0; 3171 block->never_written = 0; 3172 block->iodone_w_error = 0; 3173 block->flush_gen = dev_state->last_flush_gen + 1; 3174 block->submit_bio_bh_rw = rw; 3175 block->orig_bio_bh_private = bio->bi_private; 3176 block->orig_bio_bh_end_io.bio = bio->bi_end_io; 3177 block->next_in_same_bio = NULL; 3178 bio->bi_private = block; 3179 bio->bi_end_io = btrfsic_bio_end_io; 3180 } 3181 } 3182leave: 3183 mutex_unlock(&btrfsic_mutex); 3184 3185 submit_bio(rw, bio); 3186} 3187 3188int btrfsic_mount(struct btrfs_root *root, 3189 struct btrfs_fs_devices *fs_devices, 3190 int including_extent_data, u32 print_mask) 3191{ 3192 int ret; 3193 struct btrfsic_state *state; 3194 struct list_head *dev_head = &fs_devices->devices; 3195 struct btrfs_device *device; 3196 3197 if (root->nodesize != root->leafsize) { 3198 printk(KERN_INFO 3199 "btrfsic: cannot handle nodesize %d != leafsize %d!\n", 3200 root->nodesize, root->leafsize); 3201 return -1; 3202 } 3203 if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { 3204 printk(KERN_INFO 3205 "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 3206 root->nodesize, (unsigned long)PAGE_CACHE_SIZE); 3207 return -1; 3208 } 3209 if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) { 3210 printk(KERN_INFO 3211 "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 3212 root->leafsize, (unsigned long)PAGE_CACHE_SIZE); 3213 return -1; 3214 } 3215 if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { 3216 printk(KERN_INFO 3217 "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 3218 
root->sectorsize, (unsigned long)PAGE_CACHE_SIZE); 3219 return -1; 3220 } 3221 state = kzalloc(sizeof(*state), GFP_NOFS); 3222 if (NULL == state) { 3223 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n"); 3224 return -1; 3225 } 3226 3227 if (!btrfsic_is_initialized) { 3228 mutex_init(&btrfsic_mutex); 3229 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable); 3230 btrfsic_is_initialized = 1; 3231 } 3232 mutex_lock(&btrfsic_mutex); 3233 state->root = root; 3234 state->print_mask = print_mask; 3235 state->include_extent_data = including_extent_data; 3236 state->csum_size = 0; 3237 state->metablock_size = root->nodesize; 3238 state->datablock_size = root->sectorsize; 3239 INIT_LIST_HEAD(&state->all_blocks_list); 3240 btrfsic_block_hashtable_init(&state->block_hashtable); 3241 btrfsic_block_link_hashtable_init(&state->block_link_hashtable); 3242 state->max_superblock_generation = 0; 3243 state->latest_superblock = NULL; 3244 3245 list_for_each_entry(device, dev_head, dev_list) { 3246 struct btrfsic_dev_state *ds; 3247 char *p; 3248 3249 if (!device->bdev || !device->name) 3250 continue; 3251 3252 ds = btrfsic_dev_state_alloc(); 3253 if (NULL == ds) { 3254 printk(KERN_INFO 3255 "btrfs check-integrity: kmalloc() failed!\n"); 3256 mutex_unlock(&btrfsic_mutex); 3257 return -1; 3258 } 3259 ds->bdev = device->bdev; 3260 ds->state = state; 3261 bdevname(ds->bdev, ds->name); 3262 ds->name[BDEVNAME_SIZE - 1] = '\0'; 3263 for (p = ds->name; *p != '\0'; p++); 3264 while (p > ds->name && *p != '/') 3265 p--; 3266 if (*p == '/') 3267 p++; 3268 strlcpy(ds->name, p, sizeof(ds->name)); 3269 btrfsic_dev_state_hashtable_add(ds, 3270 &btrfsic_dev_state_hashtable); 3271 } 3272 3273 ret = btrfsic_process_superblock(state, fs_devices); 3274 if (0 != ret) { 3275 mutex_unlock(&btrfsic_mutex); 3276 btrfsic_unmount(root, fs_devices); 3277 return ret; 3278 } 3279 3280 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE) 3281 btrfsic_dump_database(state); 3282 if 
(state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE) 3283 btrfsic_dump_tree(state); 3284 3285 mutex_unlock(&btrfsic_mutex); 3286 return 0; 3287} 3288 3289void btrfsic_unmount(struct btrfs_root *root, 3290 struct btrfs_fs_devices *fs_devices) 3291{ 3292 struct list_head *elem_all; 3293 struct list_head *tmp_all; 3294 struct btrfsic_state *state; 3295 struct list_head *dev_head = &fs_devices->devices; 3296 struct btrfs_device *device; 3297 3298 if (!btrfsic_is_initialized) 3299 return; 3300 3301 mutex_lock(&btrfsic_mutex); 3302 3303 state = NULL; 3304 list_for_each_entry(device, dev_head, dev_list) { 3305 struct btrfsic_dev_state *ds; 3306 3307 if (!device->bdev || !device->name) 3308 continue; 3309 3310 ds = btrfsic_dev_state_hashtable_lookup( 3311 device->bdev, 3312 &btrfsic_dev_state_hashtable); 3313 if (NULL != ds) { 3314 state = ds->state; 3315 btrfsic_dev_state_hashtable_remove(ds); 3316 btrfsic_dev_state_free(ds); 3317 } 3318 } 3319 3320 if (NULL == state) { 3321 printk(KERN_INFO 3322 "btrfsic: error, cannot find state information" 3323 " on umount!\n"); 3324 mutex_unlock(&btrfsic_mutex); 3325 return; 3326 } 3327 3328 /* 3329 * Don't care about keeping the lists' state up to date, 3330 * just free all memory that was allocated dynamically. 3331 * Free the blocks and the block_links. 
3332 */ 3333 list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) { 3334 struct btrfsic_block *const b_all = 3335 list_entry(elem_all, struct btrfsic_block, 3336 all_blocks_node); 3337 struct list_head *elem_ref_to; 3338 struct list_head *tmp_ref_to; 3339 3340 list_for_each_safe(elem_ref_to, tmp_ref_to, 3341 &b_all->ref_to_list) { 3342 struct btrfsic_block_link *const l = 3343 list_entry(elem_ref_to, 3344 struct btrfsic_block_link, 3345 node_ref_to); 3346 3347 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 3348 btrfsic_print_rem_link(state, l); 3349 3350 l->ref_cnt--; 3351 if (0 == l->ref_cnt) 3352 btrfsic_block_link_free(l); 3353 } 3354 3355 if (b_all->is_iodone || b_all->never_written) 3356 btrfsic_block_free(b_all); 3357 else 3358 printk(KERN_INFO "btrfs: attempt to free %c-block" 3359 " @%llu (%s/%llu/%d) on umount which is" 3360 " not yet iodone!\n", 3361 btrfsic_get_block_type(state, b_all), 3362 (unsigned long long)b_all->logical_bytenr, 3363 b_all->dev_state->name, 3364 (unsigned long long)b_all->dev_bytenr, 3365 b_all->mirror_num); 3366 } 3367 3368 mutex_unlock(&btrfsic_mutex); 3369 3370 kfree(state); 3371} 3372