1/* 2 * Copyright (C) 2011 Red Hat, Inc. 3 * 4 * This file is released under the GPL. 5 */ 6 7#include "dm-space-map.h" 8#include "dm-space-map-common.h" 9#include "dm-space-map-metadata.h" 10 11#include <linux/list.h> 12#include <linux/slab.h> 13#include <linux/device-mapper.h> 14 15#define DM_MSG_PREFIX "space map metadata" 16 17/*----------------------------------------------------------------*/ 18 19/* 20 * An edge triggered threshold. 21 */ 22struct threshold { 23 bool threshold_set; 24 bool value_set; 25 dm_block_t threshold; 26 dm_block_t current_value; 27 dm_sm_threshold_fn fn; 28 void *context; 29}; 30 31static void threshold_init(struct threshold *t) 32{ 33 t->threshold_set = false; 34 t->value_set = false; 35} 36 37static void set_threshold(struct threshold *t, dm_block_t value, 38 dm_sm_threshold_fn fn, void *context) 39{ 40 t->threshold_set = true; 41 t->threshold = value; 42 t->fn = fn; 43 t->context = context; 44} 45 46static bool below_threshold(struct threshold *t, dm_block_t value) 47{ 48 return t->threshold_set && value <= t->threshold; 49} 50 51static bool threshold_already_triggered(struct threshold *t) 52{ 53 return t->value_set && below_threshold(t, t->current_value); 54} 55 56static void check_threshold(struct threshold *t, dm_block_t value) 57{ 58 if (below_threshold(t, value) && 59 !threshold_already_triggered(t)) 60 t->fn(t->context); 61 62 t->value_set = true; 63 t->current_value = value; 64} 65 66/*----------------------------------------------------------------*/ 67 68/* 69 * Space map interface. 70 * 71 * The low level disk format is written using the standard btree and 72 * transaction manager. This means that performing disk operations may 73 * cause us to recurse into the space map in order to allocate new blocks. 74 * For this reason we have a pool of pre-allocated blocks large enough to 75 * service any metadata_ll_disk operation. 76 */ 77 78/* 79 * FIXME: we should calculate this based on the size of the device. 80 * Only the metadata space map needs this functionality. 81 */ 82#define MAX_RECURSIVE_ALLOCATIONS 1024 83 84enum block_op_type { 85 BOP_INC, 86 BOP_DEC 87}; 88 89struct block_op { 90 enum block_op_type type; 91 dm_block_t block; 92}; 93 94struct bop_ring_buffer { 95 unsigned begin; 96 unsigned end; 97 struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1]; 98}; 99 100static void brb_init(struct bop_ring_buffer *brb) 101{ 102 brb->begin = 0; 103 brb->end = 0; 104} 105 106static bool brb_empty(struct bop_ring_buffer *brb) 107{ 108 return brb->begin == brb->end; 109} 110 111static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old) 112{ 113 unsigned r = old + 1; 114 return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r; 115} 116 117static int brb_push(struct bop_ring_buffer *brb, 118 enum block_op_type type, dm_block_t b) 119{ 120 struct block_op *bop; 121 unsigned next = brb_next(brb, brb->end); 122 123 /* 124 * We don't allow the last bop to be filled, this way we can 125 * differentiate between full and empty. 126 */ 127 if (next == brb->begin) 128 return -ENOMEM; 129 130 bop = brb->bops + brb->end; 131 bop->type = type; 132 bop->block = b; 133 134 brb->end = next; 135 136 return 0; 137} 138 139static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result) 140{ 141 struct block_op *bop; 142 143 if (brb_empty(brb)) 144 return -ENODATA; 145 146 bop = brb->bops + brb->begin; 147 result->type = bop->type; 148 result->block = bop->block; 149 150 brb->begin = brb_next(brb, brb->begin); 151 152 return 0; 153} 154 155/*----------------------------------------------------------------*/ 156 157struct sm_metadata { 158 struct dm_space_map sm; 159 160 struct ll_disk ll; 161 struct ll_disk old_ll; 162 163 dm_block_t begin; 164 165 unsigned recursion_count; 166 unsigned allocated_this_transaction; 167 struct bop_ring_buffer uncommitted; 168 169 struct threshold threshold; 170}; 171 172static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b) 173{ 174 int r = brb_push(&smm->uncommitted, type, b); 175 176 if (r) { 177 DMERR("too many recursive allocations"); 178 return -ENOMEM; 179 } 180 181 return 0; 182} 183 184static int commit_bop(struct sm_metadata *smm, struct block_op *op) 185{ 186 int r = 0; 187 enum allocation_event ev; 188 189 switch (op->type) { 190 case BOP_INC: 191 r = sm_ll_inc(&smm->ll, op->block, &ev); 192 break; 193 194 case BOP_DEC: 195 r = sm_ll_dec(&smm->ll, op->block, &ev); 196 break; 197 } 198 199 return r; 200} 201 202static void in(struct sm_metadata *smm) 203{ 204 smm->recursion_count++; 205} 206 207static int out(struct sm_metadata *smm) 208{ 209 int r = 0; 210 211 /* 212 * If we're not recursing then very bad things are happening. 213 */ 214 if (!smm->recursion_count) { 215 DMERR("lost track of recursion depth"); 216 return -ENOMEM; 217 } 218 219 if (smm->recursion_count == 1) { 220 while (!brb_empty(&smm->uncommitted)) { 221 struct block_op bop; 222 223 r = brb_pop(&smm->uncommitted, &bop); 224 if (r) { 225 DMERR("bug in bop ring buffer"); 226 break; 227 } 228 229 r = commit_bop(smm, &bop); 230 if (r) 231 break; 232 } 233 } 234 235 smm->recursion_count--; 236 237 return r; 238} 239 240/* 241 * When using the out() function above, we often want to combine an error 242 * code for the operation run in the recursive context with that from 243 * out(). 244 */ 245static int combine_errors(int r1, int r2) 246{ 247 return r1 ? r1 : r2; 248} 249 250static int recursing(struct sm_metadata *smm) 251{ 252 return smm->recursion_count; 253} 254 255static void sm_metadata_destroy(struct dm_space_map *sm) 256{ 257 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 258 259 kfree(smm); 260} 261 262static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) 263{ 264 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 265 266 *count = smm->ll.nr_blocks; 267 268 return 0; 269} 270 271static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count) 272{ 273 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 274 275 *count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated - 276 smm->allocated_this_transaction; 277 278 return 0; 279} 280 281static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, 282 uint32_t *result) 283{ 284 int r; 285 unsigned i; 286 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 287 unsigned adjustment = 0; 288 289 /* 290 * We may have some uncommitted adjustments to add. This list 291 * should always be really short. 292 */ 293 for (i = smm->uncommitted.begin; 294 i != smm->uncommitted.end; 295 i = brb_next(&smm->uncommitted, i)) { 296 struct block_op *op = smm->uncommitted.bops + i; 297 298 if (op->block != b) 299 continue; 300 301 switch (op->type) { 302 case BOP_INC: 303 adjustment++; 304 break; 305 306 case BOP_DEC: 307 adjustment--; 308 break; 309 } 310 } 311 312 r = sm_ll_lookup(&smm->ll, b, result); 313 if (r) 314 return r; 315 316 *result += adjustment; 317 318 return 0; 319} 320 321static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm, 322 dm_block_t b, int *result) 323{ 324 int r, adjustment = 0; 325 unsigned i; 326 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 327 uint32_t rc; 328 329 /* 330 * We may have some uncommitted adjustments to add. This list 331 * should always be really short. 332 */ 333 for (i = smm->uncommitted.begin; 334 i != smm->uncommitted.end; 335 i = brb_next(&smm->uncommitted, i)) { 336 337 struct block_op *op = smm->uncommitted.bops + i; 338 339 if (op->block != b) 340 continue; 341 342 switch (op->type) { 343 case BOP_INC: 344 adjustment++; 345 break; 346 347 case BOP_DEC: 348 adjustment--; 349 break; 350 } 351 } 352 353 if (adjustment > 1) { 354 *result = 1; 355 return 0; 356 } 357 358 r = sm_ll_lookup_bitmap(&smm->ll, b, &rc); 359 if (r) 360 return r; 361 362 if (rc == 3) 363 /* 364 * We err on the side of caution, and always return true. 365 */ 366 *result = 1; 367 else 368 *result = rc + adjustment > 1; 369 370 return 0; 371} 372 373static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b, 374 uint32_t count) 375{ 376 int r, r2; 377 enum allocation_event ev; 378 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 379 380 if (smm->recursion_count) { 381 DMERR("cannot recurse set_count()"); 382 return -EINVAL; 383 } 384 385 in(smm); 386 r = sm_ll_insert(&smm->ll, b, count, &ev); 387 r2 = out(smm); 388 389 return combine_errors(r, r2); 390} 391 392static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b) 393{ 394 int r, r2 = 0; 395 enum allocation_event ev; 396 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 397 398 if (recursing(smm)) 399 r = add_bop(smm, BOP_INC, b); 400 else { 401 in(smm); 402 r = sm_ll_inc(&smm->ll, b, &ev); 403 r2 = out(smm); 404 } 405 406 return combine_errors(r, r2); 407} 408 409static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b) 410{ 411 int r, r2 = 0; 412 enum allocation_event ev; 413 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 414 415 if (recursing(smm)) 416 r = add_bop(smm, BOP_DEC, b); 417 else { 418 in(smm); 419 r = sm_ll_dec(&smm->ll, b, &ev); 420 r2 = out(smm); 421 } 422 423 return combine_errors(r, r2); 424} 425 426static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b) 427{ 428 int r, r2 = 0; 429 enum allocation_event ev; 430 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 431 432 r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b); 433 if (r) 434 return r; 435 436 smm->begin = *b + 1; 437 438 if (recursing(smm)) 439 r = add_bop(smm, BOP_INC, *b); 440 else { 441 in(smm); 442 r = sm_ll_inc(&smm->ll, *b, &ev); 443 r2 = out(smm); 444 } 445 446 if (!r) 447 smm->allocated_this_transaction++; 448 449 return combine_errors(r, r2); 450} 451 452static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) 453{ 454 dm_block_t count; 455 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 456 457 int r = sm_metadata_new_block_(sm, b); 458 if (r) { 459 DMERR_LIMIT("unable to allocate new metadata block"); 460 return r; 461 } 462 463 r = sm_metadata_get_nr_free(sm, &count); 464 if (r) { 465 DMERR_LIMIT("couldn't get free block count"); 466 return r; 467 } 468 469 check_threshold(&smm->threshold, count); 470 471 return r; 472} 473 474static int sm_metadata_commit(struct dm_space_map *sm) 475{ 476 int r; 477 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 478 479 r = sm_ll_commit(&smm->ll); 480 if (r) 481 return r; 482 483 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); 484 smm->begin = 0; 485 smm->allocated_this_transaction = 0; 486 487 return 0; 488} 489 490static int sm_metadata_register_threshold_callback(struct dm_space_map *sm, 491 dm_block_t threshold, 492 dm_sm_threshold_fn fn, 493 void *context) 494{ 495 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 496 497 set_threshold(&smm->threshold, threshold, fn, context); 498 499 return 0; 500} 501 502static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result) 503{ 504 *result = sizeof(struct disk_sm_root); 505 506 return 0; 507} 508 509static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max) 510{ 511 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 512 struct disk_sm_root root_le; 513 514 root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks); 515 root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated); 516 root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root); 517 root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root); 518 519 if (max < sizeof(root_le)) 520 return -ENOSPC; 521 522 memcpy(where_le, &root_le, sizeof(root_le)); 523 524 return 0; 525} 526 527static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks); 528 529static struct dm_space_map ops = { 530 .destroy = sm_metadata_destroy, 531 .extend = sm_metadata_extend, 532 .get_nr_blocks = sm_metadata_get_nr_blocks, 533 .get_nr_free = sm_metadata_get_nr_free, 534 .get_count = sm_metadata_get_count, 535 .count_is_more_than_one = sm_metadata_count_is_more_than_one, 536 .set_count = sm_metadata_set_count, 537 .inc_block = sm_metadata_inc_block, 538 .dec_block = sm_metadata_dec_block, 539 .new_block = sm_metadata_new_block, 540 .commit = sm_metadata_commit, 541 .root_size = sm_metadata_root_size, 542 .copy_root = sm_metadata_copy_root, 543 .register_threshold_callback = sm_metadata_register_threshold_callback 544}; 545 546/*----------------------------------------------------------------*/ 547 548/* 549 * When a new space map is created that manages its own space. We use 550 * this tiny bootstrap allocator. 551 */ 552static void sm_bootstrap_destroy(struct dm_space_map *sm) 553{ 554} 555 556static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks) 557{ 558 DMERR("bootstrap doesn't support extend"); 559 560 return -EINVAL; 561} 562 563static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) 564{ 565 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 566 567 return smm->ll.nr_blocks; 568} 569 570static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count) 571{ 572 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 573 574 *count = smm->ll.nr_blocks - smm->begin; 575 576 return 0; 577} 578 579static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b, 580 uint32_t *result) 581{ 582 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 583 584 return b < smm->begin ? 1 : 0; 585} 586 587static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm, 588 dm_block_t b, int *result) 589{ 590 *result = 0; 591 592 return 0; 593} 594 595static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b, 596 uint32_t count) 597{ 598 DMERR("bootstrap doesn't support set_count"); 599 600 return -EINVAL; 601} 602 603static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b) 604{ 605 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 606 607 /* 608 * We know the entire device is unused. 609 */ 610 if (smm->begin == smm->ll.nr_blocks) 611 return -ENOSPC; 612 613 *b = smm->begin++; 614 615 return 0; 616} 617 618static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b) 619{ 620 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 621 622 return add_bop(smm, BOP_INC, b); 623} 624 625static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b) 626{ 627 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 628 629 return add_bop(smm, BOP_DEC, b); 630} 631 632static int sm_bootstrap_commit(struct dm_space_map *sm) 633{ 634 return 0; 635} 636 637static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result) 638{ 639 DMERR("bootstrap doesn't support root_size"); 640 641 return -EINVAL; 642} 643 644static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where, 645 size_t max) 646{ 647 DMERR("bootstrap doesn't support copy_root"); 648 649 return -EINVAL; 650} 651 652static struct dm_space_map bootstrap_ops = { 653 .destroy = sm_bootstrap_destroy, 654 .extend = sm_bootstrap_extend, 655 .get_nr_blocks = sm_bootstrap_get_nr_blocks, 656 .get_nr_free = sm_bootstrap_get_nr_free, 657 .get_count = sm_bootstrap_get_count, 658 .count_is_more_than_one = sm_bootstrap_count_is_more_than_one, 659 .set_count = sm_bootstrap_set_count, 660 .inc_block = sm_bootstrap_inc_block, 661 .dec_block = sm_bootstrap_dec_block, 662 .new_block = sm_bootstrap_new_block, 663 .commit = sm_bootstrap_commit, 664 .root_size = sm_bootstrap_root_size, 665 .copy_root = sm_bootstrap_copy_root, 666 .register_threshold_callback = NULL 667}; 668 669/*----------------------------------------------------------------*/ 670 671static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) 672{ 673 int r, i; 674 enum allocation_event ev; 675 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 676 dm_block_t old_len = smm->ll.nr_blocks; 677 678 /* 679 * Flick into a mode where all blocks get allocated in the new area. 680 */ 681 smm->begin = old_len; 682 memcpy(sm, &bootstrap_ops, sizeof(*sm)); 683 684 /* 685 * Extend. 686 */ 687 r = sm_ll_extend(&smm->ll, extra_blocks); 688 if (r) 689 goto out; 690 691 /* 692 * We repeatedly increment then commit until the commit doesn't 693 * allocate any new blocks. 694 */ 695 do { 696 for (i = old_len; !r && i < smm->begin; i++) { 697 r = sm_ll_inc(&smm->ll, i, &ev); 698 if (r) 699 goto out; 700 } 701 old_len = smm->begin; 702 703 r = sm_ll_commit(&smm->ll); 704 if (r) 705 goto out; 706 707 } while (old_len != smm->begin); 708 709out: 710 /* 711 * Switch back to normal behaviour. 712 */ 713 memcpy(sm, &ops, sizeof(*sm)); 714 return r; 715} 716 717/*----------------------------------------------------------------*/ 718 719struct dm_space_map *dm_sm_metadata_init(void) 720{ 721 struct sm_metadata *smm; 722 723 smm = kmalloc(sizeof(*smm), GFP_KERNEL); 724 if (!smm) 725 return ERR_PTR(-ENOMEM); 726 727 memcpy(&smm->sm, &ops, sizeof(smm->sm)); 728 729 return &smm->sm; 730} 731 732int dm_sm_metadata_create(struct dm_space_map *sm, 733 struct dm_transaction_manager *tm, 734 dm_block_t nr_blocks, 735 dm_block_t superblock) 736{ 737 int r; 738 dm_block_t i; 739 enum allocation_event ev; 740 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 741 742 smm->begin = superblock + 1; 743 smm->recursion_count = 0; 744 smm->allocated_this_transaction = 0; 745 brb_init(&smm->uncommitted); 746 threshold_init(&smm->threshold); 747 748 memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); 749 750 r = sm_ll_new_metadata(&smm->ll, tm); 751 if (r) 752 return r; 753 754 if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) 755 nr_blocks = DM_SM_METADATA_MAX_BLOCKS; 756 r = sm_ll_extend(&smm->ll, nr_blocks); 757 if (r) 758 return r; 759 760 memcpy(&smm->sm, &ops, sizeof(smm->sm)); 761 762 /* 763 * Now we need to update the newly created data structures with the 764 * allocated blocks that they were built from. 765 */ 766 for (i = superblock; !r && i < smm->begin; i++) 767 r = sm_ll_inc(&smm->ll, i, &ev); 768 769 if (r) 770 return r; 771 772 return sm_metadata_commit(sm); 773} 774 775int dm_sm_metadata_open(struct dm_space_map *sm, 776 struct dm_transaction_manager *tm, 777 void *root_le, size_t len) 778{ 779 int r; 780 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 781 782 r = sm_ll_open_metadata(&smm->ll, tm, root_le, len); 783 if (r) 784 return r; 785 786 smm->begin = 0; 787 smm->recursion_count = 0; 788 smm->allocated_this_transaction = 0; 789 brb_init(&smm->uncommitted); 790 threshold_init(&smm->threshold); 791 792 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); 793 return 0; 794} 795