extents.c revision 09b882520bbe01f2e5044642109c1c1d19fe3559
1/* 2 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com 3 * Written by Alex Tomas <alex@clusterfs.com> 4 * 5 * Architecture independence: 6 * Copyright (c) 2005, Bull S.A. 7 * Written by Pierre Peiffer <pierre.peiffer@bull.net> 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of the GNU General Public License version 2 as 11 * published by the Free Software Foundation. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public Licens 19 * along with this program; if not, write to the Free Software 20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- 21 */ 22 23/* 24 * Extents support for EXT4 25 * 26 * TODO: 27 * - ext4*_error() should be used in some situations 28 * - analyze all BUG()/BUG_ON(), use -EIO where appropriate 29 * - smart tree reduction 30 */ 31 32#include <linux/module.h> 33#include <linux/fs.h> 34#include <linux/time.h> 35#include <linux/ext4_jbd2.h> 36#include <linux/jbd.h> 37#include <linux/smp_lock.h> 38#include <linux/highuid.h> 39#include <linux/pagemap.h> 40#include <linux/quotaops.h> 41#include <linux/string.h> 42#include <linux/slab.h> 43#include <linux/ext4_fs_extents.h> 44#include <asm/uaccess.h> 45 46 47/* 48 * ext_pblock: 49 * combine low and high parts of physical block number into ext4_fsblk_t 50 */ 51static ext4_fsblk_t ext_pblock(struct ext4_extent *ex) 52{ 53 ext4_fsblk_t block; 54 55 block = le32_to_cpu(ex->ee_start); 56 block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; 57 return block; 58} 59 60/* 61 * idx_pblock: 62 * combine low and high parts of a leaf physical block number into ext4_fsblk_t 63 */ 64static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) 65{ 66 ext4_fsblk_t block; 67 68 block = le32_to_cpu(ix->ei_leaf); 69 block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; 70 return block; 71} 72 73/* 74 * ext4_ext_store_pblock: 75 * stores a large physical block number into an extent struct, 76 * breaking it into parts 77 */ 78static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) 79{ 80 ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff)); 81 ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); 82} 83 84/* 85 * ext4_idx_store_pblock: 86 * stores a large physical block number into an index struct, 87 * breaking it into parts 88 */ 89static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) 90{ 91 ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff)); 92 ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); 93} 94 95static int ext4_ext_check_header(const char *function, struct inode *inode, 96 struct ext4_extent_header *eh) 97{ 98 const char *error_msg = NULL; 99 100 if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) { 101 error_msg = "invalid magic"; 102 goto corrupted; 103 } 104 if (unlikely(eh->eh_max == 0)) { 105 error_msg = "invalid eh_max"; 106 goto corrupted; 107 } 108 if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) { 109 error_msg = "invalid eh_entries"; 110 goto corrupted; 111 } 112 return 0; 113 114corrupted: 115 ext4_error(inode->i_sb, function, 116 "bad header in inode #%lu: %s - magic %x, " 117 "entries %u, max %u, depth %u", 118 inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), 119 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), 120 le16_to_cpu(eh->eh_depth)); 121 122 return -EIO; 123} 124 125static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed) 126{ 127 int err; 128 129 if (handle->h_buffer_credits > needed) 130 return handle; 131 if (!ext4_journal_extend(handle, needed)) 132 return handle; 133 err = ext4_journal_restart(handle, needed); 134 135 return handle; 136} 137 138/* 139 * could return: 140 * - EROFS 141 * - ENOMEM 142 */ 143static int ext4_ext_get_access(handle_t *handle, struct inode *inode, 144 struct ext4_ext_path *path) 145{ 146 if (path->p_bh) { 147 /* path points to block */ 148 return ext4_journal_get_write_access(handle, path->p_bh); 149 } 150 /* path points to leaf/index in inode body */ 151 /* we use in-core data, no need to protect them */ 152 return 0; 153} 154 155/* 156 * could return: 157 * - EROFS 158 * - ENOMEM 159 * - EIO 160 */ 161static int ext4_ext_dirty(handle_t *handle, struct inode *inode, 162 struct ext4_ext_path *path) 163{ 164 int err; 165 if (path->p_bh) { 166 /* path points to block */ 167 err = ext4_journal_dirty_metadata(handle, path->p_bh); 168 } else { 169 /* path points to leaf/index in inode body */ 170 err = ext4_mark_inode_dirty(handle, inode); 171 } 172 return err; 173} 174 175static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, 176 struct ext4_ext_path *path, 177 ext4_fsblk_t block) 178{ 179 struct ext4_inode_info *ei = EXT4_I(inode); 180 ext4_fsblk_t bg_start; 181 ext4_grpblk_t colour; 182 int depth; 183 184 if (path) { 185 struct ext4_extent *ex; 186 depth = path->p_depth; 187 188 /* try to predict block placement */ 189 ex = path[depth].p_ext; 190 if (ex) 191 return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block)); 192 193 /* it looks like index is empty; 194 * try to find starting block from index itself */ 195 if (path[depth].p_bh) 196 return path[depth].p_bh->b_blocknr; 197 } 198 199 /* OK. use inode's group */ 200 bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + 201 le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block); 202 colour = (current->pid % 16) * 203 (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); 204 return bg_start + colour + block; 205} 206 207static ext4_fsblk_t 208ext4_ext_new_block(handle_t *handle, struct inode *inode, 209 struct ext4_ext_path *path, 210 struct ext4_extent *ex, int *err) 211{ 212 ext4_fsblk_t goal, newblock; 213 214 goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); 215 newblock = ext4_new_block(handle, inode, goal, err); 216 return newblock; 217} 218 219static int ext4_ext_space_block(struct inode *inode) 220{ 221 int size; 222 223 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 224 / sizeof(struct ext4_extent); 225#ifdef AGRESSIVE_TEST 226 if (size > 6) 227 size = 6; 228#endif 229 return size; 230} 231 232static int ext4_ext_space_block_idx(struct inode *inode) 233{ 234 int size; 235 236 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) 237 / sizeof(struct ext4_extent_idx); 238#ifdef AGRESSIVE_TEST 239 if (size > 5) 240 size = 5; 241#endif 242 return size; 243} 244 245static int ext4_ext_space_root(struct inode *inode) 246{ 247 int size; 248 249 size = sizeof(EXT4_I(inode)->i_data); 250 size -= sizeof(struct ext4_extent_header); 251 size /= sizeof(struct ext4_extent); 252#ifdef AGRESSIVE_TEST 253 if (size > 3) 254 size = 3; 255#endif 256 return size; 257} 258 259static int ext4_ext_space_root_idx(struct inode *inode) 260{ 261 int size; 262 263 size = sizeof(EXT4_I(inode)->i_data); 264 size -= sizeof(struct ext4_extent_header); 265 size /= sizeof(struct ext4_extent_idx); 266#ifdef AGRESSIVE_TEST 267 if (size > 4) 268 size = 4; 269#endif 270 return size; 271} 272 273#ifdef EXT_DEBUG 274static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) 275{ 276 int k, l = path->p_depth; 277 278 ext_debug("path:"); 279 for (k = 0; k <= l; k++, path++) { 280 if (path->p_idx) { 281 ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), 282 idx_pblock(path->p_idx)); 283 } else if (path->p_ext) { 284 ext_debug(" %d:%d:%llu ", 285 le32_to_cpu(path->p_ext->ee_block), 286 le16_to_cpu(path->p_ext->ee_len), 287 ext_pblock(path->p_ext)); 288 } else 289 ext_debug(" []"); 290 } 291 ext_debug("\n"); 292} 293 294static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) 295{ 296 int depth = ext_depth(inode); 297 struct ext4_extent_header *eh; 298 struct ext4_extent *ex; 299 int i; 300 301 if (!path) 302 return; 303 304 eh = path[depth].p_hdr; 305 ex = EXT_FIRST_EXTENT(eh); 306 307 for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { 308 ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block), 309 le16_to_cpu(ex->ee_len), ext_pblock(ex)); 310 } 311 ext_debug("\n"); 312} 313#else 314#define ext4_ext_show_path(inode,path) 315#define ext4_ext_show_leaf(inode,path) 316#endif 317 318static void ext4_ext_drop_refs(struct ext4_ext_path *path) 319{ 320 int depth = path->p_depth; 321 int i; 322 323 for (i = 0; i <= depth; i++, path++) 324 if (path->p_bh) { 325 brelse(path->p_bh); 326 path->p_bh = NULL; 327 } 328} 329 330/* 331 * ext4_ext_binsearch_idx: 332 * binary search for the closest index of the given block 333 */ 334static void 335ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int block) 336{ 337 struct ext4_extent_header *eh = path->p_hdr; 338 struct ext4_extent_idx *r, *l, *m; 339 340 BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC); 341 BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); 342 BUG_ON(le16_to_cpu(eh->eh_entries) <= 0); 343 344 ext_debug("binsearch for %d(idx): ", block); 345 346 l = EXT_FIRST_INDEX(eh) + 1; 347 r = EXT_FIRST_INDEX(eh) + le16_to_cpu(eh->eh_entries) - 1; 348 while (l <= r) { 349 m = l + (r - l) / 2; 350 if (block < le32_to_cpu(m->ei_block)) 351 r = m - 1; 352 else 353 l = m + 1; 354 ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ei_block, 355 m, m->ei_block, r, r->ei_block); 356 } 357 358 path->p_idx = l - 1; 359 ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), 360 idx_block(path->p_idx)); 361 362#ifdef CHECK_BINSEARCH 363 { 364 struct ext4_extent_idx *chix, *ix; 365 int k; 366 367 chix = ix = EXT_FIRST_INDEX(eh); 368 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { 369 if (k != 0 && 370 le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { 371 printk("k=%d, ix=0x%p, first=0x%p\n", k, 372 ix, EXT_FIRST_INDEX(eh)); 373 printk("%u <= %u\n", 374 le32_to_cpu(ix->ei_block), 375 le32_to_cpu(ix[-1].ei_block)); 376 } 377 BUG_ON(k && le32_to_cpu(ix->ei_block) 378 <= le32_to_cpu(ix[-1].ei_block)); 379 if (block < le32_to_cpu(ix->ei_block)) 380 break; 381 chix = ix; 382 } 383 BUG_ON(chix != path->p_idx); 384 } 385#endif 386 387} 388 389/* 390 * ext4_ext_binsearch: 391 * binary search for closest extent of the given block 392 */ 393static void 394ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) 395{ 396 struct ext4_extent_header *eh = path->p_hdr; 397 struct ext4_extent *r, *l, *m; 398 399 BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC); 400 BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); 401 402 if (eh->eh_entries == 0) { 403 /* 404 * this leaf is empty: 405 * we get such a leaf in split/add case 406 */ 407 return; 408 } 409 410 ext_debug("binsearch for %d: ", block); 411 412 l = EXT_FIRST_EXTENT(eh) + 1; 413 r = EXT_FIRST_EXTENT(eh) + le16_to_cpu(eh->eh_entries) - 1; 414 415 while (l <= r) { 416 m = l + (r - l) / 2; 417 if (block < le32_to_cpu(m->ee_block)) 418 r = m - 1; 419 else 420 l = m + 1; 421 ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ee_block, 422 m, m->ee_block, r, r->ee_block); 423 } 424 425 path->p_ext = l - 1; 426 ext_debug(" -> %d:%llu:%d ", 427 le32_to_cpu(path->p_ext->ee_block), 428 ext_pblock(path->p_ext), 429 le16_to_cpu(path->p_ext->ee_len)); 430 431#ifdef CHECK_BINSEARCH 432 { 433 struct ext4_extent *chex, *ex; 434 int k; 435 436 chex = ex = EXT_FIRST_EXTENT(eh); 437 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) { 438 BUG_ON(k && le32_to_cpu(ex->ee_block) 439 <= le32_to_cpu(ex[-1].ee_block)); 440 if (block < le32_to_cpu(ex->ee_block)) 441 break; 442 chex = ex; 443 } 444 BUG_ON(chex != path->p_ext); 445 } 446#endif 447 448} 449 450int ext4_ext_tree_init(handle_t *handle, struct inode *inode) 451{ 452 struct ext4_extent_header *eh; 453 454 eh = ext_inode_hdr(inode); 455 eh->eh_depth = 0; 456 eh->eh_entries = 0; 457 eh->eh_magic = EXT4_EXT_MAGIC; 458 eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode)); 459 ext4_mark_inode_dirty(handle, inode); 460 ext4_ext_invalidate_cache(inode); 461 return 0; 462} 463 464struct ext4_ext_path * 465ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path) 466{ 467 struct ext4_extent_header *eh; 468 struct buffer_head *bh; 469 short int depth, i, ppos = 0, alloc = 0; 470 471 eh = ext_inode_hdr(inode); 472 BUG_ON(eh == NULL); 473 if (ext4_ext_check_header(__FUNCTION__, inode, eh)) 474 return ERR_PTR(-EIO); 475 476 i = depth = ext_depth(inode); 477 478 /* account possible depth increase */ 479 if (!path) { 480 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2), 481 GFP_NOFS); 482 if (!path) 483 return ERR_PTR(-ENOMEM); 484 alloc = 1; 485 } 486 path[0].p_hdr = eh; 487 488 /* walk through the tree */ 489 while (i) { 490 ext_debug("depth %d: num %d, max %d\n", 491 ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); 492 ext4_ext_binsearch_idx(inode, path + ppos, block); 493 path[ppos].p_block = idx_pblock(path[ppos].p_idx); 494 path[ppos].p_depth = i; 495 path[ppos].p_ext = NULL; 496 497 bh = sb_bread(inode->i_sb, path[ppos].p_block); 498 if (!bh) 499 goto err; 500 501 eh = ext_block_hdr(bh); 502 ppos++; 503 BUG_ON(ppos > depth); 504 path[ppos].p_bh = bh; 505 path[ppos].p_hdr = eh; 506 i--; 507 508 if (ext4_ext_check_header(__FUNCTION__, inode, eh)) 509 goto err; 510 } 511 512 path[ppos].p_depth = i; 513 path[ppos].p_hdr = eh; 514 path[ppos].p_ext = NULL; 515 path[ppos].p_idx = NULL; 516 517 if (ext4_ext_check_header(__FUNCTION__, inode, eh)) 518 goto err; 519 520 /* find extent */ 521 ext4_ext_binsearch(inode, path + ppos, block); 522 523 ext4_ext_show_path(inode, path); 524 525 return path; 526 527err: 528 ext4_ext_drop_refs(path); 529 if (alloc) 530 kfree(path); 531 return ERR_PTR(-EIO); 532} 533 534/* 535 * ext4_ext_insert_index: 536 * insert new index [@logical;@ptr] into the block at @curp; 537 * check where to insert: before @curp or after @curp 538 */ 539static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, 540 struct ext4_ext_path *curp, 541 int logical, ext4_fsblk_t ptr) 542{ 543 struct ext4_extent_idx *ix; 544 int len, err; 545 546 err = ext4_ext_get_access(handle, inode, curp); 547 if (err) 548 return err; 549 550 BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block)); 551 len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; 552 if (logical > le32_to_cpu(curp->p_idx->ei_block)) { 553 /* insert after */ 554 if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { 555 len = (len - 1) * sizeof(struct ext4_extent_idx); 556 len = len < 0 ? 0 : len; 557 ext_debug("insert new index %d after: %d. " 558 "move %d from 0x%p to 0x%p\n", 559 logical, ptr, len, 560 (curp->p_idx + 1), (curp->p_idx + 2)); 561 memmove(curp->p_idx + 2, curp->p_idx + 1, len); 562 } 563 ix = curp->p_idx + 1; 564 } else { 565 /* insert before */ 566 len = len * sizeof(struct ext4_extent_idx); 567 len = len < 0 ? 0 : len; 568 ext_debug("insert new index %d before: %d. " 569 "move %d from 0x%p to 0x%p\n", 570 logical, ptr, len, 571 curp->p_idx, (curp->p_idx + 1)); 572 memmove(curp->p_idx + 1, curp->p_idx, len); 573 ix = curp->p_idx; 574 } 575 576 ix->ei_block = cpu_to_le32(logical); 577 ext4_idx_store_pblock(ix, ptr); 578 curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); 579 580 BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) 581 > le16_to_cpu(curp->p_hdr->eh_max)); 582 BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr)); 583 584 err = ext4_ext_dirty(handle, inode, curp); 585 ext4_std_error(inode->i_sb, err); 586 587 return err; 588} 589 590/* 591 * ext4_ext_split: 592 * inserts new subtree into the path, using free index entry 593 * at depth @at: 594 * - allocates all needed blocks (new leaf and all intermediate index blocks) 595 * - makes decision where to split 596 * - moves remaining extents and index entries (right to the split point) 597 * into the newly allocated blocks 598 * - initializes subtree 599 */ 600static int ext4_ext_split(handle_t *handle, struct inode *inode, 601 struct ext4_ext_path *path, 602 struct ext4_extent *newext, int at) 603{ 604 struct buffer_head *bh = NULL; 605 int depth = ext_depth(inode); 606 struct ext4_extent_header *neh; 607 struct ext4_extent_idx *fidx; 608 struct ext4_extent *ex; 609 int i = at, k, m, a; 610 ext4_fsblk_t newblock, oldblock; 611 __le32 border; 612 ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */ 613 int err = 0; 614 615 /* make decision: where to split? */ 616 /* FIXME: now decision is simplest: at current extent */ 617 618 /* if current leaf will be split, then we should use 619 * border from split point */ 620 BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr)); 621 if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { 622 border = path[depth].p_ext[1].ee_block; 623 ext_debug("leaf will be split." 624 " next leaf starts at %d\n", 625 le32_to_cpu(border)); 626 } else { 627 border = newext->ee_block; 628 ext_debug("leaf will be added." 629 " next leaf starts at %d\n", 630 le32_to_cpu(border)); 631 } 632 633 /* 634 * If error occurs, then we break processing 635 * and mark filesystem read-only. index won't 636 * be inserted and tree will be in consistent 637 * state. Next mount will repair buffers too. 638 */ 639 640 /* 641 * Get array to track all allocated blocks. 642 * We need this to handle errors and free blocks 643 * upon them. 644 */ 645 ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS); 646 if (!ablocks) 647 return -ENOMEM; 648 649 /* allocate all needed blocks */ 650 ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); 651 for (a = 0; a < depth - at; a++) { 652 newblock = ext4_ext_new_block(handle, inode, path, newext, &err); 653 if (newblock == 0) 654 goto cleanup; 655 ablocks[a] = newblock; 656 } 657 658 /* initialize new leaf */ 659 newblock = ablocks[--a]; 660 BUG_ON(newblock == 0); 661 bh = sb_getblk(inode->i_sb, newblock); 662 if (!bh) { 663 err = -EIO; 664 goto cleanup; 665 } 666 lock_buffer(bh); 667 668 err = ext4_journal_get_create_access(handle, bh); 669 if (err) 670 goto cleanup; 671 672 neh = ext_block_hdr(bh); 673 neh->eh_entries = 0; 674 neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); 675 neh->eh_magic = EXT4_EXT_MAGIC; 676 neh->eh_depth = 0; 677 ex = EXT_FIRST_EXTENT(neh); 678 679 /* move remainder of path[depth] to the new leaf */ 680 BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max); 681 /* start copy from next extent */ 682 /* TODO: we could do it by single memmove */ 683 m = 0; 684 path[depth].p_ext++; 685 while (path[depth].p_ext <= 686 EXT_MAX_EXTENT(path[depth].p_hdr)) { 687 ext_debug("move %d:%llu:%d in new leaf %llu\n", 688 le32_to_cpu(path[depth].p_ext->ee_block), 689 ext_pblock(path[depth].p_ext), 690 le16_to_cpu(path[depth].p_ext->ee_len), 691 newblock); 692 /*memmove(ex++, path[depth].p_ext++, 693 sizeof(struct ext4_extent)); 694 neh->eh_entries++;*/ 695 path[depth].p_ext++; 696 m++; 697 } 698 if (m) { 699 memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); 700 neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m); 701 } 702 703 set_buffer_uptodate(bh); 704 unlock_buffer(bh); 705 706 err = ext4_journal_dirty_metadata(handle, bh); 707 if (err) 708 goto cleanup; 709 brelse(bh); 710 bh = NULL; 711 712 /* correct old leaf */ 713 if (m) { 714 err = ext4_ext_get_access(handle, inode, path + depth); 715 if (err) 716 goto cleanup; 717 path[depth].p_hdr->eh_entries = 718 cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m); 719 err = ext4_ext_dirty(handle, inode, path + depth); 720 if (err) 721 goto cleanup; 722 723 } 724 725 /* create intermediate indexes */ 726 k = depth - at - 1; 727 BUG_ON(k < 0); 728 if (k) 729 ext_debug("create %d intermediate indices\n", k); 730 /* insert new index into current index block */ 731 /* current depth stored in i var */ 732 i = depth - 1; 733 while (k--) { 734 oldblock = newblock; 735 newblock = ablocks[--a]; 736 bh = sb_getblk(inode->i_sb, (ext4_fsblk_t)newblock); 737 if (!bh) { 738 err = -EIO; 739 goto cleanup; 740 } 741 lock_buffer(bh); 742 743 err = ext4_journal_get_create_access(handle, bh); 744 if (err) 745 goto cleanup; 746 747 neh = ext_block_hdr(bh); 748 neh->eh_entries = cpu_to_le16(1); 749 neh->eh_magic = EXT4_EXT_MAGIC; 750 neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); 751 neh->eh_depth = cpu_to_le16(depth - i); 752 fidx = EXT_FIRST_INDEX(neh); 753 fidx->ei_block = border; 754 ext4_idx_store_pblock(fidx, oldblock); 755 756 ext_debug("int.index at %d (block %llu): %lu -> %llu\n", i, 757 newblock, (unsigned long) le32_to_cpu(border), 758 oldblock); 759 /* copy indexes */ 760 m = 0; 761 path[i].p_idx++; 762 763 ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, 764 EXT_MAX_INDEX(path[i].p_hdr)); 765 BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) != 766 EXT_LAST_INDEX(path[i].p_hdr)); 767 while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { 768 ext_debug("%d: move %d:%d in new index %llu\n", i, 769 le32_to_cpu(path[i].p_idx->ei_block), 770 idx_pblock(path[i].p_idx), 771 newblock); 772 /*memmove(++fidx, path[i].p_idx++, 773 sizeof(struct ext4_extent_idx)); 774 neh->eh_entries++; 775 BUG_ON(neh->eh_entries > neh->eh_max);*/ 776 path[i].p_idx++; 777 m++; 778 } 779 if (m) { 780 memmove(++fidx, path[i].p_idx - m, 781 sizeof(struct ext4_extent_idx) * m); 782 neh->eh_entries = 783 cpu_to_le16(le16_to_cpu(neh->eh_entries) + m); 784 } 785 set_buffer_uptodate(bh); 786 unlock_buffer(bh); 787 788 err = ext4_journal_dirty_metadata(handle, bh); 789 if (err) 790 goto cleanup; 791 brelse(bh); 792 bh = NULL; 793 794 /* correct old index */ 795 if (m) { 796 err = ext4_ext_get_access(handle, inode, path + i); 797 if (err) 798 goto cleanup; 799 path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m); 800 err = ext4_ext_dirty(handle, inode, path + i); 801 if (err) 802 goto cleanup; 803 } 804 805 i--; 806 } 807 808 /* insert new index */ 809 err = ext4_ext_insert_index(handle, inode, path + at, 810 le32_to_cpu(border), newblock); 811 812cleanup: 813 if (bh) { 814 if (buffer_locked(bh)) 815 unlock_buffer(bh); 816 brelse(bh); 817 } 818 819 if (err) { 820 /* free all allocated blocks in error case */ 821 for (i = 0; i < depth; i++) { 822 if (!ablocks[i]) 823 continue; 824 ext4_free_blocks(handle, inode, ablocks[i], 1); 825 } 826 } 827 kfree(ablocks); 828 829 return err; 830} 831 832/* 833 * ext4_ext_grow_indepth: 834 * implements tree growing procedure: 835 * - allocates new block 836 * - moves top-level data (index block or leaf) into the new block 837 * - initializes new top-level, creating index that points to the 838 * just created block 839 */ 840static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, 841 struct ext4_ext_path *path, 842 struct ext4_extent *newext) 843{ 844 struct ext4_ext_path *curp = path; 845 struct ext4_extent_header *neh; 846 struct ext4_extent_idx *fidx; 847 struct buffer_head *bh; 848 ext4_fsblk_t newblock; 849 int err = 0; 850 851 newblock = ext4_ext_new_block(handle, inode, path, newext, &err); 852 if (newblock == 0) 853 return err; 854 855 bh = sb_getblk(inode->i_sb, newblock); 856 if (!bh) { 857 err = -EIO; 858 ext4_std_error(inode->i_sb, err); 859 return err; 860 } 861 lock_buffer(bh); 862 863 err = ext4_journal_get_create_access(handle, bh); 864 if (err) { 865 unlock_buffer(bh); 866 goto out; 867 } 868 869 /* move top-level index/leaf into new block */ 870 memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data)); 871 872 /* set size of new block */ 873 neh = ext_block_hdr(bh); 874 /* old root could have indexes or leaves 875 * so calculate e_max right way */ 876 if (ext_depth(inode)) 877 neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); 878 else 879 neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); 880 neh->eh_magic = EXT4_EXT_MAGIC; 881 set_buffer_uptodate(bh); 882 unlock_buffer(bh); 883 884 err = ext4_journal_dirty_metadata(handle, bh); 885 if (err) 886 goto out; 887 888 /* create index in new top-level index: num,max,pointer */ 889 err = ext4_ext_get_access(handle, inode, curp); 890 if (err) 891 goto out; 892 893 curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; 894 curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode)); 895 curp->p_hdr->eh_entries = cpu_to_le16(1); 896 curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); 897 /* FIXME: it works, but actually path[0] can be index */ 898 curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; 899 ext4_idx_store_pblock(curp->p_idx, newblock); 900 901 neh = ext_inode_hdr(inode); 902 fidx = EXT_FIRST_INDEX(neh); 903 ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", 904 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), 905 le32_to_cpu(fidx->ei_block), idx_pblock(fidx)); 906 907 neh->eh_depth = cpu_to_le16(path->p_depth + 1); 908 err = ext4_ext_dirty(handle, inode, curp); 909out: 910 brelse(bh); 911 912 return err; 913} 914 915/* 916 * ext4_ext_create_new_leaf: 917 * finds empty index and adds new leaf. 918 * if no free index is found, then it requests in-depth growing. 919 */ 920static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, 921 struct ext4_ext_path *path, 922 struct ext4_extent *newext) 923{ 924 struct ext4_ext_path *curp; 925 int depth, i, err = 0; 926 927repeat: 928 i = depth = ext_depth(inode); 929 930 /* walk up to the tree and look for free index entry */ 931 curp = path + depth; 932 while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { 933 i--; 934 curp--; 935 } 936 937 /* we use already allocated block for index block, 938 * so subsequent data blocks should be contiguous */ 939 if (EXT_HAS_FREE_INDEX(curp)) { 940 /* if we found index with free entry, then use that 941 * entry: create all needed subtree and add new leaf */ 942 err = ext4_ext_split(handle, inode, path, newext, i); 943 944 /* refill path */ 945 ext4_ext_drop_refs(path); 946 path = ext4_ext_find_extent(inode, 947 le32_to_cpu(newext->ee_block), 948 path); 949 if (IS_ERR(path)) 950 err = PTR_ERR(path); 951 } else { 952 /* tree is full, time to grow in depth */ 953 err = ext4_ext_grow_indepth(handle, inode, path, newext); 954 if (err) 955 goto out; 956 957 /* refill path */ 958 ext4_ext_drop_refs(path); 959 path = ext4_ext_find_extent(inode, 960 le32_to_cpu(newext->ee_block), 961 path); 962 if (IS_ERR(path)) { 963 err = PTR_ERR(path); 964 goto out; 965 } 966 967 /* 968 * only first (depth 0 -> 1) produces free space; 969 * in all other cases we have to split the grown tree 970 */ 971 depth = ext_depth(inode); 972 if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { 973 /* now we need to split */ 974 goto repeat; 975 } 976 } 977 978out: 979 return err; 980} 981 982/* 983 * ext4_ext_next_allocated_block: 984 * returns allocated block in subsequent extent or EXT_MAX_BLOCK. 985 * NOTE: it considers block number from index entry as 986 * allocated block. Thus, index entries have to be consistent 987 * with leaves. 988 */ 989static unsigned long 990ext4_ext_next_allocated_block(struct ext4_ext_path *path) 991{ 992 int depth; 993 994 BUG_ON(path == NULL); 995 depth = path->p_depth; 996 997 if (depth == 0 && path->p_ext == NULL) 998 return EXT_MAX_BLOCK; 999 1000 while (depth >= 0) { 1001 if (depth == path->p_depth) { 1002 /* leaf */ 1003 if (path[depth].p_ext != 1004 EXT_LAST_EXTENT(path[depth].p_hdr)) 1005 return le32_to_cpu(path[depth].p_ext[1].ee_block); 1006 } else { 1007 /* index */ 1008 if (path[depth].p_idx != 1009 EXT_LAST_INDEX(path[depth].p_hdr)) 1010 return le32_to_cpu(path[depth].p_idx[1].ei_block); 1011 } 1012 depth--; 1013 } 1014 1015 return EXT_MAX_BLOCK; 1016} 1017 1018/* 1019 * ext4_ext_next_leaf_block: 1020 * returns first allocated block from next leaf or EXT_MAX_BLOCK 1021 */ 1022static unsigned ext4_ext_next_leaf_block(struct inode *inode, 1023 struct ext4_ext_path *path) 1024{ 1025 int depth; 1026 1027 BUG_ON(path == NULL); 1028 depth = path->p_depth; 1029 1030 /* zero-tree has no leaf blocks at all */ 1031 if (depth == 0) 1032 return EXT_MAX_BLOCK; 1033 1034 /* go to index block */ 1035 depth--; 1036 1037 while (depth >= 0) { 1038 if (path[depth].p_idx != 1039 EXT_LAST_INDEX(path[depth].p_hdr)) 1040 return le32_to_cpu(path[depth].p_idx[1].ei_block); 1041 depth--; 1042 } 1043 1044 return EXT_MAX_BLOCK; 1045} 1046 1047/* 1048 * ext4_ext_correct_indexes: 1049 * if leaf gets modified and modified extent is first in the leaf, 1050 * then we have to correct all indexes above. 1051 * TODO: do we need to correct tree in all cases? 1052 */ 1053int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, 1054 struct ext4_ext_path *path) 1055{ 1056 struct ext4_extent_header *eh; 1057 int depth = ext_depth(inode); 1058 struct ext4_extent *ex; 1059 __le32 border; 1060 int k, err = 0; 1061 1062 eh = path[depth].p_hdr; 1063 ex = path[depth].p_ext; 1064 BUG_ON(ex == NULL); 1065 BUG_ON(eh == NULL); 1066 1067 if (depth == 0) { 1068 /* there is no tree at all */ 1069 return 0; 1070 } 1071 1072 if (ex != EXT_FIRST_EXTENT(eh)) { 1073 /* we correct tree if first leaf got modified only */ 1074 return 0; 1075 } 1076 1077 /* 1078 * TODO: we need correction if border is smaller than current one 1079 */ 1080 k = depth - 1; 1081 border = path[depth].p_ext->ee_block; 1082 err = ext4_ext_get_access(handle, inode, path + k); 1083 if (err) 1084 return err; 1085 path[k].p_idx->ei_block = border; 1086 err = ext4_ext_dirty(handle, inode, path + k); 1087 if (err) 1088 return err; 1089 1090 while (k--) { 1091 /* change all left-side indexes */ 1092 if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) 1093 break; 1094 err = ext4_ext_get_access(handle, inode, path + k); 1095 if (err) 1096 break; 1097 path[k].p_idx->ei_block = border; 1098 err = ext4_ext_dirty(handle, inode, path + k); 1099 if (err) 1100 break; 1101 } 1102 1103 return err; 1104} 1105 1106static int 1107ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, 1108 struct ext4_extent *ex2) 1109{ 1110 if (le32_to_cpu(ex1->ee_block) + le16_to_cpu(ex1->ee_len) != 1111 le32_to_cpu(ex2->ee_block)) 1112 return 0; 1113 1114 /* 1115 * To allow future support for preallocated extents to be added 1116 * as an RO_COMPAT feature, refuse to merge to extents if 1117 * this can result in the top bit of ee_len being set. 1118 */ 1119 if (le16_to_cpu(ex1->ee_len) + le16_to_cpu(ex2->ee_len) > EXT_MAX_LEN) 1120 return 0; 1121#ifdef AGRESSIVE_TEST 1122 if (le16_to_cpu(ex1->ee_len) >= 4) 1123 return 0; 1124#endif 1125 1126 if (ext_pblock(ex1) + le16_to_cpu(ex1->ee_len) == ext_pblock(ex2)) 1127 return 1; 1128 return 0; 1129} 1130 1131/* 1132 * ext4_ext_insert_extent: 1133 * tries to merge requsted extent into the existing extent or 1134 * inserts requested extent as new one into the tree, 1135 * creating new leaf in the no-space case. 1136 */ 1137int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, 1138 struct ext4_ext_path *path, 1139 struct ext4_extent *newext) 1140{ 1141 struct ext4_extent_header * eh; 1142 struct ext4_extent *ex, *fex; 1143 struct ext4_extent *nearex; /* nearest extent */ 1144 struct ext4_ext_path *npath = NULL; 1145 int depth, len, err, next; 1146 1147 BUG_ON(newext->ee_len == 0); 1148 depth = ext_depth(inode); 1149 ex = path[depth].p_ext; 1150 BUG_ON(path[depth].p_hdr == NULL); 1151 1152 /* try to insert block into found extent and return */ 1153 if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { 1154 ext_debug("append %d block to %d:%d (from %llu)\n", 1155 le16_to_cpu(newext->ee_len), 1156 le32_to_cpu(ex->ee_block), 1157 le16_to_cpu(ex->ee_len), ext_pblock(ex)); 1158 err = ext4_ext_get_access(handle, inode, path + depth); 1159 if (err) 1160 return err; 1161 ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len) 1162 + le16_to_cpu(newext->ee_len)); 1163 eh = path[depth].p_hdr; 1164 nearex = ex; 1165 goto merge; 1166 } 1167 1168repeat: 1169 depth = ext_depth(inode); 1170 eh = path[depth].p_hdr; 1171 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) 1172 goto has_space; 1173 1174 /* probably next leaf has space for us? */ 1175 fex = EXT_LAST_EXTENT(eh); 1176 next = ext4_ext_next_leaf_block(inode, path); 1177 if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) 1178 && next != EXT_MAX_BLOCK) { 1179 ext_debug("next leaf block - %d\n", next); 1180 BUG_ON(npath != NULL); 1181 npath = ext4_ext_find_extent(inode, next, NULL); 1182 if (IS_ERR(npath)) 1183 return PTR_ERR(npath); 1184 BUG_ON(npath->p_depth != path->p_depth); 1185 eh = npath[depth].p_hdr; 1186 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { 1187 ext_debug("next leaf isnt full(%d)\n", 1188 le16_to_cpu(eh->eh_entries)); 1189 path = npath; 1190 goto repeat; 1191 } 1192 ext_debug("next leaf has no free space(%d,%d)\n", 1193 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); 1194 } 1195 1196 /* 1197 * There is no free space in the found leaf. 1198 * We're gonna add a new leaf in the tree. 1199 */ 1200 err = ext4_ext_create_new_leaf(handle, inode, path, newext); 1201 if (err) 1202 goto cleanup; 1203 depth = ext_depth(inode); 1204 eh = path[depth].p_hdr; 1205 1206has_space: 1207 nearex = path[depth].p_ext; 1208 1209 err = ext4_ext_get_access(handle, inode, path + depth); 1210 if (err) 1211 goto cleanup; 1212 1213 if (!nearex) { 1214 /* there is no extent in this leaf, create first one */ 1215 ext_debug("first extent in the leaf: %d:%llu:%d\n", 1216 le32_to_cpu(newext->ee_block), 1217 ext_pblock(newext), 1218 le16_to_cpu(newext->ee_len)); 1219 path[depth].p_ext = EXT_FIRST_EXTENT(eh); 1220 } else if (le32_to_cpu(newext->ee_block) 1221 > le32_to_cpu(nearex->ee_block)) { 1222/* BUG_ON(newext->ee_block == nearex->ee_block); */ 1223 if (nearex != EXT_LAST_EXTENT(eh)) { 1224 len = EXT_MAX_EXTENT(eh) - nearex; 1225 len = (len - 1) * sizeof(struct ext4_extent); 1226 len = len < 0 ? 0 : len; 1227 ext_debug("insert %d:%llu:%d after: nearest 0x%p, " 1228 "move %d from 0x%p to 0x%p\n", 1229 le32_to_cpu(newext->ee_block), 1230 ext_pblock(newext), 1231 le16_to_cpu(newext->ee_len), 1232 nearex, len, nearex + 1, nearex + 2); 1233 memmove(nearex + 2, nearex + 1, len); 1234 } 1235 path[depth].p_ext = nearex + 1; 1236 } else { 1237 BUG_ON(newext->ee_block == nearex->ee_block); 1238 len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); 1239 len = len < 0 ? 0 : len; 1240 ext_debug("insert %d:%llu:%d before: nearest 0x%p, " 1241 "move %d from 0x%p to 0x%p\n", 1242 le32_to_cpu(newext->ee_block), 1243 ext_pblock(newext), 1244 le16_to_cpu(newext->ee_len), 1245 nearex, len, nearex + 1, nearex + 2); 1246 memmove(nearex + 1, nearex, len); 1247 path[depth].p_ext = nearex; 1248 } 1249 1250 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1); 1251 nearex = path[depth].p_ext; 1252 nearex->ee_block = newext->ee_block; 1253 nearex->ee_start = newext->ee_start; 1254 nearex->ee_start_hi = newext->ee_start_hi; 1255 nearex->ee_len = newext->ee_len; 1256 1257merge: 1258 /* try to merge extents to the right */ 1259 while (nearex < EXT_LAST_EXTENT(eh)) { 1260 if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1)) 1261 break; 1262 /* merge with next extent! */ 1263 nearex->ee_len = cpu_to_le16(le16_to_cpu(nearex->ee_len) 1264 + le16_to_cpu(nearex[1].ee_len)); 1265 if (nearex + 1 < EXT_LAST_EXTENT(eh)) { 1266 len = (EXT_LAST_EXTENT(eh) - nearex - 1) 1267 * sizeof(struct ext4_extent); 1268 memmove(nearex + 1, nearex + 2, len); 1269 } 1270 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); 1271 BUG_ON(eh->eh_entries == 0); 1272 } 1273 1274 /* try to merge extents to the left */ 1275 1276 /* time to correct all indexes above */ 1277 err = ext4_ext_correct_indexes(handle, inode, path); 1278 if (err) 1279 goto cleanup; 1280 1281 err = ext4_ext_dirty(handle, inode, path + depth); 1282 1283cleanup: 1284 if (npath) { 1285 ext4_ext_drop_refs(npath); 1286 kfree(npath); 1287 } 1288 ext4_ext_tree_changed(inode); 1289 ext4_ext_invalidate_cache(inode); 1290 return err; 1291} 1292 1293int ext4_ext_walk_space(struct inode *inode, unsigned long block, 1294 unsigned long num, ext_prepare_callback func, 1295 void *cbdata) 1296{ 1297 struct ext4_ext_path *path = NULL; 1298 struct ext4_ext_cache cbex; 1299 struct ext4_extent *ex; 1300 unsigned long next, start = 0, end = 0; 1301 unsigned long last = block + num; 1302 int depth, exists, err = 0; 1303 1304 BUG_ON(func == NULL); 1305 BUG_ON(inode == NULL); 1306 1307 while (block < last && block != EXT_MAX_BLOCK) { 1308 num = last - block; 1309 /* find extent for this block */ 1310 path = ext4_ext_find_extent(inode, block, path); 1311 if (IS_ERR(path)) { 1312 err = PTR_ERR(path); 1313 path = NULL; 1314 break; 1315 } 1316 1317 depth = ext_depth(inode); 1318 BUG_ON(path[depth].p_hdr == NULL); 1319 ex = path[depth].p_ext; 1320 next = ext4_ext_next_allocated_block(path); 1321 1322 exists = 0; 1323 if (!ex) { 1324 /* there is no extent yet, so try to allocate 1325 * all requested space */ 1326 start = block; 1327 end = block + num; 1328 } else if (le32_to_cpu(ex->ee_block) > block) { 1329 /* need to allocate space before found extent */ 1330 start = block; 1331 end = le32_to_cpu(ex->ee_block); 1332 if (block + num < end) 1333 end = block + num; 1334 } else if (block >= 1335 le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len)) { 1336 /* need to allocate space after found extent */ 1337 start = block; 1338 end = block + num; 1339 if (end >= next) 1340 end = next; 1341 } else if (block >= le32_to_cpu(ex->ee_block)) { 1342 /* 1343 * some part of requested space is covered 1344 * by found extent 1345 */ 1346 start = block; 1347 end = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len); 1348 if (block + num < end) 1349 end = block + num; 1350 exists = 1; 1351 } else { 1352 BUG(); 1353 } 1354 BUG_ON(end <= start); 1355 1356 if (!exists) { 1357 cbex.ec_block = start; 1358 cbex.ec_len = end - start; 1359 cbex.ec_start = 0; 1360 cbex.ec_type = EXT4_EXT_CACHE_GAP; 1361 } else { 1362 cbex.ec_block = le32_to_cpu(ex->ee_block); 1363 cbex.ec_len = le16_to_cpu(ex->ee_len); 1364 cbex.ec_start = ext_pblock(ex); 1365 cbex.ec_type = EXT4_EXT_CACHE_EXTENT; 1366 } 1367 1368 BUG_ON(cbex.ec_len == 0); 1369 err = func(inode, path, &cbex, cbdata); 1370 ext4_ext_drop_refs(path); 1371 1372 if (err < 0) 1373 break; 1374 if (err == EXT_REPEAT) 1375 continue; 1376 else if (err == EXT_BREAK) { 1377 err = 0; 1378 break; 1379 } 1380 1381 if (ext_depth(inode) != depth) { 1382 /* depth was changed. we have to realloc path */ 1383 kfree(path); 1384 path = NULL; 1385 } 1386 1387 block = cbex.ec_block + cbex.ec_len; 1388 } 1389 1390 if (path) { 1391 ext4_ext_drop_refs(path); 1392 kfree(path); 1393 } 1394 1395 return err; 1396} 1397 1398static void 1399ext4_ext_put_in_cache(struct inode *inode, __u32 block, 1400 __u32 len, __u32 start, int type) 1401{ 1402 struct ext4_ext_cache *cex; 1403 BUG_ON(len == 0); 1404 cex = &EXT4_I(inode)->i_cached_extent; 1405 cex->ec_type = type; 1406 cex->ec_block = block; 1407 cex->ec_len = len; 1408 cex->ec_start = start; 1409} 1410 1411/* 1412 * ext4_ext_put_gap_in_cache: 1413 * calculate boundaries of the gap that the requested block fits into 1414 * and cache this gap 1415 */ 1416static void 1417ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, 1418 unsigned long block) 1419{ 1420 int depth = ext_depth(inode); 1421 unsigned long lblock, len; 1422 struct ext4_extent *ex; 1423 1424 ex = path[depth].p_ext; 1425 if (ex == NULL) { 1426 /* there is no extent yet, so gap is [0;-] */ 1427 lblock = 0; 1428 len = EXT_MAX_BLOCK; 1429 ext_debug("cache gap(whole file):"); 1430 } else if (block < le32_to_cpu(ex->ee_block)) { 1431 lblock = block; 1432 len = le32_to_cpu(ex->ee_block) - block; 1433 ext_debug("cache gap(before): %lu [%lu:%lu]", 1434 (unsigned long) block, 1435 (unsigned long) le32_to_cpu(ex->ee_block), 1436 (unsigned long) le16_to_cpu(ex->ee_len)); 1437 } else if (block >= le32_to_cpu(ex->ee_block) 1438 + le16_to_cpu(ex->ee_len)) { 1439 lblock = le32_to_cpu(ex->ee_block) 1440 + le16_to_cpu(ex->ee_len); 1441 len = ext4_ext_next_allocated_block(path); 1442 ext_debug("cache gap(after): [%lu:%lu] %lu", 1443 (unsigned long) le32_to_cpu(ex->ee_block), 1444 (unsigned long) le16_to_cpu(ex->ee_len), 1445 (unsigned long) block); 1446 BUG_ON(len == lblock); 1447 len = len - lblock; 1448 } else { 1449 lblock = len = 0; 1450 BUG(); 1451 } 1452 1453 ext_debug(" -> %lu:%lu\n", (unsigned long) lblock, len); 1454 ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP); 1455} 1456 1457static int 1458ext4_ext_in_cache(struct inode *inode, unsigned long block, 1459 struct ext4_extent *ex) 1460{ 1461 struct ext4_ext_cache *cex; 1462 1463 cex = &EXT4_I(inode)->i_cached_extent; 1464 1465 /* has cache valid data? */ 1466 if (cex->ec_type == EXT4_EXT_CACHE_NO) 1467 return EXT4_EXT_CACHE_NO; 1468 1469 BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && 1470 cex->ec_type != EXT4_EXT_CACHE_EXTENT); 1471 if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { 1472 ex->ee_block = cpu_to_le32(cex->ec_block); 1473 ext4_ext_store_pblock(ex, cex->ec_start); 1474 ex->ee_len = cpu_to_le16(cex->ec_len); 1475 ext_debug("%lu cached by %lu:%lu:%llu\n", 1476 (unsigned long) block, 1477 (unsigned long) cex->ec_block, 1478 (unsigned long) cex->ec_len, 1479 cex->ec_start); 1480 return cex->ec_type; 1481 } 1482 1483 /* not in cache */ 1484 return EXT4_EXT_CACHE_NO; 1485} 1486 1487/* 1488 * ext4_ext_rm_idx: 1489 * removes index from the index block. 1490 * It's used in truncate case only, thus all requests are for 1491 * last index in the block only. 1492 */ 1493int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, 1494 struct ext4_ext_path *path) 1495{ 1496 struct buffer_head *bh; 1497 int err; 1498 ext4_fsblk_t leaf; 1499 1500 /* free index block */ 1501 path--; 1502 leaf = idx_pblock(path->p_idx); 1503 BUG_ON(path->p_hdr->eh_entries == 0); 1504 err = ext4_ext_get_access(handle, inode, path); 1505 if (err) 1506 return err; 1507 path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); 1508 err = ext4_ext_dirty(handle, inode, path); 1509 if (err) 1510 return err; 1511 ext_debug("index is empty, remove it, free block %llu\n", leaf); 1512 bh = sb_find_get_block(inode->i_sb, leaf); 1513 ext4_forget(handle, 1, inode, bh, leaf); 1514 ext4_free_blocks(handle, inode, leaf, 1); 1515 return err; 1516} 1517 1518/* 1519 * ext4_ext_calc_credits_for_insert: 1520 * This routine returns max. credits that the extent tree can consume. 1521 * It should be OK for low-performance paths like ->writepage() 1522 * To allow many writing processes to fit into a single transaction, 1523 * the caller should calculate credits under truncate_mutex and 1524 * pass the actual path. 1525 */ 1526int ext4_ext_calc_credits_for_insert(struct inode *inode, 1527 struct ext4_ext_path *path) 1528{ 1529 int depth, needed; 1530 1531 if (path) { 1532 /* probably there is space in leaf? */ 1533 depth = ext_depth(inode); 1534 if (le16_to_cpu(path[depth].p_hdr->eh_entries) 1535 < le16_to_cpu(path[depth].p_hdr->eh_max)) 1536 return 1; 1537 } 1538 1539 /* 1540 * given 32-bit logical block (4294967296 blocks), max. tree 1541 * can be 4 levels in depth -- 4 * 340^4 == 53453440000. 1542 * Let's also add one more level for imbalance. 1543 */ 1544 depth = 5; 1545 1546 /* allocation of new data block(s) */ 1547 needed = 2; 1548 1549 /* 1550 * tree can be full, so it would need to grow in depth: 1551 * we need one credit to modify old root, credits for 1552 * new root will be added in split accounting 1553 */ 1554 needed += 1; 1555 1556 /* 1557 * Index split can happen, we would need: 1558 * allocate intermediate indexes (bitmap + group) 1559 * + change two blocks at each level, but root (already included) 1560 */ 1561 needed += (depth * 2) + (depth * 2); 1562 1563 /* any allocation modifies superblock */ 1564 needed += 1; 1565 1566 return needed; 1567} 1568 1569static int ext4_remove_blocks(handle_t *handle, struct inode *inode, 1570 struct ext4_extent *ex, 1571 unsigned long from, unsigned long to) 1572{ 1573 struct buffer_head *bh; 1574 int i; 1575 1576#ifdef EXTENTS_STATS 1577 { 1578 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1579 unsigned short ee_len = le16_to_cpu(ex->ee_len); 1580 spin_lock(&sbi->s_ext_stats_lock); 1581 sbi->s_ext_blocks += ee_len; 1582 sbi->s_ext_extents++; 1583 if (ee_len < sbi->s_ext_min) 1584 sbi->s_ext_min = ee_len; 1585 if (ee_len > sbi->s_ext_max) 1586 sbi->s_ext_max = ee_len; 1587 if (ext_depth(inode) > sbi->s_depth_max) 1588 sbi->s_depth_max = ext_depth(inode); 1589 spin_unlock(&sbi->s_ext_stats_lock); 1590 } 1591#endif 1592 if (from >= le32_to_cpu(ex->ee_block) 1593 && to == le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { 1594 /* tail removal */ 1595 unsigned long num; 1596 ext4_fsblk_t start; 1597 num = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - from; 1598 start = ext_pblock(ex) + le16_to_cpu(ex->ee_len) - num; 1599 ext_debug("free last %lu blocks starting %llu\n", num, start); 1600 for (i = 0; i < num; i++) { 1601 bh = sb_find_get_block(inode->i_sb, start + i); 1602 ext4_forget(handle, 0, inode, bh, start + i); 1603 } 1604 ext4_free_blocks(handle, inode, start, num); 1605 } else if (from == le32_to_cpu(ex->ee_block) 1606 && to <= le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { 1607 printk("strange request: removal %lu-%lu from %u:%u\n", 1608 from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len)); 1609 } else { 1610 printk("strange request: removal(2) %lu-%lu from %u:%u\n", 1611 from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len)); 1612 } 1613 return 0; 1614} 1615 1616static int 1617ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, 1618 struct ext4_ext_path *path, unsigned long start) 1619{ 1620 int err = 0, correct_index = 0; 1621 int depth = ext_depth(inode), credits; 1622 struct ext4_extent_header *eh; 1623 unsigned a, b, block, num; 1624 unsigned long ex_ee_block; 1625 unsigned short ex_ee_len; 1626 struct ext4_extent *ex; 1627 1628 ext_debug("truncate since %lu in leaf\n", start); 1629 if (!path[depth].p_hdr) 1630 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); 1631 eh = path[depth].p_hdr; 1632 BUG_ON(eh == NULL); 1633 BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); 1634 BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC); 1635 1636 /* find where to start removing */ 1637 ex = EXT_LAST_EXTENT(eh); 1638 1639 ex_ee_block = le32_to_cpu(ex->ee_block); 1640 ex_ee_len = le16_to_cpu(ex->ee_len); 1641 1642 while (ex >= EXT_FIRST_EXTENT(eh) && 1643 ex_ee_block + ex_ee_len > start) { 1644 ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); 1645 path[depth].p_ext = ex; 1646 1647 a = ex_ee_block > start ? ex_ee_block : start; 1648 b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ? 1649 ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK; 1650 1651 ext_debug(" border %u:%u\n", a, b); 1652 1653 if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { 1654 block = 0; 1655 num = 0; 1656 BUG(); 1657 } else if (a != ex_ee_block) { 1658 /* remove tail of the extent */ 1659 block = ex_ee_block; 1660 num = a - block; 1661 } else if (b != ex_ee_block + ex_ee_len - 1) { 1662 /* remove head of the extent */ 1663 block = a; 1664 num = b - a; 1665 /* there is no "make a hole" API yet */ 1666 BUG(); 1667 } else { 1668 /* remove whole extent: excellent! */ 1669 block = ex_ee_block; 1670 num = 0; 1671 BUG_ON(a != ex_ee_block); 1672 BUG_ON(b != ex_ee_block + ex_ee_len - 1); 1673 } 1674 1675 /* at present, extent can't cross block group: */ 1676 /* leaf + bitmap + group desc + sb + inode */ 1677 credits = 5; 1678 if (ex == EXT_FIRST_EXTENT(eh)) { 1679 correct_index = 1; 1680 credits += (ext_depth(inode)) + 1; 1681 } 1682#ifdef CONFIG_QUOTA 1683 credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 1684#endif 1685 1686 handle = ext4_ext_journal_restart(handle, credits); 1687 if (IS_ERR(handle)) { 1688 err = PTR_ERR(handle); 1689 goto out; 1690 } 1691 1692 err = ext4_ext_get_access(handle, inode, path + depth); 1693 if (err) 1694 goto out; 1695 1696 err = ext4_remove_blocks(handle, inode, ex, a, b); 1697 if (err) 1698 goto out; 1699 1700 if (num == 0) { 1701 /* this extent is removed; mark slot entirely unused */ 1702 ext4_ext_store_pblock(ex, 0); 1703 eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); 1704 } 1705 1706 ex->ee_block = cpu_to_le32(block); 1707 ex->ee_len = cpu_to_le16(num); 1708 1709 err = ext4_ext_dirty(handle, inode, path + depth); 1710 if (err) 1711 goto out; 1712 1713 ext_debug("new extent: %u:%u:%llu\n", block, num, 1714 ext_pblock(ex)); 1715 ex--; 1716 ex_ee_block = le32_to_cpu(ex->ee_block); 1717 ex_ee_len = le16_to_cpu(ex->ee_len); 1718 } 1719 1720 if (correct_index && eh->eh_entries) 1721 err = ext4_ext_correct_indexes(handle, inode, path); 1722 1723 /* if this leaf is free, then we should 1724 * remove it from index block above */ 1725 if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) 1726 err = ext4_ext_rm_idx(handle, inode, path + depth); 1727 1728out: 1729 return err; 1730} 1731 1732/* 1733 * ext4_ext_more_to_rm: 1734 * returns 1 if current index has to be freed (even partial) 1735 */ 1736static int 1737ext4_ext_more_to_rm(struct ext4_ext_path *path) 1738{ 1739 BUG_ON(path->p_idx == NULL); 1740 1741 if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) 1742 return 0; 1743 1744 /* 1745 * if truncate on deeper level happened, it wasn't partial, 1746 * so we have to consider current index for truncation 1747 */ 1748 if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block) 1749 return 0; 1750 return 1; 1751} 1752 1753int ext4_ext_remove_space(struct inode *inode, unsigned long start) 1754{ 1755 struct super_block *sb = inode->i_sb; 1756 int depth = ext_depth(inode); 1757 struct ext4_ext_path *path; 1758 handle_t *handle; 1759 int i = 0, err = 0; 1760 1761 ext_debug("truncate since %lu\n", start); 1762 1763 /* probably first extent we're gonna free will be last in block */ 1764 handle = ext4_journal_start(inode, depth + 1); 1765 if (IS_ERR(handle)) 1766 return PTR_ERR(handle); 1767 1768 ext4_ext_invalidate_cache(inode); 1769 1770 /* 1771 * We start scanning from right side, freeing all the blocks 1772 * after i_size and walking into the tree depth-wise. 1773 */ 1774 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); 1775 if (path == NULL) { 1776 ext4_journal_stop(handle); 1777 return -ENOMEM; 1778 } 1779 path[0].p_hdr = ext_inode_hdr(inode); 1780 if (ext4_ext_check_header(__FUNCTION__, inode, path[0].p_hdr)) { 1781 err = -EIO; 1782 goto out; 1783 } 1784 path[0].p_depth = depth; 1785 1786 while (i >= 0 && err == 0) { 1787 if (i == depth) { 1788 /* this is leaf block */ 1789 err = ext4_ext_rm_leaf(handle, inode, path, start); 1790 /* root level has p_bh == NULL, brelse() eats this */ 1791 brelse(path[i].p_bh); 1792 path[i].p_bh = NULL; 1793 i--; 1794 continue; 1795 } 1796 1797 /* this is index block */ 1798 if (!path[i].p_hdr) { 1799 ext_debug("initialize header\n"); 1800 path[i].p_hdr = ext_block_hdr(path[i].p_bh); 1801 if (ext4_ext_check_header(__FUNCTION__, inode, 1802 path[i].p_hdr)) { 1803 err = -EIO; 1804 goto out; 1805 } 1806 } 1807 1808 BUG_ON(le16_to_cpu(path[i].p_hdr->eh_entries) 1809 > le16_to_cpu(path[i].p_hdr->eh_max)); 1810 BUG_ON(path[i].p_hdr->eh_magic != EXT4_EXT_MAGIC); 1811 1812 if (!path[i].p_idx) { 1813 /* this level hasn't been touched yet */ 1814 path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); 1815 path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1; 1816 ext_debug("init index ptr: hdr 0x%p, num %d\n", 1817 path[i].p_hdr, 1818 le16_to_cpu(path[i].p_hdr->eh_entries)); 1819 } else { 1820 /* we were already here, see at next index */ 1821 path[i].p_idx--; 1822 } 1823 1824 ext_debug("level %d - index, first 0x%p, cur 0x%p\n", 1825 i, EXT_FIRST_INDEX(path[i].p_hdr), 1826 path[i].p_idx); 1827 if (ext4_ext_more_to_rm(path + i)) { 1828 /* go to the next level */ 1829 ext_debug("move to level %d (block %llu)\n", 1830 i + 1, idx_pblock(path[i].p_idx)); 1831 memset(path + i + 1, 0, sizeof(*path)); 1832 path[i+1].p_bh = 1833 sb_bread(sb, idx_pblock(path[i].p_idx)); 1834 if (!path[i+1].p_bh) { 1835 /* should we reset i_size? */ 1836 err = -EIO; 1837 break; 1838 } 1839 1840 /* save actual number of indexes since this 1841 * number is changed at the next iteration */ 1842 path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries); 1843 i++; 1844 } else { 1845 /* we finished processing this index, go up */ 1846 if (path[i].p_hdr->eh_entries == 0 && i > 0) { 1847 /* index is empty, remove it; 1848 * handle must be already prepared by the 1849 * truncatei_leaf() */ 1850 err = ext4_ext_rm_idx(handle, inode, path + i); 1851 } 1852 /* root level has p_bh == NULL, brelse() eats this */ 1853 brelse(path[i].p_bh); 1854 path[i].p_bh = NULL; 1855 i--; 1856 ext_debug("return to level %d\n", i); 1857 } 1858 } 1859 1860 /* TODO: flexible tree reduction should be here */ 1861 if (path->p_hdr->eh_entries == 0) { 1862 /* 1863 * truncate to zero freed all the tree, 1864 * so we need to correct eh_depth 1865 */ 1866 err = ext4_ext_get_access(handle, inode, path); 1867 if (err == 0) { 1868 ext_inode_hdr(inode)->eh_depth = 0; 1869 ext_inode_hdr(inode)->eh_max = 1870 cpu_to_le16(ext4_ext_space_root(inode)); 1871 err = ext4_ext_dirty(handle, inode, path); 1872 } 1873 } 1874out: 1875 ext4_ext_tree_changed(inode); 1876 ext4_ext_drop_refs(path); 1877 kfree(path); 1878 ext4_journal_stop(handle); 1879 1880 return err; 1881} 1882 1883/* 1884 * called at mount time 1885 */ 1886void ext4_ext_init(struct super_block *sb) 1887{ 1888 /* 1889 * possible initialization would be here 1890 */ 1891 1892 if (test_opt(sb, EXTENTS)) { 1893 printk("EXT4-fs: file extents enabled"); 1894#ifdef AGRESSIVE_TEST 1895 printk(", agressive tests"); 1896#endif 1897#ifdef CHECK_BINSEARCH 1898 printk(", check binsearch"); 1899#endif 1900#ifdef EXTENTS_STATS 1901 printk(", stats"); 1902#endif 1903 printk("\n"); 1904#ifdef EXTENTS_STATS 1905 spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock); 1906 EXT4_SB(sb)->s_ext_min = 1 << 30; 1907 EXT4_SB(sb)->s_ext_max = 0; 1908#endif 1909 } 1910} 1911 1912/* 1913 * called at umount time 1914 */ 1915void ext4_ext_release(struct super_block *sb) 1916{ 1917 if (!test_opt(sb, EXTENTS)) 1918 return; 1919 1920#ifdef EXTENTS_STATS 1921 if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) { 1922 struct ext4_sb_info *sbi = EXT4_SB(sb); 1923 printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n", 1924 sbi->s_ext_blocks, sbi->s_ext_extents, 1925 sbi->s_ext_blocks / sbi->s_ext_extents); 1926 printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n", 1927 sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max); 1928 } 1929#endif 1930} 1931 1932int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, 1933 ext4_fsblk_t iblock, 1934 unsigned long max_blocks, struct buffer_head *bh_result, 1935 int create, int extend_disksize) 1936{ 1937 struct ext4_ext_path *path = NULL; 1938 struct ext4_extent newex, *ex; 1939 ext4_fsblk_t goal, newblock; 1940 int err = 0, depth; 1941 unsigned long allocated = 0; 1942 1943 __clear_bit(BH_New, &bh_result->b_state); 1944 ext_debug("blocks %d/%lu requested for inode %u\n", (int) iblock, 1945 max_blocks, (unsigned) inode->i_ino); 1946 mutex_lock(&EXT4_I(inode)->truncate_mutex); 1947 1948 /* check in cache */ 1949 goal = ext4_ext_in_cache(inode, iblock, &newex); 1950 if (goal) { 1951 if (goal == EXT4_EXT_CACHE_GAP) { 1952 if (!create) { 1953 /* block isn't allocated yet and 1954 * user doesn't want to allocate it */ 1955 goto out2; 1956 } 1957 /* we should allocate requested block */ 1958 } else if (goal == EXT4_EXT_CACHE_EXTENT) { 1959 /* block is already allocated */ 1960 newblock = iblock 1961 - le32_to_cpu(newex.ee_block) 1962 + ext_pblock(&newex); 1963 /* number of remaining blocks in the extent */ 1964 allocated = le16_to_cpu(newex.ee_len) - 1965 (iblock - le32_to_cpu(newex.ee_block)); 1966 goto out; 1967 } else { 1968 BUG(); 1969 } 1970 } 1971 1972 /* find extent for this block */ 1973 path = ext4_ext_find_extent(inode, iblock, NULL); 1974 if (IS_ERR(path)) { 1975 err = PTR_ERR(path); 1976 path = NULL; 1977 goto out2; 1978 } 1979 1980 depth = ext_depth(inode); 1981 1982 /* 1983 * consistent leaf must not be empty; 1984 * this situation is possible, though, _during_ tree modification; 1985 * this is why assert can't be put in ext4_ext_find_extent() 1986 */ 1987 BUG_ON(path[depth].p_ext == NULL && depth != 0); 1988 1989 ex = path[depth].p_ext; 1990 if (ex) { 1991 unsigned long ee_block = le32_to_cpu(ex->ee_block); 1992 ext4_fsblk_t ee_start = ext_pblock(ex); 1993 unsigned short ee_len = le16_to_cpu(ex->ee_len); 1994 1995 /* 1996 * Allow future support for preallocated extents to be added 1997 * as an RO_COMPAT feature: 1998 * Uninitialized extents are treated as holes, except that 1999 * we avoid (fail) allocating new blocks during a write. 2000 */ 2001 if (ee_len > EXT_MAX_LEN) 2002 goto out2; 2003 /* if found extent covers block, simply return it */ 2004 if (iblock >= ee_block && iblock < ee_block + ee_len) { 2005 newblock = iblock - ee_block + ee_start; 2006 /* number of remaining blocks in the extent */ 2007 allocated = ee_len - (iblock - ee_block); 2008 ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock, 2009 ee_block, ee_len, newblock); 2010 ext4_ext_put_in_cache(inode, ee_block, ee_len, 2011 ee_start, EXT4_EXT_CACHE_EXTENT); 2012 goto out; 2013 } 2014 } 2015 2016 /* 2017 * requested block isn't allocated yet; 2018 * we couldn't try to create block if create flag is zero 2019 */ 2020 if (!create) { 2021 /* put just found gap into cache to speed up 2022 * subsequent requests */ 2023 ext4_ext_put_gap_in_cache(inode, path, iblock); 2024 goto out2; 2025 } 2026 /* 2027 * Okay, we need to do block allocation. Lazily initialize the block 2028 * allocation info here if necessary. 2029 */ 2030 if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) 2031 ext4_init_block_alloc_info(inode); 2032 2033 /* allocate new block */ 2034 goal = ext4_ext_find_goal(inode, path, iblock); 2035 allocated = max_blocks; 2036 newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err); 2037 if (!newblock) 2038 goto out2; 2039 ext_debug("allocate new block: goal %llu, found %llu/%lu\n", 2040 goal, newblock, allocated); 2041 2042 /* try to insert new extent into found leaf and return */ 2043 newex.ee_block = cpu_to_le32(iblock); 2044 ext4_ext_store_pblock(&newex, newblock); 2045 newex.ee_len = cpu_to_le16(allocated); 2046 err = ext4_ext_insert_extent(handle, inode, path, &newex); 2047 if (err) 2048 goto out2; 2049 2050 if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize) 2051 EXT4_I(inode)->i_disksize = inode->i_size; 2052 2053 /* previous routine could use block we allocated */ 2054 newblock = ext_pblock(&newex); 2055 __set_bit(BH_New, &bh_result->b_state); 2056 2057 ext4_ext_put_in_cache(inode, iblock, allocated, newblock, 2058 EXT4_EXT_CACHE_EXTENT); 2059out: 2060 if (allocated > max_blocks) 2061 allocated = max_blocks; 2062 ext4_ext_show_leaf(inode, path); 2063 __set_bit(BH_Mapped, &bh_result->b_state); 2064 bh_result->b_bdev = inode->i_sb->s_bdev; 2065 bh_result->b_blocknr = newblock; 2066out2: 2067 if (path) { 2068 ext4_ext_drop_refs(path); 2069 kfree(path); 2070 } 2071 mutex_unlock(&EXT4_I(inode)->truncate_mutex); 2072 2073 return err ? err : allocated; 2074} 2075 2076void ext4_ext_truncate(struct inode * inode, struct page *page) 2077{ 2078 struct address_space *mapping = inode->i_mapping; 2079 struct super_block *sb = inode->i_sb; 2080 unsigned long last_block; 2081 handle_t *handle; 2082 int err = 0; 2083 2084 /* 2085 * probably first extent we're gonna free will be last in block 2086 */ 2087 err = ext4_writepage_trans_blocks(inode) + 3; 2088 handle = ext4_journal_start(inode, err); 2089 if (IS_ERR(handle)) { 2090 if (page) { 2091 clear_highpage(page); 2092 flush_dcache_page(page); 2093 unlock_page(page); 2094 page_cache_release(page); 2095 } 2096 return; 2097 } 2098 2099 if (page) 2100 ext4_block_truncate_page(handle, page, mapping, inode->i_size); 2101 2102 mutex_lock(&EXT4_I(inode)->truncate_mutex); 2103 ext4_ext_invalidate_cache(inode); 2104 2105 /* 2106 * TODO: optimization is possible here. 2107 * Probably we need not scan at all, 2108 * because page truncation is enough. 2109 */ 2110 if (ext4_orphan_add(handle, inode)) 2111 goto out_stop; 2112 2113 /* we have to know where to truncate from in crash case */ 2114 EXT4_I(inode)->i_disksize = inode->i_size; 2115 ext4_mark_inode_dirty(handle, inode); 2116 2117 last_block = (inode->i_size + sb->s_blocksize - 1) 2118 >> EXT4_BLOCK_SIZE_BITS(sb); 2119 err = ext4_ext_remove_space(inode, last_block); 2120 2121 /* In a multi-transaction truncate, we only make the final 2122 * transaction synchronous. */ 2123 if (IS_SYNC(inode)) 2124 handle->h_sync = 1; 2125 2126out_stop: 2127 /* 2128 * If this was a simple ftruncate() and the file will remain alive, 2129 * then we need to clear up the orphan record which we created above. 2130 * However, if this was a real unlink then we were called by 2131 * ext4_delete_inode(), and we allow that function to clean up the 2132 * orphan info for us. 2133 */ 2134 if (inode->i_nlink) 2135 ext4_orphan_del(handle, inode); 2136 2137 mutex_unlock(&EXT4_I(inode)->truncate_mutex); 2138 ext4_journal_stop(handle); 2139} 2140 2141/* 2142 * ext4_ext_writepage_trans_blocks: 2143 * calculate max number of blocks we could modify 2144 * in order to allocate new block for an inode 2145 */ 2146int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) 2147{ 2148 int needed; 2149 2150 needed = ext4_ext_calc_credits_for_insert(inode, NULL); 2151 2152 /* caller wants to allocate num blocks, but note it includes sb */ 2153 needed = needed * num - (num - 1); 2154 2155#ifdef CONFIG_QUOTA 2156 needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 2157#endif 2158 2159 return needed; 2160} 2161 2162EXPORT_SYMBOL(ext4_mark_inode_dirty); 2163EXPORT_SYMBOL(ext4_ext_invalidate_cache); 2164EXPORT_SYMBOL(ext4_ext_insert_extent); 2165EXPORT_SYMBOL(ext4_ext_walk_space); 2166EXPORT_SYMBOL(ext4_ext_find_goal); 2167EXPORT_SYMBOL(ext4_ext_calc_credits_for_insert); 2168 2169