/* file.c revision 3e8962be915bacc1d70e4849a075041838d60a3f */
1/* 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 3 */ 4 5 6#include <linux/time.h> 7#include <linux/reiserfs_fs.h> 8#include <linux/reiserfs_acl.h> 9#include <linux/reiserfs_xattr.h> 10#include <linux/smp_lock.h> 11#include <asm/uaccess.h> 12#include <linux/pagemap.h> 13#include <linux/swap.h> 14#include <linux/writeback.h> 15#include <linux/blkdev.h> 16#include <linux/buffer_head.h> 17#include <linux/quotaops.h> 18 19/* 20** We pack the tails of files on file close, not at the time they are written. 21** This implies an unnecessary copy of the tail and an unnecessary indirect item 22** insertion/balancing, for files that are written in one write. 23** It avoids unnecessary tail packings (balances) for files that are written in 24** multiple writes and are small enough to have tails. 25** 26** file_release is called by the VFS layer when the file is closed. If 27** this is the last open file descriptor, and the file 28** small enough to have a tail, and the tail is currently in an 29** unformatted node, the tail is converted back into a direct item. 30** 31** We use reiserfs_truncate_file to pack the tail, since it already has 32** all the conditions coded. 33*/ 34static int reiserfs_file_release (struct inode * inode, struct file * filp) 35{ 36 37 struct reiserfs_transaction_handle th ; 38 int err; 39 int jbegin_failure = 0; 40 41 if (!S_ISREG (inode->i_mode)) 42 BUG (); 43 44 /* fast out for when nothing needs to be done */ 45 if ((atomic_read(&inode->i_count) > 1 || 46 !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || 47 !tail_has_to_be_packed(inode)) && 48 REISERFS_I(inode)->i_prealloc_count <= 0) { 49 return 0; 50 } 51 52 reiserfs_write_lock(inode->i_sb); 53 down (&inode->i_sem); 54 /* freeing preallocation only involves relogging blocks that 55 * are already in the current transaction. 
preallocation gets 56 * freed at the end of each transaction, so it is impossible for 57 * us to log any additional blocks (including quota blocks) 58 */ 59 err = journal_begin(&th, inode->i_sb, 1); 60 if (err) { 61 /* uh oh, we can't allow the inode to go away while there 62 * is still preallocation blocks pending. Try to join the 63 * aborted transaction 64 */ 65 jbegin_failure = err; 66 err = journal_join_abort(&th, inode->i_sb, 1); 67 68 if (err) { 69 /* hmpf, our choices here aren't good. We can pin the inode 70 * which will disallow unmount from every happening, we can 71 * do nothing, which will corrupt random memory on unmount, 72 * or we can forcibly remove the file from the preallocation 73 * list, which will leak blocks on disk. Lets pin the inode 74 * and let the admin know what is going on. 75 */ 76 igrab(inode); 77 reiserfs_warning(inode->i_sb, "pinning inode %lu because the " 78 "preallocation can't be freed"); 79 goto out; 80 } 81 } 82 reiserfs_update_inode_transaction(inode) ; 83 84#ifdef REISERFS_PREALLOCATE 85 reiserfs_discard_prealloc (&th, inode); 86#endif 87 err = journal_end(&th, inode->i_sb, 1); 88 89 /* copy back the error code from journal_begin */ 90 if (!err) 91 err = jbegin_failure; 92 93 if (!err && atomic_read(&inode->i_count) <= 1 && 94 (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && 95 tail_has_to_be_packed (inode)) { 96 /* if regular file is released by last holder and it has been 97 appended (we append by unformatted node only) or its direct 98 item(s) had to be converted, then it may have to be 99 indirect2direct converted */ 100 err = reiserfs_truncate_file(inode, 0) ; 101 } 102out: 103 up (&inode->i_sem); 104 reiserfs_write_unlock(inode->i_sb); 105 return err; 106} 107 108static void reiserfs_vfs_truncate_file(struct inode *inode) { 109 reiserfs_truncate_file(inode, 1) ; 110} 111 112/* Sync a reiserfs file. */ 113 114/* 115 * FIXME: sync_mapping_buffers() never has anything to sync. Can 116 * be removed... 
117 */ 118 119static int reiserfs_sync_file( 120 struct file * p_s_filp, 121 struct dentry * p_s_dentry, 122 int datasync 123 ) { 124 struct inode * p_s_inode = p_s_dentry->d_inode; 125 int n_err; 126 int barrier_done; 127 128 if (!S_ISREG(p_s_inode->i_mode)) 129 BUG (); 130 n_err = sync_mapping_buffers(p_s_inode->i_mapping) ; 131 reiserfs_write_lock(p_s_inode->i_sb); 132 barrier_done = reiserfs_commit_for_inode(p_s_inode); 133 reiserfs_write_unlock(p_s_inode->i_sb); 134 if (barrier_done != 1) 135 blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL); 136 if (barrier_done < 0) 137 return barrier_done; 138 return ( n_err < 0 ) ? -EIO : 0; 139} 140 141/* I really do not want to play with memory shortage right now, so 142 to simplify the code, we are not going to write more than this much pages at 143 a time. This still should considerably improve performance compared to 4k 144 at a time case. This is 32 pages of 4k size. */ 145#define REISERFS_WRITE_PAGES_AT_A_TIME (128 * 1024) / PAGE_CACHE_SIZE 146 147/* Allocates blocks for a file to fulfil write request. 148 Maps all unmapped but prepared pages from the list. 
149 Updates metadata with newly allocated blocknumbers as needed */ 150static int reiserfs_allocate_blocks_for_region( 151 struct reiserfs_transaction_handle *th, 152 struct inode *inode, /* Inode we work with */ 153 loff_t pos, /* Writing position */ 154 int num_pages, /* number of pages write going 155 to touch */ 156 int write_bytes, /* amount of bytes to write */ 157 struct page **prepared_pages, /* array of 158 prepared pages 159 */ 160 int blocks_to_allocate /* Amount of blocks we 161 need to allocate to 162 fit the data into file 163 */ 164 ) 165{ 166 struct cpu_key key; // cpu key of item that we are going to deal with 167 struct item_head *ih; // pointer to item head that we are going to deal with 168 struct buffer_head *bh; // Buffer head that contains items that we are going to deal with 169 __le32 * item; // pointer to item we are going to deal with 170 INITIALIZE_PATH(path); // path to item, that we are going to deal with. 171 b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored. 172 reiserfs_blocknr_hint_t hint; // hint structure for block allocator. 173 size_t res; // return value of various functions that we call. 174 int curr_block; // current block used to keep track of unmapped blocks. 175 int i; // loop counter 176 int itempos; // position in item 177 unsigned int from = (pos & (PAGE_CACHE_SIZE - 1)); // writing position in 178 // first page 179 unsigned int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; /* last modified byte offset in last page */ 180 __u64 hole_size ; // amount of blocks for a file hole, if it needed to be created. 
181 int modifying_this_item = 0; // Flag for items traversal code to keep track 182 // of the fact that we already prepared 183 // current block for journal 184 int will_prealloc = 0; 185 RFALSE(!blocks_to_allocate, "green-9004: tried to allocate zero blocks?"); 186 187 /* only preallocate if this is a small write */ 188 if (REISERFS_I(inode)->i_prealloc_count || 189 (!(write_bytes & (inode->i_sb->s_blocksize -1)) && 190 blocks_to_allocate < 191 REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize)) 192 will_prealloc = REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize; 193 194 allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) * 195 sizeof(b_blocknr_t), GFP_NOFS); 196 197 /* First we compose a key to point at the writing position, we want to do 198 that outside of any locking region. */ 199 make_cpu_key (&key, inode, pos+1, TYPE_ANY, 3/*key length*/); 200 201 /* If we came here, it means we absolutely need to open a transaction, 202 since we need to allocate some blocks */ 203 reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that. 204 res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS); // Wish I know if this number enough 205 if (res) 206 goto error_exit; 207 reiserfs_update_inode_transaction(inode) ; 208 209 /* Look for the in-tree position of our write, need path for block allocator */ 210 res = search_for_position_by_key(inode->i_sb, &key, &path); 211 if ( res == IO_ERROR ) { 212 res = -EIO; 213 goto error_exit; 214 } 215 216 /* Allocate blocks */ 217 /* First fill in "hint" structure for block allocator */ 218 hint.th = th; // transaction handle. 219 hint.path = &path; // Path, so that block allocator can determine packing locality or whatever it needs to determine. 220 hint.inode = inode; // Inode is needed by block allocator too. 221 hint.search_start = 0; // We have no hint on where to search free blocks for block allocator. 
222 hint.key = key.on_disk_key; // on disk key of file. 223 hint.block = inode->i_blocks>>(inode->i_sb->s_blocksize_bits-9); // Number of disk blocks this file occupies already. 224 hint.formatted_node = 0; // We are allocating blocks for unformatted node. 225 hint.preallocate = will_prealloc; 226 227 /* Call block allocator to allocate blocks */ 228 res = reiserfs_allocate_blocknrs(&hint, allocated_blocks, blocks_to_allocate, blocks_to_allocate); 229 if ( res != CARRY_ON ) { 230 if ( res == NO_DISK_SPACE ) { 231 /* We flush the transaction in case of no space. This way some 232 blocks might become free */ 233 SB_JOURNAL(inode->i_sb)->j_must_wait = 1; 234 res = restart_transaction(th, inode, &path); 235 if (res) 236 goto error_exit; 237 238 /* We might have scheduled, so search again */ 239 res = search_for_position_by_key(inode->i_sb, &key, &path); 240 if ( res == IO_ERROR ) { 241 res = -EIO; 242 goto error_exit; 243 } 244 245 /* update changed info for hint structure. */ 246 res = reiserfs_allocate_blocknrs(&hint, allocated_blocks, blocks_to_allocate, blocks_to_allocate); 247 if ( res != CARRY_ON ) { 248 res = -ENOSPC; 249 pathrelse(&path); 250 goto error_exit; 251 } 252 } else { 253 res = -ENOSPC; 254 pathrelse(&path); 255 goto error_exit; 256 } 257 } 258 259#ifdef __BIG_ENDIAN 260 // Too bad, I have not found any way to convert a given region from 261 // cpu format to little endian format 262 { 263 int i; 264 for ( i = 0; i < blocks_to_allocate ; i++) 265 allocated_blocks[i]=cpu_to_le32(allocated_blocks[i]); 266 } 267#endif 268 269 /* Blocks allocating well might have scheduled and tree might have changed, 270 let's search the tree again */ 271 /* find where in the tree our write should go */ 272 res = search_for_position_by_key(inode->i_sb, &key, &path); 273 if ( res == IO_ERROR ) { 274 res = -EIO; 275 goto error_exit_free_blocks; 276 } 277 278 bh = get_last_bh( &path ); // Get a bufferhead for last element in path. 
279 ih = get_ih( &path ); // Get a pointer to last item head in path. 280 item = get_item( &path ); // Get a pointer to last item in path 281 282 /* Let's see what we have found */ 283 if ( res != POSITION_FOUND ) { /* position not found, this means that we 284 might need to append file with holes 285 first */ 286 // Since we are writing past the file's end, we need to find out if 287 // there is a hole that needs to be inserted before our writing 288 // position, and how many blocks it is going to cover (we need to 289 // populate pointers to file blocks representing the hole with zeros) 290 291 { 292 int item_offset = 1; 293 /* 294 * if ih is stat data, its offset is 0 and we don't want to 295 * add 1 to pos in the hole_size calculation 296 */ 297 if (is_statdata_le_ih(ih)) 298 item_offset = 0; 299 hole_size = (pos + item_offset - 300 (le_key_k_offset( get_inode_item_key_version(inode), 301 &(ih->ih_key)) + 302 op_bytes_number(ih, inode->i_sb->s_blocksize))) >> 303 inode->i_sb->s_blocksize_bits; 304 } 305 306 if ( hole_size > 0 ) { 307 int to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE ); // How much data to insert first time. 308 /* area filled with zeroes, to supply as list of zero blocknumbers 309 We allocate it outside of loop just in case loop would spin for 310 several iterations. */ 311 char *zeros = kmalloc(to_paste*UNFM_P_SIZE, GFP_ATOMIC); // We cannot insert more than MAX_ITEM_LEN bytes anyway. 312 if ( !zeros ) { 313 res = -ENOMEM; 314 goto error_exit_free_blocks; 315 } 316 memset ( zeros, 0, to_paste*UNFM_P_SIZE); 317 do { 318 to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE ); 319 if ( is_indirect_le_ih(ih) ) { 320 /* Ok, there is existing indirect item already. 
Need to append it */ 321 /* Calculate position past inserted item */ 322 make_cpu_key( &key, inode, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize), TYPE_INDIRECT, 3); 323 res = reiserfs_paste_into_item( th, &path, &key, inode, (char *)zeros, UNFM_P_SIZE*to_paste); 324 if ( res ) { 325 kfree(zeros); 326 goto error_exit_free_blocks; 327 } 328 } else if ( is_statdata_le_ih(ih) ) { 329 /* No existing item, create it */ 330 /* item head for new item */ 331 struct item_head ins_ih; 332 333 /* create a key for our new item */ 334 make_cpu_key( &key, inode, 1, TYPE_INDIRECT, 3); 335 336 /* Create new item head for our new item */ 337 make_le_item_head (&ins_ih, &key, key.version, 1, 338 TYPE_INDIRECT, to_paste*UNFM_P_SIZE, 339 0 /* free space */); 340 341 /* Find where such item should live in the tree */ 342 res = search_item (inode->i_sb, &key, &path); 343 if ( res != ITEM_NOT_FOUND ) { 344 /* item should not exist, otherwise we have error */ 345 if ( res != -ENOSPC ) { 346 reiserfs_warning (inode->i_sb, 347 "green-9008: search_by_key (%K) returned %d", 348 &key, res); 349 } 350 res = -EIO; 351 kfree(zeros); 352 goto error_exit_free_blocks; 353 } 354 res = reiserfs_insert_item( th, &path, &key, &ins_ih, inode, (char *)zeros); 355 } else { 356 reiserfs_panic(inode->i_sb, "green-9011: Unexpected key type %K\n", &key); 357 } 358 if ( res ) { 359 kfree(zeros); 360 goto error_exit_free_blocks; 361 } 362 /* Now we want to check if transaction is too full, and if it is 363 we restart it. This will also free the path. 
*/ 364 if (journal_transaction_should_end(th, th->t_blocks_allocated)) { 365 res = restart_transaction(th, inode, &path); 366 if (res) { 367 pathrelse (&path); 368 kfree(zeros); 369 goto error_exit; 370 } 371 } 372 373 /* Well, need to recalculate path and stuff */ 374 set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + (to_paste << inode->i_blkbits)); 375 res = search_for_position_by_key(inode->i_sb, &key, &path); 376 if ( res == IO_ERROR ) { 377 res = -EIO; 378 kfree(zeros); 379 goto error_exit_free_blocks; 380 } 381 bh=get_last_bh(&path); 382 ih=get_ih(&path); 383 item = get_item(&path); 384 hole_size -= to_paste; 385 } while ( hole_size ); 386 kfree(zeros); 387 } 388 } 389 390 // Go through existing indirect items first 391 // replace all zeroes with blocknumbers from list 392 // Note that if no corresponding item was found, by previous search, 393 // it means there are no existing in-tree representation for file area 394 // we are going to overwrite, so there is nothing to scan through for holes. 395 for ( curr_block = 0, itempos = path.pos_in_item ; curr_block < blocks_to_allocate && res == POSITION_FOUND ; ) { 396retry: 397 398 if ( itempos >= ih_item_len(ih)/UNFM_P_SIZE ) { 399 /* We run out of data in this indirect item, let's look for another 400 one. */ 401 /* First if we are already modifying current item, log it */ 402 if ( modifying_this_item ) { 403 journal_mark_dirty (th, inode->i_sb, bh); 404 modifying_this_item = 0; 405 } 406 /* Then set the key to look for a new indirect item (offset of old 407 item is added to old item length */ 408 set_cpu_key_k_offset( &key, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize)); 409 /* Search ofor position of new key in the tree. 
*/ 410 res = search_for_position_by_key(inode->i_sb, &key, &path); 411 if ( res == IO_ERROR) { 412 res = -EIO; 413 goto error_exit_free_blocks; 414 } 415 bh=get_last_bh(&path); 416 ih=get_ih(&path); 417 item = get_item(&path); 418 itempos = path.pos_in_item; 419 continue; // loop to check all kinds of conditions and so on. 420 } 421 /* Ok, we have correct position in item now, so let's see if it is 422 representing file hole (blocknumber is zero) and fill it if needed */ 423 if ( !item[itempos] ) { 424 /* Ok, a hole. Now we need to check if we already prepared this 425 block to be journaled */ 426 while ( !modifying_this_item ) { // loop until succeed 427 /* Well, this item is not journaled yet, so we must prepare 428 it for journal first, before we can change it */ 429 struct item_head tmp_ih; // We copy item head of found item, 430 // here to detect if fs changed under 431 // us while we were preparing for 432 // journal. 433 int fs_gen; // We store fs generation here to find if someone 434 // changes fs under our feet 435 436 copy_item_head (&tmp_ih, ih); // Remember itemhead 437 fs_gen = get_generation (inode->i_sb); // remember fs generation 438 reiserfs_prepare_for_journal(inode->i_sb, bh, 1); // Prepare a buffer within which indirect item is stored for changing. 
439 if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { 440 // Sigh, fs was changed under us, we need to look for new 441 // location of item we are working with 442 443 /* unmark prepaerd area as journaled and search for it's 444 new position */ 445 reiserfs_restore_prepared_buffer(inode->i_sb, bh); 446 res = search_for_position_by_key(inode->i_sb, &key, &path); 447 if ( res == IO_ERROR) { 448 res = -EIO; 449 goto error_exit_free_blocks; 450 } 451 bh=get_last_bh(&path); 452 ih=get_ih(&path); 453 item = get_item(&path); 454 itempos = path.pos_in_item; 455 goto retry; 456 } 457 modifying_this_item = 1; 458 } 459 item[itempos] = allocated_blocks[curr_block]; // Assign new block 460 curr_block++; 461 } 462 itempos++; 463 } 464 465 if ( modifying_this_item ) { // We need to log last-accessed block, if it 466 // was modified, but not logged yet. 467 journal_mark_dirty (th, inode->i_sb, bh); 468 } 469 470 if ( curr_block < blocks_to_allocate ) { 471 // Oh, well need to append to indirect item, or to create indirect item 472 // if there weren't any 473 if ( is_indirect_le_ih(ih) ) { 474 // Existing indirect item - append. First calculate key for append 475 // position. We do not need to recalculate path as it should 476 // already point to correct place. 477 make_cpu_key( &key, inode, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize), TYPE_INDIRECT, 3); 478 res = reiserfs_paste_into_item( th, &path, &key, inode, (char *)(allocated_blocks+curr_block), UNFM_P_SIZE*(blocks_to_allocate-curr_block)); 479 if ( res ) { 480 goto error_exit_free_blocks; 481 } 482 } else if (is_statdata_le_ih(ih) ) { 483 // Last found item was statdata. That means we need to create indirect item. 
484 struct item_head ins_ih; /* itemhead for new item */ 485 486 /* create a key for our new item */ 487 make_cpu_key( &key, inode, 1, TYPE_INDIRECT, 3); // Position one, 488 // because that's 489 // where first 490 // indirect item 491 // begins 492 /* Create new item head for our new item */ 493 make_le_item_head (&ins_ih, &key, key.version, 1, TYPE_INDIRECT, 494 (blocks_to_allocate-curr_block)*UNFM_P_SIZE, 495 0 /* free space */); 496 /* Find where such item should live in the tree */ 497 res = search_item (inode->i_sb, &key, &path); 498 if ( res != ITEM_NOT_FOUND ) { 499 /* Well, if we have found such item already, or some error 500 occured, we need to warn user and return error */ 501 if ( res != -ENOSPC ) { 502 reiserfs_warning (inode->i_sb, 503 "green-9009: search_by_key (%K) " 504 "returned %d", &key, res); 505 } 506 res = -EIO; 507 goto error_exit_free_blocks; 508 } 509 /* Insert item into the tree with the data as its body */ 510 res = reiserfs_insert_item( th, &path, &key, &ins_ih, inode, (char *)(allocated_blocks+curr_block)); 511 } else { 512 reiserfs_panic(inode->i_sb, "green-9010: unexpected item type for key %K\n",&key); 513 } 514 } 515 516 // the caller is responsible for closing the transaction 517 // unless we return an error, they are also responsible for logging 518 // the inode. 519 // 520 pathrelse(&path); 521 /* 522 * cleanup prellocation from previous writes 523 * if this is a partial block write 524 */ 525 if (write_bytes & (inode->i_sb->s_blocksize -1)) 526 reiserfs_discard_prealloc(th, inode); 527 reiserfs_write_unlock(inode->i_sb); 528 529 // go through all the pages/buffers and map the buffers to newly allocated 530 // blocks (so that system knows where to write these pages later). 531 curr_block = 0; 532 for ( i = 0; i < num_pages ; i++ ) { 533 struct page *page=prepared_pages[i]; //current page 534 struct buffer_head *head = page_buffers(page);// first buffer for a page 535 int block_start, block_end; // in-page offsets for buffers. 
536 537 if (!page_buffers(page)) 538 reiserfs_panic(inode->i_sb, "green-9005: No buffers for prepared page???"); 539 540 /* For each buffer in page */ 541 for(bh = head, block_start = 0; bh != head || !block_start; 542 block_start=block_end, bh = bh->b_this_page) { 543 if (!bh) 544 reiserfs_panic(inode->i_sb, "green-9006: Allocated but absent buffer for a page?"); 545 block_end = block_start+inode->i_sb->s_blocksize; 546 if (i == 0 && block_end <= from ) 547 /* if this buffer is before requested data to map, skip it */ 548 continue; 549 if (i == num_pages - 1 && block_start >= to) 550 /* If this buffer is after requested data to map, abort 551 processing of current page */ 552 break; 553 554 if ( !buffer_mapped(bh) ) { // Ok, unmapped buffer, need to map it 555 map_bh( bh, inode->i_sb, le32_to_cpu(allocated_blocks[curr_block])); 556 curr_block++; 557 set_buffer_new(bh); 558 } 559 } 560 } 561 562 RFALSE( curr_block > blocks_to_allocate, "green-9007: Used too many blocks? weird"); 563 564 kfree(allocated_blocks); 565 return 0; 566 567// Need to deal with transaction here. 
568error_exit_free_blocks: 569 pathrelse(&path); 570 // free blocks 571 for( i = 0; i < blocks_to_allocate; i++ ) 572 reiserfs_free_block(th, inode, le32_to_cpu(allocated_blocks[i]), 1); 573 574error_exit: 575 if (th->t_trans_id) { 576 int err; 577 // update any changes we made to blk count 578 reiserfs_update_sd(th, inode); 579 err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS); 580 if (err) 581 res = err; 582 } 583 reiserfs_write_unlock(inode->i_sb); 584 kfree(allocated_blocks); 585 586 return res; 587} 588 589/* Unlock pages prepared by reiserfs_prepare_file_region_for_write */ 590static void reiserfs_unprepare_pages(struct page **prepared_pages, /* list of locked pages */ 591 size_t num_pages /* amount of pages */) { 592 int i; // loop counter 593 594 for (i=0; i < num_pages ; i++) { 595 struct page *page = prepared_pages[i]; 596 597 try_to_free_buffers(page); 598 unlock_page(page); 599 page_cache_release(page); 600 } 601} 602 603/* This function will copy data from userspace to specified pages within 604 supplied byte range */ 605static int reiserfs_copy_from_user_to_file_region( 606 loff_t pos, /* In-file position */ 607 int num_pages, /* Number of pages affected */ 608 int write_bytes, /* Amount of bytes to write */ 609 struct page **prepared_pages, /* pointer to 610 array to 611 prepared pages 612 */ 613 const char __user *buf /* Pointer to user-supplied 614 data*/ 615 ) 616{ 617 long page_fault=0; // status of copy_from_user. 618 int i; // loop counter. 619 int offset; // offset in page 620 621 for ( i = 0, offset = (pos & (PAGE_CACHE_SIZE-1)); i < num_pages ; i++,offset=0) { 622 size_t count = min_t(size_t,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page 623 struct page *page=prepared_pages[i]; // Current page we process. 
624 625 fault_in_pages_readable( buf, count); 626 627 /* Copy data from userspace to the current page */ 628 kmap(page); 629 page_fault = __copy_from_user(page_address(page)+offset, buf, count); // Copy the data. 630 /* Flush processor's dcache for this page */ 631 flush_dcache_page(page); 632 kunmap(page); 633 buf+=count; 634 write_bytes-=count; 635 636 if (page_fault) 637 break; // Was there a fault? abort. 638 } 639 640 return page_fault?-EFAULT:0; 641} 642 643/* taken fs/buffer.c:__block_commit_write */ 644int reiserfs_commit_page(struct inode *inode, struct page *page, 645 unsigned from, unsigned to) 646{ 647 unsigned block_start, block_end; 648 int partial = 0; 649 unsigned blocksize; 650 struct buffer_head *bh, *head; 651 unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT; 652 int new; 653 int logit = reiserfs_file_data_log(inode); 654 struct super_block *s = inode->i_sb; 655 int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; 656 struct reiserfs_transaction_handle th; 657 int ret = 0; 658 659 th.t_trans_id = 0; 660 blocksize = 1 << inode->i_blkbits; 661 662 if (logit) { 663 reiserfs_write_lock(s); 664 ret = journal_begin(&th, s, bh_per_page + 1); 665 if (ret) 666 goto drop_write_lock; 667 reiserfs_update_inode_transaction(inode); 668 } 669 for(bh = head = page_buffers(page), block_start = 0; 670 bh != head || !block_start; 671 block_start=block_end, bh = bh->b_this_page) 672 { 673 674 new = buffer_new(bh); 675 clear_buffer_new(bh); 676 block_end = block_start + blocksize; 677 if (block_end <= from || block_start >= to) { 678 if (!buffer_uptodate(bh)) 679 partial = 1; 680 } else { 681 set_buffer_uptodate(bh); 682 if (logit) { 683 reiserfs_prepare_for_journal(s, bh, 1); 684 journal_mark_dirty(&th, s, bh); 685 } else if (!buffer_dirty(bh)) { 686 mark_buffer_dirty(bh); 687 /* do data=ordered on any page past the end 688 * of file and any buffer marked BH_New. 
689 */ 690 if (reiserfs_data_ordered(inode->i_sb) && 691 (new || page->index >= i_size_index)) { 692 reiserfs_add_ordered_list(inode, bh); 693 } 694 } 695 } 696 } 697 if (logit) { 698 ret = journal_end(&th, s, bh_per_page + 1); 699drop_write_lock: 700 reiserfs_write_unlock(s); 701 } 702 /* 703 * If this is a partial write which happened to make all buffers 704 * uptodate then we can optimize away a bogus readpage() for 705 * the next read(). Here we 'discover' whether the page went 706 * uptodate as a result of this (potentially partial) write. 707 */ 708 if (!partial) 709 SetPageUptodate(page); 710 return ret; 711} 712 713 714/* Submit pages for write. This was separated from actual file copying 715 because we might want to allocate block numbers in-between. 716 This function assumes that caller will adjust file size to correct value. */ 717static int reiserfs_submit_file_region_for_write( 718 struct reiserfs_transaction_handle *th, 719 struct inode *inode, 720 loff_t pos, /* Writing position offset */ 721 size_t num_pages, /* Number of pages to write */ 722 size_t write_bytes, /* number of bytes to write */ 723 struct page **prepared_pages /* list of pages */ 724 ) 725{ 726 int status; // return status of block_commit_write. 727 int retval = 0; // Return value we are going to return. 728 int i; // loop counter 729 int offset; // Writing offset in page. 730 int orig_write_bytes = write_bytes; 731 int sd_update = 0; 732 733 for ( i = 0, offset = (pos & (PAGE_CACHE_SIZE-1)); i < num_pages ; i++,offset=0) { 734 int count = min_t(int,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page 735 struct page *page=prepared_pages[i]; // Current page we process. 736 737 status = reiserfs_commit_page(inode, page, offset, offset+count); 738 if ( status ) 739 retval = status; // To not overcomplicate matters We are going to 740 // submit all the pages even if there was error. 741 // we only remember error status to report it on 742 // exit. 
743 write_bytes-=count; 744 } 745 /* now that we've gotten all the ordered buffers marked dirty, 746 * we can safely update i_size and close any running transaction 747 */ 748 if ( pos + orig_write_bytes > inode->i_size) { 749 inode->i_size = pos + orig_write_bytes; // Set new size 750 /* If the file have grown so much that tail packing is no 751 * longer possible, reset "need to pack" flag */ 752 if ( (have_large_tails (inode->i_sb) && 753 inode->i_size > i_block_size (inode)*4) || 754 (have_small_tails (inode->i_sb) && 755 inode->i_size > i_block_size(inode)) ) 756 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask ; 757 else if ( (have_large_tails (inode->i_sb) && 758 inode->i_size < i_block_size (inode)*4) || 759 (have_small_tails (inode->i_sb) && 760 inode->i_size < i_block_size(inode)) ) 761 REISERFS_I(inode)->i_flags |= i_pack_on_close_mask ; 762 763 if (th->t_trans_id) { 764 reiserfs_write_lock(inode->i_sb); 765 reiserfs_update_sd(th, inode); // And update on-disk metadata 766 reiserfs_write_unlock(inode->i_sb); 767 } else 768 inode->i_sb->s_op->dirty_inode(inode); 769 770 sd_update = 1; 771 } 772 if (th->t_trans_id) { 773 reiserfs_write_lock(inode->i_sb); 774 if (!sd_update) 775 reiserfs_update_sd(th, inode); 776 status = journal_end(th, th->t_super, th->t_blocks_allocated); 777 if (status) 778 retval = status; 779 reiserfs_write_unlock(inode->i_sb); 780 } 781 th->t_trans_id = 0; 782 783 /* 784 * we have to unlock the pages after updating i_size, otherwise 785 * we race with writepage 786 */ 787 for ( i = 0; i < num_pages ; i++) { 788 struct page *page=prepared_pages[i]; 789 unlock_page(page); 790 mark_page_accessed(page); 791 page_cache_release(page); 792 } 793 return retval; 794} 795 796/* Look if passed writing region is going to touch file's tail 797 (if it is present). 
And if it is, convert the tail to unformatted node */
static int reiserfs_check_for_tail_and_convert( struct inode *inode, /* inode to deal with */
					 loff_t pos, /* Writing position */
					 int write_bytes /* amount of bytes to write */
				        )
{
    INITIALIZE_PATH(path); // needed for search_for_position
    struct cpu_key key; // Key that would represent last touched writing byte.
    struct item_head *ih; // item header of found block;
    int res; // Return value of various functions we call.
    int cont_expand_offset; // We will put offset for generic_cont_expand here
			    // This can be int just because tails are created
			    // only for small files.

/* this embodies a dependency on a particular tail policy */
    if ( inode->i_size >= inode->i_sb->s_blocksize*4 ) {
	/* such big files do not have tails, so we won't bother ourselves
	   to look for tails, simply return */
	return 0;
    }

    reiserfs_write_lock(inode->i_sb);
    /* find the item containing the last byte to be written, or if
     * writing past the end of the file then the last item of the
     * file (and then we check its type). */
    make_cpu_key (&key, inode, pos+write_bytes+1, TYPE_ANY, 3/*key length*/);
    res = search_for_position_by_key(inode->i_sb, &key, &path);
    if ( res == IO_ERROR ) {
	reiserfs_write_unlock(inode->i_sb);
	return -EIO;
    }
    ih = get_ih(&path);
    res = 0;
    if ( is_direct_le_ih(ih) ) {
	/* Ok, closest item is file tail (tails are stored in "direct"
	 * items), so we need to unpack it. */
	/* To not overcomplicate matters, we just call generic_cont_expand
	   which will in turn call other stuff and finally will boil down to
	   reiserfs_get_block() that would do the necessary conversion. */
	cont_expand_offset = le_key_k_offset(get_inode_item_key_version(inode), &(ih->ih_key));
	pathrelse(&path);
	res = generic_cont_expand( inode, cont_expand_offset);
    } else
	pathrelse(&path);

    reiserfs_write_unlock(inode->i_sb);
    return res;
}

/* This function locks pages starting from @pos for @inode.
   @num_pages pages are locked and stored in
   @prepared_pages array. Also buffers are allocated for these pages.
   First and last page of the region is read if it is overwritten only
   partially. If last page did not exist before write (file hole or file
   append), it is zeroed, then.
   Returns number of unallocated blocks that should be allocated to cover
   new file data, or a negative errno on failure. */
static int reiserfs_prepare_file_region_for_write(
			struct inode *inode /* Inode of the file */,
			loff_t pos, /* position in the file */
			size_t num_pages, /* number of pages to
					     prepare */
			size_t write_bytes, /* Amount of bytes to be
					       overwritten from
					       @pos */
			struct page **prepared_pages /* pointer to array
							where to store
							prepared pages */
					   )
{
    int res=0; // Return values of different functions we call.
    unsigned long index = pos >> PAGE_CACHE_SHIFT; // Offset in file in pages.
    int from = (pos & (PAGE_CACHE_SIZE - 1)); // Writing offset in first page
    int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1;
					/* offset of last modified byte in last
					   page */
    struct address_space *mapping = inode->i_mapping; // Pages are mapped here.
    int i; // Simple counter
    int blocks = 0; /* Return value (blocks that should be allocated) */
    struct buffer_head *bh, *head; // Current bufferhead and first bufferhead
				   // of a page.
    unsigned block_start, block_end; // Starting and ending offsets of current
				     // buffer in the page.
    struct buffer_head *wait[2], **wait_bh=wait; // Buffers for the page, if the
						 // page appeared to be not up
						 // to date. Note how we have
						 // at most 2 buffers, this is
						 // because we at most may
						 // partially overwrite two
						 // buffers for one page: one at
						 // the beginning of the write
						 // area and one at the end.
						 // Everything in the middle
						 // gets overwritten totally.

    struct cpu_key key; // cpu key of item that we are going to deal with
    struct item_head *ih = NULL; // pointer to item head that we are going to deal with
    struct buffer_head *itembuf=NULL; // Buffer head that contains items that we are going to deal with
    INITIALIZE_PATH(path); // path to item, that we are going to deal with.
    __le32 * item=NULL; // pointer to item we are going to deal with
    int item_pos=-1; /* Position in indirect item */


    if ( num_pages < 1 ) {
	reiserfs_warning (inode->i_sb,
			  "green-9001: reiserfs_prepare_file_region_for_write "
			  "called with zero number of pages to process");
	return -EFAULT;
    }

    /* We have 2 loops for pages. In the first loop we grab and lock the pages, so
       that nobody would touch these until we release the pages. Then
       we'd start to deal with mapping buffers to blocks. */
    for ( i = 0; i < num_pages; i++) {
	prepared_pages[i] = grab_cache_page(mapping, index + i); // locks the page
	if ( !prepared_pages[i]) {
	    res = -ENOMEM;
	    goto failed_page_grabbing;
	}
	if (!page_has_buffers(prepared_pages[i]))
	    create_empty_buffers(prepared_pages[i], inode->i_sb->s_blocksize, 0);
    }

    /* Let's count amount of blocks for a case where all the blocks
       overwritten are new (we will subtract already allocated blocks later)*/
    if ( num_pages > 2 )
	/* These are full-overwritten pages so we count all the blocks in
	   these pages are counted as needed to be allocated */
	blocks = (num_pages - 2) << (PAGE_CACHE_SHIFT - inode->i_blkbits);

    /* count blocks needed for first page (possibly partially written) */
    blocks += ((PAGE_CACHE_SIZE - from) >> inode->i_blkbits) +
	   !!(from & (inode->i_sb->s_blocksize-1)); /* roundup */

    /* Now we account for last page. If last page == first page (we
       overwrite only one page), we subtract all the blocks past the
       last writing position in a page out of already calculated number
       of blocks */
    blocks += ((num_pages > 1) << (PAGE_CACHE_SHIFT-inode->i_blkbits)) -
	   ((PAGE_CACHE_SIZE - to) >> inode->i_blkbits);
	   /* Note how we do not roundup here since partial blocks still
	      should be allocated */

    /* Now if all the write area lies past the file end, no point in
       mapping blocks, since there is none, so we just zero out remaining
       parts of first and last pages in write area (if needed) */
    if ( (pos & ~((loff_t)PAGE_CACHE_SIZE - 1)) > inode->i_size ) {
	if ( from != 0 ) {/* First page needs to be partially zeroed */
	    char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0);
	    memset(kaddr, 0, from);
	    kunmap_atomic( kaddr, KM_USER0);
	}
	if ( to != PAGE_CACHE_SIZE ) { /* Last page needs to be partially zeroed */
	    char *kaddr = kmap_atomic(prepared_pages[num_pages-1], KM_USER0);
	    memset(kaddr+to, 0, PAGE_CACHE_SIZE - to);
	    kunmap_atomic( kaddr, KM_USER0);
	}

	/* Since all blocks are new - use already calculated value */
	return blocks;
    }

    /* Well, since we write somewhere into the middle of a file, there is
       possibility we are writing over some already allocated blocks, so
       let's map these blocks and subtract number of such blocks out of blocks
       we need to allocate (calculated above) */
    /* Mask write position to start on blocksize, we do it out of the
       loop for performance reasons */
    pos &= ~((loff_t) inode->i_sb->s_blocksize - 1);
    /* Set cpu key to the starting position in a file (on left block boundary)*/
    make_cpu_key (&key, inode, 1 + ((pos) & ~((loff_t) inode->i_sb->s_blocksize - 1)), TYPE_ANY, 3/*key length*/);

    reiserfs_write_lock(inode->i_sb); // We need that for at least search_by_key()
    for ( i = 0; i < num_pages ; i++ ) {

	head = page_buffers(prepared_pages[i]);
	/* For each buffer in the page */
	for(bh = head, block_start = 0; bh != head || !block_start;
	    block_start=block_end, bh = bh->b_this_page) {
		if (!bh)
		    reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?");
		/* Find where this buffer ends */
		block_end = block_start+inode->i_sb->s_blocksize;
		if (i == 0 && block_end <= from )
		    /* if this buffer is before requested data to map, skip it*/
		    continue;

		if (i == num_pages - 1 && block_start >= to) {
		    /* If this buffer is after requested data to map, abort
		       processing of current page */
		    break;
		}

		if ( buffer_mapped(bh) && bh->b_blocknr !=0 ) {
		    /* This is optimisation for a case where buffer is mapped
		       and have blocknumber assigned. In case significant amount
		       of such buffers are present, we may avoid some amount
		       of search_by_key calls.
		       Probably it would be possible to move parts of this code
		       out of BKL, but I'm afraid that would overcomplicate code
		       without any noticeable benefit.
		    */
		    item_pos++;
		    /* Update the key */
		    set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + inode->i_sb->s_blocksize);
		    blocks--; // Decrease the amount of blocks that need to be
			      // allocated
		    continue; // Go to the next buffer
		}

		if ( !itembuf || /* if first iteration */
		     item_pos >= ih_item_len(ih)/UNFM_P_SIZE)
		{ /* or if we progressed past the
		     current unformatted_item */
			/* Try to find next item */
			res = search_for_position_by_key(inode->i_sb, &key, &path);
			/* Abort if no more items */
			if ( res != POSITION_FOUND ) {
			    /* make sure later loops don't use this item */
			    itembuf = NULL;
			    item = NULL;
			    break;
			}

			/* Update information about current indirect item */
			itembuf = get_last_bh( &path );
			ih = get_ih( &path );
			item = get_item( &path );
			item_pos = path.pos_in_item;

			RFALSE( !is_indirect_le_ih (ih), "green-9003: indirect item expected");
		}

		/* See if there is some block associated with the file
		   at that position, map the buffer to this block */
		if ( get_block_num(item,item_pos) ) {
		    map_bh(bh, inode->i_sb, get_block_num(item,item_pos));
		    blocks--; // Decrease the amount of blocks that need to be
			      // allocated
		}
		item_pos++;
		/* Update the key */
		set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + inode->i_sb->s_blocksize);
	}
    }
    pathrelse(&path); // Free the path
    reiserfs_write_unlock(inode->i_sb);

    /* Now zero out unmapped buffers for the first and last pages of
       write area or issue read requests if page is mapped. */
    /* First page, see if it is not uptodate */
    if ( !PageUptodate(prepared_pages[0]) ) {
	head = page_buffers(prepared_pages[0]);

	/* For each buffer in page */
	for(bh = head, block_start = 0; bh != head || !block_start;
	    block_start=block_end, bh = bh->b_this_page) {

	    if (!bh)
		reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?");
	    /* Find where this buffer ends */
	    block_end = block_start+inode->i_sb->s_blocksize;
	    if ( block_end <= from )
		/* if this buffer is before requested data to map, skip it*/
		continue;
	    if ( block_start < from ) { /* Aha, our partial buffer */
		if ( buffer_mapped(bh) ) { /* If it is mapped, we need to
					      issue READ request for it to
					      not lose data */
		    ll_rw_block(READ, 1, &bh);
		    *wait_bh++=bh;
		} else { /* Not mapped, zero it */
		    char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0);
		    memset(kaddr+block_start, 0, from-block_start);
		    kunmap_atomic( kaddr, KM_USER0);
		    set_buffer_uptodate(bh);
		}
	    }
	}
    }

    /* Last page, see if it is not uptodate, or if the last page is past the
       end of the file. */
    if ( !PageUptodate(prepared_pages[num_pages-1]) ||
	((pos+write_bytes)>>PAGE_CACHE_SHIFT) > (inode->i_size>>PAGE_CACHE_SHIFT) ) {
	head = page_buffers(prepared_pages[num_pages-1]);

	/* for each buffer in page */
	for(bh = head, block_start = 0; bh != head || !block_start;
	    block_start=block_end, bh = bh->b_this_page) {

	    if (!bh)
		reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?");
	    /* Find where this buffer ends */
	    block_end = block_start+inode->i_sb->s_blocksize;
	    if ( block_start >= to )
		/* if this buffer is after requested data to map, skip it*/
		break;
	    if ( block_end > to ) { /* Aha, our partial buffer */
		if ( buffer_mapped(bh) ) { /* If it is mapped, we need to
					      issue READ request for it to
					      not lose data */
		    ll_rw_block(READ, 1, &bh);
		    *wait_bh++=bh;
		} else { /* Not mapped, zero it */
		    char *kaddr = kmap_atomic(prepared_pages[num_pages-1], KM_USER0);
		    memset(kaddr+to, 0, block_end-to);
		    kunmap_atomic( kaddr, KM_USER0);
		    set_buffer_uptodate(bh);
		}
	    }
	}
    }

    /* Wait for read requests we made to happen, if necessary */
    while(wait_bh > wait) {
	wait_on_buffer(*--wait_bh);
	if (!buffer_uptodate(*wait_bh)) {
	    res = -EIO;
	    goto failed_read;
	}
    }

    return blocks;
failed_page_grabbing:
    num_pages = i;
failed_read:
    reiserfs_unprepare_pages(prepared_pages, num_pages);
    return res;
}

/* Write @count bytes at position @ppos in a file indicated by @file
   from the buffer @buf.

   generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want
   something simple that works. It is not for serious use by general purpose filesystems, excepting the one that it was
   written for (ext2/3).
This is for several reasons: 1131 1132 * It has no understanding of any filesystem specific optimizations. 1133 1134 * It enters the filesystem repeatedly for each page that is written. 1135 1136 * It depends on reiserfs_get_block() function which if implemented by reiserfs performs costly search_by_key 1137 * operation for each page it is supplied with. By contrast reiserfs_file_write() feeds as much as possible at a time 1138 * to reiserfs which allows for fewer tree traversals. 1139 1140 * Each indirect pointer insertion takes a lot of cpu, because it involves memory moves inside of blocks. 1141 1142 * Asking the block allocation code for blocks one at a time is slightly less efficient. 1143 1144 All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to 1145 use it, but we were in a hurry to make code freeze, and so it couldn't be revised then. This new code should make 1146 things right finally. 1147 1148 Future Features: providing search_by_key with hints. 1149 1150*/ 1151static ssize_t reiserfs_file_write( struct file *file, /* the file we are going to write into */ 1152 const char __user *buf, /* pointer to user supplied data 1153(in userspace) */ 1154 size_t count, /* amount of bytes to write */ 1155 loff_t *ppos /* pointer to position in file that we start writing at. Should be updated to 1156 * new current position before returning. */ ) 1157{ 1158 size_t already_written = 0; // Number of bytes already written to the file. 1159 loff_t pos; // Current position in the file. 1160 ssize_t res; // return value of various functions that we call. 1161 int err = 0; 1162 struct inode *inode = file->f_dentry->d_inode; // Inode of the file that we are writing to. 
1163 /* To simplify coding at this time, we store 1164 locked pages in array for now */ 1165 struct page * prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME]; 1166 struct reiserfs_transaction_handle th; 1167 th.t_trans_id = 0; 1168 1169 if ( file->f_flags & O_DIRECT) { // Direct IO needs treatment 1170 ssize_t result, after_file_end = 0; 1171 if ( (*ppos + count >= inode->i_size) || (file->f_flags & O_APPEND) ) { 1172 /* If we are appending a file, we need to put this savelink in here. 1173 If we will crash while doing direct io, finish_unfinished will 1174 cut the garbage from the file end. */ 1175 reiserfs_write_lock(inode->i_sb); 1176 err = journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT ); 1177 if (err) { 1178 reiserfs_write_unlock (inode->i_sb); 1179 return err; 1180 } 1181 reiserfs_update_inode_transaction(inode); 1182 add_save_link (&th, inode, 1 /* Truncate */); 1183 after_file_end = 1; 1184 err = journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT ); 1185 reiserfs_write_unlock(inode->i_sb); 1186 if (err) 1187 return err; 1188 } 1189 result = generic_file_write(file, buf, count, ppos); 1190 1191 if ( after_file_end ) { /* Now update i_size and remove the savelink */ 1192 struct reiserfs_transaction_handle th; 1193 reiserfs_write_lock(inode->i_sb); 1194 err = journal_begin(&th, inode->i_sb, 1); 1195 if (err) { 1196 reiserfs_write_unlock (inode->i_sb); 1197 return err; 1198 } 1199 reiserfs_update_inode_transaction(inode); 1200 reiserfs_update_sd(&th, inode); 1201 err = journal_end(&th, inode->i_sb, 1); 1202 if (err) { 1203 reiserfs_write_unlock (inode->i_sb); 1204 return err; 1205 } 1206 err = remove_save_link (inode, 1/* truncate */); 1207 reiserfs_write_unlock(inode->i_sb); 1208 if (err) 1209 return err; 1210 } 1211 1212 return result; 1213 } 1214 1215 if ( unlikely((ssize_t) count < 0 )) 1216 return -EINVAL; 1217 1218 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 1219 return -EFAULT; 1220 1221 down(&inode->i_sem); // locks the entire file for 
just us 1222 1223 pos = *ppos; 1224 1225 /* Check if we can write to specified region of file, file 1226 is not overly big and this kind of stuff. Adjust pos and 1227 count, if needed */ 1228 res = generic_write_checks(file, &pos, &count, 0); 1229 if (res) 1230 goto out; 1231 1232 if ( count == 0 ) 1233 goto out; 1234 1235 res = remove_suid(file->f_dentry); 1236 if (res) 1237 goto out; 1238 1239 inode_update_time(inode, 1); /* Both mtime and ctime */ 1240 1241 // Ok, we are done with all the checks. 1242 1243 // Now we should start real work 1244 1245 /* If we are going to write past the file's packed tail or if we are going 1246 to overwrite part of the tail, we need that tail to be converted into 1247 unformatted node */ 1248 res = reiserfs_check_for_tail_and_convert( inode, pos, count); 1249 if (res) 1250 goto out; 1251 1252 while ( count > 0) { 1253 /* This is the main loop in which we running until some error occures 1254 or until we write all of the data. */ 1255 size_t num_pages;/* amount of pages we are going to write this iteration */ 1256 size_t write_bytes; /* amount of bytes to write during this iteration */ 1257 size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */ 1258 1259 /* (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos*/ 1260 num_pages = !!((pos+count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial 1261 pages */ 1262 ((count + (pos & (PAGE_CACHE_SIZE-1))) >> PAGE_CACHE_SHIFT); 1263 /* convert size to amount of 1264 pages */ 1265 reiserfs_write_lock(inode->i_sb); 1266 if ( num_pages > REISERFS_WRITE_PAGES_AT_A_TIME 1267 || num_pages > reiserfs_can_fit_pages(inode->i_sb) ) { 1268 /* If we were asked to write more data than we want to or if there 1269 is not that much space, then we shorten amount of data to write 1270 for this iteration. 
*/ 1271 num_pages = min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME, reiserfs_can_fit_pages(inode->i_sb)); 1272 /* Also we should not forget to set size in bytes accordingly */ 1273 write_bytes = (num_pages << PAGE_CACHE_SHIFT) - 1274 (pos & (PAGE_CACHE_SIZE-1)); 1275 /* If position is not on the 1276 start of the page, we need 1277 to substract the offset 1278 within page */ 1279 } else 1280 write_bytes = count; 1281 1282 /* reserve the blocks to be allocated later, so that later on 1283 we still have the space to write the blocks to */ 1284 reiserfs_claim_blocks_to_be_allocated(inode->i_sb, num_pages << (PAGE_CACHE_SHIFT - inode->i_blkbits)); 1285 reiserfs_write_unlock(inode->i_sb); 1286 1287 if ( !num_pages ) { /* If we do not have enough space even for a single page... */ 1288 if ( pos > inode->i_size+inode->i_sb->s_blocksize-(pos & (inode->i_sb->s_blocksize-1))) { 1289 res = -ENOSPC; 1290 break; // In case we are writing past the end of the last file block, break. 1291 } 1292 // Otherwise we are possibly overwriting the file, so 1293 // let's set write size to be equal or less than blocksize. 1294 // This way we get it correctly for file holes. 1295 // But overwriting files on absolutelly full volumes would not 1296 // be very efficient. Well, people are not supposed to fill 1297 // 100% of disk space anyway. 1298 write_bytes = min_t(size_t, count, inode->i_sb->s_blocksize - (pos & (inode->i_sb->s_blocksize - 1))); 1299 num_pages = 1; 1300 // No blocks were claimed before, so do it now. 1301 reiserfs_claim_blocks_to_be_allocated(inode->i_sb, 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)); 1302 } 1303 1304 /* Prepare for writing into the region, read in all the 1305 partially overwritten pages, if needed. And lock the pages, 1306 so that nobody else can access these until we are done. 
1307 We get number of actual blocks needed as a result.*/ 1308 blocks_to_allocate = reiserfs_prepare_file_region_for_write(inode, pos, num_pages, write_bytes, prepared_pages); 1309 if ( blocks_to_allocate < 0 ) { 1310 res = blocks_to_allocate; 1311 reiserfs_release_claimed_blocks(inode->i_sb, num_pages << (PAGE_CACHE_SHIFT - inode->i_blkbits)); 1312 break; 1313 } 1314 1315 /* First we correct our estimate of how many blocks we need */ 1316 reiserfs_release_claimed_blocks(inode->i_sb, (num_pages << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits)) - blocks_to_allocate ); 1317 1318 if ( blocks_to_allocate > 0) {/*We only allocate blocks if we need to*/ 1319 /* Fill in all the possible holes and append the file if needed */ 1320 res = reiserfs_allocate_blocks_for_region(&th, inode, pos, num_pages, write_bytes, prepared_pages, blocks_to_allocate); 1321 } 1322 1323 /* well, we have allocated the blocks, so it is time to free 1324 the reservation we made earlier. */ 1325 reiserfs_release_claimed_blocks(inode->i_sb, blocks_to_allocate); 1326 if ( res ) { 1327 reiserfs_unprepare_pages(prepared_pages, num_pages); 1328 break; 1329 } 1330 1331/* NOTE that allocating blocks and filling blocks can be done in reverse order 1332 and probably we would do that just to get rid of garbage in files after a 1333 crash */ 1334 1335 /* Copy data from user-supplied buffer to file's pages */ 1336 res = reiserfs_copy_from_user_to_file_region(pos, num_pages, write_bytes, prepared_pages, buf); 1337 if ( res ) { 1338 reiserfs_unprepare_pages(prepared_pages, num_pages); 1339 break; 1340 } 1341 1342 /* Send the pages to disk and unlock them. 
*/ 1343 res = reiserfs_submit_file_region_for_write(&th, inode, pos, num_pages, 1344 write_bytes,prepared_pages); 1345 if ( res ) 1346 break; 1347 1348 already_written += write_bytes; 1349 buf += write_bytes; 1350 *ppos = pos += write_bytes; 1351 count -= write_bytes; 1352 balance_dirty_pages_ratelimited(inode->i_mapping); 1353 } 1354 1355 /* this is only true on error */ 1356 if (th.t_trans_id) { 1357 reiserfs_write_lock(inode->i_sb); 1358 err = journal_end(&th, th.t_super, th.t_blocks_allocated); 1359 reiserfs_write_unlock(inode->i_sb); 1360 if (err) { 1361 res = err; 1362 goto out; 1363 } 1364 } 1365 1366 if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) 1367 res = generic_osync_inode(inode, file->f_mapping, OSYNC_METADATA|OSYNC_DATA); 1368 1369 up(&inode->i_sem); 1370 reiserfs_async_progress_wait(inode->i_sb); 1371 return (already_written != 0)?already_written:res; 1372 1373out: 1374 up(&inode->i_sem); // unlock the file on exit. 1375 return res; 1376} 1377 1378static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user *buf, 1379 size_t count, loff_t pos) 1380{ 1381 return generic_file_aio_write(iocb, buf, count, pos); 1382} 1383 1384 1385 1386struct file_operations reiserfs_file_operations = { 1387 .read = generic_file_read, 1388 .write = reiserfs_file_write, 1389 .ioctl = reiserfs_ioctl, 1390 .mmap = generic_file_mmap, 1391 .release = reiserfs_file_release, 1392 .fsync = reiserfs_sync_file, 1393 .sendfile = generic_file_sendfile, 1394 .aio_read = generic_file_aio_read, 1395 .aio_write = reiserfs_aio_write, 1396}; 1397 1398 1399struct inode_operations reiserfs_file_inode_operations = { 1400 .truncate = reiserfs_vfs_truncate_file, 1401 .setattr = reiserfs_setattr, 1402 .setxattr = reiserfs_setxattr, 1403 .getxattr = reiserfs_getxattr, 1404 .listxattr = reiserfs_listxattr, 1405 .removexattr = reiserfs_removexattr, 1406 .permission = reiserfs_permission, 1407}; 1408 1409 1410