unix_io.c revision 2e8ca9a26b0bd7dae546a3f9a98df67b043fe3be
1/* 2 * unix_io.c --- This is the Unix (well, really POSIX) implementation 3 * of the I/O manager. 4 * 5 * Implements a one-block write-through cache. 6 * 7 * Includes support for Windows NT support under Cygwin. 8 * 9 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 10 * 2002 by Theodore Ts'o. 11 * 12 * %Begin-Header% 13 * This file may be redistributed under the terms of the GNU Public 14 * License. 15 * %End-Header% 16 */ 17 18#define _LARGEFILE_SOURCE 19#define _LARGEFILE64_SOURCE 20 21#include <stdio.h> 22#include <string.h> 23#if HAVE_UNISTD_H 24#include <unistd.h> 25#endif 26#if HAVE_ERRNO_H 27#include <errno.h> 28#endif 29#include <fcntl.h> 30#include <time.h> 31#ifdef __linux__ 32#include <sys/utsname.h> 33#endif 34#if HAVE_SYS_STAT_H 35#include <sys/stat.h> 36#endif 37#if HAVE_SYS_TYPES_H 38#include <sys/types.h> 39#endif 40#if HAVE_SYS_RESOURCE_H 41#include <sys/resource.h> 42#endif 43 44#include "ext2_fs.h" 45#include "ext2fs.h" 46 47/* 48 * For checking structure magic numbers... 49 */ 50 51#define EXT2_CHECK_MAGIC(struct, code) \ 52 if ((struct)->magic != (code)) return (code) 53 54struct unix_cache { 55 char *buf; 56 unsigned long block; 57 int access_time; 58 unsigned dirty:1; 59 unsigned in_use:1; 60}; 61 62#define CACHE_SIZE 8 63#define WRITE_DIRECT_SIZE 4 /* Must be smaller than CACHE_SIZE */ 64#define READ_DIRECT_SIZE 4 /* Should be smaller than CACHE_SIZE */ 65 66struct unix_private_data { 67 int magic; 68 int dev; 69 int flags; 70 int access_time; 71 ext2_loff_t offset; 72 struct unix_cache cache[CACHE_SIZE]; 73}; 74 75static errcode_t unix_open(const char *name, int flags, io_channel *channel); 76static errcode_t unix_close(io_channel channel); 77static errcode_t unix_set_blksize(io_channel channel, int blksize); 78static errcode_t unix_read_blk(io_channel channel, unsigned long block, 79 int count, void *data); 80static errcode_t unix_write_blk(io_channel channel, unsigned long block, 81 int count, const void *data); 82static errcode_t unix_flush(io_channel channel); 83static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 84 int size, const void *data); 85static errcode_t unix_set_option(io_channel channel, const char *option, 86 const char *arg); 87 88static void reuse_cache(io_channel channel, struct unix_private_data *data, 89 struct unix_cache *cache, unsigned long block); 90 91/* __FreeBSD_kernel__ is defined by GNU/kFreeBSD - the FreeBSD kernel 92 * does not know buffered block devices - everything is raw. */ 93#if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 94#define NEED_BOUNCE_BUFFER 95#else 96#undef NEED_BOUNCE_BUFFER 97#endif 98 99static struct struct_io_manager struct_unix_manager = { 100 EXT2_ET_MAGIC_IO_MANAGER, 101 "Unix I/O Manager", 102 unix_open, 103 unix_close, 104 unix_set_blksize, 105 unix_read_blk, 106 unix_write_blk, 107 unix_flush, 108#ifdef NEED_BOUNCE_BUFFER 109 0, 110#else 111 unix_write_byte, 112#endif 113 unix_set_option 114}; 115 116io_manager unix_io_manager = &struct_unix_manager; 117 118/* 119 * Here are the raw I/O functions 120 */ 121#ifndef NEED_BOUNCE_BUFFER 122static errcode_t raw_read_blk(io_channel channel, 123 struct unix_private_data *data, 124 unsigned long block, 125 int count, void *buf) 126{ 127 errcode_t retval; 128 ssize_t size; 129 ext2_loff_t location; 130 int actual = 0; 131 132 size = (count < 0) ? -count : count * channel->block_size; 133 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 134 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 135 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 136 goto error_out; 137 } 138 actual = read(data->dev, buf, size); 139 if (actual != size) { 140 if (actual < 0) 141 actual = 0; 142 retval = EXT2_ET_SHORT_READ; 143 goto error_out; 144 } 145 return 0; 146 147error_out: 148 memset((char *) buf+actual, 0, size-actual); 149 if (channel->read_error) 150 retval = (channel->read_error)(channel, block, count, buf, 151 size, actual, retval); 152 return retval; 153} 154#else /* NEED_BOUNCE_BUFFER */ 155/* 156 * Windows and FreeBSD block devices only allow sector alignment IO in offset and size 157 */ 158static errcode_t raw_read_blk(io_channel channel, 159 struct unix_private_data *data, 160 unsigned long block, 161 int count, void *buf) 162{ 163 errcode_t retval; 164 size_t size, alignsize, fragment; 165 ext2_loff_t location; 166 int total = 0, actual; 167#define BLOCKALIGN 512 168 char sector[BLOCKALIGN]; 169 170 size = (count < 0) ? -count : count * channel->block_size; 171 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 172#ifdef DEBUG 173 printf("count=%d, size=%d, block=%d, blk_size=%d, location=%lx\n", 174 count, size, block, channel->block_size, location); 175#endif 176 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 177 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 178 goto error_out; 179 } 180 fragment = size % BLOCKALIGN; 181 alignsize = size - fragment; 182 if (alignsize) { 183 actual = read(data->dev, buf, alignsize); 184 if (actual != alignsize) 185 goto short_read; 186 } 187 if (fragment) { 188 actual = read(data->dev, sector, BLOCKALIGN); 189 if (actual != BLOCKALIGN) 190 goto short_read; 191 memcpy(buf+alignsize, sector, fragment); 192 } 193 return 0; 194 195short_read: 196 if (actual>0) 197 total += actual; 198 retval = EXT2_ET_SHORT_READ; 199 200error_out: 201 memset((char *) buf+total, 0, size-actual); 202 if (channel->read_error) 203 retval = (channel->read_error)(channel, block, count, buf, 204 size, actual, retval); 205 return retval; 206} 207#endif 208 209static errcode_t raw_write_blk(io_channel channel, 210 struct unix_private_data *data, 211 unsigned long block, 212 int count, const void *buf) 213{ 214 ssize_t size; 215 ext2_loff_t location; 216 int actual = 0; 217 errcode_t retval; 218 219 if (count == 1) 220 size = channel->block_size; 221 else { 222 if (count < 0) 223 size = -count; 224 else 225 size = count * channel->block_size; 226 } 227 228 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 229 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 230 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 231 goto error_out; 232 } 233 234 actual = write(data->dev, buf, size); 235 if (actual != size) { 236 retval = EXT2_ET_SHORT_WRITE; 237 goto error_out; 238 } 239 return 0; 240 241error_out: 242 if (channel->write_error) 243 retval = (channel->write_error)(channel, block, count, buf, 244 size, actual, retval); 245 return retval; 246} 247 248 249/* 250 * Here we implement the cache functions 251 */ 252 253/* Allocate the cache buffers */ 254static errcode_t alloc_cache(io_channel channel, 255 struct unix_private_data *data) 256{ 257 errcode_t retval; 258 struct unix_cache *cache; 259 int i; 260 261 data->access_time = 0; 262 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 263 cache->block = 0; 264 cache->access_time = 0; 265 cache->dirty = 0; 266 cache->in_use = 0; 267 if ((retval = ext2fs_get_mem(channel->block_size, 268 &cache->buf))) 269 return retval; 270 } 271 return 0; 272} 273 274/* Free the cache buffers */ 275static void free_cache(struct unix_private_data *data) 276{ 277 struct unix_cache *cache; 278 int i; 279 280 data->access_time = 0; 281 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 282 cache->block = 0; 283 cache->access_time = 0; 284 cache->dirty = 0; 285 cache->in_use = 0; 286 if (cache->buf) 287 ext2fs_free_mem(&cache->buf); 288 cache->buf = 0; 289 } 290} 291 292#ifndef NO_IO_CACHE 293/* 294 * Try to find a block in the cache. If the block is not found, and 295 * eldest is a non-zero pointer, then fill in eldest with the cache 296 * entry to that should be reused. 297 */ 298static struct unix_cache *find_cached_block(struct unix_private_data *data, 299 unsigned long block, 300 struct unix_cache **eldest) 301{ 302 struct unix_cache *cache, *unused_cache, *oldest_cache; 303 int i; 304 305 unused_cache = oldest_cache = 0; 306 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 307 if (!cache->in_use) { 308 if (!unused_cache) 309 unused_cache = cache; 310 continue; 311 } 312 if (cache->block == block) { 313 cache->access_time = ++data->access_time; 314 return cache; 315 } 316 if (!oldest_cache || 317 (cache->access_time < oldest_cache->access_time)) 318 oldest_cache = cache; 319 } 320 if (eldest) 321 *eldest = (unused_cache) ? unused_cache : oldest_cache; 322 return 0; 323} 324 325/* 326 * Reuse a particular cache entry for another block. 327 */ 328static void reuse_cache(io_channel channel, struct unix_private_data *data, 329 struct unix_cache *cache, unsigned long block) 330{ 331 if (cache->dirty && cache->in_use) 332 raw_write_blk(channel, data, cache->block, 1, cache->buf); 333 334 cache->in_use = 1; 335 cache->dirty = 0; 336 cache->block = block; 337 cache->access_time = ++data->access_time; 338} 339 340/* 341 * Flush all of the blocks in the cache 342 */ 343static errcode_t flush_cached_blocks(io_channel channel, 344 struct unix_private_data *data, 345 int invalidate) 346 347{ 348 struct unix_cache *cache; 349 errcode_t retval, retval2; 350 int i; 351 352 retval2 = 0; 353 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 354 if (!cache->in_use) 355 continue; 356 357 if (invalidate) 358 cache->in_use = 0; 359 360 if (!cache->dirty) 361 continue; 362 363 retval = raw_write_blk(channel, data, 364 cache->block, 1, cache->buf); 365 if (retval) 366 retval2 = retval; 367 else 368 cache->dirty = 0; 369 } 370 return retval2; 371} 372#endif /* NO_IO_CACHE */ 373 374static errcode_t unix_open(const char *name, int flags, io_channel *channel) 375{ 376 io_channel io = NULL; 377 struct unix_private_data *data = NULL; 378 errcode_t retval; 379 int open_flags; 380 struct stat st; 381#ifdef __linux__ 382 struct utsname ut; 383#endif 384 385 if (name == 0) 386 return EXT2_ET_BAD_DEVICE_NAME; 387 retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io); 388 if (retval) 389 return retval; 390 memset(io, 0, sizeof(struct struct_io_channel)); 391 io->magic = EXT2_ET_MAGIC_IO_CHANNEL; 392 retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data); 393 if (retval) 394 goto cleanup; 395 396 io->manager = unix_io_manager; 397 retval = ext2fs_get_mem(strlen(name)+1, &io->name); 398 if (retval) 399 goto cleanup; 400 401 strcpy(io->name, name); 402 io->private_data = data; 403 io->block_size = 1024; 404 io->read_error = 0; 405 io->write_error = 0; 406 io->refcount = 1; 407 408 memset(data, 0, sizeof(struct unix_private_data)); 409 data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL; 410 411 if ((retval = alloc_cache(io, data))) 412 goto cleanup; 413 414 open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY; 415#ifdef HAVE_OPEN64 416 data->dev = open64(io->name, open_flags); 417#else 418 data->dev = open(io->name, open_flags); 419#endif 420 if (data->dev < 0) { 421 retval = errno; 422 goto cleanup; 423 } 424 425#ifdef __linux__ 426#undef RLIM_INFINITY 427#if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4))) 428#define RLIM_INFINITY ((unsigned long)(~0UL>>1)) 429#else 430#define RLIM_INFINITY (~0UL) 431#endif 432 /* 433 * Work around a bug in 2.4.10-2.4.18 kernels where writes to 434 * block devices are wrongly getting hit by the filesize 435 * limit. This workaround isn't perfect, since it won't work 436 * if glibc wasn't built against 2.2 header files. (Sigh.) 437 * 438 */ 439 if ((flags & IO_FLAG_RW) && 440 (uname(&ut) == 0) && 441 ((ut.release[0] == '2') && (ut.release[1] == '.') && 442 (ut.release[2] == '4') && (ut.release[3] == '.') && 443 (ut.release[4] == '1') && (ut.release[5] >= '0') && 444 (ut.release[5] < '8')) && 445 (fstat(data->dev, &st) == 0) && 446 (S_ISBLK(st.st_mode))) { 447 struct rlimit rlim; 448 449 rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY; 450 setrlimit(RLIMIT_FSIZE, &rlim); 451 getrlimit(RLIMIT_FSIZE, &rlim); 452 if (((unsigned long) rlim.rlim_cur) < 453 ((unsigned long) rlim.rlim_max)) { 454 rlim.rlim_cur = rlim.rlim_max; 455 setrlimit(RLIMIT_FSIZE, &rlim); 456 } 457 } 458#endif 459 *channel = io; 460 return 0; 461 462cleanup: 463 if (data) { 464 free_cache(data); 465 ext2fs_free_mem(&data); 466 } 467 if (io) 468 ext2fs_free_mem(&io); 469 return retval; 470} 471 472static errcode_t unix_close(io_channel channel) 473{ 474 struct unix_private_data *data; 475 errcode_t retval = 0; 476 477 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 478 data = (struct unix_private_data *) channel->private_data; 479 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 480 481 if (--channel->refcount > 0) 482 return 0; 483 484#ifndef NO_IO_CACHE 485 retval = flush_cached_blocks(channel, data, 0); 486#endif 487 488 if (close(data->dev) < 0) 489 retval = errno; 490 free_cache(data); 491 492 ext2fs_free_mem(&channel->private_data); 493 if (channel->name) 494 ext2fs_free_mem(&channel->name); 495 ext2fs_free_mem(&channel); 496 return retval; 497} 498 499static errcode_t unix_set_blksize(io_channel channel, int blksize) 500{ 501 struct unix_private_data *data; 502 errcode_t retval; 503 504 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 505 data = (struct unix_private_data *) channel->private_data; 506 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 507 508 if (channel->block_size != blksize) { 509#ifndef NO_IO_CACHE 510 if ((retval = flush_cached_blocks(channel, data, 0))) 511 return retval; 512#endif 513 514 channel->block_size = blksize; 515 free_cache(data); 516 if ((retval = alloc_cache(channel, data))) 517 return retval; 518 } 519 return 0; 520} 521 522 523static errcode_t unix_read_blk(io_channel channel, unsigned long block, 524 int count, void *buf) 525{ 526 struct unix_private_data *data; 527 struct unix_cache *cache, *reuse[READ_DIRECT_SIZE]; 528 errcode_t retval; 529 char *cp; 530 int i, j; 531 532 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 533 data = (struct unix_private_data *) channel->private_data; 534 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 535 536#ifdef NO_IO_CACHE 537 return raw_read_blk(channel, data, block, count, buf); 538#else 539 /* 540 * If we're doing an odd-sized read or a very large read, 541 * flush out the cache and then do a direct read. 542 */ 543 if (count < 0 || count > WRITE_DIRECT_SIZE) { 544 if ((retval = flush_cached_blocks(channel, data, 0))) 545 return retval; 546 return raw_read_blk(channel, data, block, count, buf); 547 } 548 549 cp = buf; 550 while (count > 0) { 551 /* If it's in the cache, use it! */ 552 if ((cache = find_cached_block(data, block, &reuse[0]))) { 553#ifdef DEBUG 554 printf("Using cached block %d\n", block); 555#endif 556 memcpy(cp, cache->buf, channel->block_size); 557 count--; 558 block++; 559 cp += channel->block_size; 560 continue; 561 } 562 /* 563 * Find the number of uncached blocks so we can do a 564 * single read request 565 */ 566 for (i=1; i < count; i++) 567 if (find_cached_block(data, block+i, &reuse[i])) 568 break; 569#ifdef DEBUG 570 printf("Reading %d blocks starting at %d\n", i, block); 571#endif 572 if ((retval = raw_read_blk(channel, data, block, i, cp))) 573 return retval; 574 575 /* Save the results in the cache */ 576 for (j=0; j < i; j++) { 577 count--; 578 cache = reuse[j]; 579 reuse_cache(channel, data, cache, block++); 580 memcpy(cache->buf, cp, channel->block_size); 581 cp += channel->block_size; 582 } 583 } 584 return 0; 585#endif /* NO_IO_CACHE */ 586} 587 588static errcode_t unix_write_blk(io_channel channel, unsigned long block, 589 int count, const void *buf) 590{ 591 struct unix_private_data *data; 592 struct unix_cache *cache, *reuse; 593 errcode_t retval = 0; 594 const char *cp; 595 int writethrough; 596 597 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 598 data = (struct unix_private_data *) channel->private_data; 599 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 600 601#ifdef NO_IO_CACHE 602 return raw_write_blk(channel, data, block, count, buf); 603#else 604 /* 605 * If we're doing an odd-sized write or a very large write, 606 * flush out the cache completely and then do a direct write. 607 */ 608 if (count < 0 || count > WRITE_DIRECT_SIZE) { 609 if ((retval = flush_cached_blocks(channel, data, 1))) 610 return retval; 611 return raw_write_blk(channel, data, block, count, buf); 612 } 613 614 /* 615 * For a moderate-sized multi-block write, first force a write 616 * if we're in write-through cache mode, and then fill the 617 * cache with the blocks. 618 */ 619 writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH; 620 if (writethrough) 621 retval = raw_write_blk(channel, data, block, count, buf); 622 623 cp = buf; 624 while (count > 0) { 625 cache = find_cached_block(data, block, &reuse); 626 if (!cache) { 627 cache = reuse; 628 reuse_cache(channel, data, cache, block); 629 } 630 memcpy(cache->buf, cp, channel->block_size); 631 cache->dirty = !writethrough; 632 count--; 633 block++; 634 cp += channel->block_size; 635 } 636 return retval; 637#endif /* NO_IO_CACHE */ 638} 639 640static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 641 int size, const void *buf) 642{ 643 struct unix_private_data *data; 644 errcode_t retval = 0; 645 ssize_t actual; 646 647 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 648 data = (struct unix_private_data *) channel->private_data; 649 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 650 651#ifndef NO_IO_CACHE 652 /* 653 * Flush out the cache completely 654 */ 655 if ((retval = flush_cached_blocks(channel, data, 1))) 656 return retval; 657#endif 658 659 if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0) 660 return errno; 661 662 actual = write(data->dev, buf, size); 663 if (actual != size) 664 return EXT2_ET_SHORT_WRITE; 665 666 return 0; 667} 668 669/* 670 * Flush data buffers to disk. 671 */ 672static errcode_t unix_flush(io_channel channel) 673{ 674 struct unix_private_data *data; 675 errcode_t retval = 0; 676 677 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 678 data = (struct unix_private_data *) channel->private_data; 679 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 680 681#ifndef NO_IO_CACHE 682 retval = flush_cached_blocks(channel, data, 0); 683#endif 684 fsync(data->dev); 685 return retval; 686} 687 688static errcode_t unix_set_option(io_channel channel, const char *option, 689 const char *arg) 690{ 691 struct unix_private_data *data; 692 unsigned long tmp; 693 char *end; 694 695 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 696 data = (struct unix_private_data *) channel->private_data; 697 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 698 699 if (!strcmp(option, "offset")) { 700 if (!arg) 701 return EXT2_ET_INVALID_ARGUMENT; 702 703 tmp = strtoul(arg, &end, 0); 704 if (*end) 705 return EXT2_ET_INVALID_ARGUMENT; 706 data->offset = tmp; 707 return 0; 708 } 709 return EXT2_ET_INVALID_ARGUMENT; 710} 711