unix_io.c revision b8a953157bce577bff6f9d8437e8d7f2c881fe63
1/* 2 * unix_io.c --- This is the Unix (well, really POSIX) implementation 3 * of the I/O manager. 4 * 5 * Implements a one-block write-through cache. 6 * 7 * Includes support for Windows NT support under Cygwin. 8 * 9 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 10 * 2002 by Theodore Ts'o. 11 * 12 * %Begin-Header% 13 * This file may be redistributed under the terms of the GNU Public 14 * License. 15 * %End-Header% 16 */ 17 18#define _LARGEFILE_SOURCE 19#define _LARGEFILE64_SOURCE 20 21#include <stdio.h> 22#include <string.h> 23#if HAVE_UNISTD_H 24#include <unistd.h> 25#endif 26#if HAVE_ERRNO_H 27#include <errno.h> 28#endif 29#include <fcntl.h> 30#include <time.h> 31#ifdef __linux__ 32#include <sys/utsname.h> 33#endif 34#if HAVE_SYS_STAT_H 35#include <sys/stat.h> 36#endif 37#if HAVE_SYS_TYPES_H 38#include <sys/types.h> 39#endif 40#if HAVE_SYS_RESOURCE_H 41#include <sys/resource.h> 42#endif 43 44#include "ext2_fs.h" 45#include "ext2fs.h" 46 47/* 48 * For checking structure magic numbers... 49 */ 50 51#define EXT2_CHECK_MAGIC(struct, code) \ 52 if ((struct)->magic != (code)) return (code) 53 54struct unix_cache { 55 char *buf; 56 unsigned long block; 57 int access_time; 58 int dirty:1; 59 int in_use:1; 60}; 61 62#define CACHE_SIZE 8 63#define WRITE_DIRECT_SIZE 4 /* Must be smaller than CACHE_SIZE */ 64#define READ_DIRECT_SIZE 4 /* Should be smaller than CACHE_SIZE */ 65 66struct unix_private_data { 67 int magic; 68 int dev; 69 int flags; 70 int access_time; 71 struct unix_cache cache[CACHE_SIZE]; 72}; 73 74static errcode_t unix_open(const char *name, int flags, io_channel *channel); 75static errcode_t unix_close(io_channel channel); 76static errcode_t unix_set_blksize(io_channel channel, int blksize); 77static errcode_t unix_read_blk(io_channel channel, unsigned long block, 78 int count, void *data); 79static errcode_t unix_write_blk(io_channel channel, unsigned long block, 80 int count, const void *data); 81static errcode_t unix_flush(io_channel channel); 82static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 83 int size, const void *data); 84 85static void reuse_cache(io_channel channel, struct unix_private_data *data, 86 struct unix_cache *cache, unsigned long block); 87 88static struct struct_io_manager struct_unix_manager = { 89 EXT2_ET_MAGIC_IO_MANAGER, 90 "Unix I/O Manager", 91 unix_open, 92 unix_close, 93 unix_set_blksize, 94 unix_read_blk, 95 unix_write_blk, 96 unix_flush, 97#ifdef __CYGWIN__ 98 0 99#else 100 unix_write_byte 101#endif 102}; 103 104io_manager unix_io_manager = &struct_unix_manager; 105 106/* 107 * Here are the raw I/O functions 108 */ 109#ifndef __CYGWIN__ 110static errcode_t raw_read_blk(io_channel channel, 111 struct unix_private_data *data, 112 unsigned long block, 113 int count, void *buf) 114{ 115 errcode_t retval; 116 size_t size; 117 ext2_loff_t location; 118 int actual = 0; 119 120 size = (count < 0) ? -count : count * channel->block_size; 121 location = (ext2_loff_t) block * channel->block_size; 122 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 123 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 124 goto error_out; 125 } 126 actual = read(data->dev, buf, size); 127 if (actual != size) { 128 if (actual < 0) 129 actual = 0; 130 retval = EXT2_ET_SHORT_READ; 131 goto error_out; 132 } 133 return 0; 134 135error_out: 136 memset((char *) buf+actual, 0, size-actual); 137 if (channel->read_error) 138 retval = (channel->read_error)(channel, block, count, buf, 139 size, actual, retval); 140 return retval; 141} 142#else /* __CYGWIN__ */ 143/* 144 * Windows block devices only allow sector alignment IO in offset and size 145 */ 146static errcode_t raw_read_blk(io_channel channel, 147 struct unix_private_data *data, 148 unsigned long block, 149 int count, void *buf) 150{ 151 errcode_t retval; 152 size_t size, alignsize, fragment; 153 ext2_loff_t location; 154 int total = 0, actual; 155#define BLOCKALIGN 512 156 char sector[BLOCKALIGN]; 157 158 size = (count < 0) ? -count : count * channel->block_size; 159 location = (ext2_loff_t) block * channel->block_size; 160#ifdef DEBUG 161 printf("count=%d, size=%d, block=%d, blk_size=%d, location=%lx\n", 162 count, size, block, channel->block_size, location); 163#endif 164 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 165 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 166 goto error_out; 167 } 168 fragment = size % BLOCKALIGN; 169 alignsize = size - fragment; 170 if (alignsize) { 171 actual = read(data->dev, buf, alignsize); 172 if (actual != alignsize) 173 goto short_read; 174 } 175 if (fragment) { 176 actual = read(data->dev, sector, BLOCKALIGN); 177 if (actual != BLOCKALIGN) 178 goto short_read; 179 memcpy(buf+alignsize, sector, fragment); 180 } 181 return 0; 182 183short_read: 184 if (actual>0) 185 total += actual; 186 retval = EXT2_ET_SHORT_READ; 187 188error_out: 189 memset((char *) buf+total, 0, size-actual); 190 if (channel->read_error) 191 retval = (channel->read_error)(channel, block, count, buf, 192 size, actual, retval); 193 return retval; 194} 195#endif 196 197static errcode_t raw_write_blk(io_channel channel, 198 struct unix_private_data *data, 199 unsigned long block, 200 int count, const void *buf) 201{ 202 size_t size; 203 ext2_loff_t location; 204 int actual = 0; 205 errcode_t retval; 206 207 if (count == 1) 208 size = channel->block_size; 209 else { 210 if (count < 0) 211 size = -count; 212 else 213 size = count * channel->block_size; 214 } 215 216 location = (ext2_loff_t) block * channel->block_size; 217 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 218 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 219 goto error_out; 220 } 221 222 actual = write(data->dev, buf, size); 223 if (actual != size) { 224 retval = EXT2_ET_SHORT_WRITE; 225 goto error_out; 226 } 227 return 0; 228 229error_out: 230 if (channel->write_error) 231 retval = (channel->write_error)(channel, block, count, buf, 232 size, actual, retval); 233 return retval; 234} 235 236 237/* 238 * Here we implement the cache functions 239 */ 240 241/* Allocate the cache buffers */ 242static errcode_t alloc_cache(io_channel channel, 243 struct unix_private_data *data) 244{ 245 errcode_t retval; 246 struct unix_cache *cache; 247 int i; 248 249 data->access_time = 0; 250 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 251 cache->block = 0; 252 cache->access_time = 0; 253 cache->dirty = 0; 254 cache->in_use = 0; 255 if ((retval = ext2fs_get_mem(channel->block_size, 256 (void **) &cache->buf))) 257 return retval; 258 } 259 return 0; 260} 261 262/* Free the cache buffers */ 263static void free_cache(io_channel channel, 264 struct unix_private_data *data) 265{ 266 struct unix_cache *cache; 267 int i; 268 269 data->access_time = 0; 270 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 271 cache->block = 0; 272 cache->access_time = 0; 273 cache->dirty = 0; 274 cache->in_use = 0; 275 if (cache->buf) 276 ext2fs_free_mem((void **) &cache->buf); 277 cache->buf = 0; 278 } 279} 280 281#ifndef NO_IO_CACHE 282/* 283 * Try to find a block in the cache. If the block is not found, and 284 * eldest is a non-zero pointer, then fill in eldest with the cache 285 * entry to that should be reused. 286 */ 287static struct unix_cache *find_cached_block(io_channel channel, 288 struct unix_private_data *data, 289 unsigned long block, 290 struct unix_cache **eldest) 291{ 292 struct unix_cache *cache, *unused_cache, *oldest_cache; 293 int i; 294 295 unused_cache = oldest_cache = 0; 296 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 297 if (!cache->in_use) { 298 if (!unused_cache) 299 unused_cache = cache; 300 continue; 301 } 302 if (cache->block == block) { 303 cache->access_time = ++data->access_time; 304 return cache; 305 } 306 if (!oldest_cache || 307 (cache->access_time < oldest_cache->access_time)) 308 oldest_cache = cache; 309 } 310 if (eldest) 311 *eldest = (unused_cache) ? unused_cache : oldest_cache; 312 return 0; 313} 314 315/* 316 * Reuse a particular cache entry for another block. 317 */ 318static void reuse_cache(io_channel channel, struct unix_private_data *data, 319 struct unix_cache *cache, unsigned long block) 320{ 321 if (cache->dirty && cache->in_use) 322 raw_write_blk(channel, data, cache->block, 1, cache->buf); 323 324 cache->in_use = 1; 325 cache->dirty = 0; 326 cache->block = block; 327 cache->access_time = ++data->access_time; 328} 329 330/* 331 * Flush all of the blocks in the cache 332 */ 333static errcode_t flush_cached_blocks(io_channel channel, 334 struct unix_private_data *data, 335 int invalidate) 336 337{ 338 struct unix_cache *cache; 339 errcode_t retval, retval2; 340 int i; 341 342 retval2 = 0; 343 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 344 if (!cache->in_use) 345 continue; 346 347 if (invalidate) 348 cache->in_use = 0; 349 350 if (!cache->dirty) 351 continue; 352 353 retval = raw_write_blk(channel, data, 354 cache->block, 1, cache->buf); 355 if (retval) 356 retval2 = retval; 357 else 358 cache->dirty = 0; 359 } 360 return retval2; 361} 362#endif /* NO_IO_CACHE */ 363 364static errcode_t unix_open(const char *name, int flags, io_channel *channel) 365{ 366 io_channel io = NULL; 367 struct unix_private_data *data = NULL; 368 errcode_t retval; 369 int open_flags; 370 struct stat st; 371#ifdef __linux__ 372 struct utsname ut; 373#endif 374 375 if (name == 0) 376 return EXT2_ET_BAD_DEVICE_NAME; 377 retval = ext2fs_get_mem(sizeof(struct struct_io_channel), 378 (void **) &io); 379 if (retval) 380 return retval; 381 memset(io, 0, sizeof(struct struct_io_channel)); 382 io->magic = EXT2_ET_MAGIC_IO_CHANNEL; 383 retval = ext2fs_get_mem(sizeof(struct unix_private_data), 384 (void **) &data); 385 if (retval) 386 goto cleanup; 387 388 io->manager = unix_io_manager; 389 retval = ext2fs_get_mem(strlen(name)+1, (void **) &io->name); 390 if (retval) 391 goto cleanup; 392 393 strcpy(io->name, name); 394 io->private_data = data; 395 io->block_size = 1024; 396 io->read_error = 0; 397 io->write_error = 0; 398 io->refcount = 1; 399 400 memset(data, 0, sizeof(struct unix_private_data)); 401 data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL; 402 403 if ((retval = alloc_cache(io, data))) 404 goto cleanup; 405 406 open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY; 407#ifdef HAVE_OPEN64 408 data->dev = open64(name, open_flags); 409#else 410 data->dev = open(name, open_flags); 411#endif 412 if (data->dev < 0) { 413 retval = errno; 414 goto cleanup; 415 } 416 417#ifdef __linux__ 418#undef RLIM_INFINITY 419#if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4))) 420#define RLIM_INFINITY ((unsigned long)(~0UL>>1)) 421#else 422#define RLIM_INFINITY (~0UL) 423#endif 424 /* 425 * Work around a bug in 2.4.10-2.4.18 kernels where writes to 426 * block devices are wrongly getting hit by the filesize 427 * limit. This workaround isn't perfect, since it won't work 428 * if glibc wasn't built against 2.2 header files. (Sigh.) 429 * 430 */ 431 if ((flags & IO_FLAG_RW) && 432 (uname(&ut) == 0) && 433 ((ut.release[0] == '2') && (ut.release[1] == '.') && 434 (ut.release[2] == '4') && (ut.release[3] == '.') && 435 (ut.release[4] == '1') && (ut.release[5] >= '0') && 436 (ut.release[5] < '8')) && 437 (fstat(data->dev, &st) == 0) && 438 (S_ISBLK(st.st_mode))) { 439 struct rlimit rlim; 440 441 rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY; 442 setrlimit(RLIMIT_FSIZE, &rlim); 443 getrlimit(RLIMIT_FSIZE, &rlim); 444 if (((unsigned long) rlim.rlim_cur) < 445 ((unsigned long) rlim.rlim_max)) { 446 rlim.rlim_cur = rlim.rlim_max; 447 setrlimit(RLIMIT_FSIZE, &rlim); 448 } 449 } 450#endif 451 *channel = io; 452 return 0; 453 454cleanup: 455 if (data) { 456 free_cache(io, data); 457 ext2fs_free_mem((void **) &data); 458 } 459 if (io) 460 ext2fs_free_mem((void **) &io); 461 return retval; 462} 463 464static errcode_t unix_close(io_channel channel) 465{ 466 struct unix_private_data *data; 467 errcode_t retval = 0; 468 469 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 470 data = (struct unix_private_data *) channel->private_data; 471 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 472 473 if (--channel->refcount > 0) 474 return 0; 475 476#ifndef NO_IO_CACHE 477 retval = flush_cached_blocks(channel, data, 0); 478#endif 479 480 if (close(data->dev) < 0) 481 retval = errno; 482 free_cache(channel, data); 483 484 ext2fs_free_mem((void **) &channel->private_data); 485 if (channel->name) 486 ext2fs_free_mem((void **) &channel->name); 487 ext2fs_free_mem((void **) &channel); 488 return retval; 489} 490 491static errcode_t unix_set_blksize(io_channel channel, int blksize) 492{ 493 struct unix_private_data *data; 494 errcode_t retval; 495 496 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 497 data = (struct unix_private_data *) channel->private_data; 498 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 499 500 if (channel->block_size != blksize) { 501#ifndef NO_IO_CACHE 502 if ((retval = flush_cached_blocks(channel, data, 0))) 503 return retval; 504#endif 505 506 channel->block_size = blksize; 507 free_cache(channel, data); 508 if ((retval = alloc_cache(channel, data))) 509 return retval; 510 } 511 return 0; 512} 513 514 515static errcode_t unix_read_blk(io_channel channel, unsigned long block, 516 int count, void *buf) 517{ 518 struct unix_private_data *data; 519 struct unix_cache *cache, *reuse[READ_DIRECT_SIZE]; 520 errcode_t retval; 521 char *cp; 522 int i, j; 523 524 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 525 data = (struct unix_private_data *) channel->private_data; 526 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 527 528#ifdef NO_IO_CACHE 529 return raw_read_blk(channel, data, block, count, buf); 530#else 531 /* 532 * If we're doing an odd-sized read or a very large read, 533 * flush out the cache and then do a direct read. 534 */ 535 if (count < 0 || count > WRITE_DIRECT_SIZE) { 536 if ((retval = flush_cached_blocks(channel, data, 0))) 537 return retval; 538 return raw_read_blk(channel, data, block, count, buf); 539 } 540 541 cp = buf; 542 while (count > 0) { 543 /* If it's in the cache, use it! */ 544 if ((cache = find_cached_block(channel, data, block, 545 &reuse[0]))) { 546#ifdef DEBUG 547 printf("Using cached block %d\n", block); 548#endif 549 memcpy(cp, cache->buf, channel->block_size); 550 count--; 551 block++; 552 cp += channel->block_size; 553 continue; 554 } 555 /* 556 * Find the number of uncached blocks so we can do a 557 * single read request 558 */ 559 for (i=1; i < count; i++) 560 if (find_cached_block(channel, data, block+i, 561 &reuse[i])) 562 break; 563#ifdef DEBUG 564 printf("Reading %d blocks starting at %d\n", i, block); 565#endif 566 if ((retval = raw_read_blk(channel, data, block, i, cp))) 567 return retval; 568 569 /* Save the results in the cache */ 570 for (j=0; j < i; j++) { 571 count--; 572 cache = reuse[j]; 573 reuse_cache(channel, data, cache, block++); 574 memcpy(cache->buf, cp, channel->block_size); 575 cp += channel->block_size; 576 } 577 } 578 return 0; 579#endif /* NO_IO_CACHE */ 580} 581 582static errcode_t unix_write_blk(io_channel channel, unsigned long block, 583 int count, const void *buf) 584{ 585 struct unix_private_data *data; 586 struct unix_cache *cache, *reuse; 587 errcode_t retval = 0; 588 const char *cp; 589 int writethrough; 590 591 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 592 data = (struct unix_private_data *) channel->private_data; 593 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 594 595#ifdef NO_IO_CACHE 596 return raw_write_blk(channel, data, block, count, buf); 597#else 598 /* 599 * If we're doing an odd-sized write or a very large write, 600 * flush out the cache completely and then do a direct write. 601 */ 602 if (count < 0 || count > WRITE_DIRECT_SIZE) { 603 if ((retval = flush_cached_blocks(channel, data, 1))) 604 return retval; 605 return raw_write_blk(channel, data, block, count, buf); 606 } 607 608 /* 609 * For a moderate-sized multi-block write, first force a write 610 * if we're in write-through cache mode, and then fill the 611 * cache with the blocks. 612 */ 613 writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH; 614 if (writethrough) 615 retval = raw_write_blk(channel, data, block, count, buf); 616 617 cp = buf; 618 while (count > 0) { 619 cache = find_cached_block(channel, data, block, &reuse); 620 if (!cache) { 621 cache = reuse; 622 reuse_cache(channel, data, cache, block); 623 } 624 memcpy(cache->buf, cp, channel->block_size); 625 cache->dirty = !writethrough; 626 count--; 627 block++; 628 cp += channel->block_size; 629 } 630 return retval; 631#endif /* NO_IO_CACHE */ 632} 633 634static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 635 int size, const void *buf) 636{ 637 struct unix_private_data *data; 638 errcode_t retval = 0; 639 size_t actual; 640 641 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 642 data = (struct unix_private_data *) channel->private_data; 643 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 644 645#ifndef NO_IO_CACHE 646 /* 647 * Flush out the cache completely 648 */ 649 if ((retval = flush_cached_blocks(channel, data, 1))) 650 return retval; 651#endif 652 653 if (lseek(data->dev, offset, SEEK_SET) < 0) 654 return errno; 655 656 actual = write(data->dev, buf, size); 657 if (actual != size) 658 return EXT2_ET_SHORT_WRITE; 659 660 return 0; 661} 662 663/* 664 * Flush data buffers to disk. 665 */ 666static errcode_t unix_flush(io_channel channel) 667{ 668 struct unix_private_data *data; 669 errcode_t retval = 0; 670 671 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 672 data = (struct unix_private_data *) channel->private_data; 673 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 674 675#ifndef NO_IO_CACHE 676 retval = flush_cached_blocks(channel, data, 0); 677#endif 678 fsync(data->dev); 679 return retval; 680} 681 682