unix_io.c revision 289e0557c24c68290b6d9b73b09674447801fdac
1/* 2 * unix_io.c --- This is the Unix (well, really POSIX) implementation 3 * of the I/O manager. 4 * 5 * Implements a one-block write-through cache. 6 * 7 * Includes support for Windows NT support under Cygwin. 8 * 9 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 10 * 2002 by Theodore Ts'o. 11 * 12 * %Begin-Header% 13 * This file may be redistributed under the terms of the GNU Public 14 * License. 15 * %End-Header% 16 */ 17 18#define _LARGEFILE_SOURCE 19#define _LARGEFILE64_SOURCE 20 21#include <stdio.h> 22#include <string.h> 23#if HAVE_UNISTD_H 24#include <unistd.h> 25#endif 26#if HAVE_ERRNO_H 27#include <errno.h> 28#endif 29#include <fcntl.h> 30#include <time.h> 31#ifdef __linux__ 32#include <sys/utsname.h> 33#endif 34#if HAVE_SYS_STAT_H 35#include <sys/stat.h> 36#endif 37#if HAVE_SYS_TYPES_H 38#include <sys/types.h> 39#endif 40#if HAVE_SYS_RESOURCE_H 41#include <sys/resource.h> 42#endif 43 44#include "ext2_fs.h" 45#include "ext2fs.h" 46 47/* 48 * For checking structure magic numbers... 49 */ 50 51#define EXT2_CHECK_MAGIC(struct, code) \ 52 if ((struct)->magic != (code)) return (code) 53 54struct unix_cache { 55 char *buf; 56 unsigned long block; 57 int access_time; 58 int dirty:1; 59 int in_use:1; 60}; 61 62#define CACHE_SIZE 8 63#define WRITE_DIRECT_SIZE 4 /* Must be smaller than CACHE_SIZE */ 64#define READ_DIRECT_SIZE 4 /* Should be smaller than CACHE_SIZE */ 65 66struct unix_private_data { 67 int magic; 68 int dev; 69 int flags; 70 int access_time; 71 struct unix_cache cache[CACHE_SIZE]; 72}; 73 74static errcode_t unix_open(const char *name, int flags, io_channel *channel); 75static errcode_t unix_close(io_channel channel); 76static errcode_t unix_set_blksize(io_channel channel, int blksize); 77static errcode_t unix_read_blk(io_channel channel, unsigned long block, 78 int count, void *data); 79static errcode_t unix_write_blk(io_channel channel, unsigned long block, 80 int count, const void *data); 81static errcode_t unix_flush(io_channel channel); 82static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 83 int size, const void *data); 84 85static void reuse_cache(io_channel channel, struct unix_private_data *data, 86 struct unix_cache *cache, unsigned long block); 87 88/* __FreeBSD_kernel__ is defined by GNU/kFreeBSD - the FreeBSD kernel 89 * does not know buffered block devices - everything is raw. */ 90#if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 91#define NEED_BOUNCE_BUFFER 92#else 93#undef NEED_BOUNCE_BUFFER 94#endif 95 96static struct struct_io_manager struct_unix_manager = { 97 EXT2_ET_MAGIC_IO_MANAGER, 98 "Unix I/O Manager", 99 unix_open, 100 unix_close, 101 unix_set_blksize, 102 unix_read_blk, 103 unix_write_blk, 104 unix_flush, 105#ifdef NEED_BOUNCE_BUFFER 106 0 107#else 108 unix_write_byte 109#endif 110}; 111 112io_manager unix_io_manager = &struct_unix_manager; 113 114/* 115 * Here are the raw I/O functions 116 */ 117#ifndef NEED_BOUNCE_BUFFER 118static errcode_t raw_read_blk(io_channel channel, 119 struct unix_private_data *data, 120 unsigned long block, 121 int count, void *buf) 122{ 123 errcode_t retval; 124 ssize_t size; 125 ext2_loff_t location; 126 int actual = 0; 127 128 size = (count < 0) ? -count : count * channel->block_size; 129 location = (ext2_loff_t) block * channel->block_size; 130 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 131 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 132 goto error_out; 133 } 134 actual = read(data->dev, buf, size); 135 if (actual != size) { 136 if (actual < 0) 137 actual = 0; 138 retval = EXT2_ET_SHORT_READ; 139 goto error_out; 140 } 141 return 0; 142 143error_out: 144 memset((char *) buf+actual, 0, size-actual); 145 if (channel->read_error) 146 retval = (channel->read_error)(channel, block, count, buf, 147 size, actual, retval); 148 return retval; 149} 150#else /* NEED_BOUNCE_BUFFER */ 151/* 152 * Windows and FreeBSD block devices only allow sector alignment IO in offset and size 153 */ 154static errcode_t raw_read_blk(io_channel channel, 155 struct unix_private_data *data, 156 unsigned long block, 157 int count, void *buf) 158{ 159 errcode_t retval; 160 size_t size, alignsize, fragment; 161 ext2_loff_t location; 162 int total = 0, actual; 163#define BLOCKALIGN 512 164 char sector[BLOCKALIGN]; 165 166 size = (count < 0) ? -count : count * channel->block_size; 167 location = (ext2_loff_t) block * channel->block_size; 168#ifdef DEBUG 169 printf("count=%d, size=%d, block=%d, blk_size=%d, location=%lx\n", 170 count, size, block, channel->block_size, location); 171#endif 172 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 173 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 174 goto error_out; 175 } 176 fragment = size % BLOCKALIGN; 177 alignsize = size - fragment; 178 if (alignsize) { 179 actual = read(data->dev, buf, alignsize); 180 if (actual != alignsize) 181 goto short_read; 182 } 183 if (fragment) { 184 actual = read(data->dev, sector, BLOCKALIGN); 185 if (actual != BLOCKALIGN) 186 goto short_read; 187 memcpy(buf+alignsize, sector, fragment); 188 } 189 return 0; 190 191short_read: 192 if (actual>0) 193 total += actual; 194 retval = EXT2_ET_SHORT_READ; 195 196error_out: 197 memset((char *) buf+total, 0, size-actual); 198 if (channel->read_error) 199 retval = (channel->read_error)(channel, block, count, buf, 200 size, actual, retval); 201 return retval; 202} 203#endif 204 205static errcode_t raw_write_blk(io_channel channel, 206 struct unix_private_data *data, 207 unsigned long block, 208 int count, const void *buf) 209{ 210 ssize_t size; 211 ext2_loff_t location; 212 int actual = 0; 213 errcode_t retval; 214 215 if (count == 1) 216 size = channel->block_size; 217 else { 218 if (count < 0) 219 size = -count; 220 else 221 size = count * channel->block_size; 222 } 223 224 location = (ext2_loff_t) block * channel->block_size; 225 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 226 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 227 goto error_out; 228 } 229 230 actual = write(data->dev, buf, size); 231 if (actual != size) { 232 retval = EXT2_ET_SHORT_WRITE; 233 goto error_out; 234 } 235 return 0; 236 237error_out: 238 if (channel->write_error) 239 retval = (channel->write_error)(channel, block, count, buf, 240 size, actual, retval); 241 return retval; 242} 243 244 245/* 246 * Here we implement the cache functions 247 */ 248 249/* Allocate the cache buffers */ 250static errcode_t alloc_cache(io_channel channel, 251 struct unix_private_data *data) 252{ 253 errcode_t retval; 254 struct unix_cache *cache; 255 int i; 256 257 data->access_time = 0; 258 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 259 cache->block = 0; 260 cache->access_time = 0; 261 cache->dirty = 0; 262 cache->in_use = 0; 263 if ((retval = ext2fs_get_mem(channel->block_size, 264 &cache->buf))) 265 return retval; 266 } 267 return 0; 268} 269 270/* Free the cache buffers */ 271static void free_cache(struct unix_private_data *data) 272{ 273 struct unix_cache *cache; 274 int i; 275 276 data->access_time = 0; 277 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 278 cache->block = 0; 279 cache->access_time = 0; 280 cache->dirty = 0; 281 cache->in_use = 0; 282 if (cache->buf) 283 ext2fs_free_mem(&cache->buf); 284 cache->buf = 0; 285 } 286} 287 288#ifndef NO_IO_CACHE 289/* 290 * Try to find a block in the cache. If the block is not found, and 291 * eldest is a non-zero pointer, then fill in eldest with the cache 292 * entry to that should be reused. 293 */ 294static struct unix_cache *find_cached_block(struct unix_private_data *data, 295 unsigned long block, 296 struct unix_cache **eldest) 297{ 298 struct unix_cache *cache, *unused_cache, *oldest_cache; 299 int i; 300 301 unused_cache = oldest_cache = 0; 302 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 303 if (!cache->in_use) { 304 if (!unused_cache) 305 unused_cache = cache; 306 continue; 307 } 308 if (cache->block == block) { 309 cache->access_time = ++data->access_time; 310 return cache; 311 } 312 if (!oldest_cache || 313 (cache->access_time < oldest_cache->access_time)) 314 oldest_cache = cache; 315 } 316 if (eldest) 317 *eldest = (unused_cache) ? unused_cache : oldest_cache; 318 return 0; 319} 320 321/* 322 * Reuse a particular cache entry for another block. 323 */ 324static void reuse_cache(io_channel channel, struct unix_private_data *data, 325 struct unix_cache *cache, unsigned long block) 326{ 327 if (cache->dirty && cache->in_use) 328 raw_write_blk(channel, data, cache->block, 1, cache->buf); 329 330 cache->in_use = 1; 331 cache->dirty = 0; 332 cache->block = block; 333 cache->access_time = ++data->access_time; 334} 335 336/* 337 * Flush all of the blocks in the cache 338 */ 339static errcode_t flush_cached_blocks(io_channel channel, 340 struct unix_private_data *data, 341 int invalidate) 342 343{ 344 struct unix_cache *cache; 345 errcode_t retval, retval2; 346 int i; 347 348 retval2 = 0; 349 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 350 if (!cache->in_use) 351 continue; 352 353 if (invalidate) 354 cache->in_use = 0; 355 356 if (!cache->dirty) 357 continue; 358 359 retval = raw_write_blk(channel, data, 360 cache->block, 1, cache->buf); 361 if (retval) 362 retval2 = retval; 363 else 364 cache->dirty = 0; 365 } 366 return retval2; 367} 368#endif /* NO_IO_CACHE */ 369 370static errcode_t unix_open(const char *name, int flags, io_channel *channel) 371{ 372 io_channel io = NULL; 373 struct unix_private_data *data = NULL; 374 errcode_t retval; 375 int open_flags; 376 struct stat st; 377#ifdef __linux__ 378 struct utsname ut; 379#endif 380 381 if (name == 0) 382 return EXT2_ET_BAD_DEVICE_NAME; 383 retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io); 384 if (retval) 385 return retval; 386 memset(io, 0, sizeof(struct struct_io_channel)); 387 io->magic = EXT2_ET_MAGIC_IO_CHANNEL; 388 retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data); 389 if (retval) 390 goto cleanup; 391 392 io->manager = unix_io_manager; 393 retval = ext2fs_get_mem(strlen(name)+1, &io->name); 394 if (retval) 395 goto cleanup; 396 397 strcpy(io->name, name); 398 io->private_data = data; 399 io->block_size = 1024; 400 io->read_error = 0; 401 io->write_error = 0; 402 io->refcount = 1; 403 404 memset(data, 0, sizeof(struct unix_private_data)); 405 data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL; 406 407 if ((retval = alloc_cache(io, data))) 408 goto cleanup; 409 410 open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY; 411#ifdef HAVE_OPEN64 412 data->dev = open64(name, open_flags); 413#else 414 data->dev = open(name, open_flags); 415#endif 416 if (data->dev < 0) { 417 retval = errno; 418 goto cleanup; 419 } 420 421#ifdef __linux__ 422#undef RLIM_INFINITY 423#if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4))) 424#define RLIM_INFINITY ((unsigned long)(~0UL>>1)) 425#else 426#define RLIM_INFINITY (~0UL) 427#endif 428 /* 429 * Work around a bug in 2.4.10-2.4.18 kernels where writes to 430 * block devices are wrongly getting hit by the filesize 431 * limit. This workaround isn't perfect, since it won't work 432 * if glibc wasn't built against 2.2 header files. (Sigh.) 433 * 434 */ 435 if ((flags & IO_FLAG_RW) && 436 (uname(&ut) == 0) && 437 ((ut.release[0] == '2') && (ut.release[1] == '.') && 438 (ut.release[2] == '4') && (ut.release[3] == '.') && 439 (ut.release[4] == '1') && (ut.release[5] >= '0') && 440 (ut.release[5] < '8')) && 441 (fstat(data->dev, &st) == 0) && 442 (S_ISBLK(st.st_mode))) { 443 struct rlimit rlim; 444 445 rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY; 446 setrlimit(RLIMIT_FSIZE, &rlim); 447 getrlimit(RLIMIT_FSIZE, &rlim); 448 if (((unsigned long) rlim.rlim_cur) < 449 ((unsigned long) rlim.rlim_max)) { 450 rlim.rlim_cur = rlim.rlim_max; 451 setrlimit(RLIMIT_FSIZE, &rlim); 452 } 453 } 454#endif 455 *channel = io; 456 return 0; 457 458cleanup: 459 if (data) { 460 free_cache(data); 461 ext2fs_free_mem(&data); 462 } 463 if (io) 464 ext2fs_free_mem(&io); 465 return retval; 466} 467 468static errcode_t unix_close(io_channel channel) 469{ 470 struct unix_private_data *data; 471 errcode_t retval = 0; 472 473 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 474 data = (struct unix_private_data *) channel->private_data; 475 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 476 477 if (--channel->refcount > 0) 478 return 0; 479 480#ifndef NO_IO_CACHE 481 retval = flush_cached_blocks(channel, data, 0); 482#endif 483 484 if (close(data->dev) < 0) 485 retval = errno; 486 free_cache(data); 487 488 ext2fs_free_mem(&channel->private_data); 489 if (channel->name) 490 ext2fs_free_mem(&channel->name); 491 ext2fs_free_mem(&channel); 492 return retval; 493} 494 495static errcode_t unix_set_blksize(io_channel channel, int blksize) 496{ 497 struct unix_private_data *data; 498 errcode_t retval; 499 500 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 501 data = (struct unix_private_data *) channel->private_data; 502 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 503 504 if (channel->block_size != blksize) { 505#ifndef NO_IO_CACHE 506 if ((retval = flush_cached_blocks(channel, data, 0))) 507 return retval; 508#endif 509 510 channel->block_size = blksize; 511 free_cache(data); 512 if ((retval = alloc_cache(channel, data))) 513 return retval; 514 } 515 return 0; 516} 517 518 519static errcode_t unix_read_blk(io_channel channel, unsigned long block, 520 int count, void *buf) 521{ 522 struct unix_private_data *data; 523 struct unix_cache *cache, *reuse[READ_DIRECT_SIZE]; 524 errcode_t retval; 525 char *cp; 526 int i, j; 527 528 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 529 data = (struct unix_private_data *) channel->private_data; 530 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 531 532#ifdef NO_IO_CACHE 533 return raw_read_blk(channel, data, block, count, buf); 534#else 535 /* 536 * If we're doing an odd-sized read or a very large read, 537 * flush out the cache and then do a direct read. 538 */ 539 if (count < 0 || count > WRITE_DIRECT_SIZE) { 540 if ((retval = flush_cached_blocks(channel, data, 0))) 541 return retval; 542 return raw_read_blk(channel, data, block, count, buf); 543 } 544 545 cp = buf; 546 while (count > 0) { 547 /* If it's in the cache, use it! */ 548 if ((cache = find_cached_block(data, block, &reuse[0]))) { 549#ifdef DEBUG 550 printf("Using cached block %d\n", block); 551#endif 552 memcpy(cp, cache->buf, channel->block_size); 553 count--; 554 block++; 555 cp += channel->block_size; 556 continue; 557 } 558 /* 559 * Find the number of uncached blocks so we can do a 560 * single read request 561 */ 562 for (i=1; i < count; i++) 563 if (find_cached_block(data, block+i, &reuse[i])) 564 break; 565#ifdef DEBUG 566 printf("Reading %d blocks starting at %d\n", i, block); 567#endif 568 if ((retval = raw_read_blk(channel, data, block, i, cp))) 569 return retval; 570 571 /* Save the results in the cache */ 572 for (j=0; j < i; j++) { 573 count--; 574 cache = reuse[j]; 575 reuse_cache(channel, data, cache, block++); 576 memcpy(cache->buf, cp, channel->block_size); 577 cp += channel->block_size; 578 } 579 } 580 return 0; 581#endif /* NO_IO_CACHE */ 582} 583 584static errcode_t unix_write_blk(io_channel channel, unsigned long block, 585 int count, const void *buf) 586{ 587 struct unix_private_data *data; 588 struct unix_cache *cache, *reuse; 589 errcode_t retval = 0; 590 const char *cp; 591 int writethrough; 592 593 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 594 data = (struct unix_private_data *) channel->private_data; 595 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 596 597#ifdef NO_IO_CACHE 598 return raw_write_blk(channel, data, block, count, buf); 599#else 600 /* 601 * If we're doing an odd-sized write or a very large write, 602 * flush out the cache completely and then do a direct write. 603 */ 604 if (count < 0 || count > WRITE_DIRECT_SIZE) { 605 if ((retval = flush_cached_blocks(channel, data, 1))) 606 return retval; 607 return raw_write_blk(channel, data, block, count, buf); 608 } 609 610 /* 611 * For a moderate-sized multi-block write, first force a write 612 * if we're in write-through cache mode, and then fill the 613 * cache with the blocks. 614 */ 615 writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH; 616 if (writethrough) 617 retval = raw_write_blk(channel, data, block, count, buf); 618 619 cp = buf; 620 while (count > 0) { 621 cache = find_cached_block(data, block, &reuse); 622 if (!cache) { 623 cache = reuse; 624 reuse_cache(channel, data, cache, block); 625 } 626 memcpy(cache->buf, cp, channel->block_size); 627 cache->dirty = !writethrough; 628 count--; 629 block++; 630 cp += channel->block_size; 631 } 632 return retval; 633#endif /* NO_IO_CACHE */ 634} 635 636static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 637 int size, const void *buf) 638{ 639 struct unix_private_data *data; 640 errcode_t retval = 0; 641 ssize_t actual; 642 643 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 644 data = (struct unix_private_data *) channel->private_data; 645 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 646 647#ifndef NO_IO_CACHE 648 /* 649 * Flush out the cache completely 650 */ 651 if ((retval = flush_cached_blocks(channel, data, 1))) 652 return retval; 653#endif 654 655 if (lseek(data->dev, offset, SEEK_SET) < 0) 656 return errno; 657 658 actual = write(data->dev, buf, size); 659 if (actual != size) 660 return EXT2_ET_SHORT_WRITE; 661 662 return 0; 663} 664 665/* 666 * Flush data buffers to disk. 667 */ 668static errcode_t unix_flush(io_channel channel) 669{ 670 struct unix_private_data *data; 671 errcode_t retval = 0; 672 673 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 674 data = (struct unix_private_data *) channel->private_data; 675 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 676 677#ifndef NO_IO_CACHE 678 retval = flush_cached_blocks(channel, data, 0); 679#endif 680 fsync(data->dev); 681 return retval; 682} 683 684