/* unix_io.c revision 0a05b90330d8b505469c72143964b954776be232 */
1/* 2 * unix_io.c --- This is the Unix (well, really POSIX) implementation 3 * of the I/O manager. 4 * 5 * Implements a one-block write-through cache. 6 * 7 * Includes support for Windows NT support under Cygwin. 8 * 9 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 10 * 2002 by Theodore Ts'o. 11 * 12 * %Begin-Header% 13 * This file may be redistributed under the terms of the GNU Library 14 * General Public License, version 2. 15 * %End-Header% 16 */ 17 18#define _LARGEFILE_SOURCE 19#define _LARGEFILE64_SOURCE 20#ifndef _GNU_SOURCE 21#define _GNU_SOURCE 22#endif 23 24#include "config.h" 25#include <stdio.h> 26#include <string.h> 27#if HAVE_UNISTD_H 28#include <unistd.h> 29#endif 30#if HAVE_ERRNO_H 31#include <errno.h> 32#endif 33#include <fcntl.h> 34#include <time.h> 35#ifdef __linux__ 36#include <sys/utsname.h> 37#endif 38#ifdef HAVE_SYS_IOCTL_H 39#include <sys/ioctl.h> 40#endif 41#ifdef HAVE_SYS_MOUNT_H 42#include <sys/mount.h> 43#endif 44#if HAVE_SYS_STAT_H 45#include <sys/stat.h> 46#endif 47#if HAVE_SYS_TYPES_H 48#include <sys/types.h> 49#endif 50#if HAVE_SYS_RESOURCE_H 51#include <sys/resource.h> 52#endif 53#if HAVE_LINUX_FALLOC_H 54#include <linux/falloc.h> 55#endif 56 57#if defined(__linux__) && defined(_IO) && !defined(BLKROGET) 58#define BLKROGET _IO(0x12, 94) /* Get read-only status (0 = read_write). */ 59#endif 60 61#if defined(__linux__) && defined(_IO) && !defined(BLKSSZGET) 62#define BLKSSZGET _IO(0x12,104)/* get block device sector size */ 63#endif 64 65#undef ALIGN_DEBUG 66 67#include "ext2_fs.h" 68#include "ext2fs.h" 69 70/* 71 * For checking structure magic numbers... 
72 */ 73 74#define EXT2_CHECK_MAGIC(struct, code) \ 75 if ((struct)->magic != (code)) return (code) 76 77struct unix_cache { 78 char *buf; 79 unsigned long block; 80 int access_time; 81 unsigned dirty:1; 82 unsigned in_use:1; 83}; 84 85#define CACHE_SIZE 8 86#define WRITE_DIRECT_SIZE 4 /* Must be smaller than CACHE_SIZE */ 87#define READ_DIRECT_SIZE 4 /* Should be smaller than CACHE_SIZE */ 88 89struct unix_private_data { 90 int magic; 91 int dev; 92 int flags; 93 int align; 94 int access_time; 95 ext2_loff_t offset; 96 struct unix_cache cache[CACHE_SIZE]; 97 void *bounce; 98 struct struct_io_stats io_stats; 99}; 100 101#define IS_ALIGNED(n, align) ((((unsigned long) n) & \ 102 ((unsigned long) ((align)-1))) == 0) 103 104static errcode_t unix_open(const char *name, int flags, io_channel *channel); 105static errcode_t unix_close(io_channel channel); 106static errcode_t unix_set_blksize(io_channel channel, int blksize); 107static errcode_t unix_read_blk(io_channel channel, unsigned long block, 108 int count, void *data); 109static errcode_t unix_write_blk(io_channel channel, unsigned long block, 110 int count, const void *data); 111static errcode_t unix_flush(io_channel channel); 112static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 113 int size, const void *data); 114static errcode_t unix_set_option(io_channel channel, const char *option, 115 const char *arg); 116static errcode_t unix_get_stats(io_channel channel, io_stats *stats) 117; 118static void reuse_cache(io_channel channel, struct unix_private_data *data, 119 struct unix_cache *cache, unsigned long long block); 120static errcode_t unix_read_blk64(io_channel channel, unsigned long long block, 121 int count, void *data); 122static errcode_t unix_write_blk64(io_channel channel, unsigned long long block, 123 int count, const void *data); 124static errcode_t unix_discard(io_channel channel, unsigned long long block, 125 unsigned long long count); 126 127static struct struct_io_manager 
struct_unix_manager = { 128 EXT2_ET_MAGIC_IO_MANAGER, 129 "Unix I/O Manager", 130 unix_open, 131 unix_close, 132 unix_set_blksize, 133 unix_read_blk, 134 unix_write_blk, 135 unix_flush, 136 unix_write_byte, 137 unix_set_option, 138 unix_get_stats, 139 unix_read_blk64, 140 unix_write_blk64, 141 unix_discard, 142}; 143 144io_manager unix_io_manager = &struct_unix_manager; 145 146static errcode_t unix_get_stats(io_channel channel, io_stats *stats) 147{ 148 errcode_t retval = 0; 149 150 struct unix_private_data *data; 151 152 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 153 data = (struct unix_private_data *) channel->private_data; 154 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 155 156 if (stats) 157 *stats = &data->io_stats; 158 159 return retval; 160} 161 162/* 163 * Here are the raw I/O functions 164 */ 165static errcode_t raw_read_blk(io_channel channel, 166 struct unix_private_data *data, 167 unsigned long long block, 168 int count, void *bufv) 169{ 170 errcode_t retval; 171 ssize_t size; 172 ext2_loff_t location; 173 int actual = 0; 174 unsigned char *buf = bufv; 175 176 size = (count < 0) ? -count : count * channel->block_size; 177 data->io_stats.bytes_read += size; 178 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 179 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 180 retval = errno ? 
errno : EXT2_ET_LLSEEK_FAILED; 181 goto error_out; 182 } 183 if ((channel->align == 0) || 184 (IS_ALIGNED(buf, channel->align) && 185 IS_ALIGNED(size, channel->align))) { 186 actual = read(data->dev, buf, size); 187 if (actual != size) { 188 short_read: 189 if (actual < 0) 190 actual = 0; 191 retval = EXT2_ET_SHORT_READ; 192 goto error_out; 193 } 194 return 0; 195 } 196 197#ifdef ALIGN_DEBUG 198 printf("raw_read_blk: O_DIRECT fallback: %p %lu\n", buf, 199 (unsigned long) size); 200#endif 201 202 /* 203 * The buffer or size which we're trying to read isn't aligned 204 * to the O_DIRECT rules, so we need to do this the hard way... 205 */ 206 while (size > 0) { 207 actual = read(data->dev, data->bounce, channel->block_size); 208 if (actual != channel->block_size) 209 goto short_read; 210 actual = size; 211 if (size > channel->block_size) 212 actual = channel->block_size; 213 memcpy(buf, data->bounce, actual); 214 size -= actual; 215 buf += actual; 216 } 217 return 0; 218 219error_out: 220 memset((char *) buf+actual, 0, size-actual); 221 if (channel->read_error) 222 retval = (channel->read_error)(channel, block, count, buf, 223 size, actual, retval); 224 return retval; 225} 226 227static errcode_t raw_write_blk(io_channel channel, 228 struct unix_private_data *data, 229 unsigned long long block, 230 int count, const void *bufv) 231{ 232 ssize_t size; 233 ext2_loff_t location; 234 int actual = 0; 235 errcode_t retval; 236 const unsigned char *buf = bufv; 237 238 if (count == 1) 239 size = channel->block_size; 240 else { 241 if (count < 0) 242 size = -count; 243 else 244 size = count * channel->block_size; 245 } 246 data->io_stats.bytes_written += size; 247 248 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 249 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 250 retval = errno ? 
errno : EXT2_ET_LLSEEK_FAILED; 251 goto error_out; 252 } 253 254 if ((channel->align == 0) || 255 (IS_ALIGNED(buf, channel->align) && 256 IS_ALIGNED(size, channel->align))) { 257 actual = write(data->dev, buf, size); 258 if (actual != size) { 259 short_write: 260 retval = EXT2_ET_SHORT_WRITE; 261 goto error_out; 262 } 263 return 0; 264 } 265 266#ifdef ALIGN_DEBUG 267 printf("raw_write_blk: O_DIRECT fallback: %p %lu\n", buf, 268 (unsigned long) size); 269#endif 270 /* 271 * The buffer or size which we're trying to write isn't aligned 272 * to the O_DIRECT rules, so we need to do this the hard way... 273 */ 274 while (size > 0) { 275 if (size < channel->block_size) { 276 actual = read(data->dev, data->bounce, 277 channel->block_size); 278 if (actual != channel->block_size) { 279 retval = EXT2_ET_SHORT_READ; 280 goto error_out; 281 } 282 } 283 actual = size; 284 if (size > channel->block_size) 285 actual = channel->block_size; 286 memcpy(data->bounce, buf, actual); 287 actual = write(data->dev, data->bounce, channel->block_size); 288 if (actual != channel->block_size) 289 goto short_write; 290 size -= actual; 291 buf += actual; 292 } 293 return 0; 294 295error_out: 296 if (channel->write_error) 297 retval = (channel->write_error)(channel, block, count, buf, 298 size, actual, retval); 299 return retval; 300} 301 302 303/* 304 * Here we implement the cache functions 305 */ 306 307/* Allocate the cache buffers */ 308static errcode_t alloc_cache(io_channel channel, 309 struct unix_private_data *data) 310{ 311 errcode_t retval; 312 struct unix_cache *cache; 313 int i; 314 315 data->access_time = 0; 316 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 317 cache->block = 0; 318 cache->access_time = 0; 319 cache->dirty = 0; 320 cache->in_use = 0; 321 if (cache->buf) 322 ext2fs_free_mem(&cache->buf); 323 retval = ext2fs_get_memalign(channel->block_size, 324 channel->align, &cache->buf); 325 if (retval) 326 return retval; 327 } 328 if (channel->align) { 329 if 
(data->bounce) 330 ext2fs_free_mem(&data->bounce); 331 retval = ext2fs_get_memalign(channel->block_size, 332 channel->align, 333 &data->bounce); 334 } 335 return retval; 336} 337 338/* Free the cache buffers */ 339static void free_cache(struct unix_private_data *data) 340{ 341 struct unix_cache *cache; 342 int i; 343 344 data->access_time = 0; 345 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 346 cache->block = 0; 347 cache->access_time = 0; 348 cache->dirty = 0; 349 cache->in_use = 0; 350 if (cache->buf) 351 ext2fs_free_mem(&cache->buf); 352 } 353 if (data->bounce) 354 ext2fs_free_mem(&data->bounce); 355} 356 357#ifndef NO_IO_CACHE 358/* 359 * Try to find a block in the cache. If the block is not found, and 360 * eldest is a non-zero pointer, then fill in eldest with the cache 361 * entry to that should be reused. 362 */ 363static struct unix_cache *find_cached_block(struct unix_private_data *data, 364 unsigned long long block, 365 struct unix_cache **eldest) 366{ 367 struct unix_cache *cache, *unused_cache, *oldest_cache; 368 int i; 369 370 unused_cache = oldest_cache = 0; 371 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 372 if (!cache->in_use) { 373 if (!unused_cache) 374 unused_cache = cache; 375 continue; 376 } 377 if (cache->block == block) { 378 cache->access_time = ++data->access_time; 379 return cache; 380 } 381 if (!oldest_cache || 382 (cache->access_time < oldest_cache->access_time)) 383 oldest_cache = cache; 384 } 385 if (eldest) 386 *eldest = (unused_cache) ? unused_cache : oldest_cache; 387 return 0; 388} 389 390/* 391 * Reuse a particular cache entry for another block. 
392 */ 393static void reuse_cache(io_channel channel, struct unix_private_data *data, 394 struct unix_cache *cache, unsigned long long block) 395{ 396 if (cache->dirty && cache->in_use) 397 raw_write_blk(channel, data, cache->block, 1, cache->buf); 398 399 cache->in_use = 1; 400 cache->dirty = 0; 401 cache->block = block; 402 cache->access_time = ++data->access_time; 403} 404 405/* 406 * Flush all of the blocks in the cache 407 */ 408static errcode_t flush_cached_blocks(io_channel channel, 409 struct unix_private_data *data, 410 int invalidate) 411 412{ 413 struct unix_cache *cache; 414 errcode_t retval, retval2; 415 int i; 416 417 retval2 = 0; 418 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 419 if (!cache->in_use) 420 continue; 421 422 if (invalidate) 423 cache->in_use = 0; 424 425 if (!cache->dirty) 426 continue; 427 428 retval = raw_write_blk(channel, data, 429 cache->block, 1, cache->buf); 430 if (retval) 431 retval2 = retval; 432 else 433 cache->dirty = 0; 434 } 435 return retval2; 436} 437#endif /* NO_IO_CACHE */ 438 439#ifdef __linux__ 440#ifndef BLKDISCARDZEROES 441#define BLKDISCARDZEROES _IO(0x12,124) 442#endif 443#endif 444 445static errcode_t unix_open(const char *name, int flags, io_channel *channel) 446{ 447 io_channel io = NULL; 448 struct unix_private_data *data = NULL; 449 errcode_t retval; 450 int open_flags, zeroes = 0; 451 int f_nocache = 0; 452 ext2fs_struct_stat st; 453#ifdef __linux__ 454 struct utsname ut; 455#endif 456 457 if (name == 0) 458 return EXT2_ET_BAD_DEVICE_NAME; 459 retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io); 460 if (retval) 461 goto cleanup; 462 memset(io, 0, sizeof(struct struct_io_channel)); 463 io->magic = EXT2_ET_MAGIC_IO_CHANNEL; 464 retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data); 465 if (retval) 466 goto cleanup; 467 468 io->manager = unix_io_manager; 469 retval = ext2fs_get_mem(strlen(name)+1, &io->name); 470 if (retval) 471 goto cleanup; 472 473 strcpy(io->name, 
name); 474 io->private_data = data; 475 io->block_size = 1024; 476 io->read_error = 0; 477 io->write_error = 0; 478 io->refcount = 1; 479 480 memset(data, 0, sizeof(struct unix_private_data)); 481 data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL; 482 data->io_stats.num_fields = 2; 483 484 open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY; 485 if (flags & IO_FLAG_EXCLUSIVE) 486 open_flags |= O_EXCL; 487#if defined(O_DIRECT) 488 if (flags & IO_FLAG_DIRECT_IO) 489 open_flags |= O_DIRECT; 490#elif defined(F_NOCACHE) 491 if (flags & IO_FLAG_DIRECT_IO) 492 f_nocache = F_NOCACHE; 493#endif 494 data->flags = flags; 495 496 data->dev = ext2fs_open_file(io->name, open_flags, 0); 497 if (data->dev < 0) { 498 retval = errno; 499 goto cleanup; 500 } 501 if (f_nocache) { 502 if (fcntl(data->dev, f_nocache, 1) < 0) { 503 retval = errno; 504 goto cleanup; 505 } 506 } 507 508 /* 509 * If the device is really a block device, then set the 510 * appropriate flag, otherwise we can set DISCARD_ZEROES flag 511 * because we are going to use punch hole instead of discard 512 * and if it succeed, subsequent read from sparse area returns 513 * zero. 
514 */ 515 if (ext2fs_stat(io->name, &st) == 0) { 516 if (S_ISBLK(st.st_mode)) 517 io->flags |= CHANNEL_FLAGS_BLOCK_DEVICE; 518 else 519 io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES; 520 } 521 522#ifdef BLKSSZGET 523 if (flags & IO_FLAG_DIRECT_IO) { 524 if (ioctl(data->dev, BLKSSZGET, &io->align) != 0) 525 io->align = io->block_size; 526 } 527#endif 528 529#ifdef BLKDISCARDZEROES 530 ioctl(data->dev, BLKDISCARDZEROES, &zeroes); 531 if (zeroes) 532 io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES; 533#endif 534 535#if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 536 /* 537 * Some operating systems require that the buffers be aligned, 538 * regardless of O_DIRECT 539 */ 540 io->align = 512; 541#endif 542 543 544 if ((retval = alloc_cache(io, data))) 545 goto cleanup; 546 547#ifdef BLKROGET 548 if (flags & IO_FLAG_RW) { 549 int error; 550 int readonly = 0; 551 552 /* Is the block device actually writable? */ 553 error = ioctl(data->dev, BLKROGET, &readonly); 554 if (!error && readonly) { 555 close(data->dev); 556 retval = EPERM; 557 goto cleanup; 558 } 559 } 560#endif 561 562#ifdef __linux__ 563#undef RLIM_INFINITY 564#if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4))) 565#define RLIM_INFINITY ((unsigned long)(~0UL>>1)) 566#else 567#define RLIM_INFINITY (~0UL) 568#endif 569 /* 570 * Work around a bug in 2.4.10-2.4.18 kernels where writes to 571 * block devices are wrongly getting hit by the filesize 572 * limit. This workaround isn't perfect, since it won't work 573 * if glibc wasn't built against 2.2 header files. (Sigh.) 
574 * 575 */ 576 if ((flags & IO_FLAG_RW) && 577 (uname(&ut) == 0) && 578 ((ut.release[0] == '2') && (ut.release[1] == '.') && 579 (ut.release[2] == '4') && (ut.release[3] == '.') && 580 (ut.release[4] == '1') && (ut.release[5] >= '0') && 581 (ut.release[5] < '8')) && 582 (ext2fs_stat(io->name, &st) == 0) && 583 (S_ISBLK(st.st_mode))) { 584 struct rlimit rlim; 585 586 rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY; 587 setrlimit(RLIMIT_FSIZE, &rlim); 588 getrlimit(RLIMIT_FSIZE, &rlim); 589 if (((unsigned long) rlim.rlim_cur) < 590 ((unsigned long) rlim.rlim_max)) { 591 rlim.rlim_cur = rlim.rlim_max; 592 setrlimit(RLIMIT_FSIZE, &rlim); 593 } 594 } 595#endif 596 *channel = io; 597 return 0; 598 599cleanup: 600 if (data) { 601 free_cache(data); 602 ext2fs_free_mem(&data); 603 } 604 if (io) 605 ext2fs_free_mem(&io); 606 return retval; 607} 608 609static errcode_t unix_close(io_channel channel) 610{ 611 struct unix_private_data *data; 612 errcode_t retval = 0; 613 614 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 615 data = (struct unix_private_data *) channel->private_data; 616 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 617 618 if (--channel->refcount > 0) 619 return 0; 620 621#ifndef NO_IO_CACHE 622 retval = flush_cached_blocks(channel, data, 0); 623#endif 624 625 if (close(data->dev) < 0) 626 retval = errno; 627 free_cache(data); 628 629 ext2fs_free_mem(&channel->private_data); 630 if (channel->name) 631 ext2fs_free_mem(&channel->name); 632 ext2fs_free_mem(&channel); 633 return retval; 634} 635 636static errcode_t unix_set_blksize(io_channel channel, int blksize) 637{ 638 struct unix_private_data *data; 639 errcode_t retval; 640 641 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 642 data = (struct unix_private_data *) channel->private_data; 643 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 644 645 if (channel->block_size != blksize) { 646#ifndef NO_IO_CACHE 647 if ((retval = flush_cached_blocks(channel, data, 0))) 648 
return retval; 649#endif 650 651 channel->block_size = blksize; 652 free_cache(data); 653 if ((retval = alloc_cache(channel, data))) 654 return retval; 655 } 656 return 0; 657} 658 659 660static errcode_t unix_read_blk64(io_channel channel, unsigned long long block, 661 int count, void *buf) 662{ 663 struct unix_private_data *data; 664 struct unix_cache *cache, *reuse[READ_DIRECT_SIZE]; 665 errcode_t retval; 666 char *cp; 667 int i, j; 668 669 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 670 data = (struct unix_private_data *) channel->private_data; 671 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 672 673#ifdef NO_IO_CACHE 674 return raw_read_blk(channel, data, block, count, buf); 675#else 676 /* 677 * If we're doing an odd-sized read or a very large read, 678 * flush out the cache and then do a direct read. 679 */ 680 if (count < 0 || count > WRITE_DIRECT_SIZE) { 681 if ((retval = flush_cached_blocks(channel, data, 0))) 682 return retval; 683 return raw_read_blk(channel, data, block, count, buf); 684 } 685 686 cp = buf; 687 while (count > 0) { 688 /* If it's in the cache, use it! 
*/ 689 if ((cache = find_cached_block(data, block, &reuse[0]))) { 690#ifdef DEBUG 691 printf("Using cached block %lu\n", block); 692#endif 693 memcpy(cp, cache->buf, channel->block_size); 694 count--; 695 block++; 696 cp += channel->block_size; 697 continue; 698 } 699 if (count == 1) { 700 /* 701 * Special case where we read directly into the 702 * cache buffer; important in the O_DIRECT case 703 */ 704 cache = reuse[0]; 705 reuse_cache(channel, data, cache, block); 706 if ((retval = raw_read_blk(channel, data, block, 1, 707 cache->buf))) { 708 cache->in_use = 0; 709 return retval; 710 } 711 memcpy(cp, cache->buf, channel->block_size); 712 return 0; 713 } 714 715 /* 716 * Find the number of uncached blocks so we can do a 717 * single read request 718 */ 719 for (i=1; i < count; i++) 720 if (find_cached_block(data, block+i, &reuse[i])) 721 break; 722#ifdef DEBUG 723 printf("Reading %d blocks starting at %lu\n", i, block); 724#endif 725 if ((retval = raw_read_blk(channel, data, block, i, cp))) 726 return retval; 727 728 /* Save the results in the cache */ 729 for (j=0; j < i; j++) { 730 count--; 731 cache = reuse[j]; 732 reuse_cache(channel, data, cache, block++); 733 memcpy(cache->buf, cp, channel->block_size); 734 cp += channel->block_size; 735 } 736 } 737 return 0; 738#endif /* NO_IO_CACHE */ 739} 740 741static errcode_t unix_read_blk(io_channel channel, unsigned long block, 742 int count, void *buf) 743{ 744 return unix_read_blk64(channel, block, count, buf); 745} 746 747static errcode_t unix_write_blk64(io_channel channel, unsigned long long block, 748 int count, const void *buf) 749{ 750 struct unix_private_data *data; 751 struct unix_cache *cache, *reuse; 752 errcode_t retval = 0; 753 const char *cp; 754 int writethrough; 755 756 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 757 data = (struct unix_private_data *) channel->private_data; 758 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 759 760#ifdef NO_IO_CACHE 761 return raw_write_blk(channel, 
data, block, count, buf); 762#else 763 /* 764 * If we're doing an odd-sized write or a very large write, 765 * flush out the cache completely and then do a direct write. 766 */ 767 if (count < 0 || count > WRITE_DIRECT_SIZE) { 768 if ((retval = flush_cached_blocks(channel, data, 1))) 769 return retval; 770 return raw_write_blk(channel, data, block, count, buf); 771 } 772 773 /* 774 * For a moderate-sized multi-block write, first force a write 775 * if we're in write-through cache mode, and then fill the 776 * cache with the blocks. 777 */ 778 writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH; 779 if (writethrough) 780 retval = raw_write_blk(channel, data, block, count, buf); 781 782 cp = buf; 783 while (count > 0) { 784 cache = find_cached_block(data, block, &reuse); 785 if (!cache) { 786 cache = reuse; 787 reuse_cache(channel, data, cache, block); 788 } 789 memcpy(cache->buf, cp, channel->block_size); 790 cache->dirty = !writethrough; 791 count--; 792 block++; 793 cp += channel->block_size; 794 } 795 return retval; 796#endif /* NO_IO_CACHE */ 797} 798 799static errcode_t unix_write_blk(io_channel channel, unsigned long block, 800 int count, const void *buf) 801{ 802 return unix_write_blk64(channel, block, count, buf); 803} 804 805static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 806 int size, const void *buf) 807{ 808 struct unix_private_data *data; 809 errcode_t retval = 0; 810 ssize_t actual; 811 812 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 813 data = (struct unix_private_data *) channel->private_data; 814 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 815 816 if (channel->align != 0) { 817#ifdef ALIGN_DEBUG 818 printf("unix_write_byte: O_DIRECT fallback\n"); 819#endif 820 return EXT2_ET_UNIMPLEMENTED; 821 } 822 823#ifndef NO_IO_CACHE 824 /* 825 * Flush out the cache completely 826 */ 827 if ((retval = flush_cached_blocks(channel, data, 1))) 828 return retval; 829#endif 830 831 if (lseek(data->dev, offset + 
data->offset, SEEK_SET) < 0) 832 return errno; 833 834 actual = write(data->dev, buf, size); 835 if (actual != size) 836 return EXT2_ET_SHORT_WRITE; 837 838 return 0; 839} 840 841/* 842 * Flush data buffers to disk. 843 */ 844static errcode_t unix_flush(io_channel channel) 845{ 846 struct unix_private_data *data; 847 errcode_t retval = 0; 848 849 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 850 data = (struct unix_private_data *) channel->private_data; 851 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 852 853#ifndef NO_IO_CACHE 854 retval = flush_cached_blocks(channel, data, 0); 855#endif 856 fsync(data->dev); 857 return retval; 858} 859 860static errcode_t unix_set_option(io_channel channel, const char *option, 861 const char *arg) 862{ 863 struct unix_private_data *data; 864 unsigned long long tmp; 865 char *end; 866 867 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 868 data = (struct unix_private_data *) channel->private_data; 869 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 870 871 if (!strcmp(option, "offset")) { 872 if (!arg) 873 return EXT2_ET_INVALID_ARGUMENT; 874 875 tmp = strtoull(arg, &end, 0); 876 if (*end) 877 return EXT2_ET_INVALID_ARGUMENT; 878 data->offset = tmp; 879 if (data->offset < 0) 880 return EXT2_ET_INVALID_ARGUMENT; 881 return 0; 882 } 883 return EXT2_ET_INVALID_ARGUMENT; 884} 885 886#if defined(__linux__) && !defined(BLKDISCARD) 887#define BLKDISCARD _IO(0x12,119) 888#endif 889 890static errcode_t unix_discard(io_channel channel, unsigned long long block, 891 unsigned long long count) 892{ 893 struct unix_private_data *data; 894 __uint64_t range[2]; 895 int ret; 896 897 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 898 data = (struct unix_private_data *) channel->private_data; 899 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 900 901 if (channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE) { 902#ifdef BLKDISCARD 903 range[0] = (__uint64_t)(block) * channel->block_size; 904 range[1] = 
(__uint64_t)(count) * channel->block_size; 905 906 ret = ioctl(data->dev, BLKDISCARD, &range); 907#else 908 goto unimplemented; 909#endif 910 } else { 911#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE) 912 /* 913 * If we are not on block device, try to use punch hole 914 * to reclaim free space. 915 */ 916 ret = fallocate(data->dev, 917 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 918 (off_t)(block) * channel->block_size, 919 (off_t)(count) * channel->block_size); 920#else 921 goto unimplemented; 922#endif 923 } 924 if (ret < 0) { 925 if (errno == EOPNOTSUPP) 926 goto unimplemented; 927 return errno; 928 } 929 return 0; 930unimplemented: 931 return EXT2_ET_UNIMPLEMENTED; 932} 933