unix_io.c revision d9a5d37535794842358e1cfe4faa4a89804ed209
1/* 2 * unix_io.c --- This is the Unix (well, really POSIX) implementation 3 * of the I/O manager. 4 * 5 * Implements a one-block write-through cache. 6 * 7 * Includes support for Windows NT support under Cygwin. 8 * 9 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 10 * 2002 by Theodore Ts'o. 11 * 12 * %Begin-Header% 13 * This file may be redistributed under the terms of the GNU Library 14 * General Public License, version 2. 15 * %End-Header% 16 */ 17 18#define _LARGEFILE_SOURCE 19#define _LARGEFILE64_SOURCE 20#ifndef _GNU_SOURCE 21#define _GNU_SOURCE 22#endif 23 24#include "config.h" 25#include <stdio.h> 26#include <string.h> 27#if HAVE_UNISTD_H 28#include <unistd.h> 29#endif 30#if HAVE_ERRNO_H 31#include <errno.h> 32#endif 33#include <fcntl.h> 34#include <time.h> 35#ifdef __linux__ 36#include <sys/utsname.h> 37#endif 38#ifdef HAVE_SYS_IOCTL_H 39#include <sys/ioctl.h> 40#endif 41#ifdef HAVE_SYS_MOUNT_H 42#include <sys/mount.h> 43#endif 44#if HAVE_SYS_STAT_H 45#include <sys/stat.h> 46#endif 47#if HAVE_SYS_TYPES_H 48#include <sys/types.h> 49#endif 50#if HAVE_SYS_RESOURCE_H 51#include <sys/resource.h> 52#endif 53#if HAVE_LINUX_FALLOC_H 54#include <linux/falloc.h> 55#endif 56 57#if defined(__linux__) && defined(_IO) && !defined(BLKROGET) 58#define BLKROGET _IO(0x12, 94) /* Get read-only status (0 = read_write). */ 59#endif 60 61#if defined(__linux__) && defined(_IO) && !defined(BLKSSZGET) 62#define BLKSSZGET _IO(0x12,104)/* get block device sector size */ 63#endif 64 65#undef ALIGN_DEBUG 66 67#include "ext2_fs.h" 68#include "ext2fs.h" 69 70/* 71 * For checking structure magic numbers... 72 */ 73 74#define EXT2_CHECK_MAGIC(struct, code) \ 75 if ((struct)->magic != (code)) return (code) 76 77struct unix_cache { 78 char *buf; 79 unsigned long block; 80 int access_time; 81 unsigned dirty:1; 82 unsigned in_use:1; 83}; 84 85#define CACHE_SIZE 8 86#define WRITE_DIRECT_SIZE 4 /* Must be smaller than CACHE_SIZE */ 87#define READ_DIRECT_SIZE 4 /* Should be smaller than CACHE_SIZE */ 88 89struct unix_private_data { 90 int magic; 91 int dev; 92 int flags; 93 int align; 94 int access_time; 95 ext2_loff_t offset; 96 struct unix_cache cache[CACHE_SIZE]; 97 void *bounce; 98 struct struct_io_stats io_stats; 99}; 100 101#define IS_ALIGNED(n, align) ((((unsigned long) n) & \ 102 ((unsigned long) ((align)-1))) == 0) 103 104static errcode_t unix_open(const char *name, int flags, io_channel *channel); 105static errcode_t unix_close(io_channel channel); 106static errcode_t unix_set_blksize(io_channel channel, int blksize); 107static errcode_t unix_read_blk(io_channel channel, unsigned long block, 108 int count, void *data); 109static errcode_t unix_write_blk(io_channel channel, unsigned long block, 110 int count, const void *data); 111static errcode_t unix_flush(io_channel channel); 112static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 113 int size, const void *data); 114static errcode_t unix_set_option(io_channel channel, const char *option, 115 const char *arg); 116static errcode_t unix_get_stats(io_channel channel, io_stats *stats) 117; 118static void reuse_cache(io_channel channel, struct unix_private_data *data, 119 struct unix_cache *cache, unsigned long long block); 120static errcode_t unix_read_blk64(io_channel channel, unsigned long long block, 121 int count, void *data); 122static errcode_t unix_write_blk64(io_channel channel, unsigned long long block, 123 int count, const void *data); 124static errcode_t unix_discard(io_channel channel, unsigned long long block, 125 unsigned long long count); 126 127static struct struct_io_manager struct_unix_manager = { 128 EXT2_ET_MAGIC_IO_MANAGER, 129 "Unix I/O Manager", 130 unix_open, 131 unix_close, 132 unix_set_blksize, 133 unix_read_blk, 134 unix_write_blk, 135 unix_flush, 136 unix_write_byte, 137 unix_set_option, 138 unix_get_stats, 139 unix_read_blk64, 140 unix_write_blk64, 141 unix_discard, 142}; 143 144io_manager unix_io_manager = &struct_unix_manager; 145 146static errcode_t unix_get_stats(io_channel channel, io_stats *stats) 147{ 148 errcode_t retval = 0; 149 150 struct unix_private_data *data; 151 152 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 153 data = (struct unix_private_data *) channel->private_data; 154 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 155 156 if (stats) 157 *stats = &data->io_stats; 158 159 return retval; 160} 161 162/* 163 * Here are the raw I/O functions 164 */ 165static errcode_t raw_read_blk(io_channel channel, 166 struct unix_private_data *data, 167 unsigned long long block, 168 int count, void *bufv) 169{ 170 errcode_t retval; 171 ssize_t size; 172 ext2_loff_t location; 173 int actual = 0; 174 unsigned char *buf = bufv; 175 176 size = (count < 0) ? -count : count * channel->block_size; 177 data->io_stats.bytes_read += size; 178 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 179 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 180 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 181 goto error_out; 182 } 183 if ((data->align == 0) || 184 ((IS_ALIGNED(buf, data->align)) && IS_ALIGNED(size, data->align))) { 185 actual = read(data->dev, buf, size); 186 if (actual != size) { 187 short_read: 188 if (actual < 0) 189 actual = 0; 190 retval = EXT2_ET_SHORT_READ; 191 goto error_out; 192 } 193 return 0; 194 } 195 196#ifdef ALIGN_DEBUG 197 printf("raw_read_blk: O_DIRECT fallback: %p %lu\n", buf, 198 (unsigned long) size); 199#endif 200 201 /* 202 * The buffer or size which we're trying to read isn't aligned 203 * to the O_DIRECT rules, so we need to do this the hard way... 204 */ 205 while (size > 0) { 206 actual = read(data->dev, data->bounce, channel->block_size); 207 if (actual != channel->block_size) 208 goto short_read; 209 actual = size; 210 if (size > channel->block_size) 211 actual = channel->block_size; 212 memcpy(buf, data->bounce, actual); 213 size -= actual; 214 buf += actual; 215 } 216 return 0; 217 218error_out: 219 memset((char *) buf+actual, 0, size-actual); 220 if (channel->read_error) 221 retval = (channel->read_error)(channel, block, count, buf, 222 size, actual, retval); 223 return retval; 224} 225 226static errcode_t raw_write_blk(io_channel channel, 227 struct unix_private_data *data, 228 unsigned long long block, 229 int count, const void *bufv) 230{ 231 ssize_t size; 232 ext2_loff_t location; 233 int actual = 0; 234 errcode_t retval; 235 const unsigned char *buf = bufv; 236 237 if (count == 1) 238 size = channel->block_size; 239 else { 240 if (count < 0) 241 size = -count; 242 else 243 size = count * channel->block_size; 244 } 245 data->io_stats.bytes_written += size; 246 247 location = ((ext2_loff_t) block * channel->block_size) + data->offset; 248 if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) { 249 retval = errno ? errno : EXT2_ET_LLSEEK_FAILED; 250 goto error_out; 251 } 252 253 if ((data->align == 0) || 254 ((IS_ALIGNED(buf, data->align)) && IS_ALIGNED(size, data->align))) { 255 actual = write(data->dev, buf, size); 256 if (actual != size) { 257 short_write: 258 retval = EXT2_ET_SHORT_WRITE; 259 goto error_out; 260 } 261 return 0; 262 } 263 264#ifdef ALIGN_DEBUG 265 printf("raw_write_blk: O_DIRECT fallback: %p %lu\n", buf, 266 (unsigned long) size); 267#endif 268 /* 269 * The buffer or size which we're trying to write isn't aligned 270 * to the O_DIRECT rules, so we need to do this the hard way... 271 */ 272 while (size > 0) { 273 if (size < channel->block_size) { 274 actual = read(data->dev, data->bounce, 275 channel->block_size); 276 if (actual != channel->block_size) { 277 retval = EXT2_ET_SHORT_READ; 278 goto error_out; 279 } 280 } 281 actual = size; 282 if (size > channel->block_size) 283 actual = channel->block_size; 284 memcpy(data->bounce, buf, actual); 285 actual = write(data->dev, data->bounce, channel->block_size); 286 if (actual != channel->block_size) 287 goto short_write; 288 size -= actual; 289 buf += actual; 290 } 291 return 0; 292 293error_out: 294 if (channel->write_error) 295 retval = (channel->write_error)(channel, block, count, buf, 296 size, actual, retval); 297 return retval; 298} 299 300 301/* 302 * Here we implement the cache functions 303 */ 304 305/* Allocate the cache buffers */ 306static errcode_t alloc_cache(io_channel channel, 307 struct unix_private_data *data) 308{ 309 errcode_t retval; 310 struct unix_cache *cache; 311 int i; 312 313 data->access_time = 0; 314 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 315 cache->block = 0; 316 cache->access_time = 0; 317 cache->dirty = 0; 318 cache->in_use = 0; 319 if (cache->buf) 320 ext2fs_free_mem(&cache->buf); 321 retval = ext2fs_get_memalign(channel->block_size, 322 data->align, &cache->buf); 323 if (retval) 324 return retval; 325 } 326 if (data->align) { 327 if (data->bounce) 328 ext2fs_free_mem(&data->bounce); 329 retval = ext2fs_get_memalign(channel->block_size, data->align, 330 &data->bounce); 331 } 332 return retval; 333} 334 335/* Free the cache buffers */ 336static void free_cache(struct unix_private_data *data) 337{ 338 struct unix_cache *cache; 339 int i; 340 341 data->access_time = 0; 342 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 343 cache->block = 0; 344 cache->access_time = 0; 345 cache->dirty = 0; 346 cache->in_use = 0; 347 if (cache->buf) 348 ext2fs_free_mem(&cache->buf); 349 } 350 if (data->bounce) 351 ext2fs_free_mem(&data->bounce); 352} 353 354#ifndef NO_IO_CACHE 355/* 356 * Try to find a block in the cache. If the block is not found, and 357 * eldest is a non-zero pointer, then fill in eldest with the cache 358 * entry to that should be reused. 359 */ 360static struct unix_cache *find_cached_block(struct unix_private_data *data, 361 unsigned long long block, 362 struct unix_cache **eldest) 363{ 364 struct unix_cache *cache, *unused_cache, *oldest_cache; 365 int i; 366 367 unused_cache = oldest_cache = 0; 368 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 369 if (!cache->in_use) { 370 if (!unused_cache) 371 unused_cache = cache; 372 continue; 373 } 374 if (cache->block == block) { 375 cache->access_time = ++data->access_time; 376 return cache; 377 } 378 if (!oldest_cache || 379 (cache->access_time < oldest_cache->access_time)) 380 oldest_cache = cache; 381 } 382 if (eldest) 383 *eldest = (unused_cache) ? unused_cache : oldest_cache; 384 return 0; 385} 386 387/* 388 * Reuse a particular cache entry for another block. 389 */ 390static void reuse_cache(io_channel channel, struct unix_private_data *data, 391 struct unix_cache *cache, unsigned long long block) 392{ 393 if (cache->dirty && cache->in_use) 394 raw_write_blk(channel, data, cache->block, 1, cache->buf); 395 396 cache->in_use = 1; 397 cache->dirty = 0; 398 cache->block = block; 399 cache->access_time = ++data->access_time; 400} 401 402/* 403 * Flush all of the blocks in the cache 404 */ 405static errcode_t flush_cached_blocks(io_channel channel, 406 struct unix_private_data *data, 407 int invalidate) 408 409{ 410 struct unix_cache *cache; 411 errcode_t retval, retval2; 412 int i; 413 414 retval2 = 0; 415 for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) { 416 if (!cache->in_use) 417 continue; 418 419 if (invalidate) 420 cache->in_use = 0; 421 422 if (!cache->dirty) 423 continue; 424 425 retval = raw_write_blk(channel, data, 426 cache->block, 1, cache->buf); 427 if (retval) 428 retval2 = retval; 429 else 430 cache->dirty = 0; 431 } 432 return retval2; 433} 434#endif /* NO_IO_CACHE */ 435 436#ifdef __linux__ 437#ifndef BLKDISCARDZEROES 438#define BLKDISCARDZEROES _IO(0x12,124) 439#endif 440#endif 441 442static errcode_t unix_open(const char *name, int flags, io_channel *channel) 443{ 444 io_channel io = NULL; 445 struct unix_private_data *data = NULL; 446 errcode_t retval; 447 int open_flags, zeroes = 0; 448 int f_nocache = 0; 449 ext2fs_struct_stat st; 450#ifdef __linux__ 451 struct utsname ut; 452#endif 453 454 if (name == 0) 455 return EXT2_ET_BAD_DEVICE_NAME; 456 retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io); 457 if (retval) 458 goto cleanup; 459 memset(io, 0, sizeof(struct struct_io_channel)); 460 io->magic = EXT2_ET_MAGIC_IO_CHANNEL; 461 retval = ext2fs_get_mem(sizeof(struct unix_private_data), &data); 462 if (retval) 463 goto cleanup; 464 465 io->manager = unix_io_manager; 466 retval = ext2fs_get_mem(strlen(name)+1, &io->name); 467 if (retval) 468 goto cleanup; 469 470 strcpy(io->name, name); 471 io->private_data = data; 472 io->block_size = 1024; 473 io->read_error = 0; 474 io->write_error = 0; 475 io->refcount = 1; 476 477 memset(data, 0, sizeof(struct unix_private_data)); 478 data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL; 479 data->io_stats.num_fields = 2; 480 481 open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY; 482 if (flags & IO_FLAG_EXCLUSIVE) 483 open_flags |= O_EXCL; 484#if defined(O_DIRECT) 485 if (flags & IO_FLAG_DIRECT_IO) 486 open_flags |= O_DIRECT; 487#elif defined(F_NOCACHE) 488 if (flags & IO_FLAG_DIRECT_IO) 489 f_nocache = F_NOCACHE; 490#endif 491 data->flags = flags; 492 493 data->dev = ext2fs_open_file(io->name, open_flags, 0); 494 if (data->dev < 0) { 495 retval = errno; 496 goto cleanup; 497 } 498 if (f_nocache) { 499 if (fcntl(data->dev, f_nocache, 1) < 0) { 500 retval = errno; 501 goto cleanup; 502 } 503 } 504 505 /* 506 * If the device is really a block device, then set the 507 * appropriate flag, otherwise we can set DISCARD_ZEROES flag 508 * because we are going to use punch hole instead of discard 509 * and if it succeed, subsequent read from sparse area returns 510 * zero. 511 */ 512 if (ext2fs_stat(io->name, &st) == 0) { 513 if (S_ISBLK(st.st_mode)) 514 io->flags |= CHANNEL_FLAGS_BLOCK_DEVICE; 515 else 516 io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES; 517 } 518 519#ifdef BLKSSZGET 520 if (flags & IO_FLAG_DIRECT_IO) { 521 if (ioctl(data->dev, BLKSSZGET, &data->align) != 0) 522 data->align = io->block_size; 523 } 524#endif 525 526#ifdef BLKDISCARDZEROES 527 ioctl(data->dev, BLKDISCARDZEROES, &zeroes); 528 if (zeroes) 529 io->flags |= CHANNEL_FLAGS_DISCARD_ZEROES; 530#endif 531 532#if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 533 /* 534 * Some operating systems require that the buffers be aligned, 535 * regardless of O_DIRECT 536 */ 537 data->align = 512; 538#endif 539 540 541 if ((retval = alloc_cache(io, data))) 542 goto cleanup; 543 544#ifdef BLKROGET 545 if (flags & IO_FLAG_RW) { 546 int error; 547 int readonly = 0; 548 549 /* Is the block device actually writable? */ 550 error = ioctl(data->dev, BLKROGET, &readonly); 551 if (!error && readonly) { 552 close(data->dev); 553 retval = EPERM; 554 goto cleanup; 555 } 556 } 557#endif 558 559#ifdef __linux__ 560#undef RLIM_INFINITY 561#if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4))) 562#define RLIM_INFINITY ((unsigned long)(~0UL>>1)) 563#else 564#define RLIM_INFINITY (~0UL) 565#endif 566 /* 567 * Work around a bug in 2.4.10-2.4.18 kernels where writes to 568 * block devices are wrongly getting hit by the filesize 569 * limit. This workaround isn't perfect, since it won't work 570 * if glibc wasn't built against 2.2 header files. (Sigh.) 571 * 572 */ 573 if ((flags & IO_FLAG_RW) && 574 (uname(&ut) == 0) && 575 ((ut.release[0] == '2') && (ut.release[1] == '.') && 576 (ut.release[2] == '4') && (ut.release[3] == '.') && 577 (ut.release[4] == '1') && (ut.release[5] >= '0') && 578 (ut.release[5] < '8')) && 579 (ext2fs_stat(io->name, &st) == 0) && 580 (S_ISBLK(st.st_mode))) { 581 struct rlimit rlim; 582 583 rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY; 584 setrlimit(RLIMIT_FSIZE, &rlim); 585 getrlimit(RLIMIT_FSIZE, &rlim); 586 if (((unsigned long) rlim.rlim_cur) < 587 ((unsigned long) rlim.rlim_max)) { 588 rlim.rlim_cur = rlim.rlim_max; 589 setrlimit(RLIMIT_FSIZE, &rlim); 590 } 591 } 592#endif 593 *channel = io; 594 return 0; 595 596cleanup: 597 if (data) { 598 free_cache(data); 599 ext2fs_free_mem(&data); 600 } 601 if (io) 602 ext2fs_free_mem(&io); 603 return retval; 604} 605 606static errcode_t unix_close(io_channel channel) 607{ 608 struct unix_private_data *data; 609 errcode_t retval = 0; 610 611 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 612 data = (struct unix_private_data *) channel->private_data; 613 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 614 615 if (--channel->refcount > 0) 616 return 0; 617 618#ifndef NO_IO_CACHE 619 retval = flush_cached_blocks(channel, data, 0); 620#endif 621 622 if (close(data->dev) < 0) 623 retval = errno; 624 free_cache(data); 625 626 ext2fs_free_mem(&channel->private_data); 627 if (channel->name) 628 ext2fs_free_mem(&channel->name); 629 ext2fs_free_mem(&channel); 630 return retval; 631} 632 633static errcode_t unix_set_blksize(io_channel channel, int blksize) 634{ 635 struct unix_private_data *data; 636 errcode_t retval; 637 638 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 639 data = (struct unix_private_data *) channel->private_data; 640 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 641 642 if (channel->block_size != blksize) { 643#ifndef NO_IO_CACHE 644 if ((retval = flush_cached_blocks(channel, data, 0))) 645 return retval; 646#endif 647 648 channel->block_size = blksize; 649 free_cache(data); 650 if ((retval = alloc_cache(channel, data))) 651 return retval; 652 } 653 return 0; 654} 655 656 657static errcode_t unix_read_blk64(io_channel channel, unsigned long long block, 658 int count, void *buf) 659{ 660 struct unix_private_data *data; 661 struct unix_cache *cache, *reuse[READ_DIRECT_SIZE]; 662 errcode_t retval; 663 char *cp; 664 int i, j; 665 666 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 667 data = (struct unix_private_data *) channel->private_data; 668 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 669 670#ifdef NO_IO_CACHE 671 return raw_read_blk(channel, data, block, count, buf); 672#else 673 /* 674 * If we're doing an odd-sized read or a very large read, 675 * flush out the cache and then do a direct read. 676 */ 677 if (count < 0 || count > WRITE_DIRECT_SIZE) { 678 if ((retval = flush_cached_blocks(channel, data, 0))) 679 return retval; 680 return raw_read_blk(channel, data, block, count, buf); 681 } 682 683 cp = buf; 684 while (count > 0) { 685 /* If it's in the cache, use it! */ 686 if ((cache = find_cached_block(data, block, &reuse[0]))) { 687#ifdef DEBUG 688 printf("Using cached block %lu\n", block); 689#endif 690 memcpy(cp, cache->buf, channel->block_size); 691 count--; 692 block++; 693 cp += channel->block_size; 694 continue; 695 } 696 if (count == 1) { 697 /* 698 * Special case where we read directly into the 699 * cache buffer; important in the O_DIRECT case 700 */ 701 cache = reuse[0]; 702 reuse_cache(channel, data, cache, block); 703 if ((retval = raw_read_blk(channel, data, block, 1, 704 cache->buf))) { 705 cache->in_use = 0; 706 return retval; 707 } 708 memcpy(cp, cache->buf, channel->block_size); 709 return 0; 710 } 711 712 /* 713 * Find the number of uncached blocks so we can do a 714 * single read request 715 */ 716 for (i=1; i < count; i++) 717 if (find_cached_block(data, block+i, &reuse[i])) 718 break; 719#ifdef DEBUG 720 printf("Reading %d blocks starting at %lu\n", i, block); 721#endif 722 if ((retval = raw_read_blk(channel, data, block, i, cp))) 723 return retval; 724 725 /* Save the results in the cache */ 726 for (j=0; j < i; j++) { 727 count--; 728 cache = reuse[j]; 729 reuse_cache(channel, data, cache, block++); 730 memcpy(cache->buf, cp, channel->block_size); 731 cp += channel->block_size; 732 } 733 } 734 return 0; 735#endif /* NO_IO_CACHE */ 736} 737 738static errcode_t unix_read_blk(io_channel channel, unsigned long block, 739 int count, void *buf) 740{ 741 return unix_read_blk64(channel, block, count, buf); 742} 743 744static errcode_t unix_write_blk64(io_channel channel, unsigned long long block, 745 int count, const void *buf) 746{ 747 struct unix_private_data *data; 748 struct unix_cache *cache, *reuse; 749 errcode_t retval = 0; 750 const char *cp; 751 int writethrough; 752 753 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 754 data = (struct unix_private_data *) channel->private_data; 755 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 756 757#ifdef NO_IO_CACHE 758 return raw_write_blk(channel, data, block, count, buf); 759#else 760 /* 761 * If we're doing an odd-sized write or a very large write, 762 * flush out the cache completely and then do a direct write. 763 */ 764 if (count < 0 || count > WRITE_DIRECT_SIZE) { 765 if ((retval = flush_cached_blocks(channel, data, 1))) 766 return retval; 767 return raw_write_blk(channel, data, block, count, buf); 768 } 769 770 /* 771 * For a moderate-sized multi-block write, first force a write 772 * if we're in write-through cache mode, and then fill the 773 * cache with the blocks. 774 */ 775 writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH; 776 if (writethrough) 777 retval = raw_write_blk(channel, data, block, count, buf); 778 779 cp = buf; 780 while (count > 0) { 781 cache = find_cached_block(data, block, &reuse); 782 if (!cache) { 783 cache = reuse; 784 reuse_cache(channel, data, cache, block); 785 } 786 memcpy(cache->buf, cp, channel->block_size); 787 cache->dirty = !writethrough; 788 count--; 789 block++; 790 cp += channel->block_size; 791 } 792 return retval; 793#endif /* NO_IO_CACHE */ 794} 795 796static errcode_t unix_write_blk(io_channel channel, unsigned long block, 797 int count, const void *buf) 798{ 799 return unix_write_blk64(channel, block, count, buf); 800} 801 802static errcode_t unix_write_byte(io_channel channel, unsigned long offset, 803 int size, const void *buf) 804{ 805 struct unix_private_data *data; 806 errcode_t retval = 0; 807 ssize_t actual; 808 809 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 810 data = (struct unix_private_data *) channel->private_data; 811 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 812 813 if (data->align != 0) { 814#ifdef ALIGN_DEBUG 815 printf("unix_write_byte: O_DIRECT fallback\n"); 816#endif 817 return EXT2_ET_UNIMPLEMENTED; 818 } 819 820#ifndef NO_IO_CACHE 821 /* 822 * Flush out the cache completely 823 */ 824 if ((retval = flush_cached_blocks(channel, data, 1))) 825 return retval; 826#endif 827 828 if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0) 829 return errno; 830 831 actual = write(data->dev, buf, size); 832 if (actual != size) 833 return EXT2_ET_SHORT_WRITE; 834 835 return 0; 836} 837 838/* 839 * Flush data buffers to disk. 840 */ 841static errcode_t unix_flush(io_channel channel) 842{ 843 struct unix_private_data *data; 844 errcode_t retval = 0; 845 846 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 847 data = (struct unix_private_data *) channel->private_data; 848 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 849 850#ifndef NO_IO_CACHE 851 retval = flush_cached_blocks(channel, data, 0); 852#endif 853 fsync(data->dev); 854 return retval; 855} 856 857static errcode_t unix_set_option(io_channel channel, const char *option, 858 const char *arg) 859{ 860 struct unix_private_data *data; 861 unsigned long long tmp; 862 char *end; 863 864 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 865 data = (struct unix_private_data *) channel->private_data; 866 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 867 868 if (!strcmp(option, "offset")) { 869 if (!arg) 870 return EXT2_ET_INVALID_ARGUMENT; 871 872 tmp = strtoull(arg, &end, 0); 873 if (*end) 874 return EXT2_ET_INVALID_ARGUMENT; 875 data->offset = tmp; 876 if (data->offset < 0) 877 return EXT2_ET_INVALID_ARGUMENT; 878 return 0; 879 } 880 return EXT2_ET_INVALID_ARGUMENT; 881} 882 883#if defined(__linux__) && !defined(BLKDISCARD) 884#define BLKDISCARD _IO(0x12,119) 885#endif 886 887static errcode_t unix_discard(io_channel channel, unsigned long long block, 888 unsigned long long count) 889{ 890 struct unix_private_data *data; 891 __uint64_t range[2]; 892 int ret; 893 894 EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL); 895 data = (struct unix_private_data *) channel->private_data; 896 EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL); 897 898 if (channel->flags & CHANNEL_FLAGS_BLOCK_DEVICE) { 899#ifdef BLKDISCARD 900 range[0] = (__uint64_t)(block) * channel->block_size; 901 range[1] = (__uint64_t)(count) * channel->block_size; 902 903 ret = ioctl(data->dev, BLKDISCARD, &range); 904#else 905 goto unimplemented; 906#endif 907 } else { 908#ifdef FALLOC_FL_PUNCH_HOLE 909 /* 910 * If we are not on block device, try to use punch hole 911 * to reclaim free space. 912 */ 913 ret = fallocate(data->dev, 914 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 915 (off_t)(block) * channel->block_size, 916 (off_t)(count) * channel->block_size); 917#else 918 goto unimplemented; 919#endif 920 } 921 if (ret < 0) { 922 if (errno == EOPNOTSUPP) 923 goto unimplemented; 924 return errno; 925 } 926 return 0; 927unimplemented: 928 return EXT2_ET_UNIMPLEMENTED; 929} 930