io_u.c revision c311cd2a9e3ca77f4d2cb2f6dd8bd08185e8c0f6
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <signal.h>
#include <time.h>
#include <assert.h>

#include "fio.h"
#include "hash.h"

struct io_completion_data {
	int nr;				/* input */

	int error;			/* output */
	unsigned long bytes_done[2];	/* output */
	struct timeval time;		/* output */
};

/*
 * The ->file_map[] contains a map of blocks we have or have not done io
 * to yet. Used to make sure we cover the entire range in a fair fashion.
 */
static int random_map_free(struct fio_file *f, const unsigned long long block)
{
	unsigned int idx = RAND_MAP_IDX(f, block);
	unsigned int bit = RAND_MAP_BIT(f, block);

	dprint(FD_RANDOM, "free: b=%llu, idx=%u, bit=%u\n", block, idx, bit);

	return (f->file_map[idx] & (1 << bit)) == 0;
}

/*
 * Mark a given offset as used in the map.
 */
static void mark_random_map(struct thread_data *td, struct io_u *io_u)
{
	unsigned int min_bs = td->o.rw_min_bs;
	struct fio_file *f = io_u->file;
	unsigned long long block;
	unsigned int blocks, nr_blocks;

	block = (io_u->offset - f->file_offset) / (unsigned long long) min_bs;
	nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;
	blocks = 0;

	while (nr_blocks) {
		unsigned int this_blocks, mask;
		unsigned int idx, bit;

		/*
		 * If we have a mixed random workload, we may
		 * encounter blocks we already did IO to.
		 */
		if ((td->o.ddir_nr == 1) && !random_map_free(f, block)) {
			if (!blocks)
				blocks = 1;
			break;
		}

		idx = RAND_MAP_IDX(f, block);
		bit = RAND_MAP_BIT(f, block);

		fio_assert(td, idx < f->num_maps);

		this_blocks = nr_blocks;
		if (this_blocks + bit > BLOCKS_PER_MAP)
			this_blocks = BLOCKS_PER_MAP - bit;

		if (this_blocks == BLOCKS_PER_MAP)
			mask = -1U;
		else
			mask = ((1U << this_blocks) - 1) << bit;

		f->file_map[idx] |= mask;
		nr_blocks -= this_blocks;
		blocks += this_blocks;
		block += this_blocks;
	}

	if ((blocks * min_bs) < io_u->buflen)
		io_u->buflen = blocks * min_bs;
}

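/*
 * Number of blocks the usable part of the file spans for this data
 * direction: the smaller of ->io_size and ->real_file_size, divided by
 * the block alignment.
 */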
static unsigned long long last_block(struct thread_data *td, struct fio_file *f,
				     enum fio_ddir ddir)
{
	unsigned long long max_blocks;
	unsigned long long max_size;

	/*
	 * Hmm, should we make sure that ->io_size <= ->real_file_size?
	 */
	max_size = f->io_size;
	if (max_size > f->real_file_size)
		max_size = f->real_file_size;

	max_blocks = max_size / (unsigned long long) td->o.ba[ddir];
	if (!max_blocks)
		return 0;

	return max_blocks;
}

/*
 * Return the next free block in the map.
 */
static int get_next_free_block(struct thread_data *td, struct fio_file *f,
			       enum fio_ddir ddir, unsigned long long *b)
{
	unsigned long long min_bs = td->o.rw_min_bs;
	int i;

	i = f->last_free_lookup;
	*b = (i * BLOCKS_PER_MAP);
	while ((*b) * min_bs < f->real_file_size &&
		(*b) * min_bs < f->io_size) {
		if (f->file_map[i] != (unsigned int) -1) {
			*b += ffz(f->file_map[i]);
			if (*b > last_block(td, f, ddir))
				break;
			f->last_free_lookup = i;
			return 0;
		}

		*b += BLOCKS_PER_MAP;
		i++;
	}

	dprint(FD_IO, "failed finding a free block\n");
	return 1;
}

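/*
 * Pick a random block. Try a handful of purely random probes first; if
 * they all hit busy blocks in the map, scan for a free block from a
 * random starting index, and finally fall back to an exhaustive search
 * from the start of the map.
 */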
static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
				enum fio_ddir ddir, unsigned long long *b)
{
	unsigned long long r;
	int loops = 5;

	do {
		r = os_random_long(&td->random_state);
		dprint(FD_RANDOM, "off rand %llu\n", r);
		*b = (last_block(td, f, ddir) - 1)
			* (r / ((unsigned long long) OS_RAND_MAX + 1.0));

		/*
		 * if we are not maintaining a random map, we are done.
		 */
		if (!file_randommap(td, f))
			return 0;

		/*
		 * calculate map offset and check if it's free
		 */
		if (random_map_free(f, *b))
			return 0;

		dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n",
									*b);
	} while (--loops);

	/*
	 * we get here if we didn't succeed in looking up a block. generate
	 * a random start offset into the filemap, and find the first free
	 * block from there.
	 */
	loops = 10;
	do {
		f->last_free_lookup = (f->num_maps - 1) *
					(r / (OS_RAND_MAX + 1.0));
		if (!get_next_free_block(td, f, ddir, b))
			return 0;

		r = os_random_long(&td->random_state);
	} while (--loops);

	/*
	 * that didn't work either, try exhaustive search from the start
	 */
	f->last_free_lookup = 0;
	return get_next_free_block(td, f, ddir, b);
}

/*
 * For random io, generate a random new block and see if it's used. Repeat
 * until we find a free one. For sequential io, just return the end of
 * the last io issued.
 */
static int get_next_offset(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;
	unsigned long long b;
	enum fio_ddir ddir = io_u->ddir;

	if (td_random(td) && (td->o.ddir_nr && !--td->ddir_nr)) {
		td->ddir_nr = td->o.ddir_nr;

		if (get_next_rand_offset(td, f, ddir, &b)) {
			dprint(FD_IO, "%s: getting rand offset failed\n",
				f->file_name);
			return 1;
		}
	} else {
		if (f->last_pos >= f->real_file_size) {
			if (!td_random(td) ||
			     get_next_rand_offset(td, f, ddir, &b)) {
				dprint(FD_IO, "%s: pos %llu > size %llu\n",
						f->file_name, f->last_pos,
						f->real_file_size);
				return 1;
			}
		} else
			b = (f->last_pos - f->file_offset) / td->o.min_bs[ddir];
	}

	io_u->offset = b * td->o.ba[ddir];
	if (io_u->offset >= f->io_size) {
		dprint(FD_IO, "get_next_offset: offset %llu >= io_size %llu\n",
					io_u->offset, f->io_size);
		return 1;
	}

	io_u->offset += f->file_offset;
	if (io_u->offset >= f->real_file_size) {
		dprint(FD_IO, "get_next_offset: offset %llu >= size %llu\n",
					io_u->offset, f->real_file_size);
		return 1;
	}

	return 0;
}

static inline int is_power_of_2(unsigned int val)
{
	return (val != 0 && ((val & (val - 1)) == 0));
}

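/*
 * Pick the buffer length for the next io_u: the fixed block size, a
 * random size between min_bs and max_bs, or one drawn from the bssplit
 * distribution. The result is rounded up to a multiple of min_bs when
 * alignment is required and min_bs is a power of two.
 */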
static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u)
{
	const int ddir = io_u->ddir;
	unsigned int uninitialized_var(buflen);
	unsigned int minbs, maxbs;
	long r;

	minbs = td->o.min_bs[ddir];
	maxbs = td->o.max_bs[ddir];

	if (minbs == maxbs)
		buflen = minbs;
	else {
		r = os_random_long(&td->bsrange_state);
		if (!td->o.bssplit_nr[ddir]) {
			buflen = 1 + (unsigned int) ((double) maxbs *
					(r / (OS_RAND_MAX + 1.0)));
			if (buflen < minbs)
				buflen = minbs;
		} else {
			long perc = 0;
			unsigned int i;

			for (i = 0; i < td->o.bssplit_nr[ddir]; i++) {
				struct bssplit *bsp = &td->o.bssplit[ddir][i];

				buflen = bsp->bs;
				perc += bsp->perc;
				if (r <= ((OS_RAND_MAX / 100L) * perc))
					break;
			}
		}
		if (!td->o.bs_unaligned && is_power_of_2(minbs))
			buflen = (buflen + minbs - 1) & ~(minbs - 1);
	}

	if (io_u->offset + buflen > io_u->file->real_file_size) {
		dprint(FD_IO, "lower buflen %u -> %u (ddir=%d)\n", buflen,
						minbs, ddir);
		buflen = minbs;
	}

	return buflen;
}

static void set_rwmix_bytes(struct thread_data *td)
{
	unsigned int diff;

	/*
	 * we do time or byte based switch. this is needed because
	 * buffered writes may issue a lot quicker than they complete,
	 * whereas reads do not.
	 */
	diff = td->o.rwmix[td->rwmix_ddir ^ 1];
	td->rwmix_issues = (td->io_issues[td->rwmix_ddir] * diff) / 100;
}

static inline enum fio_ddir get_rand_ddir(struct thread_data *td)
{
	unsigned int v;
	long r;

	r = os_random_long(&td->rwmix_state);
	v = 1 + (int) (100.0 * (r / (OS_RAND_MAX + 1.0)));
	if (v <= td->o.rwmix[DDIR_READ])
		return DDIR_READ;

	return DDIR_WRITE;
}

/*
 * Return the data direction for the next io_u. If the job is a
 * mixed read/write workload, check the rwmix cycle and switch if
 * necessary.
 */
static enum fio_ddir get_rw_ddir(struct thread_data *td)
{
	if (td_rw(td)) {
		/*
		 * Check if it's time to seed a new data direction.
		 */
		if (td->io_issues[td->rwmix_ddir] >= td->rwmix_issues) {
			unsigned long long max_bytes;
			enum fio_ddir ddir;

			/*
			 * Put a top limit on how many bytes we do for
			 * one data direction, to avoid overflowing the
			 * ranges too much
			 */
			ddir = get_rand_ddir(td);
			max_bytes = td->this_io_bytes[ddir];
			if (max_bytes >=
			    (td->o.size * td->o.rwmix[ddir] / 100)) {
				if (!td->rw_end_set[ddir]) {
					td->rw_end_set[ddir] = 1;
					fio_gettime(&td->rw_end[ddir], NULL);
				}

				ddir ^= 1;
			}

			if (ddir != td->rwmix_ddir)
				set_rwmix_bytes(td);

			td->rwmix_ddir = ddir;
		}
		return td->rwmix_ddir;
	} else if (td_read(td))
		return DDIR_READ;
	else
		return DDIR_WRITE;
}

static void put_file_log(struct thread_data *td, struct fio_file *f)
{
	int ret = put_file(td, f);

	if (ret)
		td_verror(td, ret, "file close");
}

void put_io_u(struct thread_data *td, struct io_u *io_u)
{
	assert((io_u->flags & IO_U_F_FREE) == 0);
	io_u->flags |= IO_U_F_FREE;

	if (io_u->file)
		put_file_log(td, io_u->file);

	io_u->file = NULL;
	flist_del(&io_u->list);
	flist_add(&io_u->list, &td->io_u_freelist);
	td->cur_depth--;
}

void requeue_io_u(struct thread_data *td, struct io_u **io_u)
{
	struct io_u *__io_u = *io_u;

	dprint(FD_IO, "requeue %p\n", __io_u);

	__io_u->flags |= IO_U_F_FREE;
	if ((__io_u->flags & IO_U_F_FLIGHT) && (__io_u->ddir != DDIR_SYNC))
		td->io_issues[__io_u->ddir]--;

	__io_u->flags &= ~IO_U_F_FLIGHT;

	flist_del(&__io_u->list);
	flist_add_tail(&__io_u->list, &td->io_u_requeues);
	td->cur_depth--;
	*io_u = NULL;
}

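/*
 * Set up the data direction, offset and buffer length for an io_u,
 * issuing a sync instead if one is due and skipping ahead when a zone
 * boundary has been reached. Returns non-zero if no io could be set up
 * for this file.
 */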
static int fill_io_u(struct thread_data *td, struct io_u *io_u)
{
	if (td->io_ops->flags & FIO_NOIO)
		goto out;

	/*
	 * see if it's time to sync
	 */
	if (td->o.fsync_blocks &&
	   !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks) &&
	     td->io_issues[DDIR_WRITE] && should_fsync(td)) {
		io_u->ddir = DDIR_SYNC;
		goto out;
	}

	io_u->ddir = get_rw_ddir(td);

	/*
	 * See if it's time to switch to a new zone
	 */
	if (td->zone_bytes >= td->o.zone_size) {
		td->zone_bytes = 0;
		io_u->file->last_pos += td->o.zone_skip;
		td->io_skip_bytes += td->o.zone_skip;
	}

	/*
	 * No log, let the seq/rand engine retrieve the next buflen and
	 * position.
	 */
	if (get_next_offset(td, io_u)) {
		dprint(FD_IO, "io_u %p, failed getting offset\n", io_u);
		return 1;
	}

	io_u->buflen = get_next_buflen(td, io_u);
	if (!io_u->buflen) {
		dprint(FD_IO, "io_u %p, failed getting buflen\n", io_u);
		return 1;
	}

	if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
		dprint(FD_IO, "io_u %p, offset too large\n", io_u);
		dprint(FD_IO, "  off=%llu/%lu > %llu\n", io_u->offset,
				io_u->buflen, io_u->file->real_file_size);
		return 1;
	}

	/*
	 * mark entry before potentially trimming io_u
	 */
	if (td_random(td) && file_randommap(td, io_u->file))
		mark_random_map(td, io_u);

	/*
	 * If using a write iolog, store this entry.
	 */
out:
	dprint_io_u(io_u, "fill_io_u");
	td->zone_bytes += io_u->buflen;
	log_io_u(td, io_u);
	return 0;
}

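/*
 * Bucket a batch size (number of io_us submitted or completed in one go)
 * into the histogram slots used for the submit/complete distribution
 * stats.
 */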
static void __io_u_mark_map(unsigned int *map, unsigned int nr)
{
	int index = 0;

	switch (nr) {
	default:
		index = 6;
		break;
	case 33 ... 64:
		index = 5;
		break;
	case 17 ... 32:
		index = 4;
		break;
	case 9 ... 16:
		index = 3;
		break;
	case 5 ... 8:
		index = 2;
		break;
	case 1 ... 4:
		index = 1;
	case 0:
		break;
	}

	map[index]++;
}

void io_u_mark_submit(struct thread_data *td, unsigned int nr)
{
	__io_u_mark_map(td->ts.io_u_submit, nr);
	td->ts.total_submit++;
}

void io_u_mark_complete(struct thread_data *td, unsigned int nr)
{
	__io_u_mark_map(td->ts.io_u_complete, nr);
	td->ts.total_complete++;
}

void io_u_mark_depth(struct thread_data *td, unsigned int nr)
{
	int index = 0;

	switch (td->cur_depth) {
	default:
		index = 6;
		break;
	case 32 ... 63:
		index = 5;
		break;
	case 16 ... 31:
		index = 4;
		break;
	case 8 ... 15:
		index = 3;
		break;
	case 4 ... 7:
		index = 2;
		break;
	case 2 ... 3:
		index = 1;
	case 1:
		break;
	}

	td->ts.io_u_map[index] += nr;
}

static void io_u_mark_lat_usec(struct thread_data *td, unsigned long usec)
{
	int index = 0;

	assert(usec < 1000);

	switch (usec) {
	case 750 ... 999:
		index = 9;
		break;
	case 500 ... 749:
		index = 8;
		break;
	case 250 ... 499:
		index = 7;
		break;
	case 100 ... 249:
		index = 6;
		break;
	case 50 ... 99:
		index = 5;
		break;
	case 20 ... 49:
		index = 4;
		break;
	case 10 ... 19:
		index = 3;
		break;
	case 4 ... 9:
		index = 2;
		break;
	case 2 ... 3:
		index = 1;
	case 0 ... 1:
		break;
	}

	assert(index < FIO_IO_U_LAT_U_NR);
	td->ts.io_u_lat_u[index]++;
}

static void io_u_mark_lat_msec(struct thread_data *td, unsigned long msec)
{
	int index = 0;

	switch (msec) {
	default:
		index = 11;
		break;
	case 1000 ... 1999:
		index = 10;
		break;
	case 750 ... 999:
		index = 9;
		break;
	case 500 ... 749:
		index = 8;
		break;
	case 250 ... 499:
		index = 7;
		break;
	case 100 ... 249:
		index = 6;
		break;
	case 50 ... 99:
		index = 5;
		break;
	case 20 ... 49:
		index = 4;
		break;
	case 10 ... 19:
		index = 3;
		break;
	case 4 ... 9:
		index = 2;
		break;
	case 2 ... 3:
		index = 1;
	case 0 ... 1:
		break;
	}

	assert(index < FIO_IO_U_LAT_M_NR);
	td->ts.io_u_lat_m[index]++;
}

static void io_u_mark_latency(struct thread_data *td, unsigned long usec)
{
	if (usec < 1000)
		io_u_mark_lat_usec(td, usec);
	else
		io_u_mark_lat_msec(td, usec / 1000);
}

/*
 * Get next file to service by choosing one at random
 */
static struct fio_file *get_next_file_rand(struct thread_data *td, int goodf,
					   int badf)
{
	struct fio_file *f;
	int fno;

	do {
		long r = os_random_long(&td->next_file_state);
		int opened = 0;

		fno = (unsigned int) ((double) td->o.nr_files
			* (r / (OS_RAND_MAX + 1.0)));
		f = td->files[fno];
		if (f->flags & FIO_FILE_DONE)
			continue;

		if (!(f->flags & FIO_FILE_OPEN)) {
			int err;

			err = td_io_open_file(td, f);
			if (err)
				continue;
			opened = 1;
		}

		if ((!goodf || (f->flags & goodf)) && !(f->flags & badf)) {
			dprint(FD_FILE, "get_next_file_rand: %p\n", f);
			return f;
		}
		if (opened)
			td_io_close_file(td, f);
	} while (1);
}

/*
 * Get next file to service by doing round robin between all available ones
 */
static struct fio_file *get_next_file_rr(struct thread_data *td, int goodf,
					 int badf)
{
	unsigned int old_next_file = td->next_file;
	struct fio_file *f;

	do {
		int opened = 0;

		f = td->files[td->next_file];

		td->next_file++;
		if (td->next_file >= td->o.nr_files)
			td->next_file = 0;

		dprint(FD_FILE, "trying file %s %x\n", f->file_name, f->flags);
		if (f->flags & FIO_FILE_DONE) {
			f = NULL;
			continue;
		}

		if (!(f->flags & FIO_FILE_OPEN)) {
			int err;

			err = td_io_open_file(td, f);
			if (err) {
				dprint(FD_FILE, "error %d on open of %s\n",
					err, f->file_name);
				continue;
			}
			opened = 1;
		}

		dprint(FD_FILE, "goodf=%x, badf=%x, ff=%x\n", goodf, badf,
								f->flags);
		if ((!goodf || (f->flags & goodf)) && !(f->flags & badf))
			break;

		if (opened)
			td_io_close_file(td, f);

		f = NULL;
	} while (td->next_file != old_next_file);

	dprint(FD_FILE, "get_next_file_rr: %p\n", f);
	return f;
}

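/*
 * Select the file to service the next io_u from. Keep using the current
 * service file while it is open and has service slots left, otherwise
 * pick a new one round robin or at random, depending on the
 * file_service_type setting.
 */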
static struct fio_file *get_next_file(struct thread_data *td)
{
	struct fio_file *f;

	assert(td->o.nr_files <= td->files_index);

	if (td->nr_done_files >= td->o.nr_files) {
		dprint(FD_FILE, "get_next_file: nr_open=%d, nr_done=%d,"
				" nr_files=%d\n", td->nr_open_files,
						  td->nr_done_files,
						  td->o.nr_files);
		return NULL;
	}

	f = td->file_service_file;
	if (f && (f->flags & FIO_FILE_OPEN) && !(f->flags & FIO_FILE_CLOSING)) {
		if (td->o.file_service_type == FIO_FSERVICE_SEQ)
			goto out;
		if (td->file_service_left--)
			goto out;
	}

	if (td->o.file_service_type == FIO_FSERVICE_RR ||
	    td->o.file_service_type == FIO_FSERVICE_SEQ)
		f = get_next_file_rr(td, FIO_FILE_OPEN, FIO_FILE_CLOSING);
	else
		f = get_next_file_rand(td, FIO_FILE_OPEN, FIO_FILE_CLOSING);

	td->file_service_file = f;
	td->file_service_left = td->file_service_nr - 1;
out:
	dprint(FD_FILE, "get_next_file: %p [%s]\n", f, f->file_name);
	return f;
}

static int set_io_u_file(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f;

	do {
		f = get_next_file(td);
		if (!f)
			return 1;

		io_u->file = f;
		get_file(f);

		if (!fill_io_u(td, io_u))
			break;

		put_file_log(td, f);
		td_io_close_file(td, f);
		io_u->file = NULL;
		f->flags |= FIO_FILE_DONE;
		td->nr_done_files++;
		dprint(FD_FILE, "%s: is done (%d of %d)\n", f->file_name,
				td->nr_done_files, td->o.nr_files);
	} while (1);

	return 0;
}

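/*
 * Grab a free io_u, preferring requeued entries over the freelist.
 * Returns NULL if the queue is already at full depth.
 */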
struct io_u *__get_io_u(struct thread_data *td)
{
	struct io_u *io_u = NULL;

	if (!flist_empty(&td->io_u_requeues))
		io_u = flist_entry(td->io_u_requeues.next, struct io_u, list);
	else if (!queue_full(td)) {
		io_u = flist_entry(td->io_u_freelist.next, struct io_u, list);

		io_u->buflen = 0;
		io_u->resid = 0;
		io_u->file = NULL;
		io_u->end_io = NULL;
	}

	if (io_u) {
		assert(io_u->flags & IO_U_F_FREE);
		io_u->flags &= ~IO_U_F_FREE;

		io_u->error = 0;
		flist_del(&io_u->list);
		flist_add(&io_u->list, &td->io_u_busylist);
		td->cur_depth++;
	}

	return io_u;
}

/*
 * Return an io_u to be processed. Gets a buflen and offset, sets direction,
 * etc. The returned io_u is fully ready to be prepped and submitted.
 */
struct io_u *get_io_u(struct thread_data *td)
{
	struct fio_file *f;
	struct io_u *io_u;

	io_u = __get_io_u(td);
	if (!io_u) {
		dprint(FD_IO, "__get_io_u failed\n");
		return NULL;
	}

	/*
	 * from a requeue, io_u already setup
	 */
	if (io_u->file)
		goto out;

	/*
	 * If using an iolog, grab next piece if any available.
	 */
	if (td->o.read_iolog_file) {
		if (read_iolog_get(td, io_u))
			goto err_put;
	} else if (set_io_u_file(td, io_u)) {
		dprint(FD_IO, "io_u %p, setting file failed\n", io_u);
		goto err_put;
	}

	f = io_u->file;
	assert(f->flags & FIO_FILE_OPEN);

	if (io_u->ddir != DDIR_SYNC) {
		if (!io_u->buflen && !(td->io_ops->flags & FIO_NOIO)) {
			dprint(FD_IO, "get_io_u: zero buflen on %p\n", io_u);
			goto err_put;
		}

		f->last_pos = io_u->offset + io_u->buflen;

		if (td->o.verify != VERIFY_NONE && io_u->ddir == DDIR_WRITE)
			populate_verify_io_u(td, io_u);
		else if (td->o.refill_buffers && io_u->ddir == DDIR_WRITE)
			io_u_fill_buffer(td, io_u, io_u->xfer_buflen);
	}

	/*
	 * Set io data pointers.
	 */
	io_u->endpos = io_u->offset + io_u->buflen;
	io_u->xfer_buf = io_u->buf;
	io_u->xfer_buflen = io_u->buflen;

out:
	if (!td_io_prep(td, io_u)) {
		if (!td->o.disable_slat)
			fio_gettime(&io_u->start_time, NULL);
		return io_u;
	}
err_put:
	dprint(FD_IO, "get_io_u failed\n");
	put_io_u(td, io_u);
	return NULL;
}

void io_u_log_error(struct thread_data *td, struct io_u *io_u)
{
	const char *msg[] = { "read", "write", "sync" };

	log_err("fio: io_u error");

	if (io_u->file)
		log_err(" on file %s", io_u->file->file_name);

	log_err(": %s\n", strerror(io_u->error));

	log_err("     %s offset=%llu, buflen=%lu\n", msg[io_u->ddir],
					io_u->offset, io_u->xfer_buflen);

	if (!td->error)
		td_verror(td, io_u->error, "io_u error");
}

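/*
 * Account a completed io_u: update the per-direction block and byte
 * counters, add completion latency and bandwidth samples, log the piece
 * for later verification if needed, and record any error in the
 * completion data.
 */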
static void io_completed(struct thread_data *td, struct io_u *io_u,
			 struct io_completion_data *icd)
{
	/*
	 * Older gcc's are too dumb to realize that usec is always used
	 * initialized, silence that warning.
	 */
	unsigned long uninitialized_var(usec);

	dprint_io_u(io_u, "io complete");

	assert(io_u->flags & IO_U_F_FLIGHT);
	io_u->flags &= ~IO_U_F_FLIGHT;

	if (io_u->ddir == DDIR_SYNC) {
		td->last_was_sync = 1;
		return;
	}

	td->last_was_sync = 0;

	if (!io_u->error) {
		unsigned int bytes = io_u->buflen - io_u->resid;
		const enum fio_ddir idx = io_u->ddir;
		int ret;

		td->io_blocks[idx]++;
		td->io_bytes[idx] += bytes;
		td->this_io_bytes[idx] += bytes;

		if (ramp_time_over(td)) {
			if (!td->o.disable_clat || !td->o.disable_bw)
				usec = utime_since(&io_u->issue_time,
							&icd->time);

			if (!td->o.disable_clat) {
				add_clat_sample(td, idx, usec);
				io_u_mark_latency(td, usec);
			}
			if (!td->o.disable_bw)
				add_bw_sample(td, idx, &icd->time);
		}

		if (td_write(td) && idx == DDIR_WRITE &&
		    td->o.do_verify &&
		    td->o.verify != VERIFY_NONE)
			log_io_piece(td, io_u);

		icd->bytes_done[idx] += bytes;

		if (io_u->end_io) {
			ret = io_u->end_io(td, io_u);
			if (ret && !icd->error)
				icd->error = ret;
		}
	} else {
		icd->error = io_u->error;
		io_u_log_error(td, io_u);
	}
}

static void init_icd(struct thread_data *td, struct io_completion_data *icd,
		     int nr)
{
	if (!td->o.disable_clat || !td->o.disable_bw)
		fio_gettime(&icd->time, NULL);

	icd->nr = nr;

	icd->error = 0;
	icd->bytes_done[0] = icd->bytes_done[1] = 0;
}

static void ios_completed(struct thread_data *td,
			  struct io_completion_data *icd)
{
	struct io_u *io_u;
	int i;

	for (i = 0; i < icd->nr; i++) {
		io_u = td->io_ops->event(td, i);

		io_completed(td, io_u, icd);
		put_io_u(td, io_u);
	}
}

/*
 * Complete a single io_u for the sync engines.
 */
long io_u_sync_complete(struct thread_data *td, struct io_u *io_u)
{
	struct io_completion_data icd;

	init_icd(td, &icd, 1);
	io_completed(td, io_u, &icd);
	put_io_u(td, io_u);

	if (!icd.error)
		return icd.bytes_done[0] + icd.bytes_done[1];

	td_verror(td, icd.error, "io_u_sync_complete");
	return -1;
}

/*
 * Called to complete min_events number of io for the async engines.
 */
long io_u_queued_complete(struct thread_data *td, int min_evts)
{
	struct io_completion_data icd;
	struct timespec *tvp = NULL;
	int ret;
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 0, };

	dprint(FD_IO, "io_u_queued_completed: min=%d\n", min_evts);

	if (!min_evts)
		tvp = &ts;

	ret = td_io_getevents(td, min_evts, td->o.iodepth_batch_complete, tvp);
	if (ret < 0) {
		td_verror(td, -ret, "td_io_getevents");
		return ret;
	} else if (!ret)
		return ret;

	init_icd(td, &icd, ret);
	ios_completed(td, &icd);
	if (!icd.error)
		return icd.bytes_done[0] + icd.bytes_done[1];

	td_verror(td, icd.error, "io_u_queued_complete");
	return -1;
}

/*
 * Call when io_u is really queued, to update the submission latency.
 */
void io_u_queued(struct thread_data *td, struct io_u *io_u)
{
	if (!td->o.disable_slat) {
		unsigned long slat_time;

		slat_time = utime_since(&io_u->start_time, &io_u->issue_time);
		add_slat_sample(td, io_u->ddir, slat_time);
	}
}

/*
 * "randomly" fill the buffer contents
 */
void io_u_fill_buffer(struct thread_data *td, struct io_u *io_u,
		      unsigned int max_bs)
{
	long *ptr = io_u->buf;

	if (!td->o.zero_buffers) {
		while ((void *) ptr - io_u->buf < max_bs) {
			*ptr = rand() * GOLDEN_RATIO_PRIME;
			ptr++;
		}
	} else
		memset(ptr, 0, max_bs);
}