io_u.c revision b29ee5b3dee417016164198fb240344ef666de2b
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <signal.h>
#include <time.h>
#include <assert.h>

#include "fio.h"
#include "hash.h"

/*
 * Change this define to play with the timeout handling
 */
#undef FIO_USE_TIMEOUT

struct io_completion_data {
	int nr;				/* input */

	int error;			/* output */
	unsigned long bytes_done[2];	/* output */
	struct timeval time;		/* output */
};

/*
 * The ->file_map[] contains a map of blocks we have or have not done io
 * to yet. Used to make sure we cover the entire range in a fair fashion.
 */
static int random_map_free(struct fio_file *f, const unsigned long long block)
{
	unsigned int idx = RAND_MAP_IDX(f, block);
	unsigned int bit = RAND_MAP_BIT(f, block);

	dprint(FD_RANDOM, "free: b=%llu, idx=%u, bit=%u\n", block, idx, bit);

	return (f->file_map[idx] & (1 << bit)) == 0;
}

/*
 * Mark a given offset as used in the map.
 */
static void mark_random_map(struct thread_data *td, struct io_u *io_u)
{
	unsigned int min_bs = td->o.rw_min_bs;
	struct fio_file *f = io_u->file;
	unsigned long long block;
	unsigned int blocks, nr_blocks;

	block = (io_u->offset - f->file_offset) / (unsigned long long) min_bs;
	nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;
	blocks = 0;

	while (nr_blocks) {
		unsigned int this_blocks, mask;
		unsigned int idx, bit;

		/*
		 * If we have a mixed random workload, we may
		 * encounter blocks we already did IO to.
		 */
		if ((td->o.ddir_nr == 1) && !random_map_free(f, block)) {
			if (!blocks)
				blocks = 1;
			break;
		}

		idx = RAND_MAP_IDX(f, block);
		bit = RAND_MAP_BIT(f, block);

		fio_assert(td, idx < f->num_maps);

		this_blocks = nr_blocks;
		if (this_blocks + bit > BLOCKS_PER_MAP)
			this_blocks = BLOCKS_PER_MAP - bit;

		if (this_blocks == BLOCKS_PER_MAP)
			mask = -1U;
		else
			mask = ((1U << this_blocks) - 1) << bit;

		f->file_map[idx] |= mask;
		nr_blocks -= this_blocks;
		blocks += this_blocks;
		block += this_blocks;
	}

	if ((blocks * min_bs) < io_u->buflen)
		io_u->buflen = blocks * min_bs;
}

static unsigned long long last_block(struct thread_data *td, struct fio_file *f,
				     enum fio_ddir ddir)
{
	unsigned long long max_blocks;
	unsigned long long max_size;

	/*
	 * Hmm, should we make sure that ->io_size <= ->real_file_size?
	 */
	max_size = f->io_size;
	if (max_size > f->real_file_size)
		max_size = f->real_file_size;

	max_blocks = max_size / (unsigned long long) td->o.min_bs[ddir];
	if (!max_blocks)
		return 0;

	return max_blocks;
}
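
/*
 * Worked example of the mask arithmetic in mark_random_map() above
 * (illustrative values, not from the original source): with bit == 4 and
 * this_blocks == 3, the else branch computes
 * mask = ((1U << 3) - 1) << 4 = 0x70, i.e. bits 4, 5 and 6 of
 * file_map[idx] are set, marking those three blocks as done.
 */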

/*
 * Return the next free block in the map.
 */
static int get_next_free_block(struct thread_data *td, struct fio_file *f,
			       enum fio_ddir ddir, unsigned long long *b)
{
	unsigned long long min_bs = td->o.rw_min_bs;
	int i;

	i = f->last_free_lookup;
	*b = (i * BLOCKS_PER_MAP);
	while ((*b) * min_bs < f->real_file_size) {
		if (f->file_map[i] != (unsigned int) -1) {
			*b += ffz(f->file_map[i]);
			if (*b > last_block(td, f, ddir))
				break;
			f->last_free_lookup = i;
			return 0;
		}

		*b += BLOCKS_PER_MAP;
		i++;
	}

	dprint(FD_IO, "failed finding a free block\n");
	return 1;
}

static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
				enum fio_ddir ddir, unsigned long long *b)
{
	unsigned long long r;
	int loops = 5;

	do {
		r = os_random_long(&td->random_state);
		dprint(FD_RANDOM, "off rand %llu\n", r);
		*b = (last_block(td, f, ddir) - 1)
			* (r / ((unsigned long long) OS_RAND_MAX + 1.0));

		/*
		 * if we are not maintaining a random map, we are done.
		 */
		if (!file_randommap(td, f))
			return 0;

		/*
		 * calculate map offset and check if it's free
		 */
		if (random_map_free(f, *b))
			return 0;

		dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n",
									*b);
	} while (--loops);

	/*
	 * we get here, if we didn't succeed in looking up a block. generate
	 * a random start offset into the filemap, and find the first free
	 * block from there.
	 */
	loops = 10;
	do {
		f->last_free_lookup = (f->num_maps - 1) *
					(r / (OS_RAND_MAX + 1.0));
		if (!get_next_free_block(td, f, ddir, b))
			return 0;

		r = os_random_long(&td->random_state);
	} while (--loops);

	/*
	 * that didn't work either, try exhaustive search from the start
	 */
	f->last_free_lookup = 0;
	return get_next_free_block(td, f, ddir, b);
}
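
/*
 * Summary of the lookup strategy in get_next_rand_offset() above: up to
 * 5 purely random picks are checked against the random map; failing
 * that, up to 10 attempts seed last_free_lookup at a random map index
 * and scan forward for a free block; as a last resort the map is
 * scanned exhaustively from index 0.
 */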

/*
 * For random io, generate a random new block and see if it's used. Repeat
 * until we find a free one. For sequential io, just return the end of
 * the last io issued.
 */
static int get_next_offset(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;
	unsigned long long b;
	enum fio_ddir ddir = io_u->ddir;

	if (td_random(td) && (td->o.ddir_nr && !--td->ddir_nr)) {
		td->ddir_nr = td->o.ddir_nr;

		if (get_next_rand_offset(td, f, ddir, &b))
			return 1;
	} else {
		if (f->last_pos >= f->real_file_size) {
			if (!td_random(td) ||
			     get_next_rand_offset(td, f, ddir, &b))
				return 1;
		} else
			b = (f->last_pos - f->file_offset) / td->o.min_bs[ddir];
	}

	io_u->offset = b * td->o.min_bs[ddir];
	if (io_u->offset >= f->io_size) {
		dprint(FD_IO, "get_next_offset: offset %llu >= io_size %llu\n",
					io_u->offset, f->io_size);
		return 1;
	}

	io_u->offset += f->file_offset;
	if (io_u->offset >= f->real_file_size) {
		dprint(FD_IO, "get_next_offset: offset %llu >= size %llu\n",
					io_u->offset, f->real_file_size);
		return 1;
	}

	return 0;
}

static inline int is_power_of_2(unsigned int val)
{
	return (val != 0 && ((val & (val - 1)) == 0));
}

static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u)
{
	const int ddir = io_u->ddir;
	unsigned int uninitialized_var(buflen);
	unsigned int minbs, maxbs;
	long r;

	minbs = td->o.min_bs[ddir];
	maxbs = td->o.max_bs[ddir];

	if (minbs == maxbs)
		buflen = minbs;
	else {
		r = os_random_long(&td->bsrange_state);
		if (!td->o.bssplit_nr) {
			buflen = 1 + (unsigned int) ((double) maxbs *
					(r / (OS_RAND_MAX + 1.0)));
			if (buflen < minbs)
				buflen = minbs;
		} else {
			long perc = 0;
			unsigned int i;

			for (i = 0; i < td->o.bssplit_nr; i++) {
				struct bssplit *bsp = &td->o.bssplit[i];

				buflen = bsp->bs;
				perc += bsp->perc;
				if (r <= ((OS_RAND_MAX / 100L) * perc))
					break;
			}
		}
		if (!td->o.bs_unaligned && is_power_of_2(minbs))
			buflen = (buflen + minbs - 1) & ~(minbs - 1);
	}

	if (io_u->offset + buflen > io_u->file->real_file_size) {
		dprint(FD_IO, "lower buflen %u -> %u (ddir=%d)\n", buflen,
						minbs, ddir);
		buflen = minbs;
	}

	return buflen;
}

static void set_rwmix_bytes(struct thread_data *td)
{
	unsigned int diff;

	/*
	 * we do time or byte based switch. this is needed because
	 * buffered writes may issue a lot quicker than they complete,
	 * whereas reads do not.
	 */
	diff = td->o.rwmix[td->rwmix_ddir ^ 1];
	td->rwmix_issues = (td->io_issues[td->rwmix_ddir] * diff) / 100;
}

static inline enum fio_ddir get_rand_ddir(struct thread_data *td)
{
	unsigned int v;
	long r;

	r = os_random_long(&td->rwmix_state);
	v = 1 + (int) (100.0 * (r / (OS_RAND_MAX + 1.0)));
	if (v <= td->o.rwmix[DDIR_READ])
		return DDIR_READ;

	return DDIR_WRITE;
}
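
/*
 * Example of the selection in get_rand_ddir() above: with a read mix of
 * 70, v is uniform in 1..100 and v <= 70 returns DDIR_READ, so reads are
 * picked roughly 70% of the time (before the per-direction byte cap
 * applied by the caller).
 */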

/*
 * Return the data direction for the next io_u. If the job is a
 * mixed read/write workload, check the rwmix cycle and switch if
 * necessary.
 */
static enum fio_ddir get_rw_ddir(struct thread_data *td)
{
	if (td_rw(td)) {
		/*
		 * Check if it's time to seed a new data direction.
		 */
		if (td->io_issues[td->rwmix_ddir] >= td->rwmix_issues) {
			unsigned long long max_bytes;
			enum fio_ddir ddir;

			/*
			 * Put a top limit on how many bytes we do for
			 * one data direction, to avoid overflowing the
			 * ranges too much
			 */
			ddir = get_rand_ddir(td);
			max_bytes = td->this_io_bytes[ddir];
			if (max_bytes >=
			    (td->o.size * td->o.rwmix[ddir] / 100)) {
				if (!td->rw_end_set[ddir]) {
					td->rw_end_set[ddir] = 1;
					fio_gettime(&td->rw_end[ddir], NULL);
				}

				ddir ^= 1;
			}

			if (ddir != td->rwmix_ddir)
				set_rwmix_bytes(td);

			td->rwmix_ddir = ddir;
		}
		return td->rwmix_ddir;
	} else if (td_read(td))
		return DDIR_READ;
	else
		return DDIR_WRITE;
}

static void put_file_log(struct thread_data *td, struct fio_file *f)
{
	int ret = put_file(td, f);

	if (ret)
		td_verror(td, ret, "file close");
}

void put_io_u(struct thread_data *td, struct io_u *io_u)
{
	assert((io_u->flags & IO_U_F_FREE) == 0);
	io_u->flags |= IO_U_F_FREE;

	if (io_u->file)
		put_file_log(td, io_u->file);

	io_u->file = NULL;
	flist_del(&io_u->list);
	flist_add(&io_u->list, &td->io_u_freelist);
	td->cur_depth--;
}

void requeue_io_u(struct thread_data *td, struct io_u **io_u)
{
	struct io_u *__io_u = *io_u;

	dprint(FD_IO, "requeue %p\n", __io_u);

	__io_u->flags |= IO_U_F_FREE;
	if ((__io_u->flags & IO_U_F_FLIGHT) && (__io_u->ddir != DDIR_SYNC))
		td->io_issues[__io_u->ddir]--;

	__io_u->flags &= ~IO_U_F_FLIGHT;

	flist_del(&__io_u->list);
	flist_add_tail(&__io_u->list, &td->io_u_requeues);
	td->cur_depth--;
	*io_u = NULL;
}

static int fill_io_u(struct thread_data *td, struct io_u *io_u)
{
	if (td->io_ops->flags & FIO_NOIO)
		goto out;

	/*
	 * see if it's time to sync
	 */
	if (td->o.fsync_blocks &&
	    !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks) &&
	    td->io_issues[DDIR_WRITE] && should_fsync(td)) {
		io_u->ddir = DDIR_SYNC;
		goto out;
	}

	io_u->ddir = get_rw_ddir(td);

	/*
	 * See if it's time to switch to a new zone
	 */
	if (td->zone_bytes >= td->o.zone_size) {
		td->zone_bytes = 0;
		io_u->file->last_pos += td->o.zone_skip;
		td->io_skip_bytes += td->o.zone_skip;
	}

	/*
	 * No log, let the seq/rand engine retrieve the next buflen and
	 * position.
	 */
	if (get_next_offset(td, io_u)) {
		dprint(FD_IO, "io_u %p, failed getting offset\n", io_u);
		return 1;
	}

	io_u->buflen = get_next_buflen(td, io_u);
	if (!io_u->buflen) {
		dprint(FD_IO, "io_u %p, failed getting buflen\n", io_u);
		return 1;
	}

	if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
		dprint(FD_IO, "io_u %p, offset too large\n", io_u);
		dprint(FD_IO, " off=%llu/%lu > %llu\n", io_u->offset,
				io_u->buflen, io_u->file->real_file_size);
		return 1;
	}

	/*
	 * mark entry before potentially trimming io_u
	 */
	if (td_random(td) && file_randommap(td, io_u->file))
		mark_random_map(td, io_u);

	/*
	 * If using a write iolog, store this entry.
	 */
out:
	dprint_io_u(io_u, "fill_io_u");
	td->zone_bytes += io_u->buflen;
	log_io_u(td, io_u);
	return 0;
}
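
/*
 * The submit/complete histograms below bucket a batch size 'nr' into
 * index 0 for 0, 1 for 1-4, 2 for 5-8, 3 for 9-16, 4 for 17-32,
 * 5 for 33-64 and 6 for anything larger.
 */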

static void __io_u_mark_map(unsigned int *map, unsigned int nr)
{
	int index = 0;

	switch (nr) {
	default:
		index = 6;
		break;
	case 33 ... 64:
		index = 5;
		break;
	case 17 ... 32:
		index = 4;
		break;
	case 9 ... 16:
		index = 3;
		break;
	case 5 ... 8:
		index = 2;
		break;
	case 1 ... 4:
		index = 1;
	case 0:
		break;
	}

	map[index]++;
}

void io_u_mark_submit(struct thread_data *td, unsigned int nr)
{
	__io_u_mark_map(td->ts.io_u_submit, nr);
	td->ts.total_submit++;
}

void io_u_mark_complete(struct thread_data *td, unsigned int nr)
{
	__io_u_mark_map(td->ts.io_u_complete, nr);
	td->ts.total_complete++;
}

void io_u_mark_depth(struct thread_data *td, unsigned int nr)
{
	int index = 0;

	switch (td->cur_depth) {
	default:
		index = 6;
		break;
	case 32 ... 63:
		index = 5;
		break;
	case 16 ... 31:
		index = 4;
		break;
	case 8 ... 15:
		index = 3;
		break;
	case 4 ... 7:
		index = 2;
		break;
	case 2 ... 3:
		index = 1;
	case 1:
		break;
	}

	td->ts.io_u_map[index] += nr;
}

static void io_u_mark_lat_usec(struct thread_data *td, unsigned long usec)
{
	int index = 0;

	assert(usec < 1000);

	switch (usec) {
	case 750 ... 999:
		index = 9;
		break;
	case 500 ... 749:
		index = 8;
		break;
	case 250 ... 499:
		index = 7;
		break;
	case 100 ... 249:
		index = 6;
		break;
	case 50 ... 99:
		index = 5;
		break;
	case 20 ... 49:
		index = 4;
		break;
	case 10 ... 19:
		index = 3;
		break;
	case 4 ... 9:
		index = 2;
		break;
	case 2 ... 3:
		index = 1;
	case 0 ... 1:
		break;
	}

	assert(index < FIO_IO_U_LAT_U_NR);
	td->ts.io_u_lat_u[index]++;
}
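
/*
 * io_u_mark_lat_msec() below mirrors the sub-millisecond buckets above,
 * but in milliseconds: io_u_mark_latency() routes completions under
 * 1 msec to the usec histogram, everything else lands here.
 */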

static void io_u_mark_lat_msec(struct thread_data *td, unsigned long msec)
{
	int index = 0;

	switch (msec) {
	default:
		index = 11;
		break;
	case 1000 ... 1999:
		index = 10;
		break;
	case 750 ... 999:
		index = 9;
		break;
	case 500 ... 749:
		index = 8;
		break;
	case 250 ... 499:
		index = 7;
		break;
	case 100 ... 249:
		index = 6;
		break;
	case 50 ... 99:
		index = 5;
		break;
	case 20 ... 49:
		index = 4;
		break;
	case 10 ... 19:
		index = 3;
		break;
	case 4 ... 9:
		index = 2;
		break;
	case 2 ... 3:
		index = 1;
	case 0 ... 1:
		break;
	}

	assert(index < FIO_IO_U_LAT_M_NR);
	td->ts.io_u_lat_m[index]++;
}

static void io_u_mark_latency(struct thread_data *td, unsigned long usec)
{
	if (usec < 1000)
		io_u_mark_lat_usec(td, usec);
	else
		io_u_mark_lat_msec(td, usec / 1000);
}

/*
 * Get next file to service by choosing one at random
 */
static struct fio_file *get_next_file_rand(struct thread_data *td, int goodf,
					   int badf)
{
	struct fio_file *f;
	int fno;

	do {
		long r = os_random_long(&td->next_file_state);

		fno = (unsigned int) ((double) td->o.nr_files
			* (r / (OS_RAND_MAX + 1.0)));
		f = td->files[fno];
		if (f->flags & FIO_FILE_DONE)
			continue;

		if ((!goodf || (f->flags & goodf)) && !(f->flags & badf)) {
			dprint(FD_FILE, "get_next_file_rand: %p\n", f);
			return f;
		}
	} while (1);
}

/*
 * Get next file to service by doing round robin between all available ones
 */
static struct fio_file *get_next_file_rr(struct thread_data *td, int goodf,
					 int badf)
{
	unsigned int old_next_file = td->next_file;
	struct fio_file *f;

	do {
		f = td->files[td->next_file];

		td->next_file++;
		if (td->next_file >= td->o.nr_files)
			td->next_file = 0;

		if (f->flags & FIO_FILE_DONE) {
			f = NULL;
			continue;
		}

		if ((!goodf || (f->flags & goodf)) && !(f->flags & badf))
			break;

		f = NULL;
	} while (td->next_file != old_next_file);

	dprint(FD_FILE, "get_next_file_rr: %p\n", f);
	return f;
}

static struct fio_file *get_next_file(struct thread_data *td)
{
	struct fio_file *f;

	assert(td->o.nr_files <= td->files_index);

	if (!td->nr_open_files || td->nr_done_files >= td->o.nr_files) {
		dprint(FD_FILE, "get_next_file: nr_open=%d, nr_done=%d,"
				" nr_files=%d\n", td->nr_open_files,
				td->nr_done_files, td->o.nr_files);
		return NULL;
	}

	f = td->file_service_file;
	if (f && (f->flags & FIO_FILE_OPEN) && td->file_service_left--)
		goto out;

	if (td->o.file_service_type == FIO_FSERVICE_RR)
		f = get_next_file_rr(td, FIO_FILE_OPEN, FIO_FILE_CLOSING);
	else
		f = get_next_file_rand(td, FIO_FILE_OPEN, FIO_FILE_CLOSING);

	td->file_service_file = f;
	td->file_service_left = td->file_service_nr - 1;
out:
	dprint(FD_FILE, "get_next_file: %p\n", f);
	return f;
}

static struct fio_file *find_next_new_file(struct thread_data *td)
{
	struct fio_file *f;

	if (!td->nr_open_files || td->nr_done_files >= td->o.nr_files)
		return NULL;

	if (td->o.file_service_type == FIO_FSERVICE_RR)
		f = get_next_file_rr(td, 0, FIO_FILE_OPEN);
	else
		f = get_next_file_rand(td, 0, FIO_FILE_OPEN);

	return f;
}
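
/*
 * set_io_u_file() below ties the pieces together: pick the next file to
 * service, try to fill the io_u from it, and if the file is exhausted
 * close it, mark it done and (when open_files allows) open a fresh file
 * before retrying.
 */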

static int set_io_u_file(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f;

	do {
		f = get_next_file(td);
		if (!f)
			return 1;

set_file:
		io_u->file = f;
		get_file(f);

		if (!fill_io_u(td, io_u))
			break;

		/*
		 * optimization to prevent close/open of the same file. This
		 * way we preserve queueing etc.
		 */
		if (td->o.nr_files == 1 && td->o.time_based) {
			put_file_log(td, f);
			fio_file_reset(f);
			goto set_file;
		}

		/*
		 * td_io_close() does a put_file() as well, so no need to
		 * do that here.
		 */
		io_u->file = NULL;
		td_io_close_file(td, f);
		f->flags |= FIO_FILE_DONE;
		td->nr_done_files++;

		/*
		 * probably not the right place to do this, but see
		 * if we need to open a new file
		 */
		if (td->nr_open_files < td->o.open_files &&
		    td->o.open_files != td->o.nr_files) {
			f = find_next_new_file(td);

			if (!f || td_io_open_file(td, f))
				return 1;

			goto set_file;
		}
	} while (1);

	return 0;
}

struct io_u *__get_io_u(struct thread_data *td)
{
	struct io_u *io_u = NULL;

	if (!flist_empty(&td->io_u_requeues))
		io_u = flist_entry(td->io_u_requeues.next, struct io_u, list);
	else if (!queue_full(td)) {
		io_u = flist_entry(td->io_u_freelist.next, struct io_u, list);

		io_u->buflen = 0;
		io_u->resid = 0;
		io_u->file = NULL;
		io_u->end_io = NULL;
	}

	if (io_u) {
		assert(io_u->flags & IO_U_F_FREE);
		io_u->flags &= ~IO_U_F_FREE;

		io_u->error = 0;
		flist_del(&io_u->list);
		flist_add(&io_u->list, &td->io_u_busylist);
		td->cur_depth++;
	}

	return io_u;
}
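
/*
 * Note on __get_io_u() above: requeued io_us are preferred over fresh
 * ones from the free list, and a requeued io_u keeps its file, offset
 * and buffer length, so get_io_u() can skip the setup path for it.
 */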

/*
 * Return an io_u to be processed. Gets a buflen and offset, sets direction,
 * etc. The returned io_u is fully ready to be prepped and submitted.
 */
struct io_u *get_io_u(struct thread_data *td)
{
	struct fio_file *f;
	struct io_u *io_u;

	io_u = __get_io_u(td);
	if (!io_u) {
		dprint(FD_IO, "__get_io_u failed\n");
		return NULL;
	}

	/*
	 * from a requeue, io_u already setup
	 */
	if (io_u->file)
		goto out;

	/*
	 * If using an iolog, grab next piece if any available.
	 */
	if (td->o.read_iolog_file) {
		if (read_iolog_get(td, io_u))
			goto err_put;
	} else if (set_io_u_file(td, io_u)) {
		dprint(FD_IO, "io_u %p, setting file failed\n", io_u);
		goto err_put;
	}

	f = io_u->file;
	assert(f->flags & FIO_FILE_OPEN);

	if (io_u->ddir != DDIR_SYNC) {
		if (!io_u->buflen && !(td->io_ops->flags & FIO_NOIO)) {
			dprint(FD_IO, "get_io_u: zero buflen on %p\n", io_u);
			goto err_put;
		}

		f->last_pos = io_u->offset + io_u->buflen;

		if (td->o.verify != VERIFY_NONE)
			populate_verify_io_u(td, io_u);
		else if (td->o.refill_buffers && io_u->ddir == DDIR_WRITE)
			io_u_fill_buffer(td, io_u, io_u->xfer_buflen);
	}

	/*
	 * Set io data pointers.
	 */
	io_u->endpos = io_u->offset + io_u->buflen;
	io_u->xfer_buf = io_u->buf;
	io_u->xfer_buflen = io_u->buflen;

out:
	if (!td_io_prep(td, io_u)) {
		fio_gettime(&io_u->start_time, NULL);
		return io_u;
	}
err_put:
	dprint(FD_IO, "get_io_u failed\n");
	put_io_u(td, io_u);
	return NULL;
}

void io_u_log_error(struct thread_data *td, struct io_u *io_u)
{
	const char *msg[] = { "read", "write", "sync" };

	log_err("fio: io_u error");

	if (io_u->file)
		log_err(" on file %s", io_u->file->file_name);

	log_err(": %s\n", strerror(io_u->error));

	log_err(" %s offset=%llu, buflen=%lu\n", msg[io_u->ddir],
					io_u->offset, io_u->xfer_buflen);

	if (!td->error)
		td_verror(td, io_u->error, "io_u error");
}

static void io_completed(struct thread_data *td, struct io_u *io_u,
			 struct io_completion_data *icd)
{
	unsigned long usec;

	dprint_io_u(io_u, "io complete");

	assert(io_u->flags & IO_U_F_FLIGHT);
	io_u->flags &= ~IO_U_F_FLIGHT;

	if (io_u->ddir == DDIR_SYNC) {
		td->last_was_sync = 1;
		return;
	}

	td->last_was_sync = 0;

	if (!io_u->error) {
		unsigned int bytes = io_u->buflen - io_u->resid;
		const enum fio_ddir idx = io_u->ddir;
		int ret;

		td->io_blocks[idx]++;
		td->io_bytes[idx] += bytes;
		td->this_io_bytes[idx] += bytes;

		if (ramp_time_over(td)) {
			usec = utime_since(&io_u->issue_time, &icd->time);

			add_clat_sample(td, idx, usec);
			add_bw_sample(td, idx, &icd->time);
			io_u_mark_latency(td, usec);
		}

		if (td_write(td) && idx == DDIR_WRITE &&
		    td->o.do_verify &&
		    td->o.verify != VERIFY_NONE)
			log_io_piece(td, io_u);

		icd->bytes_done[idx] += bytes;

		if (io_u->end_io) {
			ret = io_u->end_io(td, io_u);
			if (ret && !icd->error)
				icd->error = ret;
		}
	} else {
		icd->error = io_u->error;
		io_u_log_error(td, io_u);
	}
}

static void init_icd(struct io_completion_data *icd, int nr)
{
	fio_gettime(&icd->time, NULL);

	icd->nr = nr;

	icd->error = 0;
	icd->bytes_done[0] = icd->bytes_done[1] = 0;
}

static void ios_completed(struct thread_data *td,
			  struct io_completion_data *icd)
{
	struct io_u *io_u;
	int i;

	for (i = 0; i < icd->nr; i++) {
		io_u = td->io_ops->event(td, i);

		io_completed(td, io_u, icd);
		put_io_u(td, io_u);
	}
}

/*
 * Complete a single io_u for the sync engines.
 */
long io_u_sync_complete(struct thread_data *td, struct io_u *io_u)
{
	struct io_completion_data icd;

	init_icd(&icd, 1);
	io_completed(td, io_u, &icd);
	put_io_u(td, io_u);

	if (!icd.error)
		return icd.bytes_done[0] + icd.bytes_done[1];

	td_verror(td, icd.error, "io_u_sync_complete");
	return -1;
}
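
/*
 * io_u_sync_complete() above and io_u_queued_complete() below share a
 * return convention: total bytes completed (summed over both data
 * directions) on success, or -1 with td->error set when a completion
 * carried an error. io_u_queued_complete() additionally passes through
 * the td_io_getevents() result when that call fails or returns no
 * events.
 */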

/*
 * Called to complete min_events number of io for the async engines.
 */
long io_u_queued_complete(struct thread_data *td, int min_evts)
{
	struct io_completion_data icd;
	struct timespec *tvp = NULL;
	int ret;
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 0, };

	dprint(FD_IO, "io_u_queued_completed: min=%d\n", min_evts);

	if (!min_evts)
		tvp = &ts;

	ret = td_io_getevents(td, min_evts, td->o.iodepth_batch_complete, tvp);
	if (ret < 0) {
		td_verror(td, -ret, "td_io_getevents");
		return ret;
	} else if (!ret)
		return ret;

	init_icd(&icd, ret);
	ios_completed(td, &icd);
	if (!icd.error)
		return icd.bytes_done[0] + icd.bytes_done[1];

	td_verror(td, icd.error, "io_u_queued_complete");
	return -1;
}

/*
 * Call when io_u is really queued, to update the submission latency.
 */
void io_u_queued(struct thread_data *td, struct io_u *io_u)
{
	unsigned long slat_time;

	slat_time = utime_since(&io_u->start_time, &io_u->issue_time);
	add_slat_sample(td, io_u->ddir, slat_time);
}

/*
 * "randomly" fill the buffer contents
 */
void io_u_fill_buffer(struct thread_data *td, struct io_u *io_u,
		      unsigned int max_bs)
{
	long *ptr = io_u->buf;

	if (!td->o.zero_buffers) {
		while ((void *) ptr - io_u->buf < max_bs) {
			*ptr = rand() * GOLDEN_RATIO_PRIME;
			ptr++;
		}
	} else
		memset(ptr, 0, max_bs);
}

#ifdef FIO_USE_TIMEOUT
void io_u_set_timeout(struct thread_data *td)
{
	assert(td->cur_depth);

	td->timer.it_interval.tv_sec = 0;
	td->timer.it_interval.tv_usec = 0;
	td->timer.it_value.tv_sec = IO_U_TIMEOUT + IO_U_TIMEOUT_INC;
	td->timer.it_value.tv_usec = 0;
	setitimer(ITIMER_REAL, &td->timer, NULL);
	fio_gettime(&td->timeout_end, NULL);
}

static void io_u_dump(struct io_u *io_u)
{
	unsigned long t_start = mtime_since_now(&io_u->start_time);
	unsigned long t_issue = mtime_since_now(&io_u->issue_time);

	log_err("io_u=%p, t_start=%lu, t_issue=%lu\n", io_u, t_start, t_issue);
	log_err(" buf=%p/%p, len=%lu/%lu, offset=%llu\n", io_u->buf,
						io_u->xfer_buf, io_u->buflen,
						io_u->xfer_buflen,
						io_u->offset);
	log_err(" ddir=%d, fname=%s\n", io_u->ddir, io_u->file->file_name);
}
#else
void io_u_set_timeout(struct thread_data fio_unused *td)
{
}
#endif

#ifdef FIO_USE_TIMEOUT
static void io_u_timeout_handler(int fio_unused sig)
{
	struct thread_data *td, *__td;
	pid_t pid = getpid();
	struct flist_head *entry;
	struct io_u *io_u;
	int i;

	log_err("fio: io_u timeout\n");

	/*
	 * TLS would be nice...
	 */
	td = NULL;
	for_each_td(__td, i) {
		if (__td->pid == pid) {
			td = __td;
			break;
		}
	}

	if (!td) {
		log_err("fio: io_u timeout, can't find job\n");
		exit(1);
	}

	if (!td->cur_depth) {
		log_err("fio: timeout without pending work?\n");
		return;
	}

	log_err("fio: io_u timeout: job=%s, pid=%d\n", td->o.name, td->pid);

	flist_for_each(entry, &td->io_u_busylist) {
		io_u = flist_entry(entry, struct io_u, list);

		io_u_dump(io_u);
	}

	td_verror(td, ETIMEDOUT, "io_u timeout");
	exit(1);
}
#endif

void io_u_init_timeout(void)
{
#ifdef FIO_USE_TIMEOUT
	signal(SIGALRM, io_u_timeout_handler);
#endif
}