io_u.c revision f940128526dbe468a1951cce10c2fe5dbd23875f
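/*
 * io_u handling: this file selects the offset, block size, data direction
 * and target file for each io_u, and accounts for completions (latency,
 * bandwidth, IOPS, rwmix and rate bookkeeping). It also implements the
 * latency-target queue depth ramping and the buffer fill/scramble helpers.
 *
 * Rough lifecycle, as driven by the backend: get_io_u() returns a prepared
 * io_u, io_u_queued() records submission latency once it has been queued,
 * and io_u_sync_complete() / io_u_queued_complete() reap completions and
 * feed io_completed(); put_io_u() or requeue_io_u() return the io_u
 * afterwards.
 */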
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <signal.h>
#include <time.h>
#include <assert.h>

#include "fio.h"
#include "hash.h"
#include "verify.h"
#include "trim.h"
#include "lib/rand.h"
#include "lib/axmap.h"

struct io_completion_data {
	int nr;				/* input */

	int error;			/* output */
	uint64_t bytes_done[DDIR_RWDIR_CNT];	/* output */
	struct timeval time;		/* output */
};

/*
 * The ->io_axmap contains a map of blocks we have or have not done io
 * to yet. Used to make sure we cover the entire range in a fair fashion.
 */
static int random_map_free(struct fio_file *f, const uint64_t block)
{
	return !axmap_isset(f->io_axmap, block);
}

/*
 * Mark a given offset as used in the map.
 */
static void mark_random_map(struct thread_data *td, struct io_u *io_u)
{
	unsigned int min_bs = td->o.rw_min_bs;
	struct fio_file *f = io_u->file;
	unsigned int nr_blocks;
	uint64_t block;

	block = (io_u->offset - f->file_offset) / (uint64_t) min_bs;
	nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;

	if (!(io_u->flags & IO_U_F_BUSY_OK))
		nr_blocks = axmap_set_nr(f->io_axmap, block, nr_blocks);

	if ((nr_blocks * min_bs) < io_u->buflen)
		io_u->buflen = nr_blocks * min_bs;
}

static uint64_t last_block(struct thread_data *td, struct fio_file *f,
			   enum fio_ddir ddir)
{
	uint64_t max_blocks;
	uint64_t max_size;

	assert(ddir_rw(ddir));

	/*
	 * Hmm, should we make sure that ->io_size <= ->real_file_size?
	 */
	max_size = f->io_size;
	if (max_size > f->real_file_size)
		max_size = f->real_file_size;

	if (td->o.zone_range)
		max_size = td->o.zone_range;

	max_blocks = max_size / (uint64_t) td->o.ba[ddir];
	if (!max_blocks)
		return 0;

	return max_blocks;
}

struct rand_off {
	struct flist_head list;
	uint64_t off;
};

static int __get_next_rand_offset(struct thread_data *td, struct fio_file *f,
				  enum fio_ddir ddir, uint64_t *b)
{
	uint64_t r, lastb;

	lastb = last_block(td, f, ddir);
	if (!lastb)
		return 1;

	if (td->o.random_generator == FIO_RAND_GEN_TAUSWORTHE) {
		uint64_t rmax;

		rmax = td->o.use_os_rand ? OS_RAND_MAX : FRAND_MAX;

		if (td->o.use_os_rand) {
			rmax = OS_RAND_MAX;
			r = os_random_long(&td->random_state);
		} else {
			rmax = FRAND_MAX;
			r = __rand(&td->__random_state);
		}

		dprint(FD_RANDOM, "off rand %llu\n", (unsigned long long) r);

		*b = (lastb - 1) * (r / ((uint64_t) rmax + 1.0));
	} else {
		uint64_t off = 0;

		if (lfsr_next(&f->lfsr, &off, lastb))
			return 1;

		*b = off;
	}

	/*
	 * if we are not maintaining a random map, we are done.
	 */
	if (!file_randommap(td, f))
		goto ret;

	/*
	 * calculate map offset and check if it's free
	 */
	if (random_map_free(f, *b))
		goto ret;

	dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n",
			(unsigned long long) *b);

	*b = axmap_next_free(f->io_axmap, *b);
	if (*b == (uint64_t) -1ULL)
		return 1;
ret:
	return 0;
}

static int __get_next_rand_offset_zipf(struct thread_data *td,
				       struct fio_file *f, enum fio_ddir ddir,
				       uint64_t *b)
{
	*b = zipf_next(&f->zipf);
	return 0;
}

static int __get_next_rand_offset_pareto(struct thread_data *td,
					 struct fio_file *f, enum fio_ddir ddir,
					 uint64_t *b)
{
	*b = pareto_next(&f->zipf);
	return 0;
}

static int flist_cmp(void *data, struct flist_head *a, struct flist_head *b)
{
	struct rand_off *r1 = flist_entry(a, struct rand_off, list);
	struct rand_off *r2 = flist_entry(b, struct rand_off, list);

	return r1->off - r2->off;
}

static int get_off_from_method(struct thread_data *td, struct fio_file *f,
			       enum fio_ddir ddir, uint64_t *b)
{
	if (td->o.random_distribution == FIO_RAND_DIST_RANDOM)
		return __get_next_rand_offset(td, f, ddir, b);
	else if (td->o.random_distribution == FIO_RAND_DIST_ZIPF)
		return __get_next_rand_offset_zipf(td, f, ddir, b);
	else if (td->o.random_distribution == FIO_RAND_DIST_PARETO)
		return __get_next_rand_offset_pareto(td, f, ddir, b);

	log_err("fio: unknown random distribution: %d\n", td->o.random_distribution);
	return 1;
}

/*
 * Sort the reads for a verify phase in batches of verifysort_nr, if
 * specified.
 */
static inline int should_sort_io(struct thread_data *td)
{
	if (!td->o.verifysort_nr || !td->o.do_verify)
		return 0;
	if (!td_random(td))
		return 0;
	if (td->runstate != TD_VERIFYING)
		return 0;
	if (td->o.random_generator == FIO_RAND_GEN_TAUSWORTHE)
		return 0;

	return 1;
}

static int should_do_random(struct thread_data *td, enum fio_ddir ddir)
{
	unsigned int v;
	unsigned long r;

	if (td->o.perc_rand[ddir] == 100)
		return 1;

	if (td->o.use_os_rand) {
		r = os_random_long(&td->seq_rand_state[ddir]);
		v = 1 + (int) (100.0 * (r / (OS_RAND_MAX + 1.0)));
	} else {
		r = __rand(&td->__seq_rand_state[ddir]);
		v = 1 + (int) (100.0 * (r / (FRAND_MAX + 1.0)));
	}

	return v <= td->o.perc_rand[ddir];
}

static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
				enum fio_ddir ddir, uint64_t *b)
{
	struct rand_off *r;
	int i, ret = 1;

	if (!should_sort_io(td))
		return get_off_from_method(td, f, ddir, b);

	if (!flist_empty(&td->next_rand_list)) {
		struct rand_off *r;
fetch:
		r = flist_entry(td->next_rand_list.next, struct rand_off, list);
		flist_del(&r->list);
		*b = r->off;
		free(r);
		return 0;
	}

	for (i = 0; i < td->o.verifysort_nr; i++) {
		r = malloc(sizeof(*r));

		ret = get_off_from_method(td, f, ddir, &r->off);
		if (ret) {
			free(r);
			break;
		}

		flist_add(&r->list, &td->next_rand_list);
	}

	if (ret && !i)
		return ret;

	assert(!flist_empty(&td->next_rand_list));
	flist_sort(NULL, &td->next_rand_list, flist_cmp);
	goto fetch;
}

static int get_next_rand_block(struct thread_data *td, struct fio_file *f,
			       enum fio_ddir ddir, uint64_t *b)
{
	if (!get_next_rand_offset(td, f, ddir, b))
		return 0;

	if (td->o.time_based) {
		fio_file_reset(td, f);
		if (!get_next_rand_offset(td, f, ddir, b))
			return 0;
	}

	dprint(FD_IO, "%s: rand offset failed, last=%llu, size=%llu\n",
			f->file_name, (unsigned long long) f->last_pos,
			(unsigned long long) f->real_file_size);
	return 1;
}

static int get_next_seq_offset(struct thread_data *td, struct fio_file *f,
			       enum fio_ddir ddir, uint64_t *offset)
{
	assert(ddir_rw(ddir));

	if (f->last_pos >= f->io_size + get_start_offset(td) && td->o.time_based)
		f->last_pos = f->last_pos - f->io_size;

	if (f->last_pos < f->real_file_size) {
		uint64_t pos;

		if (f->last_pos == f->file_offset && td->o.ddir_seq_add < 0)
			f->last_pos = f->real_file_size;

		pos = f->last_pos - f->file_offset;
		if (pos)
			pos += td->o.ddir_seq_add;

		*offset = pos;
		return 0;
	}

	return 1;
}

static int get_next_block(struct thread_data *td, struct io_u *io_u,
			  enum fio_ddir ddir, int rw_seq,
			  unsigned int *is_random)
{
	struct fio_file *f = io_u->file;
	uint64_t b, offset;
	int ret;

	assert(ddir_rw(ddir));

	b = offset = -1ULL;

	if (rw_seq) {
		if (td_random(td)) {
			if (should_do_random(td, ddir)) {
				ret = get_next_rand_block(td, f, ddir, &b);
				*is_random = 1;
			} else {
				*is_random = 0;
				io_u->flags |= IO_U_F_BUSY_OK;
				ret = get_next_seq_offset(td, f, ddir, &offset);
				if (ret)
					ret = get_next_rand_block(td, f, ddir, &b);
			}
		} else {
			*is_random = 0;
			ret = get_next_seq_offset(td, f, ddir, &offset);
		}
	} else {
		io_u->flags |= IO_U_F_BUSY_OK;
		*is_random = 0;

		if (td->o.rw_seq == RW_SEQ_SEQ) {
			ret = get_next_seq_offset(td, f, ddir, &offset);
			if (ret) {
				ret = get_next_rand_block(td, f, ddir, &b);
				*is_random = 0;
			}
		} else if (td->o.rw_seq == RW_SEQ_IDENT) {
			if (f->last_start != -1ULL)
				offset = f->last_start - f->file_offset;
			else
				offset = 0;
			ret = 0;
		} else {
			log_err("fio: unknown rw_seq=%d\n", td->o.rw_seq);
			ret = 1;
		}
	}

	if (!ret) {
		if (offset != -1ULL)
			io_u->offset = offset;
		else if (b != -1ULL)
			io_u->offset = b * td->o.ba[ddir];
		else {
			log_err("fio: bug in offset generation: offset=%llu, b=%llu\n", (unsigned long long) offset, (unsigned long long) b);
			ret = 1;
		}
	}

	return ret;
}

/*
 * For random io, generate a random new block and see if it's used. Repeat
 * until we find a free one. For sequential io, just return the end of
 * the last io issued.
 */
static int __get_next_offset(struct thread_data *td, struct io_u *io_u,
			     unsigned int *is_random)
{
	struct fio_file *f = io_u->file;
	enum fio_ddir ddir = io_u->ddir;
	int rw_seq_hit = 0;

	assert(ddir_rw(ddir));

	if (td->o.ddir_seq_nr && !--td->ddir_seq_nr) {
		rw_seq_hit = 1;
		td->ddir_seq_nr = td->o.ddir_seq_nr;
	}

	if (get_next_block(td, io_u, ddir, rw_seq_hit, is_random))
		return 1;

	if (io_u->offset >= f->io_size) {
		dprint(FD_IO, "get_next_offset: offset %llu >= io_size %llu\n",
					(unsigned long long) io_u->offset,
					(unsigned long long) f->io_size);
		return 1;
	}

	io_u->offset += f->file_offset;
	if (io_u->offset >= f->real_file_size) {
		dprint(FD_IO, "get_next_offset: offset %llu >= size %llu\n",
					(unsigned long long) io_u->offset,
					(unsigned long long) f->real_file_size);
		return 1;
	}

	return 0;
}

static int get_next_offset(struct thread_data *td, struct io_u *io_u,
			   unsigned int *is_random)
{
	if (td->flags & TD_F_PROFILE_OPS) {
		struct prof_io_ops *ops = &td->prof_io_ops;

		if (ops->fill_io_u_off)
			return ops->fill_io_u_off(td, io_u, is_random);
	}

	return __get_next_offset(td, io_u, is_random);
}

static inline int io_u_fits(struct thread_data *td, struct io_u *io_u,
			    unsigned int buflen)
{
	struct fio_file *f = io_u->file;

	return io_u->offset + buflen <= f->io_size + get_start_offset(td);
}

static unsigned int __get_next_buflen(struct thread_data *td, struct io_u *io_u,
				      unsigned int is_random)
{
	int ddir = io_u->ddir;
	unsigned int buflen = 0;
	unsigned int minbs, maxbs;
	unsigned long r, rand_max;

	assert(ddir_rw(io_u->ddir));

	if (td->o.bs_is_seq_rand)
		ddir = is_random ? DDIR_WRITE: DDIR_READ;
	else
		ddir = io_u->ddir;

	minbs = td->o.min_bs[ddir];
	maxbs = td->o.max_bs[ddir];

	if (minbs == maxbs)
		return minbs;

	/*
	 * If we can't satisfy the min block size from here, then fail
	 */
	if (!io_u_fits(td, io_u, minbs))
		return 0;

	if (td->o.use_os_rand)
		rand_max = OS_RAND_MAX;
	else
		rand_max = FRAND_MAX;

	do {
		if (td->o.use_os_rand)
			r = os_random_long(&td->bsrange_state);
		else
			r = __rand(&td->__bsrange_state);

		if (!td->o.bssplit_nr[ddir]) {
			buflen = 1 + (unsigned int) ((double) maxbs *
					(r / (rand_max + 1.0)));
			if (buflen < minbs)
				buflen = minbs;
		} else {
			long perc = 0;
			unsigned int i;

			for (i = 0; i < td->o.bssplit_nr[ddir]; i++) {
				struct bssplit *bsp = &td->o.bssplit[ddir][i];

				buflen = bsp->bs;
				perc += bsp->perc;
				if ((r <= ((rand_max / 100L) * perc)) &&
				    io_u_fits(td, io_u, buflen))
					break;
			}
		}

		if (td->o.do_verify && td->o.verify != VERIFY_NONE)
			buflen = (buflen + td->o.verify_interval - 1) &
				~(td->o.verify_interval - 1);

		if (!td->o.bs_unaligned && is_power_of_2(minbs))
			buflen = (buflen + minbs - 1) & ~(minbs - 1);

	} while (!io_u_fits(td, io_u, buflen));

	return buflen;
}

static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u,
				    unsigned int is_random)
{
	if (td->flags & TD_F_PROFILE_OPS) {
		struct prof_io_ops *ops = &td->prof_io_ops;

		if (ops->fill_io_u_size)
			return ops->fill_io_u_size(td, io_u, is_random);
	}

	return __get_next_buflen(td, io_u, is_random);
}

static void set_rwmix_bytes(struct thread_data *td)
{
	unsigned int diff;

	/*
	 * we do time or byte based switch. this is needed because
	 * buffered writes may issue a lot quicker than they complete,
	 * whereas reads do not.
	 */
	diff = td->o.rwmix[td->rwmix_ddir ^ 1];
	td->rwmix_issues = (td->io_issues[td->rwmix_ddir] * diff) / 100;
}

static inline enum fio_ddir get_rand_ddir(struct thread_data *td)
{
	unsigned int v;
	unsigned long r;

	if (td->o.use_os_rand) {
		r = os_random_long(&td->rwmix_state);
		v = 1 + (int) (100.0 * (r / (OS_RAND_MAX + 1.0)));
	} else {
		r = __rand(&td->__rwmix_state);
		v = 1 + (int) (100.0 * (r / (FRAND_MAX + 1.0)));
	}

	if (v <= td->o.rwmix[DDIR_READ])
		return DDIR_READ;

	return DDIR_WRITE;
}

void io_u_quiesce(struct thread_data *td)
{
	/*
	 * We are going to sleep, ensure that we flush anything pending as
	 * not to skew our latency numbers.
	 *
	 * Changed to only monitor 'in flight' requests here instead of the
	 * td->cur_depth, b/c td->cur_depth does not accurately represent
	 * io's that have been actually submitted to an async engine,
	 * and cur_depth is meaningless for sync engines.
	 */
	while (td->io_u_in_flight) {
		int fio_unused ret;

		ret = io_u_queued_complete(td, 1, NULL);
	}
}

static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir)
{
	enum fio_ddir odir = ddir ^ 1;
	struct timeval t;
	long usec;

	assert(ddir_rw(ddir));

	if (td->rate_pending_usleep[ddir] <= 0)
		return ddir;

	/*
	 * We have too much pending sleep in this direction. See if we
	 * should switch.
	 */
	if (td_rw(td) && td->o.rwmix[odir]) {
		/*
		 * Other direction does not have too much pending, switch
		 */
		if (td->rate_pending_usleep[odir] < 100000)
			return odir;

		/*
		 * Both directions have pending sleep. Sleep the minimum time
		 * and deduct from both.
		 */
		if (td->rate_pending_usleep[ddir] <=
			td->rate_pending_usleep[odir]) {
			usec = td->rate_pending_usleep[ddir];
		} else {
			usec = td->rate_pending_usleep[odir];
			ddir = odir;
		}
	} else
		usec = td->rate_pending_usleep[ddir];

	io_u_quiesce(td);

	fio_gettime(&t, NULL);
	usec_sleep(td, usec);
	usec = utime_since_now(&t);

	td->rate_pending_usleep[ddir] -= usec;

	odir = ddir ^ 1;
	if (td_rw(td) && __should_check_rate(td, odir))
		td->rate_pending_usleep[odir] -= usec;

	if (ddir_trim(ddir))
		return ddir;

	return ddir;
}

/*
 * Return the data direction for the next io_u. If the job is a
 * mixed read/write workload, check the rwmix cycle and switch if
 * necessary.
 */
static enum fio_ddir get_rw_ddir(struct thread_data *td)
{
	enum fio_ddir ddir;

	/*
	 * see if it's time to fsync
	 */
	if (td->o.fsync_blocks &&
	   !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks) &&
	     td->io_issues[DDIR_WRITE] && should_fsync(td))
		return DDIR_SYNC;

	/*
	 * see if it's time to fdatasync
	 */
	if (td->o.fdatasync_blocks &&
	   !(td->io_issues[DDIR_WRITE] % td->o.fdatasync_blocks) &&
	     td->io_issues[DDIR_WRITE] && should_fsync(td))
		return DDIR_DATASYNC;

	/*
	 * see if it's time to sync_file_range
	 */
	if (td->sync_file_range_nr &&
	   !(td->io_issues[DDIR_WRITE] % td->sync_file_range_nr) &&
	     td->io_issues[DDIR_WRITE] && should_fsync(td))
		return DDIR_SYNC_FILE_RANGE;

	if (td_rw(td)) {
		/*
		 * Check if it's time to seed a new data direction.
		 */
		if (td->io_issues[td->rwmix_ddir] >= td->rwmix_issues) {
			/*
			 * Put a top limit on how many bytes we do for
			 * one data direction, to avoid overflowing the
			 * ranges too much
			 */
			ddir = get_rand_ddir(td);

			if (ddir != td->rwmix_ddir)
				set_rwmix_bytes(td);

			td->rwmix_ddir = ddir;
		}
		ddir = td->rwmix_ddir;
	} else if (td_read(td))
		ddir = DDIR_READ;
	else if (td_write(td))
		ddir = DDIR_WRITE;
	else
		ddir = DDIR_TRIM;

	td->rwmix_ddir = rate_ddir(td, ddir);
	return td->rwmix_ddir;
}

static void set_rw_ddir(struct thread_data *td, struct io_u *io_u)
{
	io_u->ddir = io_u->acct_ddir = get_rw_ddir(td);

	if (io_u->ddir == DDIR_WRITE && (td->io_ops->flags & FIO_BARRIER) &&
	    td->o.barrier_blocks &&
	   !(td->io_issues[DDIR_WRITE] % td->o.barrier_blocks) &&
	     td->io_issues[DDIR_WRITE])
		io_u->flags |= IO_U_F_BARRIER;
}

void put_file_log(struct thread_data *td, struct fio_file *f)
{
	int ret = put_file(td, f);

	if (ret)
		td_verror(td, ret, "file close");
}

void put_io_u(struct thread_data *td, struct io_u *io_u)
{
	td_io_u_lock(td);

	if (io_u->file && !(io_u->flags & IO_U_F_FREE_DEF))
		put_file_log(td, io_u->file);
	io_u->file = NULL;
	io_u->flags &= ~IO_U_F_FREE_DEF;
	io_u->flags |= IO_U_F_FREE;

	if (io_u->flags & IO_U_F_IN_CUR_DEPTH)
		td->cur_depth--;
	io_u_qpush(&td->io_u_freelist, io_u);
	td_io_u_unlock(td);
	td_io_u_free_notify(td);
}

void clear_io_u(struct thread_data *td, struct io_u *io_u)
{
	io_u->flags &= ~IO_U_F_FLIGHT;
	put_io_u(td, io_u);
}

void requeue_io_u(struct thread_data *td, struct io_u **io_u)
{
	struct io_u *__io_u = *io_u;
	enum fio_ddir ddir = acct_ddir(__io_u);

	dprint(FD_IO, "requeue %p\n", __io_u);

	td_io_u_lock(td);

	__io_u->flags |= IO_U_F_FREE;
	if ((__io_u->flags & IO_U_F_FLIGHT) && ddir_rw(ddir))
		td->io_issues[ddir]--;

	__io_u->flags &= ~IO_U_F_FLIGHT;
	if (__io_u->flags & IO_U_F_IN_CUR_DEPTH)
		td->cur_depth--;

	io_u_rpush(&td->io_u_requeues, __io_u);
	td_io_u_unlock(td);
	*io_u = NULL;
}

static int fill_io_u(struct thread_data *td, struct io_u *io_u)
{
	unsigned int is_random;

	if (td->io_ops->flags & FIO_NOIO)
		goto out;

	set_rw_ddir(td, io_u);

	/*
	 * fsync() or fdatasync() or trim etc, we are done
	 */
	if (!ddir_rw(io_u->ddir))
		goto out;

	/*
	 * See if it's time to switch to a new zone
	 */
	if (td->zone_bytes >= td->o.zone_size && td->o.zone_skip) {
		td->zone_bytes = 0;
		io_u->file->file_offset += td->o.zone_range + td->o.zone_skip;
		io_u->file->last_pos = io_u->file->file_offset;
		td->io_skip_bytes += td->o.zone_skip;
	}

	/*
	 * No log, let the seq/rand engine retrieve the next buflen and
	 * position.
	 */
	if (get_next_offset(td, io_u, &is_random)) {
		dprint(FD_IO, "io_u %p, failed getting offset\n", io_u);
		return 1;
	}

	io_u->buflen = get_next_buflen(td, io_u, is_random);
	if (!io_u->buflen) {
		dprint(FD_IO, "io_u %p, failed getting buflen\n", io_u);
		return 1;
	}

	if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
		dprint(FD_IO, "io_u %p, offset too large\n", io_u);
		dprint(FD_IO, "  off=%llu/%lu > %llu\n",
			(unsigned long long) io_u->offset, io_u->buflen,
			(unsigned long long) io_u->file->real_file_size);
		return 1;
	}

	/*
	 * mark entry before potentially trimming io_u
	 */
	if (td_random(td) && file_randommap(td, io_u->file))
		mark_random_map(td, io_u);

out:
	dprint_io_u(io_u, "fill_io_u");
	td->zone_bytes += io_u->buflen;
	return 0;
}

static void __io_u_mark_map(unsigned int *map, unsigned int nr)
{
	int idx = 0;

	switch (nr) {
	default:
		idx = 6;
		break;
	case 33 ... 64:
		idx = 5;
		break;
	case 17 ... 32:
		idx = 4;
		break;
	case 9 ... 16:
		idx = 3;
		break;
	case 5 ... 8:
		idx = 2;
		break;
	case 1 ... 4:
		idx = 1;
	case 0:
		break;
	}

	map[idx]++;
}

void io_u_mark_submit(struct thread_data *td, unsigned int nr)
{
	__io_u_mark_map(td->ts.io_u_submit, nr);
	td->ts.total_submit++;
}

void io_u_mark_complete(struct thread_data *td, unsigned int nr)
{
	__io_u_mark_map(td->ts.io_u_complete, nr);
	td->ts.total_complete++;
}

void io_u_mark_depth(struct thread_data *td, unsigned int nr)
{
	int idx = 0;

	switch (td->cur_depth) {
	default:
		idx = 6;
		break;
	case 32 ... 63:
		idx = 5;
		break;
	case 16 ... 31:
		idx = 4;
		break;
	case 8 ... 15:
		idx = 3;
		break;
	case 4 ... 7:
		idx = 2;
		break;
	case 2 ... 3:
		idx = 1;
	case 1:
		break;
	}

	td->ts.io_u_map[idx] += nr;
}

static void io_u_mark_lat_usec(struct thread_data *td, unsigned long usec)
{
	int idx = 0;

	assert(usec < 1000);

	switch (usec) {
	case 750 ... 999:
		idx = 9;
		break;
	case 500 ... 749:
		idx = 8;
		break;
	case 250 ... 499:
		idx = 7;
		break;
	case 100 ... 249:
		idx = 6;
		break;
	case 50 ... 99:
		idx = 5;
		break;
	case 20 ... 49:
		idx = 4;
		break;
	case 10 ... 19:
		idx = 3;
		break;
	case 4 ... 9:
		idx = 2;
		break;
	case 2 ... 3:
		idx = 1;
	case 0 ... 1:
		break;
	}

	assert(idx < FIO_IO_U_LAT_U_NR);
	td->ts.io_u_lat_u[idx]++;
}

static void io_u_mark_lat_msec(struct thread_data *td, unsigned long msec)
{
	int idx = 0;

	switch (msec) {
	default:
		idx = 11;
		break;
	case 1000 ... 1999:
		idx = 10;
		break;
	case 750 ... 999:
		idx = 9;
		break;
	case 500 ... 749:
		idx = 8;
		break;
	case 250 ... 499:
		idx = 7;
		break;
	case 100 ... 249:
		idx = 6;
		break;
	case 50 ... 99:
		idx = 5;
		break;
	case 20 ... 49:
		idx = 4;
		break;
	case 10 ... 19:
		idx = 3;
		break;
	case 4 ... 9:
		idx = 2;
		break;
	case 2 ... 3:
		idx = 1;
	case 0 ... 1:
		break;
	}

	assert(idx < FIO_IO_U_LAT_M_NR);
	td->ts.io_u_lat_m[idx]++;
}

static void io_u_mark_latency(struct thread_data *td, unsigned long usec)
{
	if (usec < 1000)
		io_u_mark_lat_usec(td, usec);
	else
		io_u_mark_lat_msec(td, usec / 1000);
}

/*
 * Get next file to service by choosing one at random
 */
static struct fio_file *get_next_file_rand(struct thread_data *td,
					   enum fio_file_flags goodf,
					   enum fio_file_flags badf)
{
	struct fio_file *f;
	int fno;

	do {
		int opened = 0;
		unsigned long r;

		if (td->o.use_os_rand) {
			r = os_random_long(&td->next_file_state);
			fno = (unsigned int) ((double) td->o.nr_files
				* (r / (OS_RAND_MAX + 1.0)));
		} else {
			r = __rand(&td->__next_file_state);
			fno = (unsigned int) ((double) td->o.nr_files
				* (r / (FRAND_MAX + 1.0)));
		}

		f = td->files[fno];
		if (fio_file_done(f))
			continue;

		if (!fio_file_open(f)) {
			int err;

			err = td_io_open_file(td, f);
			if (err)
				continue;
			opened = 1;
		}

		if ((!goodf || (f->flags & goodf)) && !(f->flags & badf)) {
			dprint(FD_FILE, "get_next_file_rand: %p\n", f);
			return f;
		}
		if (opened)
			td_io_close_file(td, f);
	} while (1);
}

/*
 * Get next file to service by doing round robin between all available ones
 */
static struct fio_file *get_next_file_rr(struct thread_data *td, int goodf,
					 int badf)
{
	unsigned int old_next_file = td->next_file;
	struct fio_file *f;

	do {
		int opened = 0;

		f = td->files[td->next_file];

		td->next_file++;
		if (td->next_file >= td->o.nr_files)
			td->next_file = 0;

		dprint(FD_FILE, "trying file %s %x\n", f->file_name, f->flags);
		if (fio_file_done(f)) {
			f = NULL;
			continue;
		}

		if (!fio_file_open(f)) {
			int err;

			err = td_io_open_file(td, f);
			if (err) {
				dprint(FD_FILE, "error %d on open of %s\n",
					err, f->file_name);
				f = NULL;
				continue;
			}
			opened = 1;
		}

		dprint(FD_FILE, "goodf=%x, badf=%x, ff=%x\n", goodf, badf,
								f->flags);
		if ((!goodf || (f->flags & goodf)) && !(f->flags & badf))
			break;

		if (opened)
			td_io_close_file(td, f);

		f = NULL;
	} while (td->next_file != old_next_file);

	dprint(FD_FILE, "get_next_file_rr: %p\n", f);
	return f;
}

static struct fio_file *__get_next_file(struct thread_data *td)
{
	struct fio_file *f;

	assert(td->o.nr_files <= td->files_index);

	if (td->nr_done_files >= td->o.nr_files) {
		dprint(FD_FILE, "get_next_file: nr_open=%d, nr_done=%d,"
				" nr_files=%d\n", td->nr_open_files,
						  td->nr_done_files,
						  td->o.nr_files);
		return NULL;
	}

	f = td->file_service_file;
	if (f && fio_file_open(f) && !fio_file_closing(f)) {
		if (td->o.file_service_type == FIO_FSERVICE_SEQ)
			goto out;
		if (td->file_service_left--)
			goto out;
	}

	if (td->o.file_service_type == FIO_FSERVICE_RR ||
	    td->o.file_service_type == FIO_FSERVICE_SEQ)
		f = get_next_file_rr(td, FIO_FILE_open, FIO_FILE_closing);
	else
		f = get_next_file_rand(td, FIO_FILE_open, FIO_FILE_closing);

	td->file_service_file = f;
	td->file_service_left = td->file_service_nr - 1;
out:
	dprint(FD_FILE, "get_next_file: %p [%s]\n", f, f->file_name);
	return f;
}

static struct fio_file *get_next_file(struct thread_data *td)
{
	if (td->flags & TD_F_PROFILE_OPS) {
		struct prof_io_ops *ops = &td->prof_io_ops;

		if (ops->get_next_file)
			return ops->get_next_file(td);
	}

	return __get_next_file(td);
}

static int set_io_u_file(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f;

	do {
		f = get_next_file(td);
		if (!f)
			return 1;

		io_u->file = f;
		get_file(f);

		if (!fill_io_u(td, io_u))
			break;

		put_file_log(td, f);
		td_io_close_file(td, f);
		io_u->file = NULL;
		fio_file_set_done(f);
		td->nr_done_files++;
		dprint(FD_FILE, "%s: is done (%d of %d)\n", f->file_name,
					td->nr_done_files, td->o.nr_files);
	} while (1);

	return 0;
}

static void lat_fatal(struct thread_data *td, struct io_completion_data *icd,
		      unsigned long tusec, unsigned long max_usec)
{
	if (!td->error)
		log_err("fio: latency of %lu usec exceeds specified max (%lu usec)\n", tusec, max_usec);
	td_verror(td, ETIMEDOUT, "max latency exceeded");
	icd->error = ETIMEDOUT;
}

static void lat_new_cycle(struct thread_data *td)
{
	fio_gettime(&td->latency_ts, NULL);
	td->latency_ios = ddir_rw_sum(td->io_blocks);
	td->latency_failed = 0;
}

/*
 * We had an IO outside the latency target. Reduce the queue depth. If we
 * are at QD=1, then it's time to give up.
 */
static int __lat_target_failed(struct thread_data *td)
{
	if (td->latency_qd == 1)
		return 1;

	td->latency_qd_high = td->latency_qd;
	td->latency_qd = (td->latency_qd + td->latency_qd_low) / 2;

	dprint(FD_RATE, "Ramped down: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high);

	/*
	 * When we ramp QD down, quiesce existing IO to prevent
	 * a storm of ramp downs due to pending higher depth.
	 */
	io_u_quiesce(td);
	lat_new_cycle(td);
	return 0;
}

static int lat_target_failed(struct thread_data *td)
{
	if (td->o.latency_percentile.u.f == 100.0)
		return __lat_target_failed(td);

	td->latency_failed++;
	return 0;
}

void lat_target_init(struct thread_data *td)
{
	if (td->o.latency_target) {
		dprint(FD_RATE, "Latency target=%llu\n", td->o.latency_target);
		fio_gettime(&td->latency_ts, NULL);
		td->latency_qd = 1;
		td->latency_qd_high = td->o.iodepth;
		td->latency_qd_low = 1;
		td->latency_ios = ddir_rw_sum(td->io_blocks);
	} else
		td->latency_qd = td->o.iodepth;
}

static void lat_target_success(struct thread_data *td)
{
	const unsigned int qd = td->latency_qd;

	td->latency_qd_low = td->latency_qd;

	/*
	 * If we haven't failed yet, we double up to a failing value instead
	 * of bisecting from highest possible queue depth. If we have set
	 * a limit other than td->o.iodepth, bisect between that.
	 */
	if (td->latency_qd_high != td->o.iodepth)
		td->latency_qd = (td->latency_qd + td->latency_qd_high) / 2;
	else
		td->latency_qd *= 2;

	if (td->latency_qd > td->o.iodepth)
		td->latency_qd = td->o.iodepth;

	dprint(FD_RATE, "Ramped up: %d %d %d\n", td->latency_qd_low, td->latency_qd, td->latency_qd_high);
	/*
	 * Same as last one, we are done
	 */
	if (td->latency_qd == qd)
		td->done = 1;

	lat_new_cycle(td);
}

/*
 * Check if we can bump the queue depth
 */
void lat_target_check(struct thread_data *td)
{
	uint64_t usec_window;
	uint64_t ios;
	double success_ios;

	usec_window = utime_since_now(&td->latency_ts);
	if (usec_window < td->o.latency_window)
		return;

	ios = ddir_rw_sum(td->io_blocks) - td->latency_ios;
	success_ios = (double) (ios - td->latency_failed) / (double) ios;
	success_ios *= 100.0;

	dprint(FD_RATE, "Success rate: %.2f%% (target %.2f%%)\n", success_ios, td->o.latency_percentile.u.f);

	if (success_ios >= td->o.latency_percentile.u.f)
		lat_target_success(td);
	else
		__lat_target_failed(td);
}

/*
 * If latency target is enabled, we might be ramping up or down and not
 * using the full queue depth available.
 */
int queue_full(struct thread_data *td)
{
	const int qempty = io_u_qempty(&td->io_u_freelist);

	if (qempty)
		return 1;
	if (!td->o.latency_target)
		return 0;

	return td->cur_depth >= td->latency_qd;
}

struct io_u *__get_io_u(struct thread_data *td)
{
	struct io_u *io_u = NULL;

	td_io_u_lock(td);

again:
	if (!io_u_rempty(&td->io_u_requeues))
		io_u = io_u_rpop(&td->io_u_requeues);
	else if (!queue_full(td)) {
		io_u = io_u_qpop(&td->io_u_freelist);

		io_u->buflen = 0;
		io_u->resid = 0;
		io_u->file = NULL;
		io_u->end_io = NULL;
	}

	if (io_u) {
		assert(io_u->flags & IO_U_F_FREE);
		io_u->flags &= ~(IO_U_F_FREE | IO_U_F_FREE_DEF);
		io_u->flags &= ~(IO_U_F_TRIMMED | IO_U_F_BARRIER);
		io_u->flags &= ~IO_U_F_VER_LIST;

		io_u->error = 0;
		io_u->acct_ddir = -1;
		td->cur_depth++;
		io_u->flags |= IO_U_F_IN_CUR_DEPTH;
		io_u->ipo = NULL;
	} else if (td->o.verify_async) {
		/*
		 * We ran out, wait for async verify threads to finish and
		 * return one
		 */
		pthread_cond_wait(&td->free_cond, &td->io_u_lock);
		goto again;
	}

	td_io_u_unlock(td);
	return io_u;
}

static int check_get_trim(struct thread_data *td, struct io_u *io_u)
{
	if (!(td->flags & TD_F_TRIM_BACKLOG))
		return 0;

	if (td->trim_entries) {
		int get_trim = 0;

		if (td->trim_batch) {
			td->trim_batch--;
			get_trim = 1;
		} else if (!(td->io_hist_len % td->o.trim_backlog) &&
			   td->last_ddir != DDIR_READ) {
			td->trim_batch = td->o.trim_batch;
			if (!td->trim_batch)
				td->trim_batch = td->o.trim_backlog;
			get_trim = 1;
		}

		if (get_trim && !get_next_trim(td, io_u))
			return 1;
	}

	return 0;
}

static int check_get_verify(struct thread_data *td, struct io_u *io_u)
{
	if (!(td->flags & TD_F_VER_BACKLOG))
		return 0;

	if (td->io_hist_len) {
		int get_verify = 0;

		if (td->verify_batch)
			get_verify = 1;
		else if (!(td->io_hist_len % td->o.verify_backlog) &&
			 td->last_ddir != DDIR_READ) {
			td->verify_batch = td->o.verify_batch;
			if (!td->verify_batch)
				td->verify_batch = td->o.verify_backlog;
			get_verify = 1;
		}

		if (get_verify && !get_next_verify(td, io_u)) {
			td->verify_batch--;
			return 1;
		}
	}

	return 0;
}

/*
 * Fill offset and start time into the buffer content, to prevent too
 * easy compressible data for simple de-dupe attempts. Do this for every
 * 512b block in the range, since that should be the smallest block size
 * we can expect from a device.
 */
static void small_content_scramble(struct io_u *io_u)
{
	unsigned int i, nr_blocks = io_u->buflen / 512;
	uint64_t boffset;
	unsigned int offset;
	void *p, *end;

	if (!nr_blocks)
		return;

	p = io_u->xfer_buf;
	boffset = io_u->offset;
	io_u->buf_filled_len = 0;

	for (i = 0; i < nr_blocks; i++) {
		/*
		 * Fill the byte offset into a "random" start offset of
		 * the buffer, given by the product of the usec time
		 * and the actual offset.
		 */
		offset = (io_u->start_time.tv_usec ^ boffset) & 511;
		offset &= ~(sizeof(uint64_t) - 1);
		if (offset >= 512 - sizeof(uint64_t))
			offset -= sizeof(uint64_t);
		memcpy(p + offset, &boffset, sizeof(boffset));

		end = p + 512 - sizeof(io_u->start_time);
		memcpy(end, &io_u->start_time, sizeof(io_u->start_time));
		p += 512;
		boffset += 512;
	}
}

/*
 * Return an io_u to be processed. Gets a buflen and offset, sets direction,
 * etc. The returned io_u is fully ready to be prepped and submitted.
 */
struct io_u *get_io_u(struct thread_data *td)
{
	struct fio_file *f;
	struct io_u *io_u;
	int do_scramble = 0;

	io_u = __get_io_u(td);
	if (!io_u) {
		dprint(FD_IO, "__get_io_u failed\n");
		return NULL;
	}

	if (check_get_verify(td, io_u))
		goto out;
	if (check_get_trim(td, io_u))
		goto out;

	/*
	 * from a requeue, io_u already setup
	 */
	if (io_u->file)
		goto out;

	/*
	 * If using an iolog, grab next piece if any available.
	 */
	if (td->flags & TD_F_READ_IOLOG) {
		if (read_iolog_get(td, io_u))
			goto err_put;
	} else if (set_io_u_file(td, io_u)) {
		dprint(FD_IO, "io_u %p, setting file failed\n", io_u);
		goto err_put;
	}

	f = io_u->file;
	assert(fio_file_open(f));

	if (ddir_rw(io_u->ddir)) {
		if (!io_u->buflen && !(td->io_ops->flags & FIO_NOIO)) {
			dprint(FD_IO, "get_io_u: zero buflen on %p\n", io_u);
			goto err_put;
		}

		f->last_start = io_u->offset;
		f->last_pos = io_u->offset + io_u->buflen;

		if (io_u->ddir == DDIR_WRITE) {
			if (td->flags & TD_F_REFILL_BUFFERS) {
				io_u_fill_buffer(td, io_u,
					io_u->xfer_buflen, io_u->xfer_buflen);
			} else if (td->flags & TD_F_SCRAMBLE_BUFFERS)
				do_scramble = 1;
			if (td->flags & TD_F_VER_NONE) {
				populate_verify_io_u(td, io_u);
				do_scramble = 0;
			}
		} else if (io_u->ddir == DDIR_READ) {
			/*
			 * Reset the buf_filled parameters so next time if the
			 * buffer is used for writes it is refilled.
			 */
			io_u->buf_filled_len = 0;
		}
	}

	/*
	 * Set io data pointers.
	 */
	io_u->xfer_buf = io_u->buf;
	io_u->xfer_buflen = io_u->buflen;

out:
	assert(io_u->file);
	if (!td_io_prep(td, io_u)) {
		if (!td->o.disable_slat)
			fio_gettime(&io_u->start_time, NULL);
		if (do_scramble)
			small_content_scramble(io_u);
		return io_u;
	}
err_put:
	dprint(FD_IO, "get_io_u failed\n");
	put_io_u(td, io_u);
	return NULL;
}

void io_u_log_error(struct thread_data *td, struct io_u *io_u)
{
	enum error_type_bit eb = td_error_type(io_u->ddir, io_u->error);
	const char *msg[] = { "read", "write", "sync", "datasync",
				"sync_file_range", "wait", "trim" };

	if (td_non_fatal_error(td, eb, io_u->error) && !td->o.error_dump)
		return;

	log_err("fio: io_u error");

	if (io_u->file)
		log_err(" on file %s", io_u->file->file_name);

	log_err(": %s\n", strerror(io_u->error));

	log_err("     %s offset=%llu, buflen=%lu\n", msg[io_u->ddir],
					io_u->offset, io_u->xfer_buflen);

	if (!td->error)
		td_verror(td, io_u->error, "io_u error");
}

static void account_io_completion(struct thread_data *td, struct io_u *io_u,
				  struct io_completion_data *icd,
				  const enum fio_ddir idx, unsigned int bytes)
{
	unsigned long lusec = 0;

	if (!td->o.disable_clat || !td->o.disable_bw)
		lusec = utime_since(&io_u->issue_time, &icd->time);

	if (!td->o.disable_lat) {
		unsigned long tusec;

		tusec = utime_since(&io_u->start_time, &icd->time);
		add_lat_sample(td, idx, tusec, bytes);

		if (td->flags & TD_F_PROFILE_OPS) {
			struct prof_io_ops *ops = &td->prof_io_ops;

			if (ops->io_u_lat)
				icd->error = ops->io_u_lat(td, tusec);
		}

		if (td->o.max_latency && tusec > td->o.max_latency)
			lat_fatal(td, icd, tusec, td->o.max_latency);
		if (td->o.latency_target && tusec > td->o.latency_target) {
			if (lat_target_failed(td))
				lat_fatal(td, icd, tusec, td->o.latency_target);
		}
	}

	if (!td->o.disable_clat) {
		add_clat_sample(td, idx, lusec, bytes);
		io_u_mark_latency(td, lusec);
	}

	if (!td->o.disable_bw)
		add_bw_sample(td, idx, bytes, &icd->time);

	add_iops_sample(td, idx, bytes, &icd->time);

	if (td->o.number_ios && !--td->o.number_ios)
		td->done = 1;
}

static long long usec_for_io(struct thread_data *td, enum fio_ddir ddir)
{
	uint64_t secs, remainder, bps, bytes;

	bytes = td->this_io_bytes[ddir];
	bps = td->rate_bps[ddir];
	secs = bytes / bps;
	remainder = bytes % bps;
	return remainder * 1000000 / bps + secs * 1000000;
}

static void io_completed(struct thread_data *td, struct io_u *io_u,
			 struct io_completion_data *icd)
{
	struct fio_file *f;

	dprint_io_u(io_u, "io complete");

	td_io_u_lock(td);
	assert(io_u->flags & IO_U_F_FLIGHT);
	io_u->flags &= ~(IO_U_F_FLIGHT | IO_U_F_BUSY_OK);

	/*
	 * Mark IO ok to verify
	 */
	if (io_u->ipo) {
		io_u->ipo->flags &= ~IP_F_IN_FLIGHT;
		write_barrier();
	}

	td_io_u_unlock(td);

	if (ddir_sync(io_u->ddir)) {
		td->last_was_sync = 1;
		f = io_u->file;
		if (f) {
			f->first_write = -1ULL;
			f->last_write = -1ULL;
		}
		return;
	}

	td->last_was_sync = 0;
	td->last_ddir = io_u->ddir;

	if (!io_u->error && ddir_rw(io_u->ddir)) {
		unsigned int bytes = io_u->buflen - io_u->resid;
		const enum fio_ddir idx = io_u->ddir;
		const enum fio_ddir odx = io_u->ddir ^ 1;
		int ret;

		td->io_blocks[idx]++;
		td->this_io_blocks[idx]++;
		td->io_bytes[idx] += bytes;

		if (!(io_u->flags & IO_U_F_VER_LIST))
			td->this_io_bytes[idx] += bytes;

		if (idx == DDIR_WRITE) {
			f = io_u->file;
			if (f) {
				if (f->first_write == -1ULL ||
				    io_u->offset < f->first_write)
					f->first_write = io_u->offset;
				if (f->last_write == -1ULL ||
				    ((io_u->offset + bytes) > f->last_write))
					f->last_write = io_u->offset + bytes;
			}
		}

		if (ramp_time_over(td) && (td->runstate == TD_RUNNING ||
					   td->runstate == TD_VERIFYING)) {
			account_io_completion(td, io_u, icd, idx, bytes);

			if (__should_check_rate(td, idx)) {
				td->rate_pending_usleep[idx] =
					(usec_for_io(td, idx) -
					 utime_since_now(&td->start));
			}
			if (idx != DDIR_TRIM && __should_check_rate(td, odx))
				td->rate_pending_usleep[odx] =
					(usec_for_io(td, odx) -
					 utime_since_now(&td->start));
		}

		icd->bytes_done[idx] += bytes;

		if (io_u->end_io) {
			ret = io_u->end_io(td, io_u);
			if (ret && !icd->error)
				icd->error = ret;
		}
	} else if (io_u->error) {
		icd->error = io_u->error;
		io_u_log_error(td, io_u);
	}
	if (icd->error) {
		enum error_type_bit eb = td_error_type(io_u->ddir, icd->error);
		if (!td_non_fatal_error(td, eb, icd->error))
			return;
		/*
		 * If there is a non_fatal error, then add to the error count
		 * and clear all the errors.
		 */
		update_error_count(td, icd->error);
		td_clear_error(td);
		icd->error = 0;
		io_u->error = 0;
	}
}

static void init_icd(struct thread_data *td, struct io_completion_data *icd,
		     int nr)
{
	int ddir;
	if (!td->o.disable_clat || !td->o.disable_bw)
		fio_gettime(&icd->time, NULL);

	icd->nr = nr;

	icd->error = 0;
	for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++)
		icd->bytes_done[ddir] = 0;
}

static void ios_completed(struct thread_data *td,
			  struct io_completion_data *icd)
{
	struct io_u *io_u;
	int i;

	for (i = 0; i < icd->nr; i++) {
		io_u = td->io_ops->event(td, i);

		io_completed(td, io_u, icd);

		if (!(io_u->flags & IO_U_F_FREE_DEF))
			put_io_u(td, io_u);
	}
}

/*
 * Complete a single io_u for the sync engines.
 */
int io_u_sync_complete(struct thread_data *td, struct io_u *io_u,
		       uint64_t *bytes)
{
	struct io_completion_data icd;

	init_icd(td, &icd, 1);
	io_completed(td, io_u, &icd);

	if (!(io_u->flags & IO_U_F_FREE_DEF))
		put_io_u(td, io_u);

	if (icd.error) {
		td_verror(td, icd.error, "io_u_sync_complete");
		return -1;
	}

	if (bytes) {
		int ddir;

		for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++)
			bytes[ddir] += icd.bytes_done[ddir];
	}

	return 0;
}

/*
 * Called to complete min_events number of io for the async engines.
 */
int io_u_queued_complete(struct thread_data *td, int min_evts,
			 uint64_t *bytes)
{
	struct io_completion_data icd;
	struct timespec *tvp = NULL;
	int ret;
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 0, };

	dprint(FD_IO, "io_u_queued_completed: min=%d\n", min_evts);

	if (!min_evts)
		tvp = &ts;

	ret = td_io_getevents(td, min_evts, td->o.iodepth_batch_complete, tvp);
	if (ret < 0) {
		td_verror(td, -ret, "td_io_getevents");
		return ret;
	} else if (!ret)
		return ret;

	init_icd(td, &icd, ret);
	ios_completed(td, &icd);
	if (icd.error) {
		td_verror(td, icd.error, "io_u_queued_complete");
		return -1;
	}

	if (bytes) {
		int ddir;

		for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++)
			bytes[ddir] += icd.bytes_done[ddir];
	}

	return 0;
}

/*
 * Call when io_u is really queued, to update the submission latency.
 */
void io_u_queued(struct thread_data *td, struct io_u *io_u)
{
	if (!td->o.disable_slat) {
		unsigned long slat_time;

		slat_time = utime_since(&io_u->start_time, &io_u->issue_time);
		add_slat_sample(td, io_u->ddir, slat_time, io_u->xfer_buflen);
	}
}

void fill_io_buffer(struct thread_data *td, void *buf, unsigned int min_write,
		    unsigned int max_bs)
{
	if (td->o.buffer_pattern_bytes)
		fill_buffer_pattern(td, buf, max_bs);
	else if (!td->o.zero_buffers) {
		unsigned int perc = td->o.compress_percentage;

		if (perc) {
			unsigned int seg = min_write;

			seg = min(min_write, td->o.compress_chunk);
			if (!seg)
				seg = min_write;

			fill_random_buf_percentage(&td->buf_state, buf,
						perc, seg, max_bs);
		} else
			fill_random_buf(&td->buf_state, buf, max_bs);
	} else
		memset(buf, 0, max_bs);
}

/*
 * "randomly" fill the buffer contents
 */
void io_u_fill_buffer(struct thread_data *td, struct io_u *io_u,
		      unsigned int min_write, unsigned int max_bs)
{
	io_u->buf_filled_len = 0;
	fill_io_buffer(td, io_u->buf, min_write, max_bs);
}