io_u.c revision 2ecc1b57721e3cb72bbf558bc169c97037fe3d0a
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <signal.h>
#include <time.h>
#include <assert.h>

#include "fio.h"
#include "hash.h"
#include "verify.h"

struct io_completion_data {
	int nr;				/* input */

	int error;			/* output */
	unsigned long bytes_done[2];	/* output */
	struct timeval time;		/* output */
};

/*
 * The ->file_map[] contains a map of blocks we have or have not done io
 * to yet. Used to make sure we cover the entire range in a fair fashion.
 */
static int random_map_free(struct fio_file *f, const unsigned long long block)
{
	unsigned int idx = RAND_MAP_IDX(f, block);
	unsigned int bit = RAND_MAP_BIT(f, block);

	dprint(FD_RANDOM, "free: b=%llu, idx=%u, bit=%u\n", block, idx, bit);

	return (f->file_map[idx] & (1 << bit)) == 0;
}

/*
 * Mark a given offset as used in the map.
 */
static void mark_random_map(struct thread_data *td, struct io_u *io_u)
{
	unsigned int min_bs = td->o.rw_min_bs;
	struct fio_file *f = io_u->file;
	unsigned long long block;
	unsigned int blocks, nr_blocks;

	block = (io_u->offset - f->file_offset) / (unsigned long long) min_bs;
	nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;
	blocks = 0;

	while (nr_blocks) {
		unsigned int this_blocks, mask;
		unsigned int idx, bit;

		/*
		 * If we have a mixed random workload, we may
		 * encounter blocks we already did IO to.
		 */
		if ((td->o.ddir_nr == 1) && !random_map_free(f, block)) {
			if (!blocks)
				blocks = 1;
			break;
		}

		idx = RAND_MAP_IDX(f, block);
		bit = RAND_MAP_BIT(f, block);

		fio_assert(td, idx < f->num_maps);

		this_blocks = nr_blocks;
		if (this_blocks + bit > BLOCKS_PER_MAP)
			this_blocks = BLOCKS_PER_MAP - bit;

		if (this_blocks == BLOCKS_PER_MAP)
			mask = -1U;
		else
			mask = ((1U << this_blocks) - 1) << bit;

		f->file_map[idx] |= mask;
		nr_blocks -= this_blocks;
		blocks += this_blocks;
		block += this_blocks;
	}

	if ((blocks * min_bs) < io_u->buflen)
		io_u->buflen = blocks * min_bs;
}

static unsigned long long last_block(struct thread_data *td, struct fio_file *f,
				     enum fio_ddir ddir)
{
	unsigned long long max_blocks;
	unsigned long long max_size;

	/*
	 * Hmm, should we make sure that ->io_size <= ->real_file_size?
	 */
	max_size = f->io_size;
	if (max_size > f->real_file_size)
		max_size = f->real_file_size;

	max_blocks = max_size / (unsigned long long) td->o.ba[ddir];
	if (!max_blocks)
		return 0;

	return max_blocks;
}
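
/*
 * Map layout note (descriptive sketch, assuming BLOCKS_PER_MAP is the
 * number of bits in one file_map[] word, typically 32): each word tracks
 * BLOCKS_PER_MAP blocks of rw_min_bs bytes. RAND_MAP_IDX() selects the
 * word and RAND_MAP_BIT() the bit within it, so with 32-bit words block
 * 70 lands at idx 2, bit 6. A set bit means io has already been issued
 * to that block; random_map_free() and mark_random_map() above test and
 * set those bits, and the lookup below scans for clear ones.
 */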

/*
 * Return the next free block in the map.
 */
static int get_next_free_block(struct thread_data *td, struct fio_file *f,
			       enum fio_ddir ddir, unsigned long long *b)
{
	unsigned long long min_bs = td->o.rw_min_bs;
	int i;

	i = f->last_free_lookup;
	*b = (i * BLOCKS_PER_MAP);
	while ((*b) * min_bs < f->real_file_size &&
		(*b) * min_bs < f->io_size) {
		if (f->file_map[i] != (unsigned int) -1) {
			*b += ffz(f->file_map[i]);
			if (*b > last_block(td, f, ddir))
				break;
			f->last_free_lookup = i;
			return 0;
		}

		*b += BLOCKS_PER_MAP;
		i++;
	}

	dprint(FD_IO, "failed finding a free block\n");
	return 1;
}

static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
				enum fio_ddir ddir, unsigned long long *b)
{
	unsigned long long r;
	int loops = 5;

	do {
		r = os_random_long(&td->random_state);
		dprint(FD_RANDOM, "off rand %llu\n", r);
		*b = (last_block(td, f, ddir) - 1)
			* (r / ((unsigned long long) OS_RAND_MAX + 1.0));

		/*
		 * if we are not maintaining a random map, we are done.
		 */
		if (!file_randommap(td, f))
			return 0;

		/*
		 * calculate map offset and check if it's free
		 */
		if (random_map_free(f, *b))
			return 0;

		dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n",
									*b);
	} while (--loops);

	/*
	 * we get here, if we didn't succeed in looking up a block. generate
	 * a random start offset into the filemap, and find the first free
	 * block from there.
	 */
	loops = 10;
	do {
		f->last_free_lookup = (f->num_maps - 1) *
					(r / (OS_RAND_MAX + 1.0));
		if (!get_next_free_block(td, f, ddir, b))
			return 0;

		r = os_random_long(&td->random_state);
	} while (--loops);

	/*
	 * that didn't work either, try exhaustive search from the start
	 */
	f->last_free_lookup = 0;
	return get_next_free_block(td, f, ddir, b);
}
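
/*
 * Illustrative note on the offset pick above: the scaling maps a raw
 * random value r in [0, OS_RAND_MAX] to a block index below last_block(),
 * e.g. with last_block() == 1000 and r == OS_RAND_MAX / 2, b lands near
 * 499. The three stages trade randomness for termination: up to 5 purely
 * random probes of the map, then up to 10 random starting points for a
 * linear scan via get_next_free_block(), then a final scan from index 0.
 */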

/*
 * For random io, generate a random new block and see if it's used. Repeat
 * until we find a free one. For sequential io, just return the end of
 * the last io issued.
 */
static int get_next_offset(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;
	unsigned long long b;
	enum fio_ddir ddir = io_u->ddir;

	if (td_random(td) && (td->o.ddir_nr && !--td->ddir_nr)) {
		td->ddir_nr = td->o.ddir_nr;

		if (get_next_rand_offset(td, f, ddir, &b)) {
			dprint(FD_IO, "%s: getting rand offset failed\n",
				f->file_name);
			return 1;
		}
	} else {
		if (f->last_pos >= f->real_file_size) {
			if (!td_random(td) ||
			     get_next_rand_offset(td, f, ddir, &b)) {
				dprint(FD_IO, "%s: pos %llu > size %llu\n",
						f->file_name, f->last_pos,
						f->real_file_size);
				return 1;
			}
		} else
			b = (f->last_pos - f->file_offset) / td->o.min_bs[ddir];
	}

	io_u->offset = b * td->o.ba[ddir];
	if (io_u->offset >= f->io_size) {
		dprint(FD_IO, "get_next_offset: offset %llu >= io_size %llu\n",
					io_u->offset, f->io_size);
		return 1;
	}

	io_u->offset += f->file_offset;
	if (io_u->offset >= f->real_file_size) {
		dprint(FD_IO, "get_next_offset: offset %llu >= size %llu\n",
					io_u->offset, f->real_file_size);
		return 1;
	}

	return 0;
}

static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u)
{
	const int ddir = io_u->ddir;
	unsigned int uninitialized_var(buflen);
	unsigned int minbs, maxbs;
	long r;

	minbs = td->o.min_bs[ddir];
	maxbs = td->o.max_bs[ddir];

	if (minbs == maxbs)
		buflen = minbs;
	else {
		r = os_random_long(&td->bsrange_state);
		if (!td->o.bssplit_nr[ddir]) {
			buflen = 1 + (unsigned int) ((double) maxbs *
					(r / (OS_RAND_MAX + 1.0)));
			if (buflen < minbs)
				buflen = minbs;
		} else {
			long perc = 0;
			unsigned int i;

			for (i = 0; i < td->o.bssplit_nr[ddir]; i++) {
				struct bssplit *bsp = &td->o.bssplit[ddir][i];

				buflen = bsp->bs;
				perc += bsp->perc;
				if (r <= ((OS_RAND_MAX / 100L) * perc))
					break;
			}
		}
		if (!td->o.bs_unaligned && is_power_of_2(minbs))
			buflen = (buflen + minbs - 1) & ~(minbs - 1);
	}

	if (io_u->offset + buflen > io_u->file->real_file_size) {
		dprint(FD_IO, "lower buflen %u -> %u (ddir=%d)\n", buflen,
						minbs, ddir);
		buflen = minbs;
	}

	return buflen;
}

static void set_rwmix_bytes(struct thread_data *td)
{
	unsigned int diff;

	/*
	 * we do time or byte based switch. this is needed because
	 * buffered writes may issue a lot quicker than they complete,
	 * whereas reads do not.
	 */
	diff = td->o.rwmix[td->rwmix_ddir ^ 1];
	td->rwmix_issues = (td->io_issues[td->rwmix_ddir] * diff) / 100;
}

static inline enum fio_ddir get_rand_ddir(struct thread_data *td)
{
	unsigned int v;
	long r;

	r = os_random_long(&td->rwmix_state);
	v = 1 + (int) (100.0 * (r / (OS_RAND_MAX + 1.0)));
	if (v <= td->o.rwmix[DDIR_READ])
		return DDIR_READ;

	return DDIR_WRITE;
}
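
/*
 * Example of the rwmix pick above (illustrative): with
 * rwmix[DDIR_READ] == 70, v is drawn uniformly from 1..100, so roughly
 * 70% of calls return DDIR_READ and the remaining 30% return DDIR_WRITE.
 */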

static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir)
{
	enum fio_ddir odir = ddir ^ 1;
	struct timeval t;
	long usec;

	if (td->rate_pending_usleep[ddir] <= 0)
		return ddir;

	/*
	 * We have too much pending sleep in this direction. See if we
	 * should switch.
	 */
	if (td_rw(td)) {
		/*
		 * Other direction does not have too much pending, switch
		 */
		if (td->rate_pending_usleep[odir] < 100000)
			return odir;

		/*
		 * Both directions have pending sleep. Sleep the minimum time
		 * and deduct from both.
		 */
		if (td->rate_pending_usleep[ddir] <=
			td->rate_pending_usleep[odir]) {
			usec = td->rate_pending_usleep[ddir];
		} else {
			usec = td->rate_pending_usleep[odir];
			ddir = odir;
		}
	} else
		usec = td->rate_pending_usleep[ddir];

	fio_gettime(&t, NULL);
	usec_sleep(td, usec);
	usec = utime_since_now(&t);

	td->rate_pending_usleep[ddir] -= usec;

	odir = ddir ^ 1;
	if (td_rw(td) && __should_check_rate(td, odir))
		td->rate_pending_usleep[odir] -= usec;

	return ddir;
}

/*
 * Return the data direction for the next io_u. If the job is a
 * mixed read/write workload, check the rwmix cycle and switch if
 * necessary.
 */
static enum fio_ddir get_rw_ddir(struct thread_data *td)
{
	enum fio_ddir ddir;

	/*
	 * see if it's time to fsync
	 */
	if (td->o.fsync_blocks &&
	   !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks) &&
	     td->io_issues[DDIR_WRITE] && should_fsync(td))
		return DDIR_SYNC;

	/*
	 * see if it's time to fdatasync
	 */
	if (td->o.fdatasync_blocks &&
	   !(td->io_issues[DDIR_WRITE] % td->o.fdatasync_blocks) &&
	     td->io_issues[DDIR_WRITE] && should_fsync(td))
		return DDIR_DATASYNC;

	if (td_rw(td)) {
		/*
		 * Check if it's time to seed a new data direction.
		 */
		if (td->io_issues[td->rwmix_ddir] >= td->rwmix_issues) {
			/*
			 * Put a top limit on how many bytes we do for
			 * one data direction, to avoid overflowing the
			 * ranges too much
			 */
			ddir = get_rand_ddir(td);

			if (ddir != td->rwmix_ddir)
				set_rwmix_bytes(td);

			td->rwmix_ddir = ddir;
		}
		ddir = td->rwmix_ddir;
	} else if (td_read(td))
		ddir = DDIR_READ;
	else
		ddir = DDIR_WRITE;

	td->rwmix_ddir = rate_ddir(td, ddir);
	return td->rwmix_ddir;
}

void put_file_log(struct thread_data *td, struct fio_file *f)
{
	int ret = put_file(td, f);

	if (ret)
		td_verror(td, ret, "file close");
}

void put_io_u(struct thread_data *td, struct io_u *io_u)
{
	td_io_u_lock(td);

	io_u->flags |= IO_U_F_FREE;
	io_u->flags &= ~IO_U_F_FREE_DEF;

	if (io_u->file)
		put_file_log(td, io_u->file);

	io_u->file = NULL;
	if (io_u->flags & IO_U_F_IN_CUR_DEPTH)
		td->cur_depth--;
	flist_del_init(&io_u->list);
	flist_add(&io_u->list, &td->io_u_freelist);
	td_io_u_unlock(td);
	td_io_u_free_notify(td);
}

void clear_io_u(struct thread_data *td, struct io_u *io_u)
{
	io_u->flags &= ~IO_U_F_FLIGHT;
	put_io_u(td, io_u);
}

void requeue_io_u(struct thread_data *td, struct io_u **io_u)
{
	struct io_u *__io_u = *io_u;

	dprint(FD_IO, "requeue %p\n", __io_u);

	td_io_u_lock(td);

	__io_u->flags |= IO_U_F_FREE;
	if ((__io_u->flags & IO_U_F_FLIGHT) && !ddir_sync(__io_u->ddir))
		td->io_issues[__io_u->ddir]--;

	__io_u->flags &= ~IO_U_F_FLIGHT;
	if (__io_u->flags & IO_U_F_IN_CUR_DEPTH)
		td->cur_depth--;
	flist_del(&__io_u->list);
	flist_add_tail(&__io_u->list, &td->io_u_requeues);
	td_io_u_unlock(td);
	*io_u = NULL;
}
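
/*
 * io_u lifecycle note: units live on one of three lists. __get_io_u()
 * pulls from io_u_requeues first, then io_u_freelist; put_io_u() returns
 * a unit to the freelist, while requeue_io_u() parks a partially handled
 * unit on io_u_requeues so it is retried before any fresh io_u is built.
 */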

static int fill_io_u(struct thread_data *td, struct io_u *io_u)
{
	if (td->io_ops->flags & FIO_NOIO)
		goto out;

	io_u->ddir = get_rw_ddir(td);

	/*
	 * fsync() or fdatasync(), we are done
	 */
	if (ddir_sync(io_u->ddir))
		goto out;

	/*
	 * See if it's time to switch to a new zone
	 */
	if (td->zone_bytes >= td->o.zone_size) {
		td->zone_bytes = 0;
		io_u->file->last_pos += td->o.zone_skip;
		td->io_skip_bytes += td->o.zone_skip;
	}

	/*
	 * No log, let the seq/rand engine retrieve the next buflen and
	 * position.
	 */
	if (get_next_offset(td, io_u)) {
		dprint(FD_IO, "io_u %p, failed getting offset\n", io_u);
		return 1;
	}

	io_u->buflen = get_next_buflen(td, io_u);
	if (!io_u->buflen) {
		dprint(FD_IO, "io_u %p, failed getting buflen\n", io_u);
		return 1;
	}

	if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
		dprint(FD_IO, "io_u %p, offset too large\n", io_u);
		dprint(FD_IO, "  off=%llu/%lu > %llu\n", io_u->offset,
				io_u->buflen, io_u->file->real_file_size);
		return 1;
	}

	/*
	 * mark entry before potentially trimming io_u
	 */
	if (td_random(td) && file_randommap(td, io_u->file))
		mark_random_map(td, io_u);

	/*
	 * If using a write iolog, store this entry.
	 */
out:
	dprint_io_u(io_u, "fill_io_u");
	td->zone_bytes += io_u->buflen;
	log_io_u(td, io_u);
	return 0;
}

static void __io_u_mark_map(unsigned int *map, unsigned int nr)
{
	int index = 0;

	switch (nr) {
	default:
		index = 6;
		break;
	case 33 ... 64:
		index = 5;
		break;
	case 17 ... 32:
		index = 4;
		break;
	case 9 ... 16:
		index = 3;
		break;
	case 5 ... 8:
		index = 2;
		break;
	case 1 ... 4:
		index = 1;
	case 0:
		break;
	}

	map[index]++;
}

void io_u_mark_submit(struct thread_data *td, unsigned int nr)
{
	__io_u_mark_map(td->ts.io_u_submit, nr);
	td->ts.total_submit++;
}

void io_u_mark_complete(struct thread_data *td, unsigned int nr)
{
	__io_u_mark_map(td->ts.io_u_complete, nr);
	td->ts.total_complete++;
}

void io_u_mark_depth(struct thread_data *td, unsigned int nr)
{
	int index = 0;

	switch (td->cur_depth) {
	default:
		index = 6;
		break;
	case 32 ... 63:
		index = 5;
		break;
	case 16 ... 31:
		index = 4;
		break;
	case 8 ... 15:
		index = 3;
		break;
	case 4 ... 7:
		index = 2;
		break;
	case 2 ... 3:
		index = 1;
	case 1:
		break;
	}

	td->ts.io_u_map[index] += nr;
}
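
/*
 * Latency bucketing note: completion latencies are split into a
 * sub-millisecond table (io_u_lat_u, filled by io_u_mark_lat_usec below)
 * and a millisecond table (io_u_lat_m); io_u_mark_latency() picks the
 * table based on whether the sample is under 1000 usec.
 */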

static void io_u_mark_lat_usec(struct thread_data *td, unsigned long usec)
{
	int index = 0;

	assert(usec < 1000);

	switch (usec) {
	case 750 ... 999:
		index = 9;
		break;
	case 500 ... 749:
		index = 8;
		break;
	case 250 ... 499:
		index = 7;
		break;
	case 100 ... 249:
		index = 6;
		break;
	case 50 ... 99:
		index = 5;
		break;
	case 20 ... 49:
		index = 4;
		break;
	case 10 ... 19:
		index = 3;
		break;
	case 4 ... 9:
		index = 2;
		break;
	case 2 ... 3:
		index = 1;
	case 0 ... 1:
		break;
	}

	assert(index < FIO_IO_U_LAT_U_NR);
	td->ts.io_u_lat_u[index]++;
}

static void io_u_mark_lat_msec(struct thread_data *td, unsigned long msec)
{
	int index = 0;

	switch (msec) {
	default:
		index = 11;
		break;
	case 1000 ... 1999:
		index = 10;
		break;
	case 750 ... 999:
		index = 9;
		break;
	case 500 ... 749:
		index = 8;
		break;
	case 250 ... 499:
		index = 7;
		break;
	case 100 ... 249:
		index = 6;
		break;
	case 50 ... 99:
		index = 5;
		break;
	case 20 ... 49:
		index = 4;
		break;
	case 10 ... 19:
		index = 3;
		break;
	case 4 ... 9:
		index = 2;
		break;
	case 2 ... 3:
		index = 1;
	case 0 ... 1:
		break;
	}

	assert(index < FIO_IO_U_LAT_M_NR);
	td->ts.io_u_lat_m[index]++;
}

static void io_u_mark_latency(struct thread_data *td, unsigned long usec)
{
	if (usec < 1000)
		io_u_mark_lat_usec(td, usec);
	else
		io_u_mark_lat_msec(td, usec / 1000);
}

/*
 * Get next file to service by choosing one at random
 */
static struct fio_file *get_next_file_rand(struct thread_data *td,
					   enum fio_file_flags goodf,
					   enum fio_file_flags badf)
{
	struct fio_file *f;
	int fno;

	do {
		long r = os_random_long(&td->next_file_state);
		int opened = 0;

		fno = (unsigned int) ((double) td->o.nr_files
			* (r / (OS_RAND_MAX + 1.0)));
		f = td->files[fno];
		if (fio_file_done(f))
			continue;

		if (!fio_file_open(f)) {
			int err;

			err = td_io_open_file(td, f);
			if (err)
				continue;
			opened = 1;
		}

		if ((!goodf || (f->flags & goodf)) && !(f->flags & badf)) {
			dprint(FD_FILE, "get_next_file_rand: %p\n", f);
			return f;
		}
		if (opened)
			td_io_close_file(td, f);
	} while (1);
}

/*
 * Get next file to service by doing round robin between all available ones
 */
static struct fio_file *get_next_file_rr(struct thread_data *td, int goodf,
					 int badf)
{
	unsigned int old_next_file = td->next_file;
	struct fio_file *f;

	do {
		int opened = 0;

		f = td->files[td->next_file];

		td->next_file++;
		if (td->next_file >= td->o.nr_files)
			td->next_file = 0;

		dprint(FD_FILE, "trying file %s %x\n", f->file_name, f->flags);
		if (fio_file_done(f)) {
			f = NULL;
			continue;
		}

		if (!fio_file_open(f)) {
			int err;

			err = td_io_open_file(td, f);
			if (err) {
				dprint(FD_FILE, "error %d on open of %s\n",
					err, f->file_name);
				f = NULL;
				continue;
			}
			opened = 1;
		}

		dprint(FD_FILE, "goodf=%x, badf=%x, ff=%x\n", goodf, badf,
								f->flags);
		if ((!goodf || (f->flags & goodf)) && !(f->flags & badf))
			break;

		if (opened)
			td_io_close_file(td, f);

		f = NULL;
	} while (td->next_file != old_next_file);

	dprint(FD_FILE, "get_next_file_rr: %p\n", f);
	return f;
}

static struct fio_file *get_next_file(struct thread_data *td)
{
	struct fio_file *f;

	assert(td->o.nr_files <= td->files_index);

	if (td->nr_done_files >= td->o.nr_files) {
		dprint(FD_FILE, "get_next_file: nr_open=%d, nr_done=%d,"
				" nr_files=%d\n", td->nr_open_files,
						  td->nr_done_files,
						  td->o.nr_files);
		return NULL;
	}

	f = td->file_service_file;
	if (f && fio_file_open(f) && !fio_file_closing(f)) {
		if (td->o.file_service_type == FIO_FSERVICE_SEQ)
			goto out;
		if (td->file_service_left--)
			goto out;
	}

	if (td->o.file_service_type == FIO_FSERVICE_RR ||
	    td->o.file_service_type == FIO_FSERVICE_SEQ)
		f = get_next_file_rr(td, FIO_FILE_open, FIO_FILE_closing);
	else
		f = get_next_file_rand(td, FIO_FILE_open, FIO_FILE_closing);

	td->file_service_file = f;
	td->file_service_left = td->file_service_nr - 1;
out:
	dprint(FD_FILE, "get_next_file: %p [%s]\n", f, f->file_name);
	return f;
}
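
/*
 * File selection note: FIO_FSERVICE_SEQ keeps the current file until it
 * is done or closing, while the other policies switch after roughly
 * file_service_nr io_us; RR and SEQ pick the next file with the
 * round-robin helper above, the random policy with get_next_file_rand().
 */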

static int set_io_u_file(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f;

	do {
		f = get_next_file(td);
		if (!f)
			return 1;

		io_u->file = f;
		get_file(f);

		if (!fill_io_u(td, io_u))
			break;

		put_file_log(td, f);
		td_io_close_file(td, f);
		io_u->file = NULL;
		fio_file_set_done(f);
		td->nr_done_files++;
		dprint(FD_FILE, "%s: is done (%d of %d)\n", f->file_name,
					td->nr_done_files, td->o.nr_files);
	} while (1);

	return 0;
}


struct io_u *__get_io_u(struct thread_data *td)
{
	struct io_u *io_u = NULL;

	td_io_u_lock(td);

again:
	if (!flist_empty(&td->io_u_requeues))
		io_u = flist_entry(td->io_u_requeues.next, struct io_u, list);
	else if (!queue_full(td)) {
		io_u = flist_entry(td->io_u_freelist.next, struct io_u, list);

		io_u->buflen = 0;
		io_u->resid = 0;
		io_u->file = NULL;
		io_u->end_io = NULL;
	}

	/*
	 * We ran out, wait for async verify threads to finish and return one
	 */
	if (!io_u && td->o.verify_async) {
		pthread_cond_wait(&td->free_cond, &td->io_u_lock);
		goto again;
	}

	if (io_u) {
		assert(io_u->flags & IO_U_F_FREE);
		io_u->flags &= ~(IO_U_F_FREE | IO_U_F_FREE_DEF);

		io_u->error = 0;
		flist_del(&io_u->list);
		flist_add(&io_u->list, &td->io_u_busylist);
		td->cur_depth++;
		io_u->flags |= IO_U_F_IN_CUR_DEPTH;
	}

	td_io_u_unlock(td);
	return io_u;
}
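
/*
 * Note on the wait above: with verify_async set and both the requeue and
 * free lists empty, __get_io_u() blocks on free_cond. put_io_u() calls
 * td_io_u_free_notify() after returning a unit to the freelist, which is
 * expected to signal that condition and wake the waiter.
 */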

/*
 * Return an io_u to be processed. Gets a buflen and offset, sets direction,
 * etc. The returned io_u is fully ready to be prepped and submitted.
 */
struct io_u *get_io_u(struct thread_data *td)
{
	struct fio_file *f;
	struct io_u *io_u;

	io_u = __get_io_u(td);
	if (!io_u) {
		dprint(FD_IO, "__get_io_u failed\n");
		return NULL;
	}

	/*
	 * from a requeue, io_u already setup
	 */
	if (io_u->file)
		goto out;

	/*
	 * If using an iolog, grab next piece if any available.
	 */
	if (td->o.read_iolog_file) {
		if (read_iolog_get(td, io_u))
			goto err_put;
	} else if (set_io_u_file(td, io_u)) {
		dprint(FD_IO, "io_u %p, setting file failed\n", io_u);
		goto err_put;
	}

	f = io_u->file;
	assert(fio_file_open(f));

	if (!ddir_sync(io_u->ddir)) {
		if (!io_u->buflen && !(td->io_ops->flags & FIO_NOIO)) {
			dprint(FD_IO, "get_io_u: zero buflen on %p\n", io_u);
			goto err_put;
		}

		f->last_pos = io_u->offset + io_u->buflen;

		if (td->o.verify != VERIFY_NONE && io_u->ddir == DDIR_WRITE)
			populate_verify_io_u(td, io_u);
		else if (td->o.refill_buffers && io_u->ddir == DDIR_WRITE)
			io_u_fill_buffer(td, io_u, io_u->xfer_buflen);
	}

	/*
	 * Set io data pointers.
	 */
	io_u->xfer_buf = io_u->buf;
	io_u->xfer_buflen = io_u->buflen;

out:
	if (!td_io_prep(td, io_u)) {
		if (!td->o.disable_slat)
			fio_gettime(&io_u->start_time, NULL);
		return io_u;
	}
err_put:
	dprint(FD_IO, "get_io_u failed\n");
	put_io_u(td, io_u);
	return NULL;
}

void io_u_log_error(struct thread_data *td, struct io_u *io_u)
{
	const char *msg[] = { "read", "write", "sync" };

	log_err("fio: io_u error");

	if (io_u->file)
		log_err(" on file %s", io_u->file->file_name);

	log_err(": %s\n", strerror(io_u->error));

	log_err("     %s offset=%llu, buflen=%lu\n", msg[io_u->ddir],
					io_u->offset, io_u->xfer_buflen);

	if (!td->error)
		td_verror(td, io_u->error, "io_u error");
}
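
/*
 * Completion accounting note: io_completed() below updates per-direction
 * block/byte counters, records clat/bw samples once the ramp time is
 * over, and adjusts rate_pending_usleep[], which rate_ddir() uses when
 * throttling subsequent submissions.
 */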

static void io_completed(struct thread_data *td, struct io_u *io_u,
			 struct io_completion_data *icd)
{
	/*
	 * Older gcc's are too dumb to realize that usec is always used
	 * initialized, silence that warning.
	 */
	unsigned long uninitialized_var(usec);

	dprint_io_u(io_u, "io complete");

	td_io_u_lock(td);
	assert(io_u->flags & IO_U_F_FLIGHT);
	io_u->flags &= ~IO_U_F_FLIGHT;
	td_io_u_unlock(td);

	if (ddir_sync(io_u->ddir)) {
		td->last_was_sync = 1;
		return;
	}

	td->last_was_sync = 0;

	if (!io_u->error) {
		unsigned int bytes = io_u->buflen - io_u->resid;
		const enum fio_ddir idx = io_u->ddir;
		int ret;

		td->io_blocks[idx]++;
		td->io_bytes[idx] += bytes;
		td->this_io_bytes[idx] += bytes;

		if (ramp_time_over(td)) {
			unsigned long uninitialized_var(lusec);
			unsigned long uninitialized_var(rusec);

			if (!td->o.disable_clat || !td->o.disable_bw)
				lusec = utime_since(&io_u->issue_time,
							&icd->time);
			if (__should_check_rate(td, idx) ||
			    __should_check_rate(td, idx ^ 1))
				rusec = utime_since(&io_u->start_time,
							&icd->time);

			if (!td->o.disable_clat) {
				add_clat_sample(td, idx, lusec, bytes);
				io_u_mark_latency(td, lusec);
			}
			if (!td->o.disable_bw)
				add_bw_sample(td, idx, bytes, &icd->time);
			if (__should_check_rate(td, idx)) {
				td->rate_pending_usleep[idx] +=
					(long) td->rate_usec_cycle[idx] - rusec;
			}
			if (__should_check_rate(td, idx ^ 1))
				td->rate_pending_usleep[idx ^ 1] -= rusec;
		}

		if (td_write(td) && idx == DDIR_WRITE &&
		    td->o.do_verify &&
		    td->o.verify != VERIFY_NONE)
			log_io_piece(td, io_u);

		icd->bytes_done[idx] += bytes;

		if (io_u->end_io) {
			ret = io_u->end_io(td, io_u);
			if (ret && !icd->error)
				icd->error = ret;
		}
	} else {
		icd->error = io_u->error;
		io_u_log_error(td, io_u);
	}
	if (td->o.continue_on_error && icd->error &&
	    td_non_fatal_error(icd->error)) {
		/*
		 * If there is a non_fatal error, then add to the error count
		 * and clear all the errors.
		 */
		update_error_count(td, icd->error);
		td_clear_error(td);
		icd->error = 0;
		io_u->error = 0;
	}
}

static void init_icd(struct thread_data *td, struct io_completion_data *icd,
		     int nr)
{
	if (!td->o.disable_clat || !td->o.disable_bw)
		fio_gettime(&icd->time, NULL);

	icd->nr = nr;

	icd->error = 0;
	icd->bytes_done[0] = icd->bytes_done[1] = 0;
}

static void ios_completed(struct thread_data *td,
			  struct io_completion_data *icd)
{
	struct io_u *io_u;
	int i;

	for (i = 0; i < icd->nr; i++) {
		io_u = td->io_ops->event(td, i);

		io_completed(td, io_u, icd);

		if (!(io_u->flags & IO_U_F_FREE_DEF))
			put_io_u(td, io_u);
	}
}

/*
 * Complete a single io_u for the sync engines.
 */
int io_u_sync_complete(struct thread_data *td, struct io_u *io_u,
		       unsigned long *bytes)
{
	struct io_completion_data icd;

	init_icd(td, &icd, 1);
	io_completed(td, io_u, &icd);

	if (!(io_u->flags & IO_U_F_FREE_DEF))
		put_io_u(td, io_u);

	if (icd.error) {
		td_verror(td, icd.error, "io_u_sync_complete");
		return -1;
	}

	if (bytes) {
		bytes[0] += icd.bytes_done[0];
		bytes[1] += icd.bytes_done[1];
	}

	return 0;
}

/*
 * Called to complete min_events number of io for the async engines.
 */
int io_u_queued_complete(struct thread_data *td, int min_evts,
			 unsigned long *bytes)
{
	struct io_completion_data icd;
	struct timespec *tvp = NULL;
	int ret;
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 0, };

	dprint(FD_IO, "io_u_queued_completed: min=%d\n", min_evts);

	if (!min_evts)
		tvp = &ts;

	ret = td_io_getevents(td, min_evts, td->o.iodepth_batch_complete, tvp);
	if (ret < 0) {
		td_verror(td, -ret, "td_io_getevents");
		return ret;
	} else if (!ret)
		return ret;

	init_icd(td, &icd, ret);
	ios_completed(td, &icd);
	if (icd.error) {
		td_verror(td, icd.error, "io_u_queued_complete");
		return -1;
	}

	if (bytes) {
		bytes[0] += icd.bytes_done[0];
		bytes[1] += icd.bytes_done[1];
	}

	return 0;
}

/*
 * Call when io_u is really queued, to update the submission latency.
 */
void io_u_queued(struct thread_data *td, struct io_u *io_u)
{
	if (!td->o.disable_slat) {
		unsigned long slat_time;

		slat_time = utime_since(&io_u->start_time, &io_u->issue_time);
		add_slat_sample(td, io_u->ddir, slat_time, io_u->xfer_buflen);
	}
}

/*
 * "randomly" fill the buffer contents
 */
void io_u_fill_buffer(struct thread_data *td, struct io_u *io_u,
		      unsigned int max_bs)
{
	long *ptr = io_u->buf;

	if (!td->o.zero_buffers) {
		while ((void *) ptr - io_u->buf < max_bs) {
			*ptr = rand() * GOLDEN_RATIO_PRIME;
			ptr++;
		}
	} else
		memset(ptr, 0, max_bs);
}