io_u.c revision 0b9d69ecb14045cf3b2622ae922756b9889b25e6
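/*
 * io unit (io_u) handling: choosing the data direction, file, offset and
 * length for each io_u, and accounting for completions (latency, bandwidth
 * and rate limiting).
 */
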
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <signal.h>
#include <time.h>
#include <assert.h>

#include "fio.h"
#include "hash.h"
#include "verify.h"

struct io_completion_data {
        int nr;                         /* input */

        int error;                      /* output */
        unsigned long bytes_done[2];    /* output */
        struct timeval time;            /* output */
};

/*
 * The ->file_map[] contains a map of blocks we have or have not done io
 * to yet. Used to make sure we cover the entire range in a fair fashion.
 */
static int random_map_free(struct fio_file *f, const unsigned long long block)
{
        unsigned int idx = RAND_MAP_IDX(f, block);
        unsigned int bit = RAND_MAP_BIT(f, block);

        dprint(FD_RANDOM, "free: b=%llu, idx=%u, bit=%u\n", block, idx, bit);

        return (f->file_map[idx] & (1 << bit)) == 0;
}

/*
 * Mark a given offset as used in the map.
 */
static void mark_random_map(struct thread_data *td, struct io_u *io_u)
{
        unsigned int min_bs = td->o.rw_min_bs;
        struct fio_file *f = io_u->file;
        unsigned long long block;
        unsigned int blocks, nr_blocks;

        block = (io_u->offset - f->file_offset) / (unsigned long long) min_bs;
        nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;
        blocks = 0;

        while (nr_blocks) {
                unsigned int this_blocks, mask;
                unsigned int idx, bit;

                /*
                 * If we have a mixed random workload, we may
                 * encounter blocks we already did IO to.
                 */
                if ((td->o.ddir_nr == 1) && !random_map_free(f, block)) {
                        if (!blocks)
                                blocks = 1;
                        break;
                }

                idx = RAND_MAP_IDX(f, block);
                bit = RAND_MAP_BIT(f, block);

                fio_assert(td, idx < f->num_maps);

                this_blocks = nr_blocks;
                if (this_blocks + bit > BLOCKS_PER_MAP)
                        this_blocks = BLOCKS_PER_MAP - bit;

                if (this_blocks == BLOCKS_PER_MAP)
                        mask = -1U;
                else
                        mask = ((1U << this_blocks) - 1) << bit;

                f->file_map[idx] |= mask;
                nr_blocks -= this_blocks;
                blocks += this_blocks;
                block += this_blocks;
        }

        if ((blocks * min_bs) < io_u->buflen)
                io_u->buflen = blocks * min_bs;
}

static unsigned long long last_block(struct thread_data *td, struct fio_file *f,
                                     enum fio_ddir ddir)
{
        unsigned long long max_blocks;
        unsigned long long max_size;

        /*
         * Hmm, should we make sure that ->io_size <= ->real_file_size?
         */
        max_size = f->io_size;
        if (max_size > f->real_file_size)
                max_size = f->real_file_size;

        max_blocks = max_size / (unsigned long long) td->o.ba[ddir];
        if (!max_blocks)
                return 0;

        return max_blocks;
}

/*
 * Return the next free block in the map.
 */
static int get_next_free_block(struct thread_data *td, struct fio_file *f,
                               enum fio_ddir ddir, unsigned long long *b)
{
        unsigned long long min_bs = td->o.rw_min_bs;
        int i;

        i = f->last_free_lookup;
        *b = (i * BLOCKS_PER_MAP);
        while ((*b) * min_bs < f->real_file_size &&
                (*b) * min_bs < f->io_size) {
                if (f->file_map[i] != (unsigned int) -1) {
                        *b += ffz(f->file_map[i]);
                        if (*b > last_block(td, f, ddir))
                                break;
                        f->last_free_lookup = i;
                        return 0;
                }

                *b += BLOCKS_PER_MAP;
                i++;
        }

        dprint(FD_IO, "failed finding a free block\n");
        return 1;
}

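/*
 * Pick a random block for the given direction. If a random map is kept,
 * retry a few times when the chosen block is already used, then fall back
 * to scanning the map for a free block.
 */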
static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
                                enum fio_ddir ddir, unsigned long long *b)
{
        unsigned long long r;
        int loops = 5;

        do {
                r = os_random_long(&td->random_state);
                dprint(FD_RANDOM, "off rand %llu\n", r);
                *b = (last_block(td, f, ddir) - 1)
                        * (r / ((unsigned long long) OS_RAND_MAX + 1.0));

                /*
                 * if we are not maintaining a random map, we are done.
                 */
                if (!file_randommap(td, f))
                        return 0;

                /*
                 * calculate map offset and check if it's free
                 */
                if (random_map_free(f, *b))
                        return 0;

                dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n",
                                                                        *b);
        } while (--loops);

        /*
         * we get here, if we didn't succeed in looking up a block. generate
         * a random start offset into the filemap, and find the first free
         * block from there.
         */
        loops = 10;
        do {
                f->last_free_lookup = (f->num_maps - 1) *
                                        (r / (OS_RAND_MAX + 1.0));
                if (!get_next_free_block(td, f, ddir, b))
                        return 0;

                r = os_random_long(&td->random_state);
        } while (--loops);

        /*
         * that didn't work either, try exhaustive search from the start
         */
        f->last_free_lookup = 0;
        return get_next_free_block(td, f, ddir, b);
}

/*
 * For random io, generate a random new block and see if it's used. Repeat
 * until we find a free one. For sequential io, just return the end of
 * the last io issued.
 */
static int get_next_offset(struct thread_data *td, struct io_u *io_u)
{
        struct fio_file *f = io_u->file;
        unsigned long long b;
        enum fio_ddir ddir = io_u->ddir;

        if (td_random(td) && (td->o.ddir_nr && !--td->ddir_nr)) {
                td->ddir_nr = td->o.ddir_nr;

                if (get_next_rand_offset(td, f, ddir, &b)) {
                        dprint(FD_IO, "%s: getting rand offset failed\n",
                                f->file_name);
                        return 1;
                }
        } else {
                if (f->last_pos >= f->real_file_size) {
                        if (!td_random(td) ||
                             get_next_rand_offset(td, f, ddir, &b)) {
                                dprint(FD_IO, "%s: pos %llu > size %llu\n",
                                                f->file_name, f->last_pos,
                                                f->real_file_size);
                                return 1;
                        }
                } else
                        b = (f->last_pos - f->file_offset) / td->o.min_bs[ddir];
        }

        io_u->offset = b * td->o.ba[ddir];
        if (io_u->offset >= f->io_size) {
                dprint(FD_IO, "get_next_offset: offset %llu >= io_size %llu\n",
                                        io_u->offset, f->io_size);
                return 1;
        }

        io_u->offset += f->file_offset;
        if (io_u->offset >= f->real_file_size) {
                dprint(FD_IO, "get_next_offset: offset %llu >= size %llu\n",
                                        io_u->offset, f->real_file_size);
                return 1;
        }

        return 0;
}

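/*
 * Pick the length of the next io_u: the fixed block size if min and max
 * agree, otherwise a random size between min_bs and max_bs, or one drawn
 * from the bssplit table if configured.
 */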
static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u)
{
        const int ddir = io_u->ddir;
        unsigned int uninitialized_var(buflen);
        unsigned int minbs, maxbs;
        long r;

        minbs = td->o.min_bs[ddir];
        maxbs = td->o.max_bs[ddir];

        if (minbs == maxbs)
                buflen = minbs;
        else {
                r = os_random_long(&td->bsrange_state);
                if (!td->o.bssplit_nr[ddir]) {
                        buflen = 1 + (unsigned int) ((double) maxbs *
                                        (r / (OS_RAND_MAX + 1.0)));
                        if (buflen < minbs)
                                buflen = minbs;
                } else {
                        long perc = 0;
                        unsigned int i;

                        for (i = 0; i < td->o.bssplit_nr[ddir]; i++) {
                                struct bssplit *bsp = &td->o.bssplit[ddir][i];

                                buflen = bsp->bs;
                                perc += bsp->perc;
                                if (r <= ((OS_RAND_MAX / 100L) * perc))
                                        break;
                        }
                }
                if (!td->o.bs_unaligned && is_power_of_2(minbs))
                        buflen = (buflen + minbs - 1) & ~(minbs - 1);
        }

        if (io_u->offset + buflen > io_u->file->real_file_size) {
                dprint(FD_IO, "lower buflen %u -> %u (ddir=%d)\n", buflen,
                                                minbs, ddir);
                buflen = minbs;
        }

        return buflen;
}

static void set_rwmix_bytes(struct thread_data *td)
{
        unsigned int diff;

        /*
         * we do time or byte based switch. this is needed because
         * buffered writes may issue a lot quicker than they complete,
         * whereas reads do not.
         */
        diff = td->o.rwmix[td->rwmix_ddir ^ 1];
        td->rwmix_issues = (td->io_issues[td->rwmix_ddir] * diff) / 100;
}

static inline enum fio_ddir get_rand_ddir(struct thread_data *td)
{
        unsigned int v;
        long r;

        r = os_random_long(&td->rwmix_state);
        v = 1 + (int) (100.0 * (r / (OS_RAND_MAX + 1.0)));
        if (v <= td->o.rwmix[DDIR_READ])
                return DDIR_READ;

        return DDIR_WRITE;
}

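/*
 * If this direction has accumulated too much pending sleep from rate
 * limiting, either switch to the other direction (for rw workloads) or
 * sleep the pending time off before issuing more io in this direction.
 */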
static enum fio_ddir rate_ddir(struct thread_data *td, enum fio_ddir ddir)
{
        enum fio_ddir odir = ddir ^ 1;
        struct timeval t;
        long usec;

        if (td->rate_pending_usleep[ddir] <= 0)
                return ddir;

        /*
         * We have too much pending sleep in this direction. See if we
         * should switch.
         */
        if (td_rw(td)) {
                /*
                 * Other direction does not have too much pending, switch
                 */
                if (td->rate_pending_usleep[odir] < 100000)
                        return odir;

                /*
                 * Both directions have pending sleep. Sleep the minimum time
                 * and deduct from both.
                 */
                if (td->rate_pending_usleep[ddir] <=
                        td->rate_pending_usleep[odir]) {
                        usec = td->rate_pending_usleep[ddir];
                } else {
                        usec = td->rate_pending_usleep[odir];
                        ddir = odir;
                }
        } else
                usec = td->rate_pending_usleep[ddir];

        fio_gettime(&t, NULL);
        usec_sleep(td, usec);
        usec = utime_since_now(&t);

        td->rate_pending_usleep[ddir] -= usec;

        odir = ddir ^ 1;
        if (td_rw(td) && __should_check_rate(td, odir))
                td->rate_pending_usleep[odir] -= usec;

        return ddir;
}

/*
 * Return the data direction for the next io_u. If the job is a
 * mixed read/write workload, check the rwmix cycle and switch if
 * necessary.
 */
static enum fio_ddir get_rw_ddir(struct thread_data *td)
{
        enum fio_ddir ddir;

        /*
         * see if it's time to fsync
         */
        if (td->o.fsync_blocks &&
           !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks) &&
             td->io_issues[DDIR_WRITE] && should_fsync(td))
                return DDIR_SYNC;

        /*
         * see if it's time to fdatasync
         */
        if (td->o.fdatasync_blocks &&
           !(td->io_issues[DDIR_WRITE] % td->o.fdatasync_blocks) &&
             td->io_issues[DDIR_WRITE] && should_fsync(td))
                return DDIR_DATASYNC;

        if (td_rw(td)) {
                /*
                 * Check if it's time to seed a new data direction.
                 */
                if (td->io_issues[td->rwmix_ddir] >= td->rwmix_issues) {
                        /*
                         * Put a top limit on how many bytes we do for
                         * one data direction, to avoid overflowing the
                         * ranges too much
                         */
                        ddir = get_rand_ddir(td);

                        if (ddir != td->rwmix_ddir)
                                set_rwmix_bytes(td);

                        td->rwmix_ddir = ddir;
                }
                ddir = td->rwmix_ddir;
        } else if (td_read(td))
                ddir = DDIR_READ;
        else
                ddir = DDIR_WRITE;

        td->rwmix_ddir = rate_ddir(td, ddir);
        return td->rwmix_ddir;
}

void put_file_log(struct thread_data *td, struct fio_file *f)
{
        int ret = put_file(td, f);

        if (ret)
                td_verror(td, ret, "file close");
}

void put_io_u(struct thread_data *td, struct io_u *io_u)
{
        td_io_u_lock(td);

        assert((io_u->flags & IO_U_F_FREE) == 0);
        io_u->flags |= IO_U_F_FREE;
        io_u->flags &= ~IO_U_F_FREE_DEF;

        if (io_u->file)
                put_file_log(td, io_u->file);

        io_u->file = NULL;
        flist_del_init(&io_u->list);
        flist_add(&io_u->list, &td->io_u_freelist);
        td->cur_depth--;
        td_io_u_unlock(td);
        td_io_u_free_notify(td);
}

void clear_io_u(struct thread_data *td, struct io_u *io_u)
{
        io_u->flags &= ~IO_U_F_FLIGHT;
        put_io_u(td, io_u);
}

void requeue_io_u(struct thread_data *td, struct io_u **io_u)
{
        struct io_u *__io_u = *io_u;

        dprint(FD_IO, "requeue %p\n", __io_u);

        td_io_u_lock(td);

        __io_u->flags |= IO_U_F_FREE;
        if ((__io_u->flags & IO_U_F_FLIGHT) && !ddir_sync(__io_u->ddir))
                td->io_issues[__io_u->ddir]--;

        __io_u->flags &= ~IO_U_F_FLIGHT;

        flist_del(&__io_u->list);
        flist_add_tail(&__io_u->list, &td->io_u_requeues);
        td->cur_depth--;
        td_io_u_unlock(td);
        *io_u = NULL;
}

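/*
 * Fill in direction, offset and length for an io_u, advancing the zone and
 * marking the random map as needed. Returns 0 on success, 1 if no further
 * io can be generated for this file.
 */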
static int fill_io_u(struct thread_data *td, struct io_u *io_u)
{
        if (td->io_ops->flags & FIO_NOIO)
                goto out;

        io_u->ddir = get_rw_ddir(td);

        /*
         * fsync() or fdatasync(), we are done
         */
        if (ddir_sync(io_u->ddir))
                goto out;

        /*
         * See if it's time to switch to a new zone
         */
        if (td->zone_bytes >= td->o.zone_size) {
                td->zone_bytes = 0;
                io_u->file->last_pos += td->o.zone_skip;
                td->io_skip_bytes += td->o.zone_skip;
        }

        /*
         * No log, let the seq/rand engine retrieve the next buflen and
         * position.
         */
        if (get_next_offset(td, io_u)) {
                dprint(FD_IO, "io_u %p, failed getting offset\n", io_u);
                return 1;
        }

        io_u->buflen = get_next_buflen(td, io_u);
        if (!io_u->buflen) {
                dprint(FD_IO, "io_u %p, failed getting buflen\n", io_u);
                return 1;
        }

        if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
                dprint(FD_IO, "io_u %p, offset too large\n", io_u);
                dprint(FD_IO, "  off=%llu/%lu > %llu\n", io_u->offset,
                                io_u->buflen, io_u->file->real_file_size);
                return 1;
        }

        /*
         * mark entry before potentially trimming io_u
         */
        if (td_random(td) && file_randommap(td, io_u->file))
                mark_random_map(td, io_u);

        /*
         * If using a write iolog, store this entry.
         */
out:
        dprint_io_u(io_u, "fill_io_u");
        td->zone_bytes += io_u->buflen;
        log_io_u(td, io_u);
        return 0;
}

static void __io_u_mark_map(unsigned int *map, unsigned int nr)
{
        int index = 0;

        switch (nr) {
        default:
                index = 6;
                break;
        case 33 ... 64:
                index = 5;
                break;
        case 17 ... 32:
                index = 4;
                break;
        case 9 ... 16:
                index = 3;
                break;
        case 5 ... 8:
                index = 2;
                break;
        case 1 ... 4:
                index = 1;
        case 0:
                break;
        }

        map[index]++;
}

void io_u_mark_submit(struct thread_data *td, unsigned int nr)
{
        __io_u_mark_map(td->ts.io_u_submit, nr);
        td->ts.total_submit++;
}

void io_u_mark_complete(struct thread_data *td, unsigned int nr)
{
        __io_u_mark_map(td->ts.io_u_complete, nr);
        td->ts.total_complete++;
}

void io_u_mark_depth(struct thread_data *td, unsigned int nr)
{
        int index = 0;

        switch (td->cur_depth) {
        default:
                index = 6;
                break;
        case 32 ... 63:
                index = 5;
                break;
        case 16 ... 31:
                index = 4;
                break;
        case 8 ... 15:
                index = 3;
                break;
        case 4 ... 7:
                index = 2;
                break;
        case 2 ... 3:
                index = 1;
        case 1:
                break;
        }

        td->ts.io_u_map[index] += nr;
}

static void io_u_mark_lat_usec(struct thread_data *td, unsigned long usec)
{
        int index = 0;

        assert(usec < 1000);

        switch (usec) {
        case 750 ... 999:
                index = 9;
                break;
        case 500 ... 749:
                index = 8;
                break;
        case 250 ... 499:
                index = 7;
                break;
        case 100 ... 249:
                index = 6;
                break;
        case 50 ... 99:
                index = 5;
                break;
        case 20 ... 49:
                index = 4;
                break;
        case 10 ... 19:
                index = 3;
                break;
        case 4 ... 9:
                index = 2;
                break;
        case 2 ... 3:
                index = 1;
        case 0 ... 1:
                break;
        }

        assert(index < FIO_IO_U_LAT_U_NR);
        td->ts.io_u_lat_u[index]++;
}

static void io_u_mark_lat_msec(struct thread_data *td, unsigned long msec)
{
        int index = 0;

        switch (msec) {
        default:
                index = 11;
                break;
        case 1000 ... 1999:
                index = 10;
                break;
        case 750 ... 999:
                index = 9;
                break;
        case 500 ... 749:
                index = 8;
                break;
        case 250 ... 499:
                index = 7;
                break;
        case 100 ... 249:
                index = 6;
                break;
        case 50 ... 99:
                index = 5;
                break;
        case 20 ... 49:
                index = 4;
                break;
        case 10 ... 19:
                index = 3;
                break;
        case 4 ... 9:
                index = 2;
                break;
        case 2 ... 3:
                index = 1;
        case 0 ... 1:
                break;
        }

        assert(index < FIO_IO_U_LAT_M_NR);
        td->ts.io_u_lat_m[index]++;
}

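/*
 * Bucket a completion latency into the sub-millisecond or millisecond
 * histogram, depending on its magnitude.
 */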
static void io_u_mark_latency(struct thread_data *td, unsigned long usec)
{
        if (usec < 1000)
                io_u_mark_lat_usec(td, usec);
        else
                io_u_mark_lat_msec(td, usec / 1000);
}

/*
 * Get next file to service by choosing one at random
 */
static struct fio_file *get_next_file_rand(struct thread_data *td,
                                           enum fio_file_flags goodf,
                                           enum fio_file_flags badf)
{
        struct fio_file *f;
        int fno;

        do {
                long r = os_random_long(&td->next_file_state);
                int opened = 0;

                fno = (unsigned int) ((double) td->o.nr_files
                        * (r / (OS_RAND_MAX + 1.0)));
                f = td->files[fno];
                if (fio_file_done(f))
                        continue;

                if (!fio_file_open(f)) {
                        int err;

                        err = td_io_open_file(td, f);
                        if (err)
                                continue;
                        opened = 1;
                }

                if ((!goodf || (f->flags & goodf)) && !(f->flags & badf)) {
                        dprint(FD_FILE, "get_next_file_rand: %p\n", f);
                        return f;
                }
                if (opened)
                        td_io_close_file(td, f);
        } while (1);
}

/*
 * Get next file to service by doing round robin between all available ones
 */
static struct fio_file *get_next_file_rr(struct thread_data *td, int goodf,
                                         int badf)
{
        unsigned int old_next_file = td->next_file;
        struct fio_file *f;

        do {
                int opened = 0;

                f = td->files[td->next_file];

                td->next_file++;
                if (td->next_file >= td->o.nr_files)
                        td->next_file = 0;

                dprint(FD_FILE, "trying file %s %x\n", f->file_name, f->flags);
                if (fio_file_done(f)) {
                        f = NULL;
                        continue;
                }

                if (!fio_file_open(f)) {
                        int err;

                        err = td_io_open_file(td, f);
                        if (err) {
                                dprint(FD_FILE, "error %d on open of %s\n",
                                        err, f->file_name);
                                f = NULL;
                                continue;
                        }
                        opened = 1;
                }

                dprint(FD_FILE, "goodf=%x, badf=%x, ff=%x\n", goodf, badf,
                                                                f->flags);
                if ((!goodf || (f->flags & goodf)) && !(f->flags & badf))
                        break;

                if (opened)
                        td_io_close_file(td, f);

                f = NULL;
        } while (td->next_file != old_next_file);

        dprint(FD_FILE, "get_next_file_rr: %p\n", f);
        return f;
}

static struct fio_file *get_next_file(struct thread_data *td)
{
        struct fio_file *f;

        assert(td->o.nr_files <= td->files_index);

        if (td->nr_done_files >= td->o.nr_files) {
                dprint(FD_FILE, "get_next_file: nr_open=%d, nr_done=%d,"
                                " nr_files=%d\n", td->nr_open_files,
                                                  td->nr_done_files,
                                                  td->o.nr_files);
                return NULL;
        }

        f = td->file_service_file;
        if (f && fio_file_open(f) && !fio_file_closing(f)) {
                if (td->o.file_service_type == FIO_FSERVICE_SEQ)
                        goto out;
                if (td->file_service_left--)
                        goto out;
        }

        if (td->o.file_service_type == FIO_FSERVICE_RR ||
            td->o.file_service_type == FIO_FSERVICE_SEQ)
                f = get_next_file_rr(td, FIO_FILE_open, FIO_FILE_closing);
        else
                f = get_next_file_rand(td, FIO_FILE_open, FIO_FILE_closing);

        td->file_service_file = f;
        td->file_service_left = td->file_service_nr - 1;
out:
        dprint(FD_FILE, "get_next_file: %p [%s]\n", f, f->file_name);
        return f;
}

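/*
 * Attach the next file to an io_u and fill it in. Files for which no
 * further io can be generated are marked done and skipped.
 */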
static int set_io_u_file(struct thread_data *td, struct io_u *io_u)
{
        struct fio_file *f;

        do {
                f = get_next_file(td);
                if (!f)
                        return 1;

                io_u->file = f;
                get_file(f);

                if (!fill_io_u(td, io_u))
                        break;

                put_file_log(td, f);
                td_io_close_file(td, f);
                io_u->file = NULL;
                fio_file_set_done(f);
                td->nr_done_files++;
                dprint(FD_FILE, "%s: is done (%d of %d)\n", f->file_name,
                                        td->nr_done_files, td->o.nr_files);
        } while (1);

        return 0;
}


struct io_u *__get_io_u(struct thread_data *td)
{
        struct io_u *io_u = NULL;

        td_io_u_lock(td);

again:
        if (!flist_empty(&td->io_u_requeues))
                io_u = flist_entry(td->io_u_requeues.next, struct io_u, list);
        else if (!queue_full(td)) {
                io_u = flist_entry(td->io_u_freelist.next, struct io_u, list);

                io_u->buflen = 0;
                io_u->resid = 0;
                io_u->file = NULL;
                io_u->end_io = NULL;
        }

        /*
         * We ran out, wait for async verify threads to finish and return one
         */
        if (!io_u && td->o.verify_async) {
                pthread_cond_wait(&td->free_cond, &td->io_u_lock);
                goto again;
        }

        if (io_u) {
                assert(io_u->flags & IO_U_F_FREE);
                io_u->flags &= ~IO_U_F_FREE;
                io_u->flags &= ~IO_U_F_FREE_DEF;

                io_u->error = 0;
                flist_del(&io_u->list);
                flist_add(&io_u->list, &td->io_u_busylist);
                td->cur_depth++;
        }

        td_io_u_unlock(td);
        return io_u;
}

/*
 * Return an io_u to be processed. Gets a buflen and offset, sets direction,
 * etc. The returned io_u is fully ready to be prepped and submitted.
 */
struct io_u *get_io_u(struct thread_data *td)
{
        struct fio_file *f;
        struct io_u *io_u;

        io_u = __get_io_u(td);
        if (!io_u) {
                dprint(FD_IO, "__get_io_u failed\n");
                return NULL;
        }

        /*
         * from a requeue, io_u already setup
         */
        if (io_u->file)
                goto out;

        /*
         * If using an iolog, grab next piece if any available.
         */
        if (td->o.read_iolog_file) {
                if (read_iolog_get(td, io_u))
                        goto err_put;
        } else if (set_io_u_file(td, io_u)) {
                dprint(FD_IO, "io_u %p, setting file failed\n", io_u);
                goto err_put;
        }

        f = io_u->file;
        assert(fio_file_open(f));

        if (!ddir_sync(io_u->ddir)) {
                if (!io_u->buflen && !(td->io_ops->flags & FIO_NOIO)) {
                        dprint(FD_IO, "get_io_u: zero buflen on %p\n", io_u);
                        goto err_put;
                }

                f->last_pos = io_u->offset + io_u->buflen;

                if (td->o.verify != VERIFY_NONE && io_u->ddir == DDIR_WRITE)
                        populate_verify_io_u(td, io_u);
                else if (td->o.refill_buffers && io_u->ddir == DDIR_WRITE)
                        io_u_fill_buffer(td, io_u, io_u->xfer_buflen);
        }

        /*
         * Set io data pointers.
         */
        io_u->xfer_buf = io_u->buf;
        io_u->xfer_buflen = io_u->buflen;

out:
        if (!td_io_prep(td, io_u)) {
                if (!td->o.disable_slat)
                        fio_gettime(&io_u->start_time, NULL);
                return io_u;
        }
err_put:
        dprint(FD_IO, "get_io_u failed\n");
        put_io_u(td, io_u);
        return NULL;
}

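/*
 * Log the details of a failed io_u (direction, offset, length) and record
 * the error on the thread if none is set yet.
 */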
void io_u_log_error(struct thread_data *td, struct io_u *io_u)
{
        const char *msg[] = { "read", "write", "sync" };

        log_err("fio: io_u error");

        if (io_u->file)
                log_err(" on file %s", io_u->file->file_name);

        log_err(": %s\n", strerror(io_u->error));

        log_err("     %s offset=%llu, buflen=%lu\n", msg[io_u->ddir],
                                        io_u->offset, io_u->xfer_buflen);

        if (!td->error)
                td_verror(td, io_u->error, "io_u error");
}

static void io_completed(struct thread_data *td, struct io_u *io_u,
                         struct io_completion_data *icd)
{
        /*
         * Older gcc's are too dumb to realize that usec is always used
         * initialized, silence that warning.
         */
        unsigned long uninitialized_var(usec);

        dprint_io_u(io_u, "io complete");

        assert(io_u->flags & IO_U_F_FLIGHT);
        io_u->flags &= ~IO_U_F_FLIGHT;

        if (ddir_sync(io_u->ddir)) {
                td->last_was_sync = 1;
                return;
        }

        td->last_was_sync = 0;

        if (!io_u->error) {
                unsigned int bytes = io_u->buflen - io_u->resid;
                const enum fio_ddir idx = io_u->ddir;
                int ret;

                td->io_blocks[idx]++;
                td->io_bytes[idx] += bytes;
                td->this_io_bytes[idx] += bytes;

                if (ramp_time_over(td)) {
                        unsigned long uninitialized_var(lusec);
                        unsigned long uninitialized_var(rusec);

                        if (!td->o.disable_clat || !td->o.disable_bw)
                                lusec = utime_since(&io_u->issue_time,
                                                        &icd->time);
                        if (__should_check_rate(td, idx) ||
                            __should_check_rate(td, idx ^ 1))
                                rusec = utime_since(&io_u->start_time,
                                                        &icd->time);

                        if (!td->o.disable_clat) {
                                add_clat_sample(td, idx, lusec, bytes);
                                io_u_mark_latency(td, lusec);
                        }
                        if (!td->o.disable_bw)
                                add_bw_sample(td, idx, bytes, &icd->time);
                        if (__should_check_rate(td, idx)) {
                                td->rate_pending_usleep[idx] +=
                                        (long) td->rate_usec_cycle[idx] - rusec;
                        }
                        if (__should_check_rate(td, idx ^ 1))
                                td->rate_pending_usleep[idx ^ 1] -= rusec;
                }

                if (td_write(td) && idx == DDIR_WRITE &&
                    td->o.do_verify &&
                    td->o.verify != VERIFY_NONE)
                        log_io_piece(td, io_u);

                icd->bytes_done[idx] += bytes;

                if (io_u->end_io) {
                        ret = io_u->end_io(td, io_u);
                        if (ret && !icd->error)
                                icd->error = ret;
                }
        } else {
                icd->error = io_u->error;
                io_u_log_error(td, io_u);
        }
        if (td->o.continue_on_error && icd->error &&
            td_non_fatal_error(icd->error)) {
                /*
                 * If there is a non_fatal error, then add to the error count
                 * and clear all the errors.
                 */
                update_error_count(td, icd->error);
                td_clear_error(td);
                icd->error = 0;
                io_u->error = 0;
        }
}

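/*
 * Prepare a completion data structure for processing 'nr' completed io_us.
 */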
static void init_icd(struct thread_data *td, struct io_completion_data *icd,
                     int nr)
{
        if (!td->o.disable_clat || !td->o.disable_bw)
                fio_gettime(&icd->time, NULL);

        icd->nr = nr;

        icd->error = 0;
        icd->bytes_done[0] = icd->bytes_done[1] = 0;
}

static void ios_completed(struct thread_data *td,
                          struct io_completion_data *icd)
{
        struct io_u *io_u;
        int i;

        for (i = 0; i < icd->nr; i++) {
                io_u = td->io_ops->event(td, i);

                io_completed(td, io_u, icd);

                if (!(io_u->flags & IO_U_F_FREE_DEF))
                        put_io_u(td, io_u);
        }
}

/*
 * Complete a single io_u for the sync engines.
 */
int io_u_sync_complete(struct thread_data *td, struct io_u *io_u,
                       unsigned long *bytes)
{
        struct io_completion_data icd;

        init_icd(td, &icd, 1);
        io_completed(td, io_u, &icd);

        if (!(io_u->flags & IO_U_F_FREE_DEF))
                put_io_u(td, io_u);

        if (icd.error) {
                td_verror(td, icd.error, "io_u_sync_complete");
                return -1;
        }

        if (bytes) {
                bytes[0] += icd.bytes_done[0];
                bytes[1] += icd.bytes_done[1];
        }

        return 0;
}

/*
 * Called to complete min_events number of io for the async engines.
 */
int io_u_queued_complete(struct thread_data *td, int min_evts,
                         unsigned long *bytes)
{
        struct io_completion_data icd;
        struct timespec *tvp = NULL;
        int ret;
        struct timespec ts = { .tv_sec = 0, .tv_nsec = 0, };

        dprint(FD_IO, "io_u_queued_completed: min=%d\n", min_evts);

        if (!min_evts)
                tvp = &ts;

        ret = td_io_getevents(td, min_evts, td->o.iodepth_batch_complete, tvp);
        if (ret < 0) {
                td_verror(td, -ret, "td_io_getevents");
                return ret;
        } else if (!ret)
                return ret;

        init_icd(td, &icd, ret);
        ios_completed(td, &icd);
        if (icd.error) {
                td_verror(td, icd.error, "io_u_queued_complete");
                return -1;
        }

        if (bytes) {
                bytes[0] += icd.bytes_done[0];
                bytes[1] += icd.bytes_done[1];
        }

        return 0;
}

/*
 * Call when io_u is really queued, to update the submission latency.
 */
void io_u_queued(struct thread_data *td, struct io_u *io_u)
{
        if (!td->o.disable_slat) {
                unsigned long slat_time;

                slat_time = utime_since(&io_u->start_time, &io_u->issue_time);
                add_slat_sample(td, io_u->ddir, slat_time, io_u->xfer_buflen);
        }
}

/*
 * "randomly" fill the buffer contents
 */
void io_u_fill_buffer(struct thread_data *td, struct io_u *io_u,
                      unsigned int max_bs)
{
        long *ptr = io_u->buf;

        if (!td->o.zero_buffers) {
                while ((void *) ptr - io_u->buf < max_bs) {
                        *ptr = rand() * GOLDEN_RATIO_PRIME;
                        ptr++;
                }
        } else
                memset(ptr, 0, max_bs);
}