io_u.c revision 87b10676354d10be02864ad615e1e9fe5a2376ab
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <signal.h>
#include <time.h>
#include <assert.h>

#include "fio.h"
#include "hash.h"

struct io_completion_data {
	int nr;				/* input */

	int error;			/* output */
	unsigned long bytes_done[2];	/* output */
	struct timeval time;		/* output */
};

/*
 * The ->file_map[] contains a map of blocks we have or have not done io
 * to yet. Used to make sure we cover the entire range in a fair fashion.
 */
static int random_map_free(struct fio_file *f, const unsigned long long block)
{
	unsigned int idx = RAND_MAP_IDX(f, block);
	unsigned int bit = RAND_MAP_BIT(f, block);

	dprint(FD_RANDOM, "free: b=%llu, idx=%u, bit=%u\n", block, idx, bit);

	return (f->file_map[idx] & (1 << bit)) == 0;
}

/*
 * Mark a given offset as used in the map.
 */
static void mark_random_map(struct thread_data *td, struct io_u *io_u)
{
	unsigned int min_bs = td->o.rw_min_bs;
	struct fio_file *f = io_u->file;
	unsigned long long block;
	unsigned int blocks, nr_blocks;

	block = (io_u->offset - f->file_offset) / (unsigned long long) min_bs;
	nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;
	blocks = 0;

	while (nr_blocks) {
		unsigned int this_blocks, mask;
		unsigned int idx, bit;

		/*
		 * If we have a mixed random workload, we may
		 * encounter blocks we already did IO to.
		 */
		if ((td->o.ddir_nr == 1) && !random_map_free(f, block)) {
			if (!blocks)
				blocks = 1;
			break;
		}

		idx = RAND_MAP_IDX(f, block);
		bit = RAND_MAP_BIT(f, block);

		fio_assert(td, idx < f->num_maps);

		this_blocks = nr_blocks;
		if (this_blocks + bit > BLOCKS_PER_MAP)
			this_blocks = BLOCKS_PER_MAP - bit;

		if (this_blocks == BLOCKS_PER_MAP)
			mask = -1U;
		else
			mask = ((1U << this_blocks) - 1) << bit;

		f->file_map[idx] |= mask;
		nr_blocks -= this_blocks;
		blocks += this_blocks;
		block += this_blocks;
	}

	if ((blocks * min_bs) < io_u->buflen)
		io_u->buflen = blocks * min_bs;
}

static unsigned long long last_block(struct thread_data *td, struct fio_file *f,
				     enum fio_ddir ddir)
{
	unsigned long long max_blocks;
	unsigned long long max_size;

	/*
	 * Hmm, should we make sure that ->io_size <= ->real_file_size?
	 */
	max_size = f->io_size;
	if (max_size > f->real_file_size)
		max_size = f->real_file_size;

	max_blocks = max_size / (unsigned long long) td->o.min_bs[ddir];
	if (!max_blocks)
		return 0;

	return max_blocks;
}
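/*
 * Note on the map layout, assuming the usual split of a block number into a
 * word index (block / BLOCKS_PER_MAP) and a bit within that word
 * (block % BLOCKS_PER_MAP), with 32-bit map words so BLOCKS_PER_MAP == 32:
 * block 70 lands at idx = 2, bit = 6. Marking 4 blocks from there in
 * mark_random_map() builds mask = ((1U << 4) - 1) << 6 == 0x3c0, which is
 * OR'ed into file_map[2].
 */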
/*
 * Return the next free block in the map.
 */
static int get_next_free_block(struct thread_data *td, struct fio_file *f,
			       enum fio_ddir ddir, unsigned long long *b)
{
	unsigned long long min_bs = td->o.rw_min_bs;
	int i;

	i = f->last_free_lookup;
	*b = (i * BLOCKS_PER_MAP);
	while ((*b) * min_bs < f->real_file_size) {
		if (f->file_map[i] != (unsigned int) -1) {
			*b += ffz(f->file_map[i]);
			if (*b > last_block(td, f, ddir))
				break;
			f->last_free_lookup = i;
			return 0;
		}

		*b += BLOCKS_PER_MAP;
		i++;
	}

	dprint(FD_IO, "failed finding a free block\n");
	return 1;
}

static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
				enum fio_ddir ddir, unsigned long long *b)
{
	unsigned long long r;
	int loops = 5;

	do {
		r = os_random_long(&td->random_state);
		dprint(FD_RANDOM, "off rand %llu\n", r);
		*b = (last_block(td, f, ddir) - 1)
			* (r / ((unsigned long long) OS_RAND_MAX + 1.0));

		/*
		 * if we are not maintaining a random map, we are done.
		 */
		if (!file_randommap(td, f))
			return 0;

		/*
		 * calculate map offset and check if it's free
		 */
		if (random_map_free(f, *b))
			return 0;

		dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n",
			*b);
	} while (--loops);

	/*
	 * we get here, if we didn't succeed in looking up a block. generate
	 * a random start offset into the filemap, and find the first free
	 * block from there.
	 */
	loops = 10;
	do {
		f->last_free_lookup = (f->num_maps - 1) *
					(r / (OS_RAND_MAX + 1.0));
		if (!get_next_free_block(td, f, ddir, b))
			return 0;

		r = os_random_long(&td->random_state);
	} while (--loops);

	/*
	 * that didn't work either, try exhaustive search from the start
	 */
	f->last_free_lookup = 0;
	return get_next_free_block(td, f, ddir, b);
}
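/*
 * The scaling above maps r / (OS_RAND_MAX + 1.0), a value in [0, 1), onto
 * [0, last_block - 1). For example, assuming OS_RAND_MAX is 2^31 - 1 and
 * last_block() returns 1000, r = 2^30 gives 999 * 0.5 = 499.5, truncated
 * to block 499. If the chosen block is busy five times in a row, the code
 * falls back to scanning the map from a random word, and finally from the
 * start.
 */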
/*
 * For random io, generate a random new block and see if it's used. Repeat
 * until we find a free one. For sequential io, just return the end of
 * the last io issued.
 */
static int get_next_offset(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;
	unsigned long long b;
	enum fio_ddir ddir = io_u->ddir;

	if (td_random(td) && (td->o.ddir_nr && !--td->ddir_nr)) {
		td->ddir_nr = td->o.ddir_nr;

		if (get_next_rand_offset(td, f, ddir, &b)) {
			dprint(FD_IO, "%s: getting rand offset failed\n",
				f->file_name);
			return 1;
		}
	} else {
		if (f->last_pos >= f->real_file_size) {
			if (!td_random(td) ||
			     get_next_rand_offset(td, f, ddir, &b)) {
				dprint(FD_IO, "%s: pos %llu > size %llu\n",
					f->file_name, f->last_pos,
					f->real_file_size);
				return 1;
			}
		} else
			b = (f->last_pos - f->file_offset) / td->o.min_bs[ddir];
	}

	io_u->offset = b * td->o.min_bs[ddir];
	if (io_u->offset >= f->io_size) {
		dprint(FD_IO, "get_next_offset: offset %llu >= io_size %llu\n",
			io_u->offset, f->io_size);
		return 1;
	}

	io_u->offset += f->file_offset;
	if (io_u->offset >= f->real_file_size) {
		dprint(FD_IO, "get_next_offset: offset %llu >= size %llu\n",
			io_u->offset, f->real_file_size);
		return 1;
	}

	return 0;
}

static inline int is_power_of_2(unsigned int val)
{
	return (val != 0 && ((val & (val - 1)) == 0));
}

static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u)
{
	const int ddir = io_u->ddir;
	unsigned int uninitialized_var(buflen);
	unsigned int minbs, maxbs;
	long r;

	minbs = td->o.min_bs[ddir];
	maxbs = td->o.max_bs[ddir];

	if (minbs == maxbs)
		buflen = minbs;
	else {
		r = os_random_long(&td->bsrange_state);
		if (!td->o.bssplit_nr) {
			buflen = 1 + (unsigned int) ((double) maxbs *
					(r / (OS_RAND_MAX + 1.0)));
			if (buflen < minbs)
				buflen = minbs;
		} else {
			long perc = 0;
			unsigned int i;

			for (i = 0; i < td->o.bssplit_nr; i++) {
				struct bssplit *bsp = &td->o.bssplit[i];

				buflen = bsp->bs;
				perc += bsp->perc;
				if (r <= ((OS_RAND_MAX / 100L) * perc))
					break;
			}
		}
		if (!td->o.bs_unaligned && is_power_of_2(minbs))
			buflen = (buflen + minbs - 1) & ~(minbs - 1);
	}

	if (io_u->offset + buflen > io_u->file->real_file_size) {
		dprint(FD_IO, "lower buflen %u -> %u (ddir=%d)\n", buflen,
						minbs, ddir);
		buflen = minbs;
	}

	return buflen;
}

static void set_rwmix_bytes(struct thread_data *td)
{
	unsigned int diff;

	/*
	 * we do time or byte based switch. this is needed because
	 * buffered writes may issue a lot quicker than they complete,
	 * whereas reads do not.
	 */
	diff = td->o.rwmix[td->rwmix_ddir ^ 1];
	td->rwmix_issues = (td->io_issues[td->rwmix_ddir] * diff) / 100;
}

static inline enum fio_ddir get_rand_ddir(struct thread_data *td)
{
	unsigned int v;
	long r;

	r = os_random_long(&td->rwmix_state);
	v = 1 + (int) (100.0 * (r / (OS_RAND_MAX + 1.0)));
	if (v <= td->o.rwmix[DDIR_READ])
		return DDIR_READ;

	return DDIR_WRITE;
}
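/*
 * get_rand_ddir() above draws v uniformly from 1..100, so with a 75/25
 * read/write mix (rwmix[DDIR_READ] == 75), v <= 75 selects DDIR_READ in
 * roughly 75% of the draws. set_rwmix_bytes() derives the next switch
 * point from the current direction's issue count, scaled by the opposite
 * direction's mix percentage.
 */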
/*
 * Return the data direction for the next io_u. If the job is a
 * mixed read/write workload, check the rwmix cycle and switch if
 * necessary.
 */
static enum fio_ddir get_rw_ddir(struct thread_data *td)
{
	if (td_rw(td)) {
		/*
		 * Check if it's time to seed a new data direction.
		 */
		if (td->io_issues[td->rwmix_ddir] >= td->rwmix_issues) {
			unsigned long long max_bytes;
			enum fio_ddir ddir;

			/*
			 * Put a top limit on how many bytes we do for
			 * one data direction, to avoid overflowing the
			 * ranges too much
			 */
			ddir = get_rand_ddir(td);
			max_bytes = td->this_io_bytes[ddir];
			if (max_bytes >=
			    (td->o.size * td->o.rwmix[ddir] / 100)) {
				if (!td->rw_end_set[ddir]) {
					td->rw_end_set[ddir] = 1;
					fio_gettime(&td->rw_end[ddir], NULL);
				}

				ddir ^= 1;
			}

			if (ddir != td->rwmix_ddir)
				set_rwmix_bytes(td);

			td->rwmix_ddir = ddir;
		}
		return td->rwmix_ddir;
	} else if (td_read(td))
		return DDIR_READ;
	else
		return DDIR_WRITE;
}

static void put_file_log(struct thread_data *td, struct fio_file *f)
{
	int ret = put_file(td, f);

	if (ret)
		td_verror(td, ret, "file close");
}

void put_io_u(struct thread_data *td, struct io_u *io_u)
{
	assert((io_u->flags & IO_U_F_FREE) == 0);
	io_u->flags |= IO_U_F_FREE;

	if (io_u->file)
		put_file_log(td, io_u->file);

	io_u->file = NULL;
	flist_del(&io_u->list);
	flist_add(&io_u->list, &td->io_u_freelist);
	td->cur_depth--;
}

void requeue_io_u(struct thread_data *td, struct io_u **io_u)
{
	struct io_u *__io_u = *io_u;

	dprint(FD_IO, "requeue %p\n", __io_u);

	__io_u->flags |= IO_U_F_FREE;
	if ((__io_u->flags & IO_U_F_FLIGHT) && (__io_u->ddir != DDIR_SYNC))
		td->io_issues[__io_u->ddir]--;

	__io_u->flags &= ~IO_U_F_FLIGHT;

	flist_del(&__io_u->list);
	flist_add_tail(&__io_u->list, &td->io_u_requeues);
	td->cur_depth--;
	*io_u = NULL;
}

static int fill_io_u(struct thread_data *td, struct io_u *io_u)
{
	if (td->io_ops->flags & FIO_NOIO)
		goto out;

	/*
	 * see if it's time to sync
	 */
	if (td->o.fsync_blocks &&
	    !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks) &&
	    td->io_issues[DDIR_WRITE] && should_fsync(td)) {
		io_u->ddir = DDIR_SYNC;
		goto out;
	}

	io_u->ddir = get_rw_ddir(td);

	/*
	 * See if it's time to switch to a new zone
	 */
	if (td->zone_bytes >= td->o.zone_size) {
		td->zone_bytes = 0;
		io_u->file->last_pos += td->o.zone_skip;
		td->io_skip_bytes += td->o.zone_skip;
	}

	/*
	 * No log, let the seq/rand engine retrieve the next buflen and
	 * position.
	 */
	if (get_next_offset(td, io_u)) {
		dprint(FD_IO, "io_u %p, failed getting offset\n", io_u);
		return 1;
	}

	io_u->buflen = get_next_buflen(td, io_u);
	if (!io_u->buflen) {
		dprint(FD_IO, "io_u %p, failed getting buflen\n", io_u);
		return 1;
	}

	if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
		dprint(FD_IO, "io_u %p, offset too large\n", io_u);
		dprint(FD_IO, " off=%llu/%lu > %llu\n", io_u->offset,
				io_u->buflen, io_u->file->real_file_size);
		return 1;
	}

	/*
	 * mark entry before potentially trimming io_u
	 */
	if (td_random(td) && file_randommap(td, io_u->file))
		mark_random_map(td, io_u);

	/*
	 * If using a write iolog, store this entry.
	 */
out:
	dprint_io_u(io_u, "fill_io_u");
	td->zone_bytes += io_u->buflen;
	log_io_u(td, io_u);
	return 0;
}
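/*
 * The submit/complete maps below bucket a batch size nr into 0, 1-4, 5-8,
 * 9-16, 17-32, 33-64 and 65+ entries; submitting a batch of 12, for
 * instance, increments the 9-16 bucket (index 3).
 */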
static void __io_u_mark_map(unsigned int *map, unsigned int nr)
{
	int index = 0;

	switch (nr) {
	default:
		index = 6;
		break;
	case 33 ... 64:
		index = 5;
		break;
	case 17 ... 32:
		index = 4;
		break;
	case 9 ... 16:
		index = 3;
		break;
	case 5 ... 8:
		index = 2;
		break;
	case 1 ... 4:
		index = 1;
	case 0:
		break;
	}

	map[index]++;
}

void io_u_mark_submit(struct thread_data *td, unsigned int nr)
{
	__io_u_mark_map(td->ts.io_u_submit, nr);
	td->ts.total_submit++;
}

void io_u_mark_complete(struct thread_data *td, unsigned int nr)
{
	__io_u_mark_map(td->ts.io_u_complete, nr);
	td->ts.total_complete++;
}

void io_u_mark_depth(struct thread_data *td, unsigned int nr)
{
	int index = 0;

	switch (td->cur_depth) {
	default:
		index = 6;
		break;
	case 32 ... 63:
		index = 5;
		break;
	case 16 ... 31:
		index = 4;
		break;
	case 8 ... 15:
		index = 3;
		break;
	case 4 ... 7:
		index = 2;
		break;
	case 2 ... 3:
		index = 1;
	case 1:
		break;
	}

	td->ts.io_u_map[index] += nr;
}

static void io_u_mark_lat_usec(struct thread_data *td, unsigned long usec)
{
	int index = 0;

	assert(usec < 1000);

	switch (usec) {
	case 750 ... 999:
		index = 9;
		break;
	case 500 ... 749:
		index = 8;
		break;
	case 250 ... 499:
		index = 7;
		break;
	case 100 ... 249:
		index = 6;
		break;
	case 50 ... 99:
		index = 5;
		break;
	case 20 ... 49:
		index = 4;
		break;
	case 10 ... 19:
		index = 3;
		break;
	case 4 ... 9:
		index = 2;
		break;
	case 2 ... 3:
		index = 1;
	case 0 ... 1:
		break;
	}

	assert(index < FIO_IO_U_LAT_U_NR);
	td->ts.io_u_lat_u[index]++;
}
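/*
 * io_u_mark_latency() further down keeps completions under 1000 usec in the
 * microsecond buckets above and converts everything else to milliseconds
 * for io_u_mark_lat_msec(); a 2500 usec completion, for example, becomes
 * msec == 2 and lands in the 2-3 msec bucket (index 1).
 */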
static void io_u_mark_lat_msec(struct thread_data *td, unsigned long msec)
{
	int index = 0;

	switch (msec) {
	default:
		index = 11;
		break;
	case 1000 ... 1999:
		index = 10;
		break;
	case 750 ... 999:
		index = 9;
		break;
	case 500 ... 749:
		index = 8;
		break;
	case 250 ... 499:
		index = 7;
		break;
	case 100 ... 249:
		index = 6;
		break;
	case 50 ... 99:
		index = 5;
		break;
	case 20 ... 49:
		index = 4;
		break;
	case 10 ... 19:
		index = 3;
		break;
	case 4 ... 9:
		index = 2;
		break;
	case 2 ... 3:
		index = 1;
	case 0 ... 1:
		break;
	}

	assert(index < FIO_IO_U_LAT_M_NR);
	td->ts.io_u_lat_m[index]++;
}

static void io_u_mark_latency(struct thread_data *td, unsigned long usec)
{
	if (usec < 1000)
		io_u_mark_lat_usec(td, usec);
	else
		io_u_mark_lat_msec(td, usec / 1000);
}

/*
 * Get next file to service by choosing one at random
 */
static struct fio_file *get_next_file_rand(struct thread_data *td, int goodf,
					   int badf)
{
	struct fio_file *f;
	int fno;

	do {
		long r = os_random_long(&td->next_file_state);
		int opened = 0;

		fno = (unsigned int) ((double) td->o.nr_files
			* (r / (OS_RAND_MAX + 1.0)));
		f = td->files[fno];
		if (f->flags & FIO_FILE_DONE)
			continue;

		if (!(f->flags & FIO_FILE_OPEN)) {
			int err;

			err = td_io_open_file(td, f);
			if (err)
				continue;
			opened = 1;
		}

		if ((!goodf || (f->flags & goodf)) && !(f->flags & badf)) {
			dprint(FD_FILE, "get_next_file_rand: %p\n", f);
			return f;
		}
		if (opened)
			td_io_close_file(td, f);
	} while (1);
}

/*
 * Get next file to service by doing round robin between all available ones
 */
static struct fio_file *get_next_file_rr(struct thread_data *td, int goodf,
					 int badf)
{
	unsigned int old_next_file = td->next_file;
	struct fio_file *f;

	do {
		int opened = 0;

		f = td->files[td->next_file];

		td->next_file++;
		if (td->next_file >= td->o.nr_files)
			td->next_file = 0;

		dprint(FD_FILE, "trying file %s %x\n", f->file_name, f->flags);
		if (f->flags & FIO_FILE_DONE) {
			f = NULL;
			continue;
		}

		if (!(f->flags & FIO_FILE_OPEN)) {
			int err;

			err = td_io_open_file(td, f);
			if (err)
				continue;
			opened = 1;
		}

		if ((!goodf || (f->flags & goodf)) && !(f->flags & badf))
			break;

		if (opened)
			td_io_close_file(td, f);

		f = NULL;
	} while (td->next_file != old_next_file);

	dprint(FD_FILE, "get_next_file_rr: %p\n", f);
	return f;
}

static struct fio_file *get_next_file(struct thread_data *td)
{
	struct fio_file *f;

	assert(td->o.nr_files <= td->files_index);

	if (!td->nr_open_files || td->nr_done_files >= td->o.nr_files) {
		dprint(FD_FILE, "get_next_file: nr_open=%d, nr_done=%d,"
				" nr_files=%d\n", td->nr_open_files,
						  td->nr_done_files,
						  td->o.nr_files);
		return NULL;
	}

	f = td->file_service_file;
	if (f && (f->flags & FIO_FILE_OPEN) && !(f->flags & FIO_FILE_CLOSING)) {
		if (td->o.file_service_type == FIO_FSERVICE_SEQ)
			goto out;
		if (td->file_service_left--)
			goto out;
	}

	if (td->o.file_service_type == FIO_FSERVICE_RR ||
	    td->o.file_service_type == FIO_FSERVICE_SEQ)
		f = get_next_file_rr(td, FIO_FILE_OPEN, FIO_FILE_CLOSING);
	else
		f = get_next_file_rand(td, FIO_FILE_OPEN, FIO_FILE_CLOSING);

	td->file_service_file = f;
	td->file_service_left = td->file_service_nr - 1;
out:
	dprint(FD_FILE, "get_next_file: %p [%s]\n", f, f->file_name);
	return f;
}

static struct fio_file *find_next_new_file(struct thread_data *td)
{
	struct fio_file *f;

	if (!td->nr_open_files || td->nr_done_files >= td->o.nr_files)
		return NULL;

	if (td->o.file_service_type == FIO_FSERVICE_RR)
		f = get_next_file_rr(td, 0, FIO_FILE_OPEN);
	else
		f = get_next_file_rand(td, 0, FIO_FILE_OPEN);

	return f;
}
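/*
 * set_io_u_file() below ties the pieces together: it asks get_next_file()
 * for a file, attaches it to the io_u, and moves on to the next file when
 * fill_io_u() cannot generate an offset for it, marking exhausted files
 * FIO_FILE_DONE along the way. With nr_files == 1 and time_based set, the
 * same file is simply reset and reused instead of being closed.
 */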
static int set_io_u_file(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f;

	do {
		f = get_next_file(td);
		if (!f)
			return 1;

set_file:
		io_u->file = f;
		get_file(f);

		if (!fill_io_u(td, io_u))
			break;

		/*
		 * optimization to prevent close/open of the same file. This
		 * way we preserve queueing etc.
		 */
		if (td->o.nr_files == 1 && td->o.time_based) {
			put_file_log(td, f);
			fio_file_reset(f);
			goto set_file;
		}

		/*
		 * td_io_close() does a put_file() as well, so no need to
		 * do that here.
		 */
		io_u->file = NULL;
		td_io_close_file(td, f);
		f->flags |= FIO_FILE_DONE;
		td->nr_done_files++;
		dprint(FD_FILE, "%s: is done (%d of %d)\n", f->file_name,
				td->nr_done_files, td->o.nr_files);

		/*
		 * probably not the right place to do this, but see
		 * if we need to open a new file
		 */
		if (td->nr_open_files < td->o.open_files &&
		    td->o.open_files != td->o.nr_files) {
			f = find_next_new_file(td);

			if (!f || td_io_open_file(td, f))
				return 1;

			goto set_file;
		}
	} while (1);

	return 0;
}


struct io_u *__get_io_u(struct thread_data *td)
{
	struct io_u *io_u = NULL;

	if (!flist_empty(&td->io_u_requeues))
		io_u = flist_entry(td->io_u_requeues.next, struct io_u, list);
	else if (!queue_full(td)) {
		io_u = flist_entry(td->io_u_freelist.next, struct io_u, list);

		io_u->buflen = 0;
		io_u->resid = 0;
		io_u->file = NULL;
		io_u->end_io = NULL;
	}

	if (io_u) {
		assert(io_u->flags & IO_U_F_FREE);
		io_u->flags &= ~IO_U_F_FREE;

		io_u->error = 0;
		flist_del(&io_u->list);
		flist_add(&io_u->list, &td->io_u_busylist);
		td->cur_depth++;
	}

	return io_u;
}
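/*
 * __get_io_u() above prefers the requeue list over the freelist, and only
 * freelist io_us get their buflen/file/end_io cleared; a requeued io_u
 * keeps its previous setup, which is why get_io_u() below jumps straight
 * to the prep step when io_u->file is already set.
 */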
/*
 * Return an io_u to be processed. Gets a buflen and offset, sets direction,
 * etc. The returned io_u is fully ready to be prepped and submitted.
 */
struct io_u *get_io_u(struct thread_data *td)
{
	struct fio_file *f;
	struct io_u *io_u;

	io_u = __get_io_u(td);
	if (!io_u) {
		dprint(FD_IO, "__get_io_u failed\n");
		return NULL;
	}

	/*
	 * from a requeue, io_u already setup
	 */
	if (io_u->file)
		goto out;

	/*
	 * If using an iolog, grab next piece if any available.
	 */
	if (td->o.read_iolog_file) {
		if (read_iolog_get(td, io_u))
			goto err_put;
	} else if (set_io_u_file(td, io_u)) {
		dprint(FD_IO, "io_u %p, setting file failed\n", io_u);
		goto err_put;
	}

	f = io_u->file;
	assert(f->flags & FIO_FILE_OPEN);

	if (io_u->ddir != DDIR_SYNC) {
		if (!io_u->buflen && !(td->io_ops->flags & FIO_NOIO)) {
			dprint(FD_IO, "get_io_u: zero buflen on %p\n", io_u);
			goto err_put;
		}

		f->last_pos = io_u->offset + io_u->buflen;

		if (td->o.verify != VERIFY_NONE)
			populate_verify_io_u(td, io_u);
		else if (td->o.refill_buffers && io_u->ddir == DDIR_WRITE)
			io_u_fill_buffer(td, io_u, io_u->xfer_buflen);
	}

	/*
	 * Set io data pointers.
	 */
	io_u->endpos = io_u->offset + io_u->buflen;
	io_u->xfer_buf = io_u->buf;
	io_u->xfer_buflen = io_u->buflen;

out:
	if (!td_io_prep(td, io_u)) {
		if (!td->o.disable_slat)
			fio_gettime(&io_u->start_time, NULL);
		return io_u;
	}
err_put:
	dprint(FD_IO, "get_io_u failed\n");
	put_io_u(td, io_u);
	return NULL;
}

void io_u_log_error(struct thread_data *td, struct io_u *io_u)
{
	const char *msg[] = { "read", "write", "sync" };

	log_err("fio: io_u error");

	if (io_u->file)
		log_err(" on file %s", io_u->file->file_name);

	log_err(": %s\n", strerror(io_u->error));

	log_err(" %s offset=%llu, buflen=%lu\n", msg[io_u->ddir],
					io_u->offset, io_u->xfer_buflen);

	if (!td->error)
		td_verror(td, io_u->error, "io_u error");
}

static void io_completed(struct thread_data *td, struct io_u *io_u,
			 struct io_completion_data *icd)
{
	/*
	 * Older gcc's are too dumb to realize that usec is always used
	 * initialized, silence that warning.
	 */
	unsigned long uninitialized_var(usec);

	dprint_io_u(io_u, "io complete");

	assert(io_u->flags & IO_U_F_FLIGHT);
	io_u->flags &= ~IO_U_F_FLIGHT;

	if (io_u->ddir == DDIR_SYNC) {
		td->last_was_sync = 1;
		return;
	}

	td->last_was_sync = 0;

	if (!io_u->error) {
		unsigned int bytes = io_u->buflen - io_u->resid;
		const enum fio_ddir idx = io_u->ddir;
		int ret;

		td->io_blocks[idx]++;
		td->io_bytes[idx] += bytes;
		td->this_io_bytes[idx] += bytes;

		if (ramp_time_over(td)) {
			if (!td->o.disable_clat || !td->o.disable_bw)
				usec = utime_since(&io_u->issue_time,
							&icd->time);

			if (!td->o.disable_clat) {
				add_clat_sample(td, idx, usec);
				io_u_mark_latency(td, usec);
			}
			if (!td->o.disable_bw)
				add_bw_sample(td, idx, &icd->time);
		}

		if (td_write(td) && idx == DDIR_WRITE &&
		    td->o.do_verify &&
		    td->o.verify != VERIFY_NONE)
			log_io_piece(td, io_u);

		icd->bytes_done[idx] += bytes;

		if (io_u->end_io) {
			ret = io_u->end_io(td, io_u);
			if (ret && !icd->error)
				icd->error = ret;
		}
	} else {
		icd->error = io_u->error;
		io_u_log_error(td, io_u);
	}
}

static void init_icd(struct thread_data *td, struct io_completion_data *icd,
		     int nr)
{
	if (!td->o.disable_clat || !td->o.disable_bw)
		fio_gettime(&icd->time, NULL);

	icd->nr = nr;

	icd->error = 0;
	icd->bytes_done[0] = icd->bytes_done[1] = 0;
}

static void ios_completed(struct thread_data *td,
			  struct io_completion_data *icd)
{
	struct io_u *io_u;
	int i;

	for (i = 0; i < icd->nr; i++) {
		io_u = td->io_ops->event(td, i);

		io_completed(td, io_u, icd);
		put_io_u(td, io_u);
	}
}

/*
 * Complete a single io_u for the sync engines.
 */
long io_u_sync_complete(struct thread_data *td, struct io_u *io_u)
{
	struct io_completion_data icd;

	init_icd(td, &icd, 1);
	io_completed(td, io_u, &icd);
	put_io_u(td, io_u);

	if (!icd.error)
		return icd.bytes_done[0] + icd.bytes_done[1];

	td_verror(td, icd.error, "io_u_sync_complete");
	return -1;
}
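/*
 * Both completion paths funnel through io_completed() and report the total
 * bytes finished in both directions (bytes_done[0] + bytes_done[1]), or -1
 * with the thread error set. For the queued variant below, min_evts == 0
 * passes a zero timespec so td_io_getevents() need not block; otherwise no
 * timeout is set and it waits for at least min_evts events, capped at
 * iodepth_batch_complete.
 */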
/*
 * Called to complete min_events number of io for the async engines.
 */
long io_u_queued_complete(struct thread_data *td, int min_evts)
{
	struct io_completion_data icd;
	struct timespec *tvp = NULL;
	int ret;
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 0, };

	dprint(FD_IO, "io_u_queued_completed: min=%d\n", min_evts);

	if (!min_evts)
		tvp = &ts;

	ret = td_io_getevents(td, min_evts, td->o.iodepth_batch_complete, tvp);
	if (ret < 0) {
		td_verror(td, -ret, "td_io_getevents");
		return ret;
	} else if (!ret)
		return ret;

	init_icd(td, &icd, ret);
	ios_completed(td, &icd);
	if (!icd.error)
		return icd.bytes_done[0] + icd.bytes_done[1];

	td_verror(td, icd.error, "io_u_queued_complete");
	return -1;
}

/*
 * Call when io_u is really queued, to update the submission latency.
 */
void io_u_queued(struct thread_data *td, struct io_u *io_u)
{
	if (!td->o.disable_slat) {
		unsigned long slat_time;

		slat_time = utime_since(&io_u->start_time, &io_u->issue_time);
		add_slat_sample(td, io_u->ddir, slat_time);
	}
}

/*
 * "randomly" fill the buffer contents
 */
void io_u_fill_buffer(struct thread_data *td, struct io_u *io_u,
		      unsigned int max_bs)
{
	long *ptr = io_u->buf;

	if (!td->o.zero_buffers) {
		while ((void *) ptr - io_u->buf < max_bs) {
			*ptr = rand() * GOLDEN_RATIO_PRIME;
			ptr++;
		}
	} else
		memset(ptr, 0, max_bs);
}