io_u.c revision 5bfc35d7abe2582dc54127ca1d6e03792c9d62f5
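/*
 * io_u handling: picks the offset, length, data direction and file for
 * each io_u, and does the completion-side accounting (latency, bandwidth
 * and histogram bookkeeping) once the io finishes.
 */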
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <signal.h>
#include <time.h>
#include <assert.h>

#include "fio.h"
#include "hash.h"

struct io_completion_data {
	int nr;				/* input */

	int error;			/* output */
	unsigned long bytes_done[2];	/* output */
	struct timeval time;		/* output */
};

/*
 * The ->file_map[] contains a map of blocks we have or have not done io
 * to yet. Used to make sure we cover the entire range in a fair fashion.
 */
static int random_map_free(struct fio_file *f, const unsigned long long block)
{
	unsigned int idx = RAND_MAP_IDX(f, block);
	unsigned int bit = RAND_MAP_BIT(f, block);

	dprint(FD_RANDOM, "free: b=%llu, idx=%u, bit=%u\n", block, idx, bit);

	return (f->file_map[idx] & (1 << bit)) == 0;
}

/*
 * Mark a given offset as used in the map.
 */
static void mark_random_map(struct thread_data *td, struct io_u *io_u)
{
	unsigned int min_bs = td->o.rw_min_bs;
	struct fio_file *f = io_u->file;
	unsigned long long block;
	unsigned int blocks, nr_blocks;

	block = (io_u->offset - f->file_offset) / (unsigned long long) min_bs;
	nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;
	blocks = 0;

	while (nr_blocks) {
		unsigned int this_blocks, mask;
		unsigned int idx, bit;

		/*
		 * If we have a mixed random workload, we may
		 * encounter blocks we already did IO to.
		 */
		if ((td->o.ddir_nr == 1) && !random_map_free(f, block)) {
			if (!blocks)
				blocks = 1;
			break;
		}

		idx = RAND_MAP_IDX(f, block);
		bit = RAND_MAP_BIT(f, block);

		fio_assert(td, idx < f->num_maps);

		this_blocks = nr_blocks;
		if (this_blocks + bit > BLOCKS_PER_MAP)
			this_blocks = BLOCKS_PER_MAP - bit;

		if (this_blocks == BLOCKS_PER_MAP)
			mask = -1U;
		else
			mask = ((1U << this_blocks) - 1) << bit;

		f->file_map[idx] |= mask;
		nr_blocks -= this_blocks;
		blocks += this_blocks;
		block += this_blocks;
	}

	if ((blocks * min_bs) < io_u->buflen)
		io_u->buflen = blocks * min_bs;
}

static unsigned long long last_block(struct thread_data *td, struct fio_file *f,
				     enum fio_ddir ddir)
{
	unsigned long long max_blocks;
	unsigned long long max_size;

	/*
	 * Hmm, should we make sure that ->io_size <= ->real_file_size?
	 */
	max_size = f->io_size;
	if (max_size > f->real_file_size)
		max_size = f->real_file_size;

	max_blocks = max_size / (unsigned long long) td->o.ba[ddir];
	if (!max_blocks)
		return 0;

	return max_blocks;
}
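/*
 * A worked example of the file_map addressing used above (numbers are
 * hypothetical): with BLOCKS_PER_MAP == 32, block 70 sits in map word
 * 70 / 32 == 2 at bit 70 % 32 == 6, so mark_random_map() ends up doing
 * file_map[2] |= (1 << 6) and random_map_free() tests that same bit.
 */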
/*
 * Return the next free block in the map.
 */
static int get_next_free_block(struct thread_data *td, struct fio_file *f,
			       enum fio_ddir ddir, unsigned long long *b)
{
	unsigned long long min_bs = td->o.rw_min_bs;
	int i;

	i = f->last_free_lookup;
	*b = (i * BLOCKS_PER_MAP);
	while ((*b) * min_bs < f->real_file_size) {
		if (f->file_map[i] != (unsigned int) -1) {
			*b += ffz(f->file_map[i]);
			if (*b > last_block(td, f, ddir))
				break;
			f->last_free_lookup = i;
			return 0;
		}

		*b += BLOCKS_PER_MAP;
		i++;
	}

	dprint(FD_IO, "failed finding a free block\n");
	return 1;
}

static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
				enum fio_ddir ddir, unsigned long long *b)
{
	unsigned long long r;
	int loops = 5;

	do {
		r = os_random_long(&td->random_state);
		dprint(FD_RANDOM, "off rand %llu\n", r);
		*b = (last_block(td, f, ddir) - 1)
			* (r / ((unsigned long long) OS_RAND_MAX + 1.0));

		/*
		 * if we are not maintaining a random map, we are done.
		 */
		if (!file_randommap(td, f))
			return 0;

		/*
		 * calculate map offset and check if it's free
		 */
		if (random_map_free(f, *b))
			return 0;

		dprint(FD_RANDOM, "get_next_rand_offset: offset %llu busy\n",
									*b);
	} while (--loops);

	/*
	 * we get here, if we didn't succeed in looking up a block. generate
	 * a random start offset into the filemap, and find the first free
	 * block from there.
	 */
	loops = 10;
	do {
		f->last_free_lookup = (f->num_maps - 1) *
					(r / (OS_RAND_MAX + 1.0));
		if (!get_next_free_block(td, f, ddir, b))
			return 0;

		r = os_random_long(&td->random_state);
	} while (--loops);

	/*
	 * that didn't work either, try exhaustive search from the start
	 */
	f->last_free_lookup = 0;
	return get_next_free_block(td, f, ddir, b);
}
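/*
 * get_next_rand_offset() above escalates through three stages: up to
 * five random probes against the free map, then up to ten linear scans
 * starting from a randomly chosen map word, and finally one exhaustive
 * scan from the start. This keeps a nearly-full map from spinning
 * forever on random probes.
 */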
/*
 * For random io, generate a random new block and see if it's used. Repeat
 * until we find a free one. For sequential io, just return the end of
 * the last io issued.
 */
static int get_next_offset(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;
	unsigned long long b;
	enum fio_ddir ddir = io_u->ddir;

	if (td_random(td) && (td->o.ddir_nr && !--td->ddir_nr)) {
		td->ddir_nr = td->o.ddir_nr;

		if (get_next_rand_offset(td, f, ddir, &b)) {
			dprint(FD_IO, "%s: getting rand offset failed\n",
				f->file_name);
			return 1;
		}
	} else {
		if (f->last_pos >= f->real_file_size) {
			if (!td_random(td) ||
			     get_next_rand_offset(td, f, ddir, &b)) {
				dprint(FD_IO, "%s: pos %llu > size %llu\n",
					f->file_name, f->last_pos,
					f->real_file_size);
				return 1;
			}
		} else
			b = (f->last_pos - f->file_offset) / td->o.min_bs[ddir];
	}

	io_u->offset = b * td->o.ba[ddir];
	if (io_u->offset >= f->io_size) {
		dprint(FD_IO, "get_next_offset: offset %llu >= io_size %llu\n",
					io_u->offset, f->io_size);
		return 1;
	}

	io_u->offset += f->file_offset;
	if (io_u->offset >= f->real_file_size) {
		dprint(FD_IO, "get_next_offset: offset %llu >= size %llu\n",
					io_u->offset, f->real_file_size);
		return 1;
	}

	return 0;
}

static inline int is_power_of_2(unsigned int val)
{
	return (val != 0 && ((val & (val - 1)) == 0));
}

static unsigned int get_next_buflen(struct thread_data *td, struct io_u *io_u)
{
	const int ddir = io_u->ddir;
	unsigned int uninitialized_var(buflen);
	unsigned int minbs, maxbs;
	long r;

	minbs = td->o.min_bs[ddir];
	maxbs = td->o.max_bs[ddir];

	if (minbs == maxbs)
		buflen = minbs;
	else {
		r = os_random_long(&td->bsrange_state);
		if (!td->o.bssplit_nr) {
			buflen = 1 + (unsigned int) ((double) maxbs *
					(r / (OS_RAND_MAX + 1.0)));
			if (buflen < minbs)
				buflen = minbs;
		} else {
			long perc = 0;
			unsigned int i;

			for (i = 0; i < td->o.bssplit_nr; i++) {
				struct bssplit *bsp = &td->o.bssplit[i];

				buflen = bsp->bs;
				perc += bsp->perc;
				if (r <= ((OS_RAND_MAX / 100L) * perc))
					break;
			}
		}
		if (!td->o.bs_unaligned && is_power_of_2(minbs))
			buflen = (buflen + minbs - 1) & ~(minbs - 1);
	}

	if (io_u->offset + buflen > io_u->file->real_file_size) {
		dprint(FD_IO, "lower buflen %u -> %u (ddir=%d)\n", buflen,
						minbs, ddir);
		buflen = minbs;
	}

	return buflen;
}

static void set_rwmix_bytes(struct thread_data *td)
{
	unsigned int diff;

	/*
	 * we do time or byte based switch. this is needed because
	 * buffered writes may issue a lot quicker than they complete,
	 * whereas reads do not.
	 */
	diff = td->o.rwmix[td->rwmix_ddir ^ 1];
	td->rwmix_issues = (td->io_issues[td->rwmix_ddir] * diff) / 100;
}

static inline enum fio_ddir get_rand_ddir(struct thread_data *td)
{
	unsigned int v;
	long r;

	r = os_random_long(&td->rwmix_state);
	v = 1 + (int) (100.0 * (r / (OS_RAND_MAX + 1.0)));
	if (v <= td->o.rwmix[DDIR_READ])
		return DDIR_READ;

	return DDIR_WRITE;
}
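/*
 * Example of the draw in get_rand_ddir() (option value hypothetical):
 * with rwmix[DDIR_READ] == 75, v is uniform over 1..100, so v <= 75
 * picks DDIR_READ about 75% of the time and DDIR_WRITE otherwise.
 */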
/*
 * Return the data direction for the next io_u. If the job is a
 * mixed read/write workload, check the rwmix cycle and switch if
 * necessary.
 */
static enum fio_ddir get_rw_ddir(struct thread_data *td)
{
	if (td_rw(td)) {
		/*
		 * Check if it's time to switch to a new data direction.
		 */
		if (td->io_issues[td->rwmix_ddir] >= td->rwmix_issues) {
			unsigned long long max_bytes;
			enum fio_ddir ddir;

			/*
			 * Put a top limit on how many bytes we do for
			 * one data direction, to avoid overflowing the
			 * ranges too much
			 */
			ddir = get_rand_ddir(td);
			max_bytes = td->this_io_bytes[ddir];
			if (max_bytes >=
			    (td->o.size * td->o.rwmix[ddir] / 100)) {
				if (!td->rw_end_set[ddir]) {
					td->rw_end_set[ddir] = 1;
					fio_gettime(&td->rw_end[ddir], NULL);
				}

				ddir ^= 1;
			}

			if (ddir != td->rwmix_ddir)
				set_rwmix_bytes(td);

			td->rwmix_ddir = ddir;
		}
		return td->rwmix_ddir;
	} else if (td_read(td))
		return DDIR_READ;
	else
		return DDIR_WRITE;
}

static void put_file_log(struct thread_data *td, struct fio_file *f)
{
	int ret = put_file(td, f);

	if (ret)
		td_verror(td, ret, "file close");
}

void put_io_u(struct thread_data *td, struct io_u *io_u)
{
	assert((io_u->flags & IO_U_F_FREE) == 0);
	io_u->flags |= IO_U_F_FREE;

	if (io_u->file)
		put_file_log(td, io_u->file);

	io_u->file = NULL;
	flist_del(&io_u->list);
	flist_add(&io_u->list, &td->io_u_freelist);
	td->cur_depth--;
}

void requeue_io_u(struct thread_data *td, struct io_u **io_u)
{
	struct io_u *__io_u = *io_u;

	dprint(FD_IO, "requeue %p\n", __io_u);

	__io_u->flags |= IO_U_F_FREE;
	if ((__io_u->flags & IO_U_F_FLIGHT) && (__io_u->ddir != DDIR_SYNC))
		td->io_issues[__io_u->ddir]--;

	__io_u->flags &= ~IO_U_F_FLIGHT;

	flist_del(&__io_u->list);
	flist_add_tail(&__io_u->list, &td->io_u_requeues);
	td->cur_depth--;
	*io_u = NULL;
}

static int fill_io_u(struct thread_data *td, struct io_u *io_u)
{
	if (td->io_ops->flags & FIO_NOIO)
		goto out;

	/*
	 * see if it's time to sync
	 */
	if (td->o.fsync_blocks &&
	   !(td->io_issues[DDIR_WRITE] % td->o.fsync_blocks) &&
	     td->io_issues[DDIR_WRITE] && should_fsync(td)) {
		io_u->ddir = DDIR_SYNC;
		goto out;
	}

	io_u->ddir = get_rw_ddir(td);

	/*
	 * See if it's time to switch to a new zone
	 */
	if (td->zone_bytes >= td->o.zone_size) {
		td->zone_bytes = 0;
		io_u->file->last_pos += td->o.zone_skip;
		td->io_skip_bytes += td->o.zone_skip;
	}

	/*
	 * No log, let the seq/rand engine retrieve the next buflen and
	 * position.
	 */
	if (get_next_offset(td, io_u)) {
		dprint(FD_IO, "io_u %p, failed getting offset\n", io_u);
		return 1;
	}

	io_u->buflen = get_next_buflen(td, io_u);
	if (!io_u->buflen) {
		dprint(FD_IO, "io_u %p, failed getting buflen\n", io_u);
		return 1;
	}

	if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
		dprint(FD_IO, "io_u %p, offset too large\n", io_u);
		dprint(FD_IO, "  off=%llu/%lu > %llu\n", io_u->offset,
				io_u->buflen, io_u->file->real_file_size);
		return 1;
	}

	/*
	 * mark entry before potentially trimming io_u
	 */
	if (td_random(td) && file_randommap(td, io_u->file))
		mark_random_map(td, io_u);

	/*
	 * If using a write iolog, store this entry.
	 */
out:
	dprint_io_u(io_u, "fill_io_u");
	td->zone_bytes += io_u->buflen;
	log_io_u(td, io_u);
	return 0;
}
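/*
 * Bucket boundaries for __io_u_mark_map() below: index 0 counts nr == 0,
 * 1 counts 1-4, 2 counts 5-8, 3 counts 9-16, 4 counts 17-32, 5 counts
 * 33-64, and 6 catches everything larger (note the deliberate
 * fallthrough from the 1-4 case).
 */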
static void __io_u_mark_map(unsigned int *map, unsigned int nr)
{
	int index = 0;

	switch (nr) {
	default:
		index = 6;
		break;
	case 33 ... 64:
		index = 5;
		break;
	case 17 ... 32:
		index = 4;
		break;
	case 9 ... 16:
		index = 3;
		break;
	case 5 ... 8:
		index = 2;
		break;
	case 1 ... 4:
		index = 1;
	case 0:
		break;
	}

	map[index]++;
}

void io_u_mark_submit(struct thread_data *td, unsigned int nr)
{
	__io_u_mark_map(td->ts.io_u_submit, nr);
	td->ts.total_submit++;
}

void io_u_mark_complete(struct thread_data *td, unsigned int nr)
{
	__io_u_mark_map(td->ts.io_u_complete, nr);
	td->ts.total_complete++;
}

void io_u_mark_depth(struct thread_data *td, unsigned int nr)
{
	int index = 0;

	switch (td->cur_depth) {
	default:
		index = 6;
		break;
	case 32 ... 63:
		index = 5;
		break;
	case 16 ... 31:
		index = 4;
		break;
	case 8 ... 15:
		index = 3;
		break;
	case 4 ... 7:
		index = 2;
		break;
	case 2 ... 3:
		index = 1;
	case 1:
		break;
	}

	td->ts.io_u_map[index] += nr;
}

static void io_u_mark_lat_usec(struct thread_data *td, unsigned long usec)
{
	int index = 0;

	assert(usec < 1000);

	switch (usec) {
	case 750 ... 999:
		index = 9;
		break;
	case 500 ... 749:
		index = 8;
		break;
	case 250 ... 499:
		index = 7;
		break;
	case 100 ... 249:
		index = 6;
		break;
	case 50 ... 99:
		index = 5;
		break;
	case 20 ... 49:
		index = 4;
		break;
	case 10 ... 19:
		index = 3;
		break;
	case 4 ... 9:
		index = 2;
		break;
	case 2 ... 3:
		index = 1;
	case 0 ... 1:
		break;
	}

	assert(index < FIO_IO_U_LAT_U_NR);
	td->ts.io_u_lat_u[index]++;
}
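/*
 * Latencies are tracked in two histograms: io_u_mark_lat_usec() above
 * handles sub-millisecond completions, io_u_mark_lat_msec() below takes
 * over at 1 msec and up. io_u_mark_latency() dispatches between them.
 */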
static void io_u_mark_lat_msec(struct thread_data *td, unsigned long msec)
{
	int index = 0;

	switch (msec) {
	default:
		index = 11;
		break;
	case 1000 ... 1999:
		index = 10;
		break;
	case 750 ... 999:
		index = 9;
		break;
	case 500 ... 749:
		index = 8;
		break;
	case 250 ... 499:
		index = 7;
		break;
	case 100 ... 249:
		index = 6;
		break;
	case 50 ... 99:
		index = 5;
		break;
	case 20 ... 49:
		index = 4;
		break;
	case 10 ... 19:
		index = 3;
		break;
	case 4 ... 9:
		index = 2;
		break;
	case 2 ... 3:
		index = 1;
	case 0 ... 1:
		break;
	}

	assert(index < FIO_IO_U_LAT_M_NR);
	td->ts.io_u_lat_m[index]++;
}

static void io_u_mark_latency(struct thread_data *td, unsigned long usec)
{
	if (usec < 1000)
		io_u_mark_lat_usec(td, usec);
	else
		io_u_mark_lat_msec(td, usec / 1000);
}

/*
 * Get next file to service by choosing one at random
 */
static struct fio_file *get_next_file_rand(struct thread_data *td, int goodf,
					   int badf)
{
	struct fio_file *f;
	int fno;

	do {
		long r = os_random_long(&td->next_file_state);
		int opened = 0;

		fno = (unsigned int) ((double) td->o.nr_files
			* (r / (OS_RAND_MAX + 1.0)));
		f = td->files[fno];
		if (f->flags & FIO_FILE_DONE)
			continue;

		if (!(f->flags & FIO_FILE_OPEN)) {
			int err;

			err = td_io_open_file(td, f);
			if (err)
				continue;
			opened = 1;
		}

		if ((!goodf || (f->flags & goodf)) && !(f->flags & badf)) {
			dprint(FD_FILE, "get_next_file_rand: %p\n", f);
			return f;
		}
		if (opened)
			td_io_close_file(td, f);
	} while (1);
}

/*
 * Get next file to service by doing round robin between all available ones
 */
static struct fio_file *get_next_file_rr(struct thread_data *td, int goodf,
					 int badf)
{
	unsigned int old_next_file = td->next_file;
	struct fio_file *f;

	do {
		int opened = 0;

		f = td->files[td->next_file];

		td->next_file++;
		if (td->next_file >= td->o.nr_files)
			td->next_file = 0;

		dprint(FD_FILE, "trying file %s %x\n", f->file_name, f->flags);
		if (f->flags & FIO_FILE_DONE) {
			f = NULL;
			continue;
		}

		if (!(f->flags & FIO_FILE_OPEN)) {
			int err;

			err = td_io_open_file(td, f);
			if (err) {
				dprint(FD_FILE, "error %d on open of %s\n",
					err, f->file_name);
				continue;
			}
			opened = 1;
		}

		dprint(FD_FILE, "goodf=%x, badf=%x, ff=%x\n", goodf, badf,
							f->flags);
		if ((!goodf || (f->flags & goodf)) && !(f->flags & badf))
			break;

		if (opened)
			td_io_close_file(td, f);

		f = NULL;
	} while (td->next_file != old_next_file);

	dprint(FD_FILE, "get_next_file_rr: %p\n", f);
	return f;
}

static struct fio_file *get_next_file(struct thread_data *td)
{
	struct fio_file *f;

	assert(td->o.nr_files <= td->files_index);

	if (td->nr_done_files >= td->o.nr_files) {
		dprint(FD_FILE, "get_next_file: nr_open=%d, nr_done=%d,"
				" nr_files=%d\n", td->nr_open_files,
						  td->nr_done_files,
						  td->o.nr_files);
		return NULL;
	}

	f = td->file_service_file;
	if (f && (f->flags & FIO_FILE_OPEN) && !(f->flags & FIO_FILE_CLOSING)) {
		if (td->o.file_service_type == FIO_FSERVICE_SEQ)
			goto out;
		if (td->file_service_left--)
			goto out;
	}

	if (td->o.file_service_type == FIO_FSERVICE_RR ||
	    td->o.file_service_type == FIO_FSERVICE_SEQ)
		f = get_next_file_rr(td, FIO_FILE_OPEN, FIO_FILE_CLOSING);
	else
		f = get_next_file_rand(td, FIO_FILE_OPEN, FIO_FILE_CLOSING);

	td->file_service_file = f;
	td->file_service_left = td->file_service_nr - 1;
out:
	dprint(FD_FILE, "get_next_file: %p [%s]\n", f, f->file_name);
	return f;
}
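/*
 * set_io_u_file() below ties the chosen file to the io_u and retries on
 * a file that can't supply more io: such a file is closed, flagged
 * FIO_FILE_DONE, and the next candidate is tried until no file remains.
 */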
static int set_io_u_file(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f;

	do {
		f = get_next_file(td);
		if (!f)
			return 1;

		io_u->file = f;
		get_file(f);

		if (!fill_io_u(td, io_u))
			break;

		put_file_log(td, f);
		td_io_close_file(td, f);
		io_u->file = NULL;
		f->flags |= FIO_FILE_DONE;
		td->nr_done_files++;
		dprint(FD_FILE, "%s: is done (%d of %d)\n", f->file_name,
				td->nr_done_files, td->o.nr_files);
	} while (1);

	return 0;
}

struct io_u *__get_io_u(struct thread_data *td)
{
	struct io_u *io_u = NULL;

	if (!flist_empty(&td->io_u_requeues))
		io_u = flist_entry(td->io_u_requeues.next, struct io_u, list);
	else if (!queue_full(td)) {
		io_u = flist_entry(td->io_u_freelist.next, struct io_u, list);

		io_u->buflen = 0;
		io_u->resid = 0;
		io_u->file = NULL;
		io_u->end_io = NULL;
	}

	if (io_u) {
		assert(io_u->flags & IO_U_F_FREE);
		io_u->flags &= ~IO_U_F_FREE;

		io_u->error = 0;
		flist_del(&io_u->list);
		flist_add(&io_u->list, &td->io_u_busylist);
		td->cur_depth++;
	}

	return io_u;
}

/*
 * Return an io_u to be processed. Gets a buflen and offset, sets direction,
 * etc. The returned io_u is fully ready to be prepped and submitted.
 */
struct io_u *get_io_u(struct thread_data *td)
{
	struct fio_file *f;
	struct io_u *io_u;

	io_u = __get_io_u(td);
	if (!io_u) {
		dprint(FD_IO, "__get_io_u failed\n");
		return NULL;
	}

	/*
	 * from a requeue, io_u already setup
	 */
	if (io_u->file)
		goto out;

	/*
	 * If using an iolog, grab next piece if any available.
	 */
	if (td->o.read_iolog_file) {
		if (read_iolog_get(td, io_u))
			goto err_put;
	} else if (set_io_u_file(td, io_u)) {
		dprint(FD_IO, "io_u %p, setting file failed\n", io_u);
		goto err_put;
	}

	f = io_u->file;
	assert(f->flags & FIO_FILE_OPEN);

	if (io_u->ddir != DDIR_SYNC) {
		if (!io_u->buflen && !(td->io_ops->flags & FIO_NOIO)) {
			dprint(FD_IO, "get_io_u: zero buflen on %p\n", io_u);
			goto err_put;
		}

		f->last_pos = io_u->offset + io_u->buflen;

		if (td->o.verify != VERIFY_NONE)
			populate_verify_io_u(td, io_u);
		else if (td->o.refill_buffers && io_u->ddir == DDIR_WRITE)
			io_u_fill_buffer(td, io_u, io_u->xfer_buflen);
	}

	/*
	 * Set io data pointers.
	 */
	io_u->endpos = io_u->offset + io_u->buflen;
	io_u->xfer_buf = io_u->buf;
	io_u->xfer_buflen = io_u->buflen;

out:
	if (!td_io_prep(td, io_u)) {
		if (!td->o.disable_slat)
			fio_gettime(&io_u->start_time, NULL);
		return io_u;
	}
err_put:
	dprint(FD_IO, "get_io_u failed\n");
	put_io_u(td, io_u);
	return NULL;
}

void io_u_log_error(struct thread_data *td, struct io_u *io_u)
{
	const char *msg[] = { "read", "write", "sync" };

	log_err("fio: io_u error");

	if (io_u->file)
		log_err(" on file %s", io_u->file->file_name);

	log_err(": %s\n", strerror(io_u->error));

	log_err("     %s offset=%llu, buflen=%lu\n", msg[io_u->ddir],
					io_u->offset, io_u->xfer_buflen);

	if (!td->error)
		td_verror(td, io_u->error, "io_u error");
}
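/*
 * Completion side: io_completed() below updates the per-direction block
 * and byte counters, and once the ramp time is over it feeds clat and
 * bandwidth samples to the stats code.
 */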
static void io_completed(struct thread_data *td, struct io_u *io_u,
			 struct io_completion_data *icd)
{
	/*
	 * Older gcc's are too dumb to realize that usec is always used
	 * initialized, silence that warning.
	 */
	unsigned long uninitialized_var(usec);

	dprint_io_u(io_u, "io complete");

	assert(io_u->flags & IO_U_F_FLIGHT);
	io_u->flags &= ~IO_U_F_FLIGHT;

	if (io_u->ddir == DDIR_SYNC) {
		td->last_was_sync = 1;
		return;
	}

	td->last_was_sync = 0;

	if (!io_u->error) {
		unsigned int bytes = io_u->buflen - io_u->resid;
		const enum fio_ddir idx = io_u->ddir;
		int ret;

		td->io_blocks[idx]++;
		td->io_bytes[idx] += bytes;
		td->this_io_bytes[idx] += bytes;

		if (ramp_time_over(td)) {
			if (!td->o.disable_clat || !td->o.disable_bw)
				usec = utime_since(&io_u->issue_time,
							&icd->time);

			if (!td->o.disable_clat) {
				add_clat_sample(td, idx, usec);
				io_u_mark_latency(td, usec);
			}
			if (!td->o.disable_bw)
				add_bw_sample(td, idx, &icd->time);
		}

		if (td_write(td) && idx == DDIR_WRITE &&
		    td->o.do_verify &&
		    td->o.verify != VERIFY_NONE)
			log_io_piece(td, io_u);

		icd->bytes_done[idx] += bytes;

		if (io_u->end_io) {
			ret = io_u->end_io(td, io_u);
			if (ret && !icd->error)
				icd->error = ret;
		}
	} else {
		icd->error = io_u->error;
		io_u_log_error(td, io_u);
	}
}

static void init_icd(struct thread_data *td, struct io_completion_data *icd,
		     int nr)
{
	if (!td->o.disable_clat || !td->o.disable_bw)
		fio_gettime(&icd->time, NULL);

	icd->nr = nr;

	icd->error = 0;
	icd->bytes_done[0] = icd->bytes_done[1] = 0;
}

static void ios_completed(struct thread_data *td,
			  struct io_completion_data *icd)
{
	struct io_u *io_u;
	int i;

	for (i = 0; i < icd->nr; i++) {
		io_u = td->io_ops->event(td, i);

		io_completed(td, io_u, icd);
		put_io_u(td, io_u);
	}
}

/*
 * Complete a single io_u for the sync engines.
 */
long io_u_sync_complete(struct thread_data *td, struct io_u *io_u)
{
	struct io_completion_data icd;

	init_icd(td, &icd, 1);
	io_completed(td, io_u, &icd);
	put_io_u(td, io_u);

	if (!icd.error)
		return icd.bytes_done[0] + icd.bytes_done[1];

	td_verror(td, icd.error, "io_u_sync_complete");
	return -1;
}

/*
 * Called to complete min_events number of io for the async engines.
 */
long io_u_queued_complete(struct thread_data *td, int min_evts)
{
	struct io_completion_data icd;
	struct timespec *tvp = NULL;
	int ret;
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 0, };

	dprint(FD_IO, "io_u_queued_completed: min=%d\n", min_evts);

	if (!min_evts)
		tvp = &ts;

	ret = td_io_getevents(td, min_evts, td->o.iodepth_batch_complete, tvp);
	if (ret < 0) {
		td_verror(td, -ret, "td_io_getevents");
		return ret;
	} else if (!ret)
		return ret;

	init_icd(td, &icd, ret);
	ios_completed(td, &icd);
	if (!icd.error)
		return icd.bytes_done[0] + icd.bytes_done[1];

	td_verror(td, icd.error, "io_u_queued_complete");
	return -1;
}
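/*
 * Note on io_u_queued_complete() above: min_evts == 0 passes a zeroed
 * timespec to td_io_getevents(), which turns the wait into a
 * non-blocking poll for whatever has already completed.
 */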
/*
 * Call when io_u is really queued, to update the submission latency.
 */
void io_u_queued(struct thread_data *td, struct io_u *io_u)
{
	if (!td->o.disable_slat) {
		unsigned long slat_time;

		slat_time = utime_since(&io_u->start_time, &io_u->issue_time);
		add_slat_sample(td, io_u->ddir, slat_time);
	}
}

/*
 * "randomly" fill the buffer contents
 */
void io_u_fill_buffer(struct thread_data *td, struct io_u *io_u,
		      unsigned int max_bs)
{
	long *ptr = io_u->buf;

	if (!td->o.zero_buffers) {
		while ((void *) ptr - io_u->buf < max_bs) {
			*ptr = rand() * GOLDEN_RATIO_PRIME;
			ptr++;
		}
	} else
		memset(ptr, 0, max_bs);
}