io_u.c revision e9fbdd74f6ac24d93824177a8858c028305c9900
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <signal.h>
#include <time.h>

#include "fio.h"
#include "os.h"

/*
 * The ->file_map[] contains a map of blocks we have or have not done io
 * to yet. Used to make sure we cover the entire range in a fair fashion.
 */
static int random_map_free(struct thread_data *td, struct fio_file *f,
			   unsigned long long block)
{
	unsigned int idx = RAND_MAP_IDX(td, f, block);
	unsigned int bit = RAND_MAP_BIT(td, f, block);

	return (f->file_map[idx] & (1UL << bit)) == 0;
}

/*
 * Mark a given offset as used in the map.
 */
static void mark_random_map(struct thread_data *td, struct fio_file *f,
			    struct io_u *io_u)
{
	unsigned int min_bs = td->rw_min_bs;
	unsigned long long block;
	unsigned int blocks;
	unsigned int nr_blocks;

	block = io_u->offset / (unsigned long long) min_bs;
	blocks = 0;
	nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;

	while (blocks < nr_blocks) {
		unsigned int idx, bit;

		if (!random_map_free(td, f, block))
			break;

		idx = RAND_MAP_IDX(td, f, block);
		bit = RAND_MAP_BIT(td, f, block);

		fio_assert(td, idx < f->num_maps);

		f->file_map[idx] |= (1UL << bit);
		block++;
		blocks++;
	}

	if ((blocks * min_bs) < io_u->buflen)
		io_u->buflen = blocks * min_bs;
}

/*
 * Return the next free block in the map.
 */
static int get_next_free_block(struct thread_data *td, struct fio_file *f,
			       unsigned long long *b)
{
	int i;

	i = f->last_free_lookup;
	*b = (i * BLOCKS_PER_MAP);
	while ((*b) * td->rw_min_bs < f->real_file_size) {
		if (f->file_map[i] != -1UL) {
			*b += ffz(f->file_map[i]);
			f->last_free_lookup = i;
			return 0;
		}

		*b += BLOCKS_PER_MAP;
		i++;
	}

	return 1;
}
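/*
 * Illustrative sketch (not part of this revision): how a block number is
 * expected to map onto the ->file_map[] bitmap the helpers above walk.
 * The real RAND_MAP_IDX()/RAND_MAP_BIT() definitions live in fio.h; the
 * split below simply assumes the common layout of one bit per min_bs-sized
 * block, BLOCKS_PER_MAP bits per map word.
 */
#if 0
/* toy stand-in for f->file_map[]: returns non-zero if the block's bit is set */
static int example_block_is_used(const unsigned long *map,
				 unsigned long long block)
{
	const unsigned int blocks_per_word = 8 * sizeof(unsigned long);
	unsigned int idx = block / blocks_per_word;	/* which map word */
	unsigned int bit = block % blocks_per_word;	/* which bit inside it */

	/* mark_random_map() sets this bit; random_map_free() tests it */
	return (map[idx] & (1UL << bit)) != 0;
}
#endif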
/*
 * For random io, generate a random new block and see if it's used. Repeat
 * until we find a free one. For sequential io, just return the end of
 * the last io issued.
 */
static int get_next_offset(struct thread_data *td, struct fio_file *f,
			   struct io_u *io_u)
{
	const int ddir = io_u->ddir;
	unsigned long long b, rb;
	long r;

	if (!td->sequential) {
		unsigned long long max_blocks = f->file_size / td->min_bs[ddir];
		int loops = 5;

		do {
			r = os_random_long(&td->random_state);
			b = ((max_blocks - 1) * r / (unsigned long long) (RAND_MAX+1.0));
			if (td->norandommap)
				break;
			rb = b + (f->file_offset / td->min_bs[ddir]);
			loops--;
		} while (!random_map_free(td, f, rb) && loops);

		/*
		 * if we failed to retrieve a truly random offset within
		 * the loops assigned, see if there are free ones left at all
		 */
		if (!loops && get_next_free_block(td, f, &b))
			return 1;
	} else
		b = f->last_pos / td->min_bs[ddir];

	io_u->offset = (b * td->min_bs[ddir]) + f->file_offset;
	if (io_u->offset >= f->real_file_size)
		return 1;

	return 0;
}

static unsigned int get_next_buflen(struct thread_data *td, struct fio_file *f,
				    struct io_u *io_u)
{
	const int ddir = io_u->ddir;
	unsigned int buflen;
	long r;

	if (td->min_bs[ddir] == td->max_bs[ddir])
		buflen = td->min_bs[ddir];
	else {
		r = os_random_long(&td->bsrange_state);
		buflen = (unsigned int) (1 + (double) (td->max_bs[ddir] - 1) * r / (RAND_MAX + 1.0));
		if (!td->bs_unaligned)
			buflen = (buflen + td->min_bs[ddir] - 1) & ~(td->min_bs[ddir] - 1);
	}

	while (buflen + io_u->offset > f->real_file_size) {
		if (buflen == td->min_bs[ddir])
			return 0;

		buflen = td->min_bs[ddir];
	}

	return buflen;
}

/*
 * Return the data direction for the next io_u. If the job is a
 * mixed read/write workload, check the rwmix cycle and switch if
 * necessary.
 */
static enum fio_ddir get_rw_ddir(struct thread_data *td)
{
	if (td_rw(td)) {
		struct timeval now;
		unsigned long elapsed;

		fio_gettime(&now, NULL);
		elapsed = mtime_since_now(&td->rwmix_switch);

		/*
		 * Check if it's time to seed a new data direction.
		 */
		if (elapsed >= td->rwmixcycle) {
			unsigned int v;
			long r;

			r = os_random_long(&td->rwmix_state);
			v = 1 + (int) (100.0 * (r / (RAND_MAX + 1.0)));
			if (v < td->rwmixread)
				td->rwmix_ddir = DDIR_READ;
			else
				td->rwmix_ddir = DDIR_WRITE;
			memcpy(&td->rwmix_switch, &now, sizeof(now));
		}
		return td->rwmix_ddir;
	} else if (td_read(td))
		return DDIR_READ;
	else
		return DDIR_WRITE;
}
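/*
 * Illustrative sketch (not part of this revision): the rwmix decision above
 * reduces to drawing v uniformly in 1..100 and picking a read whenever
 * v < rwmixread, so reads are chosen with roughly rwmixread percent
 * probability each time the rwmixcycle timer expires.
 */
#if 0
static int example_pick_read(long r, unsigned int rwmixread)
{
	/* same scaling as get_rw_ddir(): map r into 1..100 */
	unsigned int v = 1 + (unsigned int) (100.0 * (r / (RAND_MAX + 1.0)));

	return v < rwmixread;	/* non-zero -> DDIR_READ, else DDIR_WRITE */
}
#endif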
void put_io_u(struct thread_data *td, struct io_u *io_u)
{
	io_u->file = NULL;
	list_del(&io_u->list);
	list_add(&io_u->list, &td->io_u_freelist);
	td->cur_depth--;
}

static int fill_io_u(struct thread_data *td, struct fio_file *f,
		     struct io_u *io_u)
{
	/*
	 * If using an iolog, grab next piece if any available.
	 */
	if (td->read_iolog)
		return read_iolog_get(td, io_u);

	/*
	 * see if it's time to sync
	 */
	if (td->fsync_blocks && !(td->io_blocks[DDIR_WRITE] % td->fsync_blocks)
	    && should_fsync(td)) {
		io_u->ddir = DDIR_SYNC;
		io_u->file = f;
		return 0;
	}

	io_u->ddir = get_rw_ddir(td);

	/*
	 * No log, let the seq/rand engine retrieve the next buflen and
	 * position.
	 */
	if (get_next_offset(td, f, io_u))
		return 1;

	io_u->buflen = get_next_buflen(td, f, io_u);
	if (!io_u->buflen)
		return 1;

	/*
	 * mark entry before potentially trimming io_u
	 */
	if (!td->read_iolog && !td->sequential && !td->norandommap)
		mark_random_map(td, f, io_u);

	/*
	 * If using a write iolog, store this entry.
	 */
	if (td->write_iolog_file)
		write_iolog_put(td, io_u);

	io_u->file = f;
	return 0;
}

static void io_u_mark_depth(struct thread_data *td)
{
	int index = 0;

	switch (td->cur_depth) {
	default:
		index++;
	case 32 ... 63:
		index++;
	case 16 ... 31:
		index++;
	case 8 ... 15:
		index++;
	case 4 ... 7:
		index++;
	case 2 ... 3:
		index++;
	case 1:
		break;
	}

	td->io_u_map[index]++;
	td->total_io_u++;
}
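/*
 * Illustrative sketch (not part of this revision): the fall-through switch
 * above buckets the current queue depth roughly by powers of two, i.e.
 * depth 1 -> slot 0, 2-3 -> 1, 4-7 -> 2, 8-15 -> 3, 16-31 -> 4, 32-63 -> 5,
 * and anything larger -> 6. An equivalent loop-based form:
 */
#if 0
static int example_depth_bucket(unsigned int depth)
{
	int index = 0;

	/* halve until we reach 1, capping at the last io_u_map[] slot */
	while (depth > 1 && index < 6) {
		depth >>= 1;
		index++;
	}

	return index;
}
#endif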
struct io_u *__get_io_u(struct thread_data *td)
{
	struct io_u *io_u = NULL;

	if (!queue_full(td)) {
		io_u = list_entry(td->io_u_freelist.next, struct io_u, list);

		io_u->buflen = 0;
		io_u->error = 0;
		io_u->resid = 0;
		list_del(&io_u->list);
		list_add(&io_u->list, &td->io_u_busylist);
		td->cur_depth++;
		io_u_mark_depth(td);
	}

	return io_u;
}

/*
 * Return an io_u to be processed. Gets a buflen and offset, sets direction,
 * etc. The returned io_u is fully ready to be prepped and submitted.
 */
struct io_u *get_io_u(struct thread_data *td, struct fio_file *f)
{
	struct io_u *io_u;

	io_u = __get_io_u(td);
	if (!io_u)
		return NULL;

	if (td->zone_bytes >= td->zone_size) {
		td->zone_bytes = 0;
		f->last_pos += td->zone_skip;
	}

	if (fill_io_u(td, f, io_u)) {
		put_io_u(td, io_u);
		return NULL;
	}

	if (io_u->buflen + io_u->offset > f->real_file_size) {
		if (td->io_ops->flags & FIO_RAWIO) {
			put_io_u(td, io_u);
			return NULL;
		}

		io_u->buflen = f->real_file_size - io_u->offset;
	}

	if (io_u->ddir != DDIR_SYNC) {
		if (!io_u->buflen) {
			put_io_u(td, io_u);
			return NULL;
		}

		f->last_pos += io_u->buflen;

		if (td->verify != VERIFY_NONE)
			populate_verify_io_u(td, io_u);
	}

	if (td_io_prep(td, io_u)) {
		put_io_u(td, io_u);
		return NULL;
	}

	/*
	 * Set io data pointers.
	 */
	io_u->xfer_buf = io_u->buf;
	io_u->xfer_buflen = io_u->buflen;

	fio_gettime(&io_u->start_time, NULL);
	return io_u;
}

void io_completed(struct thread_data *td, struct io_u *io_u,
		  struct io_completion_data *icd)
{
	unsigned long msec;

	if (io_u->ddir == DDIR_SYNC) {
		td->last_was_sync = 1;
		return;
	}

	td->last_was_sync = 0;

	if (!io_u->error) {
		unsigned int bytes = io_u->buflen - io_u->resid;
		const enum fio_ddir idx = io_u->ddir;

		td->io_blocks[idx]++;
		td->io_bytes[idx] += bytes;
		td->zone_bytes += bytes;
		td->this_io_bytes[idx] += bytes;

		io_u->file->last_completed_pos = io_u->offset + io_u->buflen;

		msec = mtime_since(&io_u->issue_time, &icd->time);

		add_clat_sample(td, idx, msec);
		add_bw_sample(td, idx, &icd->time);

		if ((td_rw(td) || td_write(td)) && idx == DDIR_WRITE)
			log_io_piece(td, io_u);

		icd->bytes_done[idx] += bytes;
	} else
		icd->error = io_u->error;
}

void ios_completed(struct thread_data *td, struct io_completion_data *icd)
{
	struct io_u *io_u;
	int i;

	fio_gettime(&icd->time, NULL);

	icd->error = 0;
	icd->bytes_done[0] = icd->bytes_done[1] = 0;

	for (i = 0; i < icd->nr; i++) {
		io_u = td->io_ops->event(td, i);

		io_completed(td, io_u, icd);
		put_io_u(td, io_u);
	}
}
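/*
 * Illustrative sketch (not part of this revision): the intended life cycle
 * of an io_u from the caller's side. The real submission loop lives in the
 * main io path (fio.c), and td_io_queue()/td_io_getevents() are assumed
 * ioengine entry point names used here only to show how get_io_u(),
 * ios_completed() and put_io_u() fit together.
 */
#if 0
static void example_io_u_lifecycle(struct thread_data *td, struct fio_file *f)
{
	struct io_completion_data icd;
	struct io_u *io_u;

	io_u = get_io_u(td, f);		/* freelist -> busylist, fully prepped */
	if (!io_u)
		return;

	if (td_io_queue(td, io_u)) {	/* hand off to the ioengine */
		put_io_u(td, io_u);	/* busylist -> freelist on error */
		return;
	}

	icd.nr = td_io_getevents(td, 1, 1, NULL);	/* wait for completion */
	ios_completed(td, &icd);	/* accounting + put_io_u() per event */
}
#endif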