io_u.c revision f697125ab9003d358a37186d610e09799afd190f
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <signal.h>
#include <time.h>
#include <assert.h>

#include "fio.h"
#include "os.h"

/*
 * The ->file_map[] contains a map of blocks we have or have not done io
 * to yet. Used to make sure we cover the entire range in a fair fashion.
 */
static int random_map_free(struct thread_data *td, struct fio_file *f,
			   unsigned long long block)
{
	unsigned int idx = RAND_MAP_IDX(td, f, block);
	unsigned int bit = RAND_MAP_BIT(td, f, block);

	return (f->file_map[idx] & (1UL << bit)) == 0;
}

/*
 * Mark a given offset as used in the map.
 */
static void mark_random_map(struct thread_data *td, struct fio_file *f,
			    struct io_u *io_u)
{
	unsigned int min_bs = td->min_bs[io_u->ddir];
	unsigned long long block;
	unsigned int blocks;

	block = io_u->offset / (unsigned long long) min_bs;
	blocks = 0;
	while (blocks < (io_u->buflen / min_bs)) {
		unsigned int idx, bit;

		if (!random_map_free(td, f, block))
			break;

		idx = RAND_MAP_IDX(td, f, block);
		bit = RAND_MAP_BIT(td, f, block);

		assert(idx < f->num_maps);

		f->file_map[idx] |= (1UL << bit);
		block++;
		blocks++;
	}

	if ((blocks * min_bs) < io_u->buflen)
		io_u->buflen = blocks * min_bs;
}

/*
 * Return the next free block in the map.
 */
static int get_next_free_block(struct thread_data *td, struct fio_file *f,
			       unsigned long long *b)
{
	int i;

	*b = 0;
	i = 0;
	while ((*b) * td->rw_min_bs < f->file_size) {
		if (f->file_map[i] != -1UL) {
			*b += ffz(f->file_map[i]);
			return 0;
		}

		*b += BLOCKS_PER_MAP;
		i++;
	}

	return 1;
}

/*
 * For random io, generate a random new block and see if it's used. Repeat
 * until we find a free one. For sequential io, just return the end of
 * the last io issued.
 */
static int get_next_offset(struct thread_data *td, struct fio_file *f,
			   unsigned long long *offset, int ddir)
{
	unsigned long long b, rb;
	long r;

	if (!td->sequential) {
		unsigned long long max_blocks = f->file_size / td->min_bs[ddir];
		int loops = 50;

		do {
			r = os_random_long(&td->random_state);
			b = ((max_blocks - 1) * r / (unsigned long long) (RAND_MAX+1.0));
			if (td->norandommap)
				break;
			rb = b + (f->file_offset / td->min_bs[ddir]);
			loops--;
		} while (!random_map_free(td, f, rb) && loops);

		if (!loops) {
			if (get_next_free_block(td, f, &b))
				return 1;
		}
	} else
		b = f->last_pos / td->min_bs[ddir];

	*offset = (b * td->min_bs[ddir]) + f->file_offset;
	if (*offset > f->file_size)
		return 1;

	return 0;
}

static unsigned int get_next_buflen(struct thread_data *td, int ddir)
{
	unsigned int buflen;
	long r;

	if (td->min_bs[ddir] == td->max_bs[ddir])
		buflen = td->min_bs[ddir];
	else {
		r = os_random_long(&td->bsrange_state);
		buflen = (1 + (double) (td->max_bs[ddir] - 1) * r / (RAND_MAX + 1.0));
		if (!td->bs_unaligned)
			buflen = (buflen + td->min_bs[ddir] - 1) & ~(td->min_bs[ddir] - 1);
	}

	if (buflen > td->io_size - td->this_io_bytes[ddir]) {
		/*
		 * if using direct/raw io, we may not be able to
		 * shrink the size. so just fail it.
		 */
		if (td->io_ops->flags & FIO_RAWIO)
			return 0;

		buflen = td->io_size - td->this_io_bytes[ddir];
	}

	return buflen;
}

/*
 * Return the data direction for the next io_u. If the job is a
 * mixed read/write workload, check the rwmix cycle and switch if
 * necessary.
 */
static int get_rw_ddir(struct thread_data *td)
{
	if (td_rw(td)) {
		struct timeval now;
		unsigned long elapsed;

		gettimeofday(&now, NULL);
		elapsed = mtime_since_now(&td->rwmix_switch);

		/*
		 * Check if it's time to seed a new data direction.
		 */
		if (elapsed >= td->rwmixcycle) {
			unsigned int v;
			long r;

			r = os_random_long(&td->rwmix_state);
			v = 1 + (int) (100.0 * (r / (RAND_MAX + 1.0)));
			if (v < td->rwmixread)
				td->rwmix_ddir = DDIR_READ;
			else
				td->rwmix_ddir = DDIR_WRITE;
			memcpy(&td->rwmix_switch, &now, sizeof(now));
		}
		return td->rwmix_ddir;
	} else if (td_read(td))
		return DDIR_READ;
	else
		return DDIR_WRITE;
}

void put_io_u(struct thread_data *td, struct io_u *io_u)
{
	io_u->file = NULL;
	list_del(&io_u->list);
	list_add(&io_u->list, &td->io_u_freelist);
	td->cur_depth--;
}

static int fill_io_u(struct thread_data *td, struct fio_file *f,
		     struct io_u *io_u)
{
	/*
	 * If using an iolog, grab next piece if any available.
	 */
	if (td->read_iolog)
		return read_iolog_get(td, io_u);

	/*
	 * see if it's time to sync
	 */
	if (td->fsync_blocks && !(td->io_blocks[DDIR_WRITE] % td->fsync_blocks)
	    && should_fsync(td)) {
		io_u->ddir = DDIR_SYNC;
		io_u->file = f;
		return 0;
	}

	io_u->ddir = get_rw_ddir(td);

	/*
	 * No log, let the seq/rand engine retrieve the next position.
	 */
	if (!get_next_offset(td, f, &io_u->offset, io_u->ddir)) {
		io_u->buflen = get_next_buflen(td, io_u->ddir);
		if (io_u->buflen) {
			/*
			 * If using a write iolog, store this entry.
			 */
			if (td->write_iolog_file)
				write_iolog_put(td, io_u);

			io_u->file = f;
			return 0;
		}
	}

	return 1;
}

struct io_u *__get_io_u(struct thread_data *td)
{
	struct io_u *io_u = NULL;

	if (!queue_full(td)) {
		io_u = list_entry(td->io_u_freelist.next, struct io_u, list);

		io_u->buflen = 0;
		io_u->error = 0;
		io_u->resid = 0;
		list_del(&io_u->list);
		list_add(&io_u->list, &td->io_u_busylist);
		td->cur_depth++;
	}

	return io_u;
}

/*
 * Return an io_u to be processed. Gets a buflen and offset, sets direction,
 * etc. The returned io_u is fully ready to be prepped and submitted.
 */
struct io_u *get_io_u(struct thread_data *td, struct fio_file *f)
{
	struct io_u *io_u;

	io_u = __get_io_u(td);
	if (!io_u)
		return NULL;

	if (td->zone_bytes >= td->zone_size) {
		td->zone_bytes = 0;
		f->last_pos += td->zone_skip;
	}

	if (fill_io_u(td, f, io_u)) {
		put_io_u(td, io_u);
		return NULL;
	}

	if (io_u->buflen + io_u->offset > f->file_size) {
		if (td->io_ops->flags & FIO_RAWIO) {
			put_io_u(td, io_u);
			return NULL;
		}

		io_u->buflen = f->file_size - io_u->offset;
	}

	if (io_u->ddir != DDIR_SYNC) {
		if (!io_u->buflen) {
			put_io_u(td, io_u);
			return NULL;
		}

		if (!td->read_iolog && !td->sequential && !td->norandommap)
			mark_random_map(td, f, io_u);

		f->last_pos += io_u->buflen;

		if (td->verify != VERIFY_NONE)
			populate_verify_io_u(td, io_u);
	}

	if (td_io_prep(td, io_u)) {
		put_io_u(td, io_u);
		return NULL;
	}

	gettimeofday(&io_u->start_time, NULL);
	return io_u;
}

void io_completed(struct thread_data *td, struct io_u *io_u,
		  struct io_completion_data *icd)
{
	struct timeval e;
	unsigned long msec;

	if (io_u->ddir == DDIR_SYNC) {
		td->last_was_sync = 1;
		return;
	}

	td->last_was_sync = 0;

	gettimeofday(&e, NULL);

	if (!io_u->error) {
		unsigned int bytes = io_u->buflen - io_u->resid;
		const int idx = io_u->ddir;

		td->io_blocks[idx]++;
		td->io_bytes[idx] += bytes;
		td->zone_bytes += bytes;
		td->this_io_bytes[idx] += bytes;

		msec = mtime_since(&io_u->issue_time, &e);

		add_clat_sample(td, idx, msec);
		add_bw_sample(td, idx);

		if ((td_rw(td) || td_write(td)) && idx == DDIR_WRITE)
			log_io_piece(td, io_u);

		icd->bytes_done[idx] += bytes;
	} else
		icd->error = io_u->error;
}

void ios_completed(struct thread_data *td, struct io_completion_data *icd)
{
	struct io_u *io_u;
	int i;

	icd->error = 0;
	icd->bytes_done[0] = icd->bytes_done[1] = 0;

	for (i = 0; i < icd->nr; i++) {
		io_u = td->io_ops->event(td, i);

		io_completed(td, io_u, icd);
		put_io_u(td, io_u);
	}
}
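
The random map used by random_map_free(), mark_random_map() and get_next_free_block() is a plain bitmap with one bit per minimum-sized block, scanned a word at a time. Below is a minimal standalone sketch of that bookkeeping, not part of io_u.c: MAP_IDX, MAP_BIT and first_zero_bit() are simplified stand-ins for the RAND_MAP_IDX/RAND_MAP_BIT macros and kernel-style ffz() that fio defines in its headers (the real macros also fold in file offset and block size).

/*
 * Standalone sketch of the block-bitmap bookkeeping; illustrative only.
 */
#include <assert.h>
#include <stdio.h>

#define BLOCKS_PER_WORD	(sizeof(unsigned long) * 8)
#define MAP_IDX(block)	((block) / BLOCKS_PER_WORD)
#define MAP_BIT(block)	((block) & (BLOCKS_PER_WORD - 1))

#define NR_BLOCKS	256
static unsigned long file_map[NR_BLOCKS / BLOCKS_PER_WORD];

/* first zero bit in a word; fio's kernel-style ffz() does the same job */
static unsigned int first_zero_bit(unsigned long word)
{
	unsigned int bit = 0;

	while (word & 1UL) {
		word >>= 1;
		bit++;
	}
	return bit;
}

static int map_free(unsigned long long block)
{
	return (file_map[MAP_IDX(block)] & (1UL << MAP_BIT(block))) == 0;
}

static void map_set(unsigned long long block)
{
	file_map[MAP_IDX(block)] |= 1UL << MAP_BIT(block);
}

/* linear scan for the next unused block, one word at a time */
static int next_free_block(unsigned long long *b)
{
	unsigned int i;

	for (i = 0; i < NR_BLOCKS / BLOCKS_PER_WORD; i++) {
		if (file_map[i] != ~0UL) {
			*b = i * BLOCKS_PER_WORD + first_zero_bit(file_map[i]);
			return 0;
		}
	}
	return 1;	/* map is full */
}

int main(void)
{
	unsigned long long b;

	map_set(0);
	map_set(1);
	assert(!map_free(0) && map_free(2));
	assert(next_free_block(&b) == 0 && b == 2);
	printf("next free block: %llu\n", b);
	return 0;
}

Falling back to a word-at-a-time scan after 50 failed random picks, as get_next_offset() does, keeps the common case cheap while still guaranteeing that every block eventually gets covered.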
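
get_next_buflen() rounds a randomly drawn length up to a multiple of the minimum block size with the (x + bs - 1) & ~(bs - 1) trick, which only works when the block size is a power of two (assumed here to be guaranteed by fio's option handling). A minimal sketch of just that rounding, using the hypothetical helper name align_buflen():

/*
 * Standalone sketch of the buffer-length round-up; illustrative only.
 */
#include <assert.h>
#include <stdio.h>

/* round buflen up to the next multiple of min_bs (min_bs a power of two) */
static unsigned int align_buflen(unsigned int buflen, unsigned int min_bs)
{
	return (buflen + min_bs - 1) & ~(min_bs - 1);
}

int main(void)
{
	assert(align_buflen(1, 4096) == 4096);
	assert(align_buflen(4096, 4096) == 4096);
	assert(align_buflen(5000, 4096) == 8192);
	printf("%u\n", align_buflen(70000, 65536));	/* prints 131072 */
	return 0;
}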
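
get_rw_ddir() splits a mixed workload by drawing a value in 1..100 and comparing it against the configured read percentage, re-rolling only once per rwmixcycle interval. A standalone sketch of the percentage split, with the time gating omitted and a hypothetical pick_ddir() helper in place of fio's state:

/*
 * Standalone sketch of the rwmix percentage split; illustrative only.
 */
#include <stdio.h>
#include <stdlib.h>

#define DDIR_READ	0
#define DDIR_WRITE	1

static int pick_ddir(unsigned int rwmixread)
{
	unsigned int v = 1 + (unsigned int) (100.0 * (rand() / (RAND_MAX + 1.0)));

	return v < rwmixread ? DDIR_READ : DDIR_WRITE;
}

int main(void)
{
	unsigned int reads = 0, i;

	srand(1);
	for (i = 0; i < 100000; i++)
		if (pick_ddir(70) == DDIR_READ)
			reads++;

	/* prints roughly rwmixread / 100 */
	printf("read fraction: %.2f\n", reads / 100000.0);
	return 0;
}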