io_u.c revision 00de55ef85f01f3c56fac5397aca92eef0c73762
1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <unistd.h>
2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <fcntl.h>
3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <string.h>
4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <signal.h>
5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <time.h>
6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include "fio.h"
8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include "os.h"
9c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/*
 * Carries the parameters for one completion reap and accumulates its
 * results across io_completed() calls.
 */
struct io_completion_data {
	int nr;				/* input: number of events to reap */
	endio_handler *handler;		/* input: optional per-io callback, may be NULL */

	int error;			/* output: first error seen (0 if none) */
	unsigned long bytes_done[2];	/* output: bytes completed, indexed by data direction */
	struct timeval time;		/* output: timestamp taken when the reap started */
};
18c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
19c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/*
20c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * The ->file_map[] contains a map of blocks we have or have not done io
21c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * to yet. Used to make sure we cover the entire range in a fair fashion.
22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */
23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstatic int random_map_free(struct thread_data *td, struct fio_file *f,
24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath			   unsigned long long block)
25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	unsigned int idx = RAND_MAP_IDX(td, f, block);
27c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	unsigned int bit = RAND_MAP_BIT(td, f, block);
28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	return (f->file_map[idx] & (1UL << bit)) == 0;
30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/*
33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Mark a given offset as used in the map.
34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */
35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstatic void mark_random_map(struct thread_data *td, struct fio_file *f,
36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath			    struct io_u *io_u)
37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	unsigned int min_bs = td->rw_min_bs;
39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	unsigned long long block;
40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	unsigned int blocks;
41c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	unsigned int nr_blocks;
42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	block = io_u->offset / (unsigned long long) min_bs;
44c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	blocks = 0;
45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	nr_blocks = (io_u->buflen + min_bs - 1) / min_bs;
46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	while (blocks < nr_blocks) {
48c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		unsigned int idx, bit;
49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		if (!random_map_free(td, f, block))
51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath			break;
52c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		idx = RAND_MAP_IDX(td, f, block);
54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		bit = RAND_MAP_BIT(td, f, block);
55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		fio_assert(td, idx < f->num_maps);
57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		f->file_map[idx] |= (1UL << bit);
59c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		block++;
60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		blocks++;
61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	}
62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	if ((blocks * min_bs) < io_u->buflen)
64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		io_u->buflen = blocks * min_bs;
65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/*
68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Return the next free block in the map.
69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */
70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstatic int get_next_free_block(struct thread_data *td, struct fio_file *f,
71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath			       unsigned long long *b)
72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	int i;
74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	i = f->last_free_lookup;
76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	*b = (i * BLOCKS_PER_MAP);
77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	while ((*b) * td->rw_min_bs < f->real_file_size) {
78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		if (f->file_map[i] != -1UL) {
79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath			*b += ffz(f->file_map[i]);
80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath			f->last_free_lookup = i;
81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath			return 0;
82c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		}
83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		*b += BLOCKS_PER_MAP;
85c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		i++;
86c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	}
87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
88c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	return 1;
89c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
/*
 * For random io, generate a random new block and see if it's used. Repeat
 * until we find a free one. For sequential io, just return the end of
 * the last io issued.
 *
 * Returns 0 with io_u->offset set, or 1 if no valid offset could be
 * produced (offset would land at/past the real end of file, or the
 * random map has no free blocks left).
 */
static int get_next_offset(struct thread_data *td, struct fio_file *f,
			   struct io_u *io_u)
{
	const int ddir = io_u->ddir;
	unsigned long long b, rb;
	long r;

	if (!td->sequential) {
		/* number of min_bs-sized blocks covered by the file */
		unsigned long long max_blocks = f->file_size / td->min_bs[ddir];
		int loops = 5;

		do {
			r = os_random_long(&td->random_state);
			/* scale the random long into [0, max_blocks - 1] */
			b = ((max_blocks - 1) * r / (unsigned long long) (RAND_MAX+1.0));
			/* with norandommap there is no map to consult */
			if (td->norandommap)
				break;
			/* absolute block number, shifted by the file's start offset */
			rb = b + (f->file_offset / td->min_bs[ddir]);
			loops--;
		} while (!random_map_free(td, f, rb) && loops);

		/*
		 * if we failed to retrieve a truly random offset within
		 * the loops assigned, see if there are free ones left at all
		 *
		 * NOTE(review): if loops hits 0 but the final candidate was
		 * actually free, that candidate is still discarded in favor
		 * of the linear scan below — presumably an acceptable bias;
		 * confirm against upstream behavior.
		 */
		if (!loops && get_next_free_block(td, f, &b))
			return 1;
	} else
		b = f->last_pos / td->min_bs[ddir];

	io_u->offset = (b * td->min_bs[ddir]) + f->file_offset;
	/* reject offsets at or beyond the real end of the file */
	if (io_u->offset >= f->real_file_size)
		return 1;

	return 0;
}
131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstatic unsigned int get_next_buflen(struct thread_data *td, struct fio_file *f,
133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath				    struct io_u *io_u)
134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	const int ddir = io_u->ddir;
136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	unsigned int buflen;
137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	long r;
138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
139c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	if (td->min_bs[ddir] == td->max_bs[ddir])
140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		buflen = td->min_bs[ddir];
141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	else {
142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		r = os_random_long(&td->bsrange_state);
143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		buflen = (unsigned int) (1 + (double) (td->max_bs[ddir] - 1) * r / (RAND_MAX + 1.0));
144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		if (!td->bs_unaligned)
145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath			buflen = (buflen + td->min_bs[ddir] - 1) & ~(td->min_bs[ddir] - 1);
146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	}
147c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
148c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	while (buflen + io_u->offset > f->real_file_size) {
149c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		if (buflen == td->min_bs[ddir])
150c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath			return 0;
151c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
152c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		buflen = td->min_bs[ddir];
153c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	}
154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	return buflen;
156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath/*
159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * Return the data direction for the next io_u. If the job is a
160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * mixed read/write workload, check the rwmix cycle and switch if
161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath * necessary.
162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath */
163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathstatic enum fio_ddir get_rw_ddir(struct thread_data *td)
164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	if (td_rw(td)) {
166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		struct timeval now;
167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		unsigned long elapsed;
168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		fio_gettime(&now, NULL);
170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	 	elapsed = mtime_since_now(&td->rwmix_switch);
171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		/*
173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		 * Check if it's time to seed a new data direction.
174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		 */
175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		if (elapsed >= td->rwmixcycle) {
176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath			unsigned int v;
177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath			long r;
178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath			r = os_random_long(&td->rwmix_state);
180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath			v = 1 + (int) (100.0 * (r / (RAND_MAX + 1.0)));
181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath			if (v < td->rwmixread)
182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath				td->rwmix_ddir = DDIR_READ;
183c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath			else
184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath				td->rwmix_ddir = DDIR_WRITE;
185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath			memcpy(&td->rwmix_switch, &now, sizeof(now));
186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		}
187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath		return td->rwmix_ddir;
1887faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez	} else if (td_read(td))
1897faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez		return DDIR_READ;
1907faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez	else
1917faaa9f3f0df9d23790277834d426c3d992ac3baCarlos Hernandez		return DDIR_WRITE;
192c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath}
193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath
194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid put_io_u(struct thread_data *td, struct io_u *io_u)
195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{
196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	io_u->file = NULL;
197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath	list_del(&io_u->list);
198	list_add(&io_u->list, &td->io_u_freelist);
199	td->cur_depth--;
200}
201
202void requeue_io_u(struct thread_data *td, struct io_u **io_u)
203{
204	struct io_u *__io_u = *io_u;
205
206	list_del(&__io_u->list);
207	list_add_tail(&__io_u->list, &td->io_u_requeues);
208	td->cur_depth--;
209	*io_u = NULL;
210}
211
/*
 * Fill in direction, offset and length for the next io_u, honoring
 * iolog replay, periodic fsync insertion, and the random block map.
 * Returns 0 on success, non-zero when no further io can be generated.
 */
static int fill_io_u(struct thread_data *td, struct fio_file *f,
		     struct io_u *io_u)
{
	/*
	 * If using an iolog, grab next piece if any available.
	 */
	if (td->read_iolog)
		return read_iolog_get(td, io_u);

	/*
	 * see if it's time to sync: every fsync_blocks writes, once at
	 * least one write has actually been issued
	 */
	if (td->fsync_blocks && !(td->io_issues[DDIR_WRITE] % td->fsync_blocks)
	    && td->io_issues[DDIR_WRITE] && should_fsync(td)) {
		io_u->ddir = DDIR_SYNC;
		io_u->file = f;
		return 0;
	}

	io_u->ddir = get_rw_ddir(td);

	/*
	 * No log, let the seq/rand engine retrieve the next buflen and
	 * position.
	 */
	if (get_next_offset(td, f, io_u))
		return 1;

	io_u->buflen = get_next_buflen(td, f, io_u);
	if (!io_u->buflen)
		return 1;

	/*
	 * mark entry before potentially trimming io_u
	 */
	if (!td->read_iolog && !td->sequential && !td->norandommap)
		mark_random_map(td, f, io_u);

	/*
	 * If using a write iolog, store this entry.
	 */
	if (td->write_iolog_file)
		write_iolog_put(td, io_u);

	io_u->file = f;
	return 0;
}
259
/*
 * Bucket the current queue depth into td->io_u_map[] for end-of-run
 * statistics. The switch deliberately falls through: each case adds
 * one, so deeper depths accumulate a larger bucket index.
 */
static void io_u_mark_depth(struct thread_data *td)
{
	int index = 0;

	switch (td->cur_depth) {
	default:
		index++;
		/* fall through */
	case 32 ... 63:
		index++;
		/* fall through */
	case 16 ... 31:
		index++;
		/* fall through */
	case 8 ... 15:
		index++;
		/* fall through */
	case 4 ... 7:
		index++;
		/* fall through */
	case 2 ... 3:
		index++;
		/* fall through */
	case 1:
		break;
	}

	td->io_u_map[index]++;
	td->total_io_u++;
}
284
/*
 * Bucket a completion latency (in msec) into td->io_u_lat[]. As in
 * io_u_mark_depth(), the fall-through is intentional: each case adds
 * one, mapping power-of-two latency ranges to bucket indices.
 */
static void io_u_mark_latency(struct thread_data *td, unsigned long msec)
{
	int index = 0;

	switch (msec) {
	default:
		index++;
		/* fall through */
	case 1024 ... 2047:
		index++;
		/* fall through */
	case 512 ... 1023:
		index++;
		/* fall through */
	case 256 ... 511:
		index++;
		/* fall through */
	case 128 ... 255:
		index++;
		/* fall through */
	case 64 ... 127:
		index++;
		/* fall through */
	case 32 ... 63:
		index++;
		/* fall through */
	case 16 ... 31:
		index++;
		/* fall through */
	case 8 ... 15:
		index++;
		/* fall through */
	case 4 ... 7:
		index++;
		/* fall through */
	case 2 ... 3:
		index++;
		/* fall through */
	case 0 ... 1:
		break;
	}

	td->io_u_lat[index]++;
}
318
319static struct fio_file *get_next_file(struct thread_data *td)
320{
321	unsigned int old_next_file = td->next_file;
322	struct fio_file *f;
323
324	do {
325		f = &td->files[td->next_file];
326
327		td->next_file++;
328		if (td->next_file >= td->nr_files)
329			td->next_file = 0;
330
331		if (f->fd != -1)
332			break;
333
334		f = NULL;
335	} while (td->next_file != old_next_file);
336
337	return f;
338}
339
/*
 * Grab an io_u container: a previously requeued one takes priority
 * (it is already fully set up), otherwise pull a fresh one from the
 * freelist. Returns NULL when the queue is full and nothing was
 * requeued. On success the io_u is moved to the busy list and the
 * depth accounting is updated.
 */
struct io_u *__get_io_u(struct thread_data *td)
{
	struct io_u *io_u = NULL;

	if (!list_empty(&td->io_u_requeues))
		io_u = list_entry(td->io_u_requeues.next, struct io_u, list);
	else if (!queue_full(td)) {
		io_u = list_entry(td->io_u_freelist.next, struct io_u, list);

		/* fresh io_u: clear any stale state from its last use */
		io_u->buflen = 0;
		io_u->resid = 0;
		io_u->file = NULL;
	}

	if (io_u) {
		io_u->error = 0;
		list_del(&io_u->list);
		list_add(&io_u->list, &td->io_u_busylist);
		td->cur_depth++;
		io_u_mark_depth(td);
	}

	return io_u;
}
364
/*
 * Return an io_u to be processed. Gets a buflen and offset, sets direction,
 * etc. The returned io_u is fully ready to be prepped and submitted.
 */
struct io_u *get_io_u(struct thread_data *td)
{
	struct fio_file *f;
	struct io_u *io_u;

	io_u = __get_io_u(td);
	if (!io_u)
		return NULL;

	/*
	 * from a requeue, io_u already setup
	 */
	if (io_u->file)
		goto out;

	f = get_next_file(td);
	if (!f) {
		/* no open file available: return the io_u and bail */
		put_io_u(td, io_u);
		return NULL;
	}

	io_u->file = f;

	/* crossed a zone boundary: skip ahead by zone_skip */
	if (td->zone_bytes >= td->zone_size) {
		td->zone_bytes = 0;
		f->last_pos += td->zone_skip;
	}

	if (fill_io_u(td, f, io_u)) {
		put_io_u(td, io_u);
		return NULL;
	}

	/*
	 * io that would extend past the real end of file: raw io cannot
	 * be trimmed, otherwise clamp the length to what fits
	 */
	if (io_u->buflen + io_u->offset > f->real_file_size) {
		if (td->io_ops->flags & FIO_RAWIO) {
			put_io_u(td, io_u);
			return NULL;
		}

		io_u->buflen = f->real_file_size - io_u->offset;
	}

	if (io_u->ddir != DDIR_SYNC) {
		/* clamping above may have left nothing to transfer */
		if (!io_u->buflen) {
			put_io_u(td, io_u);
			return NULL;
		}

		f->last_pos = io_u->offset + io_u->buflen;

		if (td->verify != VERIFY_NONE)
			populate_verify_io_u(td, io_u);
	}

	/*
	 * Set io data pointers.
	 */
out:
	io_u->xfer_buf = io_u->buf;
	io_u->xfer_buflen = io_u->buflen;

	if (td_io_prep(td, io_u)) {
		put_io_u(td, io_u);
		return NULL;
	}

	fio_gettime(&io_u->start_time, NULL);
	return io_u;
}
438
/*
 * Account a single completed io_u: update per-direction byte/block
 * counters, latency and bandwidth samples, and the icd accumulators.
 * Errors are recorded in icd->error; the io_u is not freed here.
 */
static void io_completed(struct thread_data *td, struct io_u *io_u,
			 struct io_completion_data *icd)
{
	unsigned long msec;

	/* syncs carry no data; just note that the last op was a sync */
	if (io_u->ddir == DDIR_SYNC) {
		td->last_was_sync = 1;
		return;
	}

	td->last_was_sync = 0;

	if (!io_u->error) {
		/* resid is the unread/unwritten remainder, if any */
		unsigned int bytes = io_u->buflen - io_u->resid;
		const enum fio_ddir idx = io_u->ddir;
		int ret;

		td->io_blocks[idx]++;
		td->io_bytes[idx] += bytes;
		td->zone_bytes += bytes;
		td->this_io_bytes[idx] += bytes;

		io_u->file->last_completed_pos = io_u->offset + io_u->buflen;

		/* completion latency: issue time to reap time */
		msec = mtime_since(&io_u->issue_time, &icd->time);

		add_clat_sample(td, idx, msec);
		add_bw_sample(td, idx, &icd->time);
		io_u_mark_latency(td, msec);

		/* completed writes are logged so verify can replay them */
		if ((td_rw(td) || td_write(td)) && idx == DDIR_WRITE)
			log_io_piece(td, io_u);

		icd->bytes_done[idx] += bytes;

		/* only the first handler error is kept */
		if (icd->handler) {
			ret = icd->handler(io_u);
			if (ret && !icd->error)
				icd->error = ret;
		}
	} else
		icd->error = io_u->error;
}
482
483static void init_icd(struct io_completion_data *icd, endio_handler *handler,
484		     int nr)
485{
486	fio_gettime(&icd->time, NULL);
487
488	icd->handler = handler;
489	icd->nr = nr;
490
491	icd->error = 0;
492	icd->bytes_done[0] = icd->bytes_done[1] = 0;
493}
494
495static void ios_completed(struct thread_data *td,
496			  struct io_completion_data *icd)
497{
498	struct io_u *io_u;
499	int i;
500
501	for (i = 0; i < icd->nr; i++) {
502		io_u = td->io_ops->event(td, i);
503
504		io_completed(td, io_u, icd);
505		put_io_u(td, io_u);
506	}
507}
508
509long io_u_sync_complete(struct thread_data *td, struct io_u *io_u,
510			endio_handler *handler)
511{
512	struct io_completion_data icd;
513
514	init_icd(&icd, handler, 1);
515	io_completed(td, io_u, &icd);
516	put_io_u(td, io_u);
517
518	if (!icd.error)
519		return icd.bytes_done[0] + icd.bytes_done[1];
520
521	return -1;
522}
523
524long io_u_queued_complete(struct thread_data *td, int min_events,
525			  endio_handler *handler)
526
527{
528	struct io_completion_data icd;
529	struct timespec *tvp = NULL;
530	int ret;
531
532	if (min_events > 0) {
533		ret = td_io_commit(td);
534		if (ret < 0) {
535			td_verror(td, -ret);
536			return ret;
537		}
538	} else {
539		struct timespec ts = { .tv_sec = 0, .tv_nsec = 0, };
540
541		tvp = &ts;
542	}
543
544	ret = td_io_getevents(td, min_events, td->cur_depth, tvp);
545	if (ret < 0) {
546		td_verror(td, -ret);
547		return ret;
548	} else if (!ret)
549		return ret;
550
551	init_icd(&icd, handler, ret);
552	ios_completed(td, &icd);
553	if (!icd.error)
554		return icd.bytes_done[0] + icd.bytes_done[1];
555
556	return -1;
557}
558