/*
 * sync/psync engine
 *
 * IO engines that transfer data with regular synchronous syscalls:
 * read(2)/write(2) plus lseek(2) for the "sync" engine, and positioned
 * pread(2)/pwrite(2) for the "psync" engine. The vectored variants
 * "vsync", "pvsync", and "pvsync2" use readv(2)/writev(2),
 * preadv(2)/pwritev(2), and preadv2(2)/pwritev2(2), respectively.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/uio.h>
#include <errno.h>
#include <assert.h>

#include "../fio.h"
#include "../optgroup.h"

/*
 * The sync engines use the file's engine_pos to remember where the last
 * IO ended, so a redundant lseek(2) can be skipped when IO is sequential.
 */
#define LAST_POS(f)	((f)->engine_pos)

struct syncio_data {
	struct iovec *iovecs;
	struct io_u **io_us;
	unsigned int queued;
	unsigned int events;
	unsigned long queued_bytes;

	unsigned long long last_offset;
	struct fio_file *last_file;
	enum fio_ddir last_ddir;
};

#ifdef FIO_HAVE_PWRITEV2
struct psyncv2_options {
	void *pad;
	unsigned int hipri;
};

static struct fio_option options[] = {
	{
		.name	= "hipri",
		.lname	= "RWF_HIPRI",
		.type	= FIO_OPT_STR_SET,
		.off1	= offsetof(struct psyncv2_options, hipri),
		.help	= "Set RWF_HIPRI for pwritev2/preadv2",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_INVALID,
	},
	{
		.name	= NULL,
	},
};
#endif

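/*
 * Position the file for the "sync" engine: the lseek(2) is skipped when
 * the file offset already matches the io_u, i.e. the IO is sequential.
 */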
static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;

	if (!ddir_rw(io_u->ddir))
		return 0;

	if (LAST_POS(f) != -1ULL && LAST_POS(f) == io_u->offset)
		return 0;

	if (lseek(f->fd, io_u->offset, SEEK_SET) == -1) {
		td_verror(td, errno, "lseek");
		return 1;
	}

	return 0;
}

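/*
 * Common completion handling: remember where the IO ended, turn a short
 * transfer into a residual byte count, and map a failed syscall to the
 * errno it set.
 */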
static int fio_io_end(struct thread_data *td, struct io_u *io_u, int ret)
{
	if (io_u->file && ret >= 0 && ddir_rw(io_u->ddir))
		LAST_POS(io_u->file) = io_u->offset + ret;

	if (ret != (int) io_u->xfer_buflen) {
		if (ret >= 0) {
			io_u->resid = io_u->xfer_buflen - ret;
			io_u->error = 0;
			return FIO_Q_COMPLETED;
		} else
			io_u->error = errno;
	}

	if (io_u->error) {
		io_u_log_error(td, io_u);
		td_verror(td, io_u->error, "xfer");
	}

	return FIO_Q_COMPLETED;
}

#ifdef CONFIG_PWRITEV
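/*
 * pvsync: one positioned preadv(2)/pwritev(2) per io_u, using a single
 * iovec. The vsync init/cleanup below is reused for the allocation.
 */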
static int fio_pvsyncio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct syncio_data *sd = td->io_ops_data;
	struct iovec *iov = &sd->iovecs[0];
	struct fio_file *f = io_u->file;
	int ret;

	fio_ro_check(td, io_u);

	iov->iov_base = io_u->xfer_buf;
	iov->iov_len = io_u->xfer_buflen;

	if (io_u->ddir == DDIR_READ)
		ret = preadv(f->fd, iov, 1, io_u->offset);
	else if (io_u->ddir == DDIR_WRITE)
		ret = pwritev(f->fd, iov, 1, io_u->offset);
	else if (io_u->ddir == DDIR_TRIM) {
		do_io_u_trim(td, io_u);
		return FIO_Q_COMPLETED;
	} else
		ret = do_io_u_sync(td, io_u);

	return fio_io_end(td, io_u, ret);
}
#endif

#ifdef FIO_HAVE_PWRITEV2
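/*
 * pvsync2: like pvsync, but preadv2(2)/pwritev2(2) accept per-IO flags,
 * so RWF_HIPRI (polled IO on devices that support it) can be requested
 * via the "hipri" option.
 */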
static int fio_pvsyncio2_queue(struct thread_data *td, struct io_u *io_u)
{
	struct syncio_data *sd = td->io_ops_data;
	struct psyncv2_options *o = td->eo;
	struct iovec *iov = &sd->iovecs[0];
	struct fio_file *f = io_u->file;
	int ret, flags = 0;

	fio_ro_check(td, io_u);

	if (o->hipri)
		flags |= RWF_HIPRI;

	iov->iov_base = io_u->xfer_buf;
	iov->iov_len = io_u->xfer_buflen;

	if (io_u->ddir == DDIR_READ)
		ret = preadv2(f->fd, iov, 1, io_u->offset, flags);
	else if (io_u->ddir == DDIR_WRITE)
		ret = pwritev2(f->fd, iov, 1, io_u->offset, flags);
	else if (io_u->ddir == DDIR_TRIM) {
		do_io_u_trim(td, io_u);
		return FIO_Q_COMPLETED;
	} else
		ret = do_io_u_sync(td, io_u);

	return fio_io_end(td, io_u, ret);
}
#endif

static int fio_psyncio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;
	int ret;

	fio_ro_check(td, io_u);

	if (io_u->ddir == DDIR_READ)
		ret = pread(f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
	else if (io_u->ddir == DDIR_WRITE)
		ret = pwrite(f->fd, io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
	else if (io_u->ddir == DDIR_TRIM) {
		do_io_u_trim(td, io_u);
		return FIO_Q_COMPLETED;
	} else
		ret = do_io_u_sync(td, io_u);

	return fio_io_end(td, io_u, ret);
}

static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;
	int ret;

	fio_ro_check(td, io_u);

	if (io_u->ddir == DDIR_READ)
		ret = read(f->fd, io_u->xfer_buf, io_u->xfer_buflen);
	else if (io_u->ddir == DDIR_WRITE)
		ret = write(f->fd, io_u->xfer_buf, io_u->xfer_buflen);
	else if (io_u->ddir == DDIR_TRIM) {
		do_io_u_trim(td, io_u);
		return FIO_Q_COMPLETED;
	} else
		ret = do_io_u_sync(td, io_u);

	return fio_io_end(td, io_u, ret);
}

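/*
 * The sync engines complete IO inline from ->queue(), so vsync's
 * ->getevents() simply hands back whatever the last commit produced.
 */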
static int fio_vsyncio_getevents(struct thread_data *td, unsigned int min,
				 unsigned int max,
				 const struct timespec fio_unused *t)
{
	struct syncio_data *sd = td->io_ops_data;
	int ret;

	if (min) {
		ret = sd->events;
		sd->events = 0;
	} else
		ret = 0;

	dprint(FD_IO, "vsyncio_getevents: min=%d,max=%d: %d\n", min, max, ret);
	return ret;
}

static struct io_u *fio_vsyncio_event(struct thread_data *td, int event)
{
	struct syncio_data *sd = td->io_ops_data;

	return sd->io_us[event];
}

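/*
 * An io_u may join the pending batch only if it continues it: same
 * file, same data direction, and an offset contiguous with the end of
 * the previously queued buffer.
 */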
static int fio_vsyncio_append(struct thread_data *td, struct io_u *io_u)
{
	struct syncio_data *sd = td->io_ops_data;

	if (ddir_sync(io_u->ddir))
		return 0;

	if (io_u->offset == sd->last_offset && io_u->file == sd->last_file &&
	    io_u->ddir == sd->last_ddir)
		return 1;

	return 0;
}

static void fio_vsyncio_set_iov(struct syncio_data *sd, struct io_u *io_u,
				int idx)
{
	sd->io_us[idx] = io_u;
	sd->iovecs[idx].iov_base = io_u->xfer_buf;
	sd->iovecs[idx].iov_len = io_u->xfer_buflen;
	sd->last_offset = io_u->offset + io_u->xfer_buflen;
	sd->last_file = io_u->file;
	sd->last_ddir = io_u->ddir;
	sd->queued_bytes += io_u->xfer_buflen;
	sd->queued++;
}

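/*
 * vsync queueing: contiguous io_us are gathered into the iovec array
 * until fio commits them. A non-contiguous io_u forces a commit of the
 * pending batch first (FIO_Q_BUSY), and sync requests bypass batching.
 */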
static int fio_vsyncio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct syncio_data *sd = td->io_ops_data;

	fio_ro_check(td, io_u);

	if (!fio_vsyncio_append(td, io_u)) {
		dprint(FD_IO, "vsyncio_queue: no append (%d)\n", sd->queued);
		/*
		 * If we can't append and have stuff queued, tell fio to
		 * commit those first and then retry this io
		 */
		if (sd->queued)
			return FIO_Q_BUSY;
		if (ddir_sync(io_u->ddir)) {
			int ret = do_io_u_sync(td, io_u);

			return fio_io_end(td, io_u, ret);
		}

		sd->queued = 0;
		sd->queued_bytes = 0;
		fio_vsyncio_set_iov(sd, io_u, 0);
	} else {
		if (sd->queued == td->o.iodepth) {
			dprint(FD_IO, "vsyncio_queue: max depth %d\n", sd->queued);
			return FIO_Q_BUSY;
		}

		dprint(FD_IO, "vsyncio_queue: append\n");
		fio_vsyncio_set_iov(sd, io_u, sd->queued);
	}

	dprint(FD_IO, "vsyncio_queue: depth now %d\n", sd->queued);
	return FIO_Q_QUEUED;
}

/*
 * Check that we transferred all bytes, or saw an error, etc
 */
static int fio_vsyncio_end(struct thread_data *td, ssize_t bytes)
{
	struct syncio_data *sd = td->io_ops_data;
	struct io_u *io_u;
	unsigned int i;
	int err;

	/*
	 * transferred everything, perfect
	 */
	if (bytes == sd->queued_bytes)
		return 0;

	err = errno;
	for (i = 0; i < sd->queued; i++) {
		io_u = sd->io_us[i];

		if (bytes == -1) {
			io_u->error = err;
		} else {
			unsigned int this_io;

			this_io = bytes;
			if (this_io > io_u->xfer_buflen)
				this_io = io_u->xfer_buflen;

			io_u->resid = io_u->xfer_buflen - this_io;
			io_u->error = 0;
			bytes -= this_io;
		}
	}

	if (bytes == -1) {
		td_verror(td, err, "xfer vsync");
		return -err;
	}

	return 0;
}

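/*
 * Issue the pending batch: seek to the offset of the first queued io_u
 * and fire a single readv(2)/writev(2) covering all gathered iovecs.
 */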
static int fio_vsyncio_commit(struct thread_data *td)
{
	struct syncio_data *sd = td->io_ops_data;
	struct fio_file *f;
	ssize_t ret;

	if (!sd->queued)
		return 0;

	io_u_mark_submit(td, sd->queued);
	f = sd->last_file;

	if (lseek(f->fd, sd->io_us[0]->offset, SEEK_SET) == -1) {
		int err = -errno;

		td_verror(td, errno, "lseek");
		return err;
	}

	if (sd->last_ddir == DDIR_READ)
		ret = readv(f->fd, sd->iovecs, sd->queued);
	else
		ret = writev(f->fd, sd->iovecs, sd->queued);

	dprint(FD_IO, "vsyncio_commit: %d\n", (int) ret);
	sd->events = sd->queued;
	sd->queued = 0;
	return fio_vsyncio_end(td, ret);
}

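/*
 * The iovec and io_u arrays are sized by the job's iodepth, which
 * therefore caps how many io_us one readv(2)/writev(2) can carry.
 */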
static int fio_vsyncio_init(struct thread_data *td)
{
	struct syncio_data *sd;

	sd = malloc(sizeof(*sd));
	memset(sd, 0, sizeof(*sd));
	sd->last_offset = -1ULL;
	sd->iovecs = malloc(td->o.iodepth * sizeof(struct iovec));
	sd->io_us = malloc(td->o.iodepth * sizeof(struct io_u *));

	td->io_ops_data = sd;
	return 0;
}

static void fio_vsyncio_cleanup(struct thread_data *td)
{
	struct syncio_data *sd = td->io_ops_data;

	if (sd) {
		free(sd->iovecs);
		free(sd->io_us);
		free(sd);
	}
}

static struct ioengine_ops ioengine_rw = {
	.name		= "sync",
	.version	= FIO_IOOPS_VERSION,
	.prep		= fio_syncio_prep,
	.queue		= fio_syncio_queue,
	.open_file	= generic_open_file,
	.close_file	= generic_close_file,
	.get_file_size	= generic_get_file_size,
	.flags		= FIO_SYNCIO,
};

static struct ioengine_ops ioengine_prw = {
	.name		= "psync",
	.version	= FIO_IOOPS_VERSION,
	.queue		= fio_psyncio_queue,
	.open_file	= generic_open_file,
	.close_file	= generic_close_file,
	.get_file_size	= generic_get_file_size,
	.flags		= FIO_SYNCIO,
};

static struct ioengine_ops ioengine_vrw = {
	.name		= "vsync",
	.version	= FIO_IOOPS_VERSION,
	.init		= fio_vsyncio_init,
	.cleanup	= fio_vsyncio_cleanup,
	.queue		= fio_vsyncio_queue,
	.commit		= fio_vsyncio_commit,
	.event		= fio_vsyncio_event,
	.getevents	= fio_vsyncio_getevents,
	.open_file	= generic_open_file,
	.close_file	= generic_close_file,
	.get_file_size	= generic_get_file_size,
	.flags		= FIO_SYNCIO,
};

#ifdef CONFIG_PWRITEV
static struct ioengine_ops ioengine_pvrw = {
	.name		= "pvsync",
	.version	= FIO_IOOPS_VERSION,
	.init		= fio_vsyncio_init,
	.cleanup	= fio_vsyncio_cleanup,
	.queue		= fio_pvsyncio_queue,
	.open_file	= generic_open_file,
	.close_file	= generic_close_file,
	.get_file_size	= generic_get_file_size,
	.flags		= FIO_SYNCIO,
};
#endif

#ifdef FIO_HAVE_PWRITEV2
static struct ioengine_ops ioengine_pvrw2 = {
	.name		= "pvsync2",
	.version	= FIO_IOOPS_VERSION,
	.init		= fio_vsyncio_init,
	.cleanup	= fio_vsyncio_cleanup,
	.queue		= fio_pvsyncio2_queue,
	.open_file	= generic_open_file,
	.close_file	= generic_close_file,
	.get_file_size	= generic_get_file_size,
	.flags		= FIO_SYNCIO,
	.options	= options,
	.option_struct_size	= sizeof(struct psyncv2_options),
};
#endif

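/*
 * Example job sections selecting these engines (a minimal sketch; the
 * filename, sizes, and depths below are placeholders, not defaults):
 *
 *	[seq-write]
 *	ioengine=psync
 *	rw=write
 *	bs=4k
 *	size=1g
 *	filename=/path/to/testfile
 *
 *	[batched-read]
 *	ioengine=vsync
 *	rw=read
 *	iodepth=16	; up to 16 iovecs gathered per readv(2)
 *
 *	[polled-randread]
 *	ioengine=pvsync2
 *	rw=randread
 *	hipri=1		; set RWF_HIPRI on each preadv2(2)
 */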
static void fio_init fio_syncio_register(void)
{
	register_ioengine(&ioengine_rw);
	register_ioengine(&ioengine_prw);
	register_ioengine(&ioengine_vrw);
#ifdef CONFIG_PWRITEV
	register_ioengine(&ioengine_pvrw);
#endif
#ifdef FIO_HAVE_PWRITEV2
	register_ioengine(&ioengine_pvrw2);
#endif
}

static void fio_exit fio_syncio_unregister(void)
{
	unregister_ioengine(&ioengine_rw);
	unregister_ioengine(&ioengine_prw);
	unregister_ioengine(&ioengine_vrw);
#ifdef CONFIG_PWRITEV
	unregister_ioengine(&ioengine_pvrw);
#endif
#ifdef FIO_HAVE_PWRITEV2
	unregister_ioengine(&ioengine_pvrw2);
#endif
}