/*
 * binject engine
 *
 * IO engine that uses the Linux binject interface to directly inject
 * bios into block devices.
 *
 */
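
/*
 * Example job using the engine (a hypothetical illustration, not part
 * of the original source; assumes a binject-capable kernel and a
 * scratch block device):
 *
 *	[binject-test]
 *	ioengine=binject
 *	filename=/dev/sdX
 *	rw=randread
 *	bs=4k
 *	iodepth=32
 */
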
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>
#include <string.h>
#include <sys/poll.h>
#include <sys/types.h>
#include <sys/stat.h>

#include "../fio.h"

#ifdef FIO_HAVE_BINJECT

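/*
 * Per-thread engine state: one command slot and one event slot per
 * queued io_u (sized to iodepth), plus poll state per file.
 */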
struct binject_data {
	struct b_user_cmd *cmds;
	struct io_u **events;
	struct pollfd *pfds;
	int *fd_flags;
};

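/*
 * Per-file state: the sector size of the underlying device, the minor
 * number of the mapped binject node, and the node's open fd.
 */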
struct binject_file {
	unsigned int bs;
	int minor;
	int fd;
};

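/*
 * Fill in a b_user_cmd for this io_u: buffer, length, offset, and a
 * back-pointer so the completion can be matched to the io_u again.
 */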
static void binject_buc_init(struct binject_data *bd, struct io_u *io_u)
{
	struct b_user_cmd *buc = &io_u->buc;

	memset(buc, 0, sizeof(*buc));
	binject_buc_set_magic(buc);

	buc->buf = (unsigned long) io_u->xfer_buf;
	buc->len = io_u->xfer_buflen;
	buc->offset = io_u->offset;
	buc->usr_ptr = (unsigned long) io_u;

	buc->flags = B_FLAG_NOIDLE | B_FLAG_UNPLUG;
	assert(buc->buf);
}

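/*
 * Return 1 if any of the polled fds has data to read, 0 otherwise.
 */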
static int pollin_events(struct pollfd *pfds, int fds)
{
	int i;

	for (i = 0; i < fds; i++)
		if (pfds[i].revents & POLLIN)
			return 1;

	return 0;
}

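/*
 * Read completed commands from all files, retrying with a short sleep
 * until at least one completion (or an error) arrives.
 */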
static unsigned int binject_read_commands(struct thread_data *td, void *p,
					  int left, int *err)
{
	struct fio_file *f;
	int i, ret, events;

one_more:
	events = 0;
	for_each_file(td, f, i) {
		struct binject_file *bf = FILE_ENG_DATA(f);

		ret = read(bf->fd, p, left * sizeof(struct b_user_cmd));
		if (ret < 0) {
			if (errno == EAGAIN)
				continue;
			*err = -errno;
			td_verror(td, errno, "read");
			break;
		} else if (ret) {
			p += ret;
			events += ret / sizeof(struct b_user_cmd);
		}
	}

	if (*err || events)
		return events;

	usleep(1000);
	goto one_more;
}

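/*
 * Event reaping: poll the mapped devices for readable completions,
 * read the finished b_user_cmds back, and map each usr_ptr to its
 * originating io_u for ->event() to return.
 */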
static int fio_binject_getevents(struct thread_data *td, unsigned int min,
				 unsigned int max,
				 const struct timespec fio_unused *t)
{
	struct binject_data *bd = td->io_ops_data;
	int left = max, ret, r = 0, ev_index = 0;
	void *buf = bd->cmds;
	unsigned int i, events;
	struct fio_file *f;

	/*
	 * Fill in the file descriptors
	 */
	for_each_file(td, f, i) {
		struct binject_file *bf = FILE_ENG_DATA(f);

		/*
		 * don't block for min events == 0
		 */
		if (!min)
			bd->fd_flags[i] = fio_set_fd_nonblocking(bf->fd, "binject");
		else
			bd->fd_flags[i] = -1;

		bd->pfds[i].fd = bf->fd;
		bd->pfds[i].events = POLLIN;
	}

	while (left) {
		while (!min) {
			ret = poll(bd->pfds, td->o.nr_files, -1);
			if (ret < 0) {
				if (!r)
					r = -errno;
				td_verror(td, errno, "poll");
				break;
			} else if (!ret)
				continue;

			if (pollin_events(bd->pfds, td->o.nr_files))
				break;
		}

		if (r < 0)
			break;

		events = binject_read_commands(td, buf, left, &r);

		if (r < 0)
			break;

		left -= events;
		r += events;

		for (i = 0; i < events; i++) {
			struct b_user_cmd *buc = (struct b_user_cmd *) buf + i;

			bd->events[ev_index] = (struct io_u *) (unsigned long) buc->usr_ptr;
			ev_index++;
		}
	}

	if (!min) {
		for_each_file(td, f, i) {
			struct binject_file *bf = FILE_ENG_DATA(f);

			if (bd->fd_flags[i] == -1)
				continue;

			if (fcntl(bf->fd, F_SETFL, bd->fd_flags[i]) < 0)
				log_err("fio: binject failed to restore fcntl flags: %s\n", strerror(errno));
		}
	}

	if (r > 0)
		assert(ev_index == r);

	return r;
}

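/*
 * Submit one command by writing it to the mapped binject device node.
 */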
static int fio_binject_doio(struct thread_data *td, struct io_u *io_u)
{
	struct b_user_cmd *buc = &io_u->buc;
	struct binject_file *bf = FILE_ENG_DATA(io_u->file);
	int ret;

	ret = write(bf->fd, buc, sizeof(*buc));
	if (ret < 0)
		return ret;

	return FIO_Q_QUEUED;
}

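/*
 * Per-io_u setup: verify sector alignment and translate the data
 * direction (and barrier flag) into the matching binject command type.
 */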
static int fio_binject_prep(struct thread_data *td, struct io_u *io_u)
{
	struct binject_data *bd = td->io_ops_data;
	struct b_user_cmd *buc = &io_u->buc;
	struct binject_file *bf = FILE_ENG_DATA(io_u->file);

	if (io_u->xfer_buflen & (bf->bs - 1)) {
		log_err("read/write not sector aligned\n");
		return EINVAL;
	}

	if (io_u->ddir == DDIR_READ) {
		binject_buc_init(bd, io_u);
		buc->type = B_TYPE_READ;
	} else if (io_u->ddir == DDIR_WRITE) {
		binject_buc_init(bd, io_u);
		if (io_u->flags & IO_U_F_BARRIER)
			buc->type = B_TYPE_WRITEBARRIER;
		else
			buc->type = B_TYPE_WRITE;
	} else if (io_u->ddir == DDIR_TRIM) {
		binject_buc_init(bd, io_u);
		buc->type = B_TYPE_DISCARD;
	} else {
		assert(0);
	}

	return 0;
}

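/*
 * Queue hook: submission either fails outright (FIO_Q_COMPLETED with
 * the error set) or is left pending as FIO_Q_QUEUED.
 */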
static int fio_binject_queue(struct thread_data *td, struct io_u *io_u)
{
	int ret;

	fio_ro_check(td, io_u);

	ret = fio_binject_doio(td, io_u);

	if (ret < 0)
		io_u->error = errno;

	if (io_u->error) {
		td_verror(td, io_u->error, "xfer");
		return FIO_Q_COMPLETED;
	}

	return ret;
}

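/*
 * Return the io_u for a previously reaped event slot.
 */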
static struct io_u *fio_binject_event(struct thread_data *td, int event)
{
	struct binject_data *bd = td->io_ops_data;

	return bd->events[event];
}

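/*
 * Open the binject control device used to map and unmap per-device
 * nodes.
 */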
static int binject_open_ctl(struct thread_data *td)
{
	int fd;

	fd = open("/dev/binject-ctl", O_RDWR);
	if (fd < 0)
		td_verror(td, errno, "open binject-ctl");

	return fd;
}

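/*
 * Tear down a mapping: close the per-device node, then ask the control
 * device to delete the minor.
 */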
static void binject_unmap_dev(struct thread_data *td, struct binject_file *bf)
{
	struct b_ioctl_cmd bic;
	int fdb;

	if (bf->fd >= 0) {
		close(bf->fd);
		bf->fd = -1;
	}

	fdb = binject_open_ctl(td);
	if (fdb < 0)
		return;

	bic.minor = bf->minor;

	if (ioctl(fdb, B_IOCTL_DEL, &bic) < 0)
		td_verror(td, errno, "binject dev unmap");

	close(fdb);
}

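/*
 * Map a block device fd to a new /dev/binjectN node via the control
 * device, wait for udev to create the node, and open it.
 */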
static int binject_map_dev(struct thread_data *td, struct binject_file *bf,
			   int fd)
{
	struct b_ioctl_cmd bic;
	char name[80];
	struct stat sb;
	int fdb, dev_there, loops;

	fdb = binject_open_ctl(td);
	if (fdb < 0)
		return 1;

	bic.fd = fd;

	if (ioctl(fdb, B_IOCTL_ADD, &bic) < 0) {
		td_verror(td, errno, "binject dev map");
		close(fdb);
		return 1;
	}

	bf->minor = bic.minor;

	sprintf(name, "/dev/binject%u", bf->minor);

	/*
	 * Wait for udev to create the node...
	 */
	dev_there = loops = 0;
	do {
		if (!stat(name, &sb)) {
			dev_there = 1;
			break;
		}

		usleep(10000);
	} while (++loops < 100);

	close(fdb);

	if (!dev_there) {
		log_err("fio: timed out waiting for binject dev\n");
		goto err_unmap;
	}

	bf->fd = open(name, O_RDWR);
	if (bf->fd < 0) {
		td_verror(td, errno, "binject dev open");
err_unmap:
		binject_unmap_dev(td, bf);
		return 1;
	}

	return 0;
}

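/*
 * Close hook: unmap the binject node and free the per-file state
 * before the generic close.
 */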
static int fio_binject_close_file(struct thread_data *td, struct fio_file *f)
{
	struct binject_file *bf = FILE_ENG_DATA(f);

	if (bf) {
		binject_unmap_dev(td, bf);
		free(bf);
		FILE_SET_ENG_DATA(f, NULL);
		return generic_close_file(td, f);
	}

	return 0;
}

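/*
 * Open hook: open the block device, query its sector size, and map it
 * to a binject node.
 */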
static int fio_binject_open_file(struct thread_data *td, struct fio_file *f)
{
	struct binject_file *bf;
	unsigned int bs;
	int ret;

	ret = generic_open_file(td, f);
	if (ret)
		return 1;

	if (f->filetype != FIO_TYPE_BLOCK) {
		log_err("fio: binject only works with block devices\n");
		goto err_close;
	}
	if (ioctl(f->fd, BLKSSZGET, &bs) < 0) {
		td_verror(td, errno, "BLKSSZGET");
		goto err_close;
	}

	bf = malloc(sizeof(*bf));
	bf->bs = bs;
	bf->minor = bf->fd = -1;
	FILE_SET_ENG_DATA(f, bf);

	if (binject_map_dev(td, bf, f->fd)) {
err_close:
		ret = generic_close_file(td, f);
		return 1;
	}

	return 0;
}

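/*
 * Free the per-thread state allocated in init.
 */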
static void fio_binject_cleanup(struct thread_data *td)
{
	struct binject_data *bd = td->io_ops_data;

	if (bd) {
		free(bd->events);
		free(bd->cmds);
		free(bd->fd_flags);
		free(bd->pfds);
		free(bd);
	}
}

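/*
 * Allocate the per-thread command and event arrays (sized to the queue
 * depth) and the per-file poll state.
 */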
static int fio_binject_init(struct thread_data *td)
{
	struct binject_data *bd;

	bd = malloc(sizeof(*bd));
	memset(bd, 0, sizeof(*bd));

	bd->cmds = malloc(td->o.iodepth * sizeof(struct b_user_cmd));
	memset(bd->cmds, 0, td->o.iodepth * sizeof(struct b_user_cmd));

	bd->events = malloc(td->o.iodepth * sizeof(struct io_u *));
	memset(bd->events, 0, td->o.iodepth * sizeof(struct io_u *));

	bd->pfds = malloc(sizeof(struct pollfd) * td->o.nr_files);
	memset(bd->pfds, 0, sizeof(struct pollfd) * td->o.nr_files);

	bd->fd_flags = malloc(sizeof(int) * td->o.nr_files);
	memset(bd->fd_flags, 0, sizeof(int) * td->o.nr_files);

	td->io_ops_data = bd;
	return 0;
}

static struct ioengine_ops ioengine = {
	.name		= "binject",
	.version	= FIO_IOOPS_VERSION,
	.init		= fio_binject_init,
	.prep		= fio_binject_prep,
	.queue		= fio_binject_queue,
	.getevents	= fio_binject_getevents,
	.event		= fio_binject_event,
	.cleanup	= fio_binject_cleanup,
	.open_file	= fio_binject_open_file,
	.close_file	= fio_binject_close_file,
	.get_file_size	= generic_get_file_size,
	.flags		= FIO_RAWIO | FIO_BARRIER | FIO_MEMALIGN,
};

#else /* FIO_HAVE_BINJECT */

/*
 * When we have a proper configure system in place, we simply won't
 * build and install this io engine. For now, install a stub that just
 * complains and fails to load.
 */
static int fio_binject_init(struct thread_data fio_unused *td)
{
	log_err("fio: ioengine binject not available\n");
	return 1;
}

static struct ioengine_ops ioengine = {
	.name		= "binject",
	.version	= FIO_IOOPS_VERSION,
	.init		= fio_binject_init,
};

#endif

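/*
 * Register/unregister the engine with fio at load/unload time.
 */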
static void fio_init fio_binject_register(void)
{
	register_ioengine(&ioengine);
}

static void fio_exit fio_binject_unregister(void)
{
	unregister_ioengine(&ioengine);
}