/*
 * mmap engine
 *
 * IO engine that reads/writes from files by doing memcpy to/from
 * a memory mapped region of the file.
 */
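
/*
 * Example (illustrative only): a minimal job file exercising this
 * engine might look like:
 *
 *	[mmap-test]
 *	ioengine=mmap
 *	rw=randread
 *	bs=4k
 *	size=128m
 */
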
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>

#include "../fio.h"
#include "../verify.h"

/*
 * Limits us to 1GiB of mapped files in total
 */
#define MMAP_TOTAL_SZ	(1 * 1024 * 1024 * 1024UL)

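/*
 * Size of the per-file mapping window, set at init time. mmap_map_mask
 * holds the largest power of two not exceeding that size; it is
 * computed but not otherwise used in this file.
 */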
static unsigned long mmap_map_size;
static unsigned long mmap_map_mask;

static int fio_mmap_file(struct thread_data *td, struct fio_file *f,
			 size_t length, off_t off)
{
	int flags = 0;
	int ret;

	if (td_rw(td)) {
		flags = PROT_READ | PROT_WRITE;
	} else if (td_write(td)) {
		flags = PROT_WRITE;

		if (td->o.verify != VERIFY_NONE)
			flags |= PROT_READ;
	} else {
		flags = PROT_READ;
	}

	f->mmap_ptr = mmap(NULL, length, flags, MAP_SHARED, f->fd, off);
	if (f->mmap_ptr == MAP_FAILED) {
		f->mmap_ptr = NULL;
		td_verror(td, errno, "mmap");
		goto err;
	}

	/*
	 * posix_madvise() returns an error number and does not set
	 * errno, so check its return value directly.
	 */
	ret = posix_madvise(f->mmap_ptr, length,
			    td_random(td) ? POSIX_MADV_RANDOM :
					    POSIX_MADV_SEQUENTIAL);
	if (ret) {
		td_verror(td, ret, "madvise");
		goto err;
	}

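	/*
	 * Success falls through with td->error == 0, so the munmap()
	 * below only runs on the error paths.
	 */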
err:
	if (td->error && f->mmap_ptr)
		munmap(f->mmap_ptr, length);

	return td->error;
}

/*
 * mmap a window of the file, for when we cannot map the full extent
 */
static int fio_mmapio_prep_limited(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;

	if (io_u->buflen > mmap_map_size) {
		log_err("fio: bs too big for mmap engine\n");
		return EIO;
	}

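	/*
	 * Map a window of at most mmap_map_size bytes, starting at this
	 * IO's offset and clamped to the file's IO size.
	 */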
	f->mmap_sz = mmap_map_size;
	if (f->mmap_sz > f->io_size)
		f->mmap_sz = f->io_size;

	f->mmap_off = io_u->offset;

	return fio_mmap_file(td, f, f->mmap_sz, f->mmap_off);
}

/*
 * Attempt to mmap the entire file
 */
static int fio_mmapio_prep_full(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;
	int ret;

	if (fio_file_partial_mmap(f))
		return EINVAL;

	f->mmap_sz = f->io_size;
	f->mmap_off = 0;

	ret = fio_mmap_file(td, f, f->mmap_sz, f->mmap_off);
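	/*
	 * If the full map fails, flag the file so we go straight to the
	 * windowed fallback next time.
	 */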
	if (ret)
		fio_file_set_partial_mmap(f);

	return ret;
}

static int fio_mmapio_prep(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;
	int ret;

	/*
	 * If it fits within the existing mapping, use it
	 */
	if (io_u->offset >= f->mmap_off &&
	    io_u->offset + io_u->buflen <= f->mmap_off + f->mmap_sz)
		goto done;

	/*
	 * unmap any existing mapping
	 */
	if (f->mmap_ptr) {
		if (munmap(f->mmap_ptr, f->mmap_sz) < 0)
			return errno;
		f->mmap_ptr = NULL;
	}

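	/*
	 * Prefer mapping the whole file; if that fails (e.g. not enough
	 * address space), fall back to a smaller moving window.
	 */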
	if (fio_mmapio_prep_full(td, io_u)) {
		td_clear_error(td);
		ret = fio_mmapio_prep_limited(td, io_u);
		if (ret)
			return ret;
	}

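	/*
	 * Point mmap_data at this IO's location within the current
	 * mapping window.
	 */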
done:
	io_u->mmap_data = f->mmap_ptr + io_u->offset - f->mmap_off -
				f->file_offset;
	return 0;
}

static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
{
	struct fio_file *f = io_u->file;

	fio_ro_check(td, io_u);

	if (io_u->ddir == DDIR_READ)
		memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen);
	else if (io_u->ddir == DDIR_WRITE)
		memcpy(io_u->mmap_data, io_u->xfer_buf, io_u->xfer_buflen);
	else if (ddir_sync(io_u->ddir)) {
		if (msync(f->mmap_ptr, f->mmap_sz, MS_SYNC)) {
			io_u->error = errno;
			td_verror(td, io_u->error, "msync");
		}
	} else if (io_u->ddir == DDIR_TRIM) {
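		/*
		 * do_io_u_trim() returns 0 on failure; the error it saw
		 * is left in io_u->error.
		 */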
		int ret = do_io_u_trim(td, io_u);

		if (!ret)
			td_verror(td, io_u->error, "trim");
	}

	/*
	 * not really direct, but should drop the pages from the cache
	 */
	if (td->o.odirect && ddir_rw(io_u->ddir)) {
		int ret;

		if (msync(io_u->mmap_data, io_u->xfer_buflen, MS_SYNC) < 0) {
			io_u->error = errno;
			td_verror(td, io_u->error, "msync");
		}
		ret = posix_madvise(io_u->mmap_data, io_u->xfer_buflen,
					POSIX_MADV_DONTNEED);
		if (ret) {
			io_u->error = ret;
			td_verror(td, io_u->error, "madvise");
		}
	}

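	/*
	 * Sync engine: the IO is complete by the time queue() returns.
	 */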
	return FIO_Q_COMPLETED;
}

static int fio_mmapio_init(struct thread_data *td)
{
	struct thread_options *o = &td->o;
	unsigned long shift, mask;

	if ((o->rw_min_bs & page_mask) &&
	    (o->odirect || o->fsync_blocks || o->fdatasync_blocks)) {
		log_err("fio: mmap options dictate a minimum block size of "
			"%llu bytes\n", (unsigned long long) page_size);
		return 1;
	}

	mmap_map_size = MMAP_TOTAL_SZ / o->nr_files;
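
	/*
	 * Find the largest power of two that fits in the per-file
	 * map size.
	 */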
	mask = mmap_map_size;
	shift = 0;
	while (mask >>= 1)
		shift++;

	mmap_map_mask = 1UL << shift;
	return 0;
}

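/*
 * FIO_SYNCIO: IO is complete when ->queue() returns. FIO_NOEXTEND:
 * this engine never extends the file beyond its current size.
 */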
static struct ioengine_ops ioengine = {
	.name		= "mmap",
	.version	= FIO_IOOPS_VERSION,
	.init		= fio_mmapio_init,
	.prep		= fio_mmapio_prep,
	.queue		= fio_mmapio_queue,
	.open_file	= generic_open_file,
	.close_file	= generic_close_file,
	.get_file_size	= generic_get_file_size,
	.flags		= FIO_SYNCIO | FIO_NOEXTEND,
};

static void fio_init fio_mmapio_register(void)
{
	register_ioengine(&ioengine);
}

static void fio_exit fio_mmapio_unregister(void)
{
	unregister_ioengine(&ioengine);
}