1/*
2 * mmap engine
3 *
4 * IO engine that reads/writes from files by doing memcpy to/from
5 * a memory mapped region of the file.
6 *
7 */
8#include <stdio.h>
9#include <stdlib.h>
10#include <unistd.h>
11#include <errno.h>
12#include <sys/mman.h>
13
14#include "../fio.h"
15#include "../verify.h"
16
/*
 * Limits us to 1GB of mapped files in total
 */
#define MMAP_TOTAL_SZ	(1 * 1024 * 1024 * 1024UL)

/* Per-file mapping window size: MMAP_TOTAL_SZ / nr_files, set in init. */
static unsigned long mmap_map_size;
/* Largest power of two <= mmap_map_size (set in init; appears unused
 * in this file — NOTE(review): confirm no external user before removing). */
static unsigned long mmap_map_mask;

/* Per-file engine state, attached with FILE_SET_ENG_DATA(). */
struct fio_mmap_data {
	void *mmap_ptr;		/* current mapping, NULL when none exists */
	size_t mmap_sz;		/* length of the current mapping */
	off_t mmap_off;		/* file offset the mapping starts at */
};
30
31static int fio_mmap_file(struct thread_data *td, struct fio_file *f,
32			 size_t length, off_t off)
33{
34	struct fio_mmap_data *fmd = FILE_ENG_DATA(f);
35	int flags = 0;
36
37	if (td_rw(td))
38		flags = PROT_READ | PROT_WRITE;
39	else if (td_write(td)) {
40		flags = PROT_WRITE;
41
42		if (td->o.verify != VERIFY_NONE)
43			flags |= PROT_READ;
44	} else
45		flags = PROT_READ;
46
47	fmd->mmap_ptr = mmap(NULL, length, flags, MAP_SHARED, f->fd, off);
48	if (fmd->mmap_ptr == MAP_FAILED) {
49		fmd->mmap_ptr = NULL;
50		td_verror(td, errno, "mmap");
51		goto err;
52	}
53
54	if (!td_random(td)) {
55		if (posix_madvise(fmd->mmap_ptr, length, POSIX_MADV_SEQUENTIAL) < 0) {
56			td_verror(td, errno, "madvise");
57			goto err;
58		}
59	} else {
60		if (posix_madvise(fmd->mmap_ptr, length, POSIX_MADV_RANDOM) < 0) {
61			td_verror(td, errno, "madvise");
62			goto err;
63		}
64	}
65	if (posix_madvise(fmd->mmap_ptr, length, POSIX_MADV_DONTNEED) < 0) {
66		td_verror(td, errno, "madvise");
67		goto err;
68	}
69
70#ifdef FIO_MADV_FREE
71	if (f->filetype == FIO_TYPE_BD)
72		(void) posix_madvise(fmd->mmap_ptr, fmd->mmap_sz, FIO_MADV_FREE);
73#endif
74
75
76err:
77	if (td->error && fmd->mmap_ptr)
78		munmap(fmd->mmap_ptr, length);
79
80	return td->error;
81}
82
83/*
84 * Just mmap an appropriate portion, we cannot mmap the full extent
85 */
86static int fio_mmapio_prep_limited(struct thread_data *td, struct io_u *io_u)
87{
88	struct fio_file *f = io_u->file;
89	struct fio_mmap_data *fmd = FILE_ENG_DATA(f);
90
91	if (io_u->buflen > mmap_map_size) {
92		log_err("fio: bs too big for mmap engine\n");
93		return EIO;
94	}
95
96	fmd->mmap_sz = mmap_map_size;
97	if (fmd->mmap_sz  > f->io_size)
98		fmd->mmap_sz = f->io_size;
99
100	fmd->mmap_off = io_u->offset;
101
102	return fio_mmap_file(td, f, fmd->mmap_sz, fmd->mmap_off);
103}
104
105/*
106 * Attempt to mmap the entire file
107 */
108static int fio_mmapio_prep_full(struct thread_data *td, struct io_u *io_u)
109{
110	struct fio_file *f = io_u->file;
111	struct fio_mmap_data *fmd = FILE_ENG_DATA(f);
112	int ret;
113
114	if (fio_file_partial_mmap(f))
115		return EINVAL;
116	if (io_u->offset != (size_t) io_u->offset ||
117	    f->io_size != (size_t) f->io_size) {
118		fio_file_set_partial_mmap(f);
119		return EINVAL;
120	}
121
122	fmd->mmap_sz = f->io_size;
123	fmd->mmap_off = 0;
124
125	ret = fio_mmap_file(td, f, fmd->mmap_sz, fmd->mmap_off);
126	if (ret)
127		fio_file_set_partial_mmap(f);
128
129	return ret;
130}
131
132static int fio_mmapio_prep(struct thread_data *td, struct io_u *io_u)
133{
134	struct fio_file *f = io_u->file;
135	struct fio_mmap_data *fmd = FILE_ENG_DATA(f);
136	int ret;
137
138	/*
139	 * It fits within existing mapping, use it
140	 */
141	if (io_u->offset >= fmd->mmap_off &&
142	    io_u->offset + io_u->buflen < fmd->mmap_off + fmd->mmap_sz)
143		goto done;
144
145	/*
146	 * unmap any existing mapping
147	 */
148	if (fmd->mmap_ptr) {
149		if (munmap(fmd->mmap_ptr, fmd->mmap_sz) < 0)
150			return errno;
151		fmd->mmap_ptr = NULL;
152	}
153
154	if (fio_mmapio_prep_full(td, io_u)) {
155		td_clear_error(td);
156		ret = fio_mmapio_prep_limited(td, io_u);
157		if (ret)
158			return ret;
159	}
160
161done:
162	io_u->mmap_data = fmd->mmap_ptr + io_u->offset - fmd->mmap_off -
163				f->file_offset;
164	return 0;
165}
166
167static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
168{
169	struct fio_file *f = io_u->file;
170	struct fio_mmap_data *fmd = FILE_ENG_DATA(f);
171
172	fio_ro_check(td, io_u);
173
174	if (io_u->ddir == DDIR_READ)
175		memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen);
176	else if (io_u->ddir == DDIR_WRITE)
177		memcpy(io_u->mmap_data, io_u->xfer_buf, io_u->xfer_buflen);
178	else if (ddir_sync(io_u->ddir)) {
179		if (msync(fmd->mmap_ptr, fmd->mmap_sz, MS_SYNC)) {
180			io_u->error = errno;
181			td_verror(td, io_u->error, "msync");
182		}
183	} else if (io_u->ddir == DDIR_TRIM) {
184		int ret = do_io_u_trim(td, io_u);
185
186		if (!ret)
187			td_verror(td, io_u->error, "trim");
188	}
189
190
191	/*
192	 * not really direct, but should drop the pages from the cache
193	 */
194	if (td->o.odirect && ddir_rw(io_u->ddir)) {
195		if (msync(io_u->mmap_data, io_u->xfer_buflen, MS_SYNC) < 0) {
196			io_u->error = errno;
197			td_verror(td, io_u->error, "msync");
198		}
199		if (posix_madvise(io_u->mmap_data, io_u->xfer_buflen, POSIX_MADV_DONTNEED) < 0) {
200			io_u->error = errno;
201			td_verror(td, io_u->error, "madvise");
202		}
203	}
204
205	return FIO_Q_COMPLETED;
206}
207
208static int fio_mmapio_init(struct thread_data *td)
209{
210	struct thread_options *o = &td->o;
211	unsigned long shift, mask;
212
213	if ((td->o.rw_min_bs & page_mask) &&
214	    (o->odirect || o->fsync_blocks || o->fdatasync_blocks)) {
215		log_err("fio: mmap options dictate a minimum block size of "
216			"%llu bytes\n", (unsigned long long) page_size);
217		return 1;
218	}
219
220	mmap_map_size = MMAP_TOTAL_SZ / td->o.nr_files;
221	mask = mmap_map_size;
222	shift = 0;
223	do {
224		mask >>= 1;
225		if (!mask)
226			break;
227		shift++;
228	} while (1);
229
230	mmap_map_mask = 1UL << shift;
231	return 0;
232}
233
234static int fio_mmapio_open_file(struct thread_data *td, struct fio_file *f)
235{
236	struct fio_mmap_data *fmd;
237	int ret;
238
239	ret = generic_open_file(td, f);
240	if (ret)
241		return ret;
242
243	fmd = calloc(1, sizeof(*fmd));
244	if (!fmd) {
245		int fio_unused ret;
246		ret = generic_close_file(td, f);
247		return 1;
248	}
249
250	FILE_SET_ENG_DATA(f, fmd);
251	return 0;
252}
253
/*
 * Detach and free the per-file mmap state, clear the partial-mmap
 * flag, then close the file generically.
 */
static int fio_mmapio_close_file(struct thread_data *td, struct fio_file *f)
{
	struct fio_mmap_data *fmd = FILE_ENG_DATA(f);

	FILE_SET_ENG_DATA(f, NULL);
	fio_file_clear_partial_mmap(f);
	free(fmd);

	return generic_close_file(td, f);
}
264
/*
 * Engine ops table. FIO_SYNCIO: queue() completes every IO inline
 * (returns FIO_Q_COMPLETED). FIO_NOEXTEND: presumably because a
 * mapping cannot extend the file — NOTE(review): confirm against
 * engine flag docs.
 */
static struct ioengine_ops ioengine = {
	.name		= "mmap",
	.version	= FIO_IOOPS_VERSION,
	.init		= fio_mmapio_init,
	.prep		= fio_mmapio_prep,
	.queue		= fio_mmapio_queue,
	.open_file	= fio_mmapio_open_file,
	.close_file	= fio_mmapio_close_file,
	.get_file_size	= generic_get_file_size,
	.flags		= FIO_SYNCIO | FIO_NOEXTEND,
};
276
/* Constructor hook: register this engine with the fio core at load time. */
static void fio_init fio_mmapio_register(void)
{
	register_ioengine(&ioengine);
}
281
/* Destructor hook: unregister the engine at unload/exit time. */
static void fio_exit fio_mmapio_unregister(void)
{
	unregister_ioengine(&ioengine);
}
286