1/*
2 * pmemblk: IO engine that uses NVML libpmemblk to read and write data
3 *
4 * Copyright (C) 2016 Hewlett Packard Enterprise Development LP
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public
16 * License along with this program; if not, write to the Free
17 * Software Foundation, Inc., 59 Temple Place, Suite 330,
18 * Boston, MA 02111-1307 USA
19 */
20
21/*
22 * pmemblk engine
23 *
24 * IO engine that uses libpmemblk to read and write data
25 *
26 * To use:
27 *   ioengine=pmemblk
28 *
29 * Other relevant settings:
30 *   thread=1   REQUIRED
31 *   iodepth=1
32 *   direct=1
33 *   unlink=1
34 *   filename=/mnt/pmem0/fiotestfile,BSIZE,FSIZEMiB
35 *
36 *   thread must be set to 1 for pmemblk as multiple processes cannot
37 *     open the same block pool file.
38 *
39 *   iodepth should be set to 1 as pmemblk is always synchronous.
40 *   Use numjobs to scale up.
41 *
42 *   direct=1 is implied as pmemblk is always direct. A warning message
43 *   is printed if this is not specified.
44 *
45 *   unlink=1 removes the block pool file after testing, and is optional.
46 *
47 *   The pmem device must have a DAX-capable filesystem and be mounted
48 *   with DAX enabled.  filename must point to a file on that filesystem.
49 *
50 *   Example:
51 *     mkfs.xfs /dev/pmem0
52 *     mkdir /mnt/pmem0
53 *     mount -o dax /dev/pmem0 /mnt/pmem0
54 *
55 *   When specifying the filename, if the block pool file does not already
56 *   exist, then the pmemblk engine creates the pool file if you specify
57 *   the block and file sizes.  BSIZE is the block size in bytes.
 *   FSIZEMiB is the pool file size in MiB.
59 *
60 *   See examples/pmemblk.fio for more.
61 *
62 */
63
64#include <stdio.h>
65#include <stdlib.h>
66#include <unistd.h>
67#include <sys/uio.h>
68#include <errno.h>
69#include <assert.h>
70#include <string.h>
71#include <libpmem.h>
72#include <libpmemblk.h>
73
74#include "../fio.h"
75
76/*
77 * libpmemblk
78 */
79typedef struct fio_pmemblk_file *fio_pmemblk_file_t;
80
81struct fio_pmemblk_file {
82	fio_pmemblk_file_t pmb_next;
83	char *pmb_filename;
84	uint64_t pmb_refcnt;
85	PMEMblkpool *pmb_pool;
86	size_t pmb_bsize;
87	size_t pmb_nblocks;
88};
89
90static fio_pmemblk_file_t Cache;
91
92static pthread_mutex_t CacheLock = PTHREAD_MUTEX_INITIALIZER;
93
94#define PMB_CREATE   (0x0001)	/* should create file */
95
96fio_pmemblk_file_t fio_pmemblk_cache_lookup(const char *filename)
97{
98	fio_pmemblk_file_t i;
99
100	for (i = Cache; i != NULL; i = i->pmb_next)
101		if (!strcmp(filename, i->pmb_filename))
102			return i;
103
104	return NULL;
105}
106
107static void fio_pmemblk_cache_insert(fio_pmemblk_file_t pmb)
108{
109	pmb->pmb_next = Cache;
110	Cache = pmb;
111}
112
113static void fio_pmemblk_cache_remove(fio_pmemblk_file_t pmb)
114{
115	fio_pmemblk_file_t i;
116
117	if (pmb == Cache) {
118		Cache = Cache->pmb_next;
119		pmb->pmb_next = NULL;
120		return;
121	}
122
123	for (i = Cache; i != NULL; i = i->pmb_next)
124		if (pmb == i->pmb_next) {
125			i->pmb_next = i->pmb_next->pmb_next;
126			pmb->pmb_next = NULL;
127			return;
128		}
129}
130
/*
 * split a file name of the form
 *
 *   path[,bsize,fsizemib]
 *
 * into its parts.  the sizes are carried in the file name rather
 * than in the fio "filesize" option because libpmemblk is given the
 * gross pool size, which is not the same as the net (usable) size
 * that fio would expect that option to mean.
 *
 * on return:
 *   *ppath   a heap copy of the path (the caller frees it), or NULL
 *            if the copy could not be allocated; in that case
 *            *pbsize and *pfsize are not written
 *   *pbsize  block size in bytes, or 0 if no sizes were given
 *   *pfsize  pool size in bytes (the user gives MiB), or 0 if no
 *            sizes were given
 *
 * both trailing fields must parse as non-zero decimal numbers;
 * otherwise the whole string is treated as the path.
 */
static void pmb_parse_path(const char *pathspec, char **ppath, uint64_t *pbsize,
			   uint64_t *pfsize)
{
	char *copy = strdup(pathspec);
	char *fsize_sep;
	char *bsize_sep;
	uint64_t mib = 0;
	uint64_t blk = 0;

	if (copy == NULL) {
		*ppath = NULL;
		return;
	}

	/* the last field, when numeric and non-zero, is the size in MiB */
	fsize_sep = strrchr(copy, ',');
	if (fsize_sep != NULL)
		mib = strtoull(fsize_sep + 1, NULL, 10);

	if (mib != 0) {
		/* cut it off so the next search lands on the block size */
		*fsize_sep = '\0';

		bsize_sep = strrchr(copy, ',');
		if (bsize_sep != NULL)
			blk = strtoull(bsize_sep + 1, NULL, 10);

		if (blk != 0) {
			*bsize_sep = '\0';
			*ppath = copy;
			*pbsize = blk;
			*pfsize = mib << 20;
			return;
		}

		/* only one numeric field was present: undo the cut */
		*fsize_sep = ',';
	}

	/* size specs not found */
	*ppath = copy;
	*pbsize = 0;
	*pfsize = 0;
}
183
184static fio_pmemblk_file_t pmb_open(const char *pathspec, int flags)
185{
186	fio_pmemblk_file_t pmb;
187	char *path = NULL;
188	uint64_t bsize = 0;
189	uint64_t fsize = 0;
190
191	pmb_parse_path(pathspec, &path, &bsize, &fsize);
192	if (!path)
193		return NULL;
194
195	pthread_mutex_lock(&CacheLock);
196
197	pmb = fio_pmemblk_cache_lookup(path);
198	if (!pmb) {
199		pmb = malloc(sizeof(*pmb));
200		if (!pmb)
201			goto error;
202
203		/* try opening existing first, create it if needed */
204		pmb->pmb_pool = pmemblk_open(path, bsize);
205		if (!pmb->pmb_pool && (errno == ENOENT) &&
206		    (flags & PMB_CREATE) && (0 < fsize) && (0 < bsize)) {
207			pmb->pmb_pool =
208			    pmemblk_create(path, bsize, fsize, 0644);
209		}
210		if (!pmb->pmb_pool) {
211			log_err("pmemblk: unable to open pmemblk pool file %s (%s)\n",
212			     path, strerror(errno));
213			goto error;
214		}
215
216		pmb->pmb_filename = path;
217		pmb->pmb_next = NULL;
218		pmb->pmb_refcnt = 0;
219		pmb->pmb_bsize = pmemblk_bsize(pmb->pmb_pool);
220		pmb->pmb_nblocks = pmemblk_nblock(pmb->pmb_pool);
221
222		fio_pmemblk_cache_insert(pmb);
223	}
224
225	pmb->pmb_refcnt += 1;
226
227	pthread_mutex_unlock(&CacheLock);
228
229	return pmb;
230
231error:
232	if (pmb) {
233		if (pmb->pmb_pool)
234			pmemblk_close(pmb->pmb_pool);
235		pmb->pmb_pool = NULL;
236		pmb->pmb_filename = NULL;
237		free(pmb);
238	}
239	if (path)
240		free(path);
241
242	pthread_mutex_unlock(&CacheLock);
243	return NULL;
244}
245
246static void pmb_close(fio_pmemblk_file_t pmb, const bool keep)
247{
248	pthread_mutex_lock(&CacheLock);
249
250	pmb->pmb_refcnt--;
251
252	if (!keep && !pmb->pmb_refcnt) {
253		pmemblk_close(pmb->pmb_pool);
254		pmb->pmb_pool = NULL;
255		free(pmb->pmb_filename);
256		pmb->pmb_filename = NULL;
257		fio_pmemblk_cache_remove(pmb);
258		free(pmb);
259	}
260
261	pthread_mutex_unlock(&CacheLock);
262}
263
264static int pmb_get_flags(struct thread_data *td, uint64_t *pflags)
265{
266	static int thread_warned = 0;
267	static int odirect_warned = 0;
268
269	uint64_t flags = 0;
270
271	if (!td->o.use_thread) {
272		if (!thread_warned) {
273			thread_warned = 1;
274			log_err("pmemblk: must set thread=1 for pmemblk engine\n");
275		}
276		return 1;
277	}
278
279	if (!td->o.odirect && !odirect_warned) {
280		odirect_warned = 1;
281		log_info("pmemblk: direct == 0, but pmemblk is always direct\n");
282	}
283
284	if (td->o.allow_create)
285		flags |= PMB_CREATE;
286
287	(*pflags) = flags;
288	return 0;
289}
290
291static int fio_pmemblk_open_file(struct thread_data *td, struct fio_file *f)
292{
293	uint64_t flags = 0;
294	fio_pmemblk_file_t pmb;
295
296	if (pmb_get_flags(td, &flags))
297		return 1;
298
299	pmb = pmb_open(f->file_name, flags);
300	if (!pmb)
301		return 1;
302
303	FILE_SET_ENG_DATA(f, pmb);
304	return 0;
305}
306
307static int fio_pmemblk_close_file(struct thread_data fio_unused *td,
308				  struct fio_file *f)
309{
310	fio_pmemblk_file_t pmb = FILE_ENG_DATA(f);
311
312	if (pmb)
313		pmb_close(pmb, false);
314
315	FILE_SET_ENG_DATA(f, NULL);
316	return 0;
317}
318
319static int fio_pmemblk_get_file_size(struct thread_data *td, struct fio_file *f)
320{
321	uint64_t flags = 0;
322	fio_pmemblk_file_t pmb = FILE_ENG_DATA(f);
323
324	if (fio_file_size_known(f))
325		return 0;
326
327	if (!pmb) {
328		if (pmb_get_flags(td, &flags))
329			return 1;
330		pmb = pmb_open(f->file_name, flags);
331		if (!pmb)
332			return 1;
333	}
334
335	f->real_file_size = pmb->pmb_bsize * pmb->pmb_nblocks;
336
337	fio_file_set_size_known(f);
338
339	if (!FILE_ENG_DATA(f))
340		pmb_close(pmb, true);
341
342	return 0;
343}
344
345static int fio_pmemblk_queue(struct thread_data *td, struct io_u *io_u)
346{
347	struct fio_file *f = io_u->file;
348	fio_pmemblk_file_t pmb = FILE_ENG_DATA(f);
349
350	unsigned long long off;
351	unsigned long len;
352	void *buf;
353
354	fio_ro_check(td, io_u);
355
356	switch (io_u->ddir) {
357	case DDIR_READ:
358	case DDIR_WRITE:
359		off = io_u->offset;
360		len = io_u->xfer_buflen;
361
362		io_u->error = EINVAL;
363		if (off % pmb->pmb_bsize)
364			break;
365		if (len % pmb->pmb_bsize)
366			break;
367		if ((off + len) / pmb->pmb_bsize > pmb->pmb_nblocks)
368			break;
369
370		io_u->error = 0;
371		buf = io_u->xfer_buf;
372		off /= pmb->pmb_bsize;
373		len /= pmb->pmb_bsize;
374		while (0 < len) {
375			if (io_u->ddir == DDIR_READ &&
376			   0 != pmemblk_read(pmb->pmb_pool, buf, off)) {
377				io_u->error = errno;
378				break;
379			} else if (0 != pmemblk_write(pmb->pmb_pool, buf, off)) {
380				io_u->error = errno;
381				break;
382			}
383			buf += pmb->pmb_bsize;
384			off++;
385			len--;
386		}
387		off *= pmb->pmb_bsize;
388		len *= pmb->pmb_bsize;
389		io_u->resid = io_u->xfer_buflen - (off - io_u->offset);
390		break;
391	case DDIR_SYNC:
392	case DDIR_DATASYNC:
393	case DDIR_SYNC_FILE_RANGE:
394		/* we're always sync'd */
395		io_u->error = 0;
396		break;
397	default:
398		io_u->error = EINVAL;
399		break;
400	}
401
402	return FIO_Q_COMPLETED;
403}
404
405static int fio_pmemblk_unlink_file(struct thread_data *td, struct fio_file *f)
406{
407	char *path = NULL;
408	uint64_t bsize = 0;
409	uint64_t fsize = 0;
410
411	/*
412	 * we need our own unlink in case the user has specified
413	 * the block and file sizes in the path name.  we parse
414	 * the file_name to determine the file name we actually used.
415	 */
416
417	pmb_parse_path(f->file_name, &path, &bsize, &fsize);
418	if (!path)
419		return ENOENT;
420
421	unlink(path);
422	free(path);
423	return 0;
424}
425
426static struct ioengine_ops ioengine = {
427	.name = "pmemblk",
428	.version = FIO_IOOPS_VERSION,
429	.queue = fio_pmemblk_queue,
430	.open_file = fio_pmemblk_open_file,
431	.close_file = fio_pmemblk_close_file,
432	.get_file_size = fio_pmemblk_get_file_size,
433	.unlink_file = fio_pmemblk_unlink_file,
434	.flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NOEXTEND | FIO_NODISKUTIL,
435};
436
437static void fio_init fio_pmemblk_register(void)
438{
439	register_ioengine(&ioengine);
440}
441
442static void fio_exit fio_pmemblk_unregister(void)
443{
444	unregister_ioengine(&ioengine);
445}
446