init.c revision 54592a37327e73362c7001714630fe5198def707
1/*
2 * This file contains job initialization and setup functions.
3 */
4#include <stdio.h>
5#include <stdlib.h>
6#include <unistd.h>
7#include <fcntl.h>
8#include <ctype.h>
9#include <string.h>
10#include <errno.h>
11#include <getopt.h>
12#include <assert.h>
13#include <sys/ipc.h>
14#include <sys/shm.h>
15#include <sys/types.h>
16#include <sys/stat.h>
17
18#include "fio.h"
19#include "parse.h"
20
/*
 * Base seed for repeatable random IO: init_random_state() multiplies it by
 * the job's thread number when rand_repeatable is set.
 */
#define FIO_RANDSEED		(0xb1899bedUL)

/*
 * Byte offset of member 'var' inside struct thread_data. Used to fill the
 * .off1-.off4 fields of the option table below.
 */
#define td_var_offset(var)	((size_t) &((struct thread_data *)0)->var)

/*
 * Option callbacks referenced from the .cb fields of the option table.
 */
static int str_mem_cb(void *, const char *);
static int str_lockmem_cb(void *, unsigned long *);
#ifdef FIO_HAVE_IOPRIO
static int str_prio_cb(void *, unsigned int *);
static int str_prioclass_cb(void *, unsigned int *);
#endif
static int str_exitall_cb(void);
static int str_cpumask_cb(void *, unsigned int *);
static int str_fst_cb(void *, const char *);
static int str_filename_cb(void *, const char *);
static int str_directory_cb(void *, const char *);
static int str_opendir_cb(void *, const char *);

/*
 * Two-step stringification, so a macro argument is expanded before '#'
 * turns it into a string literal.
 */
#define __stringify_1(x)	#x
#define __stringify(x)		__stringify_1(x)
40
41/*
42 * Map of job/command line options
43 */
44static struct fio_option options[] = {
45	{
46		.name	= "description",
47		.type	= FIO_OPT_STR_STORE,
48		.off1	= td_var_offset(description),
49		.help	= "Text job description",
50	},
51	{
52		.name	= "name",
53		.type	= FIO_OPT_STR_STORE,
54		.off1	= td_var_offset(name),
55		.help	= "Name of this job",
56	},
57	{
58		.name	= "directory",
59		.type	= FIO_OPT_STR_STORE,
60		.off1	= td_var_offset(directory),
61		.cb	= str_directory_cb,
62		.help	= "Directory to store files in",
63	},
64	{
65		.name	= "filename",
66		.type	= FIO_OPT_STR_STORE,
67		.off1	= td_var_offset(filename),
68		.cb	= str_filename_cb,
69		.help	= "File(s) to use for the workload",
70	},
71	{
72		.name	= "opendir",
73		.type	= FIO_OPT_STR_STORE,
74		.off1	= td_var_offset(opendir),
75		.cb	= str_opendir_cb,
76		.help	= "Recursively add files from this directory and down",
77	},
78	{
79		.name	= "rw",
80		.type	= FIO_OPT_STR,
81		.off1	= td_var_offset(td_ddir),
82		.help	= "IO direction",
83		.def	= "read",
84		.posval = {
85			  { .ival = "read",
86			    .oval = TD_DDIR_READ,
87			    .help = "Sequential read",
88			  },
89			  { .ival = "write",
90			    .oval = TD_DDIR_WRITE,
91			    .help = "Sequential write",
92			  },
93			  { .ival = "randread",
94			    .oval = TD_DDIR_RANDREAD,
95			    .help = "Random read",
96			  },
97			  { .ival = "randwrite",
98			    .oval = TD_DDIR_RANDWRITE,
99			    .help = "Random write",
100			  },
101			  { .ival = "rw",
102			    .oval = TD_DDIR_RW,
103			    .help = "Sequential read and write mix",
104			  },
105			  { .ival = "randrw",
106			    .oval = TD_DDIR_RANDRW,
107			    .help = "Random read and write mix"
108			  },
109		},
110	},
111	{
112		.name	= "ioengine",
113		.type	= FIO_OPT_STR_STORE,
114		.off1	= td_var_offset(ioengine),
115		.help	= "IO engine to use",
116		.def	= "sync",
117		.posval	= {
118			  { .ival = "sync",
119			    .help = "Use read/write",
120			  },
121#ifdef FIO_HAVE_LIBAIO
122			  { .ival = "libaio",
123			    .help = "Linux native asynchronous IO",
124			  },
125#endif
126#ifdef FIO_HAVE_POSIXAIO
127			  { .ival = "posixaio",
128			    .help = "POSIX asynchronous IO",
129			  },
130#endif
131			  { .ival = "mmap",
132			    .help = "Memory mapped IO",
133			  },
134#ifdef FIO_HAVE_SPLICE
135			  { .ival = "splice",
136			    .help = "splice/vmsplice based IO",
137			  },
138#endif
139#ifdef FIO_HAVE_SGIO
140			  { .ival = "sg",
141			    .help = "SCSI generic v3 IO",
142			  },
143#endif
144			  { .ival = "null",
145			    .help = "Testing engine (no data transfer)",
146			  },
147			  { .ival = "net",
148			    .help = "Network IO",
149			  },
150#ifdef FIO_HAVE_SYSLET
151			  { .ival = "syslet-rw",
152			    .help = "syslet enabled async pread/pwrite IO",
153			  },
154#endif
155			  { .ival = "cpuio",
156			    .help = "CPU cycler burner engine",
157			  },
158			  { .ival = "external",
159			    .help = "Load external engine (append name)",
160			  },
161		},
162	},
163	{
164		.name	= "iodepth",
165		.type	= FIO_OPT_INT,
166		.off1	= td_var_offset(iodepth),
167		.help	= "Amount of IO buffers to keep in flight",
168		.def	= "1",
169	},
170	{
171		.name	= "iodepth_batch",
172		.type	= FIO_OPT_INT,
173		.off1	= td_var_offset(iodepth_batch),
174		.help	= "Number of IO to submit in one go",
175	},
176	{
177		.name	= "iodepth_low",
178		.type	= FIO_OPT_INT,
179		.off1	= td_var_offset(iodepth_low),
180		.help	= "Low water mark for queuing depth",
181	},
182	{
183		.name	= "size",
184		.type	= FIO_OPT_STR_VAL,
185		.off1	= td_var_offset(total_file_size),
186		.help	= "Size of device or file",
187	},
188	{
189		.name	= "bs",
190		.type	= FIO_OPT_STR_VAL_INT,
191		.off1	= td_var_offset(bs[DDIR_READ]),
192		.off2	= td_var_offset(bs[DDIR_WRITE]),
193		.help	= "Block size unit",
194		.def	= "4k",
195	},
196	{
197		.name	= "bsrange",
198		.type	= FIO_OPT_RANGE,
199		.off1	= td_var_offset(min_bs[DDIR_READ]),
200		.off2	= td_var_offset(max_bs[DDIR_READ]),
201		.off3	= td_var_offset(min_bs[DDIR_WRITE]),
202		.off4	= td_var_offset(max_bs[DDIR_WRITE]),
203		.help	= "Set block size range (in more detail than bs)",
204	},
205	{
206		.name	= "bs_unaligned",
207		.type	= FIO_OPT_STR_SET,
208		.off1	= td_var_offset(bs_unaligned),
209		.help	= "Don't sector align IO buffer sizes",
210	},
211	{
212		.name	= "offset",
213		.type	= FIO_OPT_STR_VAL,
214		.off1	= td_var_offset(start_offset),
215		.help	= "Start IO from this offset",
216		.def	= "0",
217	},
218	{
219		.name	= "randrepeat",
220		.type	= FIO_OPT_BOOL,
221		.off1	= td_var_offset(rand_repeatable),
222		.help	= "Use repeatable random IO pattern",
223		.def	= "1",
224	},
225	{
226		.name	= "norandommap",
227		.type	= FIO_OPT_STR_SET,
228		.off1	= td_var_offset(norandommap),
229		.help	= "Accept potential duplicate random blocks",
230	},
231	{
232		.name	= "nrfiles",
233		.type	= FIO_OPT_INT,
234		.off1	= td_var_offset(nr_files),
235		.help	= "Split job workload between this number of files",
236		.def	= "1",
237	},
238	{
239		.name	= "openfiles",
240		.type	= FIO_OPT_INT,
241		.off1	= td_var_offset(open_files),
242		.help	= "Number of files to keep open at the same time",
243	},
244	{
245		.name	= "file_service_type",
246		.type	= FIO_OPT_STR,
247		.cb	= str_fst_cb,
248		.off1	= td_var_offset(file_service_type),
249		.help	= "How to select which file to service next",
250		.def	= "roundrobin",
251		.posval	= {
252			  { .ival = "random",
253			    .oval = FIO_FSERVICE_RANDOM,
254			    .help = "Choose a file at random",
255			  },
256			  { .ival = "roundrobin",
257			    .oval = FIO_FSERVICE_RR,
258			    .help = "Round robin select files",
259			  },
260		},
261	},
262	{
263		.name	= "fsync",
264		.type	= FIO_OPT_INT,
265		.off1	= td_var_offset(fsync_blocks),
266		.help	= "Issue fsync for writes every given number of blocks",
267		.def	= "0",
268	},
269	{
270		.name	= "direct",
271		.type	= FIO_OPT_BOOL,
272		.off1	= td_var_offset(odirect),
273		.help	= "Use O_DIRECT IO (negates buffered)",
274		.def	= "0",
275	},
276	{
277		.name	= "buffered",
278		.type	= FIO_OPT_BOOL,
279		.off1	= td_var_offset(odirect),
280		.neg	= 1,
281		.help	= "Use buffered IO (negates direct)",
282		.def	= "1",
283	},
284	{
285		.name	= "overwrite",
286		.type	= FIO_OPT_BOOL,
287		.off1	= td_var_offset(overwrite),
288		.help	= "When writing, set whether to overwrite current data",
289		.def	= "0",
290	},
291	{
292		.name	= "loops",
293		.type	= FIO_OPT_INT,
294		.off1	= td_var_offset(loops),
295		.help	= "Number of times to run the job",
296		.def	= "1",
297	},
298	{
299		.name	= "numjobs",
300		.type	= FIO_OPT_INT,
301		.off1	= td_var_offset(numjobs),
302		.help	= "Duplicate this job this many times",
303		.def	= "1",
304	},
305	{
306		.name	= "startdelay",
307		.type	= FIO_OPT_INT,
308		.off1	= td_var_offset(start_delay),
309		.help	= "Only start job when this period has passed",
310		.def	= "0",
311	},
312	{
313		.name	= "runtime",
314		.alias	= "timeout",
315		.type	= FIO_OPT_STR_VAL_TIME,
316		.off1	= td_var_offset(timeout),
317		.help	= "Stop workload when this amount of time has passed",
318		.def	= "0",
319	},
320	{
321		.name	= "mem",
322		.type	= FIO_OPT_STR,
323		.cb	= str_mem_cb,
324		.off1	= td_var_offset(mem_type),
325		.help	= "Backing type for IO buffers",
326		.def	= "malloc",
327		.posval	= {
328			  { .ival = "malloc",
329			    .oval = MEM_MALLOC,
330			    .help = "Use malloc(3) for IO buffers",
331			  },
332			  { .ival = "shm",
333			    .oval = MEM_SHM,
334			    .help = "Use shared memory segments for IO buffers",
335			  },
336#ifdef FIO_HAVE_HUGETLB
337			  { .ival = "shmhuge",
338			    .oval = MEM_SHMHUGE,
339			    .help = "Like shm, but use huge pages",
340			  },
341#endif
342			  { .ival = "mmap",
343			    .oval = MEM_MMAP,
344			    .help = "Use mmap(2) (file or anon) for IO buffers",
345			  },
346#ifdef FIO_HAVE_HUGETLB
347			  { .ival = "mmaphuge",
348			    .oval = MEM_MMAPHUGE,
349			    .help = "Like mmap, but use huge pages",
350			  },
351#endif
352		  },
353	},
354	{
355		.name	= "verify",
356		.type	= FIO_OPT_STR,
357		.off1	= td_var_offset(verify),
358		.help	= "Verify data written",
359		.def	= "0",
360		.posval = {
361			  { .ival = "0",
362			    .oval = VERIFY_NONE,
363			    .help = "Don't do IO verification",
364			  },
365			  { .ival = "crc32",
366			    .oval = VERIFY_CRC32,
367			    .help = "Use crc32 checksums for verification",
368			  },
369			  { .ival = "md5",
370			    .oval = VERIFY_MD5,
371			    .help = "Use md5 checksums for verification",
372			  },
373		},
374	},
375	{
376		.name	= "write_iolog",
377		.type	= FIO_OPT_STR_STORE,
378		.off1	= td_var_offset(write_iolog_file),
379		.help	= "Store IO pattern to file",
380	},
381	{
382		.name	= "read_iolog",
383		.type	= FIO_OPT_STR_STORE,
384		.off1	= td_var_offset(read_iolog_file),
385		.help	= "Playback IO pattern from file",
386	},
387	{
388		.name	= "exec_prerun",
389		.type	= FIO_OPT_STR_STORE,
390		.off1	= td_var_offset(exec_prerun),
391		.help	= "Execute this file prior to running job",
392	},
393	{
394		.name	= "exec_postrun",
395		.type	= FIO_OPT_STR_STORE,
396		.off1	= td_var_offset(exec_postrun),
397		.help	= "Execute this file after running job",
398	},
399#ifdef FIO_HAVE_IOSCHED_SWITCH
400	{
401		.name	= "ioscheduler",
402		.type	= FIO_OPT_STR_STORE,
403		.off1	= td_var_offset(ioscheduler),
404		.help	= "Use this IO scheduler on the backing device",
405	},
406#endif
407	{
408		.name	= "zonesize",
409		.type	= FIO_OPT_STR_VAL,
410		.off1	= td_var_offset(zone_size),
411		.help	= "Give size of an IO zone",
412		.def	= "0",
413	},
414	{
415		.name	= "zoneskip",
416		.type	= FIO_OPT_STR_VAL,
417		.off1	= td_var_offset(zone_skip),
418		.help	= "Space between IO zones",
419		.def	= "0",
420	},
421	{
422		.name	= "lockmem",
423		.type	= FIO_OPT_STR_VAL,
424		.cb	= str_lockmem_cb,
425		.help	= "Lock down this amount of memory",
426		.def	= "0",
427	},
428	{
429		.name	= "rwmixcycle",
430		.type	= FIO_OPT_INT,
431		.off1	= td_var_offset(rwmixcycle),
432		.help	= "Cycle period for mixed read/write workloads (msec)",
433		.def	= "500",
434	},
435	{
436		.name	= "rwmixread",
437		.type	= FIO_OPT_INT,
438		.off1	= td_var_offset(rwmixread),
439		.maxval	= 100,
440		.help	= "Percentage of mixed workload that is reads",
441		.def	= "50",
442	},
443	{
444		.name	= "rwmixwrite",
445		.type	= FIO_OPT_INT,
446		.off1	= td_var_offset(rwmixwrite),
447		.maxval	= 100,
448		.help	= "Percentage of mixed workload that is writes",
449		.def	= "50",
450	},
451	{
452		.name	= "nice",
453		.type	= FIO_OPT_INT,
454		.off1	= td_var_offset(nice),
455		.help	= "Set job CPU nice value",
456		.minval	= -19,
457		.maxval	= 20,
458		.def	= "0",
459	},
460#ifdef FIO_HAVE_IOPRIO
461	{
462		.name	= "prio",
463		.type	= FIO_OPT_INT,
464		.cb	= str_prio_cb,
465		.help	= "Set job IO priority value",
466		.minval	= 0,
467		.maxval	= 7,
468	},
469	{
470		.name	= "prioclass",
471		.type	= FIO_OPT_INT,
472		.cb	= str_prioclass_cb,
473		.help	= "Set job IO priority class",
474		.minval	= 0,
475		.maxval	= 3,
476	},
477#endif
478	{
479		.name	= "thinktime",
480		.type	= FIO_OPT_INT,
481		.off1	= td_var_offset(thinktime),
482		.help	= "Idle time between IO buffers (usec)",
483		.def	= "0",
484	},
485	{
486		.name	= "thinktime_spin",
487		.type	= FIO_OPT_INT,
488		.off1	= td_var_offset(thinktime_spin),
489		.help	= "Start think time by spinning this amount (usec)",
490		.def	= "0",
491	},
492	{
493		.name	= "thinktime_blocks",
494		.type	= FIO_OPT_INT,
495		.off1	= td_var_offset(thinktime_blocks),
496		.help	= "IO buffer period between 'thinktime'",
497		.def	= "1",
498	},
499	{
500		.name	= "rate",
501		.type	= FIO_OPT_INT,
502		.off1	= td_var_offset(rate),
503		.help	= "Set bandwidth rate",
504	},
505	{
506		.name	= "ratemin",
507		.type	= FIO_OPT_INT,
508		.off1	= td_var_offset(ratemin),
509		.help	= "The bottom limit accepted",
510	},
511	{
512		.name	= "ratecycle",
513		.type	= FIO_OPT_INT,
514		.off1	= td_var_offset(ratecycle),
515		.help	= "Window average for rate limits (msec)",
516		.def	= "1000",
517	},
518	{
519		.name	= "invalidate",
520		.type	= FIO_OPT_BOOL,
521		.off1	= td_var_offset(invalidate_cache),
522		.help	= "Invalidate buffer/page cache prior to running job",
523		.def	= "1",
524	},
525	{
526		.name	= "sync",
527		.type	= FIO_OPT_BOOL,
528		.off1	= td_var_offset(sync_io),
529		.help	= "Use O_SYNC for buffered writes",
530		.def	= "0",
531	},
532	{
533		.name	= "bwavgtime",
534		.type	= FIO_OPT_INT,
535		.off1	= td_var_offset(bw_avg_time),
536		.help	= "Time window over which to calculate bandwidth (msec)",
537		.def	= "500",
538	},
539	{
540		.name	= "create_serialize",
541		.type	= FIO_OPT_BOOL,
542		.off1	= td_var_offset(create_serialize),
543		.help	= "Serialize creating of job files",
544		.def	= "1",
545	},
546	{
547		.name	= "create_fsync",
548		.type	= FIO_OPT_BOOL,
549		.off1	= td_var_offset(create_fsync),
550		.help	= "Fsync file after creation",
551		.def	= "1",
552	},
553	{
554		.name	= "cpuload",
555		.type	= FIO_OPT_INT,
556		.off1	= td_var_offset(cpuload),
557		.help	= "Use this percentage of CPU",
558	},
559	{
560		.name	= "cpuchunks",
561		.type	= FIO_OPT_INT,
562		.off1	= td_var_offset(cpucycle),
563		.help	= "Length of the CPU burn cycles (usecs)",
564		.def	= "50000",
565	},
566#ifdef FIO_HAVE_CPU_AFFINITY
567	{
568		.name	= "cpumask",
569		.type	= FIO_OPT_INT,
570		.cb	= str_cpumask_cb,
571		.help	= "CPU affinity mask",
572	},
573#endif
574	{
575		.name	= "end_fsync",
576		.type	= FIO_OPT_BOOL,
577		.off1	= td_var_offset(end_fsync),
578		.help	= "Include fsync at the end of job",
579		.def	= "0",
580	},
581	{
582		.name	= "fsync_on_close",
583		.type	= FIO_OPT_BOOL,
584		.off1	= td_var_offset(fsync_on_close),
585		.help	= "fsync files on close",
586		.def	= "0",
587	},
588	{
589		.name	= "unlink",
590		.type	= FIO_OPT_BOOL,
591		.off1	= td_var_offset(unlink),
592		.help	= "Unlink created files after job has completed",
593		.def	= "0",
594	},
595	{
596		.name	= "exitall",
597		.type	= FIO_OPT_STR_SET,
598		.cb	= str_exitall_cb,
599		.help	= "Terminate all jobs when one exits",
600	},
601	{
602		.name	= "stonewall",
603		.type	= FIO_OPT_STR_SET,
604		.off1	= td_var_offset(stonewall),
605		.help	= "Insert a hard barrier between this job and previous",
606	},
607	{
608		.name	= "thread",
609		.type	= FIO_OPT_STR_SET,
610		.off1	= td_var_offset(use_thread),
611		.help	= "Use threads instead of forks",
612	},
613	{
614		.name	= "write_bw_log",
615		.type	= FIO_OPT_STR_SET,
616		.off1	= td_var_offset(write_bw_log),
617		.help	= "Write log of bandwidth during run",
618	},
619	{
620		.name	= "write_lat_log",
621		.type	= FIO_OPT_STR_SET,
622		.off1	= td_var_offset(write_lat_log),
623		.help	= "Write log of latency during run",
624	},
625	{
626		.name	= "hugepage-size",
627		.type	= FIO_OPT_STR_VAL,
628		.off1	= td_var_offset(hugepage_size),
629		.help	= "When using hugepages, specify size of each page",
630		.def	= __stringify(FIO_HUGE_PAGE),
631	},
632	{
633		.name	= "group_reporting",
634		.type	= FIO_OPT_STR_SET,
635		.off1	= td_var_offset(group_reporting),
636		.help	= "Do reporting on a per-group basis",
637	},
638	{
639		.name = NULL,
640	},
641};
642
/* Number of entries in options[], including the terminating NULL-name entry */
#define FIO_JOB_OPTS	(sizeof(options) / sizeof(struct fio_option))
/* Extra slots in long_options[] for the fio-only command line switches */
#define FIO_CMD_OPTS	(16)
/* NOTE(review): not used in the visible part of this file - presumably the
 * getopt 'val' that tags job options on the command line; confirm. */
#define FIO_GETOPT_JOB	(0x89988998)

/*
 * Command line options. These will contain the above, plus a few
 * extra that only pertain to fio itself and not jobs.
 *
 * Only the fio-specific switches are listed statically here; the array is
 * sized to also hold one entry per job option.
 */
static struct option long_options[FIO_JOB_OPTS + FIO_CMD_OPTS] = {
	{
		.name		= "output",
		.has_arg	= required_argument,
		.val		= 'o',
	},
	{
		.name		= "timeout",
		.has_arg	= required_argument,
		.val		= 't',
	},
	{
		.name		= "latency-log",
		.has_arg	= required_argument,
		.val		= 'l',
	},
	{
		.name		= "bandwidth-log",
		.has_arg	= required_argument,
		.val		= 'b',
	},
	{
		.name		= "minimal",
		.has_arg	= optional_argument,
		.val		= 'm',
	},
	{
		.name		= "version",
		.has_arg	= no_argument,
		.val		= 'v',
	},
	{
		.name		= "help",
		.has_arg	= no_argument,
		.val		= 'h',
	},
	{
		.name		= "cmdhelp",
		.has_arg	= optional_argument,
		.val		= 'c',
	},
	/* end-of-table marker */
	{
		.name		= NULL,
	},
};
696
/* Default job timeout; copied into def_thread.timeout by fill_def_thread() */
static int def_timeout = 0;

static char fio_version_string[] = "fio 1.14a";

/* NOTE(review): ini_file is not used in the visible part of the file -
 * presumably the list of job files given on the command line; confirm. */
static char **ini_file;
/* Upper bound on thread_number, enforced by get_new_job() */
static int max_jobs = MAX_JOBS;

/* Template job: receives the defaults and is copied into every new job */
struct thread_data def_thread;
/* Job array; get_new_job() hands out slots indexed by thread_number */
struct thread_data *threads = NULL;

/* Set to 1 by the "exitall" option callback */
int exitall_on_terminate = 0;
/* When non-zero, add_job() does not print its per-job summary line */
int terse_output = 0;
/* Set by the "lockmem" option callback */
unsigned long long mlock_size = 0;
/* Output streams, also exposed through get_f_out()/get_f_err() */
FILE *f_out = NULL;
FILE *f_err = NULL;

/* Defaults copied into def_thread by fill_def_thread() */
static int write_lat_log = 0;
int write_bw_log = 0;

/* Number of jobs added to the current group so far; see add_job() */
static int prev_group_jobs;
717
718FILE *get_f_out()
719{
720	return f_out;
721}
722
723FILE *get_f_err()
724{
725	return f_err;
726}
727
728/*
729 * Return a free job structure.
730 */
731static struct thread_data *get_new_job(int global, struct thread_data *parent)
732{
733	struct thread_data *td;
734
735	if (global)
736		return &def_thread;
737	if (thread_number >= max_jobs)
738		return NULL;
739
740	td = &threads[thread_number++];
741	*td = *parent;
742
743	td->thread_number = thread_number;
744	return td;
745}
746
747static void put_job(struct thread_data *td)
748{
749	if (td == &def_thread)
750		return;
751
752	if (td->error)
753		fprintf(f_out, "fio: %s\n", td->verror);
754
755	memset(&threads[td->thread_number - 1], 0, sizeof(*td));
756	thread_number--;
757}
758
759/*
760 * Lazy way of fixing up options that depend on each other. We could also
761 * define option callback handlers, but this is easier.
762 */
763static void fixup_options(struct thread_data *td)
764{
765	if (!td->rwmixread && td->rwmixwrite)
766		td->rwmixread = 100 - td->rwmixwrite;
767
768	if (td->write_iolog_file && td->read_iolog_file) {
769		log_err("fio: read iolog overrides write_iolog\n");
770		free(td->write_iolog_file);
771		td->write_iolog_file = NULL;
772	}
773
774	if (td->io_ops->flags & FIO_SYNCIO)
775		td->iodepth = 1;
776	else {
777		if (!td->iodepth)
778			td->iodepth = td->open_files;
779	}
780
781	/*
782	 * only really works for sequential io for now, and with 1 file
783	 */
784	if (td->zone_size && td_random(td) && td->open_files == 1)
785		td->zone_size = 0;
786
787	/*
788	 * Reads can do overwrites, we always need to pre-create the file
789	 */
790	if (td_read(td) || td_rw(td))
791		td->overwrite = 1;
792
793	if (!td->min_bs[DDIR_READ])
794		td->min_bs[DDIR_READ]= td->bs[DDIR_READ];
795	if (!td->max_bs[DDIR_READ])
796		td->max_bs[DDIR_READ] = td->bs[DDIR_READ];
797	if (!td->min_bs[DDIR_WRITE])
798		td->min_bs[DDIR_WRITE]= td->bs[DDIR_WRITE];
799	if (!td->max_bs[DDIR_WRITE])
800		td->max_bs[DDIR_WRITE] = td->bs[DDIR_WRITE];
801
802	td->rw_min_bs = min(td->min_bs[DDIR_READ], td->min_bs[DDIR_WRITE]);
803
804	if (td_read(td) && !td_rw(td))
805		td->verify = 0;
806
807	if (td->norandommap && td->verify != VERIFY_NONE) {
808		log_err("fio: norandommap given, verify disabled\n");
809		td->verify = VERIFY_NONE;
810	}
811	if (td->bs_unaligned && (td->odirect || td->io_ops->flags & FIO_RAWIO))
812		log_err("fio: bs_unaligned may not work with raw io\n");
813
814	/*
815	 * thinktime_spin must be less than thinktime
816	 */
817	if (td->thinktime_spin > td->thinktime)
818		td->thinktime_spin = td->thinktime;
819
820	/*
821	 * The low water mark cannot be bigger than the iodepth
822	 */
823	if (td->iodepth_low > td->iodepth || !td->iodepth_low) {
824		/*
825		 * syslet work around - if the workload is sequential,
826		 * we want to let the queue drain all the way down to
827		 * avoid seeking between async threads
828		 */
829		if (!strcmp(td->io_ops->name, "syslet-rw") && !td_random(td))
830			td->iodepth_low = 1;
831		else
832			td->iodepth_low = td->iodepth;
833	}
834
835	/*
836	 * If batch number isn't set, default to the same as iodepth
837	 */
838	if (td->iodepth_batch > td->iodepth || !td->iodepth_batch)
839		td->iodepth_batch = td->iodepth;
840
841	if (td->nr_files > td->files_index)
842		td->nr_files = td->files_index;
843
844	if (td->open_files > td->nr_files || !td->open_files)
845		td->open_files = td->nr_files;
846}
847
/*
 * Format 'val' using the largest power-of-1024 suffix that divides it
 * evenly, e.g. 4096 -> "4K", 1048576 -> "1M", 1000 -> "1000".
 *
 * Returns a malloc()ed string that the caller must free(), or NULL if the
 * allocation failed.
 */
static char *to_kmg(unsigned int val)
{
	/* first entry is "no suffix"; the trailing 0 stops the loop */
	static const char post[] = { 0, 'K', 'M', 'G', 'T', 'P', 'E', 0 };
	const size_t len = 32;
	const char *p = post;
	char *buf = malloc(len);

	if (!buf)
		return NULL;

	do {
		/* stop at the first unit that would lose precision */
		if (val & 1023)
			break;

		val >>= 10;
		p++;
	} while (*p);

	snprintf(buf, len, "%u%c", val, *p);
	return buf;
}
868
/*
 * External engines are specified as "external:name.o". Return the part
 * following the first ':' after passing it through strip_blank_front()
 * and strip_blank_end(), or 'str' itself when it contains no ':'.
 */
static const char *get_engine_name(const char *str)
{
	char *sep = strchr(str, ':');

	if (sep) {
		char *name = sep + 1;

		strip_blank_front(&name);
		strip_blank_end(name);
		return name;
	}

	return str;
}
882
/*
 * Return 1 if 'filename' can be lstat()ed and is something other than a
 * regular file, 0 if it doesn't exist (or can't be examined) or is a
 * regular file.
 */
static int exists_and_not_file(const char *filename)
{
	struct stat st;

	if (lstat(filename, &st) != -1 && !S_ISREG(st.st_mode))
		return 1;

	return 0;
}
895
896/*
897 * Adds a job to the list of things todo. Sanitizes the various options
898 * to make sure we don't have conflicts, and initializes various
899 * members of td.
900 */
901static int add_job(struct thread_data *td, const char *jobname, int job_add_num)
902{
903	const char *ddir_str[] = { NULL, "read", "write", "rw", NULL,
904				   "randread", "randwrite", "randrw" };
905	unsigned int i;
906	struct fio_file *f;
907	const char *engine;
908	char fname[PATH_MAX];
909	int numjobs, file_alloced;
910
911	/*
912	 * the def_thread is just for options, it's not a real job
913	 */
914	if (td == &def_thread)
915		return 0;
916
917	engine = get_engine_name(td->ioengine);
918	td->io_ops = load_ioengine(td, engine);
919	if (!td->io_ops) {
920		log_err("fio: failed to load engine %s\n", engine);
921		return 1;
922	}
923
924	if (td->use_thread)
925		nr_thread++;
926	else
927		nr_process++;
928
929	if (td->odirect)
930		td->io_ops->flags |= FIO_RAWIO;
931
932	file_alloced = 0;
933	if (!td->filename && !td->files_index) {
934		file_alloced = 1;
935
936		if (td->nr_files == 1 && exists_and_not_file(jobname))
937			add_file(td, jobname);
938		else {
939			for (i = 0; i < td->nr_files; i++) {
940				sprintf(fname, "%s.%d.%d", jobname, td->thread_number, i);
941				add_file(td, fname);
942			}
943		}
944	}
945
946	fixup_options(td);
947
948	for_each_file(td, f, i) {
949		if (td->directory && f->filetype == FIO_TYPE_FILE) {
950			sprintf(fname, "%s/%s", td->directory, f->file_name);
951			f->file_name = strdup(fname);
952		}
953	}
954
955	td->mutex = fio_sem_init(0);
956
957	td->ts.clat_stat[0].min_val = td->ts.clat_stat[1].min_val = ULONG_MAX;
958	td->ts.slat_stat[0].min_val = td->ts.slat_stat[1].min_val = ULONG_MAX;
959	td->ts.bw_stat[0].min_val = td->ts.bw_stat[1].min_val = ULONG_MAX;
960
961	if ((td->stonewall || td->numjobs > 1) && prev_group_jobs) {
962		prev_group_jobs = 0;
963		groupid++;
964	}
965
966	td->groupid = groupid;
967	prev_group_jobs++;
968
969	if (setup_rate(td))
970		goto err;
971
972	if (td->write_lat_log) {
973		setup_log(&td->ts.slat_log);
974		setup_log(&td->ts.clat_log);
975	}
976	if (td->write_bw_log)
977		setup_log(&td->ts.bw_log);
978
979	if (!td->name)
980		td->name = strdup(jobname);
981
982	if (!terse_output) {
983		if (!job_add_num) {
984			if (!strcmp(td->io_ops->name, "cpuio"))
985				fprintf(f_out, "%s: ioengine=cpu, cpuload=%u, cpucycle=%u\n", td->name, td->cpuload, td->cpucycle);
986			else {
987				char *c1, *c2, *c3, *c4;
988
989				c1 = to_kmg(td->min_bs[DDIR_READ]);
990				c2 = to_kmg(td->max_bs[DDIR_READ]);
991				c3 = to_kmg(td->min_bs[DDIR_WRITE]);
992				c4 = to_kmg(td->max_bs[DDIR_WRITE]);
993
994				fprintf(f_out, "%s: (g=%d): rw=%s, bs=%s-%s/%s-%s, ioengine=%s, iodepth=%u\n", td->name, td->groupid, ddir_str[td->td_ddir], c1, c2, c3, c4, td->io_ops->name, td->iodepth);
995
996				free(c1);
997				free(c2);
998				free(c3);
999				free(c4);
1000			}
1001		} else if (job_add_num == 1)
1002			fprintf(f_out, "...\n");
1003	}
1004
1005	/*
1006	 * recurse add identical jobs, clear numjobs and stonewall options
1007	 * as they don't apply to sub-jobs
1008	 */
1009	numjobs = td->numjobs;
1010	while (--numjobs) {
1011		struct thread_data *td_new = get_new_job(0, td);
1012
1013		if (!td_new)
1014			goto err;
1015
1016		td_new->numjobs = 1;
1017		td_new->stonewall = 0;
1018
1019		if (file_alloced) {
1020			td_new->filename = NULL;
1021			td_new->files_index = 0;
1022			td_new->files = NULL;
1023		}
1024
1025		job_add_num = numjobs - 1;
1026
1027		if (add_job(td_new, jobname, job_add_num))
1028			goto err;
1029	}
1030
1031	if (td->numjobs > 1) {
1032		groupid++;
1033		prev_group_jobs = 0;
1034	}
1035
1036	return 0;
1037err:
1038	put_job(td);
1039	return -1;
1040}
1041
1042/*
1043 * Initialize the various random states we need (random io, block size ranges,
1044 * read/write mix, etc).
1045 */
1046int init_random_state(struct thread_data *td)
1047{
1048	unsigned long seeds[5];
1049	int fd, num_maps, blocks;
1050	struct fio_file *f;
1051	unsigned int i;
1052
1053	if (td->io_ops->flags & FIO_DISKLESSIO)
1054		return 0;
1055
1056	fd = open("/dev/urandom", O_RDONLY);
1057	if (fd == -1) {
1058		td_verror(td, errno, "open");
1059		return 1;
1060	}
1061
1062	if (read(fd, seeds, sizeof(seeds)) < (int) sizeof(seeds)) {
1063		td_verror(td, EIO, "read");
1064		close(fd);
1065		return 1;
1066	}
1067
1068	close(fd);
1069
1070	os_random_seed(seeds[0], &td->bsrange_state);
1071	os_random_seed(seeds[1], &td->verify_state);
1072	os_random_seed(seeds[2], &td->rwmix_state);
1073
1074	if (td->file_service_type == FIO_FSERVICE_RANDOM)
1075		os_random_seed(seeds[3], &td->next_file_state);
1076
1077	if (!td_random(td))
1078		return 0;
1079
1080	if (td->rand_repeatable)
1081		seeds[4] = FIO_RANDSEED * td->thread_number;
1082
1083	if (!td->norandommap) {
1084		for_each_file(td, f, i) {
1085			blocks = (f->real_file_size + td->rw_min_bs - 1) / td->rw_min_bs;
1086			num_maps = (blocks + BLOCKS_PER_MAP-1)/ BLOCKS_PER_MAP;
1087			f->file_map = malloc(num_maps * sizeof(long));
1088			if (!f->file_map) {
1089				log_err("fio: failed allocating random map. If running a large number of jobs, try the 'norandommap' option\n");
1090				return 1;
1091			}
1092			f->num_maps = num_maps;
1093			memset(f->file_map, 0, num_maps * sizeof(long));
1094		}
1095	}
1096
1097	os_random_seed(seeds[4], &td->random_state);
1098	return 0;
1099}
1100
1101static void fill_cpu_mask(os_cpu_mask_t cpumask, int cpu)
1102{
1103#ifdef FIO_HAVE_CPU_AFFINITY
1104	unsigned int i;
1105
1106	CPU_ZERO(&cpumask);
1107
1108	for (i = 0; i < sizeof(int) * 8; i++) {
1109		if ((1 << i) & cpu)
1110			CPU_SET(i, &cpumask);
1111	}
1112#endif
1113}
1114
/*
 * Return 1 if 'line' holds nothing of interest: it is empty, consists of
 * whitespace/control characters only, or the first other character is a
 * comment marker (';' or '#'). Return 0 otherwise.
 */
static int is_empty_or_comment(char *line)
{
	const unsigned char *p;

	/*
	 * Walk the string as unsigned char: the <ctype.h> functions are
	 * undefined for negative values, which plain char can produce for
	 * bytes >= 0x80.
	 */
	for (p = (const unsigned char *) line; *p; p++) {
		if (*p == ';' || *p == '#')
			return 1;
		if (!isspace(*p) && !iscntrl(*p))
			return 0;
	}

	return 1;
}
1130
/*
 * Options such as mmap/mmaphuge may carry a ":/foo/bar/file" postfix.
 * Return a strdup()ed copy of what follows the first ':' (after
 * strip_blank_front()/strip_blank_end()), or NULL if there is no ':'.
 */
static char *get_opt_postfix(const char *str)
{
	char *tail = strchr(str, ':');

	if (tail == NULL)
		return NULL;

	tail += 1;
	strip_blank_front(&tail);
	strip_blank_end(tail);
	return strdup(tail);
}
1146
1147static int str_mem_cb(void *data, const char *mem)
1148{
1149	struct thread_data *td = data;
1150
1151	if (td->mem_type == MEM_MMAPHUGE || td->mem_type == MEM_MMAP) {
1152		td->mmapfile = get_opt_postfix(mem);
1153		if (td->mem_type == MEM_MMAPHUGE && !td->mmapfile) {
1154			log_err("fio: mmaphuge:/path/to/file\n");
1155			return 1;
1156		}
1157	}
1158
1159	return 0;
1160}
1161
1162static int str_lockmem_cb(void fio_unused *data, unsigned long *val)
1163{
1164	mlock_size = *val;
1165	return 0;
1166}
1167
1168#ifdef FIO_HAVE_IOPRIO
1169static int str_prioclass_cb(void *data, unsigned int *val)
1170{
1171	struct thread_data *td = data;
1172
1173	td->ioprio |= *val << IOPRIO_CLASS_SHIFT;
1174	return 0;
1175}
1176
1177static int str_prio_cb(void *data, unsigned int *val)
1178{
1179	struct thread_data *td = data;
1180
1181	td->ioprio |= *val;
1182	return 0;
1183}
1184#endif
1185
/*
 * "exitall" option callback: sets the global exitall_on_terminate flag.
 * The setting is global, not per job. Always returns 0.
 */
static int str_exitall_cb(void)
{
	exitall_on_terminate = 1;
	return 0;
}
1191
/*
 * "cpumask" option callback: bit N of *val selects CPU N for the job.
 *
 * The job's mask is filled in directly. It used to be handed to a helper
 * by value, so the helper only ever changed its own copy and the option
 * had no effect. Does nothing without FIO_HAVE_CPU_AFFINITY. Always
 * returns 0.
 */
static int str_cpumask_cb(void *data, unsigned int *val)
{
#ifdef FIO_HAVE_CPU_AFFINITY
	struct thread_data *td = data;
	unsigned int i;

	CPU_ZERO(&td->cpumask);

	for (i = 0; i < sizeof(*val) * 8; i++) {
		if (*val & (1U << i))
			CPU_SET(i, &td->cpumask);
	}
#endif
	return 0;
}
1199
1200static int str_fst_cb(void *data, const char *str)
1201{
1202	struct thread_data *td = data;
1203	char *nr = get_opt_postfix(str);
1204
1205	td->file_service_nr = 1;
1206	if (nr)
1207		td->file_service_nr = atoi(nr);
1208
1209	return 0;
1210}
1211
1212static int str_filename_cb(void *data, const char *input)
1213{
1214	struct thread_data *td = data;
1215	char *fname, *str, *p;
1216
1217	p = str = strdup(input);
1218
1219	strip_blank_front(&str);
1220	strip_blank_end(str);
1221
1222	if (!td->files_index)
1223		td->nr_files = 0;
1224
1225	while ((fname = strsep(&str, ":")) != NULL) {
1226		if (!strlen(fname))
1227			break;
1228		add_file(td, fname);
1229		td->nr_files++;
1230	}
1231
1232	free(p);
1233	return 0;
1234}
1235
1236static int str_directory_cb(void *data, const char fio_unused *str)
1237{
1238	struct thread_data *td = data;
1239	struct stat sb;
1240
1241	if (lstat(td->directory, &sb) < 0) {
1242		log_err("fio: %s is not a directory\n", td->directory);
1243		td_verror(td, errno, "lstat");
1244		return 1;
1245	}
1246	if (!S_ISDIR(sb.st_mode)) {
1247		log_err("fio: %s is not a directory\n", td->directory);
1248		return 1;
1249	}
1250
1251	return 0;
1252}
1253
1254static int str_opendir_cb(void *data, const char fio_unused *str)
1255{
1256	struct thread_data *td = data;
1257
1258	if (!td->files_index)
1259		td->nr_files = 0;
1260
1261	return add_dir_files(td, td->opendir);
1262}
1263
1264/*
1265 * This is our [ini] type file parser.
1266 */
1267static int parse_jobs_ini(char *file, int stonewall_flag)
1268{
1269	unsigned int global;
1270	struct thread_data *td;
1271	char *string, *name;
1272	fpos_t off;
1273	FILE *f;
1274	char *p;
1275	int ret = 0, stonewall;
1276
1277	f = fopen(file, "r");
1278	if (!f) {
1279		perror("fopen job file");
1280		return 1;
1281	}
1282
1283	string = malloc(4096);
1284	name = malloc(256);
1285	memset(name, 0, 256);
1286
1287	stonewall = stonewall_flag;
1288	do {
1289		p = fgets(string, 4095, f);
1290		if (!p)
1291			break;
1292		if (is_empty_or_comment(p))
1293			continue;
1294		if (sscanf(p, "[%255s]", name) != 1)
1295			continue;
1296
1297		global = !strncmp(name, "global", 6);
1298
1299		name[strlen(name) - 1] = '\0';
1300
1301		td = get_new_job(global, &def_thread);
1302		if (!td) {
1303			ret = 1;
1304			break;
1305		}
1306
1307		/*
1308		 * Seperate multiple job files by a stonewall
1309		 */
1310		if (!global && stonewall) {
1311			td->stonewall = stonewall;
1312			stonewall = 0;
1313		}
1314
1315		fgetpos(f, &off);
1316		while ((p = fgets(string, 4096, f)) != NULL) {
1317			if (is_empty_or_comment(p))
1318				continue;
1319
1320			strip_blank_front(&p);
1321
1322			if (p[0] == '[')
1323				break;
1324
1325			strip_blank_end(p);
1326
1327			fgetpos(f, &off);
1328
1329			/*
1330			 * Don't break here, continue parsing options so we
1331			 * dump all the bad ones. Makes trial/error fixups
1332			 * easier on the user.
1333			 */
1334			ret |= parse_option(p, options, td);
1335		}
1336
1337		if (!ret) {
1338			fsetpos(f, &off);
1339			ret = add_job(td, name, 0);
1340		} else {
1341			log_err("fio: job %s dropped\n", name);
1342			put_job(td);
1343		}
1344	} while (!ret);
1345
1346	free(string);
1347	free(name);
1348	fclose(f);
1349	return ret;
1350}
1351
1352static int fill_def_thread(void)
1353{
1354	memset(&def_thread, 0, sizeof(def_thread));
1355
1356	if (fio_getaffinity(getpid(), &def_thread.cpumask) == -1) {
1357		perror("sched_getaffinity");
1358		return 1;
1359	}
1360
1361	/*
1362	 * fill default options
1363	 */
1364	fill_default_options(&def_thread, options);
1365
1366	def_thread.timeout = def_timeout;
1367	def_thread.write_bw_log = write_bw_log;
1368	def_thread.write_lat_log = write_lat_log;
1369
1370#ifdef FIO_HAVE_DISK_UTIL
1371	def_thread.do_disk_util = 1;
1372#endif
1373
1374	return 0;
1375}
1376
1377static void usage(void)
1378{
1379	printf("%s\n", fio_version_string);
1380	printf("\t--output\tWrite output to file\n");
1381	printf("\t--timeout\tRuntime in seconds\n");
1382	printf("\t--latency-log\tGenerate per-job latency logs\n");
1383	printf("\t--bandwidth-log\tGenerate per-job bandwidth logs\n");
1384	printf("\t--minimal\tMinimal (terse) output\n");
1385	printf("\t--version\tPrint version info and exit\n");
1386	printf("\t--help\t\tPrint this page\n");
1387	printf("\t--cmdhelp=cmd\tPrint command help, \"all\" for all of them\n");
1388}
1389
1390static int parse_cmd_line(int argc, char *argv[])
1391{
1392	struct thread_data *td = NULL;
1393	int c, ini_idx = 0, lidx, ret, dont_add_job = 0;
1394
1395	while ((c = getopt_long_only(argc, argv, "", long_options, &lidx)) != -1) {
1396		switch (c) {
1397		case 't':
1398			def_timeout = atoi(optarg);
1399			break;
1400		case 'l':
1401			write_lat_log = 1;
1402			break;
1403		case 'w':
1404			write_bw_log = 1;
1405			break;
1406		case 'o':
1407			f_out = fopen(optarg, "w+");
1408			if (!f_out) {
1409				perror("fopen output");
1410				exit(1);
1411			}
1412			f_err = f_out;
1413			break;
1414		case 'm':
1415			terse_output = 1;
1416			break;
1417		case 'h':
1418			usage();
1419			exit(0);
1420		case 'c':
1421			ret = show_cmd_help(options, optarg);
1422			exit(ret);
1423		case 'v':
1424			printf("%s\n", fio_version_string);
1425			exit(0);
1426		case FIO_GETOPT_JOB: {
1427			const char *opt = long_options[lidx].name;
1428			char *val = optarg;
1429
1430			if (!strncmp(opt, "name", 4) && td) {
1431				ret = add_job(td, td->name ?: "fio", 0);
1432				if (ret) {
1433					put_job(td);
1434					return 0;
1435				}
1436				td = NULL;
1437			}
1438			if (!td) {
1439				int global = !strncmp(val, "global", 6);
1440
1441				td = get_new_job(global, &def_thread);
1442				if (!td)
1443					return 0;
1444			}
1445
1446			ret = parse_cmd_option(opt, val, options, td);
1447			if (ret)
1448				dont_add_job = 1;
1449			break;
1450		}
1451		default:
1452			break;
1453		}
1454	}
1455
1456	if (td) {
1457		if (dont_add_job)
1458			put_job(td);
1459		else {
1460			ret = add_job(td, td->name ?: "fio", 0);
1461			if (ret)
1462				put_job(td);
1463		}
1464	}
1465
1466	while (optind < argc) {
1467		ini_idx++;
1468		ini_file = realloc(ini_file, ini_idx * sizeof(char *));
1469		ini_file[ini_idx - 1] = strdup(argv[optind]);
1470		optind++;
1471	}
1472
1473	return ini_idx;
1474}
1475
1476static void free_shm(void)
1477{
1478	struct shmid_ds sbuf;
1479
1480	if (threads) {
1481		shmdt((void *) threads);
1482		threads = NULL;
1483		shmctl(shm_id, IPC_RMID, &sbuf);
1484	}
1485}
1486
1487/*
1488 * The thread area is shared between the main process and the job
1489 * threads/processes. So setup a shared memory segment that will hold
1490 * all the job info.
1491 */
1492static int setup_thread_area(void)
1493{
1494	/*
1495	 * 1024 is too much on some machines, scale max_jobs if
1496	 * we get a failure that looks like too large a shm segment
1497	 */
1498	do {
1499		size_t size = max_jobs * sizeof(struct thread_data);
1500
1501		shm_id = shmget(0, size, IPC_CREAT | 0600);
1502		if (shm_id != -1)
1503			break;
1504		if (errno != EINVAL) {
1505			perror("shmget");
1506			break;
1507		}
1508
1509		max_jobs >>= 1;
1510	} while (max_jobs);
1511
1512	if (shm_id == -1)
1513		return 1;
1514
1515	threads = shmat(shm_id, NULL, 0);
1516	if (threads == (void *) -1) {
1517		perror("shmat");
1518		return 1;
1519	}
1520
1521	atexit(free_shm);
1522	return 0;
1523}
1524
1525/*
1526 * Copy the fio options into the long options map, so we mirror
1527 * job and cmd line options.
1528 */
1529static void dupe_job_options(void)
1530{
1531	struct fio_option *o;
1532	unsigned int i;
1533
1534	i = 0;
1535	while (long_options[i].name)
1536		i++;
1537
1538	o = &options[0];
1539	while (o->name) {
1540		long_options[i].name = o->name;
1541		long_options[i].val = FIO_GETOPT_JOB;
1542		if (o->type == FIO_OPT_STR_SET)
1543			long_options[i].has_arg = no_argument;
1544		else
1545			long_options[i].has_arg = required_argument;
1546
1547		i++;
1548		o++;
1549		assert(i < FIO_JOB_OPTS + FIO_CMD_OPTS);
1550	}
1551}
1552
1553int parse_options(int argc, char *argv[])
1554{
1555	int job_files, i;
1556
1557	f_out = stdout;
1558	f_err = stderr;
1559
1560	options_init(options);
1561
1562	dupe_job_options();
1563
1564	if (setup_thread_area())
1565		return 1;
1566	if (fill_def_thread())
1567		return 1;
1568
1569	job_files = parse_cmd_line(argc, argv);
1570
1571	for (i = 0; i < job_files; i++) {
1572		if (fill_def_thread())
1573			return 1;
1574		if (parse_jobs_ini(ini_file[i], i))
1575			return 1;
1576		free(ini_file[i]);
1577	}
1578
1579	free(ini_file);
1580
1581	if (!thread_number) {
1582		log_err("No jobs defined(s)\n");
1583		return 1;
1584	}
1585
1586	return 0;
1587}
1588