1/*
2 * Blktrace record utility - Convert binary trace data into bunches of IOs
3 *
4 * Copyright (C) 2007 Alan D. Brunelle <Alan.Brunelle@hp.com>
5 *
6 *  This program is free software; you can redistribute it and/or modify
7 *  it under the terms of the GNU General Public License as published by
8 *  the Free Software Foundation; either version 2 of the License, or
9 *  (at your option) any later version.
10 *
11 *  This program is distributed in the hope that it will be useful,
12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 *  GNU General Public License for more details.
15 *
16 *  You should have received a copy of the GNU General Public License
17 *  along with this program; if not, write to the Free Software
18 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19 */
20
/* Build stamp ("<date> at <time>") reported by the --version option */
static char build_date[] = __DATE__ " at " __TIME__;
22
23#include <assert.h>
24#include <fcntl.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <unistd.h>
29#include <sys/param.h>
30#include <sys/stat.h>
31#include <sys/types.h>
32#include <dirent.h>
33#include <stdarg.h>
34
35#if !defined(_GNU_SOURCE)
36#	define _GNU_SOURCE
37#endif
38#include <getopt.h>
39
40#include "list.h"
41#include "btrecord.h"
42#include "blktrace.h"
43
44/*
45 * Per input file information
46 *
47 * @head: 	Used to link up on input_files
48 * @devnm: 	Device name portion of this input file
49 * @file_name: 	Fully qualified name for this input file
50 * @cpu: 	CPU that this file was collected on
51 * @ifd: 	Input file descriptor (when opened)
52 * @tpkts: 	Total number of packets processed.
53 */
54struct ifile_info {
55	struct list_head head;
56	char *devnm, *file_name;
57	int cpu, ifd;
58	__u64 tpkts, genesis;
59};
60
61/*
62 * Per IO trace information
63 *
64 * @time: 	Time stamp when trace was emitted
65 * @sector: 	IO sector identifier
66 * @bytes: 	Number of bytes transferred
67 * @rw: 	Read (1) or write (0)
68 */
69struct io_spec {
70	__u64 time;
71	__u64 sector;
72	__u32 bytes;
73	int rw;
74};
75
76/*
77 * Per output file information
78 *
79 * @ofp: 	Output file
80 * @vfp:	Verbose output file
81 * @file_name: 	Fully qualified name for this file
82 * @vfn:	Fully qualified name for this file
83 * @cur: 	Current IO bunch being collected
84 * @iip: 	Input file this is associated with
85 * @start_time: Start time of th ecurrent bunch
86 * @last_time: 	Time of last packet put in
87 * @bunches: 	Number of bunches processed
88 * @pkts: 	Number of packets stored in bunches
89 */
90struct io_stream {
91	FILE *ofp, *vfp;
92	char *file_name, *vfn;
93	struct io_bunch *cur;
94	struct ifile_info *iip;
95	__u64 start_time, last_time, bunches, pkts;
96};
97
98int data_is_native;				// Indicates whether to swap
99static LIST_HEAD(input_files);			// List of all input files
100static char *idir = ".";			// Input directory base
101static char *odir = ".";			// Output directory base
102static char *obase = "replay";			// Output file base
103static __u64 max_bunch_tm = (10 * 1000 * 1000);	// 10 milliseconds
104static __u64 max_pkts_per_bunch = 8;		// Default # of pkts per bunch
105static int verbose = 0;				// Boolean: output stats
106static int find_traces = 0;			// Boolean: Find traces in dir
107
/* Option summary printed by usage(); one line per command line switch */
static char usage_str[] =
	"\n"
	"\t[ -d <dir>  : --input-directory=<dir> ] Default: .\n"
	"\t[ -D <dir>  : --output-directory=<dir>] Default: .\n"
	"\t[ -F        : --find-traces           ] Default: Off\n"
	"\t[ -h        : --help                  ] Default: Off\n"
	"\t[ -m <nsec> : --max-bunch-time=<nsec> ] Default: 10 msec\n"
	"\t[ -M <pkts> : --max-pkts=<pkts>       ] Default: 8\n"
	"\t[ -o <base> : --output-base=<base>    ] Default: replay\n"
	"\t[ -v        : --verbose               ] Default: Off\n"
	"\t[ -V        : --version               ] Default: Off\n"
	"\t<dev>...                                Default: None\n"
	"\n";
121
/* Short option string; must stay in step with the l_opts table below */
#define S_OPTS	"d:D:Fhm:M:o:vV"

/* Long options: { name, has_arg, flag, val }, each mapped to its short form */
static struct option l_opts[] = {
	{ "input-directory",	required_argument,	NULL,	'd' },
	{ "output-directory",	required_argument,	NULL,	'D' },
	{ "find-traces",	no_argument,		NULL,	'F' },
	{ "help",		no_argument,		NULL,	'h' },
	{ "max-bunch-time",	required_argument,	NULL,	'm' },
	{ "max-pkts",		required_argument,	NULL,	'M' },
	{ "output-base",	required_argument,	NULL,	'o' },
	{ "verbose",		no_argument,		NULL,	'v' },
	{ "version",		no_argument,		NULL,	'V' },
	{ NULL,			0,			NULL,	0 }	/* terminator */
};
182
/* Exit codes handed to fatal() / exit() */
#define ERR_ARGS			1
#define ERR_SYSCALL			2

/**
 * fatal - Report an error on stderr and terminate the program
 * @errstring: If non-NULL, prefix handed to perror() before the message
 * @exitval: Process exit status (e.g. ERR_ARGS or ERR_SYSCALL)
 * @fmt: printf-style format for the message, followed by its arguments
 *
 * Never returns.
 */
static inline void fatal(const char *errstring, const int exitval,
			 const char *fmt, ...)
{
	va_list args;

	if (errstring != NULL)
		perror(errstring);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	exit(exitval);
	/*NOTREACHED*/
}
200
201/**
202 * match - Return true if this trace is a proper QUEUE transaction
203 * @action: Action field from trace
204 */
205static inline int match(__u32 action)
206{
207	return ((action & 0xffff) == __BLK_TA_QUEUE) &&
208				       (action & BLK_TC_ACT(BLK_TC_QUEUE));
209}
210
211/**
212 * usage - Display usage string and version
213 */
214static void usage(void)
215{
216	fprintf(stderr, "Usage: btrecord -- version %s\n%s",
217		my_btversion, usage_str);
218}
219
220/**
221 * write_file_hdr - Seek to and write btrecord file header
222 * @stream: Output file information
223 * @hdr: Header to write
224 */
225static void write_file_hdr(struct io_stream *stream, struct io_file_hdr *hdr)
226{
227	hdr->version = mk_btversion(btver_mjr, btver_mnr, btver_sub);
228
229	if (verbose) {
230		fprintf(stderr, "\t%s: %llx %llx %llx %llx\n",
231			stream->file_name,
232			(long long unsigned)hdr->version,
233			(long long unsigned)hdr->genesis,
234			(long long unsigned)hdr->nbunches,
235			(long long unsigned)hdr->total_pkts);
236	}
237
238	fseek(stream->ofp, 0, SEEK_SET);
239	if (fwrite(hdr, sizeof(*hdr), 1, stream->ofp) != 1) {
240		fatal(stream->file_name, ERR_SYSCALL, "Hdr write failed\n");
241		/*NOTREACHED*/
242	}
243}
244
245/**
246 * io_bunch_create - Allocate & initialize an io_bunch
247 * @io_stream: IO stream being added to
248 * @pre_stall: Amount of time that this bunch should be delayed by
249 * @start_time: Records current start
250 */
251static inline void io_bunch_create(struct io_stream *stream, __u64 start_time)
252{
253	struct io_bunch *cur = malloc(sizeof(*cur));
254
255	memset(cur, 0, sizeof(*cur));
256
257	cur->hdr.npkts = 0;
258	cur->hdr.time_stamp = stream->start_time = start_time;
259
260	stream->cur = cur;
261}
262
263/**
264 * io_bunch_add - Add an IO to the current bunch of IOs
265 * @stream: Per-output file stream information
266 * @spec: IO trace specification
267 *
268 * Returns update bunch information
269 */
270static void io_bunch_add(struct io_stream *stream, struct io_spec *spec)
271{
272	struct io_bunch *cur = stream->cur;
273	struct io_pkt iop = {
274		.sector = spec->sector,
275		.nbytes = spec->bytes,
276		.rw = spec->rw
277	};
278
279	assert(cur != NULL);
280	assert(cur->hdr.npkts < BT_MAX_PKTS);
281	assert(stream->last_time == 0 || stream->last_time <= spec->time);
282
283	cur->pkts[cur->hdr.npkts++] = iop;	// Struct copy
284	stream->last_time = spec->time;
285}
286
287/**
288 * rem_input_file - Release resources associated with an input file
289 * @iip: Per-input file information
290 */
291static void rem_input_file(struct ifile_info *iip)
292{
293	list_del(&iip->head);
294
295	close(iip->ifd);
296	free(iip->file_name);
297	free(iip->devnm);
298	free(iip);
299}
300
301/**
302 * __add_input_file - Allocate and initialize per-input file structure
303 * @cpu: CPU for this file
304 * @devnm: Device name for this file
305 * @file_name: Fully qualifed input file name
306 */
307static void __add_input_file(int cpu, char *devnm, char *file_name)
308{
309	struct ifile_info *iip = malloc(sizeof(*iip));
310
311	iip->cpu = cpu;
312	iip->tpkts = 0;
313	iip->genesis = 0;
314	iip->devnm = strdup(devnm);
315	iip->file_name = strdup(file_name);
316	iip->ifd = open(file_name, O_RDONLY);
317	if (iip->ifd < 0) {
318		fatal(file_name, ERR_ARGS, "Unable to open\n");
319		/*NOTREACHED*/
320	}
321
322	list_add_tail(&iip->head, &input_files);
323}
324
325/**
326 * add_input_file - Set up the input file name
327 * @devnm: Device name to use
328 */
329static void add_input_file(char *devnm)
330{
331	struct list_head *p;
332	int cpu, found = 0;
333
334	__list_for_each(p, &input_files) {
335		struct ifile_info *iip = list_entry(p, struct ifile_info, head);
336		if (strcmp(iip->devnm, devnm) == 0)
337			return;
338	}
339
340	for (cpu = 0; ; cpu++) {
341		char full_name[MAXPATHLEN];
342
343		sprintf(full_name, "%s/%s.blktrace.%d", idir, devnm, cpu);
344		if (access(full_name, R_OK) != 0)
345			break;
346
347		__add_input_file(cpu, devnm, full_name);
348		found++;
349	}
350
351	if (!found) {
352		fatal(NULL, ERR_ARGS, "No traces found for %s\n", devnm);
353		/*NOTREACHED*/
354	}
355}
356
357static void find_input_files(char *idir)
358{
359	struct dirent *ent;
360	DIR *dir = opendir(idir);
361
362	if (dir == NULL) {
363		fatal(idir, ERR_ARGS, "Unable to open %s\n", idir);
364		/*NOTREACHED*/
365	}
366
367	while ((ent = readdir(dir)) != NULL) {
368		char *p, *dsf = malloc(256);
369
370		if (strstr(ent->d_name, ".blktrace.") == NULL)
371			continue;
372
373		dsf = strdup(ent->d_name);
374		p = index(dsf, '.');
375		assert(p != NULL);
376		*p = '\0';
377		add_input_file(dsf);
378		free(dsf);
379	}
380
381	closedir(dir);
382}
383
384/**
385 * handle_args - Parse passed in argument list
386 * @argc: Number of arguments in argv
387 * @argv: Arguments passed in
388 *
389 * Does rudimentary parameter verification as well.
390 */
391void handle_args(int argc, char *argv[])
392{
393	int c;
394
395	while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
396		switch (c) {
397		case 'd':
398			idir = optarg;
399			if (access(idir, R_OK | X_OK) != 0) {
400				fatal(idir, ERR_ARGS,
401				      "Invalid input directory specified\n");
402				/*NOTREACHED*/
403			}
404			break;
405
406		case 'D':
407			odir = optarg;
408			if (access(odir, R_OK | X_OK) != 0) {
409				fatal(odir, ERR_ARGS,
410				      "Invalid output directory specified\n");
411				/*NOTREACHED*/
412			}
413			break;
414
415		case 'F':
416			find_traces = 1;
417			break;
418
419		case 'h':
420			usage();
421			exit(0);
422			/*NOTREACHED*/
423
424		case 'm':
425			max_bunch_tm = (__u64)atoll(optarg);
426			if (max_bunch_tm < 1) {
427				fprintf(stderr, "Invalid bunch time %llu\n",
428					(unsigned long long)max_bunch_tm);
429				exit(ERR_ARGS);
430				/*NOTREACHED*/
431			}
432			break;
433
434		case 'M':
435			max_pkts_per_bunch = (__u64)atoll(optarg);
436			if (!((1 <= max_pkts_per_bunch) &&
437						(max_pkts_per_bunch < 513))) {
438				fprintf(stderr, "Invalid max pkts %llu\n",
439					(unsigned long long)max_pkts_per_bunch);
440				exit(ERR_ARGS);
441				/*NOTREACHED*/
442			}
443			break;
444
445		case 'o':
446			obase = optarg;
447			break;
448
449		case 'V':
450			fprintf(stderr, "btrecord -- version %s\n",
451				my_btversion);
452			fprintf(stderr, "            Built on %s\n", build_date);
453			exit(0);
454			/*NOTREACHED*/
455
456		case 'v':
457			verbose++;
458			break;
459
460		default:
461			usage();
462			fatal(NULL, ERR_ARGS, "Invalid command line\n");
463			/*NOTREACHED*/
464		}
465	}
466
467	while (optind < argc)
468		add_input_file(argv[optind++]);
469
470	if (find_traces)
471		find_input_files(idir);
472
473	if (list_len(&input_files) == 0) {
474		fatal(NULL, ERR_ARGS, "Missing required input file name(s)\n");
475		/*NOTREACHED*/
476	}
477}
478
479/**
480 * next_io - Retrieve next Q trace from input stream
481 * @iip: Per-input file information
482 * @spec: IO specifier for trace
483 *
484 * Returns 0 on end of file, 1 if valid data returned.
485 */
486static int next_io(struct ifile_info *iip, struct io_spec *spec)
487{
488	ssize_t ret;
489	__u32 action;
490	__u16 pdu_len;
491	struct blk_io_trace t;
492
493again:
494	ret = read(iip->ifd, &t, sizeof(t));
495	if (ret < 0) {
496		fatal(iip->file_name, ERR_SYSCALL, "Read failed\n");
497		/*NOTREACHED*/
498	}
499	else if (ret == 0)
500		return 0;
501	else if (ret < (ssize_t)sizeof(t)) {
502		fprintf(stderr, "WARNING: Short read on %s (%d)\n",
503			iip->file_name, (int)ret);
504		return 0;
505	}
506
507	if (data_is_native == -1)
508		check_data_endianness(t.magic);
509
510	assert(data_is_native >= 0);
511	if (data_is_native) {
512		spec->time = t.time;
513		spec->sector = t.sector;
514		spec->bytes = t.bytes;
515		action = t.action;
516		pdu_len = t.pdu_len;
517	}
518	else {
519		spec->time = be64_to_cpu(t.time);
520		spec->sector = be64_to_cpu(t.sector);
521		spec->bytes = be32_to_cpu(t.bytes);
522		action = be32_to_cpu(t.action);
523		pdu_len = be16_to_cpu(t.pdu_len);
524	}
525
526
527	if (pdu_len) {
528		char buf[pdu_len];
529
530		ret = read(iip->ifd, buf, pdu_len);
531		if (ret < 0) {
532			fatal(iip->file_name, ERR_SYSCALL, "Read PDU failed\n");
533			/*NOTREACHED*/
534		}
535		else if (ret < (ssize_t)pdu_len) {
536			fprintf(stderr, "WARNING: Short PDU read on %s (%d)\n",
537				iip->file_name, (int)ret);
538			return 0;
539		}
540	}
541
542	iip->tpkts++;
543	if (!match(action))
544		goto again;
545
546	spec->rw = (action & BLK_TC_ACT(BLK_TC_READ)) ? 1 : 0;
547	if (verbose > 1)
548		fprintf(stderr, "%2d: %10llu+%10llu (%d) @ %10llx\n",
549			iip->cpu, (long long unsigned)spec->sector,
550			(long long unsigned)spec->bytes / 512LLU,
551			spec->rw, (long long unsigned)spec->time);
552
553	if (iip->genesis == 0) {
554		iip->genesis = spec->time;
555		if (verbose > 1)
556			fprintf(stderr, "\tSetting new genesis: %llx(%d)\n",
557				(long long unsigned)iip->genesis, iip->cpu);
558	}
559	else if (iip->genesis > spec->time)
560		fatal(NULL, ERR_SYSCALL,
561			"Time inversion? %llu ... %llu\n",
562			(long long unsigned )iip->genesis,
563			(long long unsigned )spec->time);
564
565	return 1;
566}
567
568/**
569 * bunch_output_hdr - Output bunch header
570 */
571static inline void bunch_output_hdr(struct io_stream *stream)
572{
573	struct io_bunch_hdr *hdrp = &stream->cur->hdr;
574
575	assert(0 < hdrp->npkts && hdrp->npkts <= BT_MAX_PKTS);
576	if (fwrite(hdrp, sizeof(struct io_bunch_hdr), 1, stream->ofp) != 1) {
577		fatal(stream->file_name, ERR_SYSCALL, "fwrite(hdr) failed\n");
578		/*NOTREACHED*/
579	}
580
581	if (verbose) {
582		__u64 off = hdrp->time_stamp - stream->iip->genesis;
583
584		assert(stream->vfp);
585		fprintf(stream->vfp, "------------------\n");
586		fprintf(stream->vfp, "%4llu.%09llu %3llu\n",
587			(unsigned long long)off / (1000 * 1000 * 1000),
588			(unsigned long long)off % (1000 * 1000 * 1000),
589			(unsigned long long)hdrp->npkts);
590		fprintf(stream->vfp, "------------------\n");
591	}
592}
593
594/**
595 * bunch_output_pkt - Output IO packets
596 */
597static inline void bunch_output_pkts(struct io_stream *stream)
598{
599	struct io_pkt *p = stream->cur->pkts;
600	size_t npkts = stream->cur->hdr.npkts;
601
602	assert(0 < npkts && npkts <= BT_MAX_PKTS);
603	if (fwrite(p, sizeof(struct io_pkt), npkts, stream->ofp) != npkts) {
604		fatal(stream->file_name, ERR_SYSCALL, "fwrite(pkts) failed\n");
605		/*NOTREACHED*/
606	}
607
608	if (verbose) {
609		size_t i;
610
611		assert(stream->vfp);
612		for (i = 0; i < npkts; i++, p++)
613			fprintf(stream->vfp, "\t%1d %10llu\t%10llu\n",
614				p->rw,
615				(unsigned long long)p->sector,
616				(unsigned long long)p->nbytes / 512);
617	}
618}
619
620/**
621 * stream_flush - Flush current bunch of IOs out to the output stream
622 * @stream: Per-output file stream information
623 */
624static void stream_flush(struct io_stream *stream)
625{
626	struct io_bunch *cur = stream->cur;
627
628	if (cur) {
629		if (cur->hdr.npkts) {
630			assert(cur->hdr.npkts <= BT_MAX_PKTS);
631			bunch_output_hdr(stream);
632			bunch_output_pkts(stream);
633
634			stream->bunches++;
635			stream->pkts += cur->hdr.npkts;
636		}
637		free(cur);
638	}
639}
640
641/**
642 * bunch_done - Returns true if current bunch is either full, or next IO is late
643 * @stream: Output stream information
644 * @spec: IO trace specification
645 */
646static inline int bunch_done(struct io_stream *stream, struct io_spec *spec)
647{
648	if (stream->cur->hdr.npkts >= max_pkts_per_bunch)
649		return 1;
650
651	if ((spec->time - stream->start_time) > max_bunch_tm)
652		return 1;
653
654	return 0;
655}
656
657/**
658 * stream_add_io - Add an IO trace to the current stream
659 * @stream: Output stream information
660 * @spec: IO trace specification
661 */
662static void stream_add_io(struct io_stream *stream, struct io_spec *spec)
663{
664
665	if (stream->cur == NULL)
666		io_bunch_create(stream, spec->time);
667	else if (bunch_done(stream, spec)) {
668		stream_flush(stream);
669		io_bunch_create(stream, spec->time);
670	}
671
672	io_bunch_add(stream, spec);
673}
674
675/**
676 * stream_open - Open output stream for specified input stream
677 * @iip: Per-input file information
678 */
679static struct io_stream *stream_open(struct ifile_info *iip)
680{
681	char ofile_name[MAXPATHLEN];
682	struct io_stream *stream = malloc(sizeof(*stream));
683	struct io_file_hdr io_file_hdr = {
684		.genesis = 0,
685		.nbunches = 0,
686		.total_pkts = 0
687	};
688
689	memset(stream, 0, sizeof(*stream));
690
691	sprintf(ofile_name, "%s/%s.%s.%d", odir, iip->devnm, obase, iip->cpu);
692	stream->ofp = fopen(ofile_name, "w");
693	if (!stream->ofp) {
694		fatal(ofile_name, ERR_SYSCALL, "Open failed\n");
695		/*NOTREACHED*/
696	}
697
698	stream->iip = iip;
699	stream->cur = NULL;
700	stream->bunches = stream->pkts = 0;
701	stream->last_time = 0;
702	stream->file_name = strdup(ofile_name);
703
704	write_file_hdr(stream, &io_file_hdr);
705
706	if (verbose) {
707		char vfile_name[MAXPATHLEN];
708
709		sprintf(vfile_name, "%s/%s.%s.%d.rec", odir, iip->devnm,
710			obase, iip->cpu);
711		stream->vfp = fopen(vfile_name, "w");
712		if (!stream->vfp) {
713			fatal(vfile_name, ERR_SYSCALL, "Open failed\n");
714			/*NOTREACHED*/
715		}
716
717		stream->vfn = strdup(vfile_name);
718	}
719
720	data_is_native = -1;
721	return stream;
722}
723
724/**
725 * stream_close - Release resources associated with an output stream
726 * @stream: Stream to release
727 */
728static void stream_close(struct io_stream *stream)
729{
730	struct io_file_hdr io_file_hdr = {
731		.genesis = stream->iip->genesis,
732		.nbunches = stream->bunches,
733		.total_pkts = stream->pkts
734	};
735
736	stream_flush(stream);
737	write_file_hdr(stream, &io_file_hdr);
738	fclose(stream->ofp);
739
740	if (verbose && stream->bunches) {
741		fprintf(stderr,
742			"%s:%d: %llu pkts (tot), %llu pkts (replay), "
743					"%llu bunches, %.1lf pkts/bunch\n",
744			stream->iip->devnm, stream->iip->cpu,
745			(unsigned long long)stream->iip->tpkts,
746			(unsigned long long)stream->pkts,
747			(unsigned long long)stream->bunches,
748			(double)(stream->pkts) / (double)(stream->bunches));
749
750		fclose(stream->vfp);
751		free(stream->vfn);
752	}
753
754	free(stream->file_name);
755	free(stream);
756}
757
758/**
759 * process - Process one input file to an output file
760 * @iip: Per-input file information
761 */
762static void process(struct ifile_info *iip)
763{
764	struct io_spec spec;
765	struct io_stream *stream;
766
767	stream = stream_open(iip);
768	while (next_io(iip, &spec))
769		stream_add_io(stream, &spec);
770	stream_close(stream);
771
772	rem_input_file(iip);
773}
774
775/**
776 * main -
777 * @argc: Number of arguments
778 * @argv: Array of arguments
779 */
780int main(int argc, char *argv[])
781{
782	struct list_head *p, *q;
783
784	handle_args(argc, argv);
785	list_for_each_safe(p, q, &input_files)
786		process(list_entry(p, struct ifile_info, head));
787
788	return 0;
789}
790