btrace2fio.c revision 44aab35f7b67302c3bd67f704cc4d6f847da0f27
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <inttypes.h>
#include <assert.h>

#include "../io_ddir.h"
#include "../flist.h"
#include "../hash.h"
#include "../fifo.h"
#include "../blktrace_api.h"
#include "../os/os.h"
#include "../log.h"
#include "../lib/linux-dev-lookup.h"

#define TRACE_FIFO_SIZE	8192

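/*
 * Defaults for the command line tunables: -t (runtime threshold in usec),
 * -n (minimum number of IOs), -r (minimum KB/sec rate), -R (emit rate=),
 * -D (queue depth cap), -f (fio job output instead of ASCII stats) and
 * -d (replay device/file).
 */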
static unsigned int rt_threshold = 1000000;
static unsigned int ios_threshold = 10;
static unsigned int rate_threshold;
static unsigned int set_rate;
static unsigned int max_depth = 256;
static int output_ascii = 1;
static char *filename;

struct bs {
	unsigned int bs;
	unsigned int nr;
	int merges;
};

struct trace_file {
	char *name;
	int major, minor;
};

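/*
 * Per-pid statistics accumulated while parsing the trace: IO and merge
 * counts, sequential IO counts, block size histogram, observed queue
 * depth, transferred KB and first/last timestamps per data direction.
 */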
struct btrace_out {
	unsigned long ios[DDIR_RWDIR_CNT];
	unsigned long merges[DDIR_RWDIR_CNT];

	uint64_t last_end[DDIR_RWDIR_CNT];
	uint64_t seq[DDIR_RWDIR_CNT];

	struct bs *bs[DDIR_RWDIR_CNT];
	unsigned int nr_bs[DDIR_RWDIR_CNT];

	int inflight;
	unsigned int depth;
	int depth_disabled;
	int complete_seen;

	uint64_t first_ttime[DDIR_RWDIR_CNT];
	uint64_t last_ttime[DDIR_RWDIR_CNT];
	uint64_t kb[DDIR_RWDIR_CNT];

	uint64_t start_delay;
};

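/*
 * One entry per pid seen in the trace, linked on both the pid hash and
 * the global pid_list, plus the devices that pid touched.
 */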
struct btrace_pid {
	struct flist_head hash_list;
	struct flist_head pid_list;
	pid_t pid;

	struct trace_file *files;
	int nr_files;
	unsigned int last_major, last_minor;

	struct btrace_out o;
};

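/*
 * A request that has been queued but not yet completed, hashed by the
 * sector at which it ends so merges and completions can find it.
 */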
struct inflight {
	struct flist_head list;
	struct btrace_pid *p;
	uint64_t end_sector;
};

#define PID_HASH_BITS	10
#define PID_HASH_SIZE	(1U << PID_HASH_BITS)

static struct flist_head pid_hash[PID_HASH_SIZE];
static FLIST_HEAD(pid_list);

#define INFLIGHT_HASH_BITS	8
#define INFLIGHT_HASH_SIZE	(1U << INFLIGHT_HASH_BITS)
static struct flist_head inflight_hash[INFLIGHT_HASH_SIZE];

static uint64_t first_ttime = -1ULL;

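/*
 * Look up a pending request by the sector at which it ends.
 */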
static struct inflight *inflight_find(uint64_t sector)
{
	struct flist_head *inflight_list;
	struct flist_head *e;

	inflight_list = &inflight_hash[hash_long(sector, INFLIGHT_HASH_BITS)];

	flist_for_each(e, inflight_list) {
		struct inflight *i = flist_entry(e, struct inflight, list);

		if (i->end_sector == sector)
			return i;
	}

	return NULL;
}

static void inflight_remove(struct inflight *i)
{
	struct btrace_out *o = &i->p->o;

	o->inflight--;
	assert(o->inflight >= 0);
	flist_del(&i->list);
	free(i);
}

static void __inflight_add(struct inflight *i)
{
	struct flist_head *list;

	list = &inflight_hash[hash_long(i->end_sector, INFLIGHT_HASH_BITS)];
	flist_add_tail(&i->list, list);
}

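/*
 * Account a newly queued request: bump the per-pid inflight count, track
 * the highest depth observed and hash the request by its end sector. If
 * the depth hits max_depth without any completion having been seen, stop
 * tracking depth and cap it.
 */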
static void inflight_add(struct btrace_pid *p, uint64_t sector, uint32_t len)
{
	struct btrace_out *o = &p->o;
	struct inflight *i;

	i = calloc(1, sizeof(*i));
	i->p = p;
	o->inflight++;
	if (!o->depth_disabled) {
		o->depth = max((int) o->depth, o->inflight);
		if (o->depth >= max_depth && !o->complete_seen) {
			o->depth_disabled = 1;
			o->depth = max_depth;
		}
	}
	i->end_sector = sector + (len >> 9);
	__inflight_add(i);
}

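/*
 * Record a merge into an existing inflight request. A non-zero size means
 * the request grew at the tail (back merge), so it must be re-hashed under
 * its new end sector.
 */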
static void inflight_merge(struct inflight *i, int rw, unsigned int size)
{
	i->p->o.merges[rw]++;
	if (size) {
		i->end_sector += (size >> 9);
		flist_del(&i->list);
		__inflight_add(i);
	}
}

/*
 * fifo refill frontend, to avoid reading data in trace sized bites
 */
static int refill_fifo(struct fifo *fifo, int fd)
{
	char buf[TRACE_FIFO_SIZE];
	unsigned int total;
	int ret;

	total = sizeof(buf);
	if (total > fifo_room(fifo))
		total = fifo_room(fifo);

	ret = read(fd, buf, total);
	if (ret < 0) {
		perror("read refill");
		return -1;
	}

	if (ret > 0)
		ret = fifo_put(fifo, buf, ret);

	return ret;
}

/*
 * Retrieve 'len' bytes from the fifo, refilling if necessary.
 */
static int trace_fifo_get(struct fifo *fifo, int fd, void *buf,
			  unsigned int len)
{
	if (fifo_len(fifo) < len) {
		int ret = refill_fifo(fifo, fd);

		if (ret < 0)
			return ret;
	}

	return fifo_get(fifo, buf, len);
}

/*
 * Just discard the pdu by seeking past it.
 */
static int discard_pdu(struct fifo *fifo, int fd, struct blk_io_trace *t)
{
	if (t->pdu_len == 0)
		return 0;

	return trace_fifo_get(fifo, fd, NULL, t->pdu_len);
}

static int handle_trace_notify(struct blk_io_trace *t)
{
	switch (t->action) {
	case BLK_TN_PROCESS:
		//printf("got process notify: %x, %d\n", t->action, t->pid);
		break;
	case BLK_TN_TIMESTAMP:
		//printf("got timestamp notify: %x, %d\n", t->action, t->pid);
		break;
	case BLK_TN_MESSAGE:
		break;
	default:
		log_err("unknown trace act %x\n", t->action);
		return 1;
	}

	return 0;
}

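/*
 * Count how often each distinct block size is seen per data direction;
 * __add_bs() appends a new entry, add_bs() bumps an existing one.
 */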
static void __add_bs(struct btrace_out *o, unsigned int len, int rw)
{
	o->bs[rw] = realloc(o->bs[rw], (o->nr_bs[rw] + 1) * sizeof(struct bs));
	o->bs[rw][o->nr_bs[rw]].bs = len;
	o->bs[rw][o->nr_bs[rw]].nr = 1;
	o->nr_bs[rw]++;
}

static void add_bs(struct btrace_out *o, unsigned int len, int rw)
{
	struct bs *bs = o->bs[rw];
	int i;

	if (!o->nr_bs[rw]) {
		__add_bs(o, len, rw);
		return;
	}

	for (i = 0; i < o->nr_bs[rw]; i++) {
		if (bs[i].bs == len) {
			bs[i].nr++;
			return;
		}
	}

	__add_bs(o, len, rw);
}

#define FMINORBITS	20
#define FMINORMASK	((1U << FMINORBITS) - 1)
#define FMAJOR(dev)	((unsigned int) ((dev) >> FMINORBITS))
#define FMINOR(dev)	((unsigned int) ((dev) & FMINORMASK))

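/*
 * Resolve the major/minor number from the trace to a device node under
 * /dev and add it to this pid's file list. No-op if -d supplied a replay
 * device, or if we have already seen this device.
 */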
static int btrace_add_file(struct btrace_pid *p, uint32_t devno)
{
	unsigned int maj = FMAJOR(devno);
	unsigned int min = FMINOR(devno);
	struct trace_file *f;
	unsigned int i;
	char dev[256];

	if (filename)
		return 0;
	if (p->last_major == maj && p->last_minor == min)
		return 0;

	p->last_major = maj;
	p->last_minor = min;

	/*
	 * check for this file in our list
	 */
	for (i = 0; i < p->nr_files; i++) {
		f = &p->files[i];

		if (f->major == maj && f->minor == min)
			return 0;
	}

	strcpy(dev, "/dev");
	if (!blktrace_lookup_device(NULL, dev, maj, min)) {
		log_err("fio: failed to find device %u/%u\n", maj, min);
		if (!output_ascii) {
			log_err("fio: use -d to specify device\n");
			return 1;
		}
		return 0;
	}

	p->files = realloc(p->files, (p->nr_files + 1) * sizeof(*f));
	f = &p->files[p->nr_files];
	f->name = strdup(dev);
	f->major = maj;
	f->minor = min;
	p->nr_files++;
	return 0;
}

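/*
 * Map a trace action to fio's data direction (read, write or trim).
 */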
static int t_to_rwdir(struct blk_io_trace *t)
{
	if (t->action & BLK_TC_ACT(BLK_TC_DISCARD))
		return DDIR_TRIM;

	return (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
}

static int handle_trace_discard(struct blk_io_trace *t, struct btrace_pid *p)
{
	struct btrace_out *o = &p->o;

	if (btrace_add_file(p, t->device))
		return 1;

	if (o->first_ttime[2] == -1ULL)
		o->first_ttime[2] = t->time;

	o->ios[DDIR_TRIM]++;
	add_bs(o, t->bytes, DDIR_TRIM);
	return 0;
}

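/*
 * Account a queued read or write: block size, IO count and sequential
 * detection by comparing the start sector against the end of the
 * previous request in the same direction.
 */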
static int handle_trace_fs(struct blk_io_trace *t, struct btrace_pid *p)
{
	struct btrace_out *o = &p->o;
	int rw;

	if (btrace_add_file(p, t->device))
		return 1;

	first_ttime = min(first_ttime, (uint64_t) t->time);

	rw = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;

	if (o->first_ttime[rw] == -1ULL)
		o->first_ttime[rw] = t->time;

	add_bs(o, t->bytes, rw);
	o->ios[rw]++;

	if (t->sector == o->last_end[rw] || o->last_end[rw] == -1ULL)
		o->seq[rw]++;

	o->last_end[rw] = t->sector + (t->bytes >> 9);
	return 0;
}

static int handle_queue_trace(struct blk_io_trace *t, struct btrace_pid *p)
{
	if (t->action & BLK_TC_ACT(BLK_TC_NOTIFY))
		return handle_trace_notify(t);
	else if (t->action & BLK_TC_ACT(BLK_TC_DISCARD))
		return handle_trace_discard(t, p);
	else
		return handle_trace_fs(t, p);
}

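/*
 * Dispatch on the trace action: queue events are accounted, requeues drop
 * the inflight count, merges fold into the existing inflight request, and
 * completions credit transferred KB and release the inflight entry.
 */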
static int handle_trace(struct blk_io_trace *t, struct btrace_pid *p)
{
	unsigned int act = t->action & 0xffff;
	int ret = 0;

	if (act == __BLK_TA_QUEUE) {
		inflight_add(p, t->sector, t->bytes);
		ret = handle_queue_trace(t, p);
	} else if (act == __BLK_TA_REQUEUE) {
		p->o.inflight--;
	} else if (act == __BLK_TA_BACKMERGE) {
		struct inflight *i;

		i = inflight_find(t->sector + (t->bytes >> 9));
		if (i)
			inflight_remove(i);

		i = inflight_find(t->sector);
		if (i)
			inflight_merge(i, t_to_rwdir(t), t->bytes);
	} else if (act == __BLK_TA_FRONTMERGE) {
		struct inflight *i;

		i = inflight_find(t->sector + (t->bytes >> 9));
		if (i)
			inflight_remove(i);

		i = inflight_find(t->sector);
		if (i)
			inflight_merge(i, t_to_rwdir(t), 0);
	} else if (act == __BLK_TA_COMPLETE) {
		struct inflight *i;

		i = inflight_find(t->sector + (t->bytes >> 9));
		if (i) {
			i->p->o.kb[t_to_rwdir(t)] += (t->bytes >> 10);
			i->p->o.complete_seen = 1;
			inflight_remove(i);
		}
	}

	return ret;
}

static void byteswap_trace(struct blk_io_trace *t)
{
	t->magic = fio_swap32(t->magic);
	t->sequence = fio_swap32(t->sequence);
	t->time = fio_swap64(t->time);
	t->sector = fio_swap64(t->sector);
	t->bytes = fio_swap32(t->bytes);
	t->action = fio_swap32(t->action);
	t->pid = fio_swap32(t->pid);
	t->device = fio_swap32(t->device);
	t->cpu = fio_swap32(t->cpu);
	t->error = fio_swap16(t->error);
	t->pdu_len = fio_swap16(t->pdu_len);
}

static struct btrace_pid *pid_hash_find(pid_t pid, struct flist_head *list)
{
	struct flist_head *e;
	struct btrace_pid *p;

	flist_for_each(e, list) {
		p = flist_entry(e, struct btrace_pid, hash_list);
		if (p->pid == pid)
			return p;
	}

	return NULL;
}

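/*
 * Find the per-pid state, creating and initializing it (timestamps and
 * last end sectors set to "unset") on first sight of a pid.
 */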
static struct btrace_pid *pid_hash_get(pid_t pid)
{
	struct flist_head *hash_list;
	struct btrace_pid *p;

	hash_list = &pid_hash[hash_long(pid, PID_HASH_BITS)];

	p = pid_hash_find(pid, hash_list);
	if (!p) {
		int i;

		p = calloc(1, sizeof(*p));

		for (i = 0; i < DDIR_RWDIR_CNT; i++) {
			p->o.first_ttime[i] = -1ULL;
			p->o.last_ttime[i] = -1ULL;
			p->o.last_end[i] = -1ULL;
		}

		p->pid = pid;
		flist_add_tail(&p->hash_list, hash_list);
		flist_add_tail(&p->pid_list, &pid_list);
	}

	return p;
}

/*
 * Load a blktrace file by reading all the blk_io_trace entries and
 * accumulating per-pid statistics from them.
 */
static int load_blktrace(const char *fname, int need_swap)
{
	struct btrace_pid *p;
	unsigned long traces;
	struct blk_io_trace t;
	struct fifo *fifo;
	int fd, ret = 0;

	fd = open(fname, O_RDONLY);
	if (fd < 0) {
		perror("open trace file");
		return 1;
	}

	fifo = fifo_alloc(TRACE_FIFO_SIZE);

	traces = 0;
	do {
		ret = trace_fifo_get(fifo, fd, &t, sizeof(t));
		if (ret < 0)
			goto err;
		else if (!ret)
			break;
		else if (ret < (int) sizeof(t)) {
			log_err("fio: short fifo get\n");
			break;
		}

		if (need_swap)
			byteswap_trace(&t);

		if ((t.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
			log_err("fio: bad magic in blktrace data: %x\n", t.magic);
			goto err;
		}
		if ((t.magic & 0xff) != BLK_IO_TRACE_VERSION) {
			log_err("fio: bad blktrace version %d\n", t.magic & 0xff);
			goto err;
		}
		ret = discard_pdu(fifo, fd, &t);
		if (ret < 0) {
			log_err("blktrace lseek\n");
			goto err;
		} else if (t.pdu_len != ret) {
			log_err("fio: discarded %d of %d\n", ret, t.pdu_len);
			goto err;
		}

		p = pid_hash_get(t.pid);
		ret = handle_trace(&t, p);
		if (ret)
			break;
		p->o.last_ttime[t_to_rwdir(&t)] = t.time;
		traces++;
	} while (1);

	fifo_free(fifo);
	close(fd);

	if (ret)
		return ret;

	if (output_ascii)
		printf("Traces loaded: %lu\n", traces);

	return 0;
err:
	close(fd);
	fifo_free(fifo);
	return 1;
}

static int bs_cmp(const void *ba, const void *bb)
{
	const struct bs *bsa = ba;
	const struct bs *bsb = bb;

	return bsb->nr - bsa->nr;
}

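/*
 * Convert the KB transferred and the first/last trace timestamps
 * (nanoseconds) in a given direction into a KB/sec rate.
 */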
static unsigned long o_to_kb_rate(struct btrace_out *o, int rw)
{
	uint64_t usec = (o->last_ttime[rw] - o->first_ttime[rw]) / 1000ULL;
	uint64_t val;

	if (!usec)
		return 0;

	usec /= 1000;
	if (!usec)
		return 0;

	val = o->kb[rw] * 1000ULL;
	return val / usec;
}

static uint64_t o_first_ttime(struct btrace_out *o)
{
	uint64_t first;

	first = min(o->first_ttime[0], o->first_ttime[1]);
	return min(first, o->first_ttime[2]);
}

static uint64_t o_longest_ttime(struct btrace_out *o)
{
	uint64_t ret = 0;
	int i;

	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
		uint64_t diff;

		diff = o->last_ttime[i] - o->first_ttime[i];
		ret = max(diff, ret);
	}

	return ret;
}

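/*
 * Dump per-pid statistics in human readable form: IO, merge and sequential
 * percentages, per-direction rate, block size distribution, queue depth,
 * runtime and the devices touched.
 */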
static void __output_p_ascii(struct btrace_pid *p, unsigned long *ios)
{
	const char *msg[] = { "reads", "writes", "trims" };
	struct btrace_out *o = &p->o;
	unsigned long total, usec;
	int i, j;

	printf("[pid:\t%u]\n", p->pid);

	total = ddir_rw_sum(o->ios);
	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
		float perc;

		if (!o->ios[i])
			continue;

		ios[i] += o->ios[i] + o->merges[i];
		printf("%s\n", msg[i]);
		perc = ((float) o->ios[i] * 100.0) / (float) total;
		printf("\tios:    %lu (perc=%3.2f%%)\n", o->ios[i], perc);
		perc = ((float) o->merges[i] * 100.0) / (float) total;
		printf("\tmerges: %lu (perc=%3.2f%%)\n", o->merges[i], perc);
		perc = ((float) o->seq[i] * 100.0) / (float) o->ios[i];
		printf("\tseq:    %lu (perc=%3.2f%%)\n", (unsigned long) o->seq[i], perc);
		printf("\trate:   %lu KB/sec\n", o_to_kb_rate(o, i));

		for (j = 0; j < o->nr_bs[i]; j++) {
			struct bs *bs = &o->bs[i][j];

			perc = (((float) bs->nr * 100.0) / (float) o->ios[i]);
			printf("\tbs=%u, perc=%3.2f%%\n", bs->bs, perc);
		}
	}

	printf("depth:\t%u\n", o->depth);
	usec = o_longest_ttime(o) / 1000ULL;
	printf("usec:\t%lu (delay=%llu)\n", usec, (unsigned long long) o->start_delay);

	printf("files:\t");
	for (i = 0; i < p->nr_files; i++)
		printf("%s,", p->files[i].name);
	printf("\n");

	printf("\n");
}

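/*
 * Emit a fio job section for this pid: ioengine and iodepth from the
 * observed queue depth, read/write mix, percentage_random from the
 * sequential counts, block size split, start delay, runtime and
 * (with -R) per-direction rate limits.
 */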
static int __output_p_fio(struct btrace_pid *p, unsigned long *ios)
{
	struct btrace_out *o = &p->o;
	unsigned long total;
	unsigned long long time;
	float perc;
	int i, j;

	if ((o->ios[0] + o->ios[1]) && o->ios[2]) {
		log_err("fio: trace has both read/write and trim\n");
		return 1;
	}
	if (!p->nr_files) {
		log_err("fio: no devices found\n");
		return 1;
	}

	printf("[pid%u]\n", p->pid);
	printf("direct=1\n");
	if (o->depth == 1)
		printf("ioengine=sync\n");
	else
		printf("ioengine=libaio\niodepth=%u\n", o->depth);

	if (o->ios[0] && !o->ios[1])
		printf("rw=randread\n");
	else if (!o->ios[0] && o->ios[1])
		printf("rw=randwrite\n");
	else if (o->ios[2])
		printf("rw=randtrim\n");
	else {
		printf("rw=randrw\n");
		total = ddir_rw_sum(o->ios);
		perc = ((float) o->ios[0] * 100.0) / (float) total;
		printf("rwmixread=%u\n", (int) (perc + 0.99));
	}

	printf("percentage_random=");
	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
		if (o->seq[i] && o->ios[i]) {
			perc = ((float) o->seq[i] * 100.0) / (float) o->ios[i];
			if (perc >= 99.0)
				perc = 100.0;
		} else
			perc = 100.0;

		if (i)
			printf(",");
		perc = 100.0 - perc;
		printf("%u", (int) perc);
	}
	printf("\n");

	printf("filename=");
	for (i = 0; i < p->nr_files; i++) {
		if (i)
			printf(":");
		printf("%s", p->files[i].name);
	}
	printf("\n");

	printf("startdelay=%llus\n", o->start_delay / 1000000ULL);

	time = o_longest_ttime(o);
	time = (time + 1000000000ULL - 1) / 1000000000ULL;
	printf("runtime=%llus\n", time);

	printf("bssplit=");
	for (i = 0; i < DDIR_RWDIR_CNT; i++) {

		if (i && o->nr_bs[i - 1] && o->nr_bs[i])
			printf(",");

		for (j = 0; j < o->nr_bs[i]; j++) {
			struct bs *bs = &o->bs[i][j];

			perc = (((float) bs->nr * 100.0) / (float) o->ios[i]);
			if (perc < 1.00)
				continue;
			if (j)
				printf(":");
			if (j + 1 == o->nr_bs[i])
				printf("%u/", bs->bs);
			else
				printf("%u/%u", bs->bs, (int) perc);
		}
	}
	printf("\n");

	if (set_rate) {
		printf("rate=");
		for (i = 0; i < DDIR_RWDIR_CNT; i++) {
			unsigned long rate;

			rate = o_to_kb_rate(o, i);
			if (i)
				printf(",");
			if (rate)
				printf("%luk", rate);
		}
		printf("\n");
	}

	printf("\n");
	return 0;
}

static int __output_p(struct btrace_pid *p, unsigned long *ios)
{
	struct btrace_out *o = &p->o;
	int i, ret = 0;

	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
		if (o->nr_bs[i] <= 1)
			continue;
		qsort(o->bs[i], o->nr_bs[i], sizeof(struct bs), bs_cmp);
	}

	if (filename) {
		p->files = malloc(sizeof(struct trace_file));
		p->nr_files++;
		p->files[0].name = filename;
	}

	if (output_ascii)
		__output_p_ascii(p, ios);
	else
		ret = __output_p_fio(p, ios);

	return ret;
}

static void remove_ddir(struct btrace_out *o, int rw)
{
	o->ios[rw] = 0;
}

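/*
 * Decide whether to drop a task: too few IOs, too short a runtime, or an
 * aggregate rate below the -r threshold. Directions below the threshold
 * are zeroed out individually.
 */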
static int prune_entry(struct btrace_out *o)
{
	unsigned long rate;
	uint64_t time;
	int i;

	if (ddir_rw_sum(o->ios) < ios_threshold)
		return 1;

	time = o_longest_ttime(o) / 1000ULL;
	if (time < rt_threshold)
		return 1;

	rate = 0;
	for (i = 0; i < DDIR_RWDIR_CNT; i++) {
		unsigned long this_rate;

		this_rate = o_to_kb_rate(o, i);
		if (this_rate < rate_threshold) {
			remove_ddir(o, i);
			this_rate = 0;
		}
		rate += this_rate;
	}

	if (rate < rate_threshold)
		return 1;

	return 0;
}

static int entry_cmp(void *priv, struct flist_head *a, struct flist_head *b)
{
	struct btrace_pid *pa = flist_entry(a, struct btrace_pid, pid_list);
	struct btrace_pid *pb = flist_entry(b, struct btrace_pid, pid_list);

	return ddir_rw_sum(pb->o.ios) - ddir_rw_sum(pa->o.ios);
}

static void free_p(struct btrace_pid *p)
{
	struct btrace_out *o = &p->o;
	int i;

	for (i = 0; i < p->nr_files; i++) {
		if (p->files[i].name && p->files[i].name != filename)
			free(p->files[i].name);
	}

	for (i = 0; i < DDIR_RWDIR_CNT; i++)
		free(o->bs[i]);

	free(p->files);
	flist_del(&p->pid_list);
	flist_del(&p->hash_list);
	free(p);
}

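/*
 * Prune uninteresting pids, compute each pid's start delay relative to the
 * first trace event, sort by IO count and emit either ASCII statistics or
 * a fio job file.
 */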
static int output_p(void)
{
	unsigned long ios[DDIR_RWDIR_CNT];
	struct flist_head *e, *tmp;
	int depth_disabled = 0;
	int ret = 0;

	flist_for_each_safe(e, tmp, &pid_list) {
		struct btrace_pid *p;

		p = flist_entry(e, struct btrace_pid, pid_list);
		if (prune_entry(&p->o)) {
			free_p(p);
			continue;
		}
		p->o.start_delay = (o_first_ttime(&p->o) / 1000ULL) - first_ttime;
		depth_disabled += p->o.depth_disabled;
	}

	if (depth_disabled)
		log_err("fio: missing completion traces, depths capped at %u\n", max_depth);

	memset(ios, 0, sizeof(ios));

	flist_sort(NULL, &pid_list, entry_cmp);

	flist_for_each(e, &pid_list) {
		struct btrace_pid *p;

		p = flist_entry(e, struct btrace_pid, pid_list);
		ret |= __output_p(p, ios);
		if (ret && !output_ascii)
			break;
	}

	if (output_ascii)
		printf("Total: reads=%lu, writes=%lu\n", ios[0], ios[1]);

	return ret;
}

static int usage(char *argv[])
{
	log_err("%s: <blktrace bin file>\n", argv[0]);
	log_err("\t-t\tUsec threshold to ignore task\n");
	log_err("\t-n\tIO count threshold to ignore task\n");
	log_err("\t-f\tFio job file output\n");
	log_err("\t-d\tUse this file/device for replay\n");
	log_err("\t-r\tIgnore jobs with less than this KB/sec rate\n");
	log_err("\t-R\tSet rate in fio job\n");
	log_err("\t-D\tCap queue depth at this value (def=%u)\n", max_depth);
	return 1;
}

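/*
 * Peek at the first trace entry to decide whether the file was recorded
 * with the opposite endianness and needs byte swapping.
 */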
static int trace_needs_swap(const char *trace_file, int *swap)
{
	struct blk_io_trace t;
	int fd, ret;

	*swap = -1;

	fd = open(trace_file, O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	ret = read(fd, &t, sizeof(t));
	if (ret < 0) {
		close(fd);
		perror("read");
		return 1;
	} else if (ret != sizeof(t)) {
		close(fd);
		log_err("fio: short read on trace file\n");
		return 1;
	}

	close(fd);

	if ((t.magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
		*swap = 0;
	else {
		/*
		 * Maybe it needs to be endian swapped...
		 */
		t.magic = fio_swap32(t.magic);
		if ((t.magic & 0xffffff00) == BLK_IO_TRACE_MAGIC)
			*swap = 1;
	}

	if (*swap == -1) {
		log_err("fio: blktrace appears corrupt\n");
		return 1;
	}

	return 0;
}

int main(int argc, char *argv[])
{
	int need_swap, i, c;

	if (argc < 2)
		return usage(argv);

	while ((c = getopt(argc, argv, "t:n:fd:r:RD:")) != -1) {
		switch (c) {
		case 'R':
			set_rate = 1;
			break;
		case 'r':
			rate_threshold = atoi(optarg);
			break;
		case 't':
			rt_threshold = atoi(optarg);
			break;
		case 'n':
			ios_threshold = atoi(optarg);
			break;
		case 'f':
			output_ascii = 0;
			break;
		case 'd':
			filename = strdup(optarg);
			break;
		case 'D':
			max_depth = atoi(optarg);
			break;
		case '?':
		default:
			return usage(argv);
		}
	}

	if (argc == optind)
		return usage(argv);

	if (trace_needs_swap(argv[optind], &need_swap))
		return 1;

	for (i = 0; i < PID_HASH_SIZE; i++)
		INIT_FLIST_HEAD(&pid_hash[i]);
	for (i = 0; i < INFLIGHT_HASH_SIZE; i++)
		INIT_FLIST_HEAD(&inflight_hash[i]);

	if (load_blktrace(argv[optind], need_swap))
		return 1;

	first_ttime /= 1000ULL;

	return output_p();
}