log.c revision 0d29de831183dfd049c97a03008d425ce21e2fa4
1/*
2 * Code related to writing an iolog of what a thread is doing, and to
3 * later read that back and replay
4 */
5#include <stdio.h>
6#include <stdlib.h>
7#include <libgen.h>
8#include <assert.h>
9#include "flist.h"
10#include "fio.h"
11#include "verify.h"
12#include "trim.h"
13
14static const char iolog_ver2[] = "fio version 2 iolog";
15
16void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
17{
18	flist_add_tail(&ipo->list, &td->io_log_list);
19	td->total_io_size += ipo->len;
20}
21
22void log_io_u(struct thread_data *td, struct io_u *io_u)
23{
24	const char *act[] = { "read", "write", "sync", "datasync",
25				"sync_file_range", "wait", "trim" };
26
27	assert(io_u->ddir <= 6);
28
29	if (!td->o.write_iolog_file)
30		return;
31
32	fprintf(td->iolog_f, "%s %s %llu %lu\n", io_u->file->file_name,
33						act[io_u->ddir], io_u->offset,
34						io_u->buflen);
35}
36
37void log_file(struct thread_data *td, struct fio_file *f,
38	      enum file_log_act what)
39{
40	const char *act[] = { "add", "open", "close" };
41
42	assert(what < 3);
43
44	if (!td->o.write_iolog_file)
45		return;
46
47
48	/*
49	 * this happens on the pre-open/close done before the job starts
50	 */
51	if (!td->iolog_f)
52		return;
53
54	fprintf(td->iolog_f, "%s %s\n", f->file_name, act[what]);
55}
56
57static void iolog_delay(struct thread_data *td, unsigned long delay)
58{
59	unsigned long usec = utime_since_now(&td->last_issue);
60
61	if (delay < usec)
62		return;
63
64	delay -= usec;
65
66	/*
67	 * less than 100 usec delay, just regard it as noise
68	 */
69	if (delay < 100)
70		return;
71
72	usec_sleep(td, delay);
73}
74
75static int ipo_special(struct thread_data *td, struct io_piece *ipo)
76{
77	struct fio_file *f;
78	int ret;
79
80	/*
81	 * Not a special ipo
82	 */
83	if (ipo->ddir != DDIR_INVAL)
84		return 0;
85
86	f = td->files[ipo->fileno];
87
88	switch (ipo->file_action) {
89	case FIO_LOG_OPEN_FILE:
90		ret = td_io_open_file(td, f);
91		if (!ret)
92			break;
93		td_verror(td, ret, "iolog open file");
94		return -1;
95	case FIO_LOG_CLOSE_FILE:
96		td_io_close_file(td, f);
97		break;
98	case FIO_LOG_UNLINK_FILE:
99		unlink(f->file_name);
100		break;
101	default:
102		log_err("fio: bad file action %d\n", ipo->file_action);
103		break;
104	}
105
106	return 1;
107}
108
109int read_iolog_get(struct thread_data *td, struct io_u *io_u)
110{
111	struct io_piece *ipo;
112	unsigned long elapsed;
113
114	while (!flist_empty(&td->io_log_list)) {
115		int ret;
116
117		ipo = flist_entry(td->io_log_list.next, struct io_piece, list);
118		flist_del(&ipo->list);
119		remove_trim_entry(td, ipo);
120
121		ret = ipo_special(td, ipo);
122		if (ret < 0) {
123			free(ipo);
124			break;
125		} else if (ret > 0) {
126			free(ipo);
127			continue;
128		}
129
130		io_u->ddir = ipo->ddir;
131		if (ipo->ddir != DDIR_WAIT) {
132			io_u->offset = ipo->offset;
133			io_u->buflen = ipo->len;
134			io_u->file = td->files[ipo->fileno];
135			get_file(io_u->file);
136			dprint(FD_IO, "iolog: get %llu/%lu/%s\n", io_u->offset,
137						io_u->buflen, io_u->file->file_name);
138			if (ipo->delay)
139				iolog_delay(td, ipo->delay);
140		} else {
141			elapsed = mtime_since_genesis();
142			if (ipo->delay > elapsed)
143				usec_sleep(td, (ipo->delay - elapsed) * 1000);
144
145		}
146
147		free(ipo);
148
149		if (io_u->ddir != DDIR_WAIT)
150			return 0;
151	}
152
153	td->done = 1;
154	return 1;
155}
156
157void prune_io_piece_log(struct thread_data *td)
158{
159	struct io_piece *ipo;
160	struct rb_node *n;
161
162	while ((n = rb_first(&td->io_hist_tree)) != NULL) {
163		ipo = rb_entry(n, struct io_piece, rb_node);
164		rb_erase(n, &td->io_hist_tree);
165		remove_trim_entry(td, ipo);
166		td->io_hist_len--;
167		free(ipo);
168	}
169
170	while (!flist_empty(&td->io_hist_list)) {
171		ipo = flist_entry(td->io_hist_list.next, struct io_piece, list);
172		flist_del(&ipo->list);
173		remove_trim_entry(td, ipo);
174		td->io_hist_len--;
175		free(ipo);
176	}
177}
178
179/*
180 * log a successful write, so we can unwind the log for verify
181 */
182void log_io_piece(struct thread_data *td, struct io_u *io_u)
183{
184	struct rb_node **p, *parent;
185	struct io_piece *ipo, *__ipo;
186
187	ipo = malloc(sizeof(struct io_piece));
188	init_ipo(ipo);
189	ipo->file = io_u->file;
190	ipo->offset = io_u->offset;
191	ipo->len = io_u->buflen;
192
193	if (io_u_should_trim(td, io_u)) {
194		flist_add_tail(&ipo->trim_list, &td->trim_list);
195		td->trim_entries++;
196	}
197
198	/*
199	 * We don't need to sort the entries, if:
200	 *
201	 *	Sequential writes, or
202	 *	Random writes that lay out the file as it goes along
203	 *
204	 * For both these cases, just reading back data in the order we
205	 * wrote it out is the fastest.
206	 *
207	 * One exception is if we don't have a random map AND we are doing
208	 * verifies, in that case we need to check for duplicate blocks and
209	 * drop the old one, which we rely on the rb insert/lookup for
210	 * handling.
211	 */
212	if ((!td_random(td) || !td->o.overwrite) &&
213	      (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) {
214		INIT_FLIST_HEAD(&ipo->list);
215		flist_add_tail(&ipo->list, &td->io_hist_list);
216		ipo->flags |= IP_F_ONLIST;
217		td->io_hist_len++;
218		return;
219	}
220
221	RB_CLEAR_NODE(&ipo->rb_node);
222
223	/*
224	 * Sort the entry into the verification list
225	 */
226restart:
227	p = &td->io_hist_tree.rb_node;
228	parent = NULL;
229	while (*p) {
230		parent = *p;
231
232		__ipo = rb_entry(parent, struct io_piece, rb_node);
233		if (ipo->file < __ipo->file)
234			p = &(*p)->rb_left;
235		else if (ipo->file > __ipo->file)
236			p = &(*p)->rb_right;
237		else if (ipo->offset < __ipo->offset)
238			p = &(*p)->rb_left;
239		else if (ipo->offset > __ipo->offset)
240			p = &(*p)->rb_right;
241		else {
242			assert(ipo->len == __ipo->len);
243			td->io_hist_len--;
244			rb_erase(parent, &td->io_hist_tree);
245			remove_trim_entry(td, __ipo);
246			free(__ipo);
247			goto restart;
248		}
249	}
250
251	rb_link_node(&ipo->rb_node, parent, p);
252	rb_insert_color(&ipo->rb_node, &td->io_hist_tree);
253	ipo->flags |= IP_F_ONRB;
254	td->io_hist_len++;
255}
256
257void write_iolog_close(struct thread_data *td)
258{
259	fflush(td->iolog_f);
260	fclose(td->iolog_f);
261	free(td->iolog_buf);
262	td->iolog_f = NULL;
263	td->iolog_buf = NULL;
264}
265
266/*
267 * Read version 2 iolog data. It is enhanced to include per-file logging,
268 * syncs, etc.
269 */
270static int read_iolog2(struct thread_data *td, FILE *f)
271{
272	unsigned long long offset;
273	unsigned int bytes;
274	int reads, writes, waits, fileno = 0, file_action = 0; /* stupid gcc */
275	char *fname, *act;
276	char *str, *p;
277	enum fio_ddir rw;
278
279	free_release_files(td);
280
281	/*
282	 * Read in the read iolog and store it, reuse the infrastructure
283	 * for doing verifications.
284	 */
285	str = malloc(4096);
286	fname = malloc(256+16);
287	act = malloc(256+16);
288
289	reads = writes = waits = 0;
290	while ((p = fgets(str, 4096, f)) != NULL) {
291		struct io_piece *ipo;
292		int r;
293
294		r = sscanf(p, "%256s %256s %llu %u", fname, act, &offset,
295									&bytes);
296		if (r == 4) {
297			/*
298			 * Check action first
299			 */
300			if (!strcmp(act, "wait"))
301				rw = DDIR_WAIT;
302			else if (!strcmp(act, "read"))
303				rw = DDIR_READ;
304			else if (!strcmp(act, "write"))
305				rw = DDIR_WRITE;
306			else if (!strcmp(act, "sync"))
307				rw = DDIR_SYNC;
308			else if (!strcmp(act, "datasync"))
309				rw = DDIR_DATASYNC;
310			else if (!strcmp(act, "trim"))
311				rw = DDIR_TRIM;
312			else {
313				log_err("fio: bad iolog file action: %s\n",
314									act);
315				continue;
316			}
317		} else if (r == 2) {
318			rw = DDIR_INVAL;
319			if (!strcmp(act, "add")) {
320				td->o.nr_files++;
321				fileno = add_file(td, fname);
322				file_action = FIO_LOG_ADD_FILE;
323				continue;
324			} else if (!strcmp(act, "open")) {
325				fileno = get_fileno(td, fname);
326				file_action = FIO_LOG_OPEN_FILE;
327			} else if (!strcmp(act, "close")) {
328				fileno = get_fileno(td, fname);
329				file_action = FIO_LOG_CLOSE_FILE;
330			} else {
331				log_err("fio: bad iolog file action: %s\n",
332									act);
333				continue;
334			}
335		} else {
336			log_err("bad iolog2: %s", p);
337			continue;
338		}
339
340		if (rw == DDIR_READ)
341			reads++;
342		else if (rw == DDIR_WRITE) {
343			/*
344			 * Don't add a write for ro mode
345			 */
346			if (read_only)
347				continue;
348			writes++;
349		} else if (rw == DDIR_WAIT) {
350			waits++;
351		} else if (rw == DDIR_INVAL) {
352		} else if (!ddir_sync(rw)) {
353			log_err("bad ddir: %d\n", rw);
354			continue;
355		}
356
357		/*
358		 * Make note of file
359		 */
360		ipo = malloc(sizeof(*ipo));
361		init_ipo(ipo);
362		ipo->ddir = rw;
363		if (rw == DDIR_WAIT) {
364			ipo->delay = offset;
365		} else {
366			ipo->offset = offset;
367			ipo->len = bytes;
368			if (bytes > td->o.max_bs[rw])
369				td->o.max_bs[rw] = bytes;
370			ipo->fileno = fileno;
371			ipo->file_action = file_action;
372		}
373
374		queue_io_piece(td, ipo);
375	}
376
377	free(str);
378	free(act);
379	free(fname);
380
381	if (writes && read_only) {
382		log_err("fio: <%s> skips replay of %d writes due to"
383			" read-only\n", td->o.name, writes);
384		writes = 0;
385	}
386
387	if (!reads && !writes && !waits)
388		return 1;
389	else if (reads && !writes)
390		td->o.td_ddir = TD_DDIR_READ;
391	else if (!reads && writes)
392		td->o.td_ddir = TD_DDIR_WRITE;
393	else
394		td->o.td_ddir = TD_DDIR_RW;
395
396	return 0;
397}
398
399/*
400 * open iolog, check version, and call appropriate parser
401 */
402static int init_iolog_read(struct thread_data *td)
403{
404	char buffer[256], *p;
405	FILE *f;
406	int ret;
407
408	f = fopen(td->o.read_iolog_file, "r");
409	if (!f) {
410		perror("fopen read iolog");
411		return 1;
412	}
413
414	p = fgets(buffer, sizeof(buffer), f);
415	if (!p) {
416		td_verror(td, errno, "iolog read");
417		log_err("fio: unable to read iolog\n");
418		return 1;
419	}
420
421	/*
422	 * version 2 of the iolog stores a specific string as the
423	 * first line, check for that
424	 */
425	if (!strncmp(iolog_ver2, buffer, strlen(iolog_ver2)))
426		ret = read_iolog2(td, f);
427	else {
428		log_err("fio: iolog version 1 is no longer supported\n");
429		ret = 1;
430	}
431
432	fclose(f);
433	return ret;
434}
435
436/*
437 * Setup a log for storing io patterns.
438 */
439static int init_iolog_write(struct thread_data *td)
440{
441	struct fio_file *ff;
442	FILE *f;
443	unsigned int i;
444
445	f = fopen(td->o.write_iolog_file, "a");
446	if (!f) {
447		perror("fopen write iolog");
448		return 1;
449	}
450
451	/*
452	 * That's it for writing, setup a log buffer and we're done.
453	  */
454	td->iolog_f = f;
455	td->iolog_buf = malloc(8192);
456	setvbuf(f, td->iolog_buf, _IOFBF, 8192);
457
458	/*
459	 * write our version line
460	 */
461	if (fprintf(f, "%s\n", iolog_ver2) < 0) {
462		perror("iolog init\n");
463		return 1;
464	}
465
466	/*
467	 * add all known files
468	 */
469	for_each_file(td, ff, i)
470		log_file(td, ff, FIO_LOG_ADD_FILE);
471
472	return 0;
473}
474
475int init_iolog(struct thread_data *td)
476{
477	int ret = 0;
478
479	if (td->o.read_iolog_file) {
480		/*
481		 * Check if it's a blktrace file and load that if possible.
482		 * Otherwise assume it's a normal log file and load that.
483		 */
484		if (is_blktrace(td->o.read_iolog_file))
485			ret = load_blktrace(td, td->o.read_iolog_file);
486		else
487			ret = init_iolog_read(td);
488	} else if (td->o.write_iolog_file)
489		ret = init_iolog_write(td);
490
491	return ret;
492}
493
494void setup_log(struct io_log **log)
495{
496	struct io_log *l = malloc(sizeof(*l));
497
498	l->nr_samples = 0;
499	l->max_samples = 1024;
500	l->log = malloc(l->max_samples * sizeof(struct io_sample));
501	*log = l;
502}
503
504void __finish_log(struct io_log *log, const char *name)
505{
506	unsigned int i;
507	FILE *f;
508
509	f = fopen(name, "a");
510	if (!f) {
511		perror("fopen log");
512		return;
513	}
514
515	for (i = 0; i < log->nr_samples; i++) {
516		fprintf(f, "%lu, %lu, %u, %u\n", log->log[i].time,
517						log->log[i].val,
518						log->log[i].ddir,
519						log->log[i].bs);
520	}
521
522	fclose(f);
523	free(log->log);
524	free(log);
525}
526
527void finish_log_named(struct thread_data *td, struct io_log *log,
528		       const char *prefix, const char *postfix)
529{
530	char file_name[256], *p;
531
532	snprintf(file_name, 200, "%s_%s.log", prefix, postfix);
533	p = basename(file_name);
534	__finish_log(log, p);
535}
536
537void finish_log(struct thread_data *td, struct io_log *log, const char *name)
538{
539	finish_log_named(td, log, td->o.name, name);
540}
541