/* session.c revision e6817ec1d8ab31fc7b01906e305f848542df6413 */
#define _FILE_OFFSET_BITS 64

#include <linux/kernel.h>

#include <byteswap.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "evlist.h"
#include "evsel.h"
#include "session.h"
#include "sort.h"
#include "util.h"

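/*
 * Open the session input: "-" means reading a live stream from stdin
 * (pipe mode), anything else is treated as a perf.data file, which is
 * sanity checked (ownership, non-zero size) before its header and the
 * evsels' sample configuration are validated.
 */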
static int perf_session__open(struct perf_session *self, bool force)
{
	struct stat input_stat;

	if (!strcmp(self->filename, "-")) {
		self->fd_pipe = true;
		self->fd = STDIN_FILENO;

		if (perf_session__read_header(self, self->fd) < 0) {
			pr_err("incompatible file format");
			return -1;
		}

		return 0;
	}

	self->fd = open(self->filename, O_RDONLY);
	if (self->fd < 0) {
		int err = errno;

		pr_err("failed to open %s: %s", self->filename, strerror(err));
		if (err == ENOENT && !strcmp(self->filename, "perf.data"))
			pr_err("  (try 'perf record' first)");
		pr_err("\n");
		return -err;
	}

	if (fstat(self->fd, &input_stat) < 0)
		goto out_close;

	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
		pr_err("file %s not owned by current user or root\n",
		       self->filename);
		goto out_close;
	}

	if (!input_stat.st_size) {
		pr_info("zero-sized file (%s), nothing to do!\n",
			self->filename);
		goto out_close;
	}

	if (perf_session__read_header(self, self->fd) < 0) {
		pr_err("incompatible file format");
		goto out_close;
	}

	if (!perf_evlist__valid_sample_type(self->evlist)) {
		pr_err("non matching sample_type");
		goto out_close;
	}

	if (!perf_evlist__valid_sample_id_all(self->evlist)) {
		pr_err("non matching sample_id_all");
		goto out_close;
	}

	self->size = input_stat.st_size;
	return 0;

out_close:
	close(self->fd);
	self->fd = -1;
	return -1;
}

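/*
 * Compute the size of the sample_id_all trailer appended to non-sample
 * events: TID (pid + tid), TIME, ID, STREAM_ID and CPU (cpu + res),
 * depending on the bits set in the session's sample_type.
 */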
static void perf_session__id_header_size(struct perf_session *session)
{
	struct perf_sample *data;
	u64 sample_type = session->sample_type;
	u16 size = 0;

	if (!session->sample_id_all)
		goto out;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;
out:
	session->id_hdr_size = size;
}

void perf_session__update_sample_type(struct perf_session *self)
{
	self->sample_type = perf_evlist__sample_type(self->evlist);
	self->sample_size = __perf_evsel__sample_size(self->sample_type);
	self->sample_id_all = perf_evlist__sample_id_all(self->evlist);
	perf_session__id_header_size(self);
}

int perf_session__create_kernel_maps(struct perf_session *self)
{
	int ret = machine__create_kernel_maps(&self->host_machine);

	if (ret >= 0)
		ret = machines__create_guest_kernel_maps(&self->machines);
	return ret;
}

static void perf_session__destroy_kernel_maps(struct perf_session *self)
{
	machine__destroy_kernel_maps(&self->host_machine);
	machines__destroy_guest_kernel_maps(&self->machines);
}

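/*
 * Allocate and initialize a session. The filename is stored inline at
 * the end of the allocation. In O_RDONLY mode the input is opened and
 * the sample type read from its header; in O_WRONLY mode the kernel
 * maps are created up front instead.
 */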
struct perf_session *perf_session__new(const char *filename, int mode,
				       bool force, bool repipe,
				       struct perf_event_ops *ops)
{
	size_t len = filename ? strlen(filename) + 1 : 0;
	struct perf_session *self = zalloc(sizeof(*self) + len);

	if (self == NULL)
		goto out;

	memcpy(self->filename, filename, len);
	self->threads = RB_ROOT;
	INIT_LIST_HEAD(&self->dead_threads);
	self->last_match = NULL;
	/*
	 * On 64bit we can mmap the data file in one go. No need for tiny mmap
	 * slices. On 32bit we use 32MB.
	 */
#if BITS_PER_LONG == 64
	self->mmap_window = ULLONG_MAX;
#else
	self->mmap_window = 32 * 1024 * 1024ULL;
#endif
	self->machines = RB_ROOT;
	self->repipe = repipe;
	INIT_LIST_HEAD(&self->ordered_samples.samples);
	INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
	INIT_LIST_HEAD(&self->ordered_samples.to_free);
	machine__init(&self->host_machine, "", HOST_KERNEL_ID);

	if (mode == O_RDONLY) {
		if (perf_session__open(self, force) < 0)
			goto out_delete;
		perf_session__update_sample_type(self);
	} else if (mode == O_WRONLY) {
		/*
		 * In O_RDONLY mode this will be performed when reading the
		 * kernel MMAP event, in perf_event__process_mmap().
		 */
		if (perf_session__create_kernel_maps(self) < 0)
			goto out_delete;
	}

	if (ops && ops->ordering_requires_timestamps &&
	    ops->ordered_samples && !self->sample_id_all) {
		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
		ops->ordered_samples = false;
	}

out:
	return self;
out_delete:
	perf_session__delete(self);
	return NULL;
}

static void perf_session__delete_dead_threads(struct perf_session *self)
{
	struct thread *n, *t;

	list_for_each_entry_safe(t, n, &self->dead_threads, node) {
		list_del(&t->node);
		thread__delete(t);
	}
}

static void perf_session__delete_threads(struct perf_session *self)
{
	struct rb_node *nd = rb_first(&self->threads);

	while (nd) {
		struct thread *t = rb_entry(nd, struct thread, rb_node);

		rb_erase(&t->rb_node, &self->threads);
		nd = rb_next(nd);
		thread__delete(t);
	}
}

void perf_session__delete(struct perf_session *self)
{
	perf_session__destroy_kernel_maps(self);
	perf_session__delete_dead_threads(self);
	perf_session__delete_threads(self);
	machine__exit(&self->host_machine);
	close(self->fd);
	free(self);
}

void perf_session__remove_thread(struct perf_session *self, struct thread *th)
{
	self->last_match = NULL;
	rb_erase(&th->rb_node, &self->threads);
	/*
	 * We may have references to this thread, for instance in some hist_entry
	 * instances, so just move them to a separate list.
	 */
	list_add_tail(&th->node, &self->dead_threads);
}

static bool symbol__match_parent_regex(struct symbol *sym)
{
	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
		return true;

	return false;
}

int perf_session__resolve_callchain(struct perf_session *self,
				    struct thread *thread,
				    struct ip_callchain *chain,
				    struct symbol **parent)
{
	u8 cpumode = PERF_RECORD_MISC_USER;
	unsigned int i;
	int err;

	callchain_cursor_reset(&self->callchain_cursor);

	for (i = 0; i < chain->nr; i++) {
		u64 ip = chain->ips[i];
		struct addr_location al;

		if (ip >= PERF_CONTEXT_MAX) {
			switch (ip) {
			case PERF_CONTEXT_HV:
				cpumode = PERF_RECORD_MISC_HYPERVISOR;	break;
			case PERF_CONTEXT_KERNEL:
				cpumode = PERF_RECORD_MISC_KERNEL;	break;
			case PERF_CONTEXT_USER:
				cpumode = PERF_RECORD_MISC_USER;	break;
			default:
				break;
			}
			continue;
		}

		al.filtered = false;
		thread__find_addr_location(thread, self, cpumode,
				MAP__FUNCTION, thread->pid, ip, &al, NULL);
		if (al.sym != NULL) {
			if (sort__has_parent && !*parent &&
			    symbol__match_parent_regex(al.sym))
				*parent = al.sym;
			if (!symbol_conf.use_callchain)
				break;
		}

		err = callchain_cursor_append(&self->callchain_cursor,
					      ip, al.map, al.sym);
		if (err)
			return err;
	}

	return 0;
}

static int process_event_synth_stub(union perf_event *event __used,
				    struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_sample_stub(union perf_event *event __used,
				     struct perf_sample *sample __used,
				     struct perf_evsel *evsel __used,
				     struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_event_stub(union perf_event *event __used,
			      struct perf_sample *sample __used,
			      struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round_stub(union perf_event *event __used,
				       struct perf_session *session __used,
				       struct perf_event_ops *ops __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round(union perf_event *event,
				  struct perf_session *session,
				  struct perf_event_ops *ops);

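/*
 * Fill every callback the tool left NULL with a stub so the dispatch
 * code can call them unconditionally. Lost events default to the real
 * perf_event__process_lost handler; finished-round events only get a
 * real handler when ordered processing was requested.
 */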
static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
{
	if (handler->sample == NULL)
		handler->sample = process_event_sample_stub;
	if (handler->mmap == NULL)
		handler->mmap = process_event_stub;
	if (handler->comm == NULL)
		handler->comm = process_event_stub;
	if (handler->fork == NULL)
		handler->fork = process_event_stub;
	if (handler->exit == NULL)
		handler->exit = process_event_stub;
	if (handler->lost == NULL)
		handler->lost = perf_event__process_lost;
	if (handler->read == NULL)
		handler->read = process_event_stub;
	if (handler->throttle == NULL)
		handler->throttle = process_event_stub;
	if (handler->unthrottle == NULL)
		handler->unthrottle = process_event_stub;
	if (handler->attr == NULL)
		handler->attr = process_event_synth_stub;
	if (handler->event_type == NULL)
		handler->event_type = process_event_synth_stub;
	if (handler->tracing_data == NULL)
		handler->tracing_data = process_event_synth_stub;
	if (handler->build_id == NULL)
		handler->build_id = process_event_synth_stub;
	if (handler->finished_round == NULL) {
		if (handler->ordered_samples)
			handler->finished_round = process_finished_round;
		else
			handler->finished_round = process_finished_round_stub;
	}
}

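/*
 * Byte-swap a buffer in place, one u64 at a time. byte_size is expected
 * to be a multiple of sizeof(u64), which holds for the event records
 * this is used on.
 */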
void mem_bswap_64(void *src, int byte_size)
{
	u64 *m = src;

	while (byte_size > 0) {
		*m = bswap_64(*m);
		byte_size -= sizeof(u64);
		++m;
	}
}

static void perf_event__all64_swap(union perf_event *event)
{
	struct perf_event_header *hdr = &event->header;
	mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
}

static void perf_event__comm_swap(union perf_event *event)
{
	event->comm.pid = bswap_32(event->comm.pid);
	event->comm.tid = bswap_32(event->comm.tid);
}

static void perf_event__mmap_swap(union perf_event *event)
{
	event->mmap.pid	  = bswap_32(event->mmap.pid);
	event->mmap.tid	  = bswap_32(event->mmap.tid);
	event->mmap.start = bswap_64(event->mmap.start);
	event->mmap.len	  = bswap_64(event->mmap.len);
	event->mmap.pgoff = bswap_64(event->mmap.pgoff);
}

static void perf_event__task_swap(union perf_event *event)
{
	event->fork.pid	 = bswap_32(event->fork.pid);
	event->fork.tid	 = bswap_32(event->fork.tid);
	event->fork.ppid = bswap_32(event->fork.ppid);
	event->fork.ptid = bswap_32(event->fork.ptid);
	event->fork.time = bswap_64(event->fork.time);
}

static void perf_event__read_swap(union perf_event *event)
{
	event->read.pid		 = bswap_32(event->read.pid);
	event->read.tid		 = bswap_32(event->read.tid);
	event->read.value	 = bswap_64(event->read.value);
	event->read.time_enabled = bswap_64(event->read.time_enabled);
	event->read.time_running = bswap_64(event->read.time_running);
	event->read.id		 = bswap_64(event->read.id);
}

/* exported for swapping attributes in file header */
void perf_event__attr_swap(struct perf_event_attr *attr)
{
	attr->type		= bswap_32(attr->type);
	attr->size		= bswap_32(attr->size);
	attr->config		= bswap_64(attr->config);
	attr->sample_period	= bswap_64(attr->sample_period);
	attr->sample_type	= bswap_64(attr->sample_type);
	attr->read_format	= bswap_64(attr->read_format);
	attr->wakeup_events	= bswap_32(attr->wakeup_events);
	attr->bp_type		= bswap_32(attr->bp_type);
	attr->bp_addr		= bswap_64(attr->bp_addr);
	attr->bp_len		= bswap_64(attr->bp_len);
}

static void perf_event__hdr_attr_swap(union perf_event *event)
{
	size_t size;

	perf_event__attr_swap(&event->attr.attr);

	size = event->header.size;
	size -= (void *)&event->attr.id - (void *)event;
	mem_bswap_64(event->attr.id, size);
}

static void perf_event__event_type_swap(union perf_event *event)
{
	event->event_type.event_type.event_id =
		bswap_64(event->event_type.event_type.event_id);
}

static void perf_event__tracing_data_swap(union perf_event *event)
{
	event->tracing_data.size = bswap_32(event->tracing_data.size);
}

typedef void (*perf_event__swap_op)(union perf_event *event);

static perf_event__swap_op perf_event__swap_ops[] = {
	[PERF_RECORD_MMAP]		  = perf_event__mmap_swap,
	[PERF_RECORD_COMM]		  = perf_event__comm_swap,
	[PERF_RECORD_FORK]		  = perf_event__task_swap,
	[PERF_RECORD_EXIT]		  = perf_event__task_swap,
	[PERF_RECORD_LOST]		  = perf_event__all64_swap,
	[PERF_RECORD_READ]		  = perf_event__read_swap,
	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
	[PERF_RECORD_HEADER_MAX]	  = NULL,
};

struct sample_queue {
	u64			timestamp;
	u64			file_offset;
	union perf_event	*event;
	struct list_head	list;
};

static void perf_session_free_sample_buffers(struct perf_session *session)
{
	struct ordered_samples *os = &session->ordered_samples;

	while (!list_empty(&os->to_free)) {
		struct sample_queue *sq;

		sq = list_entry(os->to_free.next, struct sample_queue, list);
		list_del(&sq->list);
		free(sq);
	}
}

static int perf_session_deliver_event(struct perf_session *session,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct perf_event_ops *ops,
				      u64 file_offset);

static void flush_sample_queue(struct perf_session *s,
			       struct perf_event_ops *ops)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *head = &os->samples;
	struct sample_queue *tmp, *iter;
	struct perf_sample sample;
	u64 limit = os->next_flush;
	u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
	int ret;

	if (!ops->ordered_samples || !limit)
		return;

	list_for_each_entry_safe(iter, tmp, head, list) {
		if (iter->timestamp > limit)
			break;

		ret = perf_session__parse_sample(s, iter->event, &sample);
		if (ret)
			pr_err("Can't parse sample, err = %d\n", ret);
		else
			perf_session_deliver_event(s, iter->event, &sample, ops,
						   iter->file_offset);

		os->last_flush = iter->timestamp;
		list_del(&iter->list);
		list_add(&iter->list, &os->sample_cache);
	}

	if (list_empty(head)) {
		os->last_sample = NULL;
	} else if (last_ts <= limit) {
		os->last_sample =
			list_entry(head->prev, struct sample_queue, list);
	}
}

/*
 * When perf record finishes a pass over every buffer, it records this
 * pseudo event.
 * We record the max timestamp t found in pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will all be
 * available and then read in pass n + 1.
 * Hence when we start to read pass n + 2, we can safely flush every
 * event with a timestamp below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7 <---- max recorded
 *
 *      Flush every event below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush every event below timestamp 7
 *      etc...
 */
static int process_finished_round(union perf_event *event __used,
				  struct perf_session *session,
				  struct perf_event_ops *ops)
{
	flush_sample_queue(session, ops);
	session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;

	return 0;
}

/* The queue is ordered by time */
static void __queue_event(struct sample_queue *new, struct perf_session *s)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct sample_queue *sample = os->last_sample;
	u64 timestamp = new->timestamp;
	struct list_head *p;

	os->last_sample = new;

	if (!sample) {
		list_add(&new->list, &os->samples);
		os->max_timestamp = timestamp;
		return;
	}

	/*
	 * last_sample might point to some random place in the list as it's
	 * the last queued event. We expect that the new event is close to
	 * this.
	 */
	if (sample->timestamp <= timestamp) {
		while (sample->timestamp <= timestamp) {
			p = sample->list.next;
			if (p == &os->samples) {
				list_add_tail(&new->list, &os->samples);
				os->max_timestamp = timestamp;
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add_tail(&new->list, &sample->list);
	} else {
		while (sample->timestamp > timestamp) {
			p = sample->list.prev;
			if (p == &os->samples) {
				list_add(&new->list, &os->samples);
				return;
			}
			sample = list_entry(p, struct sample_queue, list);
		}
		list_add(&new->list, &sample->list);
	}
}

#define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct sample_queue))

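/*
 * Queue an event for ordered delivery. Queue nodes are recycled from
 * sample_cache when possible, otherwise carved out of the current
 * sample_buffer slab; slot 0 of each slab is reserved to link the slab
 * itself on the to_free list.
 */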
static int perf_session_queue_event(struct perf_session *s, union perf_event *event,
				    struct perf_sample *sample, u64 file_offset)
{
	struct ordered_samples *os = &s->ordered_samples;
	struct list_head *sc = &os->sample_cache;
	u64 timestamp = sample->time;
	struct sample_queue *new;

	if (!timestamp || timestamp == ~0ULL)
		return -ETIME;

	if (timestamp < s->ordered_samples.last_flush) {
		printf("Warning: Timestamp below last timeslice flush\n");
		return -EINVAL;
	}

	if (!list_empty(sc)) {
		new = list_entry(sc->next, struct sample_queue, list);
		list_del(&new->list);
	} else if (os->sample_buffer) {
		new = os->sample_buffer + os->sample_buffer_idx;
		if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
			os->sample_buffer = NULL;
	} else {
		os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
		if (!os->sample_buffer)
			return -ENOMEM;
		list_add(&os->sample_buffer->list, &os->to_free);
		os->sample_buffer_idx = 2;
		new = os->sample_buffer + 1;
	}

	new->timestamp = timestamp;
	new->file_offset = file_offset;
	new->event = event;

	__queue_event(new, s);

	return 0;
}

static void callchain__printf(struct perf_sample *sample)
{
	unsigned int i;

	printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr);

	for (i = 0; i < sample->callchain->nr; i++)
		printf("..... %2d: %016" PRIx64 "\n",
		       i, sample->callchain->ips[i]);
}

static void perf_session__print_tstamp(struct perf_session *session,
				       union perf_event *event,
				       struct perf_sample *sample)
{
	if (event->header.type != PERF_RECORD_SAMPLE &&
	    !session->sample_id_all) {
		fputs("-1 -1 ", stdout);
		return;
	}

	if ((session->sample_type & PERF_SAMPLE_CPU))
		printf("%u ", sample->cpu);

	if (session->sample_type & PERF_SAMPLE_TIME)
		printf("%" PRIu64 " ", sample->time);
}

static void dump_event(struct perf_session *session, union perf_event *event,
		       u64 file_offset, struct perf_sample *sample)
{
	if (!dump_trace)
		return;

	printf("\n%#" PRIx64 " [%#x]: event: %d\n",
	       file_offset, event->header.size, event->header.type);

	trace_event(event);

	if (sample)
		perf_session__print_tstamp(session, event, sample);

	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
	       event->header.size, perf_event__name(event->header.type));
}

static void dump_sample(struct perf_session *session, union perf_event *event,
			struct perf_sample *sample)
{
	if (!dump_trace)
		return;

	printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 "\n",
	       event->header.misc, sample->pid, sample->tid, sample->ip,
	       sample->period);

	if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
		callchain__printf(sample);
}

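/*
 * Deliver one kernel event to the tool's callback. Samples are matched
 * to their evsel via the sample id first; events with an unknown id or
 * type are only accounted in the session stats.
 */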
static int perf_session_deliver_event(struct perf_session *session,
				      union perf_event *event,
				      struct perf_sample *sample,
				      struct perf_event_ops *ops,
				      u64 file_offset)
{
	struct perf_evsel *evsel;

	dump_event(session, event, file_offset, sample);

	switch (event->header.type) {
	case PERF_RECORD_SAMPLE:
		dump_sample(session, event, sample);
		evsel = perf_evlist__id2evsel(session->evlist, sample->id);
		if (evsel == NULL) {
			++session->hists.stats.nr_unknown_id;
			return -1;
		}
		return ops->sample(event, sample, evsel, session);
	case PERF_RECORD_MMAP:
		return ops->mmap(event, sample, session);
	case PERF_RECORD_COMM:
		return ops->comm(event, sample, session);
	case PERF_RECORD_FORK:
		return ops->fork(event, sample, session);
	case PERF_RECORD_EXIT:
		return ops->exit(event, sample, session);
	case PERF_RECORD_LOST:
		return ops->lost(event, sample, session);
	case PERF_RECORD_READ:
		return ops->read(event, sample, session);
	case PERF_RECORD_THROTTLE:
		return ops->throttle(event, sample, session);
	case PERF_RECORD_UNTHROTTLE:
		return ops->unthrottle(event, sample, session);
	default:
		++session->hists.stats.nr_unknown_events;
		return -1;
	}
}

static int perf_session__preprocess_sample(struct perf_session *session,
					   union perf_event *event, struct perf_sample *sample)
{
	if (event->header.type != PERF_RECORD_SAMPLE ||
	    !(session->sample_type & PERF_SAMPLE_CALLCHAIN))
		return 0;

	if (!ip_callchain__valid(sample->callchain, event)) {
		pr_debug("call-chain problem with event, skipping it.\n");
		++session->hists.stats.nr_invalid_chains;
		session->hists.stats.total_invalid_chains += sample->period;
		return -EINVAL;
	}
	return 0;
}

static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
					    struct perf_event_ops *ops, u64 file_offset)
{
	dump_event(session, event, file_offset, NULL);

	/* These events are processed right away */
	switch (event->header.type) {
	case PERF_RECORD_HEADER_ATTR:
		return ops->attr(event, session);
	case PERF_RECORD_HEADER_EVENT_TYPE:
		return ops->event_type(event, session);
	case PERF_RECORD_HEADER_TRACING_DATA:
		/* setup for reading amidst mmap */
		lseek(session->fd, file_offset, SEEK_SET);
		return ops->tracing_data(event, session);
	case PERF_RECORD_HEADER_BUILD_ID:
		return ops->build_id(event, session);
	case PERF_RECORD_FINISHED_ROUND:
		return ops->finished_round(event, session, ops);
	default:
		return -EINVAL;
	}
}

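/*
 * Central per-event dispatch: byte-swap the event if the file comes
 * from a host with different endianness, account it in the event stats,
 * hand user/synthetic events straight to their handlers, and either
 * queue kernel events for timestamp ordering or deliver them directly.
 */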
static int perf_session__process_event(struct perf_session *session,
				       union perf_event *event,
				       struct perf_event_ops *ops,
				       u64 file_offset)
{
	struct perf_sample sample;
	int ret;

	if (event->header.type >= PERF_RECORD_HEADER_MAX)
		return -EINVAL;

	if (session->header.needs_swap &&
	    perf_event__swap_ops[event->header.type])
		perf_event__swap_ops[event->header.type](event);

	hists__inc_nr_events(&session->hists, event->header.type);

	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
		return perf_session__process_user_event(session, event, ops, file_offset);

	/*
	 * For all kernel events we get the sample data
	 */
	ret = perf_session__parse_sample(session, event, &sample);
	if (ret)
		return ret;

	/* Preprocess sample records - precheck callchains */
	if (perf_session__preprocess_sample(session, event, &sample))
		return 0;

	if (ops->ordered_samples) {
		ret = perf_session_queue_event(session, event, &sample,
					       file_offset);
		if (ret != -ETIME)
			return ret;
	}

	return perf_session_deliver_event(session, event, &sample, ops,
					  file_offset);
}

void perf_event_header__bswap(struct perf_event_header *self)
{
	self->type = bswap_32(self->type);
	self->misc = bswap_16(self->misc);
	self->size = bswap_16(self->size);
}

static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
	struct thread *thread = perf_session__findnew(self, 0);

	if (thread == NULL || thread__set_comm(thread, "swapper")) {
		pr_err("problem inserting idle task.\n");
		thread = NULL;
	}

	return thread;
}

static void perf_session__warn_about_errors(const struct perf_session *session,
					    const struct perf_event_ops *ops)
{
	if (ops->lost == perf_event__process_lost &&
	    session->hists.stats.total_lost != 0) {
		ui__warning("Processed %" PRIu64 " events and LOST %" PRIu64
			    "!\n\nCheck IO/CPU overload!\n\n",
			    session->hists.stats.total_period,
			    session->hists.stats.total_lost);
	}

	if (session->hists.stats.nr_unknown_events != 0) {
		ui__warning("Found %u unknown events!\n\n"
			    "Is this an older tool processing a perf.data "
			    "file generated by a more recent tool?\n\n"
			    "If that is not the case, consider "
			    "reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_unknown_events);
	}

	if (session->hists.stats.nr_unknown_id != 0) {
		ui__warning("%u samples with id not present in the header\n",
			    session->hists.stats.nr_unknown_id);
	}

	if (session->hists.stats.nr_invalid_chains != 0) {
		ui__warning("Found invalid callchains!\n\n"
			    "%u out of %u events were discarded for this reason.\n\n"
			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
			    session->hists.stats.nr_invalid_chains,
			    session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
	}
}

#define session_done()	(*(volatile int *)(&session_done))
volatile int session_done;

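/*
 * Pipe mode: read events incrementally from the fd, since a pipe can
 * neither be mmapped nor seeked. On an unparseable event the stream is
 * skipped over one u64 at a time in the hope of resynchronizing.
 */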
static int __perf_session__process_pipe_events(struct perf_session *self,
					       struct perf_event_ops *ops)
{
	union perf_event event;
	uint32_t size;
	int skip = 0;
	u64 head;
	int err;
	void *p;

	perf_event_ops__fill_defaults(ops);

	head = 0;
more:
	err = readn(self->fd, &event, sizeof(struct perf_event_header));
	if (err <= 0) {
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

	if (self->header.needs_swap)
		perf_event_header__bswap(&event.header);

	size = event.header.size;
	if (size == 0)
		size = 8;

	p = &event;
	p += sizeof(struct perf_event_header);

	if (size - sizeof(struct perf_event_header)) {
		err = readn(self->fd, p, size - sizeof(struct perf_event_header));
		if (err <= 0) {
			if (err == 0) {
				pr_err("unexpected end of event stream\n");
				goto done;
			}

			pr_err("failed to read event data\n");
			goto out_err;
		}
	}

	if (size == 0 ||
	    (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
			    head, event.header.size, event.header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	if (skip > 0)
		head += skip;

	if (!session_done())
		goto more;
done:
	err = 0;
out_err:
	perf_session__warn_about_errors(self, ops);
	perf_session_free_sample_buffers(self);
	return err;
}

static union perf_event *
fetch_mmaped_event(struct perf_session *session,
		   u64 head, size_t mmap_size, char *buf)
{
	union perf_event *event;

	/*
	 * Ensure we have enough space remaining to read
	 * the size of the event in the headers.
	 */
	if (head + sizeof(event->header) > mmap_size)
		return NULL;

	event = (union perf_event *)(buf + head);

	if (session->header.needs_swap)
		perf_event_header__bswap(&event->header);

	if (head + event->header.size > mmap_size)
		return NULL;

	return event;
}

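/*
 * File mode: map the data area in mmap_window sized slices (the whole
 * file on 64-bit). When an event would straddle the end of the current
 * slice, remap starting at the page containing that event and resume.
 */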
int __perf_session__process_events(struct perf_session *session,
				   u64 data_offset, u64 data_size,
				   u64 file_size, struct perf_event_ops *ops)
{
	u64 head, page_offset, file_offset, file_pos, progress_next;
	int err, mmap_prot, mmap_flags, map_idx = 0;
	struct ui_progress *progress;
	size_t	page_size, mmap_size;
	char *buf, *mmaps[8];
	union perf_event *event;
	uint32_t size;

	perf_event_ops__fill_defaults(ops);

	page_size = sysconf(_SC_PAGESIZE);

	page_offset = page_size * (data_offset / page_size);
	file_offset = page_offset;
	head = data_offset - page_offset;

	if (data_offset + data_size < file_size)
		file_size = data_offset + data_size;

	progress_next = file_size / 16;
	progress = ui_progress__new("Processing events...", file_size);
	if (progress == NULL)
		return -1;

	mmap_size = session->mmap_window;
	if (mmap_size > file_size)
		mmap_size = file_size;

	memset(mmaps, 0, sizeof(mmaps));

	mmap_prot  = PROT_READ;
	mmap_flags = MAP_SHARED;

	if (session->header.needs_swap) {
		mmap_prot  |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
remap:
	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
		   file_offset);
	if (buf == MAP_FAILED) {
		pr_err("failed to mmap file\n");
		err = -errno;
		goto out_err;
	}
	mmaps[map_idx] = buf;
	map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
	file_pos = file_offset + head;

more:
	event = fetch_mmaped_event(session, head, mmap_size, buf);
	if (!event) {
		if (mmaps[map_idx]) {
			munmap(mmaps[map_idx], mmap_size);
			mmaps[map_idx] = NULL;
		}

		page_offset = page_size * (head / page_size);
		file_offset += page_offset;
		head -= page_offset;
		goto remap;
	}

	size = event->header.size;

	if (size == 0 ||
	    perf_session__process_event(session, event, ops, file_pos) < 0) {
		dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
			    file_offset + head, event->header.size,
			    event->header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;
	file_pos += size;

	if (file_pos >= progress_next) {
		progress_next += file_size / 16;
		ui_progress__update(progress, file_pos);
	}

	if (file_pos < file_size)
		goto more;

	err = 0;
	/* do the final flush for ordered samples */
	session->ordered_samples.next_flush = ULLONG_MAX;
	flush_sample_queue(session, ops);
out_err:
	ui_progress__delete(progress);
	perf_session__warn_about_errors(session, ops);
	perf_session_free_sample_buffers(session);
	return err;
}

int perf_session__process_events(struct perf_session *self,
				 struct perf_event_ops *ops)
{
	int err;

	if (perf_session__register_idle_thread(self) == NULL)
		return -ENOMEM;

	if (!self->fd_pipe)
		err = __perf_session__process_events(self,
						     self->header.data_offset,
						     self->header.data_size,
						     self->size, ops);
	else
		err = __perf_session__process_pipe_events(self, ops);

	return err;
}

bool perf_session__has_traces(struct perf_session *self, const char *msg)
{
	if (!(self->sample_type & PERF_SAMPLE_RAW)) {
		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
		return false;
	}

	return true;
}

int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
					     const char *symbol_name,
					     u64 addr)
{
	char *bracket;
	enum map_type i;
	struct ref_reloc_sym *ref;

	ref = zalloc(sizeof(struct ref_reloc_sym));
	if (ref == NULL)
		return -ENOMEM;

	ref->name = strdup(symbol_name);
	if (ref->name == NULL) {
		free(ref);
		return -ENOMEM;
	}

	bracket = strchr(ref->name, ']');
	if (bracket)
		*bracket = '\0';

	ref->addr = addr;

	for (i = 0; i < MAP__NR_TYPES; ++i) {
		struct kmap *kmap = map__kmap(maps[i]);
		kmap->ref_reloc_sym = ref;
	}

	return 0;
}

size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
{
	return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) +
	       __dsos__fprintf(&self->host_machine.user_dsos, fp) +
	       machines__fprintf_dsos(&self->machines, fp);
}

size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
					  bool with_hits)
{
	size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits);
	return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
}

size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
{
	struct perf_evsel *pos;
	size_t ret = fprintf(fp, "Aggregated stats:\n");

	ret += hists__fprintf_nr_events(&session->hists, fp);

	list_for_each_entry(pos, &session->evlist->entries, node) {
		ret += fprintf(fp, "%s stats:\n", event_name(pos));
		ret += hists__fprintf_nr_events(&pos->hists, fp);
	}

	return ret;
}

struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
						   unsigned int type)
{
	struct perf_evsel *pos;

	list_for_each_entry(pos, &session->evlist->entries, node) {
		if (pos->attr.type == type)
			return pos;
	}
	return NULL;
}

void perf_session__print_symbols(union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_session *session)
{
	struct addr_location al;
	const char *symname, *dsoname;
	struct callchain_cursor *cursor = &session->callchain_cursor;
	struct callchain_cursor_node *node;

	if (perf_event__preprocess_sample(event, session, &al, sample,
					  NULL) < 0) {
		error("problem processing %d event, skipping it.\n",
		      event->header.type);
		return;
	}

	if (symbol_conf.use_callchain && sample->callchain) {
		if (perf_session__resolve_callchain(session, al.thread,
						    sample->callchain, NULL) != 0) {
			if (verbose)
				error("Failed to resolve callchain. Skipping\n");
			return;
		}
		callchain_cursor_commit(cursor);

		while (1) {
			node = callchain_cursor_current(cursor);
			if (!node)
				break;

			if (node->sym && node->sym->name)
				symname = node->sym->name;
			else
				symname = "";

			if (node->map && node->map->dso && node->map->dso->name)
				dsoname = node->map->dso->name;
			else
				dsoname = "";

			printf("\t%16" PRIx64 " %s (%s)\n", node->ip, symname, dsoname);

			callchain_cursor_advance(cursor);
		}

	} else {
		if (al.sym && al.sym->name)
			symname = al.sym->name;
		else
			symname = "";

		if (al.map && al.map->dso && al.map->dso->name)
			dsoname = al.map->dso->name;
		else
			dsoname = "";

		printf("%16" PRIx64 " %s (%s)", al.addr, symname, dsoname);
	}
}