header.c revision c8ec03e66ee9fe1aefea9555b98aa5f5f7935d5c
1#include "util.h"
2#include <sys/types.h>
3#include <byteswap.h>
4#include <unistd.h>
5#include <stdio.h>
6#include <stdlib.h>
7#include <linux/list.h>
8#include <linux/kernel.h>
9#include <linux/bitops.h>
10#include <sys/utsname.h>
11
12#include "evlist.h"
13#include "evsel.h"
14#include "header.h"
15#include "../perf.h"
16#include "trace-event.h"
17#include "session.h"
18#include "symbol.h"
19#include "debug.h"
20#include "cpumap.h"
21#include "pmu.h"
22#include "vdso.h"
23#include "strbuf.h"
24#include "build-id.h"
25
26static bool no_buildid_cache = false;
27
28static u32 header_argc;
29static const char **header_argv;
30
31/*
32 * magic2 = "PERFILE2"
33 * must be a numerical value to let the endianness
34 * determine the memory layout. That way we are able
35 * to detect endianness when reading the perf.data file
36 * back.
37 *
38 * we check for legacy (PERFFILE) format.
39 */
40static const char *__perf_magic1 = "PERFFILE";
41static const u64 __perf_magic2    = 0x32454c4946524550ULL;
42static const u64 __perf_magic2_sw = 0x50455246494c4532ULL;
43
44#define PERF_MAGIC	__perf_magic2
45
46struct perf_file_attr {
47	struct perf_event_attr	attr;
48	struct perf_file_section	ids;
49};
50
51void perf_header__set_feat(struct perf_header *header, int feat)
52{
53	set_bit(feat, header->adds_features);
54}
55
56void perf_header__clear_feat(struct perf_header *header, int feat)
57{
58	clear_bit(feat, header->adds_features);
59}
60
61bool perf_header__has_feat(const struct perf_header *header, int feat)
62{
63	return test_bit(feat, header->adds_features);
64}
65
/*
 * Write exactly @size bytes from @buf to @fd, looping over short writes.
 *
 * A write interrupted by a signal (EINTR) is retried instead of being
 * reported as a failure.
 *
 * Returns 0 on success or a negative errno value on error.
 */
static int do_write(int fd, const void *buf, size_t size)
{
	/* byte pointer: avoids arithmetic on void * */
	const char *cursor = buf;

	while (size) {
		/* write() returns ssize_t, keep the full width */
		ssize_t ret = write(fd, cursor, size);

		if (ret < 0) {
			if (errno == EINTR)
				continue;
			return -errno;
		}

		size -= ret;
		cursor += ret;
	}

	return 0;
}
80
81#define NAME_ALIGN 64
82
83static int write_padded(int fd, const void *bf, size_t count,
84			size_t count_aligned)
85{
86	static const char zero_buf[NAME_ALIGN];
87	int err = do_write(fd, bf, count);
88
89	if (!err)
90		err = do_write(fd, zero_buf, count_aligned - count);
91
92	return err;
93}
94
95static int do_write_string(int fd, const char *str)
96{
97	u32 len, olen;
98	int ret;
99
100	olen = strlen(str) + 1;
101	len = PERF_ALIGN(olen, NAME_ALIGN);
102
103	/* write len, incl. \0 */
104	ret = do_write(fd, &len, sizeof(len));
105	if (ret < 0)
106		return ret;
107
108	return write_padded(fd, str, olen, len);
109}
110
111static char *do_read_string(int fd, struct perf_header *ph)
112{
113	ssize_t sz, ret;
114	u32 len;
115	char *buf;
116
117	sz = readn(fd, &len, sizeof(len));
118	if (sz < (ssize_t)sizeof(len))
119		return NULL;
120
121	if (ph->needs_swap)
122		len = bswap_32(len);
123
124	buf = malloc(len);
125	if (!buf)
126		return NULL;
127
128	ret = readn(fd, buf, len);
129	if (ret == (ssize_t)len) {
130		/*
131		 * strings are padded by zeroes
132		 * thus the actual strlen of buf
133		 * may be less than len
134		 */
135		return buf;
136	}
137
138	free(buf);
139	return NULL;
140}
141
142int
143perf_header__set_cmdline(int argc, const char **argv)
144{
145	int i;
146
147	/*
148	 * If header_argv has already been set, do not override it.
149	 * This allows a command to set the cmdline, parse args and
150	 * then call another builtin function that implements a
151	 * command -- e.g, cmd_kvm calling cmd_record.
152	 */
153	if (header_argv)
154		return 0;
155
156	header_argc = (u32)argc;
157
158	/* do not include NULL termination */
159	header_argv = calloc(argc, sizeof(char *));
160	if (!header_argv)
161		return -ENOMEM;
162
163	/*
164	 * must copy argv contents because it gets moved
165	 * around during option parsing
166	 */
167	for (i = 0; i < argc ; i++)
168		header_argv[i] = argv[i];
169
170	return 0;
171}
172
/*
 * Walk the list at @head, skipping every entry whose has_build_id flag is
 * clear.  The statement following the macro becomes the "else" branch, so
 * it only runs for entries that have a build-id.
 */
#define dsos__for_each_with_build_id(pos, head)	\
	list_for_each_entry(pos, head, node)	\
		if (!(pos)->has_build_id)	\
			continue;		\
		else
178
179static int write_buildid(char *name, size_t name_len, u8 *build_id,
180			 pid_t pid, u16 misc, int fd)
181{
182	int err;
183	struct build_id_event b;
184	size_t len;
185
186	len = name_len + 1;
187	len = PERF_ALIGN(len, NAME_ALIGN);
188
189	memset(&b, 0, sizeof(b));
190	memcpy(&b.build_id, build_id, BUILD_ID_SIZE);
191	b.pid = pid;
192	b.header.misc = misc;
193	b.header.size = sizeof(b) + len;
194
195	err = do_write(fd, &b, sizeof(b));
196	if (err < 0)
197		return err;
198
199	return write_padded(fd, name, name_len + 1, len);
200}
201
202static int __dsos__write_buildid_table(struct list_head *head,
203				       struct machine *machine,
204				       pid_t pid, u16 misc, int fd)
205{
206	char nm[PATH_MAX];
207	struct dso *pos;
208
209	dsos__for_each_with_build_id(pos, head) {
210		int err;
211		char  *name;
212		size_t name_len;
213
214		if (!pos->hit)
215			continue;
216
217		if (is_vdso_map(pos->short_name)) {
218			name = (char *) VDSO__MAP_NAME;
219			name_len = sizeof(VDSO__MAP_NAME) + 1;
220		} else if (dso__is_kcore(pos)) {
221			machine__mmap_name(machine, nm, sizeof(nm));
222			name = nm;
223			name_len = strlen(nm) + 1;
224		} else {
225			name = pos->long_name;
226			name_len = pos->long_name_len + 1;
227		}
228
229		err = write_buildid(name, name_len, pos->build_id,
230				    pid, misc, fd);
231		if (err)
232			return err;
233	}
234
235	return 0;
236}
237
238static int machine__write_buildid_table(struct machine *machine, int fd)
239{
240	int err;
241	u16 kmisc = PERF_RECORD_MISC_KERNEL,
242	    umisc = PERF_RECORD_MISC_USER;
243
244	if (!machine__is_host(machine)) {
245		kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
246		umisc = PERF_RECORD_MISC_GUEST_USER;
247	}
248
249	err = __dsos__write_buildid_table(&machine->kernel_dsos, machine,
250					  machine->pid, kmisc, fd);
251	if (err == 0)
252		err = __dsos__write_buildid_table(&machine->user_dsos, machine,
253						  machine->pid, umisc, fd);
254	return err;
255}
256
257static int dsos__write_buildid_table(struct perf_header *header, int fd)
258{
259	struct perf_session *session = container_of(header,
260			struct perf_session, header);
261	struct rb_node *nd;
262	int err = machine__write_buildid_table(&session->machines.host, fd);
263
264	if (err)
265		return err;
266
267	for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
268		struct machine *pos = rb_entry(nd, struct machine, rb_node);
269		err = machine__write_buildid_table(pos, fd);
270		if (err)
271			break;
272	}
273	return err;
274}
275
276int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
277			  const char *name, bool is_kallsyms, bool is_vdso)
278{
279	const size_t size = PATH_MAX;
280	char *realname, *filename = zalloc(size),
281	     *linkname = zalloc(size), *targetname;
282	int len, err = -1;
283	bool slash = is_kallsyms || is_vdso;
284
285	if (is_kallsyms) {
286		if (symbol_conf.kptr_restrict) {
287			pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n");
288			err = 0;
289			goto out_free;
290		}
291		realname = (char *) name;
292	} else
293		realname = realpath(name, NULL);
294
295	if (realname == NULL || filename == NULL || linkname == NULL)
296		goto out_free;
297
298	len = scnprintf(filename, size, "%s%s%s",
299		       debugdir, slash ? "/" : "",
300		       is_vdso ? VDSO__MAP_NAME : realname);
301	if (mkdir_p(filename, 0755))
302		goto out_free;
303
304	snprintf(filename + len, size - len, "/%s", sbuild_id);
305
306	if (access(filename, F_OK)) {
307		if (is_kallsyms) {
308			 if (copyfile("/proc/kallsyms", filename))
309				goto out_free;
310		} else if (link(realname, filename) && copyfile(name, filename))
311			goto out_free;
312	}
313
314	len = scnprintf(linkname, size, "%s/.build-id/%.2s",
315		       debugdir, sbuild_id);
316
317	if (access(linkname, X_OK) && mkdir_p(linkname, 0755))
318		goto out_free;
319
320	snprintf(linkname + len, size - len, "/%s", sbuild_id + 2);
321	targetname = filename + strlen(debugdir) - 5;
322	memcpy(targetname, "../..", 5);
323
324	if (symlink(targetname, linkname) == 0)
325		err = 0;
326out_free:
327	if (!is_kallsyms)
328		free(realname);
329	free(filename);
330	free(linkname);
331	return err;
332}
333
334static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
335				 const char *name, const char *debugdir,
336				 bool is_kallsyms, bool is_vdso)
337{
338	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
339
340	build_id__sprintf(build_id, build_id_size, sbuild_id);
341
342	return build_id_cache__add_s(sbuild_id, debugdir, name,
343				     is_kallsyms, is_vdso);
344}
345
346int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir)
347{
348	const size_t size = PATH_MAX;
349	char *filename = zalloc(size),
350	     *linkname = zalloc(size);
351	int err = -1;
352
353	if (filename == NULL || linkname == NULL)
354		goto out_free;
355
356	snprintf(linkname, size, "%s/.build-id/%.2s/%s",
357		 debugdir, sbuild_id, sbuild_id + 2);
358
359	if (access(linkname, F_OK))
360		goto out_free;
361
362	if (readlink(linkname, filename, size - 1) < 0)
363		goto out_free;
364
365	if (unlink(linkname))
366		goto out_free;
367
368	/*
369	 * Since the link is relative, we must make it absolute:
370	 */
371	snprintf(linkname, size, "%s/.build-id/%.2s/%s",
372		 debugdir, sbuild_id, filename);
373
374	if (unlink(linkname))
375		goto out_free;
376
377	err = 0;
378out_free:
379	free(filename);
380	free(linkname);
381	return err;
382}
383
384static int dso__cache_build_id(struct dso *dso, struct machine *machine,
385			       const char *debugdir)
386{
387	bool is_kallsyms = dso->kernel && dso->long_name[0] != '/';
388	bool is_vdso = is_vdso_map(dso->short_name);
389	char *name = dso->long_name;
390	char nm[PATH_MAX];
391
392	if (dso__is_kcore(dso)) {
393		is_kallsyms = true;
394		machine__mmap_name(machine, nm, sizeof(nm));
395		name = nm;
396	}
397	return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name,
398				     debugdir, is_kallsyms, is_vdso);
399}
400
401static int __dsos__cache_build_ids(struct list_head *head,
402				   struct machine *machine, const char *debugdir)
403{
404	struct dso *pos;
405	int err = 0;
406
407	dsos__for_each_with_build_id(pos, head)
408		if (dso__cache_build_id(pos, machine, debugdir))
409			err = -1;
410
411	return err;
412}
413
414static int machine__cache_build_ids(struct machine *machine, const char *debugdir)
415{
416	int ret = __dsos__cache_build_ids(&machine->kernel_dsos, machine,
417					  debugdir);
418	ret |= __dsos__cache_build_ids(&machine->user_dsos, machine, debugdir);
419	return ret;
420}
421
422static int perf_session__cache_build_ids(struct perf_session *session)
423{
424	struct rb_node *nd;
425	int ret;
426	char debugdir[PATH_MAX];
427
428	snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
429
430	if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
431		return -1;
432
433	ret = machine__cache_build_ids(&session->machines.host, debugdir);
434
435	for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
436		struct machine *pos = rb_entry(nd, struct machine, rb_node);
437		ret |= machine__cache_build_ids(pos, debugdir);
438	}
439	return ret ? -1 : 0;
440}
441
442static bool machine__read_build_ids(struct machine *machine, bool with_hits)
443{
444	bool ret = __dsos__read_build_ids(&machine->kernel_dsos, with_hits);
445	ret |= __dsos__read_build_ids(&machine->user_dsos, with_hits);
446	return ret;
447}
448
449static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
450{
451	struct rb_node *nd;
452	bool ret = machine__read_build_ids(&session->machines.host, with_hits);
453
454	for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
455		struct machine *pos = rb_entry(nd, struct machine, rb_node);
456		ret |= machine__read_build_ids(pos, with_hits);
457	}
458
459	return ret;
460}
461
462static int write_tracing_data(int fd, struct perf_header *h __maybe_unused,
463			    struct perf_evlist *evlist)
464{
465	return read_tracing_data(fd, &evlist->entries);
466}
467
468
469static int write_build_id(int fd, struct perf_header *h,
470			  struct perf_evlist *evlist __maybe_unused)
471{
472	struct perf_session *session;
473	int err;
474
475	session = container_of(h, struct perf_session, header);
476
477	if (!perf_session__read_build_ids(session, true))
478		return -1;
479
480	err = dsos__write_buildid_table(h, fd);
481	if (err < 0) {
482		pr_debug("failed to write buildid table\n");
483		return err;
484	}
485	if (!no_buildid_cache)
486		perf_session__cache_build_ids(session);
487
488	return 0;
489}
490
491static int write_hostname(int fd, struct perf_header *h __maybe_unused,
492			  struct perf_evlist *evlist __maybe_unused)
493{
494	struct utsname uts;
495	int ret;
496
497	ret = uname(&uts);
498	if (ret < 0)
499		return -1;
500
501	return do_write_string(fd, uts.nodename);
502}
503
504static int write_osrelease(int fd, struct perf_header *h __maybe_unused,
505			   struct perf_evlist *evlist __maybe_unused)
506{
507	struct utsname uts;
508	int ret;
509
510	ret = uname(&uts);
511	if (ret < 0)
512		return -1;
513
514	return do_write_string(fd, uts.release);
515}
516
517static int write_arch(int fd, struct perf_header *h __maybe_unused,
518		      struct perf_evlist *evlist __maybe_unused)
519{
520	struct utsname uts;
521	int ret;
522
523	ret = uname(&uts);
524	if (ret < 0)
525		return -1;
526
527	return do_write_string(fd, uts.machine);
528}
529
530static int write_version(int fd, struct perf_header *h __maybe_unused,
531			 struct perf_evlist *evlist __maybe_unused)
532{
533	return do_write_string(fd, perf_version_string);
534}
535
536static int write_cpudesc(int fd, struct perf_header *h __maybe_unused,
537		       struct perf_evlist *evlist __maybe_unused)
538{
539#ifndef CPUINFO_PROC
540#define CPUINFO_PROC NULL
541#endif
542	FILE *file;
543	char *buf = NULL;
544	char *s, *p;
545	const char *search = CPUINFO_PROC;
546	size_t len = 0;
547	int ret = -1;
548
549	if (!search)
550		return -1;
551
552	file = fopen("/proc/cpuinfo", "r");
553	if (!file)
554		return -1;
555
556	while (getline(&buf, &len, file) > 0) {
557		ret = strncmp(buf, search, strlen(search));
558		if (!ret)
559			break;
560	}
561
562	if (ret)
563		goto done;
564
565	s = buf;
566
567	p = strchr(buf, ':');
568	if (p && *(p+1) == ' ' && *(p+2))
569		s = p + 2;
570	p = strchr(s, '\n');
571	if (p)
572		*p = '\0';
573
574	/* squash extra space characters (branding string) */
575	p = s;
576	while (*p) {
577		if (isspace(*p)) {
578			char *r = p + 1;
579			char *q = r;
580			*p = ' ';
581			while (*q && isspace(*q))
582				q++;
583			if (q != (p+1))
584				while ((*r++ = *q++));
585		}
586		p++;
587	}
588	ret = do_write_string(fd, s);
589done:
590	free(buf);
591	fclose(file);
592	return ret;
593}
594
595static int write_nrcpus(int fd, struct perf_header *h __maybe_unused,
596			struct perf_evlist *evlist __maybe_unused)
597{
598	long nr;
599	u32 nrc, nra;
600	int ret;
601
602	nr = sysconf(_SC_NPROCESSORS_CONF);
603	if (nr < 0)
604		return -1;
605
606	nrc = (u32)(nr & UINT_MAX);
607
608	nr = sysconf(_SC_NPROCESSORS_ONLN);
609	if (nr < 0)
610		return -1;
611
612	nra = (u32)(nr & UINT_MAX);
613
614	ret = do_write(fd, &nrc, sizeof(nrc));
615	if (ret < 0)
616		return ret;
617
618	return do_write(fd, &nra, sizeof(nra));
619}
620
621static int write_event_desc(int fd, struct perf_header *h __maybe_unused,
622			    struct perf_evlist *evlist)
623{
624	struct perf_evsel *evsel;
625	u32 nre, nri, sz;
626	int ret;
627
628	nre = evlist->nr_entries;
629
630	/*
631	 * write number of events
632	 */
633	ret = do_write(fd, &nre, sizeof(nre));
634	if (ret < 0)
635		return ret;
636
637	/*
638	 * size of perf_event_attr struct
639	 */
640	sz = (u32)sizeof(evsel->attr);
641	ret = do_write(fd, &sz, sizeof(sz));
642	if (ret < 0)
643		return ret;
644
645	list_for_each_entry(evsel, &evlist->entries, node) {
646
647		ret = do_write(fd, &evsel->attr, sz);
648		if (ret < 0)
649			return ret;
650		/*
651		 * write number of unique id per event
652		 * there is one id per instance of an event
653		 *
654		 * copy into an nri to be independent of the
655		 * type of ids,
656		 */
657		nri = evsel->ids;
658		ret = do_write(fd, &nri, sizeof(nri));
659		if (ret < 0)
660			return ret;
661
662		/*
663		 * write event string as passed on cmdline
664		 */
665		ret = do_write_string(fd, perf_evsel__name(evsel));
666		if (ret < 0)
667			return ret;
668		/*
669		 * write unique ids for this event
670		 */
671		ret = do_write(fd, evsel->id, evsel->ids * sizeof(u64));
672		if (ret < 0)
673			return ret;
674	}
675	return 0;
676}
677
678static int write_cmdline(int fd, struct perf_header *h __maybe_unused,
679			 struct perf_evlist *evlist __maybe_unused)
680{
681	char buf[MAXPATHLEN];
682	char proc[32];
683	u32 i, n;
684	int ret;
685
686	/*
687	 * actual atual path to perf binary
688	 */
689	sprintf(proc, "/proc/%d/exe", getpid());
690	ret = readlink(proc, buf, sizeof(buf));
691	if (ret <= 0)
692		return -1;
693
694	/* readlink() does not add null termination */
695	buf[ret] = '\0';
696
697	/* account for binary path */
698	n = header_argc + 1;
699
700	ret = do_write(fd, &n, sizeof(n));
701	if (ret < 0)
702		return ret;
703
704	ret = do_write_string(fd, buf);
705	if (ret < 0)
706		return ret;
707
708	for (i = 0 ; i < header_argc; i++) {
709		ret = do_write_string(fd, header_argv[i]);
710		if (ret < 0)
711			return ret;
712	}
713	return 0;
714}
715
/* sysfs files listing the core / thread siblings of cpu %d */
#define CORE_SIB_FMT \
	"/sys/devices/system/cpu/cpu%d/topology/core_siblings_list"
#define THRD_SIB_FMT \
	"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list"
720
721struct cpu_topo {
722	u32 core_sib;
723	u32 thread_sib;
724	char **core_siblings;
725	char **thread_siblings;
726};
727
728static int build_cpu_topo(struct cpu_topo *tp, int cpu)
729{
730	FILE *fp;
731	char filename[MAXPATHLEN];
732	char *buf = NULL, *p;
733	size_t len = 0;
734	ssize_t sret;
735	u32 i = 0;
736	int ret = -1;
737
738	sprintf(filename, CORE_SIB_FMT, cpu);
739	fp = fopen(filename, "r");
740	if (!fp)
741		goto try_threads;
742
743	sret = getline(&buf, &len, fp);
744	fclose(fp);
745	if (sret <= 0)
746		goto try_threads;
747
748	p = strchr(buf, '\n');
749	if (p)
750		*p = '\0';
751
752	for (i = 0; i < tp->core_sib; i++) {
753		if (!strcmp(buf, tp->core_siblings[i]))
754			break;
755	}
756	if (i == tp->core_sib) {
757		tp->core_siblings[i] = buf;
758		tp->core_sib++;
759		buf = NULL;
760		len = 0;
761	}
762	ret = 0;
763
764try_threads:
765	sprintf(filename, THRD_SIB_FMT, cpu);
766	fp = fopen(filename, "r");
767	if (!fp)
768		goto done;
769
770	if (getline(&buf, &len, fp) <= 0)
771		goto done;
772
773	p = strchr(buf, '\n');
774	if (p)
775		*p = '\0';
776
777	for (i = 0; i < tp->thread_sib; i++) {
778		if (!strcmp(buf, tp->thread_siblings[i]))
779			break;
780	}
781	if (i == tp->thread_sib) {
782		tp->thread_siblings[i] = buf;
783		tp->thread_sib++;
784		buf = NULL;
785	}
786	ret = 0;
787done:
788	if(fp)
789		fclose(fp);
790	free(buf);
791	return ret;
792}
793
794static void free_cpu_topo(struct cpu_topo *tp)
795{
796	u32 i;
797
798	if (!tp)
799		return;
800
801	for (i = 0 ; i < tp->core_sib; i++)
802		free(tp->core_siblings[i]);
803
804	for (i = 0 ; i < tp->thread_sib; i++)
805		free(tp->thread_siblings[i]);
806
807	free(tp);
808}
809
810static struct cpu_topo *build_cpu_topology(void)
811{
812	struct cpu_topo *tp;
813	void *addr;
814	u32 nr, i;
815	size_t sz;
816	long ncpus;
817	int ret = -1;
818
819	ncpus = sysconf(_SC_NPROCESSORS_CONF);
820	if (ncpus < 0)
821		return NULL;
822
823	nr = (u32)(ncpus & UINT_MAX);
824
825	sz = nr * sizeof(char *);
826
827	addr = calloc(1, sizeof(*tp) + 2 * sz);
828	if (!addr)
829		return NULL;
830
831	tp = addr;
832
833	addr += sizeof(*tp);
834	tp->core_siblings = addr;
835	addr += sz;
836	tp->thread_siblings = addr;
837
838	for (i = 0; i < nr; i++) {
839		ret = build_cpu_topo(tp, i);
840		if (ret < 0)
841			break;
842	}
843	if (ret) {
844		free_cpu_topo(tp);
845		tp = NULL;
846	}
847	return tp;
848}
849
850static int write_cpu_topology(int fd, struct perf_header *h __maybe_unused,
851			  struct perf_evlist *evlist __maybe_unused)
852{
853	struct cpu_topo *tp;
854	u32 i;
855	int ret;
856
857	tp = build_cpu_topology();
858	if (!tp)
859		return -1;
860
861	ret = do_write(fd, &tp->core_sib, sizeof(tp->core_sib));
862	if (ret < 0)
863		goto done;
864
865	for (i = 0; i < tp->core_sib; i++) {
866		ret = do_write_string(fd, tp->core_siblings[i]);
867		if (ret < 0)
868			goto done;
869	}
870	ret = do_write(fd, &tp->thread_sib, sizeof(tp->thread_sib));
871	if (ret < 0)
872		goto done;
873
874	for (i = 0; i < tp->thread_sib; i++) {
875		ret = do_write_string(fd, tp->thread_siblings[i]);
876		if (ret < 0)
877			break;
878	}
879done:
880	free_cpu_topo(tp);
881	return ret;
882}
883
884
885
886static int write_total_mem(int fd, struct perf_header *h __maybe_unused,
887			  struct perf_evlist *evlist __maybe_unused)
888{
889	char *buf = NULL;
890	FILE *fp;
891	size_t len = 0;
892	int ret = -1, n;
893	uint64_t mem;
894
895	fp = fopen("/proc/meminfo", "r");
896	if (!fp)
897		return -1;
898
899	while (getline(&buf, &len, fp) > 0) {
900		ret = strncmp(buf, "MemTotal:", 9);
901		if (!ret)
902			break;
903	}
904	if (!ret) {
905		n = sscanf(buf, "%*s %"PRIu64, &mem);
906		if (n == 1)
907			ret = do_write(fd, &mem, sizeof(mem));
908	}
909	free(buf);
910	fclose(fp);
911	return ret;
912}
913
914static int write_topo_node(int fd, int node)
915{
916	char str[MAXPATHLEN];
917	char field[32];
918	char *buf = NULL, *p;
919	size_t len = 0;
920	FILE *fp;
921	u64 mem_total, mem_free, mem;
922	int ret = -1;
923
924	sprintf(str, "/sys/devices/system/node/node%d/meminfo", node);
925	fp = fopen(str, "r");
926	if (!fp)
927		return -1;
928
929	while (getline(&buf, &len, fp) > 0) {
930		/* skip over invalid lines */
931		if (!strchr(buf, ':'))
932			continue;
933		if (sscanf(buf, "%*s %*d %s %"PRIu64, field, &mem) != 2)
934			goto done;
935		if (!strcmp(field, "MemTotal:"))
936			mem_total = mem;
937		if (!strcmp(field, "MemFree:"))
938			mem_free = mem;
939	}
940
941	fclose(fp);
942	fp = NULL;
943
944	ret = do_write(fd, &mem_total, sizeof(u64));
945	if (ret)
946		goto done;
947
948	ret = do_write(fd, &mem_free, sizeof(u64));
949	if (ret)
950		goto done;
951
952	ret = -1;
953	sprintf(str, "/sys/devices/system/node/node%d/cpulist", node);
954
955	fp = fopen(str, "r");
956	if (!fp)
957		goto done;
958
959	if (getline(&buf, &len, fp) <= 0)
960		goto done;
961
962	p = strchr(buf, '\n');
963	if (p)
964		*p = '\0';
965
966	ret = do_write_string(fd, buf);
967done:
968	free(buf);
969	if (fp)
970		fclose(fp);
971	return ret;
972}
973
974static int write_numa_topology(int fd, struct perf_header *h __maybe_unused,
975			  struct perf_evlist *evlist __maybe_unused)
976{
977	char *buf = NULL;
978	size_t len = 0;
979	FILE *fp;
980	struct cpu_map *node_map = NULL;
981	char *c;
982	u32 nr, i, j;
983	int ret = -1;
984
985	fp = fopen("/sys/devices/system/node/online", "r");
986	if (!fp)
987		return -1;
988
989	if (getline(&buf, &len, fp) <= 0)
990		goto done;
991
992	c = strchr(buf, '\n');
993	if (c)
994		*c = '\0';
995
996	node_map = cpu_map__new(buf);
997	if (!node_map)
998		goto done;
999
1000	nr = (u32)node_map->nr;
1001
1002	ret = do_write(fd, &nr, sizeof(nr));
1003	if (ret < 0)
1004		goto done;
1005
1006	for (i = 0; i < nr; i++) {
1007		j = (u32)node_map->map[i];
1008		ret = do_write(fd, &j, sizeof(j));
1009		if (ret < 0)
1010			break;
1011
1012		ret = write_topo_node(fd, i);
1013		if (ret < 0)
1014			break;
1015	}
1016done:
1017	free(buf);
1018	fclose(fp);
1019	free(node_map);
1020	return ret;
1021}
1022
1023/*
1024 * File format:
1025 *
1026 * struct pmu_mappings {
1027 *	u32	pmu_num;
1028 *	struct pmu_map {
1029 *		u32	type;
1030 *		char	name[];
1031 *	}[pmu_num];
1032 * };
1033 */
1034
1035static int write_pmu_mappings(int fd, struct perf_header *h __maybe_unused,
1036			      struct perf_evlist *evlist __maybe_unused)
1037{
1038	struct perf_pmu *pmu = NULL;
1039	off_t offset = lseek(fd, 0, SEEK_CUR);
1040	__u32 pmu_num = 0;
1041	int ret;
1042
1043	/* write real pmu_num later */
1044	ret = do_write(fd, &pmu_num, sizeof(pmu_num));
1045	if (ret < 0)
1046		return ret;
1047
1048	while ((pmu = perf_pmu__scan(pmu))) {
1049		if (!pmu->name)
1050			continue;
1051		pmu_num++;
1052
1053		ret = do_write(fd, &pmu->type, sizeof(pmu->type));
1054		if (ret < 0)
1055			return ret;
1056
1057		ret = do_write_string(fd, pmu->name);
1058		if (ret < 0)
1059			return ret;
1060	}
1061
1062	if (pwrite(fd, &pmu_num, sizeof(pmu_num), offset) != sizeof(pmu_num)) {
1063		/* discard all */
1064		lseek(fd, offset, SEEK_SET);
1065		return -1;
1066	}
1067
1068	return 0;
1069}
1070
1071/*
1072 * File format:
1073 *
1074 * struct group_descs {
1075 *	u32	nr_groups;
1076 *	struct group_desc {
1077 *		char	name[];
1078 *		u32	leader_idx;
1079 *		u32	nr_members;
1080 *	}[nr_groups];
1081 * };
1082 */
1083static int write_group_desc(int fd, struct perf_header *h __maybe_unused,
1084			    struct perf_evlist *evlist)
1085{
1086	u32 nr_groups = evlist->nr_groups;
1087	struct perf_evsel *evsel;
1088	int ret;
1089
1090	ret = do_write(fd, &nr_groups, sizeof(nr_groups));
1091	if (ret < 0)
1092		return ret;
1093
1094	list_for_each_entry(evsel, &evlist->entries, node) {
1095		if (perf_evsel__is_group_leader(evsel) &&
1096		    evsel->nr_members > 1) {
1097			const char *name = evsel->group_name ?: "{anon_group}";
1098			u32 leader_idx = evsel->idx;
1099			u32 nr_members = evsel->nr_members;
1100
1101			ret = do_write_string(fd, name);
1102			if (ret < 0)
1103				return ret;
1104
1105			ret = do_write(fd, &leader_idx, sizeof(leader_idx));
1106			if (ret < 0)
1107				return ret;
1108
1109			ret = do_write(fd, &nr_members, sizeof(nr_members));
1110			if (ret < 0)
1111				return ret;
1112		}
1113	}
1114	return 0;
1115}
1116
1117/*
1118 * default get_cpuid(): nothing gets recorded
1119 * actual implementation must be in arch/$(ARCH)/util/header.c
1120 */
1121int __attribute__ ((weak)) get_cpuid(char *buffer __maybe_unused,
1122				     size_t sz __maybe_unused)
1123{
1124	return -1;
1125}
1126
1127static int write_cpuid(int fd, struct perf_header *h __maybe_unused,
1128		       struct perf_evlist *evlist __maybe_unused)
1129{
1130	char buffer[64];
1131	int ret;
1132
1133	ret = get_cpuid(buffer, sizeof(buffer));
1134	if (!ret)
1135		goto write_it;
1136
1137	return -1;
1138write_it:
1139	return do_write_string(fd, buffer);
1140}
1141
1142static int write_branch_stack(int fd __maybe_unused,
1143			      struct perf_header *h __maybe_unused,
1144		       struct perf_evlist *evlist __maybe_unused)
1145{
1146	return 0;
1147}
1148
1149static void print_hostname(struct perf_header *ph, int fd __maybe_unused,
1150			   FILE *fp)
1151{
1152	fprintf(fp, "# hostname : %s\n", ph->env.hostname);
1153}
1154
1155static void print_osrelease(struct perf_header *ph, int fd __maybe_unused,
1156			    FILE *fp)
1157{
1158	fprintf(fp, "# os release : %s\n", ph->env.os_release);
1159}
1160
1161static void print_arch(struct perf_header *ph, int fd __maybe_unused, FILE *fp)
1162{
1163	fprintf(fp, "# arch : %s\n", ph->env.arch);
1164}
1165
1166static void print_cpudesc(struct perf_header *ph, int fd __maybe_unused,
1167			  FILE *fp)
1168{
1169	fprintf(fp, "# cpudesc : %s\n", ph->env.cpu_desc);
1170}
1171
1172static void print_nrcpus(struct perf_header *ph, int fd __maybe_unused,
1173			 FILE *fp)
1174{
1175	fprintf(fp, "# nrcpus online : %u\n", ph->env.nr_cpus_online);
1176	fprintf(fp, "# nrcpus avail : %u\n", ph->env.nr_cpus_avail);
1177}
1178
1179static void print_version(struct perf_header *ph, int fd __maybe_unused,
1180			  FILE *fp)
1181{
1182	fprintf(fp, "# perf version : %s\n", ph->env.version);
1183}
1184
1185static void print_cmdline(struct perf_header *ph, int fd __maybe_unused,
1186			  FILE *fp)
1187{
1188	int nr, i;
1189	char *str;
1190
1191	nr = ph->env.nr_cmdline;
1192	str = ph->env.cmdline;
1193
1194	fprintf(fp, "# cmdline : ");
1195
1196	for (i = 0; i < nr; i++) {
1197		fprintf(fp, "%s ", str);
1198		str += strlen(str) + 1;
1199	}
1200	fputc('\n', fp);
1201}
1202
1203static void print_cpu_topology(struct perf_header *ph, int fd __maybe_unused,
1204			       FILE *fp)
1205{
1206	int nr, i;
1207	char *str;
1208
1209	nr = ph->env.nr_sibling_cores;
1210	str = ph->env.sibling_cores;
1211
1212	for (i = 0; i < nr; i++) {
1213		fprintf(fp, "# sibling cores   : %s\n", str);
1214		str += strlen(str) + 1;
1215	}
1216
1217	nr = ph->env.nr_sibling_threads;
1218	str = ph->env.sibling_threads;
1219
1220	for (i = 0; i < nr; i++) {
1221		fprintf(fp, "# sibling threads : %s\n", str);
1222		str += strlen(str) + 1;
1223	}
1224}
1225
1226static void free_event_desc(struct perf_evsel *events)
1227{
1228	struct perf_evsel *evsel;
1229
1230	if (!events)
1231		return;
1232
1233	for (evsel = events; evsel->attr.size; evsel++) {
1234		if (evsel->name)
1235			free(evsel->name);
1236		if (evsel->id)
1237			free(evsel->id);
1238	}
1239
1240	free(events);
1241}
1242
1243static struct perf_evsel *
1244read_event_desc(struct perf_header *ph, int fd)
1245{
1246	struct perf_evsel *evsel, *events = NULL;
1247	u64 *id;
1248	void *buf = NULL;
1249	u32 nre, sz, nr, i, j;
1250	ssize_t ret;
1251	size_t msz;
1252
1253	/* number of events */
1254	ret = readn(fd, &nre, sizeof(nre));
1255	if (ret != (ssize_t)sizeof(nre))
1256		goto error;
1257
1258	if (ph->needs_swap)
1259		nre = bswap_32(nre);
1260
1261	ret = readn(fd, &sz, sizeof(sz));
1262	if (ret != (ssize_t)sizeof(sz))
1263		goto error;
1264
1265	if (ph->needs_swap)
1266		sz = bswap_32(sz);
1267
1268	/* buffer to hold on file attr struct */
1269	buf = malloc(sz);
1270	if (!buf)
1271		goto error;
1272
1273	/* the last event terminates with evsel->attr.size == 0: */
1274	events = calloc(nre + 1, sizeof(*events));
1275	if (!events)
1276		goto error;
1277
1278	msz = sizeof(evsel->attr);
1279	if (sz < msz)
1280		msz = sz;
1281
1282	for (i = 0, evsel = events; i < nre; evsel++, i++) {
1283		evsel->idx = i;
1284
1285		/*
1286		 * must read entire on-file attr struct to
1287		 * sync up with layout.
1288		 */
1289		ret = readn(fd, buf, sz);
1290		if (ret != (ssize_t)sz)
1291			goto error;
1292
1293		if (ph->needs_swap)
1294			perf_event__attr_swap(buf);
1295
1296		memcpy(&evsel->attr, buf, msz);
1297
1298		ret = readn(fd, &nr, sizeof(nr));
1299		if (ret != (ssize_t)sizeof(nr))
1300			goto error;
1301
1302		if (ph->needs_swap) {
1303			nr = bswap_32(nr);
1304			evsel->needs_swap = true;
1305		}
1306
1307		evsel->name = do_read_string(fd, ph);
1308
1309		if (!nr)
1310			continue;
1311
1312		id = calloc(nr, sizeof(*id));
1313		if (!id)
1314			goto error;
1315		evsel->ids = nr;
1316		evsel->id = id;
1317
1318		for (j = 0 ; j < nr; j++) {
1319			ret = readn(fd, id, sizeof(*id));
1320			if (ret != (ssize_t)sizeof(*id))
1321				goto error;
1322			if (ph->needs_swap)
1323				*id = bswap_64(*id);
1324			id++;
1325		}
1326	}
1327out:
1328	if (buf)
1329		free(buf);
1330	return events;
1331error:
1332	if (events)
1333		free_event_desc(events);
1334	events = NULL;
1335	goto out;
1336}
1337
1338static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
1339{
1340	struct perf_evsel *evsel, *events = read_event_desc(ph, fd);
1341	u32 j;
1342	u64 *id;
1343
1344	if (!events) {
1345		fprintf(fp, "# event desc: not available or unable to read\n");
1346		return;
1347	}
1348
1349	for (evsel = events; evsel->attr.size; evsel++) {
1350		fprintf(fp, "# event : name = %s, ", evsel->name);
1351
1352		fprintf(fp, "type = %d, config = 0x%"PRIx64
1353			    ", config1 = 0x%"PRIx64", config2 = 0x%"PRIx64,
1354				evsel->attr.type,
1355				(u64)evsel->attr.config,
1356				(u64)evsel->attr.config1,
1357				(u64)evsel->attr.config2);
1358
1359		fprintf(fp, ", excl_usr = %d, excl_kern = %d",
1360				evsel->attr.exclude_user,
1361				evsel->attr.exclude_kernel);
1362
1363		fprintf(fp, ", excl_host = %d, excl_guest = %d",
1364				evsel->attr.exclude_host,
1365				evsel->attr.exclude_guest);
1366
1367		fprintf(fp, ", precise_ip = %d", evsel->attr.precise_ip);
1368
1369		fprintf(fp, ", attr_mmap2 = %d", evsel->attr.mmap2);
1370		fprintf(fp, ", attr_mmap  = %d", evsel->attr.mmap);
1371		fprintf(fp, ", attr_mmap_data = %d", evsel->attr.mmap_data);
1372		if (evsel->ids) {
1373			fprintf(fp, ", id = {");
1374			for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) {
1375				if (j)
1376					fputc(',', fp);
1377				fprintf(fp, " %"PRIu64, *id);
1378			}
1379			fprintf(fp, " }");
1380		}
1381
1382		fputc('\n', fp);
1383	}
1384
1385	free_event_desc(events);
1386}
1387
1388static void print_total_mem(struct perf_header *ph, int fd __maybe_unused,
1389			    FILE *fp)
1390{
1391	fprintf(fp, "# total memory : %Lu kB\n", ph->env.total_mem);
1392}
1393
1394static void print_numa_topology(struct perf_header *ph, int fd __maybe_unused,
1395				FILE *fp)
1396{
1397	u32 nr, c, i;
1398	char *str, *tmp;
1399	uint64_t mem_total, mem_free;
1400
1401	/* nr nodes */
1402	nr = ph->env.nr_numa_nodes;
1403	str = ph->env.numa_nodes;
1404
1405	for (i = 0; i < nr; i++) {
1406		/* node number */
1407		c = strtoul(str, &tmp, 0);
1408		if (*tmp != ':')
1409			goto error;
1410
1411		str = tmp + 1;
1412		mem_total = strtoull(str, &tmp, 0);
1413		if (*tmp != ':')
1414			goto error;
1415
1416		str = tmp + 1;
1417		mem_free = strtoull(str, &tmp, 0);
1418		if (*tmp != ':')
1419			goto error;
1420
1421		fprintf(fp, "# node%u meminfo  : total = %"PRIu64" kB,"
1422			    " free = %"PRIu64" kB\n",
1423			c, mem_total, mem_free);
1424
1425		str = tmp + 1;
1426		fprintf(fp, "# node%u cpu list : %s\n", c, str);
1427
1428		str += strlen(str) + 1;
1429	}
1430	return;
1431error:
1432	fprintf(fp, "# numa topology : not available\n");
1433}
1434
1435static void print_cpuid(struct perf_header *ph, int fd __maybe_unused, FILE *fp)
1436{
1437	fprintf(fp, "# cpuid : %s\n", ph->env.cpuid);
1438}
1439
1440static void print_branch_stack(struct perf_header *ph __maybe_unused,
1441			       int fd __maybe_unused, FILE *fp)
1442{
1443	fprintf(fp, "# contains samples with branch stack\n");
1444}
1445
1446static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused,
1447			       FILE *fp)
1448{
1449	const char *delimiter = "# pmu mappings: ";
1450	char *str, *tmp;
1451	u32 pmu_num;
1452	u32 type;
1453
1454	pmu_num = ph->env.nr_pmu_mappings;
1455	if (!pmu_num) {
1456		fprintf(fp, "# pmu mappings: not available\n");
1457		return;
1458	}
1459
1460	str = ph->env.pmu_mappings;
1461
1462	while (pmu_num) {
1463		type = strtoul(str, &tmp, 0);
1464		if (*tmp != ':')
1465			goto error;
1466
1467		str = tmp + 1;
1468		fprintf(fp, "%s%s = %" PRIu32, delimiter, str, type);
1469
1470		delimiter = ", ";
1471		str += strlen(str) + 1;
1472		pmu_num--;
1473	}
1474
1475	fprintf(fp, "\n");
1476
1477	if (!pmu_num)
1478		return;
1479error:
1480	fprintf(fp, "# pmu mappings: unable to read\n");
1481}
1482
1483static void print_group_desc(struct perf_header *ph, int fd __maybe_unused,
1484			     FILE *fp)
1485{
1486	struct perf_session *session;
1487	struct perf_evsel *evsel;
1488	u32 nr = 0;
1489
1490	session = container_of(ph, struct perf_session, header);
1491
1492	list_for_each_entry(evsel, &session->evlist->entries, node) {
1493		if (perf_evsel__is_group_leader(evsel) &&
1494		    evsel->nr_members > 1) {
1495			fprintf(fp, "# group: %s{%s", evsel->group_name ?: "",
1496				perf_evsel__name(evsel));
1497
1498			nr = evsel->nr_members - 1;
1499		} else if (nr) {
1500			fprintf(fp, ",%s", perf_evsel__name(evsel));
1501
1502			if (--nr == 0)
1503				fprintf(fp, "}\n");
1504		}
1505	}
1506}
1507
1508static int __event_process_build_id(struct build_id_event *bev,
1509				    char *filename,
1510				    struct perf_session *session)
1511{
1512	int err = -1;
1513	struct list_head *head;
1514	struct machine *machine;
1515	u16 misc;
1516	struct dso *dso;
1517	enum dso_kernel_type dso_type;
1518
1519	machine = perf_session__findnew_machine(session, bev->pid);
1520	if (!machine)
1521		goto out;
1522
1523	misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1524
1525	switch (misc) {
1526	case PERF_RECORD_MISC_KERNEL:
1527		dso_type = DSO_TYPE_KERNEL;
1528		head = &machine->kernel_dsos;
1529		break;
1530	case PERF_RECORD_MISC_GUEST_KERNEL:
1531		dso_type = DSO_TYPE_GUEST_KERNEL;
1532		head = &machine->kernel_dsos;
1533		break;
1534	case PERF_RECORD_MISC_USER:
1535	case PERF_RECORD_MISC_GUEST_USER:
1536		dso_type = DSO_TYPE_USER;
1537		head = &machine->user_dsos;
1538		break;
1539	default:
1540		goto out;
1541	}
1542
1543	dso = __dsos__findnew(head, filename);
1544	if (dso != NULL) {
1545		char sbuild_id[BUILD_ID_SIZE * 2 + 1];
1546
1547		dso__set_build_id(dso, &bev->build_id);
1548
1549		if (filename[0] == '[')
1550			dso->kernel = dso_type;
1551
1552		build_id__sprintf(dso->build_id, sizeof(dso->build_id),
1553				  sbuild_id);
1554		pr_debug("build id event received for %s: %s\n",
1555			 dso->long_name, sbuild_id);
1556	}
1557
1558	err = 0;
1559out:
1560	return err;
1561}
1562
1563static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
1564						 int input, u64 offset, u64 size)
1565{
1566	struct perf_session *session = container_of(header, struct perf_session, header);
1567	struct {
1568		struct perf_event_header   header;
1569		u8			   build_id[PERF_ALIGN(BUILD_ID_SIZE, sizeof(u64))];
1570		char			   filename[0];
1571	} old_bev;
1572	struct build_id_event bev;
1573	char filename[PATH_MAX];
1574	u64 limit = offset + size;
1575
1576	while (offset < limit) {
1577		ssize_t len;
1578
1579		if (readn(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
1580			return -1;
1581
1582		if (header->needs_swap)
1583			perf_event_header__bswap(&old_bev.header);
1584
1585		len = old_bev.header.size - sizeof(old_bev);
1586		if (readn(input, filename, len) != len)
1587			return -1;
1588
1589		bev.header = old_bev.header;
1590
1591		/*
1592		 * As the pid is the missing value, we need to fill
1593		 * it properly. The header.misc value give us nice hint.
1594		 */
1595		bev.pid	= HOST_KERNEL_ID;
1596		if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
1597		    bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
1598			bev.pid	= DEFAULT_GUEST_KERNEL_ID;
1599
1600		memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
1601		__event_process_build_id(&bev, filename, session);
1602
1603		offset += bev.header.size;
1604	}
1605
1606	return 0;
1607}
1608
1609static int perf_header__read_build_ids(struct perf_header *header,
1610				       int input, u64 offset, u64 size)
1611{
1612	struct perf_session *session = container_of(header, struct perf_session, header);
1613	struct build_id_event bev;
1614	char filename[PATH_MAX];
1615	u64 limit = offset + size, orig_offset = offset;
1616	int err = -1;
1617
1618	while (offset < limit) {
1619		ssize_t len;
1620
1621		if (readn(input, &bev, sizeof(bev)) != sizeof(bev))
1622			goto out;
1623
1624		if (header->needs_swap)
1625			perf_event_header__bswap(&bev.header);
1626
1627		len = bev.header.size - sizeof(bev);
1628		if (readn(input, filename, len) != len)
1629			goto out;
1630		/*
1631		 * The a1645ce1 changeset:
1632		 *
1633		 * "perf: 'perf kvm' tool for monitoring guest performance from host"
1634		 *
1635		 * Added a field to struct build_id_event that broke the file
1636		 * format.
1637		 *
1638		 * Since the kernel build-id is the first entry, process the
1639		 * table using the old format if the well known
1640		 * '[kernel.kallsyms]' string for the kernel build-id has the
1641		 * first 4 characters chopped off (where the pid_t sits).
1642		 */
1643		if (memcmp(filename, "nel.kallsyms]", 13) == 0) {
1644			if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1)
1645				return -1;
1646			return perf_header__read_build_ids_abi_quirk(header, input, offset, size);
1647		}
1648
1649		__event_process_build_id(&bev, filename, session);
1650
1651		offset += bev.header.size;
1652	}
1653	err = 0;
1654out:
1655	return err;
1656}
1657
1658static int process_tracing_data(struct perf_file_section *section __maybe_unused,
1659				struct perf_header *ph __maybe_unused,
1660				int fd, void *data)
1661{
1662	ssize_t ret = trace_report(fd, data, false);
1663	return ret < 0 ? -1 : 0;
1664}
1665
1666static int process_build_id(struct perf_file_section *section,
1667			    struct perf_header *ph, int fd,
1668			    void *data __maybe_unused)
1669{
1670	if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
1671		pr_debug("Failed to read buildids, continuing...\n");
1672	return 0;
1673}
1674
1675static int process_hostname(struct perf_file_section *section __maybe_unused,
1676			    struct perf_header *ph, int fd,
1677			    void *data __maybe_unused)
1678{
1679	ph->env.hostname = do_read_string(fd, ph);
1680	return ph->env.hostname ? 0 : -ENOMEM;
1681}
1682
1683static int process_osrelease(struct perf_file_section *section __maybe_unused,
1684			     struct perf_header *ph, int fd,
1685			     void *data __maybe_unused)
1686{
1687	ph->env.os_release = do_read_string(fd, ph);
1688	return ph->env.os_release ? 0 : -ENOMEM;
1689}
1690
1691static int process_version(struct perf_file_section *section __maybe_unused,
1692			   struct perf_header *ph, int fd,
1693			   void *data __maybe_unused)
1694{
1695	ph->env.version = do_read_string(fd, ph);
1696	return ph->env.version ? 0 : -ENOMEM;
1697}
1698
1699static int process_arch(struct perf_file_section *section __maybe_unused,
1700			struct perf_header *ph,	int fd,
1701			void *data __maybe_unused)
1702{
1703	ph->env.arch = do_read_string(fd, ph);
1704	return ph->env.arch ? 0 : -ENOMEM;
1705}
1706
1707static int process_nrcpus(struct perf_file_section *section __maybe_unused,
1708			  struct perf_header *ph, int fd,
1709			  void *data __maybe_unused)
1710{
1711	size_t ret;
1712	u32 nr;
1713
1714	ret = readn(fd, &nr, sizeof(nr));
1715	if (ret != sizeof(nr))
1716		return -1;
1717
1718	if (ph->needs_swap)
1719		nr = bswap_32(nr);
1720
1721	ph->env.nr_cpus_online = nr;
1722
1723	ret = readn(fd, &nr, sizeof(nr));
1724	if (ret != sizeof(nr))
1725		return -1;
1726
1727	if (ph->needs_swap)
1728		nr = bswap_32(nr);
1729
1730	ph->env.nr_cpus_avail = nr;
1731	return 0;
1732}
1733
1734static int process_cpudesc(struct perf_file_section *section __maybe_unused,
1735			   struct perf_header *ph, int fd,
1736			   void *data __maybe_unused)
1737{
1738	ph->env.cpu_desc = do_read_string(fd, ph);
1739	return ph->env.cpu_desc ? 0 : -ENOMEM;
1740}
1741
1742static int process_cpuid(struct perf_file_section *section __maybe_unused,
1743			 struct perf_header *ph,  int fd,
1744			 void *data __maybe_unused)
1745{
1746	ph->env.cpuid = do_read_string(fd, ph);
1747	return ph->env.cpuid ? 0 : -ENOMEM;
1748}
1749
1750static int process_total_mem(struct perf_file_section *section __maybe_unused,
1751			     struct perf_header *ph, int fd,
1752			     void *data __maybe_unused)
1753{
1754	uint64_t mem;
1755	size_t ret;
1756
1757	ret = readn(fd, &mem, sizeof(mem));
1758	if (ret != sizeof(mem))
1759		return -1;
1760
1761	if (ph->needs_swap)
1762		mem = bswap_64(mem);
1763
1764	ph->env.total_mem = mem;
1765	return 0;
1766}
1767
1768static struct perf_evsel *
1769perf_evlist__find_by_index(struct perf_evlist *evlist, int idx)
1770{
1771	struct perf_evsel *evsel;
1772
1773	list_for_each_entry(evsel, &evlist->entries, node) {
1774		if (evsel->idx == idx)
1775			return evsel;
1776	}
1777
1778	return NULL;
1779}
1780
1781static void
1782perf_evlist__set_event_name(struct perf_evlist *evlist,
1783			    struct perf_evsel *event)
1784{
1785	struct perf_evsel *evsel;
1786
1787	if (!event->name)
1788		return;
1789
1790	evsel = perf_evlist__find_by_index(evlist, event->idx);
1791	if (!evsel)
1792		return;
1793
1794	if (evsel->name)
1795		return;
1796
1797	evsel->name = strdup(event->name);
1798}
1799
1800static int
1801process_event_desc(struct perf_file_section *section __maybe_unused,
1802		   struct perf_header *header, int fd,
1803		   void *data __maybe_unused)
1804{
1805	struct perf_session *session;
1806	struct perf_evsel *evsel, *events = read_event_desc(header, fd);
1807
1808	if (!events)
1809		return 0;
1810
1811	session = container_of(header, struct perf_session, header);
1812	for (evsel = events; evsel->attr.size; evsel++)
1813		perf_evlist__set_event_name(session->evlist, evsel);
1814
1815	free_event_desc(events);
1816
1817	return 0;
1818}
1819
1820static int process_cmdline(struct perf_file_section *section __maybe_unused,
1821			   struct perf_header *ph, int fd,
1822			   void *data __maybe_unused)
1823{
1824	size_t ret;
1825	char *str;
1826	u32 nr, i;
1827	struct strbuf sb;
1828
1829	ret = readn(fd, &nr, sizeof(nr));
1830	if (ret != sizeof(nr))
1831		return -1;
1832
1833	if (ph->needs_swap)
1834		nr = bswap_32(nr);
1835
1836	ph->env.nr_cmdline = nr;
1837	strbuf_init(&sb, 128);
1838
1839	for (i = 0; i < nr; i++) {
1840		str = do_read_string(fd, ph);
1841		if (!str)
1842			goto error;
1843
1844		/* include a NULL character at the end */
1845		strbuf_add(&sb, str, strlen(str) + 1);
1846		free(str);
1847	}
1848	ph->env.cmdline = strbuf_detach(&sb, NULL);
1849	return 0;
1850
1851error:
1852	strbuf_release(&sb);
1853	return -1;
1854}
1855
1856static int process_cpu_topology(struct perf_file_section *section __maybe_unused,
1857				struct perf_header *ph, int fd,
1858				void *data __maybe_unused)
1859{
1860	size_t ret;
1861	u32 nr, i;
1862	char *str;
1863	struct strbuf sb;
1864
1865	ret = readn(fd, &nr, sizeof(nr));
1866	if (ret != sizeof(nr))
1867		return -1;
1868
1869	if (ph->needs_swap)
1870		nr = bswap_32(nr);
1871
1872	ph->env.nr_sibling_cores = nr;
1873	strbuf_init(&sb, 128);
1874
1875	for (i = 0; i < nr; i++) {
1876		str = do_read_string(fd, ph);
1877		if (!str)
1878			goto error;
1879
1880		/* include a NULL character at the end */
1881		strbuf_add(&sb, str, strlen(str) + 1);
1882		free(str);
1883	}
1884	ph->env.sibling_cores = strbuf_detach(&sb, NULL);
1885
1886	ret = readn(fd, &nr, sizeof(nr));
1887	if (ret != sizeof(nr))
1888		return -1;
1889
1890	if (ph->needs_swap)
1891		nr = bswap_32(nr);
1892
1893	ph->env.nr_sibling_threads = nr;
1894
1895	for (i = 0; i < nr; i++) {
1896		str = do_read_string(fd, ph);
1897		if (!str)
1898			goto error;
1899
1900		/* include a NULL character at the end */
1901		strbuf_add(&sb, str, strlen(str) + 1);
1902		free(str);
1903	}
1904	ph->env.sibling_threads = strbuf_detach(&sb, NULL);
1905	return 0;
1906
1907error:
1908	strbuf_release(&sb);
1909	return -1;
1910}
1911
1912static int process_numa_topology(struct perf_file_section *section __maybe_unused,
1913				 struct perf_header *ph, int fd,
1914				 void *data __maybe_unused)
1915{
1916	size_t ret;
1917	u32 nr, node, i;
1918	char *str;
1919	uint64_t mem_total, mem_free;
1920	struct strbuf sb;
1921
1922	/* nr nodes */
1923	ret = readn(fd, &nr, sizeof(nr));
1924	if (ret != sizeof(nr))
1925		goto error;
1926
1927	if (ph->needs_swap)
1928		nr = bswap_32(nr);
1929
1930	ph->env.nr_numa_nodes = nr;
1931	strbuf_init(&sb, 256);
1932
1933	for (i = 0; i < nr; i++) {
1934		/* node number */
1935		ret = readn(fd, &node, sizeof(node));
1936		if (ret != sizeof(node))
1937			goto error;
1938
1939		ret = readn(fd, &mem_total, sizeof(u64));
1940		if (ret != sizeof(u64))
1941			goto error;
1942
1943		ret = readn(fd, &mem_free, sizeof(u64));
1944		if (ret != sizeof(u64))
1945			goto error;
1946
1947		if (ph->needs_swap) {
1948			node = bswap_32(node);
1949			mem_total = bswap_64(mem_total);
1950			mem_free = bswap_64(mem_free);
1951		}
1952
1953		strbuf_addf(&sb, "%u:%"PRIu64":%"PRIu64":",
1954			    node, mem_total, mem_free);
1955
1956		str = do_read_string(fd, ph);
1957		if (!str)
1958			goto error;
1959
1960		/* include a NULL character at the end */
1961		strbuf_add(&sb, str, strlen(str) + 1);
1962		free(str);
1963	}
1964	ph->env.numa_nodes = strbuf_detach(&sb, NULL);
1965	return 0;
1966
1967error:
1968	strbuf_release(&sb);
1969	return -1;
1970}
1971
1972static int process_pmu_mappings(struct perf_file_section *section __maybe_unused,
1973				struct perf_header *ph, int fd,
1974				void *data __maybe_unused)
1975{
1976	size_t ret;
1977	char *name;
1978	u32 pmu_num;
1979	u32 type;
1980	struct strbuf sb;
1981
1982	ret = readn(fd, &pmu_num, sizeof(pmu_num));
1983	if (ret != sizeof(pmu_num))
1984		return -1;
1985
1986	if (ph->needs_swap)
1987		pmu_num = bswap_32(pmu_num);
1988
1989	if (!pmu_num) {
1990		pr_debug("pmu mappings not available\n");
1991		return 0;
1992	}
1993
1994	ph->env.nr_pmu_mappings = pmu_num;
1995	strbuf_init(&sb, 128);
1996
1997	while (pmu_num) {
1998		if (readn(fd, &type, sizeof(type)) != sizeof(type))
1999			goto error;
2000		if (ph->needs_swap)
2001			type = bswap_32(type);
2002
2003		name = do_read_string(fd, ph);
2004		if (!name)
2005			goto error;
2006
2007		strbuf_addf(&sb, "%u:%s", type, name);
2008		/* include a NULL character at the end */
2009		strbuf_add(&sb, "", 1);
2010
2011		free(name);
2012		pmu_num--;
2013	}
2014	ph->env.pmu_mappings = strbuf_detach(&sb, NULL);
2015	return 0;
2016
2017error:
2018	strbuf_release(&sb);
2019	return -1;
2020}
2021
2022static int process_group_desc(struct perf_file_section *section __maybe_unused,
2023			      struct perf_header *ph, int fd,
2024			      void *data __maybe_unused)
2025{
2026	size_t ret = -1;
2027	u32 i, nr, nr_groups;
2028	struct perf_session *session;
2029	struct perf_evsel *evsel, *leader = NULL;
2030	struct group_desc {
2031		char *name;
2032		u32 leader_idx;
2033		u32 nr_members;
2034	} *desc;
2035
2036	if (readn(fd, &nr_groups, sizeof(nr_groups)) != sizeof(nr_groups))
2037		return -1;
2038
2039	if (ph->needs_swap)
2040		nr_groups = bswap_32(nr_groups);
2041
2042	ph->env.nr_groups = nr_groups;
2043	if (!nr_groups) {
2044		pr_debug("group desc not available\n");
2045		return 0;
2046	}
2047
2048	desc = calloc(nr_groups, sizeof(*desc));
2049	if (!desc)
2050		return -1;
2051
2052	for (i = 0; i < nr_groups; i++) {
2053		desc[i].name = do_read_string(fd, ph);
2054		if (!desc[i].name)
2055			goto out_free;
2056
2057		if (readn(fd, &desc[i].leader_idx, sizeof(u32)) != sizeof(u32))
2058			goto out_free;
2059
2060		if (readn(fd, &desc[i].nr_members, sizeof(u32)) != sizeof(u32))
2061			goto out_free;
2062
2063		if (ph->needs_swap) {
2064			desc[i].leader_idx = bswap_32(desc[i].leader_idx);
2065			desc[i].nr_members = bswap_32(desc[i].nr_members);
2066		}
2067	}
2068
2069	/*
2070	 * Rebuild group relationship based on the group_desc
2071	 */
2072	session = container_of(ph, struct perf_session, header);
2073	session->evlist->nr_groups = nr_groups;
2074
2075	i = nr = 0;
2076	list_for_each_entry(evsel, &session->evlist->entries, node) {
2077		if (evsel->idx == (int) desc[i].leader_idx) {
2078			evsel->leader = evsel;
2079			/* {anon_group} is a dummy name */
2080			if (strcmp(desc[i].name, "{anon_group}"))
2081				evsel->group_name = desc[i].name;
2082			evsel->nr_members = desc[i].nr_members;
2083
2084			if (i >= nr_groups || nr > 0) {
2085				pr_debug("invalid group desc\n");
2086				goto out_free;
2087			}
2088
2089			leader = evsel;
2090			nr = evsel->nr_members - 1;
2091			i++;
2092		} else if (nr) {
2093			/* This is a group member */
2094			evsel->leader = leader;
2095
2096			nr--;
2097		}
2098	}
2099
2100	if (i != nr_groups || nr != 0) {
2101		pr_debug("invalid group desc\n");
2102		goto out_free;
2103	}
2104
2105	ret = 0;
2106out_free:
2107	while ((int) --i >= 0)
2108		free(desc[i].name);
2109	free(desc);
2110
2111	return ret;
2112}
2113
/*
 * Per-feature callbacks and attributes, one entry per header feature in
 * the feat_ops[] table.
 */
struct feature_ops {
	/* writes the feature's section to fd */
	int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
	/* prints the feature to fp; may be NULL, in which case nothing is printed */
	void (*print)(struct perf_header *h, int fd, FILE *fp);
	/* reads the feature's section from fd */
	int (*process)(struct perf_file_section *section,
		       struct perf_header *h, int fd, void *data);
	/* feature name, shown in the "use -I to display" hint */
	const char *name;
	/* when true, the feature is printed only if the full listing was requested */
	bool full_only;
};
2122
/*
 * Designated-initializer helpers for the feat_ops[] table.  'n' is the
 * feature index (also stringified into .name) and 'func' the common
 * suffix of the write_/print_/process_ handlers.
 *
 * FEAT_OPA: write and print handlers only.
 * FEAT_OPP: write, print and process handlers.
 * FEAT_OPF: like FEAT_OPP, and additionally marks the feature full_only.
 */
#define FEAT_OPA(n, func) \
	[n] = { .name = #n, .write = write_##func, .print = print_##func }
#define FEAT_OPP(n, func) \
	[n] = { .name = #n, .write = write_##func, .print = print_##func, \
		.process = process_##func }
#define FEAT_OPF(n, func) \
	[n] = { .name = #n, .write = write_##func, .print = print_##func, \
		.process = process_##func, .full_only = true }

/*
 * feature_ops not implemented: defining the names as NULL makes the
 * macros above expand to ".print = NULL" for these features.
 */
#define print_tracing_data	NULL
#define print_build_id		NULL
2135
/*
 * Handler table indexed by feature number.  Features without an entry are
 * zero-initialized, i.e. all their callbacks are NULL.
 */
static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
	FEAT_OPP(HEADER_TRACING_DATA,	tracing_data),
	FEAT_OPP(HEADER_BUILD_ID,	build_id),
	FEAT_OPP(HEADER_HOSTNAME,	hostname),
	FEAT_OPP(HEADER_OSRELEASE,	osrelease),
	FEAT_OPP(HEADER_VERSION,	version),
	FEAT_OPP(HEADER_ARCH,		arch),
	FEAT_OPP(HEADER_NRCPUS,		nrcpus),
	FEAT_OPP(HEADER_CPUDESC,	cpudesc),
	FEAT_OPP(HEADER_CPUID,		cpuid),
	FEAT_OPP(HEADER_TOTAL_MEM,	total_mem),
	FEAT_OPP(HEADER_EVENT_DESC,	event_desc),
	FEAT_OPP(HEADER_CMDLINE,	cmdline),
	/* the two topology features are only printed in the full listing */
	FEAT_OPF(HEADER_CPU_TOPOLOGY,	cpu_topology),
	FEAT_OPF(HEADER_NUMA_TOPOLOGY,	numa_topology),
	/* branch stack has no process handler */
	FEAT_OPA(HEADER_BRANCH_STACK,	branch_stack),
	FEAT_OPP(HEADER_PMU_MAPPINGS,	pmu_mappings),
	FEAT_OPP(HEADER_GROUP_DESC,	group_desc),
};
2155
/*
 * Context handed to perf_file_section__fprintf_info() through the opaque
 * 'data' pointer of perf_header__process_sections().
 */
struct header_print_data {
	FILE *fp;	/* stream the feature information is printed to */
	bool full; /* extended list of headers */
};
2160
2161static int perf_file_section__fprintf_info(struct perf_file_section *section,
2162					   struct perf_header *ph,
2163					   int feat, int fd, void *data)
2164{
2165	struct header_print_data *hd = data;
2166
2167	if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) {
2168		pr_debug("Failed to lseek to %" PRIu64 " offset for feature "
2169				"%d, continuing...\n", section->offset, feat);
2170		return 0;
2171	}
2172	if (feat >= HEADER_LAST_FEATURE) {
2173		pr_warning("unknown feature %d\n", feat);
2174		return 0;
2175	}
2176	if (!feat_ops[feat].print)
2177		return 0;
2178
2179	if (!feat_ops[feat].full_only || hd->full)
2180		feat_ops[feat].print(ph, fd, hd->fp);
2181	else
2182		fprintf(hd->fp, "# %s info available, use -I to display\n",
2183			feat_ops[feat].name);
2184
2185	return 0;
2186}
2187
2188int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
2189{
2190	struct header_print_data hd;
2191	struct perf_header *header = &session->header;
2192	int fd = session->fd;
2193	hd.fp = fp;
2194	hd.full = full;
2195
2196	perf_header__process_sections(header, fd, &hd,
2197				      perf_file_section__fprintf_info);
2198	return 0;
2199}
2200
2201static int do_write_feat(int fd, struct perf_header *h, int type,
2202			 struct perf_file_section **p,
2203			 struct perf_evlist *evlist)
2204{
2205	int err;
2206	int ret = 0;
2207
2208	if (perf_header__has_feat(h, type)) {
2209		if (!feat_ops[type].write)
2210			return -1;
2211
2212		(*p)->offset = lseek(fd, 0, SEEK_CUR);
2213
2214		err = feat_ops[type].write(fd, h, evlist);
2215		if (err < 0) {
2216			pr_debug("failed to write feature %d\n", type);
2217
2218			/* undo anything written */
2219			lseek(fd, (*p)->offset, SEEK_SET);
2220
2221			return -1;
2222		}
2223		(*p)->size = lseek(fd, 0, SEEK_CUR) - (*p)->offset;
2224		(*p)++;
2225	}
2226	return ret;
2227}
2228
2229static int perf_header__adds_write(struct perf_header *header,
2230				   struct perf_evlist *evlist, int fd)
2231{
2232	int nr_sections;
2233	struct perf_file_section *feat_sec, *p;
2234	int sec_size;
2235	u64 sec_start;
2236	int feat;
2237	int err;
2238
2239	nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
2240	if (!nr_sections)
2241		return 0;
2242
2243	feat_sec = p = calloc(nr_sections, sizeof(*feat_sec));
2244	if (feat_sec == NULL)
2245		return -ENOMEM;
2246
2247	sec_size = sizeof(*feat_sec) * nr_sections;
2248
2249	sec_start = header->feat_offset;
2250	lseek(fd, sec_start + sec_size, SEEK_SET);
2251
2252	for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
2253		if (do_write_feat(fd, header, feat, &p, evlist))
2254			perf_header__clear_feat(header, feat);
2255	}
2256
2257	lseek(fd, sec_start, SEEK_SET);
2258	/*
2259	 * may write more than needed due to dropped feature, but
2260	 * this is okay, reader will skip the mising entries
2261	 */
2262	err = do_write(fd, feat_sec, sec_size);
2263	if (err < 0)
2264		pr_debug("failed to write feature section\n");
2265	free(feat_sec);
2266	return err;
2267}
2268
2269int perf_header__write_pipe(int fd)
2270{
2271	struct perf_pipe_file_header f_header;
2272	int err;
2273
2274	f_header = (struct perf_pipe_file_header){
2275		.magic	   = PERF_MAGIC,
2276		.size	   = sizeof(f_header),
2277	};
2278
2279	err = do_write(fd, &f_header, sizeof(f_header));
2280	if (err < 0) {
2281		pr_debug("failed to write perf pipe header\n");
2282		return err;
2283	}
2284
2285	return 0;
2286}
2287
2288int perf_session__write_header(struct perf_session *session,
2289			       struct perf_evlist *evlist,
2290			       int fd, bool at_exit)
2291{
2292	struct perf_file_header f_header;
2293	struct perf_file_attr   f_attr;
2294	struct perf_header *header = &session->header;
2295	struct perf_evsel *evsel;
2296	u64 attr_offset;
2297	int err;
2298
2299	lseek(fd, sizeof(f_header), SEEK_SET);
2300
2301	list_for_each_entry(evsel, &evlist->entries, node) {
2302		evsel->id_offset = lseek(fd, 0, SEEK_CUR);
2303		err = do_write(fd, evsel->id, evsel->ids * sizeof(u64));
2304		if (err < 0) {
2305			pr_debug("failed to write perf header\n");
2306			return err;
2307		}
2308	}
2309
2310	attr_offset = lseek(fd, 0, SEEK_CUR);
2311
2312	list_for_each_entry(evsel, &evlist->entries, node) {
2313		f_attr = (struct perf_file_attr){
2314			.attr = evsel->attr,
2315			.ids  = {
2316				.offset = evsel->id_offset,
2317				.size   = evsel->ids * sizeof(u64),
2318			}
2319		};
2320		err = do_write(fd, &f_attr, sizeof(f_attr));
2321		if (err < 0) {
2322			pr_debug("failed to write perf header attribute\n");
2323			return err;
2324		}
2325	}
2326
2327	header->data_offset = lseek(fd, 0, SEEK_CUR);
2328	header->feat_offset = header->data_offset + header->data_size;
2329
2330	if (at_exit) {
2331		err = perf_header__adds_write(header, evlist, fd);
2332		if (err < 0)
2333			return err;
2334	}
2335
2336	f_header = (struct perf_file_header){
2337		.magic	   = PERF_MAGIC,
2338		.size	   = sizeof(f_header),
2339		.attr_size = sizeof(f_attr),
2340		.attrs = {
2341			.offset = attr_offset,
2342			.size   = evlist->nr_entries * sizeof(f_attr),
2343		},
2344		.data = {
2345			.offset = header->data_offset,
2346			.size	= header->data_size,
2347		},
2348		/* event_types is ignored, store zeros */
2349	};
2350
2351	memcpy(&f_header.adds_features, &header->adds_features, sizeof(header->adds_features));
2352
2353	lseek(fd, 0, SEEK_SET);
2354	err = do_write(fd, &f_header, sizeof(f_header));
2355	if (err < 0) {
2356		pr_debug("failed to write perf header\n");
2357		return err;
2358	}
2359	lseek(fd, header->data_offset + header->data_size, SEEK_SET);
2360
2361	return 0;
2362}
2363
2364static int perf_header__getbuffer64(struct perf_header *header,
2365				    int fd, void *buf, size_t size)
2366{
2367	if (readn(fd, buf, size) <= 0)
2368		return -1;
2369
2370	if (header->needs_swap)
2371		mem_bswap_64(buf, size);
2372
2373	return 0;
2374}
2375
2376int perf_header__process_sections(struct perf_header *header, int fd,
2377				  void *data,
2378				  int (*process)(struct perf_file_section *section,
2379						 struct perf_header *ph,
2380						 int feat, int fd, void *data))
2381{
2382	struct perf_file_section *feat_sec, *sec;
2383	int nr_sections;
2384	int sec_size;
2385	int feat;
2386	int err;
2387
2388	nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
2389	if (!nr_sections)
2390		return 0;
2391
2392	feat_sec = sec = calloc(nr_sections, sizeof(*feat_sec));
2393	if (!feat_sec)
2394		return -1;
2395
2396	sec_size = sizeof(*feat_sec) * nr_sections;
2397
2398	lseek(fd, header->feat_offset, SEEK_SET);
2399
2400	err = perf_header__getbuffer64(header, fd, feat_sec, sec_size);
2401	if (err < 0)
2402		goto out_free;
2403
2404	for_each_set_bit(feat, header->adds_features, HEADER_LAST_FEATURE) {
2405		err = process(sec++, header, feat, fd, data);
2406		if (err < 0)
2407			goto out_free;
2408	}
2409	err = 0;
2410out_free:
2411	free(feat_sec);
2412	return err;
2413}
2414
/*
 * Attribute sizes of the known perf_event_attr ABI revisions, indexed by
 * revision number.  The trailing 0 entry ends the scan in
 * try_all_file_abis().
 */
static const int attr_file_abi_sizes[] = {
	[0] = PERF_ATTR_SIZE_VER0,
	[1] = PERF_ATTR_SIZE_VER1,
	[2] = PERF_ATTR_SIZE_VER2,
	[3] = PERF_ATTR_SIZE_VER3,
	0,
};
2422
2423/*
2424 * In the legacy file format, the magic number is not used to encode endianness.
2425 * hdr_sz was used to encode endianness. But given that hdr_sz can vary based
2426 * on ABI revisions, we need to try all combinations for all endianness to
2427 * detect the endianness.
2428 */
2429static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph)
2430{
2431	uint64_t ref_size, attr_size;
2432	int i;
2433
2434	for (i = 0 ; attr_file_abi_sizes[i]; i++) {
2435		ref_size = attr_file_abi_sizes[i]
2436			 + sizeof(struct perf_file_section);
2437		if (hdr_sz != ref_size) {
2438			attr_size = bswap_64(hdr_sz);
2439			if (attr_size != ref_size)
2440				continue;
2441
2442			ph->needs_swap = true;
2443		}
2444		pr_debug("ABI%d perf.data file detected, need_swap=%d\n",
2445			 i,
2446			 ph->needs_swap);
2447		return 0;
2448	}
2449	/* could not determine endianness */
2450	return -1;
2451}
2452
/* header size value of pipe ABI revision 0 */
#define PERF_PIPE_HDR_VER0	16

/*
 * Known pipe header sizes, indexed by revision number and compared
 * against hdr_sz by try_all_pipe_abis().  The trailing 0 entry ends the
 * scan.
 */
static const size_t attr_pipe_abi_sizes[] = {
	[0] = PERF_PIPE_HDR_VER0,
	0,
};
2459
2460/*
2461 * In the legacy pipe format, there is an implicit assumption that endiannesss
2462 * between host recording the samples, and host parsing the samples is the
2463 * same. This is not always the case given that the pipe output may always be
2464 * redirected into a file and analyzed on a different machine with possibly a
2465 * different endianness and perf_event ABI revsions in the perf tool itself.
2466 */
2467static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph)
2468{
2469	u64 attr_size;
2470	int i;
2471
2472	for (i = 0 ; attr_pipe_abi_sizes[i]; i++) {
2473		if (hdr_sz != attr_pipe_abi_sizes[i]) {
2474			attr_size = bswap_64(hdr_sz);
2475			if (attr_size != hdr_sz)
2476				continue;
2477
2478			ph->needs_swap = true;
2479		}
2480		pr_debug("Pipe ABI%d perf.data file detected\n", i);
2481		return 0;
2482	}
2483	return -1;
2484}
2485
2486bool is_perf_magic(u64 magic)
2487{
2488	if (!memcmp(&magic, __perf_magic1, sizeof(magic))
2489		|| magic == __perf_magic2
2490		|| magic == __perf_magic2_sw)
2491		return true;
2492
2493	return false;
2494}
2495
2496static int check_magic_endian(u64 magic, uint64_t hdr_sz,
2497			      bool is_pipe, struct perf_header *ph)
2498{
2499	int ret;
2500
2501	/* check for legacy format */
2502	ret = memcmp(&magic, __perf_magic1, sizeof(magic));
2503	if (ret == 0) {
2504		ph->version = PERF_HEADER_VERSION_1;
2505		pr_debug("legacy perf.data format\n");
2506		if (is_pipe)
2507			return try_all_pipe_abis(hdr_sz, ph);
2508
2509		return try_all_file_abis(hdr_sz, ph);
2510	}
2511	/*
2512	 * the new magic number serves two purposes:
2513	 * - unique number to identify actual perf.data files
2514	 * - encode endianness of file
2515	 */
2516
2517	/* check magic number with one endianness */
2518	if (magic == __perf_magic2)
2519		return 0;
2520
2521	/* check magic number with opposite endianness */
2522	if (magic != __perf_magic2_sw)
2523		return -1;
2524
2525	ph->needs_swap = true;
2526	ph->version = PERF_HEADER_VERSION_2;
2527
2528	return 0;
2529}
2530
2531int perf_file_header__read(struct perf_file_header *header,
2532			   struct perf_header *ph, int fd)
2533{
2534	int ret;
2535
2536	lseek(fd, 0, SEEK_SET);
2537
2538	ret = readn(fd, header, sizeof(*header));
2539	if (ret <= 0)
2540		return -1;
2541
2542	if (check_magic_endian(header->magic,
2543			       header->attr_size, false, ph) < 0) {
2544		pr_debug("magic/endian check failed\n");
2545		return -1;
2546	}
2547
2548	if (ph->needs_swap) {
2549		mem_bswap_64(header, offsetof(struct perf_file_header,
2550			     adds_features));
2551	}
2552
2553	if (header->size != sizeof(*header)) {
2554		/* Support the previous format */
2555		if (header->size == offsetof(typeof(*header), adds_features))
2556			bitmap_zero(header->adds_features, HEADER_FEAT_BITS);
2557		else
2558			return -1;
2559	} else if (ph->needs_swap) {
2560		/*
2561		 * feature bitmap is declared as an array of unsigned longs --
2562		 * not good since its size can differ between the host that
2563		 * generated the data file and the host analyzing the file.
2564		 *
2565		 * We need to handle endianness, but we don't know the size of
2566		 * the unsigned long where the file was generated. Take a best
2567		 * guess at determining it: try 64-bit swap first (ie., file
2568		 * created on a 64-bit host), and check if the hostname feature
2569		 * bit is set (this feature bit is forced on as of fbe96f2).
2570		 * If the bit is not, undo the 64-bit swap and try a 32-bit
2571		 * swap. If the hostname bit is still not set (e.g., older data
2572		 * file), punt and fallback to the original behavior --
2573		 * clearing all feature bits and setting buildid.
2574		 */
2575		mem_bswap_64(&header->adds_features,
2576			    BITS_TO_U64(HEADER_FEAT_BITS));
2577
2578		if (!test_bit(HEADER_HOSTNAME, header->adds_features)) {
2579			/* unswap as u64 */
2580			mem_bswap_64(&header->adds_features,
2581				    BITS_TO_U64(HEADER_FEAT_BITS));
2582
2583			/* unswap as u32 */
2584			mem_bswap_32(&header->adds_features,
2585				    BITS_TO_U32(HEADER_FEAT_BITS));
2586		}
2587
2588		if (!test_bit(HEADER_HOSTNAME, header->adds_features)) {
2589			bitmap_zero(header->adds_features, HEADER_FEAT_BITS);
2590			set_bit(HEADER_BUILD_ID, header->adds_features);
2591		}
2592	}
2593
2594	memcpy(&ph->adds_features, &header->adds_features,
2595	       sizeof(ph->adds_features));
2596
2597	ph->data_offset  = header->data.offset;
2598	ph->data_size	 = header->data.size;
2599	ph->feat_offset  = header->data.offset + header->data.size;
2600	return 0;
2601}
2602
2603static int perf_file_section__process(struct perf_file_section *section,
2604				      struct perf_header *ph,
2605				      int feat, int fd, void *data)
2606{
2607	if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) {
2608		pr_debug("Failed to lseek to %" PRIu64 " offset for feature "
2609			  "%d, continuing...\n", section->offset, feat);
2610		return 0;
2611	}
2612
2613	if (feat >= HEADER_LAST_FEATURE) {
2614		pr_debug("unknown feature %d, continuing...\n", feat);
2615		return 0;
2616	}
2617
2618	if (!feat_ops[feat].process)
2619		return 0;
2620
2621	return feat_ops[feat].process(section, ph, fd, data);
2622}
2623
2624static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
2625				       struct perf_header *ph, int fd,
2626				       bool repipe)
2627{
2628	int ret;
2629
2630	ret = readn(fd, header, sizeof(*header));
2631	if (ret <= 0)
2632		return -1;
2633
2634	if (check_magic_endian(header->magic, header->size, true, ph) < 0) {
2635		pr_debug("endian/magic failed\n");
2636		return -1;
2637	}
2638
2639	if (ph->needs_swap)
2640		header->size = bswap_64(header->size);
2641
2642	if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0)
2643		return -1;
2644
2645	return 0;
2646}
2647
2648static int perf_header__read_pipe(struct perf_session *session)
2649{
2650	struct perf_header *header = &session->header;
2651	struct perf_pipe_file_header f_header;
2652
2653	if (perf_file_header__read_pipe(&f_header, header, session->fd,
2654					session->repipe) < 0) {
2655		pr_debug("incompatible file format\n");
2656		return -EINVAL;
2657	}
2658
2659	return 0;
2660}
2661
2662static int read_attr(int fd, struct perf_header *ph,
2663		     struct perf_file_attr *f_attr)
2664{
2665	struct perf_event_attr *attr = &f_attr->attr;
2666	size_t sz, left;
2667	size_t our_sz = sizeof(f_attr->attr);
2668	int ret;
2669
2670	memset(f_attr, 0, sizeof(*f_attr));
2671
2672	/* read minimal guaranteed structure */
2673	ret = readn(fd, attr, PERF_ATTR_SIZE_VER0);
2674	if (ret <= 0) {
2675		pr_debug("cannot read %d bytes of header attr\n",
2676			 PERF_ATTR_SIZE_VER0);
2677		return -1;
2678	}
2679
2680	/* on file perf_event_attr size */
2681	sz = attr->size;
2682
2683	if (ph->needs_swap)
2684		sz = bswap_32(sz);
2685
2686	if (sz == 0) {
2687		/* assume ABI0 */
2688		sz =  PERF_ATTR_SIZE_VER0;
2689	} else if (sz > our_sz) {
2690		pr_debug("file uses a more recent and unsupported ABI"
2691			 " (%zu bytes extra)\n", sz - our_sz);
2692		return -1;
2693	}
2694	/* what we have not yet read and that we know about */
2695	left = sz - PERF_ATTR_SIZE_VER0;
2696	if (left) {
2697		void *ptr = attr;
2698		ptr += PERF_ATTR_SIZE_VER0;
2699
2700		ret = readn(fd, ptr, left);
2701	}
2702	/* read perf_file_section, ids are read in caller */
2703	ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids));
2704
2705	return ret <= 0 ? -1 : 0;
2706}
2707
2708static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel,
2709						struct pevent *pevent)
2710{
2711	struct event_format *event;
2712	char bf[128];
2713
2714	/* already prepared */
2715	if (evsel->tp_format)
2716		return 0;
2717
2718	if (pevent == NULL) {
2719		pr_debug("broken or missing trace data\n");
2720		return -1;
2721	}
2722
2723	event = pevent_find_event(pevent, evsel->attr.config);
2724	if (event == NULL)
2725		return -1;
2726
2727	if (!evsel->name) {
2728		snprintf(bf, sizeof(bf), "%s:%s", event->system, event->name);
2729		evsel->name = strdup(bf);
2730		if (evsel->name == NULL)
2731			return -1;
2732	}
2733
2734	evsel->tp_format = event;
2735	return 0;
2736}
2737
2738static int perf_evlist__prepare_tracepoint_events(struct perf_evlist *evlist,
2739						  struct pevent *pevent)
2740{
2741	struct perf_evsel *pos;
2742
2743	list_for_each_entry(pos, &evlist->entries, node) {
2744		if (pos->attr.type == PERF_TYPE_TRACEPOINT &&
2745		    perf_evsel__prepare_tracepoint_event(pos, pevent))
2746			return -1;
2747	}
2748
2749	return 0;
2750}
2751
2752int perf_session__read_header(struct perf_session *session)
2753{
2754	struct perf_header *header = &session->header;
2755	struct perf_file_header	f_header;
2756	struct perf_file_attr	f_attr;
2757	u64			f_id;
2758	int nr_attrs, nr_ids, i, j;
2759	int fd = session->fd;
2760
2761	session->evlist = perf_evlist__new();
2762	if (session->evlist == NULL)
2763		return -ENOMEM;
2764
2765	if (session->fd_pipe)
2766		return perf_header__read_pipe(session);
2767
2768	if (perf_file_header__read(&f_header, header, fd) < 0)
2769		return -EINVAL;
2770
2771	/*
2772	 * Sanity check that perf.data was written cleanly; data size is
2773	 * initialized to 0 and updated only if the on_exit function is run.
2774	 * If data size is still 0 then the file contains only partial
2775	 * information.  Just warn user and process it as much as it can.
2776	 */
2777	if (f_header.data.size == 0) {
2778		pr_warning("WARNING: The %s file's data size field is 0 which is unexpected.\n"
2779			   "Was the 'perf record' command properly terminated?\n",
2780			   session->filename);
2781	}
2782
2783	nr_attrs = f_header.attrs.size / f_header.attr_size;
2784	lseek(fd, f_header.attrs.offset, SEEK_SET);
2785
2786	for (i = 0; i < nr_attrs; i++) {
2787		struct perf_evsel *evsel;
2788		off_t tmp;
2789
2790		if (read_attr(fd, header, &f_attr) < 0)
2791			goto out_errno;
2792
2793		if (header->needs_swap)
2794			perf_event__attr_swap(&f_attr.attr);
2795
2796		tmp = lseek(fd, 0, SEEK_CUR);
2797		evsel = perf_evsel__new(&f_attr.attr, i);
2798
2799		if (evsel == NULL)
2800			goto out_delete_evlist;
2801
2802		evsel->needs_swap = header->needs_swap;
2803		/*
2804		 * Do it before so that if perf_evsel__alloc_id fails, this
2805		 * entry gets purged too at perf_evlist__delete().
2806		 */
2807		perf_evlist__add(session->evlist, evsel);
2808
2809		nr_ids = f_attr.ids.size / sizeof(u64);
2810		/*
2811		 * We don't have the cpu and thread maps on the header, so
2812		 * for allocating the perf_sample_id table we fake 1 cpu and
2813		 * hattr->ids threads.
2814		 */
2815		if (perf_evsel__alloc_id(evsel, 1, nr_ids))
2816			goto out_delete_evlist;
2817
2818		lseek(fd, f_attr.ids.offset, SEEK_SET);
2819
2820		for (j = 0; j < nr_ids; j++) {
2821			if (perf_header__getbuffer64(header, fd, &f_id, sizeof(f_id)))
2822				goto out_errno;
2823
2824			perf_evlist__id_add(session->evlist, evsel, 0, j, f_id);
2825		}
2826
2827		lseek(fd, tmp, SEEK_SET);
2828	}
2829
2830	symbol_conf.nr_events = nr_attrs;
2831
2832	perf_header__process_sections(header, fd, &session->pevent,
2833				      perf_file_section__process);
2834
2835	if (perf_evlist__prepare_tracepoint_events(session->evlist,
2836						   session->pevent))
2837		goto out_delete_evlist;
2838
2839	return 0;
2840out_errno:
2841	return -errno;
2842
2843out_delete_evlist:
2844	perf_evlist__delete(session->evlist);
2845	session->evlist = NULL;
2846	return -ENOMEM;
2847}
2848
2849int perf_event__synthesize_attr(struct perf_tool *tool,
2850				struct perf_event_attr *attr, u32 ids, u64 *id,
2851				perf_event__handler_t process)
2852{
2853	union perf_event *ev;
2854	size_t size;
2855	int err;
2856
2857	size = sizeof(struct perf_event_attr);
2858	size = PERF_ALIGN(size, sizeof(u64));
2859	size += sizeof(struct perf_event_header);
2860	size += ids * sizeof(u64);
2861
2862	ev = malloc(size);
2863
2864	if (ev == NULL)
2865		return -ENOMEM;
2866
2867	ev->attr.attr = *attr;
2868	memcpy(ev->attr.id, id, ids * sizeof(u64));
2869
2870	ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
2871	ev->attr.header.size = (u16)size;
2872
2873	if (ev->attr.header.size == size)
2874		err = process(tool, ev, NULL, NULL);
2875	else
2876		err = -E2BIG;
2877
2878	free(ev);
2879
2880	return err;
2881}
2882
2883int perf_event__synthesize_attrs(struct perf_tool *tool,
2884				   struct perf_session *session,
2885				   perf_event__handler_t process)
2886{
2887	struct perf_evsel *evsel;
2888	int err = 0;
2889
2890	list_for_each_entry(evsel, &session->evlist->entries, node) {
2891		err = perf_event__synthesize_attr(tool, &evsel->attr, evsel->ids,
2892						  evsel->id, process);
2893		if (err) {
2894			pr_debug("failed to create perf header attribute\n");
2895			return err;
2896		}
2897	}
2898
2899	return err;
2900}
2901
2902int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
2903			     union perf_event *event,
2904			     struct perf_evlist **pevlist)
2905{
2906	u32 i, ids, n_ids;
2907	struct perf_evsel *evsel;
2908	struct perf_evlist *evlist = *pevlist;
2909
2910	if (evlist == NULL) {
2911		*pevlist = evlist = perf_evlist__new();
2912		if (evlist == NULL)
2913			return -ENOMEM;
2914	}
2915
2916	evsel = perf_evsel__new(&event->attr.attr, evlist->nr_entries);
2917	if (evsel == NULL)
2918		return -ENOMEM;
2919
2920	perf_evlist__add(evlist, evsel);
2921
2922	ids = event->header.size;
2923	ids -= (void *)&event->attr.id - (void *)event;
2924	n_ids = ids / sizeof(u64);
2925	/*
2926	 * We don't have the cpu and thread maps on the header, so
2927	 * for allocating the perf_sample_id table we fake 1 cpu and
2928	 * hattr->ids threads.
2929	 */
2930	if (perf_evsel__alloc_id(evsel, 1, n_ids))
2931		return -ENOMEM;
2932
2933	for (i = 0; i < n_ids; i++) {
2934		perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]);
2935	}
2936
2937	symbol_conf.nr_events = evlist->nr_entries;
2938
2939	return 0;
2940}
2941
2942int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd,
2943					struct perf_evlist *evlist,
2944					perf_event__handler_t process)
2945{
2946	union perf_event ev;
2947	struct tracing_data *tdata;
2948	ssize_t size = 0, aligned_size = 0, padding;
2949	int err __maybe_unused = 0;
2950
2951	/*
2952	 * We are going to store the size of the data followed
2953	 * by the data contents. Since the fd descriptor is a pipe,
2954	 * we cannot seek back to store the size of the data once
2955	 * we know it. Instead we:
2956	 *
2957	 * - write the tracing data to the temp file
2958	 * - get/write the data size to pipe
2959	 * - write the tracing data from the temp file
2960	 *   to the pipe
2961	 */
2962	tdata = tracing_data_get(&evlist->entries, fd, true);
2963	if (!tdata)
2964		return -1;
2965
2966	memset(&ev, 0, sizeof(ev));
2967
2968	ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
2969	size = tdata->size;
2970	aligned_size = PERF_ALIGN(size, sizeof(u64));
2971	padding = aligned_size - size;
2972	ev.tracing_data.header.size = sizeof(ev.tracing_data);
2973	ev.tracing_data.size = aligned_size;
2974
2975	process(tool, &ev, NULL, NULL);
2976
2977	/*
2978	 * The put function will copy all the tracing data
2979	 * stored in temp file to the pipe.
2980	 */
2981	tracing_data_put(tdata);
2982
2983	write_padded(fd, NULL, 0, padding);
2984
2985	return aligned_size;
2986}
2987
2988int perf_event__process_tracing_data(struct perf_tool *tool __maybe_unused,
2989				     union perf_event *event,
2990				     struct perf_session *session)
2991{
2992	ssize_t size_read, padding, size = event->tracing_data.size;
2993	off_t offset = lseek(session->fd, 0, SEEK_CUR);
2994	char buf[BUFSIZ];
2995
2996	/* setup for reading amidst mmap */
2997	lseek(session->fd, offset + sizeof(struct tracing_data_event),
2998	      SEEK_SET);
2999
3000	size_read = trace_report(session->fd, &session->pevent,
3001				 session->repipe);
3002	padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read;
3003
3004	if (readn(session->fd, buf, padding) < 0) {
3005		pr_err("%s: reading input file", __func__);
3006		return -1;
3007	}
3008	if (session->repipe) {
3009		int retw = write(STDOUT_FILENO, buf, padding);
3010		if (retw <= 0 || retw != padding) {
3011			pr_err("%s: repiping tracing data padding", __func__);
3012			return -1;
3013		}
3014	}
3015
3016	if (size_read + padding != size) {
3017		pr_err("%s: tracing data size mismatch", __func__);
3018		return -1;
3019	}
3020
3021	perf_evlist__prepare_tracepoint_events(session->evlist,
3022					       session->pevent);
3023
3024	return size_read + padding;
3025}
3026
3027int perf_event__synthesize_build_id(struct perf_tool *tool,
3028				    struct dso *pos, u16 misc,
3029				    perf_event__handler_t process,
3030				    struct machine *machine)
3031{
3032	union perf_event ev;
3033	size_t len;
3034	int err = 0;
3035
3036	if (!pos->hit)
3037		return err;
3038
3039	memset(&ev, 0, sizeof(ev));
3040
3041	len = pos->long_name_len + 1;
3042	len = PERF_ALIGN(len, NAME_ALIGN);
3043	memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id));
3044	ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
3045	ev.build_id.header.misc = misc;
3046	ev.build_id.pid = machine->pid;
3047	ev.build_id.header.size = sizeof(ev.build_id) + len;
3048	memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
3049
3050	err = process(tool, &ev, NULL, machine);
3051
3052	return err;
3053}
3054
3055int perf_event__process_build_id(struct perf_tool *tool __maybe_unused,
3056				 union perf_event *event,
3057				 struct perf_session *session)
3058{
3059	__event_process_build_id(&event->build_id,
3060				 event->build_id.filename,
3061				 session);
3062	return 0;
3063}
3064
3065void disable_buildid_cache(void)
3066{
3067	no_buildid_cache = true;
3068}
3069