/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#define _FILE_OFFSET_BITS 64

#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))

enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

static u64			user_interval			= ULLONG_MAX;
static u64			default_interval		=      0;

static unsigned int		page_size;
static unsigned int		mmap_pages			= UINT_MAX;
static unsigned int		user_freq			= UINT_MAX;
static int			freq				=   1000;
static int			output;
static int			pipe_output			=      0;
static const char		*output_name			= NULL;
static int			group				=      0;
static int			realtime_prio			=      0;
static bool			nodelay				=  false;
static bool			raw_samples			=  false;
static bool			sample_id_all_avail		=   true;
static bool			system_wide			=  false;
static pid_t			target_pid			=     -1;
static pid_t			target_tid			=     -1;
static pid_t			child_pid			=     -1;
static bool			no_inherit			=  false;
static enum write_mode_t	write_mode			= WRITE_FORCE;
static bool			call_graph			=  false;
static bool			inherit_stat			=  false;
static bool			no_samples			=  false;
static bool			sample_address			=  false;
static bool			sample_time			=  false;
static bool			no_buildid			=  false;
static bool			no_buildid_cache		=  false;
static struct perf_evlist	*evsel_list;

static long			samples				=      0;
static u64			bytes_written			=      0;

static int			file_new			=      1;
static off_t			post_processing_offset;

static struct perf_session	*session;
static const char		*cpu_list;

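/*
 * Account for data that reached the output file without going through
 * write_output() (e.g. tracing data written straight to the fd), so
 * that bytes_written stays accurate for the final header update.
 */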
static void advance_output(size_t size)
{
	bytes_written += size;
}

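/*
 * Write the whole buffer to the output file, retrying on short
 * writes; any write error is fatal.
 */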
static void write_output(void *buf, size_t size)
{
	while (size) {
		int ret = write(output, buf, size);

		if (ret < 0)
			die("failed to write");

		size -= ret;
		buf += ret;

		bytes_written += ret;
	}
}

static int process_synthesized_event(union perf_event *event,
				     struct perf_sample *sample __used,
				     struct perf_session *self __used)
{
	write_output(event, event->header.size);
	return 0;
}

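/*
 * Drain one mmap'ed ring buffer: copy everything between our last
 * read position (md->prev) and the kernel's current head to the
 * output file. A region that wraps past the end of the buffer is
 * written in two chunks. Writing the new tail tells the kernel it
 * may reuse the consumed space.
 */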
static void mmap_read(struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;

	if (old == head)
		return;

	samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(buf, size);
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(buf, size);

	md->prev = old;
	perf_mmap__write_tail(md, old);
}

static volatile int done = 0;
static volatile int signr = -1;

static void sig_handler(int sig)
{
	done = 1;
	signr = sig;
}

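/*
 * atexit handler: terminate the child workload if it is still
 * running, then re-raise the fatal signal with the default handler
 * so our exit status reflects how we were killed. SIGUSR1 is the
 * child's "exec failed" notification and is not re-raised.
 */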
static void sig_atexit(void)
{
	if (child_pid > 0)
		kill(child_pid, SIGTERM);

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

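/*
 * Fill in the perf_event_attr for one event according to the
 * command-line options. Mapping, comm and mmap_data tracking is
 * requested only on the first counter, since one stream of those
 * events is enough for the whole session.
 */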
static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
{
	struct perf_event_attr *attr = &evsel->attr;
	int track = !evsel->idx; /* only the first counter needs these */

	attr->inherit		= !no_inherit;
	attr->read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
				  PERF_FORMAT_TOTAL_TIME_RUNNING |
				  PERF_FORMAT_ID;

	attr->sample_type	|= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

	if (evlist->nr_entries > 1)
		attr->sample_type |= PERF_SAMPLE_ID;

	/*
	 * Some events default to a sample period of 1. Keep that as a
	 * weak assumption, overridable by the user.
	 */
	if (!attr->sample_period || (user_freq != UINT_MAX &&
				     user_interval != ULLONG_MAX)) {
		if (freq) {
			attr->sample_type	|= PERF_SAMPLE_PERIOD;
			attr->freq		= 1;
			attr->sample_freq	= freq;
		} else {
			attr->sample_period = default_interval;
		}
	}

	if (no_samples)
		attr->sample_freq = 0;

	if (inherit_stat)
		attr->inherit_stat = 1;

	if (sample_address) {
		attr->sample_type	|= PERF_SAMPLE_ADDR;
		attr->mmap_data = track;
	}

	if (call_graph)
		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;

	if (system_wide)
		attr->sample_type	|= PERF_SAMPLE_CPU;

	if (sample_id_all_avail &&
	    (sample_time || system_wide || !no_inherit || cpu_list))
		attr->sample_type	|= PERF_SAMPLE_TIME;

	if (raw_samples) {
		attr->sample_type	|= PERF_SAMPLE_TIME;
		attr->sample_type	|= PERF_SAMPLE_RAW;
		attr->sample_type	|= PERF_SAMPLE_CPU;
	}

	if (nodelay) {
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}

	attr->mmap		= track;
	attr->comm		= track;

	if (target_pid == -1 && target_tid == -1 && !system_wide) {
		attr->disabled = 1;
		attr->enable_on_exec = 1;
	}
}

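/*
 * Compare two event lists pairwise by their attrs; used to reject an
 * append (-A) against a perf.data file that was recorded with a
 * different set of events.
 */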
static bool perf_evlist__equal(struct perf_evlist *evlist,
			       struct perf_evlist *other)
{
	struct perf_evsel *pos, *pair;

	if (evlist->nr_entries != other->nr_entries)
		return false;

	pair = list_entry(other->entries.next, struct perf_evsel, node);

	list_for_each_entry(pos, &evlist->entries, node) {
		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
			return false;
		pair = list_entry(pair->node.next, struct perf_evsel, node);
	}

	return true;
}

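/*
 * Configure, open and mmap all counters in the list. On failure this
 * retries without sample_id_all for old kernels, and falls back from
 * the hardware cycles event to the cpu-clock software event, before
 * giving up.
 */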
static void open_counters(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	if (evlist->cpus->map[0] < 0)
		no_inherit = true;

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		/*
		 * Check if parse_single_tracepoint_event has already asked
		 * for PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but a short-term fix for problems
		 * introduced by eac23d1c, which broke 'perf script' by
		 * producing different sample_types when multiple tracepoint
		 * events are used with a perf binary that tries to use
		 * sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		config_attr(pos, evlist);
retry_sample_id:
		attr->sample_id_all = sample_id_all_avail ? 1 : 0;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__warning_paranoid();
				exit(EXIT_FAILURE);
			} else if (err == ENODEV && cpu_list) {
				die("No such device - did you specify"
					" an out-of-range profile CPU?\n");
			} else if (err == EINVAL && sample_id_all_avail) {
				/*
				 * Old kernel, no attr->sample_id_all field
				 */
				sample_id_all_avail = false;
				if (!sample_time && !raw_samples && !time_needed)
					attr->sample_type &= ~PERF_SAMPLE_TIME;

				goto retry_sample_id;
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
					&& attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s).  /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
		die("failed to mmap with %d (%s)\n", errno, strerror(errno));

	if (file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
	}

	perf_session__update_sample_type(session);
}

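/*
 * Walk the samples recorded so far (everything after the header) and
 * mark the DSOs that were actually hit, so that only their build-ids
 * get written out.
 */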
static int process_buildids(void)
{
	u64 size = lseek(output, 0, SEEK_CUR);

	if (size == 0)
		return 0;

	session->fd = output;
	return __perf_session__process_events(session, post_processing_offset,
					      size - post_processing_offset,
					      size, &build_id__mark_dso_hit_ops);
}

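/*
 * atexit handler: account the final data size, collect build-ids and
 * rewrite the file header in place. Skipped for pipe output, where
 * the header cannot be rewritten.
 */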
static void atexit_header(void)
{
	if (!pipe_output) {
		session->header.data_size += bytes_written;

		if (!no_buildid)
			process_buildids();
		perf_session__write_header(session, evsel_list, output, true);
		perf_session__delete(session);
		perf_evlist__delete(evsel_list);
		symbol__exit();
	}
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_session *psession = data;

	if (machine__is_host(machine))
		return;

	/*
	 * For the guest kernel, when processing the record and report
	 * subcommands, we synthesize the module mmap events before the
	 * guest kernel mmap event and trigger a DSO preload, because
	 * guest module symbols are loaded from the guest's kallsyms
	 * rather than from /lib/modules/XXX/XXX. This avoids missing
	 * symbols when the first sampled address falls in a module
	 * rather than in the guest kernel itself.
	 */
	err = perf_event__synthesize_modules(process_synthesized_event,
					     psession, machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We fall back to _stext for the guest kernel because the guest
	 * kernel's /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 psession, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 psession, machine,
							 "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

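/*
 * PERF_RECORD_FINISHED_ROUND is a synthetic marker: every event
 * written before it has been flushed from all ring buffers, so the
 * report side can safely sort and process everything up to this
 * point in timestamp order.
 */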
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

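/*
 * Drain every mmap'ed buffer, then emit a finished-round marker if
 * the session carries tracing data (where cross-buffer ordering
 * matters).
 */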
static void mmap_read_all(void)
{
	int i;

	for (i = 0; i < evsel_list->nr_mmaps; i++) {
		if (evsel_list->mmap[i].base)
			mmap_read(&evsel_list->mmap[i]);
	}

	if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
		write_output(&finished_round_event, sizeof(finished_round_event));
}

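/*
 * The main recording loop: set up the output file and session, fork
 * the workload if one was given, open the counters, synthesize the
 * initial metadata events, then drain the ring buffers until we are
 * told to stop.
 */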
static int __cmd_record(int argc, const char **argv)
{
	/* ANDROID_CHANGE_BEGIN */
#ifndef __APPLE__
	int i;
	struct stat st;
	int flags;
	int err;
	unsigned long waking = 0;
	int child_ready_pipe[2], go_pipe[2];
	const bool forks = argc > 0;
	char buf;
	struct machine *machine;

	page_size = sysconf(_SC_PAGE_SIZE);

	atexit(sig_atexit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
		perror("failed to create pipes");
		exit(-1);
	}

	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			pipe_output = 1;
		else
			/* ANDROID_CHANGE_BEGIN */
#ifdef __BIONIC__
			output_name = "/data/perf.data";
#else
			output_name = "perf.data";
#endif
			/* ANDROID_CHANGE_END */
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			pipe_output = 1;
		else if (!stat(output_name, &st) && st.st_size) {
			if (write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (write_mode == WRITE_APPEND) {
			write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (write_mode == WRITE_APPEND)
		file_new = 0;
	else
		flags |= O_TRUNC;

	if (pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	session = perf_session__new(output_name, O_WRONLY,
				    write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	if (!no_buildid)
		perf_header__set_feat(&session->header, HEADER_BUILD_ID);

	if (!file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (have_tracepoints(&evsel_list->entries))
		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);

	/* 512 kiB: default amount of unprivileged mlocked memory */
	if (mmap_pages == UINT_MAX)
		mmap_pages = (512 * 1024) / page_size;

	if (forks) {
		child_pid = fork();
		if (child_pid < 0) {
			perror("failed to fork");
			exit(-1);
		}

		if (!child_pid) {
			if (pipe_output)
				dup2(2, 1);
			close(child_ready_pipe[0]);
			close(go_pipe[1]);
			fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

			/*
			 * Do a dummy execvp to get the PLT entry resolved,
			 * so we avoid the resolver overhead on the real
			 * execvp call.
			 */
			execvp("", (char **)argv);

			/*
			 * Tell the parent we're ready to go
			 */
			close(child_ready_pipe[1]);

			/*
			 * Wait until the parent tells us to go.
			 */
			if (read(go_pipe[0], &buf, 1) == -1)
				perror("unable to read pipe");

			execvp(argv[0], (char **)argv);

			perror(argv[0]);
			kill(getppid(), SIGUSR1);
			exit(-1);
		}

		if (!system_wide && target_tid == -1 && target_pid == -1)
			evsel_list->threads->map[0] = child_pid;

		close(child_ready_pipe[1]);
		close(go_pipe[0]);
		/*
		 * wait for child to settle
		 */
		if (read(child_ready_pipe[0], &buf, 1) == -1) {
			perror("unable to read pipe");
			exit(-1);
		}
		close(child_ready_pipe[0]);
	}

	open_counters(evsel_list);

	/*
	 * perf_session__delete(session) will be called at atexit_header()
	 */
	atexit(atexit_header);

	if (pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	post_processing_offset = lseek(output, 0, SEEK_CUR);

	if (pipe_output) {
		err = perf_session__synthesize_attrs(session,
						     process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(process_synthesized_event,
							 session);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything. We really should return
			 * this more properly and also propagate errors
			 * that now result in calls to die().
			 */
			err = perf_event__synthesize_tracing_data(output, evsel_list,
								  process_synthesized_event,
								  session);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			advance_output(err);
		}
	}

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

	err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
						 session, machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
							 session, machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(process_synthesized_event,
					     session, machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session,
					       perf_event__synthesize_guest_os);

	if (!system_wide)
		perf_event__synthesize_thread_map(evsel_list->threads,
						  process_synthesized_event,
						  session);
	else
		perf_event__synthesize_threads(process_synthesized_event,
					       session);

	if (realtime_prio) {
		struct sched_param param;

		param.sched_priority = realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	/*
	 * Let the child rip
	 */
	if (forks)
		close(go_pipe[1]);

	for (;;) {
		int hits = samples;
		int thread;

		mmap_read_all();

		if (hits == samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		if (done) {
			for (i = 0; i < evsel_list->cpus->nr; i++) {
				struct perf_evsel *pos;

				list_for_each_entry(pos, &evsel_list->entries, node) {
					for (thread = 0;
						thread < evsel_list->threads->nr;
						thread++)
						ioctl(FD(pos, i, thread),
							PERF_EVENT_IOC_DISABLE);
				}
			}
		}
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)bytes_written / 1024.0 / 1024.0,
		output_name,
		bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
#else
	return -1;
#endif
	/* ANDROID_CHANGE_END */
}

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

static bool force, append_file;

const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &evsel_list, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events),
	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
		     "event filter", parse_filter),
	OPT_INTEGER('p', "pid", &target_pid,
		    "record events on existing process id"),
	OPT_INTEGER('t', "tid", &target_tid,
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &nodelay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &append_file,
		    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &force,
		    "overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &user_interval, "event period to sample"),
	OPT_STRING('o', "output", &output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
	OPT_BOOLEAN('g', "call-graph", &call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
	OPT_BOOLEAN('n', "no-samples", &no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_END()
};

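/*
 * Entry point for 'perf record': parse the options, build the event
 * list and the cpu/thread maps, then hand off to __cmd_record().
 */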
int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target_pid == -1 && target_tid == -1 &&
		!system_wide && !cpu_list)
		usage_with_options(record_usage, record_options);

	if (force && append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A.\n");
		usage_with_options(record_usage, record_options);
	} else if (append_file) {
		write_mode = WRITE_APPEND;
	} else {
		write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
			" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (no_buildid_cache || no_buildid)
		disable_buildid_cache();

	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (target_pid != -1)
		target_tid = target_pid;

	if (perf_evlist__create_maps(evsel_list, target_pid,
				     target_tid, cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
					 evsel_list->threads->nr) < 0)
			goto out_free_fd;
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	if (perf_evlist__alloc_pollfd(evsel_list) < 0)
		goto out_free_fd;

	if (user_interval != ULLONG_MAX)
		default_interval = user_interval;
	if (user_freq != UINT_MAX)
		freq = user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		freq = 0;
	else if (freq) {
		default_interval = freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}