1/*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 *    Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 *  Copyright (C) 2004-2006 Ingo Molnar
12 *  Copyright (C) 2004 Nadia Yvette Chambers
13 */
14#include <linux/ring_buffer.h>
15#include <generated/utsrelease.h>
16#include <linux/stacktrace.h>
17#include <linux/writeback.h>
18#include <linux/kallsyms.h>
19#include <linux/seq_file.h>
20#include <linux/notifier.h>
21#include <linux/irqflags.h>
22#include <linux/debugfs.h>
23#include <linux/pagemap.h>
24#include <linux/hardirq.h>
25#include <linux/linkage.h>
26#include <linux/uaccess.h>
27#include <linux/kprobes.h>
28#include <linux/ftrace.h>
29#include <linux/module.h>
30#include <linux/percpu.h>
31#include <linux/splice.h>
32#include <linux/kdebug.h>
33#include <linux/string.h>
34#include <linux/rwsem.h>
35#include <linux/slab.h>
36#include <linux/ctype.h>
37#include <linux/init.h>
38#include <linux/poll.h>
39#include <linux/nmi.h>
40#include <linux/fs.h>
41#include <linux/sched/rt.h>
42
43#include "trace.h"
44#include "trace_output.h"
45
46/*
47 * On boot up, the ring buffer is set to the minimum size, so that
48 * we do not waste memory on systems that are not using tracing.
49 */
50bool ring_buffer_expanded;
51
52/*
53 * We need to change this state when a selftest is running.
54 * A selftest will look into the ring buffer to count the
55 * entries inserted during the selftest, although concurrent
56 * insertions into the ring buffer, such as trace_printk(), could
57 * occur at the same time and give false positive or negative results.
58 */
59static bool __read_mostly tracing_selftest_running;
60
61/*
62 * If a tracer is running, we do not want to run SELFTEST.
63 */
64bool __read_mostly tracing_selftest_disabled;
65
66/* For tracers that don't implement custom flags */
67static struct tracer_opt dummy_tracer_opt[] = {
68	{ }
69};
70
71static struct tracer_flags dummy_tracer_flags = {
72	.val = 0,
73	.opts = dummy_tracer_opt
74};
75
76static int
77dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
78{
79	return 0;
80}
81
82/*
83 * To prevent the comm cache from being overwritten when no
84 * tracing is active, only save the comm when a trace event
85 * has occurred.
86 */
87static DEFINE_PER_CPU(bool, trace_cmdline_save);
88
89/*
90 * Kill all tracing for good (never come back).
91 * It is initialized to 1 and is set back to zero only if the
92 * initialization of the tracer succeeds; that is the only place
93 * that ever clears it.
94 */
95static int tracing_disabled = 1;
96
97DEFINE_PER_CPU(int, ftrace_cpu_disabled);
98
99cpumask_var_t __read_mostly	tracing_buffer_mask;
100
101/*
102 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
103 *
104 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
105 * is set, then ftrace_dump is called. This will output the contents
106 * of the ftrace buffers to the console.  This is very useful for
107 * capturing traces that lead to crashes and outputting them to a
108 * serial console.
109 *
110 * It is off by default. You can enable it either by specifying
111 * "ftrace_dump_on_oops" on the kernel command line, or by setting
112 * /proc/sys/kernel/ftrace_dump_on_oops.
113 * Set it to 1 to dump the buffers of all CPUs.
114 * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
115 */
116
117enum ftrace_dump_mode ftrace_dump_on_oops;
118
119/* When set, tracing will stop when a WARN*() is hit */
120int __disable_trace_on_warning;
121
122static int tracing_set_tracer(struct trace_array *tr, const char *buf);
123
124#define MAX_TRACER_SIZE		100
125static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
126static char *default_bootup_tracer;
127
128static bool allocate_snapshot;
129
130static int __init set_cmdline_ftrace(char *str)
131{
132	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
133	default_bootup_tracer = bootup_tracer_buf;
134	/* We are using ftrace early, expand it */
135	ring_buffer_expanded = true;
136	return 1;
137}
138__setup("ftrace=", set_cmdline_ftrace);
139
140static int __init set_ftrace_dump_on_oops(char *str)
141{
142	if (*str++ != '=' || !*str) {
143		ftrace_dump_on_oops = DUMP_ALL;
144		return 1;
145	}
146
147	if (!strcmp("orig_cpu", str)) {
148		ftrace_dump_on_oops = DUMP_ORIG;
149		return 1;
150	}
151
152	return 0;
153}
154__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
155
156static int __init stop_trace_on_warning(char *str)
157{
158	__disable_trace_on_warning = 1;
159	return 1;
160}
161__setup("traceoff_on_warning=", stop_trace_on_warning);
162
163static int __init boot_alloc_snapshot(char *str)
164{
165	allocate_snapshot = true;
166	/* We also need the main ring buffer expanded */
167	ring_buffer_expanded = true;
168	return 1;
169}
170__setup("alloc_snapshot", boot_alloc_snapshot);
171
172
173static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
174static char *trace_boot_options __initdata;
175
176static int __init set_trace_boot_options(char *str)
177{
178	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
179	trace_boot_options = trace_boot_options_buf;
180	return 0;
181}
182__setup("trace_options=", set_trace_boot_options);
183
184static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
185static char *trace_boot_clock __initdata;
186
187static int __init set_trace_boot_clock(char *str)
188{
189	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
190	trace_boot_clock = trace_boot_clock_buf;
191	return 0;
192}
193__setup("trace_clock=", set_trace_boot_clock);
194
195
196unsigned long long ns2usecs(cycle_t nsec)
197{
198	nsec += 500;
199	do_div(nsec, 1000);
200	return nsec;
201}
202
203/*
204 * The global_trace is the descriptor that holds the tracing
205 * buffers for the live tracing. For each CPU, it contains
206 * a linked list of pages that will store trace entries. The
207 * page descriptors of those pages are used to hold this linked
208 * list, by chaining the lru item of each page descriptor to the
209 * other pages of that CPU's buffer.
210 *
211 * For each active CPU there is a data field that holds the
212 * pages for the buffer for that CPU. Each CPU has the same number
213 * of pages allocated for its buffer.
214 */
215static struct trace_array	global_trace;
216
217LIST_HEAD(ftrace_trace_arrays);
218
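/*
 * trace_array_get - take a reference on a trace array
 * @this_tr: the trace array to take a reference on
 *
 * Walks ftrace_trace_arrays under trace_types_lock and bumps the
 * reference count of @this_tr if it is still on the list. Returns 0
 * on success or -ENODEV if the array no longer exists. Drop the
 * reference with trace_array_put().
 */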
219int trace_array_get(struct trace_array *this_tr)
220{
221	struct trace_array *tr;
222	int ret = -ENODEV;
223
224	mutex_lock(&trace_types_lock);
225	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
226		if (tr == this_tr) {
227			tr->ref++;
228			ret = 0;
229			break;
230		}
231	}
232	mutex_unlock(&trace_types_lock);
233
234	return ret;
235}
236
237static void __trace_array_put(struct trace_array *this_tr)
238{
239	WARN_ON(!this_tr->ref);
240	this_tr->ref--;
241}
242
243void trace_array_put(struct trace_array *this_tr)
244{
245	mutex_lock(&trace_types_lock);
246	__trace_array_put(this_tr);
247	mutex_unlock(&trace_types_lock);
248}
249
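/*
 * If the event file has a filter attached and @rec does not match it,
 * discard the already reserved @event from @buffer. Returns 1 if the
 * event was discarded, 0 if it should be committed as usual.
 */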
250int filter_check_discard(struct ftrace_event_file *file, void *rec,
251			 struct ring_buffer *buffer,
252			 struct ring_buffer_event *event)
253{
254	if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
255	    !filter_match_preds(file->filter, rec)) {
256		ring_buffer_discard_commit(buffer, event);
257		return 1;
258	}
259
260	return 0;
261}
262EXPORT_SYMBOL_GPL(filter_check_discard);
263
264int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
265			      struct ring_buffer *buffer,
266			      struct ring_buffer_event *event)
267{
268	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
269	    !filter_match_preds(call->filter, rec)) {
270		ring_buffer_discard_commit(buffer, event);
271		return 1;
272	}
273
274	return 0;
275}
276EXPORT_SYMBOL_GPL(call_filter_check_discard);
277
278static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
279{
280	u64 ts;
281
282	/* Early boot up does not have a buffer yet */
283	if (!buf->buffer)
284		return trace_clock_local();
285
286	ts = ring_buffer_time_stamp(buf->buffer, cpu);
287	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
288
289	return ts;
290}
291
292cycle_t ftrace_now(int cpu)
293{
294	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
295}
296
297/**
298 * tracing_is_enabled - Show if global_trace has been enabled
299 *
300 * Shows if the global trace has been enabled or not. It uses the
301 * mirror flag "buffer_disabled" so that it can be used in fast paths
302 * such as the irqsoff tracer, but it may be inaccurate due to races.
303 * If you need to know the accurate state, use tracing_is_on(), which
304 * is a little slower but accurate.
305 */
306int tracing_is_enabled(void)
307{
308	/*
309	 * For quick access (irqsoff uses this in fast path), just
310	 * return the mirror variable of the state of the ring buffer.
311	 * It's a little racy, but we don't really care.
312	 */
313	smp_rmb();
314	return !global_trace.buffer_disabled;
315}
316
317/*
318 * trace_buf_size is the size in bytes that is allocated
319 * for a buffer. Note, the number of bytes is always rounded
320 * to page size.
321 *
322 * This number is purposely set to a low default of 16384 entries
323 * so that, if a dump on oops happens, we do not have to wait for
324 * a huge amount of output. It is configurable at both boot time
325 * and run time anyway.
326 */
327#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
328
329static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
330
331/* trace_types holds a linked list of available tracers. */
332static struct tracer		*trace_types __read_mostly;
333
334/*
335 * trace_types_lock is used to protect the trace_types list.
336 */
337DEFINE_MUTEX(trace_types_lock);
338
339/*
340 * Serialize access to the ring buffer.
341 *
342 * The ring buffer serializes readers, but that is only low level
343 * protection. The validity of the events (returned by ring_buffer_peek()
344 * etc.) is not protected by the ring buffer.
345 *
346 * The content of events may become garbage if we allow other processes
347 * to consume these events concurrently:
348 *   A) the page of the consumed events may become a normal page
349 *      (not a reader page) in the ring buffer, and this page will be
350 *      rewritten by the event producer.
351 *   B) the page of the consumed events may become a page for splice_read,
352 *      and this page will be returned to the system.
353 *
354 * These primitives allow multiple processes to access different CPU
355 * ring buffers concurrently.
356 *
357 * These primitives don't distinguish between read-only and read-consume
358 * access. Multiple read-only accesses are also serialized.
359 */
360
361#ifdef CONFIG_SMP
362static DECLARE_RWSEM(all_cpu_access_lock);
363static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
364
365static inline void trace_access_lock(int cpu)
366{
367	if (cpu == RING_BUFFER_ALL_CPUS) {
368		/* gain it for accessing the whole ring buffer. */
369		down_write(&all_cpu_access_lock);
370	} else {
371		/* gain it for accessing a cpu ring buffer. */
372
373		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
374		down_read(&all_cpu_access_lock);
375
376		/* Secondly block other access to this @cpu ring buffer. */
377		mutex_lock(&per_cpu(cpu_access_lock, cpu));
378	}
379}
380
381static inline void trace_access_unlock(int cpu)
382{
383	if (cpu == RING_BUFFER_ALL_CPUS) {
384		up_write(&all_cpu_access_lock);
385	} else {
386		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
387		up_read(&all_cpu_access_lock);
388	}
389}
390
391static inline void trace_access_lock_init(void)
392{
393	int cpu;
394
395	for_each_possible_cpu(cpu)
396		mutex_init(&per_cpu(cpu_access_lock, cpu));
397}
398
399#else
400
401static DEFINE_MUTEX(access_lock);
402
403static inline void trace_access_lock(int cpu)
404{
405	(void)cpu;
406	mutex_lock(&access_lock);
407}
408
409static inline void trace_access_unlock(int cpu)
410{
411	(void)cpu;
412	mutex_unlock(&access_lock);
413}
414
415static inline void trace_access_lock_init(void)
416{
417}
418
419#endif
420
421/* trace_flags holds trace_options default values */
422unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
423	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
424	TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
425	TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
426
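/*
 * Enable recording into @tr's ring buffer and clear the mirror flag
 * that marks it as disabled.
 */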
427static void tracer_tracing_on(struct trace_array *tr)
428{
429	if (tr->trace_buffer.buffer)
430		ring_buffer_record_on(tr->trace_buffer.buffer);
431	/*
432	 * This flag is looked at when buffers haven't been allocated
433	 * yet, or by some tracers (like irqsoff), that just want to
434	 * know if the ring buffer has been disabled, but it can handle
435	 * races where it gets disabled while we still do a record.
436	 * As the check is in the fast path of the tracers, it is more
437	 * important to be fast than accurate.
438	 */
439	tr->buffer_disabled = 0;
440	/* Make the flag seen by readers */
441	smp_wmb();
442}
443
444/**
445 * tracing_on - enable tracing buffers
446 *
447 * This function enables tracing buffers that may have been
448 * disabled with tracing_off.
449 */
450void tracing_on(void)
451{
452	tracer_tracing_on(&global_trace);
453}
454EXPORT_SYMBOL_GPL(tracing_on);
455
456/**
457 * __trace_puts - write a constant string into the trace buffer.
458 * @ip:	   The address of the caller
459 * @str:   The constant string to write
460 * @size:  The size of the string.
461 */
462int __trace_puts(unsigned long ip, const char *str, int size)
463{
464	struct ring_buffer_event *event;
465	struct ring_buffer *buffer;
466	struct print_entry *entry;
467	unsigned long irq_flags;
468	int alloc;
469	int pc;
470
471	if (!(trace_flags & TRACE_ITER_PRINTK))
472		return 0;
473
474	pc = preempt_count();
475
476	if (unlikely(tracing_selftest_running || tracing_disabled))
477		return 0;
478
479	alloc = sizeof(*entry) + size + 2; /* possible \n added */
480
481	local_save_flags(irq_flags);
482	buffer = global_trace.trace_buffer.buffer;
483	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
484					  irq_flags, pc);
485	if (!event)
486		return 0;
487
488	entry = ring_buffer_event_data(event);
489	entry->ip = ip;
490
491	memcpy(&entry->buf, str, size);
492
493	/* Add a newline if necessary */
494	if (entry->buf[size - 1] != '\n') {
495		entry->buf[size] = '\n';
496		entry->buf[size + 1] = '\0';
497	} else
498		entry->buf[size] = '\0';
499
500	__buffer_unlock_commit(buffer, event);
501	ftrace_trace_stack(buffer, irq_flags, 4, pc);
502
503	return size;
504}
505EXPORT_SYMBOL_GPL(__trace_puts);
506
507/**
508 * __trace_bputs - write the pointer to a constant string into trace buffer
509 * @ip:	   The address of the caller
510 * @str:   The constant string whose address is written into the buffer
511 */
512int __trace_bputs(unsigned long ip, const char *str)
513{
514	struct ring_buffer_event *event;
515	struct ring_buffer *buffer;
516	struct bputs_entry *entry;
517	unsigned long irq_flags;
518	int size = sizeof(struct bputs_entry);
519	int pc;
520
521	if (!(trace_flags & TRACE_ITER_PRINTK))
522		return 0;
523
524	pc = preempt_count();
525
526	if (unlikely(tracing_selftest_running || tracing_disabled))
527		return 0;
528
529	local_save_flags(irq_flags);
530	buffer = global_trace.trace_buffer.buffer;
531	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
532					  irq_flags, pc);
533	if (!event)
534		return 0;
535
536	entry = ring_buffer_event_data(event);
537	entry->ip			= ip;
538	entry->str			= str;
539
540	__buffer_unlock_commit(buffer, event);
541	ftrace_trace_stack(buffer, irq_flags, 4, pc);
542
543	return 1;
544}
545EXPORT_SYMBOL_GPL(__trace_bputs);
546
547#ifdef CONFIG_TRACER_SNAPSHOT
548/**
549 * tracing_snapshot - take a snapshot of the current buffer.
550 *
551 * This causes a swap between the snapshot buffer and the current live
552 * tracing buffer. You can use this to take snapshots of the live
553 * trace when some condition is triggered, but continue to trace.
554 *
555 * Note, make sure to allocate the snapshot with either
556 * a tracing_snapshot_alloc(), or by doing it manually
557 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
558 *
559 * If the snapshot buffer is not allocated, this will stop tracing,
560 * basically making a permanent snapshot.
561 */
562void tracing_snapshot(void)
563{
564	struct trace_array *tr = &global_trace;
565	struct tracer *tracer = tr->current_trace;
566	unsigned long flags;
567
568	if (in_nmi()) {
569		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
570		internal_trace_puts("*** snapshot is being ignored        ***\n");
571		return;
572	}
573
574	if (!tr->allocated_snapshot) {
575		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
576		internal_trace_puts("*** stopping trace here!   ***\n");
577		tracing_off();
578		return;
579	}
580
581	/* Note, snapshot can not be used when the tracer uses it */
582	if (tracer->use_max_tr) {
583		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
584		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
585		return;
586	}
587
588	local_irq_save(flags);
589	update_max_tr(tr, current, smp_processor_id());
590	local_irq_restore(flags);
591}
592EXPORT_SYMBOL_GPL(tracing_snapshot);
593
594static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
595					struct trace_buffer *size_buf, int cpu_id);
596static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
597
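/*
 * Allocate the snapshot (max) buffer of @tr, if not already allocated,
 * by resizing it to match the size of the main trace buffer.
 */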
598static int alloc_snapshot(struct trace_array *tr)
599{
600	int ret;
601
602	if (!tr->allocated_snapshot) {
603
604		/* allocate spare buffer */
605		ret = resize_buffer_duplicate_size(&tr->max_buffer,
606				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
607		if (ret < 0)
608			return ret;
609
610		tr->allocated_snapshot = true;
611	}
612
613	return 0;
614}
615
616static void free_snapshot(struct trace_array *tr)
617{
618	/*
619	 * We don't free the ring buffer; instead, we resize it, because
620	 * the max_tr ring buffer has some state (e.g. ring->clock) and
621	 * we want to preserve it.
622	 */
623	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
624	set_buffer_entries(&tr->max_buffer, 1);
625	tracing_reset_online_cpus(&tr->max_buffer);
626	tr->allocated_snapshot = false;
627}
628
629/**
630 * tracing_alloc_snapshot - allocate snapshot buffer.
631 *
632 * This only allocates the snapshot buffer if it isn't already
633 * allocated - it doesn't also take a snapshot.
634 *
635 * This is meant to be used in cases where the snapshot buffer needs
636 * to be set up for events that can't sleep but need to be able to
637 * trigger a snapshot.
638 */
639int tracing_alloc_snapshot(void)
640{
641	struct trace_array *tr = &global_trace;
642	int ret;
643
644	ret = alloc_snapshot(tr);
645	WARN_ON(ret < 0);
646
647	return ret;
648}
649EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
650
651/**
652 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
653 *
654 * This is similar to tracing_snapshot(), but it will allocate the
655 * snapshot buffer if it isn't already allocated. Use this only
656 * where it is safe to sleep, as the allocation may sleep.
657 *
658 * This causes a swap between the snapshot buffer and the current live
659 * tracing buffer. You can use this to take snapshots of the live
660 * trace when some condition is triggered, but continue to trace.
661 */
662void tracing_snapshot_alloc(void)
663{
664	int ret;
665
666	ret = tracing_alloc_snapshot();
667	if (ret < 0)
668		return;
669
670	tracing_snapshot();
671}
672EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
673#else
674void tracing_snapshot(void)
675{
676	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
677}
678EXPORT_SYMBOL_GPL(tracing_snapshot);
679int tracing_alloc_snapshot(void)
680{
681	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
682	return -ENODEV;
683}
684EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
685void tracing_snapshot_alloc(void)
686{
687	/* Give warning */
688	tracing_snapshot();
689}
690EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
691#endif /* CONFIG_TRACER_SNAPSHOT */
692
693static void tracer_tracing_off(struct trace_array *tr)
694{
695	if (tr->trace_buffer.buffer)
696		ring_buffer_record_off(tr->trace_buffer.buffer);
697	/*
698	 * This flag is looked at when buffers haven't been allocated
699	 * yet, or by some tracers (like irqsoff), that just want to
700	 * know if the ring buffer has been disabled, but it can handle
701	 * races where it gets disabled while we still do a record.
702	 * As the check is in the fast path of the tracers, it is more
703	 * important to be fast than accurate.
704	 */
705	tr->buffer_disabled = 1;
706	/* Make the flag seen by readers */
707	smp_wmb();
708}
709
710/**
711 * tracing_off - turn off tracing buffers
712 *
713 * This function stops the tracing buffers from recording data.
714 * It does not disable any overhead the tracers themselves may
715 * be causing. This function simply causes all recording to
716 * the ring buffers to fail.
717 */
718void tracing_off(void)
719{
720	tracer_tracing_off(&global_trace);
721}
722EXPORT_SYMBOL_GPL(tracing_off);
723
724void disable_trace_on_warning(void)
725{
726	if (__disable_trace_on_warning)
727		tracing_off();
728}
729
730/**
731 * tracer_tracing_is_on - show the real enabled state of the ring buffer
732 * @tr: the trace array whose ring buffer state to check
733 *
734 * Shows the real state of the ring buffer, whether it is enabled or not.
735 */
736static int tracer_tracing_is_on(struct trace_array *tr)
737{
738	if (tr->trace_buffer.buffer)
739		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
740	return !tr->buffer_disabled;
741}
742
743/**
744 * tracing_is_on - show the enabled state of the global ring buffers
745 */
746int tracing_is_on(void)
747{
748	return tracer_tracing_is_on(&global_trace);
749}
750EXPORT_SYMBOL_GPL(tracing_is_on);
751
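/*
 * "trace_buf_size=" boot parameter: size in bytes of each per-cpu
 * ring buffer (memparse() accepts the usual K/M/G suffixes).
 */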
752static int __init set_buf_size(char *str)
753{
754	unsigned long buf_size;
755
756	if (!str)
757		return 0;
758	buf_size = memparse(str, &str);
759	/* nr_entries can not be zero */
760	if (buf_size == 0)
761		return 0;
762	trace_buf_size = buf_size;
763	return 1;
764}
765__setup("trace_buf_size=", set_buf_size);
766
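/*
 * "tracing_thresh=" boot parameter: the value is given in microseconds
 * and is stored internally in nanoseconds.
 */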
767static int __init set_tracing_thresh(char *str)
768{
769	unsigned long threshold;
770	int ret;
771
772	if (!str)
773		return 0;
774	ret = kstrtoul(str, 0, &threshold);
775	if (ret < 0)
776		return 0;
777	tracing_thresh = threshold * 1000;
778	return 1;
779}
780__setup("tracing_thresh=", set_tracing_thresh);
781
782unsigned long nsecs_to_usecs(unsigned long nsecs)
783{
784	return nsecs / 1000;
785}
786
787/* These must match the bit positions in trace_iterator_flags */
788static const char *trace_options[] = {
789	"print-parent",
790	"sym-offset",
791	"sym-addr",
792	"verbose",
793	"raw",
794	"hex",
795	"bin",
796	"block",
797	"stacktrace",
798	"trace_printk",
799	"ftrace_preempt",
800	"branch",
801	"annotate",
802	"userstacktrace",
803	"sym-userobj",
804	"printk-msg-only",
805	"context-info",
806	"latency-format",
807	"sleep-time",
808	"graph-time",
809	"record-cmd",
810	"overwrite",
811	"disable_on_free",
812	"irq-info",
813	"markers",
814	"function-trace",
815	"print-tgid",
816	NULL
817};
818
819static struct {
820	u64 (*func)(void);
821	const char *name;
822	int in_ns;		/* is this clock in nanoseconds? */
823} trace_clocks[] = {
824	{ trace_clock_local,		"local",	1 },
825	{ trace_clock_global,		"global",	1 },
826	{ trace_clock_counter,		"counter",	0 },
827	{ trace_clock_jiffies,		"uptime",	0 },
828	{ trace_clock,			"perf",		1 },
829	{ ktime_get_mono_fast_ns,	"mono",		1 },
830	ARCH_TRACE_CLOCKS
831};
832
833/*
834 * trace_parser_get_init - gets the buffer for trace parser
835 */
836int trace_parser_get_init(struct trace_parser *parser, int size)
837{
838	memset(parser, 0, sizeof(*parser));
839
840	parser->buffer = kmalloc(size, GFP_KERNEL);
841	if (!parser->buffer)
842		return 1;
843
844	parser->size = size;
845	return 0;
846}
847
848/*
849 * trace_parser_put - frees the buffer for trace parser
850 */
851void trace_parser_put(struct trace_parser *parser)
852{
853	kfree(parser->buffer);
854}
855
856/*
857 * trace_get_user - reads the user input string separated by space
858 * (matched by isspace(ch))
859 *
860 * For each string found the 'struct trace_parser' is updated,
861 * and the function returns.
862 *
863 * Returns number of bytes read.
864 *
865 * See kernel/trace/trace.h for 'struct trace_parser' details.
866 */
867int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
868	size_t cnt, loff_t *ppos)
869{
870	char ch;
871	size_t read = 0;
872	ssize_t ret;
873
874	if (!*ppos)
875		trace_parser_clear(parser);
876
877	ret = get_user(ch, ubuf++);
878	if (ret)
879		goto out;
880
881	read++;
882	cnt--;
883
884	/*
885	 * If the parser finished with the last write, skip any leading
886	 * spaces; otherwise continue reading the input where it left off.
887	 */
888	if (!parser->cont) {
889		/* skip white space */
890		while (cnt && isspace(ch)) {
891			ret = get_user(ch, ubuf++);
892			if (ret)
893				goto out;
894			read++;
895			cnt--;
896		}
897
898		/* only spaces were written */
899		if (isspace(ch)) {
900			*ppos += read;
901			ret = read;
902			goto out;
903		}
904
905		parser->idx = 0;
906	}
907
908	/* read the non-space input */
909	while (cnt && !isspace(ch)) {
910		if (parser->idx < parser->size - 1)
911			parser->buffer[parser->idx++] = ch;
912		else {
913			ret = -EINVAL;
914			goto out;
915		}
916		ret = get_user(ch, ubuf++);
917		if (ret)
918			goto out;
919		read++;
920		cnt--;
921	}
922
923	/* We either got finished input or we have to wait for another call. */
924	if (isspace(ch)) {
925		parser->buffer[parser->idx] = 0;
926		parser->cont = false;
927	} else if (parser->idx < parser->size - 1) {
928		parser->cont = true;
929		parser->buffer[parser->idx++] = ch;
930	} else {
931		ret = -EINVAL;
932		goto out;
933	}
934
935	*ppos += read;
936	ret = read;
937
938out:
939	return ret;
940}
941
942static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
943{
944	int len;
945
946	if (s->len <= s->readpos)
947		return -EBUSY;
948
949	len = s->len - s->readpos;
950	if (cnt > len)
951		cnt = len;
952	memcpy(buf, s->buffer + s->readpos, cnt);
953
954	s->readpos += cnt;
955	return cnt;
956}
957
958unsigned long __read_mostly	tracing_thresh;
959
960#ifdef CONFIG_TRACER_MAX_TRACE
961/*
962 * Copy the new maximum trace into the separate maximum-trace
963 * structure (this way the maximum trace is permanently saved
964 * for later retrieval via /sys/kernel/debug/tracing/latency_trace).
965 */
966static void
967__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
968{
969	struct trace_buffer *trace_buf = &tr->trace_buffer;
970	struct trace_buffer *max_buf = &tr->max_buffer;
971	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
972	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
973
974	max_buf->cpu = cpu;
975	max_buf->time_start = data->preempt_timestamp;
976
977	max_data->saved_latency = tr->max_latency;
978	max_data->critical_start = data->critical_start;
979	max_data->critical_end = data->critical_end;
980
981	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
982	max_data->pid = tsk->pid;
983	/*
984	 * If tsk == current, then use current_uid(), as that does not use
985	 * RCU. The irq tracer can be called out of RCU scope.
986	 */
987	if (tsk == current)
988		max_data->uid = current_uid();
989	else
990		max_data->uid = task_uid(tsk);
991
992	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
993	max_data->policy = tsk->policy;
994	max_data->rt_priority = tsk->rt_priority;
995
996	/* record this task's comm */
997	tracing_record_cmdline(tsk);
998}
999
1000/**
1001 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1002 * @tr: tracer
1003 * @tsk: the task with the latency
1004 * @cpu: The cpu that initiated the trace.
1005 *
1006 * Flip the buffers between the @tr and the max_tr and record information
1007 * about which task was the cause of this latency.
1008 */
1009void
1010update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1011{
1012	struct ring_buffer *buf;
1013
1014	if (tr->stop_count)
1015		return;
1016
1017	WARN_ON_ONCE(!irqs_disabled());
1018
1019	if (!tr->allocated_snapshot) {
1020		/* Only the nop tracer should hit this when disabling */
1021		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1022		return;
1023	}
1024
1025	arch_spin_lock(&tr->max_lock);
1026
1027	buf = tr->trace_buffer.buffer;
1028	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1029	tr->max_buffer.buffer = buf;
1030
1031	__update_max_tr(tr, tsk, cpu);
1032	arch_spin_unlock(&tr->max_lock);
1033}
1034
1035/**
1036 * update_max_tr_single - only copy one trace over, and reset the rest
1037 * @tr: tracer
1038 * @tsk: task with the latency
1039 * @cpu: the cpu of the buffer to copy.
1040 *
1041 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1042 */
1043void
1044update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1045{
1046	int ret;
1047
1048	if (tr->stop_count)
1049		return;
1050
1051	WARN_ON_ONCE(!irqs_disabled());
1052	if (!tr->allocated_snapshot) {
1053		/* Only the nop tracer should hit this when disabling */
1054		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1055		return;
1056	}
1057
1058	arch_spin_lock(&tr->max_lock);
1059
1060	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1061
1062	if (ret == -EBUSY) {
1063		/*
1064		 * We failed to swap the buffer due to a commit taking
1065		 * place on this CPU. We fail to record, but we reset
1066		 * the max trace buffer (no one writes directly to it)
1067		 * and flag that it failed.
1068		 */
1069		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1070			"Failed to swap buffers due to commit in progress\n");
1071	}
1072
1073	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1074
1075	__update_max_tr(tr, tsk, cpu);
1076	arch_spin_unlock(&tr->max_lock);
1077}
1078#endif /* CONFIG_TRACER_MAX_TRACE */
1079
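/*
 * Block until the buffer the iterator reads from has data available
 * (or, when @full is set, until enough data has accumulated). Static
 * iterators return immediately, as they are either filled or empty.
 */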
1080static int wait_on_pipe(struct trace_iterator *iter, bool full)
1081{
1082	/* Iterators are static, they should be filled or empty */
1083	if (trace_buffer_iter(iter, iter->cpu_file))
1084		return 0;
1085
1086	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1087				full);
1088}
1089
1090#ifdef CONFIG_FTRACE_STARTUP_TEST
1091static int run_tracer_selftest(struct tracer *type)
1092{
1093	struct trace_array *tr = &global_trace;
1094	struct tracer *saved_tracer = tr->current_trace;
1095	int ret;
1096
1097	if (!type->selftest || tracing_selftest_disabled)
1098		return 0;
1099
1100	/*
1101	 * Run a selftest on this tracer.
1102	 * Here we reset the trace buffer, and set the current
1103	 * tracer to be this tracer. The tracer can then run some
1104	 * internal tracing to verify that everything is in order.
1105	 * If we fail, we do not register this tracer.
1106	 */
1107	tracing_reset_online_cpus(&tr->trace_buffer);
1108
1109	tr->current_trace = type;
1110
1111#ifdef CONFIG_TRACER_MAX_TRACE
1112	if (type->use_max_tr) {
1113		/* If we expanded the buffers, make sure the max is expanded too */
1114		if (ring_buffer_expanded)
1115			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1116					   RING_BUFFER_ALL_CPUS);
1117		tr->allocated_snapshot = true;
1118	}
1119#endif
1120
1121	/* the test is responsible for initializing and enabling */
1122	pr_info("Testing tracer %s: ", type->name);
1123	ret = type->selftest(type, tr);
1124	/* the test is responsible for resetting too */
1125	tr->current_trace = saved_tracer;
1126	if (ret) {
1127		printk(KERN_CONT "FAILED!\n");
1128		/* Add the warning after printing 'FAILED' */
1129		WARN_ON(1);
1130		return -1;
1131	}
1132	/* Only reset on passing, to avoid touching corrupted buffers */
1133	tracing_reset_online_cpus(&tr->trace_buffer);
1134
1135#ifdef CONFIG_TRACER_MAX_TRACE
1136	if (type->use_max_tr) {
1137		tr->allocated_snapshot = false;
1138
1139		/* Shrink the max buffer again */
1140		if (ring_buffer_expanded)
1141			ring_buffer_resize(tr->max_buffer.buffer, 1,
1142					   RING_BUFFER_ALL_CPUS);
1143	}
1144#endif
1145
1146	printk(KERN_CONT "PASSED\n");
1147	return 0;
1148}
1149#else
1150static inline int run_tracer_selftest(struct tracer *type)
1151{
1152	return 0;
1153}
1154#endif /* CONFIG_FTRACE_STARTUP_TEST */
1155
1156/**
1157 * register_tracer - register a tracer with the ftrace system.
1158 * @type: the plugin for the tracer
1159 *
1160 * Register a new plugin tracer.
1161 */
1162int register_tracer(struct tracer *type)
1163{
1164	struct tracer *t;
1165	int ret = 0;
1166
1167	if (!type->name) {
1168		pr_info("Tracer must have a name\n");
1169		return -1;
1170	}
1171
1172	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1173		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1174		return -1;
1175	}
1176
1177	mutex_lock(&trace_types_lock);
1178
1179	tracing_selftest_running = true;
1180
1181	for (t = trace_types; t; t = t->next) {
1182		if (strcmp(type->name, t->name) == 0) {
1183			/* already found */
1184			pr_info("Tracer %s already registered\n",
1185				type->name);
1186			ret = -1;
1187			goto out;
1188		}
1189	}
1190
1191	if (!type->set_flag)
1192		type->set_flag = &dummy_set_flag;
1193	if (!type->flags)
1194		type->flags = &dummy_tracer_flags;
1195	else
1196		if (!type->flags->opts)
1197			type->flags->opts = dummy_tracer_opt;
1198
1199	ret = run_tracer_selftest(type);
1200	if (ret < 0)
1201		goto out;
1202
1203	type->next = trace_types;
1204	trace_types = type;
1205
1206 out:
1207	tracing_selftest_running = false;
1208	mutex_unlock(&trace_types_lock);
1209
1210	if (ret || !default_bootup_tracer)
1211		goto out_unlock;
1212
1213	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1214		goto out_unlock;
1215
1216	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1217	/* Do we want this tracer to start on bootup? */
1218	tracing_set_tracer(&global_trace, type->name);
1219	default_bootup_tracer = NULL;
1220	/* disable other selftests, since this will break them. */
1221	tracing_selftest_disabled = true;
1222#ifdef CONFIG_FTRACE_STARTUP_TEST
1223	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1224	       type->name);
1225#endif
1226
1227 out_unlock:
1228	return ret;
1229}
1230
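/*
 * tracing_reset - clear out a single CPU's buffer
 *
 * Recording is disabled and all pending commits are allowed to finish
 * before the per-cpu buffer is reset, so no stale entries survive.
 */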
1231void tracing_reset(struct trace_buffer *buf, int cpu)
1232{
1233	struct ring_buffer *buffer = buf->buffer;
1234
1235	if (!buffer)
1236		return;
1237
1238	ring_buffer_record_disable(buffer);
1239
1240	/* Make sure all commits have finished */
1241	synchronize_sched();
1242	ring_buffer_reset_cpu(buffer, cpu);
1243
1244	ring_buffer_record_enable(buffer);
1245}
1246
1247void tracing_reset_online_cpus(struct trace_buffer *buf)
1248{
1249	struct ring_buffer *buffer = buf->buffer;
1250	int cpu;
1251
1252	if (!buffer)
1253		return;
1254
1255	ring_buffer_record_disable(buffer);
1256
1257	/* Make sure all commits have finished */
1258	synchronize_sched();
1259
1260	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1261
1262	for_each_online_cpu(cpu)
1263		ring_buffer_reset_cpu(buffer, cpu);
1264
1265	ring_buffer_record_enable(buffer);
1266}
1267
1268/* Must have trace_types_lock held */
1269void tracing_reset_all_online_cpus(void)
1270{
1271	struct trace_array *tr;
1272
1273	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1274		tracing_reset_online_cpus(&tr->trace_buffer);
1275#ifdef CONFIG_TRACER_MAX_TRACE
1276		tracing_reset_online_cpus(&tr->max_buffer);
1277#endif
1278	}
1279}
1280
1281#define SAVED_CMDLINES_DEFAULT 128
1282#define NO_CMDLINE_MAP UINT_MAX
1283static unsigned saved_tgids[SAVED_CMDLINES_DEFAULT];
1284static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1285struct saved_cmdlines_buffer {
1286	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1287	unsigned *map_cmdline_to_pid;
1288	unsigned cmdline_num;
1289	int cmdline_idx;
1290	char *saved_cmdlines;
1291};
1292static struct saved_cmdlines_buffer *savedcmd;
1293
1294/* temporarily disable recording */
1295static atomic_t trace_record_cmdline_disabled __read_mostly;
1296
1297static inline char *get_saved_cmdlines(int idx)
1298{
1299	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1300}
1301
1302static inline void set_cmdline(int idx, const char *cmdline)
1303{
1304	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1305}
1306
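/*
 * Allocate the pid<->cmdline maps and the comm storage for @val saved
 * command lines, initializing both maps to NO_CMDLINE_MAP.
 */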
1307static int allocate_cmdlines_buffer(unsigned int val,
1308				    struct saved_cmdlines_buffer *s)
1309{
1310	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1311					GFP_KERNEL);
1312	if (!s->map_cmdline_to_pid)
1313		return -ENOMEM;
1314
1315	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1316	if (!s->saved_cmdlines) {
1317		kfree(s->map_cmdline_to_pid);
1318		return -ENOMEM;
1319	}
1320
1321	s->cmdline_idx = 0;
1322	s->cmdline_num = val;
1323	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1324	       sizeof(s->map_pid_to_cmdline));
1325	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1326	       val * sizeof(*s->map_cmdline_to_pid));
1327
1328	return 0;
1329}
1330
1331static int trace_create_savedcmd(void)
1332{
1333	int ret;
1334
1335	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1336	if (!savedcmd)
1337		return -ENOMEM;
1338
1339	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1340	if (ret < 0) {
1341		kfree(savedcmd);
1342		savedcmd = NULL;
1343		return -ENOMEM;
1344	}
1345
1346	return 0;
1347}
1348
1349int is_tracing_stopped(void)
1350{
1351	return global_trace.stop_count;
1352}
1353
1354/**
1355 * tracing_start - quick start of the tracer
1356 *
1357 * If tracing is enabled but was stopped by tracing_stop,
1358 * this will start the tracer back up.
1359 */
1360void tracing_start(void)
1361{
1362	struct ring_buffer *buffer;
1363	unsigned long flags;
1364
1365	if (tracing_disabled)
1366		return;
1367
1368	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1369	if (--global_trace.stop_count) {
1370		if (global_trace.stop_count < 0) {
1371			/* Someone screwed up their debugging */
1372			WARN_ON_ONCE(1);
1373			global_trace.stop_count = 0;
1374		}
1375		goto out;
1376	}
1377
1378	/* Prevent the buffers from switching */
1379	arch_spin_lock(&global_trace.max_lock);
1380
1381	buffer = global_trace.trace_buffer.buffer;
1382	if (buffer)
1383		ring_buffer_record_enable(buffer);
1384
1385#ifdef CONFIG_TRACER_MAX_TRACE
1386	buffer = global_trace.max_buffer.buffer;
1387	if (buffer)
1388		ring_buffer_record_enable(buffer);
1389#endif
1390
1391	arch_spin_unlock(&global_trace.max_lock);
1392
1393 out:
1394	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1395}
1396
1397static void tracing_start_tr(struct trace_array *tr)
1398{
1399	struct ring_buffer *buffer;
1400	unsigned long flags;
1401
1402	if (tracing_disabled)
1403		return;
1404
1405	/* If global, we need to also start the max tracer */
1406	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1407		return tracing_start();
1408
1409	raw_spin_lock_irqsave(&tr->start_lock, flags);
1410
1411	if (--tr->stop_count) {
1412		if (tr->stop_count < 0) {
1413			/* Someone screwed up their debugging */
1414			WARN_ON_ONCE(1);
1415			tr->stop_count = 0;
1416		}
1417		goto out;
1418	}
1419
1420	buffer = tr->trace_buffer.buffer;
1421	if (buffer)
1422		ring_buffer_record_enable(buffer);
1423
1424 out:
1425	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1426}
1427
1428/**
1429 * tracing_stop - quick stop of the tracer
1430 *
1431 * Light weight way to stop tracing. Use in conjunction with
1432 * tracing_start.
1433 */
1434void tracing_stop(void)
1435{
1436	struct ring_buffer *buffer;
1437	unsigned long flags;
1438
1439	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1440	if (global_trace.stop_count++)
1441		goto out;
1442
1443	/* Prevent the buffers from switching */
1444	arch_spin_lock(&global_trace.max_lock);
1445
1446	buffer = global_trace.trace_buffer.buffer;
1447	if (buffer)
1448		ring_buffer_record_disable(buffer);
1449
1450#ifdef CONFIG_TRACER_MAX_TRACE
1451	buffer = global_trace.max_buffer.buffer;
1452	if (buffer)
1453		ring_buffer_record_disable(buffer);
1454#endif
1455
1456	arch_spin_unlock(&global_trace.max_lock);
1457
1458 out:
1459	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1460}
1461
1462static void tracing_stop_tr(struct trace_array *tr)
1463{
1464	struct ring_buffer *buffer;
1465	unsigned long flags;
1466
1467	/* If global, we need to also stop the max tracer */
1468	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1469		return tracing_stop();
1470
1471	raw_spin_lock_irqsave(&tr->start_lock, flags);
1472	if (tr->stop_count++)
1473		goto out;
1474
1475	buffer = tr->trace_buffer.buffer;
1476	if (buffer)
1477		ring_buffer_record_disable(buffer);
1478
1479 out:
1480	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1481}
1482
1483void trace_stop_cmdline_recording(void);
1484
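/*
 * Record @tsk's comm (and tgid) in the saved cmdline table so that
 * trace output can later resolve the pid back to a task name.
 * Returns 1 on success, 0 if the pid is out of range or the cmdline
 * lock could not be taken without spinning.
 */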
1485static int trace_save_cmdline(struct task_struct *tsk)
1486{
1487	unsigned pid, idx;
1488
1489	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1490		return 0;
1491
1492	/*
1493	 * It's not the end of the world if we don't get
1494	 * the lock, but we also don't want to spin
1495	 * nor do we want to disable interrupts,
1496	 * so if we miss here, then better luck next time.
1497	 */
1498	if (!arch_spin_trylock(&trace_cmdline_lock))
1499		return 0;
1500
1501	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1502	if (idx == NO_CMDLINE_MAP) {
1503		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1504
1505		/*
1506		 * Check whether the cmdline buffer at idx has a pid
1507		 * mapped. We are going to overwrite that entry so we
1508		 * need to clear the map_pid_to_cmdline. Otherwise we
1509		 * would read the new comm for the old pid.
1510		 */
1511		pid = savedcmd->map_cmdline_to_pid[idx];
1512		if (pid != NO_CMDLINE_MAP)
1513			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1514
1515		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1516		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1517
1518		savedcmd->cmdline_idx = idx;
1519	}
1520
1521	set_cmdline(idx, tsk->comm);
1522	saved_tgids[idx] = tsk->tgid;
1523	arch_spin_unlock(&trace_cmdline_lock);
1524
1525	return 1;
1526}
1527
1528static void __trace_find_cmdline(int pid, char comm[])
1529{
1530	unsigned map;
1531
1532	if (!pid) {
1533		strcpy(comm, "<idle>");
1534		return;
1535	}
1536
1537	if (WARN_ON_ONCE(pid < 0)) {
1538		strcpy(comm, "<XXX>");
1539		return;
1540	}
1541
1542	if (pid > PID_MAX_DEFAULT) {
1543		strcpy(comm, "<...>");
1544		return;
1545	}
1546
1547	map = savedcmd->map_pid_to_cmdline[pid];
1548	if (map != NO_CMDLINE_MAP)
1549		strcpy(comm, get_saved_cmdlines(map));
1550	else
1551		strcpy(comm, "<...>");
1552}
1553
1554void trace_find_cmdline(int pid, char comm[])
1555{
1556	preempt_disable();
1557	arch_spin_lock(&trace_cmdline_lock);
1558
1559	__trace_find_cmdline(pid, comm);
1560
1561	arch_spin_unlock(&trace_cmdline_lock);
1562	preempt_enable();
1563}
1564
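/* Return the tgid recorded for @pid, or -1 if none has been saved. */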
1565int trace_find_tgid(int pid)
1566{
1567	unsigned map;
1568	int tgid;
1569
1570	preempt_disable();
1571	arch_spin_lock(&trace_cmdline_lock);
1572	map = savedcmd->map_pid_to_cmdline[pid];
1573	if (map != NO_CMDLINE_MAP)
1574		tgid = saved_tgids[map];
1575	else
1576		tgid = -1;
1577
1578	arch_spin_unlock(&trace_cmdline_lock);
1579	preempt_enable();
1580
1581	return tgid;
1582}
1583
1584void tracing_record_cmdline(struct task_struct *tsk)
1585{
1586	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1587		return;
1588
1589	if (!__this_cpu_read(trace_cmdline_save))
1590		return;
1591
1592	if (trace_save_cmdline(tsk))
1593		__this_cpu_write(trace_cmdline_save, false);
1594}
1595
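/*
 * Fill in the fields common to all trace entries: the pid, the preempt
 * count, and the irq/softirq/need-resched flags derived from @flags
 * and @pc.
 */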
1596void
1597tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1598			     int pc)
1599{
1600	struct task_struct *tsk = current;
1601
1602	entry->preempt_count		= pc & 0xff;
1603	entry->pid			= (tsk) ? tsk->pid : 0;
1604	entry->flags =
1605#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1606		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1607#else
1608		TRACE_FLAG_IRQS_NOSUPPORT |
1609#endif
1610		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1611		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1612		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1613		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1614}
1615EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1616
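/*
 * Reserve an event of @len bytes on @buffer and initialize its common
 * trace_entry header. Returns NULL if the event could not be reserved
 * (e.g. recording is disabled).
 */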
1617struct ring_buffer_event *
1618trace_buffer_lock_reserve(struct ring_buffer *buffer,
1619			  int type,
1620			  unsigned long len,
1621			  unsigned long flags, int pc)
1622{
1623	struct ring_buffer_event *event;
1624
1625	event = ring_buffer_lock_reserve(buffer, len);
1626	if (event != NULL) {
1627		struct trace_entry *ent = ring_buffer_event_data(event);
1628
1629		tracing_generic_entry_update(ent, flags, pc);
1630		ent->type = type;
1631	}
1632
1633	return event;
1634}
1635
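/*
 * Commit a reserved event and mark this CPU's trace_cmdline_save flag,
 * so that the next call to tracing_record_cmdline() saves the comm.
 */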
1636void
1637__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1638{
1639	__this_cpu_write(trace_cmdline_save, true);
1640	ring_buffer_unlock_commit(buffer, event);
1641}
1642
1643static inline void
1644__trace_buffer_unlock_commit(struct ring_buffer *buffer,
1645			     struct ring_buffer_event *event,
1646			     unsigned long flags, int pc)
1647{
1648	__buffer_unlock_commit(buffer, event);
1649
1650	ftrace_trace_stack(buffer, flags, 6, pc);
1651	ftrace_trace_userstack(buffer, flags, pc);
1652}
1653
1654void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1655				struct ring_buffer_event *event,
1656				unsigned long flags, int pc)
1657{
1658	__trace_buffer_unlock_commit(buffer, event, flags, pc);
1659}
1660EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1661
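/*
 * A scratch ring buffer used when tracing is off but event triggers
 * still need to see the event data; see
 * trace_event_buffer_lock_reserve() below.
 */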
1662static struct ring_buffer *temp_buffer;
1663
1664struct ring_buffer_event *
1665trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1666			  struct ftrace_event_file *ftrace_file,
1667			  int type, unsigned long len,
1668			  unsigned long flags, int pc)
1669{
1670	struct ring_buffer_event *entry;
1671
1672	*current_rb = ftrace_file->tr->trace_buffer.buffer;
1673	entry = trace_buffer_lock_reserve(*current_rb,
1674					 type, len, flags, pc);
1675	/*
1676	 * If tracing is off, but we have triggers enabled,
1677	 * we still need to look at the event data. Use the temp_buffer
1678	 * to store the trace event for the trigger to use. It's recursion
1679	 * safe and will not be recorded anywhere.
1680	 */
1681	if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1682		*current_rb = temp_buffer;
1683		entry = trace_buffer_lock_reserve(*current_rb,
1684						  type, len, flags, pc);
1685	}
1686	return entry;
1687}
1688EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1689
1690struct ring_buffer_event *
1691trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1692				  int type, unsigned long len,
1693				  unsigned long flags, int pc)
1694{
1695	*current_rb = global_trace.trace_buffer.buffer;
1696	return trace_buffer_lock_reserve(*current_rb,
1697					 type, len, flags, pc);
1698}
1699EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1700
1701void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1702					struct ring_buffer_event *event,
1703					unsigned long flags, int pc)
1704{
1705	__trace_buffer_unlock_commit(buffer, event, flags, pc);
1706}
1707EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1708
1709void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1710				     struct ring_buffer_event *event,
1711				     unsigned long flags, int pc,
1712				     struct pt_regs *regs)
1713{
1714	__buffer_unlock_commit(buffer, event);
1715
1716	ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1717	ftrace_trace_userstack(buffer, flags, pc);
1718}
1719EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1720
1721void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1722					 struct ring_buffer_event *event)
1723{
1724	ring_buffer_discard_commit(buffer, event);
1725}
1726EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1727
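/*
 * trace_function - record a function entry (ip and parent ip) into
 * @tr's ring buffer, unless tracing is disabled on this CPU or the
 * event is filtered out.
 */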
1728void
1729trace_function(struct trace_array *tr,
1730	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
1731	       int pc)
1732{
1733	struct ftrace_event_call *call = &event_function;
1734	struct ring_buffer *buffer = tr->trace_buffer.buffer;
1735	struct ring_buffer_event *event;
1736	struct ftrace_entry *entry;
1737
1738	/* If we are reading the ring buffer, don't trace */
1739	if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1740		return;
1741
1742	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1743					  flags, pc);
1744	if (!event)
1745		return;
1746	entry	= ring_buffer_event_data(event);
1747	entry->ip			= ip;
1748	entry->parent_ip		= parent_ip;
1749
1750	if (!call_filter_check_discard(call, entry, buffer, event))
1751		__buffer_unlock_commit(buffer, event);
1752}
1753
1754#ifdef CONFIG_STACKTRACE
1755
1756#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1757struct ftrace_stack {
1758	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
1759};
1760
1761static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1762static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1763
1764static void __ftrace_trace_stack(struct ring_buffer *buffer,
1765				 unsigned long flags,
1766				 int skip, int pc, struct pt_regs *regs)
1767{
1768	struct ftrace_event_call *call = &event_kernel_stack;
1769	struct ring_buffer_event *event;
1770	struct stack_entry *entry;
1771	struct stack_trace trace;
1772	int use_stack;
1773	int size = FTRACE_STACK_ENTRIES;
1774
1775	trace.nr_entries	= 0;
1776	trace.skip		= skip;
1777
1778	/*
1779	 * Since events can happen in NMIs there's no safe way to
1780	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1781	 * or NMI comes in, it will just have to use the default
1782	 * FTRACE_STACK_SIZE.
1783	 */
1784	preempt_disable_notrace();
1785
1786	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1787	/*
1788	 * We don't need any atomic variables, just a barrier.
1789	 * If an interrupt comes in, we don't care, because it would
1790	 * have exited and put the counter back to what we want.
1791	 * We just need a barrier to keep gcc from moving things
1792	 * around.
1793	 */
1794	barrier();
1795	if (use_stack == 1) {
1796		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
1797		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
1798
1799		if (regs)
1800			save_stack_trace_regs(regs, &trace);
1801		else
1802			save_stack_trace(&trace);
1803
1804		if (trace.nr_entries > size)
1805			size = trace.nr_entries;
1806	} else
1807		/* From now on, use_stack is a boolean */
1808		use_stack = 0;
1809
1810	size *= sizeof(unsigned long);
1811
1812	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1813					  sizeof(*entry) + size, flags, pc);
1814	if (!event)
1815		goto out;
1816	entry = ring_buffer_event_data(event);
1817
1818	memset(&entry->caller, 0, size);
1819
1820	if (use_stack)
1821		memcpy(&entry->caller, trace.entries,
1822		       trace.nr_entries * sizeof(unsigned long));
1823	else {
1824		trace.max_entries	= FTRACE_STACK_ENTRIES;
1825		trace.entries		= entry->caller;
1826		if (regs)
1827			save_stack_trace_regs(regs, &trace);
1828		else
1829			save_stack_trace(&trace);
1830	}
1831
1832	entry->size = trace.nr_entries;
1833
1834	if (!call_filter_check_discard(call, entry, buffer, event))
1835		__buffer_unlock_commit(buffer, event);
1836
1837 out:
1838	/* Again, don't let gcc optimize things here */
1839	barrier();
1840	__this_cpu_dec(ftrace_stack_reserve);
1841	preempt_enable_notrace();
1842
1843}
1844
1845void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1846			     int skip, int pc, struct pt_regs *regs)
1847{
1848	if (!(trace_flags & TRACE_ITER_STACKTRACE))
1849		return;
1850
1851	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
1852}
1853
1854void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1855			int skip, int pc)
1856{
1857	if (!(trace_flags & TRACE_ITER_STACKTRACE))
1858		return;
1859
1860	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1861}
1862
1863void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1864		   int pc)
1865{
1866	__ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1867}
1868
1869/**
1870 * trace_dump_stack - record a stack back trace in the trace buffer
1871 * @skip: Number of functions to skip (helper handlers)
1872 */
1873void trace_dump_stack(int skip)
1874{
1875	unsigned long flags;
1876
1877	if (tracing_disabled || tracing_selftest_running)
1878		return;
1879
1880	local_save_flags(flags);
1881
1882	/*
1883	 * Skip 3 more frames, which seems to get us to the caller of
1884	 * this function.
1885	 */
1886	skip += 3;
1887	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
1888			     flags, skip, preempt_count(), NULL);
1889}
1890
1891static DEFINE_PER_CPU(int, user_stack_count);
1892
1893void
1894ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1895{
1896	struct ftrace_event_call *call = &event_user_stack;
1897	struct ring_buffer_event *event;
1898	struct userstack_entry *entry;
1899	struct stack_trace trace;
1900
1901	if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1902		return;
1903
1904	/*
1905	 * NMIs can not handle page faults, even with fixups.
1906	 * Saving the user stack can (and often does) fault.
1907	 */
1908	if (unlikely(in_nmi()))
1909		return;
1910
1911	/*
1912	 * prevent recursion, since the user stack tracing may
1913	 * trigger other kernel events.
1914	 */
1915	preempt_disable();
1916	if (__this_cpu_read(user_stack_count))
1917		goto out;
1918
1919	__this_cpu_inc(user_stack_count);
1920
1921	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1922					  sizeof(*entry), flags, pc);
1923	if (!event)
1924		goto out_drop_count;
1925	entry	= ring_buffer_event_data(event);
1926
1927	entry->tgid		= current->tgid;
1928	memset(&entry->caller, 0, sizeof(entry->caller));
1929
1930	trace.nr_entries	= 0;
1931	trace.max_entries	= FTRACE_STACK_ENTRIES;
1932	trace.skip		= 0;
1933	trace.entries		= entry->caller;
1934
1935	save_stack_trace_user(&trace);
1936	if (!call_filter_check_discard(call, entry, buffer, event))
1937		__buffer_unlock_commit(buffer, event);
1938
1939 out_drop_count:
1940	__this_cpu_dec(user_stack_count);
1941 out:
1942	preempt_enable();
1943}
1944
1945#ifdef UNUSED
1946static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1947{
1948	ftrace_trace_userstack(tr, flags, preempt_count());
1949}
1950#endif /* UNUSED */
1951
1952#endif /* CONFIG_STACKTRACE */
1953
1954/* created for use with alloc_percpu */
1955struct trace_buffer_struct {
1956	char buffer[TRACE_BUF_SIZE];
1957};
1958
1959static struct trace_buffer_struct *trace_percpu_buffer;
1960static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1961static struct trace_buffer_struct *trace_percpu_irq_buffer;
1962static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1963
1964/*
1965 * The buffer used is dependent on the context. There is a per-cpu
1966 * buffer for normal context, softirq context, hard irq context and
1967 * NMI context. This allows for lockless recording.
1968 *
1969 * Note, if the buffers failed to be allocated, then this returns NULL.
1970 */
1971static char *get_trace_buf(void)
1972{
1973	struct trace_buffer_struct *percpu_buffer;
1974
1975	/*
1976	 * If we have allocated per cpu buffers, then we do not
1977	 * need to do any locking.
1978	 */
1979	if (in_nmi())
1980		percpu_buffer = trace_percpu_nmi_buffer;
1981	else if (in_irq())
1982		percpu_buffer = trace_percpu_irq_buffer;
1983	else if (in_softirq())
1984		percpu_buffer = trace_percpu_sirq_buffer;
1985	else
1986		percpu_buffer = trace_percpu_buffer;
1987
1988	if (!percpu_buffer)
1989		return NULL;
1990
1991	return this_cpu_ptr(&percpu_buffer->buffer[0]);
1992}
1993
1994static int alloc_percpu_trace_buffer(void)
1995{
1996	struct trace_buffer_struct *buffers;
1997	struct trace_buffer_struct *sirq_buffers;
1998	struct trace_buffer_struct *irq_buffers;
1999	struct trace_buffer_struct *nmi_buffers;
2000
2001	buffers = alloc_percpu(struct trace_buffer_struct);
2002	if (!buffers)
2003		goto err_warn;
2004
2005	sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2006	if (!sirq_buffers)
2007		goto err_sirq;
2008
2009	irq_buffers = alloc_percpu(struct trace_buffer_struct);
2010	if (!irq_buffers)
2011		goto err_irq;
2012
2013	nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2014	if (!nmi_buffers)
2015		goto err_nmi;
2016
2017	trace_percpu_buffer = buffers;
2018	trace_percpu_sirq_buffer = sirq_buffers;
2019	trace_percpu_irq_buffer = irq_buffers;
2020	trace_percpu_nmi_buffer = nmi_buffers;
2021
2022	return 0;
2023
2024 err_nmi:
2025	free_percpu(irq_buffers);
2026 err_irq:
2027	free_percpu(sirq_buffers);
2028 err_sirq:
2029	free_percpu(buffers);
2030 err_warn:
2031	WARN(1, "Could not allocate percpu trace_printk buffer");
2032	return -ENOMEM;
2033}
2034
2035static int buffers_allocated;
2036
2037void trace_printk_init_buffers(void)
2038{
2039	if (buffers_allocated)
2040		return;
2041
2042	if (alloc_percpu_trace_buffer())
2043		return;
2044
2045	/* trace_printk() is for debug use only. Don't use it in production. */
2046
2047	pr_warning("\n**********************************************************\n");
2048	pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2049	pr_warning("**                                                      **\n");
2050	pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2051	pr_warning("**                                                      **\n");
2052	pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2053	pr_warning("** unsafe for production use.                           **\n");
2054	pr_warning("**                                                      **\n");
2055	pr_warning("** If you see this message and you are not debugging    **\n");
2056	pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2057	pr_warning("**                                                      **\n");
2058	pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2059	pr_warning("**********************************************************\n");
2060
2061	/* Expand the buffers to set size */
2062	tracing_update_buffers();
2063
2064	buffers_allocated = 1;
2065
2066	/*
2067	 * trace_printk_init_buffers() can be called by modules.
2068	 * If that happens, then we need to start cmdline recording
2069	 * directly here. If the global_trace.buffer is already
2070	 * allocated here, then this was called by module code.
2071	 */
2072	if (global_trace.trace_buffer.buffer)
2073		tracing_start_cmdline_record();
2074}
2075
2076void trace_printk_start_comm(void)
2077{
2078	/* Start tracing comms if trace printk is set */
2079	if (!buffers_allocated)
2080		return;
2081	tracing_start_cmdline_record();
2082}
2083
2084static void trace_printk_start_stop_comm(int enabled)
2085{
2086	if (!buffers_allocated)
2087		return;
2088
2089	if (enabled)
2090		tracing_start_cmdline_record();
2091	else
2092		tracing_stop_cmdline_record();
2093}
2094
2095/**
2096 * trace_vbprintk - write a binary printk message into the tracing buffer
2097 * @ip: address of the caller, @fmt: printf format string, @args: arguments for @fmt
2098 */
2099int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2100{
2101	struct ftrace_event_call *call = &event_bprint;
2102	struct ring_buffer_event *event;
2103	struct ring_buffer *buffer;
2104	struct trace_array *tr = &global_trace;
2105	struct bprint_entry *entry;
2106	unsigned long flags;
2107	char *tbuffer;
2108	int len = 0, size, pc;
2109
2110	if (unlikely(tracing_selftest_running || tracing_disabled))
2111		return 0;
2112
2113	/* Don't pollute graph traces with trace_vprintk internals */
2114	pause_graph_tracing();
2115
2116	pc = preempt_count();
2117	preempt_disable_notrace();
2118
2119	tbuffer = get_trace_buf();
2120	if (!tbuffer) {
2121		len = 0;
2122		goto out;
2123	}
2124
2125	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2126
2127	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2128		goto out;
2129
2130	local_save_flags(flags);
2131	size = sizeof(*entry) + sizeof(u32) * len;
2132	buffer = tr->trace_buffer.buffer;
2133	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2134					  flags, pc);
2135	if (!event)
2136		goto out;
2137	entry = ring_buffer_event_data(event);
2138	entry->ip			= ip;
2139	entry->fmt			= fmt;
2140
2141	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2142	if (!call_filter_check_discard(call, entry, buffer, event)) {
2143		__buffer_unlock_commit(buffer, event);
2144		ftrace_trace_stack(buffer, flags, 6, pc);
2145	}
2146
2147out:
2148	preempt_enable_notrace();
2149	unpause_graph_tracing();
2150
2151	return len;
2152}
2153EXPORT_SYMBOL_GPL(trace_vbprintk);
2154
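/*
 * Format the message into a per-context scratch buffer (see
 * get_trace_buf()) and record it in @buffer as a TRACE_PRINT event.
 */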
2155static int
2156__trace_array_vprintk(struct ring_buffer *buffer,
2157		      unsigned long ip, const char *fmt, va_list args)
2158{
2159	struct ftrace_event_call *call = &event_print;
2160	struct ring_buffer_event *event;
2161	int len = 0, size, pc;
2162	struct print_entry *entry;
2163	unsigned long flags;
2164	char *tbuffer;
2165
2166	if (tracing_disabled || tracing_selftest_running)
2167		return 0;
2168
2169	/* Don't pollute graph traces with trace_vprintk internals */
2170	pause_graph_tracing();
2171
2172	pc = preempt_count();
2173	preempt_disable_notrace();
2174
2176	tbuffer = get_trace_buf();
2177	if (!tbuffer) {
2178		len = 0;
2179		goto out;
2180	}
2181
2182	len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2183	if (len > TRACE_BUF_SIZE)
2184		goto out;
2185
2186	local_save_flags(flags);
2187	size = sizeof(*entry) + len + 1;
2188	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2189					  flags, pc);
2190	if (!event)
2191		goto out;
2192	entry = ring_buffer_event_data(event);
2193	entry->ip = ip;
2194
2195	memcpy(&entry->buf, tbuffer, len);
2196	entry->buf[len] = '\0';
2197	if (!call_filter_check_discard(call, entry, buffer, event)) {
2198		__buffer_unlock_commit(buffer, event);
2199		ftrace_trace_stack(buffer, flags, 6, pc);
2200	}
2201 out:
2202	preempt_enable_notrace();
2203	unpause_graph_tracing();
2204
2205	return len;
2206}
2207
2208int trace_array_vprintk(struct trace_array *tr,
2209			unsigned long ip, const char *fmt, va_list args)
2210{
2211	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2212}
2213
2214int trace_array_printk(struct trace_array *tr,
2215		       unsigned long ip, const char *fmt, ...)
2216{
2217	int ret;
2218	va_list ap;
2219
2220	if (!(trace_flags & TRACE_ITER_PRINTK))
2221		return 0;
2222
2223	va_start(ap, fmt);
2224	ret = trace_array_vprintk(tr, ip, fmt, ap);
2225	va_end(ap);
2226	return ret;
2227}
2228
2229int trace_array_printk_buf(struct ring_buffer *buffer,
2230			   unsigned long ip, const char *fmt, ...)
2231{
2232	int ret;
2233	va_list ap;
2234
2235	if (!(trace_flags & TRACE_ITER_PRINTK))
2236		return 0;
2237
2238	va_start(ap, fmt);
2239	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2240	va_end(ap);
2241	return ret;
2242}
2243
2244int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2245{
2246	return trace_array_vprintk(&global_trace, ip, fmt, args);
2247}
2248EXPORT_SYMBOL_GPL(trace_vprintk);
2249
2250static void trace_iterator_increment(struct trace_iterator *iter)
2251{
2252	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2253
2254	iter->idx++;
2255	if (buf_iter)
2256		ring_buffer_read(buf_iter, NULL);
2257}
2258
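/*
 * Peek at the next entry for @cpu without consuming it. A read
 * iterator is used when one exists for this cpu; otherwise the live
 * buffer is peeked and *lost_events is filled in. iter->ent_size is
 * set to the size of the entry found (or 0 if there is none).
 */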
2259static struct trace_entry *
2260peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2261		unsigned long *lost_events)
2262{
2263	struct ring_buffer_event *event;
2264	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2265
2266	if (buf_iter)
2267		event = ring_buffer_iter_peek(buf_iter, ts);
2268	else
2269		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2270					 lost_events);
2271
2272	if (event) {
2273		iter->ent_size = ring_buffer_event_length(event);
2274		return ring_buffer_event_data(event);
2275	}
2276	iter->ent_size = 0;
2277	return NULL;
2278}
2279
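/*
 * Find the oldest pending entry across all cpus (or only the
 * iterator's cpu_file if it is bound to a single cpu) by comparing
 * timestamps, and report which cpu it came from, its timestamp and
 * any lost events.
 */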
2280static struct trace_entry *
2281__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2282		  unsigned long *missing_events, u64 *ent_ts)
2283{
2284	struct ring_buffer *buffer = iter->trace_buffer->buffer;
2285	struct trace_entry *ent, *next = NULL;
2286	unsigned long lost_events = 0, next_lost = 0;
2287	int cpu_file = iter->cpu_file;
2288	u64 next_ts = 0, ts;
2289	int next_cpu = -1;
2290	int next_size = 0;
2291	int cpu;
2292
2293	/*
2294	 * If we are in a per_cpu trace file, don't bother iterating over
2295	 * all CPUs; just peek at that CPU directly.
2296	 */
2297	if (cpu_file > RING_BUFFER_ALL_CPUS) {
2298		if (ring_buffer_empty_cpu(buffer, cpu_file))
2299			return NULL;
2300		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2301		if (ent_cpu)
2302			*ent_cpu = cpu_file;
2303
2304		return ent;
2305	}
2306
2307	for_each_tracing_cpu(cpu) {
2308
2309		if (ring_buffer_empty_cpu(buffer, cpu))
2310			continue;
2311
2312		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2313
2314		/*
2315		 * Pick the entry with the smallest timestamp:
2316		 */
2317		if (ent && (!next || ts < next_ts)) {
2318			next = ent;
2319			next_cpu = cpu;
2320			next_ts = ts;
2321			next_lost = lost_events;
2322			next_size = iter->ent_size;
2323		}
2324	}
2325
2326	iter->ent_size = next_size;
2327
2328	if (ent_cpu)
2329		*ent_cpu = next_cpu;
2330
2331	if (ent_ts)
2332		*ent_ts = next_ts;
2333
2334	if (missing_events)
2335		*missing_events = next_lost;
2336
2337	return next;
2338}
2339
2340/* Find the next real entry, without updating the iterator itself */
2341struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2342					  int *ent_cpu, u64 *ent_ts)
2343{
2344	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2345}
2346
2347/* Find the next real entry, and increment the iterator to the next entry */
2348void *trace_find_next_entry_inc(struct trace_iterator *iter)
2349{
2350	iter->ent = __find_next_entry(iter, &iter->cpu,
2351				      &iter->lost_events, &iter->ts);
2352
2353	if (iter->ent)
2354		trace_iterator_increment(iter);
2355
2356	return iter->ent ? iter : NULL;
2357}
2358
2359static void trace_consume(struct trace_iterator *iter)
2360{
2361	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2362			    &iter->lost_events);
2363}
2364
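/* seq_file ->next(): advance the iterator to the entry at *pos */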
2365static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2366{
2367	struct trace_iterator *iter = m->private;
2368	int i = (int)*pos;
2369	void *ent;
2370
2371	WARN_ON_ONCE(iter->leftover);
2372
2373	(*pos)++;
2374
2375	/* can't go backwards */
2376	if (iter->idx > i)
2377		return NULL;
2378
2379	if (iter->idx < 0)
2380		ent = trace_find_next_entry_inc(iter);
2381	else
2382		ent = iter;
2383
2384	while (ent && iter->idx < i)
2385		ent = trace_find_next_entry_inc(iter);
2386
2387	iter->pos = *pos;
2388
2389	return ent;
2390}
2391
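/*
 * Reset the ring buffer iterator for @cpu and skip any entries stamped
 * before the buffer's time_start, accounting for them in
 * skipped_entries so the entry counts stay consistent.
 */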
2392void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2393{
2394	struct ring_buffer_event *event;
2395	struct ring_buffer_iter *buf_iter;
2396	unsigned long entries = 0;
2397	u64 ts;
2398
2399	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2400
2401	buf_iter = trace_buffer_iter(iter, cpu);
2402	if (!buf_iter)
2403		return;
2404
2405	ring_buffer_iter_reset(buf_iter);
2406
2407	/*
2408	 * With the max latency tracers, we could have the case
2409	 * that a reset never took place on a cpu. This is evident
2410	 * from the timestamp being before the start of the buffer.
2411	 */
2412	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2413		if (ts >= iter->trace_buffer->time_start)
2414			break;
2415		entries++;
2416		ring_buffer_read(buf_iter, NULL);
2417	}
2418
2419	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2420}
2421
2422/*
2423 * The current tracer is copied to avoid holding a global lock
2424 * all around.
2425 */
2426static void *s_start(struct seq_file *m, loff_t *pos)
2427{
2428	struct trace_iterator *iter = m->private;
2429	struct trace_array *tr = iter->tr;
2430	int cpu_file = iter->cpu_file;
2431	void *p = NULL;
2432	loff_t l = 0;
2433	int cpu;
2434
2435	/*
2436	 * Copy the tracer to avoid using a global lock all around.
2437	 * iter->trace is a copy of current_trace; the name pointer
2438	 * may be compared instead of using strcmp(), as iter->trace->name
2439	 * will point to the same string as current_trace->name.
2440	 */
2441	mutex_lock(&trace_types_lock);
2442	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2443		*iter->trace = *tr->current_trace;
2444	mutex_unlock(&trace_types_lock);
2445
2446#ifdef CONFIG_TRACER_MAX_TRACE
2447	if (iter->snapshot && iter->trace->use_max_tr)
2448		return ERR_PTR(-EBUSY);
2449#endif
2450
2451	if (!iter->snapshot)
2452		atomic_inc(&trace_record_cmdline_disabled);
2453
2454	if (*pos != iter->pos) {
2455		iter->ent = NULL;
2456		iter->cpu = 0;
2457		iter->idx = -1;
2458
2459		if (cpu_file == RING_BUFFER_ALL_CPUS) {
2460			for_each_tracing_cpu(cpu)
2461				tracing_iter_reset(iter, cpu);
2462		} else
2463			tracing_iter_reset(iter, cpu_file);
2464
2465		iter->leftover = 0;
2466		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2467			;
2468
2469	} else {
2470		/*
2471		 * If we overflowed the seq_file before, then we want
2472		 * to just reuse the trace_seq buffer again.
2473		 */
2474		if (iter->leftover)
2475			p = iter;
2476		else {
2477			l = *pos - 1;
2478			p = s_next(m, p, &l);
2479		}
2480	}
2481
2482	trace_event_read_lock();
2483	trace_access_lock(cpu_file);
2484	return p;
2485}
2486
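/*
 * seq_file ->stop(): drop the locks taken in s_start() and, unless this
 * is a snapshot read, re-enable cmdline recording.
 */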
2487static void s_stop(struct seq_file *m, void *p)
2488{
2489	struct trace_iterator *iter = m->private;
2490
2491#ifdef CONFIG_TRACER_MAX_TRACE
2492	if (iter->snapshot && iter->trace->use_max_tr)
2493		return;
2494#endif
2495
2496	if (!iter->snapshot)
2497		atomic_dec(&trace_record_cmdline_disabled);
2498
2499	trace_access_unlock(iter->cpu_file);
2500	trace_event_read_unlock();
2501}
2502
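/*
 * Sum up the events of all cpus: *entries is what is still held in the
 * buffers, while *total also includes events lost to overruns (except
 * on cpus with skipped entries, where the two counts are the same).
 */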
2503static void
2504get_total_entries(struct trace_buffer *buf,
2505		  unsigned long *total, unsigned long *entries)
2506{
2507	unsigned long count;
2508	int cpu;
2509
2510	*total = 0;
2511	*entries = 0;
2512
2513	for_each_tracing_cpu(cpu) {
2514		count = ring_buffer_entries_cpu(buf->buffer, cpu);
2515		/*
2516		 * If this buffer has skipped entries, then we hold all
2517		 * entries for the trace and we need to ignore the
2518		 * ones before the time stamp.
2519		 */
2520		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2521			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2522			/* total is the same as the entries */
2523			*total += count;
2524		} else
2525			*total += count +
2526				ring_buffer_overrun_cpu(buf->buffer, cpu);
2527		*entries += count;
2528	}
2529}
2530
2531static void print_lat_help_header(struct seq_file *m)
2532{
2533	seq_puts(m, "#                  _------=> CPU#            \n");
2534	seq_puts(m, "#                 / _-----=> irqs-off        \n");
2535	seq_puts(m, "#                | / _----=> need-resched    \n");
2536	seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2537	seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2538	seq_puts(m, "#                |||| /     delay             \n");
2539	seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2540	seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2541}
2542
2543static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2544{
2545	unsigned long total;
2546	unsigned long entries;
2547
2548	get_total_entries(buf, &total, &entries);
2549	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2550		   entries, total, num_online_cpus());
2551	seq_puts(m, "#\n");
2552}
2553
2554static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2555{
2556	print_event_info(buf, m);
2557	seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2558	seq_puts(m, "#              | |       |          |         |\n");
2559}
2560
2561static void print_func_help_header_tgid(struct trace_buffer *buf, struct seq_file *m)
2562{
2563	print_event_info(buf, m);
2564	seq_puts(m, "#           TASK-PID    TGID   CPU#      TIMESTAMP  FUNCTION\n");
2565	seq_puts(m, "#              | |        |      |          |         |\n");
2566}
2567
2568static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2569{
2570	print_event_info(buf, m);
2571	seq_puts(m, "#                              _-----=> irqs-off\n");
2572	seq_puts(m, "#                             / _----=> need-resched\n");
2573	seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2574	seq_puts(m, "#                            || / _--=> preempt-depth\n");
2575	seq_puts(m, "#                            ||| /     delay\n");
2576	seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2577	seq_puts(m, "#              | |       |   ||||       |         |\n");
2578}
2579
2580static void print_func_help_header_irq_tgid(struct trace_buffer *buf, struct seq_file *m)
2581{
2582	print_event_info(buf, m);
2583	seq_puts(m, "#                                      _-----=> irqs-off\n");
2584	seq_puts(m, "#                                     / _----=> need-resched\n");
2585	seq_puts(m, "#                                    | / _---=> hardirq/softirq\n");
2586	seq_puts(m, "#                                    || / _--=> preempt-depth\n");
2587	seq_puts(m, "#                                    ||| /     delay\n");
2588	seq_puts(m, "#           TASK-PID    TGID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2589	seq_puts(m, "#              | |        |      |   ||||       |         |\n");
2590}
2591
2592void
2593print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2594{
2595	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2596	struct trace_buffer *buf = iter->trace_buffer;
2597	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2598	struct tracer *type = iter->trace;
2599	unsigned long entries;
2600	unsigned long total;
2601	const char *name = type->name;
2604
2605	get_total_entries(buf, &total, &entries);
2606
2607	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2608		   name, UTS_RELEASE);
2609	seq_puts(m, "# -----------------------------------"
2610		 "---------------------------------\n");
2611	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2612		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2613		   nsecs_to_usecs(data->saved_latency),
2614		   entries,
2615		   total,
2616		   buf->cpu,
2617#if defined(CONFIG_PREEMPT_NONE)
2618		   "server",
2619#elif defined(CONFIG_PREEMPT_VOLUNTARY)
2620		   "desktop",
2621#elif defined(CONFIG_PREEMPT)
2622		   "preempt",
2623#else
2624		   "unknown",
2625#endif
2626		   /* These are reserved for later use */
2627		   0, 0, 0, 0);
2628#ifdef CONFIG_SMP
2629	seq_printf(m, " #P:%d)\n", num_online_cpus());
2630#else
2631	seq_puts(m, ")\n");
2632#endif
2633	seq_puts(m, "#    -----------------\n");
2634	seq_printf(m, "#    | task: %.16s-%d "
2635		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2636		   data->comm, data->pid,
2637		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2638		   data->policy, data->rt_priority);
2639	seq_puts(m, "#    -----------------\n");
2640
2641	if (data->critical_start) {
2642		seq_puts(m, "#  => started at: ");
2643		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2644		trace_print_seq(m, &iter->seq);
2645		seq_puts(m, "\n#  => ended at:   ");
2646		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2647		trace_print_seq(m, &iter->seq);
2648		seq_puts(m, "\n#\n");
2649	}
2650
2651	seq_puts(m, "#\n");
2652}
2653
2654static void test_cpu_buff_start(struct trace_iterator *iter)
2655{
2656	struct trace_seq *s = &iter->seq;
2657
2658	if (!(trace_flags & TRACE_ITER_ANNOTATE))
2659		return;
2660
2661	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2662		return;
2663
2664	if (cpumask_test_cpu(iter->cpu, iter->started))
2665		return;
2666
2667	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2668		return;
2669
2670	cpumask_set_cpu(iter->cpu, iter->started);
2671
2672	/* Don't print started cpu buffer for the first entry of the trace */
2673	if (iter->idx > 1)
2674		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2675				iter->cpu);
2676}
2677
2678static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2679{
2680	struct trace_seq *s = &iter->seq;
2681	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2682	struct trace_entry *entry;
2683	struct trace_event *event;
2684
2685	entry = iter->ent;
2686
2687	test_cpu_buff_start(iter);
2688
2689	event = ftrace_find_event(entry->type);
2690
2691	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2692		if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2693			if (!trace_print_lat_context(iter))
2694				goto partial;
2695		} else {
2696			if (!trace_print_context(iter))
2697				goto partial;
2698		}
2699	}
2700
2701	if (event)
2702		return event->funcs->trace(iter, sym_flags, event);
2703
2704	if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2705		goto partial;
2706
2707	return TRACE_TYPE_HANDLED;
2708partial:
2709	return TRACE_TYPE_PARTIAL_LINE;
2710}
2711
2712static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2713{
2714	struct trace_seq *s = &iter->seq;
2715	struct trace_entry *entry;
2716	struct trace_event *event;
2717
2718	entry = iter->ent;
2719
2720	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2721		if (!trace_seq_printf(s, "%d %d %llu ",
2722				      entry->pid, iter->cpu, iter->ts))
2723			goto partial;
2724	}
2725
2726	event = ftrace_find_event(entry->type);
2727	if (event)
2728		return event->funcs->raw(iter, 0, event);
2729
2730	if (!trace_seq_printf(s, "%d ?\n", entry->type))
2731		goto partial;
2732
2733	return TRACE_TYPE_HANDLED;
2734partial:
2735	return TRACE_TYPE_PARTIAL_LINE;
2736}
2737
2738static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2739{
2740	struct trace_seq *s = &iter->seq;
2741	unsigned char newline = '\n';
2742	struct trace_entry *entry;
2743	struct trace_event *event;
2744
2745	entry = iter->ent;
2746
2747	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2748		SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2749		SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2750		SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2751	}
2752
2753	event = ftrace_find_event(entry->type);
2754	if (event) {
2755		enum print_line_t ret = event->funcs->hex(iter, 0, event);
2756		if (ret != TRACE_TYPE_HANDLED)
2757			return ret;
2758	}
2759
2760	SEQ_PUT_FIELD_RET(s, newline);
2761
2762	return TRACE_TYPE_HANDLED;
2763}
2764
2765static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2766{
2767	struct trace_seq *s = &iter->seq;
2768	struct trace_entry *entry;
2769	struct trace_event *event;
2770
2771	entry = iter->ent;
2772
2773	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2774		SEQ_PUT_FIELD_RET(s, entry->pid);
2775		SEQ_PUT_FIELD_RET(s, iter->cpu);
2776		SEQ_PUT_FIELD_RET(s, iter->ts);
2777	}
2778
2779	event = ftrace_find_event(entry->type);
2780	return event ? event->funcs->binary(iter, 0, event) :
2781		TRACE_TYPE_HANDLED;
2782}
2783
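/* Return 1 if there is nothing left to read in the iterator's buffers */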
2784int trace_empty(struct trace_iterator *iter)
2785{
2786	struct ring_buffer_iter *buf_iter;
2787	int cpu;
2788
2789	/* If we are looking at one CPU buffer, only check that one */
2790	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2791		cpu = iter->cpu_file;
2792		buf_iter = trace_buffer_iter(iter, cpu);
2793		if (buf_iter) {
2794			if (!ring_buffer_iter_empty(buf_iter))
2795				return 0;
2796		} else {
2797			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2798				return 0;
2799		}
2800		return 1;
2801	}
2802
2803	for_each_tracing_cpu(cpu) {
2804		buf_iter = trace_buffer_iter(iter, cpu);
2805		if (buf_iter) {
2806			if (!ring_buffer_iter_empty(buf_iter))
2807				return 0;
2808		} else {
2809			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2810				return 0;
2811		}
2812	}
2813
2814	return 1;
2815}
2816
2817/*  Called with trace_event_read_lock() held. */
2818enum print_line_t print_trace_line(struct trace_iterator *iter)
2819{
2820	enum print_line_t ret;
2821
2822	if (iter->lost_events &&
2823	    !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2824				 iter->cpu, iter->lost_events))
2825		return TRACE_TYPE_PARTIAL_LINE;
2826
2827	if (iter->trace && iter->trace->print_line) {
2828		ret = iter->trace->print_line(iter);
2829		if (ret != TRACE_TYPE_UNHANDLED)
2830			return ret;
2831	}
2832
2833	if (iter->ent->type == TRACE_BPUTS &&
2834			trace_flags & TRACE_ITER_PRINTK &&
2835			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2836		return trace_print_bputs_msg_only(iter);
2837
2838	if (iter->ent->type == TRACE_BPRINT &&
2839			trace_flags & TRACE_ITER_PRINTK &&
2840			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2841		return trace_print_bprintk_msg_only(iter);
2842
2843	if (iter->ent->type == TRACE_PRINT &&
2844			trace_flags & TRACE_ITER_PRINTK &&
2845			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2846		return trace_print_printk_msg_only(iter);
2847
2848	if (trace_flags & TRACE_ITER_BIN)
2849		return print_bin_fmt(iter);
2850
2851	if (trace_flags & TRACE_ITER_HEX)
2852		return print_hex_fmt(iter);
2853
2854	if (trace_flags & TRACE_ITER_RAW)
2855		return print_raw_fmt(iter);
2856
2857	return print_trace_fmt(iter);
2858}
2859
2860void trace_latency_header(struct seq_file *m)
2861{
2862	struct trace_iterator *iter = m->private;
2863
2864	/* print nothing if the buffers are empty */
2865	if (trace_empty(iter))
2866		return;
2867
2868	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2869		print_trace_header(m, iter);
2870
2871	if (!(trace_flags & TRACE_ITER_VERBOSE))
2872		print_lat_help_header(m);
2873}
2874
2875void trace_default_header(struct seq_file *m)
2876{
2877	struct trace_iterator *iter = m->private;
2878
2879	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2880		return;
2881
2882	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2883		/* print nothing if the buffers are empty */
2884		if (trace_empty(iter))
2885			return;
2886		print_trace_header(m, iter);
2887		if (!(trace_flags & TRACE_ITER_VERBOSE))
2888			print_lat_help_header(m);
2889	} else {
2890		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2891			if (trace_flags & TRACE_ITER_IRQ_INFO)
2892				if (trace_flags & TRACE_ITER_TGID)
2893					print_func_help_header_irq_tgid(iter->trace_buffer, m);
2894				else
2895					print_func_help_header_irq(iter->trace_buffer, m);
2896			else
2897				if (trace_flags & TRACE_ITER_TGID)
2898					print_func_help_header_tgid(iter->trace_buffer, m);
2899				else
2900					print_func_help_header(iter->trace_buffer, m);
2901		}
2902	}
2903}
2904
2905static void test_ftrace_alive(struct seq_file *m)
2906{
2907	if (!ftrace_is_dead())
2908		return;
2909	seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2910	seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2911}
2912
2913#ifdef CONFIG_TRACER_MAX_TRACE
2914static void show_snapshot_main_help(struct seq_file *m)
2915{
2916	seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2917	seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2918	seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2919	seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2920	seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2921	seq_printf(m, "#                       is not a '0' or '1')\n");
2922}
2923
2924static void show_snapshot_percpu_help(struct seq_file *m)
2925{
2926	seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2927#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2928	seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2929	seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2930#else
2931	seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2932	seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2933#endif
2934	seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2935	seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2936	seq_printf(m, "#                       is not a '0' or '1')\n");
2937}
2938
2939static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2940{
2941	if (iter->tr->allocated_snapshot)
2942		seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2943	else
2944		seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2945
2946	seq_printf(m, "# Snapshot commands:\n");
2947	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2948		show_snapshot_main_help(m);
2949	else
2950		show_snapshot_percpu_help(m);
2951}
2952#else
2953/* Should never be called */
2954static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2955#endif
2956
2957static int s_show(struct seq_file *m, void *v)
2958{
2959	struct trace_iterator *iter = v;
2960	int ret;
2961
2962	if (iter->ent == NULL) {
2963		if (iter->tr) {
2964			seq_printf(m, "# tracer: %s\n", iter->trace->name);
2965			seq_puts(m, "#\n");
2966			test_ftrace_alive(m);
2967		}
2968		if (iter->snapshot && trace_empty(iter))
2969			print_snapshot_help(m, iter);
2970		else if (iter->trace && iter->trace->print_header)
2971			iter->trace->print_header(m);
2972		else
2973			trace_default_header(m);
2974
2975	} else if (iter->leftover) {
2976		/*
2977		 * If we filled the seq_file buffer earlier, we
2978		 * want to just show it now.
2979		 */
2980		ret = trace_print_seq(m, &iter->seq);
2981
2982		/* ret should this time be zero, but you never know */
2983		iter->leftover = ret;
2984
2985	} else {
2986		print_trace_line(iter);
2987		ret = trace_print_seq(m, &iter->seq);
2988		/*
2989		 * If we overflow the seq_file buffer, then it will
2990		 * ask us for this data again at start up.
2991		 * Use that instead.
2992		 *  ret is 0 if seq_file write succeeded.
2993		 *        -1 otherwise.
2994		 */
2995		iter->leftover = ret;
2996	}
2997
2998	return 0;
2999}
3000
3001/*
3002 * Should be used after trace_array_get(); trace_types_lock
3003 * ensures that i_cdev was already initialized.
3004 */
3005static inline int tracing_get_cpu(struct inode *inode)
3006{
3007	if (inode->i_cdev) /* See trace_create_cpu_file() */
3008		return (long)inode->i_cdev - 1;
3009	return RING_BUFFER_ALL_CPUS;
3010}
3011
3012static const struct seq_operations tracer_seq_ops = {
3013	.start		= s_start,
3014	.next		= s_next,
3015	.stop		= s_stop,
3016	.show		= s_show,
3017};
3018
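/*
 * Set up a trace_iterator for reading the "trace" file: copy the
 * current tracer, select the trace buffer (the max/snapshot buffer for
 * latency tracers or snapshot reads), stop tracing unless this is a
 * snapshot, and prepare a ring buffer iterator for each cpu to be read.
 */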
3019static struct trace_iterator *
3020__tracing_open(struct inode *inode, struct file *file, bool snapshot)
3021{
3022	struct trace_array *tr = inode->i_private;
3023	struct trace_iterator *iter;
3024	int cpu;
3025
3026	if (tracing_disabled)
3027		return ERR_PTR(-ENODEV);
3028
3029	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3030	if (!iter)
3031		return ERR_PTR(-ENOMEM);
3032
3033	iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
3034				    GFP_KERNEL);
3035	if (!iter->buffer_iter)
3036		goto release;
3037
3038	/*
3039	 * We make a copy of the current tracer to avoid concurrent
3040	 * changes on it while we are reading.
3041	 */
3042	mutex_lock(&trace_types_lock);
3043	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3044	if (!iter->trace)
3045		goto fail;
3046
3047	*iter->trace = *tr->current_trace;
3048
3049	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3050		goto fail;
3051
3052	iter->tr = tr;
3053
3054#ifdef CONFIG_TRACER_MAX_TRACE
3055	/* Currently only the top directory has a snapshot */
3056	if (tr->current_trace->print_max || snapshot)
3057		iter->trace_buffer = &tr->max_buffer;
3058	else
3059#endif
3060		iter->trace_buffer = &tr->trace_buffer;
3061	iter->snapshot = snapshot;
3062	iter->pos = -1;
3063	iter->cpu_file = tracing_get_cpu(inode);
3064	mutex_init(&iter->mutex);
3065
3066	/* Notify the tracer early; before we stop tracing. */
3067	if (iter->trace && iter->trace->open)
3068		iter->trace->open(iter);
3069
3070	/* Annotate start of buffers if we had overruns */
3071	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3072		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3073
3074	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3075	if (trace_clocks[tr->clock_id].in_ns)
3076		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3077
3078	/* stop the trace while dumping if we are not opening "snapshot" */
3079	if (!iter->snapshot)
3080		tracing_stop_tr(tr);
3081
3082	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3083		for_each_tracing_cpu(cpu) {
3084			iter->buffer_iter[cpu] =
3085				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3086		}
3087		ring_buffer_read_prepare_sync();
3088		for_each_tracing_cpu(cpu) {
3089			ring_buffer_read_start(iter->buffer_iter[cpu]);
3090			tracing_iter_reset(iter, cpu);
3091		}
3092	} else {
3093		cpu = iter->cpu_file;
3094		iter->buffer_iter[cpu] =
3095			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3096		ring_buffer_read_prepare_sync();
3097		ring_buffer_read_start(iter->buffer_iter[cpu]);
3098		tracing_iter_reset(iter, cpu);
3099	}
3100
3101	mutex_unlock(&trace_types_lock);
3102
3103	return iter;
3104
3105 fail:
3106	mutex_unlock(&trace_types_lock);
3107	kfree(iter->trace);
3108	kfree(iter->buffer_iter);
3109release:
3110	seq_release_private(inode, file);
3111	return ERR_PTR(-ENOMEM);
3112}
3113
3114int tracing_open_generic(struct inode *inode, struct file *filp)
3115{
3116	if (tracing_disabled)
3117		return -ENODEV;
3118
3119	filp->private_data = inode->i_private;
3120	return 0;
3121}
3122
3123bool tracing_is_disabled(void)
3124{
3125	return tracing_disabled ? true : false;
3126}
3127
3128/*
3129 * Open and update trace_array ref count.
3130 * Must have the current trace_array passed to it.
3131 */
3132static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3133{
3134	struct trace_array *tr = inode->i_private;
3135
3136	if (tracing_disabled)
3137		return -ENODEV;
3138
3139	if (trace_array_get(tr) < 0)
3140		return -ENODEV;
3141
3142	filp->private_data = inode->i_private;
3143
3144	return 0;
3145}
3146
3147static int tracing_release(struct inode *inode, struct file *file)
3148{
3149	struct trace_array *tr = inode->i_private;
3150	struct seq_file *m = file->private_data;
3151	struct trace_iterator *iter;
3152	int cpu;
3153
3154	if (!(file->f_mode & FMODE_READ)) {
3155		trace_array_put(tr);
3156		return 0;
3157	}
3158
3159	/* Writes do not use seq_file */
3160	iter = m->private;
3161	mutex_lock(&trace_types_lock);
3162
3163	for_each_tracing_cpu(cpu) {
3164		if (iter->buffer_iter[cpu])
3165			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3166	}
3167
3168	if (iter->trace && iter->trace->close)
3169		iter->trace->close(iter);
3170
3171	if (!iter->snapshot)
3172		/* reenable tracing if it was previously enabled */
3173		tracing_start_tr(tr);
3174
3175	__trace_array_put(tr);
3176
3177	mutex_unlock(&trace_types_lock);
3178
3179	mutex_destroy(&iter->mutex);
3180	free_cpumask_var(iter->started);
3181	kfree(iter->trace);
3182	kfree(iter->buffer_iter);
3183	seq_release_private(inode, file);
3184
3185	return 0;
3186}
3187
3188static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3189{
3190	struct trace_array *tr = inode->i_private;
3191
3192	trace_array_put(tr);
3193	return 0;
3194}
3195
3196static int tracing_single_release_tr(struct inode *inode, struct file *file)
3197{
3198	struct trace_array *tr = inode->i_private;
3199
3200	trace_array_put(tr);
3201
3202	return single_release(inode, file);
3203}
3204
3205static int tracing_open(struct inode *inode, struct file *file)
3206{
3207	struct trace_array *tr = inode->i_private;
3208	struct trace_iterator *iter;
3209	int ret = 0;
3210
3211	if (trace_array_get(tr) < 0)
3212		return -ENODEV;
3213
3214	/* If this file was open for write, then erase contents */
3215	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3216		int cpu = tracing_get_cpu(inode);
3217
3218		if (cpu == RING_BUFFER_ALL_CPUS)
3219			tracing_reset_online_cpus(&tr->trace_buffer);
3220		else
3221			tracing_reset(&tr->trace_buffer, cpu);
3222	}
3223
3224	if (file->f_mode & FMODE_READ) {
3225		iter = __tracing_open(inode, file, false);
3226		if (IS_ERR(iter))
3227			ret = PTR_ERR(iter);
3228		else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3229			iter->iter_flags |= TRACE_FILE_LAT_FMT;
3230	}
3231
3232	if (ret < 0)
3233		trace_array_put(tr);
3234
3235	return ret;
3236}
3237
3238/*
3239 * Some tracers are not suitable for instance buffers.
3240 * A tracer is always available for the global array (toplevel)
3241 * or if it explicitly states that it is.
3242 */
3243static bool
3244trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3245{
3246	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3247}
3248
3249/* Find the next tracer that this trace array may use */
3250static struct tracer *
3251get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3252{
3253	while (t && !trace_ok_for_array(t, tr))
3254		t = t->next;
3255
3256	return t;
3257}
3258
3259static void *
3260t_next(struct seq_file *m, void *v, loff_t *pos)
3261{
3262	struct trace_array *tr = m->private;
3263	struct tracer *t = v;
3264
3265	(*pos)++;
3266
3267	if (t)
3268		t = get_tracer_for_array(tr, t->next);
3269
3270	return t;
3271}
3272
3273static void *t_start(struct seq_file *m, loff_t *pos)
3274{
3275	struct trace_array *tr = m->private;
3276	struct tracer *t;
3277	loff_t l = 0;
3278
3279	mutex_lock(&trace_types_lock);
3280
3281	t = get_tracer_for_array(tr, trace_types);
3282	for (; t && l < *pos; t = t_next(m, t, &l))
3283		;
3284
3285	return t;
3286}
3287
3288static void t_stop(struct seq_file *m, void *p)
3289{
3290	mutex_unlock(&trace_types_lock);
3291}
3292
3293static int t_show(struct seq_file *m, void *v)
3294{
3295	struct tracer *t = v;
3296
3297	if (!t)
3298		return 0;
3299
3300	seq_printf(m, "%s", t->name);
3301	if (t->next)
3302		seq_putc(m, ' ');
3303	else
3304		seq_putc(m, '\n');
3305
3306	return 0;
3307}
3308
3309static const struct seq_operations show_traces_seq_ops = {
3310	.start		= t_start,
3311	.next		= t_next,
3312	.stop		= t_stop,
3313	.show		= t_show,
3314};
3315
3316static int show_traces_open(struct inode *inode, struct file *file)
3317{
3318	struct trace_array *tr = inode->i_private;
3319	struct seq_file *m;
3320	int ret;
3321
3322	if (tracing_disabled)
3323		return -ENODEV;
3324
3325	ret = seq_open(file, &show_traces_seq_ops);
3326	if (ret)
3327		return ret;
3328
3329	m = file->private_data;
3330	m->private = tr;
3331
3332	return 0;
3333}
3334
3335static ssize_t
3336tracing_write_stub(struct file *filp, const char __user *ubuf,
3337		   size_t count, loff_t *ppos)
3338{
3339	return count;
3340}
3341
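/*
 * Files opened for read go through seq_lseek(); write-only opens have
 * no seq_file attached, so just reset the file position to zero.
 */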
3342loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3343{
3344	int ret;
3345
3346	if (file->f_mode & FMODE_READ)
3347		ret = seq_lseek(file, offset, whence);
3348	else
3349		file->f_pos = ret = 0;
3350
3351	return ret;
3352}
3353
3354static const struct file_operations tracing_fops = {
3355	.open		= tracing_open,
3356	.read		= seq_read,
3357	.write		= tracing_write_stub,
3358	.llseek		= tracing_lseek,
3359	.release	= tracing_release,
3360};
3361
3362static const struct file_operations show_traces_fops = {
3363	.open		= show_traces_open,
3364	.read		= seq_read,
3365	.release	= seq_release,
3366	.llseek		= seq_lseek,
3367};
3368
3369/*
3370 * The tracer itself will not take this lock, but still we want
3371 * to provide a consistent cpumask to user-space:
3372 */
3373static DEFINE_MUTEX(tracing_cpumask_update_lock);
3374
3375/*
3376 * Temporary storage for the character representation of the
3377 * CPU bitmask (and one more byte for the newline):
3378 */
3379static char mask_str[NR_CPUS + 1];
3380
3381static ssize_t
3382tracing_cpumask_read(struct file *filp, char __user *ubuf,
3383		     size_t count, loff_t *ppos)
3384{
3385	struct trace_array *tr = file_inode(filp)->i_private;
3386	int len;
3387
3388	mutex_lock(&tracing_cpumask_update_lock);
3389
3390	len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3391	if (count - len < 2) {
3392		count = -EINVAL;
3393		goto out_err;
3394	}
3395	len += sprintf(mask_str + len, "\n");
3396	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3397
3398out_err:
3399	mutex_unlock(&tracing_cpumask_update_lock);
3400
3401	return count;
3402}
3403
3404static ssize_t
3405tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3406		      size_t count, loff_t *ppos)
3407{
3408	struct trace_array *tr = file_inode(filp)->i_private;
3409	cpumask_var_t tracing_cpumask_new;
3410	int err, cpu;
3411
3412	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3413		return -ENOMEM;
3414
3415	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3416	if (err)
3417		goto err_unlock;
3418
3419	mutex_lock(&tracing_cpumask_update_lock);
3420
3421	local_irq_disable();
3422	arch_spin_lock(&tr->max_lock);
3423	for_each_tracing_cpu(cpu) {
3424		/*
3425		 * Increase/decrease the disabled counter if we are
3426		 * about to flip a bit in the cpumask:
3427		 */
3428		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3429				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3430			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3431			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3432		}
3433		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3434				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3435			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3436			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3437		}
3438	}
3439	arch_spin_unlock(&tr->max_lock);
3440	local_irq_enable();
3441
3442	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3443
3444	mutex_unlock(&tracing_cpumask_update_lock);
3445	free_cpumask_var(tracing_cpumask_new);
3446
3447	return count;
3448
3449err_unlock:
3450	free_cpumask_var(tracing_cpumask_new);
3451
3452	return err;
3453}
3454
3455static const struct file_operations tracing_cpumask_fops = {
3456	.open		= tracing_open_generic_tr,
3457	.read		= tracing_cpumask_read,
3458	.write		= tracing_cpumask_write,
3459	.release	= tracing_release_generic_tr,
3460	.llseek		= generic_file_llseek,
3461};
3462
3463static int tracing_trace_options_show(struct seq_file *m, void *v)
3464{
3465	struct tracer_opt *trace_opts;
3466	struct trace_array *tr = m->private;
3467	u32 tracer_flags;
3468	int i;
3469
3470	mutex_lock(&trace_types_lock);
3471	tracer_flags = tr->current_trace->flags->val;
3472	trace_opts = tr->current_trace->flags->opts;
3473
3474	for (i = 0; trace_options[i]; i++) {
3475		if (trace_flags & (1 << i))
3476			seq_printf(m, "%s\n", trace_options[i]);
3477		else
3478			seq_printf(m, "no%s\n", trace_options[i]);
3479	}
3480
3481	for (i = 0; trace_opts[i].name; i++) {
3482		if (tracer_flags & trace_opts[i].bit)
3483			seq_printf(m, "%s\n", trace_opts[i].name);
3484		else
3485			seq_printf(m, "no%s\n", trace_opts[i].name);
3486	}
3487	mutex_unlock(&trace_types_lock);
3488
3489	return 0;
3490}
3491
3492static int __set_tracer_option(struct trace_array *tr,
3493			       struct tracer_flags *tracer_flags,
3494			       struct tracer_opt *opts, int neg)
3495{
3496	struct tracer *trace = tr->current_trace;
3497	int ret;
3498
3499	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3500	if (ret)
3501		return ret;
3502
3503	if (neg)
3504		tracer_flags->val &= ~opts->bit;
3505	else
3506		tracer_flags->val |= opts->bit;
3507	return 0;
3508}
3509
3510/* Try to assign a tracer specific option */
3511static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3512{
3513	struct tracer *trace = tr->current_trace;
3514	struct tracer_flags *tracer_flags = trace->flags;
3515	struct tracer_opt *opts = NULL;
3516	int i;
3517
3518	for (i = 0; tracer_flags->opts[i].name; i++) {
3519		opts = &tracer_flags->opts[i];
3520
3521		if (strcmp(cmp, opts->name) == 0)
3522			return __set_tracer_option(tr, trace->flags, opts, neg);
3523	}
3524
3525	return -EINVAL;
3526}
3527
3528/* Some tracers require overwrite to stay enabled */
3529int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3530{
3531	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3532		return -1;
3533
3534	return 0;
3535}
3536
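/*
 * Set or clear one bit of the global trace_flags. The current tracer
 * may veto the change via ->flag_changed(), and side effects (cmdline
 * recording, ring buffer overwrite mode, trace_printk comm tracking)
 * are applied here as well. Nothing is done if the flag already has
 * the requested value.
 */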
3537int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3538{
3539	/* do nothing if flag is already set */
3540	if (!!(trace_flags & mask) == !!enabled)
3541		return 0;
3542
3543	/* Give the tracer a chance to approve the change */
3544	if (tr->current_trace->flag_changed)
3545		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3546			return -EINVAL;
3547
3548	if (enabled)
3549		trace_flags |= mask;
3550	else
3551		trace_flags &= ~mask;
3552
3553	if (mask == TRACE_ITER_RECORD_CMD)
3554		trace_event_enable_cmd_record(enabled);
3555
3556	if (mask == TRACE_ITER_OVERWRITE) {
3557		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3558#ifdef CONFIG_TRACER_MAX_TRACE
3559		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3560#endif
3561	}
3562
3563	if (mask == TRACE_ITER_PRINTK)
3564		trace_printk_start_stop_comm(enabled);
3565
3566	return 0;
3567}
3568
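/*
 * Parse a single option written to trace_options. A "no" prefix clears
 * the option; names that do not match a core option are handed to the
 * current tracer's private options.
 */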
3569static int trace_set_options(struct trace_array *tr, char *option)
3570{
3571	char *cmp;
3572	int neg = 0;
3573	int ret = -ENODEV;
3574	int i;
3575
3576	cmp = strstrip(option);
3577
3578	if (strncmp(cmp, "no", 2) == 0) {
3579		neg = 1;
3580		cmp += 2;
3581	}
3582
3583	mutex_lock(&trace_types_lock);
3584
3585	for (i = 0; trace_options[i]; i++) {
3586		if (strcmp(cmp, trace_options[i]) == 0) {
3587			ret = set_tracer_flag(tr, 1 << i, !neg);
3588			break;
3589		}
3590	}
3591
3592	/* If no option could be set, test the specific tracer options */
3593	if (!trace_options[i])
3594		ret = set_tracer_option(tr, cmp, neg);
3595
3596	mutex_unlock(&trace_types_lock);
3597
3598	return ret;
3599}
3600
3601static ssize_t
3602tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3603			size_t cnt, loff_t *ppos)
3604{
3605	struct seq_file *m = filp->private_data;
3606	struct trace_array *tr = m->private;
3607	char buf[64];
3608	int ret;
3609
3610	if (cnt >= sizeof(buf))
3611		return -EINVAL;
3612
3613	if (copy_from_user(&buf, ubuf, cnt))
3614		return -EFAULT;
3615
3616	buf[cnt] = 0;
3617
3618	ret = trace_set_options(tr, buf);
3619	if (ret < 0)
3620		return ret;
3621
3622	*ppos += cnt;
3623
3624	return cnt;
3625}
3626
3627static int tracing_trace_options_open(struct inode *inode, struct file *file)
3628{
3629	struct trace_array *tr = inode->i_private;
3630	int ret;
3631
3632	if (tracing_disabled)
3633		return -ENODEV;
3634
3635	if (trace_array_get(tr) < 0)
3636		return -ENODEV;
3637
3638	ret = single_open(file, tracing_trace_options_show, inode->i_private);
3639	if (ret < 0)
3640		trace_array_put(tr);
3641
3642	return ret;
3643}
3644
3645static const struct file_operations tracing_iter_fops = {
3646	.open		= tracing_trace_options_open,
3647	.read		= seq_read,
3648	.llseek		= seq_lseek,
3649	.release	= tracing_single_release_tr,
3650	.write		= tracing_trace_options_write,
3651};
3652
3653static const char readme_msg[] =
3654	"tracing mini-HOWTO:\n\n"
3655	"# echo 0 > tracing_on : quick way to disable tracing\n"
3656	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3657	" Important files:\n"
3658	"  trace\t\t\t- The static contents of the buffer\n"
3659	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
3660	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3661	"  current_tracer\t- function and latency tracers\n"
3662	"  available_tracers\t- list of configured tracers for current_tracer\n"
3663	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3664	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3665	"  trace_clock\t\t- change the clock used to order events\n"
3666	"       local:   Per cpu clock but may not be synced across CPUs\n"
3667	"      global:   Synced across CPUs but slows tracing down.\n"
3668	"     counter:   Not a clock, but just an increment\n"
3669	"      uptime:   Jiffy counter from time of boot\n"
3670	"        perf:   Same clock that perf events use\n"
3671#ifdef CONFIG_X86_64
3672	"     x86-tsc:   TSC cycle counter\n"
3673#endif
3674	"\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
3675	"  tracing_cpumask\t- Limit which CPUs to trace\n"
3676	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3677	"\t\t\t  Remove sub-buffer with rmdir\n"
3678	"  trace_options\t\t- Set format or modify how tracing happens\n"
3679	"\t\t\t  Disable an option by prefixing the option name\n"
3680	"\t\t\t  with 'no'\n"
3681	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
3682#ifdef CONFIG_DYNAMIC_FTRACE
3683	"\n  available_filter_functions - list of functions that can be filtered on\n"
3684	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
3685	"\t\t\t  functions\n"
3686	"\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3687	"\t     modules: Can select a group via module\n"
3688	"\t      Format: :mod:<module-name>\n"
3689	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3690	"\t    triggers: a command to perform when function is hit\n"
3691	"\t      Format: <function>:<trigger>[:count]\n"
3692	"\t     trigger: traceon, traceoff\n"
3693	"\t\t      enable_event:<system>:<event>\n"
3694	"\t\t      disable_event:<system>:<event>\n"
3695#ifdef CONFIG_STACKTRACE
3696	"\t\t      stacktrace\n"
3697#endif
3698#ifdef CONFIG_TRACER_SNAPSHOT
3699	"\t\t      snapshot\n"
3700#endif
3701	"\t\t      dump\n"
3702	"\t\t      cpudump\n"
3703	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3704	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3705	"\t     The first one will disable tracing every time do_fault is hit\n"
3706	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3707	"\t       The first time do_trap is hit and it disables tracing, the\n"
3708	"\t       counter will decrement to 2. If tracing is already disabled,\n"
3709	"\t       the counter will not decrement. It only decrements when the\n"
3710	"\t       trigger did work\n"
3711	"\t     To remove trigger without count:\n"
3712	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3713	"\t     To remove trigger with a count:\n"
3714	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3715	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3716	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3717	"\t    modules: Can select a group via module command :mod:\n"
3718	"\t    Does not accept triggers\n"
3719#endif /* CONFIG_DYNAMIC_FTRACE */
3720#ifdef CONFIG_FUNCTION_TRACER
3721	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3722	"\t\t    (function)\n"
3723#endif
3724#ifdef CONFIG_FUNCTION_GRAPH_TRACER
3725	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3726	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3727	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3728#endif
3729#ifdef CONFIG_TRACER_SNAPSHOT
3730	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3731	"\t\t\t  snapshot buffer. Read the contents for more\n"
3732	"\t\t\t  information\n"
3733#endif
3734#ifdef CONFIG_STACK_TRACER
3735	"  stack_trace\t\t- Shows the max stack trace when active\n"
3736	"  stack_max_size\t- Shows current max stack size that was traced\n"
3737	"\t\t\t  Write into this file to reset the max size (trigger a\n"
3738	"\t\t\t  new trace)\n"
3739#ifdef CONFIG_DYNAMIC_FTRACE
3740	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3741	"\t\t\t  traces\n"
3742#endif
3743#endif /* CONFIG_STACK_TRACER */
3744	"  events/\t\t- Directory containing all trace event subsystems:\n"
3745	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3746	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
3747	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3748	"\t\t\t  events\n"
3749	"      filter\t\t- If set, only events passing filter are traced\n"
3750	"  events/<system>/<event>/\t- Directory containing control files for\n"
3751	"\t\t\t  <event>:\n"
3752	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3753	"      filter\t\t- If set, only events passing filter are traced\n"
3754	"      trigger\t\t- If set, a command to perform when event is hit\n"
3755	"\t    Format: <trigger>[:count][if <filter>]\n"
3756	"\t   trigger: traceon, traceoff\n"
3757	"\t            enable_event:<system>:<event>\n"
3758	"\t            disable_event:<system>:<event>\n"
3759#ifdef CONFIG_STACKTRACE
3760	"\t\t    stacktrace\n"
3761#endif
3762#ifdef CONFIG_TRACER_SNAPSHOT
3763	"\t\t    snapshot\n"
3764#endif
3765	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3766	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3767	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3768	"\t                  events/block/block_unplug/trigger\n"
3769	"\t   The first disables tracing every time block_unplug is hit.\n"
3770	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3771	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3772	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3773	"\t   Like function triggers, the counter is only decremented if it\n"
3774	"\t    enabled or disabled tracing.\n"
3775	"\t   To remove a trigger without a count:\n"
3776	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3777	"\t   To remove a trigger with a count:\n"
3778	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3779	"\t   Filters can be ignored when removing a trigger.\n"
3780;
3781
3782static ssize_t
3783tracing_readme_read(struct file *filp, char __user *ubuf,
3784		       size_t cnt, loff_t *ppos)
3785{
3786	return simple_read_from_buffer(ubuf, cnt, ppos,
3787					readme_msg, strlen(readme_msg));
3788}
3789
3790static const struct file_operations tracing_readme_fops = {
3791	.open		= tracing_open_generic,
3792	.read		= tracing_readme_read,
3793	.llseek		= generic_file_llseek,
3794};
3795
3796static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3797{
3798	unsigned int *ptr = v;
3799
3800	if (*pos || m->count)
3801		ptr++;
3802
3803	(*pos)++;
3804
3805	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3806	     ptr++) {
3807		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3808			continue;
3809
3810		return ptr;
3811	}
3812
3813	return NULL;
3814}
3815
3816static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3817{
3818	void *v;
3819	loff_t l = 0;
3820
3821	preempt_disable();
3822	arch_spin_lock(&trace_cmdline_lock);
3823
3824	v = &savedcmd->map_cmdline_to_pid[0];
3825	while (l <= *pos) {
3826		v = saved_cmdlines_next(m, v, &l);
3827		if (!v)
3828			return NULL;
3829	}
3830
3831	return v;
3832}
3833
3834static void saved_cmdlines_stop(struct seq_file *m, void *v)
3835{
3836	arch_spin_unlock(&trace_cmdline_lock);
3837	preempt_enable();
3838}
3839
3840static int saved_cmdlines_show(struct seq_file *m, void *v)
3841{
3842	char buf[TASK_COMM_LEN];
3843	unsigned int *pid = v;
3844
3845	__trace_find_cmdline(*pid, buf);
3846	seq_printf(m, "%d %s\n", *pid, buf);
3847	return 0;
3848}
3849
3850static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3851	.start		= saved_cmdlines_start,
3852	.next		= saved_cmdlines_next,
3853	.stop		= saved_cmdlines_stop,
3854	.show		= saved_cmdlines_show,
3855};
3856
3857static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3858{
3859	if (tracing_disabled)
3860		return -ENODEV;
3861
3862	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3863}
3864
3865static const struct file_operations tracing_saved_cmdlines_fops = {
3866	.open		= tracing_saved_cmdlines_open,
3867	.read		= seq_read,
3868	.llseek		= seq_lseek,
3869	.release	= seq_release,
3870};
3871
3872static ssize_t
3873tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3874				 size_t cnt, loff_t *ppos)
3875{
3876	char buf[64];
3877	int r;
3878
3879	arch_spin_lock(&trace_cmdline_lock);
3880	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3881	arch_spin_unlock(&trace_cmdline_lock);
3882
3883	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3884}
3885
3886static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3887{
3888	kfree(s->saved_cmdlines);
3889	kfree(s->map_cmdline_to_pid);
3890	kfree(s);
3891}
3892
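/*
 * Allocate a new saved_cmdlines buffer with room for @val entries and
 * swap it in under trace_cmdline_lock, freeing the old buffer afterwards.
 */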
3893static int tracing_resize_saved_cmdlines(unsigned int val)
3894{
3895	struct saved_cmdlines_buffer *s, *savedcmd_temp;
3896
3897	s = kmalloc(sizeof(*s), GFP_KERNEL);
3898	if (!s)
3899		return -ENOMEM;
3900
3901	if (allocate_cmdlines_buffer(val, s) < 0) {
3902		kfree(s);
3903		return -ENOMEM;
3904	}
3905
3906	arch_spin_lock(&trace_cmdline_lock);
3907	savedcmd_temp = savedcmd;
3908	savedcmd = s;
3909	arch_spin_unlock(&trace_cmdline_lock);
3910	free_saved_cmdlines_buffer(savedcmd_temp);
3911
3912	return 0;
3913}
3914
3915static ssize_t
3916tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3917				  size_t cnt, loff_t *ppos)
3918{
3919	unsigned long val;
3920	int ret;
3921
3922	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3923	if (ret)
3924		return ret;
3925
3926	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
3927	if (!val || val > PID_MAX_DEFAULT)
3928		return -EINVAL;
3929
3930	ret = tracing_resize_saved_cmdlines((unsigned int)val);
3931	if (ret < 0)
3932		return ret;
3933
3934	*ppos += cnt;
3935
3936	return cnt;
3937}
3938
3939static const struct file_operations tracing_saved_cmdlines_size_fops = {
3940	.open		= tracing_open_generic,
3941	.read		= tracing_saved_cmdlines_size_read,
3942	.write		= tracing_saved_cmdlines_size_write,
3943};
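/*
 * "saved_cmdlines_size" reads back the current number of cmdline cache
 * entries and accepts a new size between 1 and PID_MAX_DEFAULT, e.g.:
 *
 *   # echo 4096 > saved_cmdlines_size
 *
 * Resizing allocates a new buffer, swaps it in under trace_cmdline_lock
 * and only then frees the old one, so readers never see a torn cache.
 */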
3944
3945static ssize_t
3946tracing_saved_tgids_read(struct file *file, char __user *ubuf,
3947				size_t cnt, loff_t *ppos)
3948{
3949	char *file_buf;
3950	char *buf;
3951	int len = 0;
3952	int pid;
3953	int i;
3954
3955	file_buf = kmalloc(SAVED_CMDLINES_DEFAULT * (16 + 1 + 16), GFP_KERNEL);
3956	if (!file_buf)
3957		return -ENOMEM;
3958
3959	buf = file_buf;
3960
3961	for (i = 0; i < SAVED_CMDLINES_DEFAULT; i++) {
3962		int tgid;
3963		int r;
3964
3965		pid = savedcmd->map_cmdline_to_pid[i];
3966		if (pid == -1 || pid == NO_CMDLINE_MAP)
3967			continue;
3968
3969		tgid = trace_find_tgid(pid);
3970		r = sprintf(buf, "%d %d\n", pid, tgid);
3971		buf += r;
3972		len += r;
3973	}
3974
3975	len = simple_read_from_buffer(ubuf, cnt, ppos,
3976				      file_buf, len);
3977
3978	kfree(file_buf);
3979
3980	return len;
3981}
3982
3983static const struct file_operations tracing_saved_tgids_fops = {
3984	.open	= tracing_open_generic,
3985	.read	= tracing_saved_tgids_read,
3986	.llseek	= generic_file_llseek,
3987};
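/*
 * "saved_tgids" pairs each cached pid with its thread group id, one
 * "<pid> <tgid>" line per entry; the allocation above leaves room for
 * up to 16 characters per id plus a separator for every cached entry.
 */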
3988
3989static ssize_t
3990tracing_set_trace_read(struct file *filp, char __user *ubuf,
3991		       size_t cnt, loff_t *ppos)
3992{
3993	struct trace_array *tr = filp->private_data;
3994	char buf[MAX_TRACER_SIZE+2];
3995	int r;
3996
3997	mutex_lock(&trace_types_lock);
3998	r = sprintf(buf, "%s\n", tr->current_trace->name);
3999	mutex_unlock(&trace_types_lock);
4000
4001	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4002}
4003
4004int tracer_init(struct tracer *t, struct trace_array *tr)
4005{
4006	tracing_reset_online_cpus(&tr->trace_buffer);
4007	return t->init(tr);
4008}
4009
4010static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4011{
4012	int cpu;
4013
4014	for_each_tracing_cpu(cpu)
4015		per_cpu_ptr(buf->data, cpu)->entries = val;
4016}
4017
4018#ifdef CONFIG_TRACER_MAX_TRACE
4019/* resize @trace_buf's buffer to the size of @size_buf's entries */
4020static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4021					struct trace_buffer *size_buf, int cpu_id)
4022{
4023	int cpu, ret = 0;
4024
4025	if (cpu_id == RING_BUFFER_ALL_CPUS) {
4026		for_each_tracing_cpu(cpu) {
4027			ret = ring_buffer_resize(trace_buf->buffer,
4028				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4029			if (ret < 0)
4030				break;
4031			per_cpu_ptr(trace_buf->data, cpu)->entries =
4032				per_cpu_ptr(size_buf->data, cpu)->entries;
4033		}
4034	} else {
4035		ret = ring_buffer_resize(trace_buf->buffer,
4036				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4037		if (ret == 0)
4038			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4039				per_cpu_ptr(size_buf->data, cpu_id)->entries;
4040	}
4041
4042	return ret;
4043}
4044#endif /* CONFIG_TRACER_MAX_TRACE */
4045
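/*
 * Resize the main buffer and, when the current tracer uses it, keep the
 * max (snapshot) buffer of the top-level instance at the same size.  If
 * the max buffer cannot be resized, the main buffer is put back to its
 * old size so the two never get out of step.
 */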
4046static int __tracing_resize_ring_buffer(struct trace_array *tr,
4047					unsigned long size, int cpu)
4048{
4049	int ret;
4050
4051	/*
4052	 * If kernel or user changes the size of the ring buffer
4053	 * we use the size that was given, and we can forget about
4054	 * expanding it later.
4055	 */
4056	ring_buffer_expanded = true;
4057
4058	/* May be called before buffers are initialized */
4059	if (!tr->trace_buffer.buffer)
4060		return 0;
4061
4062	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4063	if (ret < 0)
4064		return ret;
4065
4066#ifdef CONFIG_TRACER_MAX_TRACE
4067	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4068	    !tr->current_trace->use_max_tr)
4069		goto out;
4070
4071	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4072	if (ret < 0) {
4073		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4074						     &tr->trace_buffer, cpu);
4075		if (r < 0) {
4076			/*
4077			 * AARGH! We are left with a max buffer of a
4078			 * different size!
4079			 * The max buffer is our "snapshot" buffer.
4080			 * When a tracer needs a snapshot (one of the
4081			 * latency tracers), it swaps the max buffer
4082			 * with the saved snapshot. We succeeded in
4083			 * updating the size of the main buffer, but failed
4084			 * to update the size of the max buffer. And when we
4085			 * tried to reset the main buffer to its original
4086			 * size, we failed there too. This is very unlikely
4087			 * to happen, but if it does, warn and kill all
4088			 * tracing.
4089			 */
4090			WARN_ON(1);
4091			tracing_disabled = 1;
4092		}
4093		return ret;
4094	}
4095
4096	if (cpu == RING_BUFFER_ALL_CPUS)
4097		set_buffer_entries(&tr->max_buffer, size);
4098	else
4099		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4100
4101 out:
4102#endif /* CONFIG_TRACER_MAX_TRACE */
4103
4104	if (cpu == RING_BUFFER_ALL_CPUS)
4105		set_buffer_entries(&tr->trace_buffer, size);
4106	else
4107		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4108
4109	return ret;
4110}
4111
4112static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4113					  unsigned long size, int cpu_id)
4114{
4115	int ret = size;
4116
4117	mutex_lock(&trace_types_lock);
4118
4119	if (cpu_id != RING_BUFFER_ALL_CPUS) {
4120		/* make sure this cpu is enabled in the mask */
4121		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4122			ret = -EINVAL;
4123			goto out;
4124		}
4125	}
4126
4127	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4128	if (ret < 0)
4129		ret = -ENOMEM;
4130
4131out:
4132	mutex_unlock(&trace_types_lock);
4133
4134	return ret;
4135}
4136
4137
4138/**
4139 * tracing_update_buffers - used by tracing facility to expand ring buffers
4140 *
4141 * To save memory when tracing is configured in but never used, the
4142 * ring buffers are initially set to a minimum size. But once a user
4143 * starts to use the tracing facility, the buffers need to grow to
4144 * their default size.
4145 *
4146 * This function is to be called when a tracer is about to be used.
4147 */
4148int tracing_update_buffers(void)
4149{
4150	int ret = 0;
4151
4152	mutex_lock(&trace_types_lock);
4153	if (!ring_buffer_expanded)
4154		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4155						RING_BUFFER_ALL_CPUS);
4156	mutex_unlock(&trace_types_lock);
4157
4158	return ret;
4159}
4160
4161struct trace_option_dentry;
4162
4163static struct trace_option_dentry *
4164create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4165
4166static void
4167destroy_trace_option_files(struct trace_option_dentry *topts);
4168
4169/*
4170 * Used to clear out the tracer before deletion of an instance.
4171 * Must have trace_types_lock held.
4172 */
4173static void tracing_set_nop(struct trace_array *tr)
4174{
4175	if (tr->current_trace == &nop_trace)
4176		return;
4177
4178	tr->current_trace->enabled--;
4179
4180	if (tr->current_trace->reset)
4181		tr->current_trace->reset(tr);
4182
4183	tr->current_trace = &nop_trace;
4184}
4185
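/*
 * Switch the current tracer of @tr to the registered tracer named @buf.
 * The old tracer is reset and replaced by nop_trace first, the snapshot
 * buffer is freed or allocated as the new tracer requires, and only then
 * is the new tracer initialized and enabled.  This is what writing a
 * tracer name to the "current_trace" file ends up calling, e.g.:
 *
 *   # echo function > current_trace
 *   # echo nop > current_trace
 */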
4186static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4187{
4188	static struct trace_option_dentry *topts;
4189	struct tracer *t;
4190#ifdef CONFIG_TRACER_MAX_TRACE
4191	bool had_max_tr;
4192#endif
4193	int ret = 0;
4194
4195	mutex_lock(&trace_types_lock);
4196
4197	if (!ring_buffer_expanded) {
4198		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4199						RING_BUFFER_ALL_CPUS);
4200		if (ret < 0)
4201			goto out;
4202		ret = 0;
4203	}
4204
4205	for (t = trace_types; t; t = t->next) {
4206		if (strcmp(t->name, buf) == 0)
4207			break;
4208	}
4209	if (!t) {
4210		ret = -EINVAL;
4211		goto out;
4212	}
4213	if (t == tr->current_trace)
4214		goto out;
4215
4216	/* Some tracers are only allowed for the top level buffer */
4217	if (!trace_ok_for_array(t, tr)) {
4218		ret = -EINVAL;
4219		goto out;
4220	}
4221
4222	trace_branch_disable();
4223
4224	tr->current_trace->enabled--;
4225
4226	if (tr->current_trace->reset)
4227		tr->current_trace->reset(tr);
4228
4229	/* Current trace needs to be nop_trace before synchronize_sched */
4230	tr->current_trace = &nop_trace;
4231
4232#ifdef CONFIG_TRACER_MAX_TRACE
4233	had_max_tr = tr->allocated_snapshot;
4234
4235	if (had_max_tr && !t->use_max_tr) {
4236		/*
4237		 * We need to make sure that the update_max_tr sees that
4238		 * current_trace changed to nop_trace to keep it from
4239		 * swapping the buffers after we resize it.
4240		 * The update_max_tr is called from interrupts disabled
4241		 * so a synchronize_sched() is sufficient.
4242		 */
4243		synchronize_sched();
4244		free_snapshot(tr);
4245	}
4246#endif
4247	/* Currently, only the top instance has options */
4248	if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
4249		destroy_trace_option_files(topts);
4250		topts = create_trace_option_files(tr, t);
4251	}
4252
4253#ifdef CONFIG_TRACER_MAX_TRACE
4254	if (t->use_max_tr && !had_max_tr) {
4255		ret = alloc_snapshot(tr);
4256		if (ret < 0)
4257			goto out;
4258	}
4259#endif
4260
4261	if (t->init) {
4262		ret = tracer_init(t, tr);
4263		if (ret)
4264			goto out;
4265	}
4266
4267	tr->current_trace = t;
4268	tr->current_trace->enabled++;
4269	trace_branch_enable(tr);
4270 out:
4271	mutex_unlock(&trace_types_lock);
4272
4273	return ret;
4274}
4275
4276static ssize_t
4277tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4278			size_t cnt, loff_t *ppos)
4279{
4280	struct trace_array *tr = filp->private_data;
4281	char buf[MAX_TRACER_SIZE+1];
4282	int i;
4283	size_t ret;
4284	int err;
4285
4286	ret = cnt;
4287
4288	if (cnt > MAX_TRACER_SIZE)
4289		cnt = MAX_TRACER_SIZE;
4290
4291	if (copy_from_user(&buf, ubuf, cnt))
4292		return -EFAULT;
4293
4294	buf[cnt] = 0;
4295
4296	/* strip ending whitespace. */
4297	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4298		buf[i] = 0;
4299
4300	err = tracing_set_tracer(tr, buf);
4301	if (err)
4302		return err;
4303
4304	*ppos += ret;
4305
4306	return ret;
4307}
4308
4309static ssize_t
4310tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4311		   size_t cnt, loff_t *ppos)
4312{
4313	char buf[64];
4314	int r;
4315
4316	r = snprintf(buf, sizeof(buf), "%ld\n",
4317		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4318	if (r > sizeof(buf))
4319		r = sizeof(buf);
4320	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4321}
4322
4323static ssize_t
4324tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4325		    size_t cnt, loff_t *ppos)
4326{
4327	unsigned long val;
4328	int ret;
4329
4330	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4331	if (ret)
4332		return ret;
4333
4334	*ptr = val * 1000;
4335
4336	return cnt;
4337}
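/*
 * The two helpers above present latency values to user space in
 * microseconds while the kernel stores them in nanoseconds: reads go
 * through nsecs_to_usecs(), writes multiply the parsed value by 1000.
 * They back both the "tracing_thresh" and "tracing_max_latency" files.
 */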
4338
4339static ssize_t
4340tracing_thresh_read(struct file *filp, char __user *ubuf,
4341		    size_t cnt, loff_t *ppos)
4342{
4343	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4344}
4345
4346static ssize_t
4347tracing_thresh_write(struct file *filp, const char __user *ubuf,
4348		     size_t cnt, loff_t *ppos)
4349{
4350	struct trace_array *tr = filp->private_data;
4351	int ret;
4352
4353	mutex_lock(&trace_types_lock);
4354	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4355	if (ret < 0)
4356		goto out;
4357
4358	if (tr->current_trace->update_thresh) {
4359		ret = tr->current_trace->update_thresh(tr);
4360		if (ret < 0)
4361			goto out;
4362	}
4363
4364	ret = cnt;
4365out:
4366	mutex_unlock(&trace_types_lock);
4367
4368	return ret;
4369}
4370
4371static ssize_t
4372tracing_max_lat_read(struct file *filp, char __user *ubuf,
4373		     size_t cnt, loff_t *ppos)
4374{
4375	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4376}
4377
4378static ssize_t
4379tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4380		      size_t cnt, loff_t *ppos)
4381{
4382	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4383}
4384
4385static int tracing_open_pipe(struct inode *inode, struct file *filp)
4386{
4387	struct trace_array *tr = inode->i_private;
4388	struct trace_iterator *iter;
4389	int ret = 0;
4390
4391	if (tracing_disabled)
4392		return -ENODEV;
4393
4394	if (trace_array_get(tr) < 0)
4395		return -ENODEV;
4396
4397	mutex_lock(&trace_types_lock);
4398
4399	/* create a buffer to store the information to pass to userspace */
4400	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4401	if (!iter) {
4402		ret = -ENOMEM;
4403		__trace_array_put(tr);
4404		goto out;
4405	}
4406
4407	/*
4408	 * We make a copy of the current tracer to avoid concurrent
4409	 * changes on it while we are reading.
4410	 */
4411	iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4412	if (!iter->trace) {
4413		ret = -ENOMEM;
4414		goto fail;
4415	}
4416	*iter->trace = *tr->current_trace;
4417
4418	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4419		ret = -ENOMEM;
4420		goto fail;
4421	}
4422
4423	/* trace pipe does not show start of buffer */
4424	cpumask_setall(iter->started);
4425
4426	if (trace_flags & TRACE_ITER_LATENCY_FMT)
4427		iter->iter_flags |= TRACE_FILE_LAT_FMT;
4428
4429	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4430	if (trace_clocks[tr->clock_id].in_ns)
4431		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4432
4433	iter->tr = tr;
4434	iter->trace_buffer = &tr->trace_buffer;
4435	iter->cpu_file = tracing_get_cpu(inode);
4436	mutex_init(&iter->mutex);
4437	filp->private_data = iter;
4438
4439	if (iter->trace->pipe_open)
4440		iter->trace->pipe_open(iter);
4441
4442	nonseekable_open(inode, filp);
4443out:
4444	mutex_unlock(&trace_types_lock);
4445	return ret;
4446
4447fail:
4448	kfree(iter->trace);
4449	kfree(iter);
4450	__trace_array_put(tr);
4451	mutex_unlock(&trace_types_lock);
4452	return ret;
4453}
4454
4455static int tracing_release_pipe(struct inode *inode, struct file *file)
4456{
4457	struct trace_iterator *iter = file->private_data;
4458	struct trace_array *tr = inode->i_private;
4459
4460	mutex_lock(&trace_types_lock);
4461
4462	if (iter->trace->pipe_close)
4463		iter->trace->pipe_close(iter);
4464
4465	mutex_unlock(&trace_types_lock);
4466
4467	free_cpumask_var(iter->started);
4468	mutex_destroy(&iter->mutex);
4469	kfree(iter->trace);
4470	kfree(iter);
4471
4472	trace_array_put(tr);
4473
4474	return 0;
4475}
4476
4477static unsigned int
4478trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4479{
4480	/* Iterators are static, they should be filled or empty */
4481	if (trace_buffer_iter(iter, iter->cpu_file))
4482		return POLLIN | POLLRDNORM;
4483
4484	if (trace_flags & TRACE_ITER_BLOCK)
4485		/*
4486		 * Always select as readable when in blocking mode
4487		 */
4488		return POLLIN | POLLRDNORM;
4489	else
4490		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4491					     filp, poll_table);
4492}
4493
4494static unsigned int
4495tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4496{
4497	struct trace_iterator *iter = filp->private_data;
4498
4499	return trace_poll(iter, filp, poll_table);
4500}
4501
4502/* Must be called with iter->mutex held; it is dropped around the wait. */
4503static int tracing_wait_pipe(struct file *filp)
4504{
4505	struct trace_iterator *iter = filp->private_data;
4506	int ret;
4507
4508	while (trace_empty(iter)) {
4509
4510		if ((filp->f_flags & O_NONBLOCK)) {
4511			return -EAGAIN;
4512		}
4513
4514		/*
4515		 * We block until we read something and tracing is disabled.
4516		 * We still block if tracing is disabled, but we have never
4517		 * read anything. This allows a user to cat this file, and
4518		 * then enable tracing. But after we have read something,
4519		 * we give an EOF when tracing is again disabled.
4520		 *
4521		 * iter->pos will be 0 if we haven't read anything.
4522		 */
4523		if (!tracing_is_on() && iter->pos)
4524			break;
4525
4526		mutex_unlock(&iter->mutex);
4527
4528		ret = wait_on_pipe(iter, false);
4529
4530		mutex_lock(&iter->mutex);
4531
4532		if (ret)
4533			return ret;
4534	}
4535
4536	return 1;
4537}
4538
4539/*
4540 * Consumer reader.
4541 */
4542static ssize_t
4543tracing_read_pipe(struct file *filp, char __user *ubuf,
4544		  size_t cnt, loff_t *ppos)
4545{
4546	struct trace_iterator *iter = filp->private_data;
4547	struct trace_array *tr = iter->tr;
4548	ssize_t sret;
4549
4550	/* return any leftover data */
4551	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4552	if (sret != -EBUSY)
4553		return sret;
4554
4555	trace_seq_init(&iter->seq);
4556
4557	/* copy the tracer to avoid using a global lock all around */
4558	mutex_lock(&trace_types_lock);
4559	if (unlikely(iter->trace->name != tr->current_trace->name))
4560		*iter->trace = *tr->current_trace;
4561	mutex_unlock(&trace_types_lock);
4562
4563	/*
4564	 * Avoid more than one consumer on a single file descriptor.
4565	 * This is just a matter of trace coherency; the ring buffer itself
4566	 * is protected.
4567	 */
4568	mutex_lock(&iter->mutex);
4569	if (iter->trace->read) {
4570		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4571		if (sret)
4572			goto out;
4573	}
4574
4575waitagain:
4576	sret = tracing_wait_pipe(filp);
4577	if (sret <= 0)
4578		goto out;
4579
4580	/* stop when tracing is finished */
4581	if (trace_empty(iter)) {
4582		sret = 0;
4583		goto out;
4584	}
4585
4586	if (cnt >= PAGE_SIZE)
4587		cnt = PAGE_SIZE - 1;
4588
4589	/* reset all but tr, trace, and overruns */
4590	memset(&iter->seq, 0,
4591	       sizeof(struct trace_iterator) -
4592	       offsetof(struct trace_iterator, seq));
4593	cpumask_clear(iter->started);
4594	iter->pos = -1;
4595
4596	trace_event_read_lock();
4597	trace_access_lock(iter->cpu_file);
4598	while (trace_find_next_entry_inc(iter) != NULL) {
4599		enum print_line_t ret;
4600		int len = iter->seq.len;
4601
4602		ret = print_trace_line(iter);
4603		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4604			/* don't print partial lines */
4605			iter->seq.len = len;
4606			break;
4607		}
4608		if (ret != TRACE_TYPE_NO_CONSUME)
4609			trace_consume(iter);
4610
4611		if (iter->seq.len >= cnt)
4612			break;
4613
4614		/*
4615		 * Setting the full flag means we reached the trace_seq buffer
4616		 * size and we should leave by partial output condition above.
4617		 * One of the trace_seq_* functions is not used properly.
4618		 */
4619		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4620			  iter->ent->type);
4621	}
4622	trace_access_unlock(iter->cpu_file);
4623	trace_event_read_unlock();
4624
4625	/* Now copy what we have to the user */
4626	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4627	if (iter->seq.readpos >= iter->seq.len)
4628		trace_seq_init(&iter->seq);
4629
4630	/*
4631	 * If there was nothing to send to user, in spite of consuming trace
4632	 * entries, go back to wait for more entries.
4633	 */
4634	if (sret == -EBUSY)
4635		goto waitagain;
4636
4637out:
4638	mutex_unlock(&iter->mutex);
4639
4640	return sret;
4641}
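/*
 * Note that this is a consuming read: every entry copied to user space
 * is removed from the ring buffer, unlike the "trace" file which leaves
 * the buffer intact.  A blocked reader, e.g.:
 *
 *   # cat trace_pipe
 *
 * sleeps until events arrive (or returns -EAGAIN with O_NONBLOCK).
 */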
4642
4643static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4644				     unsigned int idx)
4645{
4646	__free_page(spd->pages[idx]);
4647}
4648
4649static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4650	.can_merge		= 0,
4651	.confirm		= generic_pipe_buf_confirm,
4652	.release		= generic_pipe_buf_release,
4653	.steal			= generic_pipe_buf_steal,
4654	.get			= generic_pipe_buf_get,
4655};
4656
4657static size_t
4658tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4659{
4660	size_t count;
4661	int ret;
4662
4663	/* Seq buffer is page-sized, exactly what we need. */
4664	for (;;) {
4665		count = iter->seq.len;
4666		ret = print_trace_line(iter);
4667		count = iter->seq.len - count;
4668		if (rem < count) {
4669			rem = 0;
4670			iter->seq.len -= count;
4671			break;
4672		}
4673		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4674			iter->seq.len -= count;
4675			break;
4676		}
4677
4678		if (ret != TRACE_TYPE_NO_CONSUME)
4679			trace_consume(iter);
4680		rem -= count;
4681		if (!trace_find_next_entry_inc(iter))	{
4682			rem = 0;
4683			iter->ent = NULL;
4684			break;
4685		}
4686	}
4687
4688	return rem;
4689}
4690
4691static ssize_t tracing_splice_read_pipe(struct file *filp,
4692					loff_t *ppos,
4693					struct pipe_inode_info *pipe,
4694					size_t len,
4695					unsigned int flags)
4696{
4697	struct page *pages_def[PIPE_DEF_BUFFERS];
4698	struct partial_page partial_def[PIPE_DEF_BUFFERS];
4699	struct trace_iterator *iter = filp->private_data;
4700	struct splice_pipe_desc spd = {
4701		.pages		= pages_def,
4702		.partial	= partial_def,
4703		.nr_pages	= 0, /* This gets updated below. */
4704		.nr_pages_max	= PIPE_DEF_BUFFERS,
4705		.flags		= flags,
4706		.ops		= &tracing_pipe_buf_ops,
4707		.spd_release	= tracing_spd_release_pipe,
4708	};
4709	struct trace_array *tr = iter->tr;
4710	ssize_t ret;
4711	size_t rem;
4712	unsigned int i;
4713
4714	if (splice_grow_spd(pipe, &spd))
4715		return -ENOMEM;
4716
4717	/* copy the tracer to avoid using a global lock all around */
4718	mutex_lock(&trace_types_lock);
4719	if (unlikely(iter->trace->name != tr->current_trace->name))
4720		*iter->trace = *tr->current_trace;
4721	mutex_unlock(&trace_types_lock);
4722
4723	mutex_lock(&iter->mutex);
4724
4725	if (iter->trace->splice_read) {
4726		ret = iter->trace->splice_read(iter, filp,
4727					       ppos, pipe, len, flags);
4728		if (ret)
4729			goto out_err;
4730	}
4731
4732	ret = tracing_wait_pipe(filp);
4733	if (ret <= 0)
4734		goto out_err;
4735
4736	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4737		ret = -EFAULT;
4738		goto out_err;
4739	}
4740
4741	trace_event_read_lock();
4742	trace_access_lock(iter->cpu_file);
4743
4744	/* Fill as many pages as possible. */
4745	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4746		spd.pages[i] = alloc_page(GFP_KERNEL);
4747		if (!spd.pages[i])
4748			break;
4749
4750		rem = tracing_fill_pipe_page(rem, iter);
4751
4752		/* Copy the data into the page, so we can start over. */
4753		ret = trace_seq_to_buffer(&iter->seq,
4754					  page_address(spd.pages[i]),
4755					  iter->seq.len);
4756		if (ret < 0) {
4757			__free_page(spd.pages[i]);
4758			break;
4759		}
4760		spd.partial[i].offset = 0;
4761		spd.partial[i].len = iter->seq.len;
4762
4763		trace_seq_init(&iter->seq);
4764	}
4765
4766	trace_access_unlock(iter->cpu_file);
4767	trace_event_read_unlock();
4768	mutex_unlock(&iter->mutex);
4769
4770	spd.nr_pages = i;
4771
4772	ret = splice_to_pipe(pipe, &spd);
4773out:
4774	splice_shrink_spd(&spd);
4775	return ret;
4776
4777out_err:
4778	mutex_unlock(&iter->mutex);
4779	goto out;
4780}
4781
4782static ssize_t
4783tracing_entries_read(struct file *filp, char __user *ubuf,
4784		     size_t cnt, loff_t *ppos)
4785{
4786	struct inode *inode = file_inode(filp);
4787	struct trace_array *tr = inode->i_private;
4788	int cpu = tracing_get_cpu(inode);
4789	char buf[64];
4790	int r = 0;
4791	ssize_t ret;
4792
4793	mutex_lock(&trace_types_lock);
4794
4795	if (cpu == RING_BUFFER_ALL_CPUS) {
4796		int cpu, buf_size_same;
4797		unsigned long size;
4798
4799		size = 0;
4800		buf_size_same = 1;
4801		/* check if all cpu sizes are same */
4802		for_each_tracing_cpu(cpu) {
4803			/* fill in the size from first enabled cpu */
4804			if (size == 0)
4805				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4806			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4807				buf_size_same = 0;
4808				break;
4809			}
4810		}
4811
4812		if (buf_size_same) {
4813			if (!ring_buffer_expanded)
4814				r = sprintf(buf, "%lu (expanded: %lu)\n",
4815					    size >> 10,
4816					    trace_buf_size >> 10);
4817			else
4818				r = sprintf(buf, "%lu\n", size >> 10);
4819		} else
4820			r = sprintf(buf, "X\n");
4821	} else
4822		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4823
4824	mutex_unlock(&trace_types_lock);
4825
4826	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4827	return ret;
4828}
4829
4830static ssize_t
4831tracing_entries_write(struct file *filp, const char __user *ubuf,
4832		      size_t cnt, loff_t *ppos)
4833{
4834	struct inode *inode = file_inode(filp);
4835	struct trace_array *tr = inode->i_private;
4836	unsigned long val;
4837	int ret;
4838
4839	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4840	if (ret)
4841		return ret;
4842
4843	/* must have at least 1 entry */
4844	if (!val)
4845		return -EINVAL;
4846
4847	/* value is in KB */
4848	val <<= 10;
4849	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4850	if (ret < 0)
4851		return ret;
4852
4853	*ppos += cnt;
4854
4855	return cnt;
4856}
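/*
 * These handlers back the "buffer_size_kb" files.  The value is per cpu
 * and in kilobytes; reading the top-level file prints "X" when the
 * per-cpu sizes differ and "(expanded: N)" before the first real use of
 * the buffers.  For example:
 *
 *   # echo 4096 > buffer_size_kb
 *   # cat per_cpu/cpu0/buffer_size_kb
 */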
4857
4858static ssize_t
4859tracing_total_entries_read(struct file *filp, char __user *ubuf,
4860				size_t cnt, loff_t *ppos)
4861{
4862	struct trace_array *tr = filp->private_data;
4863	char buf[64];
4864	int r, cpu;
4865	unsigned long size = 0, expanded_size = 0;
4866
4867	mutex_lock(&trace_types_lock);
4868	for_each_tracing_cpu(cpu) {
4869		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4870		if (!ring_buffer_expanded)
4871			expanded_size += trace_buf_size >> 10;
4872	}
4873	if (ring_buffer_expanded)
4874		r = sprintf(buf, "%lu\n", size);
4875	else
4876		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4877	mutex_unlock(&trace_types_lock);
4878
4879	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4880}
4881
4882static ssize_t
4883tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4884			  size_t cnt, loff_t *ppos)
4885{
4886	/*
4887	 * There is no need to read what the user has written; this function
4888	 * just makes sure that there is no error when "echo" is used.
4889	 */
4890
4891	*ppos += cnt;
4892
4893	return cnt;
4894}
4895
4896static int
4897tracing_free_buffer_release(struct inode *inode, struct file *filp)
4898{
4899	struct trace_array *tr = inode->i_private;
4900
4901	/* disable tracing ? */
4902	if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4903		tracer_tracing_off(tr);
4904	/* resize the ring buffer to 0 */
4905	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4906
4907	trace_array_put(tr);
4908
4909	return 0;
4910}
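/*
 * "free_buffer" exists so the ring buffer memory can be handed back:
 * any write is accepted, and closing the file shrinks the buffers of
 * this instance to zero (optionally stopping tracing first when
 * TRACE_ITER_STOP_ON_FREE is set).
 */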
4911
4912static ssize_t
4913tracing_mark_write(struct file *filp, const char __user *ubuf,
4914					size_t cnt, loff_t *fpos)
4915{
4916	unsigned long addr = (unsigned long)ubuf;
4917	struct trace_array *tr = filp->private_data;
4918	struct ring_buffer_event *event;
4919	struct ring_buffer *buffer;
4920	struct print_entry *entry;
4921	unsigned long irq_flags;
4922	struct page *pages[2];
4923	void *map_page[2];
4924	int nr_pages = 1;
4925	ssize_t written;
4926	int offset;
4927	int size;
4928	int len;
4929	int ret;
4930	int i;
4931
4932	if (tracing_disabled)
4933		return -EINVAL;
4934
4935	if (!(trace_flags & TRACE_ITER_MARKERS))
4936		return -EINVAL;
4937
4938	if (cnt > TRACE_BUF_SIZE)
4939		cnt = TRACE_BUF_SIZE;
4940
4941	/*
4942	 * Userspace is injecting traces into the kernel trace buffer.
4943	 * We want to be as non intrusive as possible.
4944	 * To do so, we do not want to allocate any special buffers
4945	 * or take any locks, but instead write the userspace data
4946	 * straight into the ring buffer.
4947	 *
4948	 * First we need to pin the userspace buffer into memory. It most
4949	 * likely already is, because userspace just referenced it, but
4950	 * there is no guarantee. By using get_user_pages_fast()
4951	 * and kmap_atomic/kunmap_atomic() we can get access to the
4952	 * pages directly. We then write the data directly into the
4953	 * ring buffer.
4954	 */
4955	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4956
4957	/* check if we cross pages */
4958	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4959		nr_pages = 2;
4960
4961	offset = addr & (PAGE_SIZE - 1);
4962	addr &= PAGE_MASK;
4963
4964	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4965	if (ret < nr_pages) {
4966		while (--ret >= 0)
4967			put_page(pages[ret]);
4968		written = -EFAULT;
4969		goto out;
4970	}
4971
4972	for (i = 0; i < nr_pages; i++)
4973		map_page[i] = kmap_atomic(pages[i]);
4974
4975	local_save_flags(irq_flags);
4976	size = sizeof(*entry) + cnt + 2; /* room for a possible '\n' plus '\0' */
4977	buffer = tr->trace_buffer.buffer;
4978	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4979					  irq_flags, preempt_count());
4980	if (!event) {
4981		/* Ring buffer disabled, return as if not open for write */
4982		written = -EBADF;
4983		goto out_unlock;
4984	}
4985
4986	entry = ring_buffer_event_data(event);
4987	entry->ip = _THIS_IP_;
4988
4989	if (nr_pages == 2) {
4990		len = PAGE_SIZE - offset;
4991		memcpy(&entry->buf, map_page[0] + offset, len);
4992		memcpy(&entry->buf[len], map_page[1], cnt - len);
4993	} else
4994		memcpy(&entry->buf, map_page[0] + offset, cnt);
4995
4996	if (entry->buf[cnt - 1] != '\n') {
4997		entry->buf[cnt] = '\n';
4998		entry->buf[cnt + 1] = '\0';
4999	} else
5000		entry->buf[cnt] = '\0';
5001
5002	__buffer_unlock_commit(buffer, event);
5003
5004	written = cnt;
5005
5006	*fpos += written;
5007
5008 out_unlock:
5009	for (i = 0; i < nr_pages; i++) {
5010		kunmap_atomic(map_page[i]);
5011		put_page(pages[i]);
5012	}
5013 out:
5014	return written;
5015}
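/*
 * This is the handler behind the "trace_marker" file: user space can
 * inject a line straight into the ring buffer, e.g.:
 *
 *   # echo "hit the interesting spot" > trace_marker
 *
 * Writes are capped at TRACE_BUF_SIZE (less than a page) and show up in
 * the trace as print entries stamped with the writer's context.
 */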
5016
5017static int tracing_clock_show(struct seq_file *m, void *v)
5018{
5019	struct trace_array *tr = m->private;
5020	int i;
5021
5022	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5023		seq_printf(m,
5024			"%s%s%s%s", i ? " " : "",
5025			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5026			i == tr->clock_id ? "]" : "");
5027	seq_putc(m, '\n');
5028
5029	return 0;
5030}
5031
5032static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5033{
5034	int i;
5035
5036	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5037		if (strcmp(trace_clocks[i].name, clockstr) == 0)
5038			break;
5039	}
5040	if (i == ARRAY_SIZE(trace_clocks))
5041		return -EINVAL;
5042
5043	mutex_lock(&trace_types_lock);
5044
5045	tr->clock_id = i;
5046
5047	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5048
5049	/*
5050	 * New clock may not be consistent with the previous clock.
5051	 * Reset the buffer so that it doesn't have incomparable timestamps.
5052	 */
5053	tracing_reset_online_cpus(&tr->trace_buffer);
5054
5055#ifdef CONFIG_TRACER_MAX_TRACE
5056	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5057		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5058	tracing_reset_online_cpus(&tr->max_buffer);
5059#endif
5060
5061	mutex_unlock(&trace_types_lock);
5062
5063	return 0;
5064}
5065
5066static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5067				   size_t cnt, loff_t *fpos)
5068{
5069	struct seq_file *m = filp->private_data;
5070	struct trace_array *tr = m->private;
5071	char buf[64];
5072	const char *clockstr;
5073	int ret;
5074
5075	if (cnt >= sizeof(buf))
5076		return -EINVAL;
5077
5078	if (copy_from_user(&buf, ubuf, cnt))
5079		return -EFAULT;
5080
5081	buf[cnt] = 0;
5082
5083	clockstr = strstrip(buf);
5084
5085	ret = tracing_set_clock(tr, clockstr);
5086	if (ret)
5087		return ret;
5088
5089	*fpos += cnt;
5090
5091	return cnt;
5092}
5093
5094static int tracing_clock_open(struct inode *inode, struct file *file)
5095{
5096	struct trace_array *tr = inode->i_private;
5097	int ret;
5098
5099	if (tracing_disabled)
5100		return -ENODEV;
5101
5102	if (trace_array_get(tr))
5103		return -ENODEV;
5104
5105	ret = single_open(file, tracing_clock_show, inode->i_private);
5106	if (ret < 0)
5107		trace_array_put(tr);
5108
5109	return ret;
5110}
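/*
 * The "trace_clock" file lists the available clocks with the active one
 * in brackets; writing a name switches the clock and resets the buffers,
 * since old and new timestamps would not be comparable.  For example
 * (the exact clock list varies):
 *
 *   # cat trace_clock
 *   [local] global counter ...
 *   # echo global > trace_clock
 */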
5111
5112struct ftrace_buffer_info {
5113	struct trace_iterator	iter;
5114	void			*spare;
5115	unsigned int		read;
5116};
5117
5118#ifdef CONFIG_TRACER_SNAPSHOT
5119static int tracing_snapshot_open(struct inode *inode, struct file *file)
5120{
5121	struct trace_array *tr = inode->i_private;
5122	struct trace_iterator *iter;
5123	struct seq_file *m;
5124	int ret = 0;
5125
5126	if (trace_array_get(tr) < 0)
5127		return -ENODEV;
5128
5129	if (file->f_mode & FMODE_READ) {
5130		iter = __tracing_open(inode, file, true);
5131		if (IS_ERR(iter))
5132			ret = PTR_ERR(iter);
5133	} else {
5134		/* Writes still need the seq_file to hold the private data */
5135		ret = -ENOMEM;
5136		m = kzalloc(sizeof(*m), GFP_KERNEL);
5137		if (!m)
5138			goto out;
5139		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5140		if (!iter) {
5141			kfree(m);
5142			goto out;
5143		}
5144		ret = 0;
5145
5146		iter->tr = tr;
5147		iter->trace_buffer = &tr->max_buffer;
5148		iter->cpu_file = tracing_get_cpu(inode);
5149		m->private = iter;
5150		file->private_data = m;
5151	}
5152out:
5153	if (ret < 0)
5154		trace_array_put(tr);
5155
5156	return ret;
5157}
5158
5159static ssize_t
5160tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5161		       loff_t *ppos)
5162{
5163	struct seq_file *m = filp->private_data;
5164	struct trace_iterator *iter = m->private;
5165	struct trace_array *tr = iter->tr;
5166	unsigned long val;
5167	int ret;
5168
5169	ret = tracing_update_buffers();
5170	if (ret < 0)
5171		return ret;
5172
5173	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5174	if (ret)
5175		return ret;
5176
5177	mutex_lock(&trace_types_lock);
5178
5179	if (tr->current_trace->use_max_tr) {
5180		ret = -EBUSY;
5181		goto out;
5182	}
5183
5184	switch (val) {
5185	case 0:
5186		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5187			ret = -EINVAL;
5188			break;
5189		}
5190		if (tr->allocated_snapshot)
5191			free_snapshot(tr);
5192		break;
5193	case 1:
5194/* Only allow per-cpu swap if the ring buffer supports it */
5195#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5196		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5197			ret = -EINVAL;
5198			break;
5199		}
5200#endif
5201		if (!tr->allocated_snapshot) {
5202			ret = alloc_snapshot(tr);
5203			if (ret < 0)
5204				break;
5205		}
5206		local_irq_disable();
5207		/* Now, we're going to swap */
5208		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5209			update_max_tr(tr, current, smp_processor_id());
5210		else
5211			update_max_tr_single(tr, current, iter->cpu_file);
5212		local_irq_enable();
5213		break;
5214	default:
5215		if (tr->allocated_snapshot) {
5216			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5217				tracing_reset_online_cpus(&tr->max_buffer);
5218			else
5219				tracing_reset(&tr->max_buffer, iter->cpu_file);
5220		}
5221		break;
5222	}
5223
5224	if (ret >= 0) {
5225		*ppos += cnt;
5226		ret = cnt;
5227	}
5228out:
5229	mutex_unlock(&trace_types_lock);
5230	return ret;
5231}
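/*
 * Writing to the "snapshot" file drives the switch above: "0" frees the
 * snapshot buffer, "1" allocates it if needed and swaps the live buffer
 * into it, and any other value clears the snapshot's contents.  The
 * per-cpu snapshot files can only swap when the ring buffer supports
 * per-cpu swapping.  For example:
 *
 *   # echo 1 > snapshot
 *   # cat snapshot
 */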
5232
5233static int tracing_snapshot_release(struct inode *inode, struct file *file)
5234{
5235	struct seq_file *m = file->private_data;
5236	int ret;
5237
5238	ret = tracing_release(inode, file);
5239
5240	if (file->f_mode & FMODE_READ)
5241		return ret;
5242
5243	/* If write only, the seq_file is just a stub */
5244	if (m)
5245		kfree(m->private);
5246	kfree(m);
5247
5248	return 0;
5249}
5250
5251static int tracing_buffers_open(struct inode *inode, struct file *filp);
5252static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5253				    size_t count, loff_t *ppos);
5254static int tracing_buffers_release(struct inode *inode, struct file *file);
5255static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5256		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5257
5258static int snapshot_raw_open(struct inode *inode, struct file *filp)
5259{
5260	struct ftrace_buffer_info *info;
5261	int ret;
5262
5263	ret = tracing_buffers_open(inode, filp);
5264	if (ret < 0)
5265		return ret;
5266
5267	info = filp->private_data;
5268
5269	if (info->iter.trace->use_max_tr) {
5270		tracing_buffers_release(inode, filp);
5271		return -EBUSY;
5272	}
5273
5274	info->iter.snapshot = true;
5275	info->iter.trace_buffer = &info->iter.tr->max_buffer;
5276
5277	return ret;
5278}
5279
5280#endif /* CONFIG_TRACER_SNAPSHOT */
5281
5282
5283static const struct file_operations tracing_thresh_fops = {
5284	.open		= tracing_open_generic,
5285	.read		= tracing_thresh_read,
5286	.write		= tracing_thresh_write,
5287	.llseek		= generic_file_llseek,
5288};
5289
5290static const struct file_operations tracing_max_lat_fops = {
5291	.open		= tracing_open_generic,
5292	.read		= tracing_max_lat_read,
5293	.write		= tracing_max_lat_write,
5294	.llseek		= generic_file_llseek,
5295};
5296
5297static const struct file_operations set_tracer_fops = {
5298	.open		= tracing_open_generic,
5299	.read		= tracing_set_trace_read,
5300	.write		= tracing_set_trace_write,
5301	.llseek		= generic_file_llseek,
5302};
5303
5304static const struct file_operations tracing_pipe_fops = {
5305	.open		= tracing_open_pipe,
5306	.poll		= tracing_poll_pipe,
5307	.read		= tracing_read_pipe,
5308	.splice_read	= tracing_splice_read_pipe,
5309	.release	= tracing_release_pipe,
5310	.llseek		= no_llseek,
5311};
5312
5313static const struct file_operations tracing_entries_fops = {
5314	.open		= tracing_open_generic_tr,
5315	.read		= tracing_entries_read,
5316	.write		= tracing_entries_write,
5317	.llseek		= generic_file_llseek,
5318	.release	= tracing_release_generic_tr,
5319};
5320
5321static const struct file_operations tracing_total_entries_fops = {
5322	.open		= tracing_open_generic_tr,
5323	.read		= tracing_total_entries_read,
5324	.llseek		= generic_file_llseek,
5325	.release	= tracing_release_generic_tr,
5326};
5327
5328static const struct file_operations tracing_free_buffer_fops = {
5329	.open		= tracing_open_generic_tr,
5330	.write		= tracing_free_buffer_write,
5331	.release	= tracing_free_buffer_release,
5332};
5333
5334static const struct file_operations tracing_mark_fops = {
5335	.open		= tracing_open_generic_tr,
5336	.write		= tracing_mark_write,
5337	.llseek		= generic_file_llseek,
5338	.release	= tracing_release_generic_tr,
5339};
5340
5341static const struct file_operations trace_clock_fops = {
5342	.open		= tracing_clock_open,
5343	.read		= seq_read,
5344	.llseek		= seq_lseek,
5345	.release	= tracing_single_release_tr,
5346	.write		= tracing_clock_write,
5347};
5348
5349#ifdef CONFIG_TRACER_SNAPSHOT
5350static const struct file_operations snapshot_fops = {
5351	.open		= tracing_snapshot_open,
5352	.read		= seq_read,
5353	.write		= tracing_snapshot_write,
5354	.llseek		= tracing_lseek,
5355	.release	= tracing_snapshot_release,
5356};
5357
5358static const struct file_operations snapshot_raw_fops = {
5359	.open		= snapshot_raw_open,
5360	.read		= tracing_buffers_read,
5361	.release	= tracing_buffers_release,
5362	.splice_read	= tracing_buffers_splice_read,
5363	.llseek		= no_llseek,
5364};
5365
5366#endif /* CONFIG_TRACER_SNAPSHOT */
5367
5368static int tracing_buffers_open(struct inode *inode, struct file *filp)
5369{
5370	struct trace_array *tr = inode->i_private;
5371	struct ftrace_buffer_info *info;
5372	int ret;
5373
5374	if (tracing_disabled)
5375		return -ENODEV;
5376
5377	if (trace_array_get(tr) < 0)
5378		return -ENODEV;
5379
5380	info = kzalloc(sizeof(*info), GFP_KERNEL);
5381	if (!info) {
5382		trace_array_put(tr);
5383		return -ENOMEM;
5384	}
5385
5386	mutex_lock(&trace_types_lock);
5387
5388	info->iter.tr		= tr;
5389	info->iter.cpu_file	= tracing_get_cpu(inode);
5390	info->iter.trace	= tr->current_trace;
5391	info->iter.trace_buffer = &tr->trace_buffer;
5392	info->spare		= NULL;
5393	/* Force reading ring buffer for first read */
5394	info->read		= (unsigned int)-1;
5395
5396	filp->private_data = info;
5397
5398	mutex_unlock(&trace_types_lock);
5399
5400	ret = nonseekable_open(inode, filp);
5401	if (ret < 0)
5402		trace_array_put(tr);
5403
5404	return ret;
5405}
5406
5407static unsigned int
5408tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5409{
5410	struct ftrace_buffer_info *info = filp->private_data;
5411	struct trace_iterator *iter = &info->iter;
5412
5413	return trace_poll(iter, filp, poll_table);
5414}
5415
5416static ssize_t
5417tracing_buffers_read(struct file *filp, char __user *ubuf,
5418		     size_t count, loff_t *ppos)
5419{
5420	struct ftrace_buffer_info *info = filp->private_data;
5421	struct trace_iterator *iter = &info->iter;
5422	ssize_t ret;
5423	ssize_t size;
5424
5425	if (!count)
5426		return 0;
5427
5428	mutex_lock(&trace_types_lock);
5429
5430#ifdef CONFIG_TRACER_MAX_TRACE
5431	if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5432		size = -EBUSY;
5433		goto out_unlock;
5434	}
5435#endif
5436
5437	if (!info->spare)
5438		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5439							  iter->cpu_file);
5440	size = -ENOMEM;
5441	if (!info->spare)
5442		goto out_unlock;
5443
5444	/* Do we have previous read data to read? */
5445	if (info->read < PAGE_SIZE)
5446		goto read;
5447
5448 again:
5449	trace_access_lock(iter->cpu_file);
5450	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5451				    &info->spare,
5452				    count,
5453				    iter->cpu_file, 0);
5454	trace_access_unlock(iter->cpu_file);
5455
5456	if (ret < 0) {
5457		if (trace_empty(iter)) {
5458			if ((filp->f_flags & O_NONBLOCK)) {
5459				size = -EAGAIN;
5460				goto out_unlock;
5461			}
5462			mutex_unlock(&trace_types_lock);
5463			ret = wait_on_pipe(iter, false);
5464			mutex_lock(&trace_types_lock);
5465			if (ret) {
5466				size = ret;
5467				goto out_unlock;
5468			}
5469			goto again;
5470		}
5471		size = 0;
5472		goto out_unlock;
5473	}
5474
5475	info->read = 0;
5476 read:
5477	size = PAGE_SIZE - info->read;
5478	if (size > count)
5479		size = count;
5480
5481	ret = copy_to_user(ubuf, info->spare + info->read, size);
5482	if (ret == size) {
5483		size = -EFAULT;
5484		goto out_unlock;
5485	}
5486	size -= ret;
5487
5488	*ppos += size;
5489	info->read += size;
5490
5491 out_unlock:
5492	mutex_unlock(&trace_types_lock);
5493
5494	return size;
5495}
5496
5497static int tracing_buffers_release(struct inode *inode, struct file *file)
5498{
5499	struct ftrace_buffer_info *info = file->private_data;
5500	struct trace_iterator *iter = &info->iter;
5501
5502	mutex_lock(&trace_types_lock);
5503
5504	__trace_array_put(iter->tr);
5505
5506	if (info->spare)
5507		ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5508	kfree(info);
5509
5510	mutex_unlock(&trace_types_lock);
5511
5512	return 0;
5513}
5514
5515struct buffer_ref {
5516	struct ring_buffer	*buffer;
5517	void			*page;
5518	int			ref;
5519};
5520
5521static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5522				    struct pipe_buffer *buf)
5523{
5524	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5525
5526	if (--ref->ref)
5527		return;
5528
5529	ring_buffer_free_read_page(ref->buffer, ref->page);
5530	kfree(ref);
5531	buf->private = 0;
5532}
5533
5534static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5535				struct pipe_buffer *buf)
5536{
5537	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5538
5539	ref->ref++;
5540}
5541
5542/* Pipe buffer operations for a buffer. */
5543static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5544	.can_merge		= 0,
5545	.confirm		= generic_pipe_buf_confirm,
5546	.release		= buffer_pipe_buf_release,
5547	.steal			= generic_pipe_buf_steal,
5548	.get			= buffer_pipe_buf_get,
5549};
5550
5551/*
5552 * Callback from splice_to_pipe(), if we need to release some pages
5553 * at the end of the spd in case we error'ed out in filling the pipe.
5554 */
5555static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5556{
5557	struct buffer_ref *ref =
5558		(struct buffer_ref *)spd->partial[i].private;
5559
5560	if (--ref->ref)
5561		return;
5562
5563	ring_buffer_free_read_page(ref->buffer, ref->page);
5564	kfree(ref);
5565	spd->partial[i].private = 0;
5566}
5567
5568static ssize_t
5569tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5570			    struct pipe_inode_info *pipe, size_t len,
5571			    unsigned int flags)
5572{
5573	struct ftrace_buffer_info *info = file->private_data;
5574	struct trace_iterator *iter = &info->iter;
5575	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5576	struct page *pages_def[PIPE_DEF_BUFFERS];
5577	struct splice_pipe_desc spd = {
5578		.pages		= pages_def,
5579		.partial	= partial_def,
5580		.nr_pages_max	= PIPE_DEF_BUFFERS,
5581		.flags		= flags,
5582		.ops		= &buffer_pipe_buf_ops,
5583		.spd_release	= buffer_spd_release,
5584	};
5585	struct buffer_ref *ref;
5586	int entries, size, i;
5587	ssize_t ret = 0;
5588
5589	mutex_lock(&trace_types_lock);
5590
5591#ifdef CONFIG_TRACER_MAX_TRACE
5592	if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5593		ret = -EBUSY;
5594		goto out;
5595	}
5596#endif
5597
5598	if (splice_grow_spd(pipe, &spd)) {
5599		ret = -ENOMEM;
5600		goto out;
5601	}
5602
5603	if (*ppos & (PAGE_SIZE - 1)) {
5604		ret = -EINVAL;
5605		goto out;
5606	}
5607
5608	if (len & (PAGE_SIZE - 1)) {
5609		if (len < PAGE_SIZE) {
5610			ret = -EINVAL;
5611			goto out;
5612		}
5613		len &= PAGE_MASK;
5614	}
5615
5616 again:
5617	trace_access_lock(iter->cpu_file);
5618	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5619
5620	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5621		struct page *page;
5622		int r;
5623
5624		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5625		if (!ref) {
5626			ret = -ENOMEM;
5627			break;
5628		}
5629
5630		ref->ref = 1;
5631		ref->buffer = iter->trace_buffer->buffer;
5632		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5633		if (!ref->page) {
5634			ret = -ENOMEM;
5635			kfree(ref);
5636			break;
5637		}
5638
5639		r = ring_buffer_read_page(ref->buffer, &ref->page,
5640					  len, iter->cpu_file, 1);
5641		if (r < 0) {
5642			ring_buffer_free_read_page(ref->buffer, ref->page);
5643			kfree(ref);
5644			break;
5645		}
5646
5647		/*
5648		 * Zero out any leftover data; this page is going
5649		 * to user land.
5650		 */
5651		size = ring_buffer_page_len(ref->page);
5652		if (size < PAGE_SIZE)
5653			memset(ref->page + size, 0, PAGE_SIZE - size);
5654
5655		page = virt_to_page(ref->page);
5656
5657		spd.pages[i] = page;
5658		spd.partial[i].len = PAGE_SIZE;
5659		spd.partial[i].offset = 0;
5660		spd.partial[i].private = (unsigned long)ref;
5661		spd.nr_pages++;
5662		*ppos += PAGE_SIZE;
5663
5664		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5665	}
5666
5667	trace_access_unlock(iter->cpu_file);
5668	spd.nr_pages = i;
5669
5670	/* did we read anything? */
5671	if (!spd.nr_pages) {
5672		if (ret)
5673			goto out;
5674
5675		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5676			ret = -EAGAIN;
5677			goto out;
5678		}
5679		mutex_unlock(&trace_types_lock);
5680		ret = wait_on_pipe(iter, true);
5681		mutex_lock(&trace_types_lock);
5682		if (ret)
5683			goto out;
5684
5685		goto again;
5686	}
5687
5688	ret = splice_to_pipe(pipe, &spd);
5689	splice_shrink_spd(&spd);
5690out:
5691	mutex_unlock(&trace_types_lock);
5692
5693	return ret;
5694}
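/*
 * The splice path above feeds whole ring-buffer pages to a pipe with no
 * copying, which is why it insists on a page-aligned *ppos and trims the
 * length down to a multiple of PAGE_SIZE.  It backs the per-cpu
 * "trace_pipe_raw" files used by tools that consume the binary format.
 */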
5695
5696static const struct file_operations tracing_buffers_fops = {
5697	.open		= tracing_buffers_open,
5698	.read		= tracing_buffers_read,
5699	.poll		= tracing_buffers_poll,
5700	.release	= tracing_buffers_release,
5701	.splice_read	= tracing_buffers_splice_read,
5702	.llseek		= no_llseek,
5703};
5704
5705static ssize_t
5706tracing_stats_read(struct file *filp, char __user *ubuf,
5707		   size_t count, loff_t *ppos)
5708{
5709	struct inode *inode = file_inode(filp);
5710	struct trace_array *tr = inode->i_private;
5711	struct trace_buffer *trace_buf = &tr->trace_buffer;
5712	int cpu = tracing_get_cpu(inode);
5713	struct trace_seq *s;
5714	unsigned long cnt;
5715	unsigned long long t;
5716	unsigned long usec_rem;
5717
5718	s = kmalloc(sizeof(*s), GFP_KERNEL);
5719	if (!s)
5720		return -ENOMEM;
5721
5722	trace_seq_init(s);
5723
5724	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5725	trace_seq_printf(s, "entries: %ld\n", cnt);
5726
5727	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5728	trace_seq_printf(s, "overrun: %ld\n", cnt);
5729
5730	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5731	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5732
5733	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5734	trace_seq_printf(s, "bytes: %ld\n", cnt);
5735
5736	if (trace_clocks[tr->clock_id].in_ns) {
5737		/* local or global for trace_clock */
5738		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5739		usec_rem = do_div(t, USEC_PER_SEC);
5740		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5741								t, usec_rem);
5742
5743		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5744		usec_rem = do_div(t, USEC_PER_SEC);
5745		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5746	} else {
5747		/* counter or tsc mode for trace_clock */
5748		trace_seq_printf(s, "oldest event ts: %llu\n",
5749				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5750
5751		trace_seq_printf(s, "now ts: %llu\n",
5752				ring_buffer_time_stamp(trace_buf->buffer, cpu));
5753	}
5754
5755	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5756	trace_seq_printf(s, "dropped events: %ld\n", cnt);
5757
5758	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5759	trace_seq_printf(s, "read events: %ld\n", cnt);
5760
5761	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5762
5763	kfree(s);
5764
5765	return count;
5766}
5767
5768static const struct file_operations tracing_stats_fops = {
5769	.open		= tracing_open_generic_tr,
5770	.read		= tracing_stats_read,
5771	.llseek		= generic_file_llseek,
5772	.release	= tracing_release_generic_tr,
5773};
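/*
 * Example of the per-cpu "stats" output produced above (the values are
 * of course system dependent):
 *
 *   entries: 105
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 6440
 *   oldest event ts:   412.103238
 *   now ts:   415.293288
 *   dropped events: 0
 *   read events: 0
 */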
5774
5775#ifdef CONFIG_DYNAMIC_FTRACE
5776
5777int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5778{
5779	return 0;
5780}
5781
5782static ssize_t
5783tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5784		  size_t cnt, loff_t *ppos)
5785{
5786	static char ftrace_dyn_info_buffer[1024];
5787	static DEFINE_MUTEX(dyn_info_mutex);
5788	unsigned long *p = filp->private_data;
5789	char *buf = ftrace_dyn_info_buffer;
5790	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5791	int r;
5792
5793	mutex_lock(&dyn_info_mutex);
5794	r = sprintf(buf, "%ld ", *p);
5795
5796	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5797	buf[r++] = '\n';
5798
5799	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5800
5801	mutex_unlock(&dyn_info_mutex);
5802
5803	return r;
5804}
5805
5806static const struct file_operations tracing_dyn_info_fops = {
5807	.open		= tracing_open_generic,
5808	.read		= tracing_read_dyn_info,
5809	.llseek		= generic_file_llseek,
5810};
5811#endif /* CONFIG_DYNAMIC_FTRACE */
5812
5813#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5814static void
5815ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5816{
5817	tracing_snapshot();
5818}
5819
5820static void
5821ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5822{
5823	unsigned long *count = (unsigned long *)data;
5824
5825	if (!*count)
5826		return;
5827
5828	if (*count != -1)
5829		(*count)--;
5830
5831	tracing_snapshot();
5832}
5833
5834static int
5835ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5836		      struct ftrace_probe_ops *ops, void *data)
5837{
5838	long count = (long)data;
5839
5840	seq_printf(m, "%ps:", (void *)ip);
5841
5842	seq_puts(m, "snapshot");
5843
5844	if (count == -1)
5845		seq_puts(m, ":unlimited\n");
5846	else
5847		seq_printf(m, ":count=%ld\n", count);
5848
5849	return 0;
5850}
5851
5852static struct ftrace_probe_ops snapshot_probe_ops = {
5853	.func			= ftrace_snapshot,
5854	.print			= ftrace_snapshot_print,
5855};
5856
5857static struct ftrace_probe_ops snapshot_count_probe_ops = {
5858	.func			= ftrace_count_snapshot,
5859	.print			= ftrace_snapshot_print,
5860};
5861
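/*
 * "snapshot" is also available as an ftrace function command, parsed by
 * the callback below via set_ftrace_filter, e.g.:
 *
 *   # echo 'schedule:snapshot' > set_ftrace_filter
 *   # echo 'schedule:snapshot:5' > set_ftrace_filter
 *   # echo '!schedule:snapshot' >> set_ftrace_filter
 *
 * With a count, only the first N hits take a snapshot; a leading '!'
 * removes the probe again.
 */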
5862static int
5863ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5864			       char *glob, char *cmd, char *param, int enable)
5865{
5866	struct ftrace_probe_ops *ops;
5867	void *count = (void *)-1;
5868	char *number;
5869	int ret;
5870
5871	/* hash funcs only work with set_ftrace_filter */
5872	if (!enable)
5873		return -EINVAL;
5874
5875	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5876
5877	if (glob[0] == '!') {
5878		unregister_ftrace_function_probe_func(glob+1, ops);
5879		return 0;
5880	}
5881
5882	if (!param)
5883		goto out_reg;
5884
5885	number = strsep(&param, ":");
5886
5887	if (!strlen(number))
5888		goto out_reg;
5889
5890	/*
5891	 * We use the callback data field (which is a pointer)
5892	 * as our counter.
5893	 */
5894	ret = kstrtoul(number, 0, (unsigned long *)&count);
5895	if (ret)
5896		return ret;
5897
5898 out_reg:
5899	ret = register_ftrace_function_probe(glob, ops, count);
5900
5901	if (ret >= 0)
5902		alloc_snapshot(&global_trace);
5903
5904	return ret < 0 ? ret : 0;
5905}
5906
5907static struct ftrace_func_command ftrace_snapshot_cmd = {
5908	.name			= "snapshot",
5909	.func			= ftrace_trace_snapshot_callback,
5910};
5911
5912static __init int register_snapshot_cmd(void)
5913{
5914	return register_ftrace_command(&ftrace_snapshot_cmd);
5915}
5916#else
5917static inline __init int register_snapshot_cmd(void) { return 0; }
5918#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5919
5920struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5921{
5922	if (tr->dir)
5923		return tr->dir;
5924
5925	if (!debugfs_initialized())
5926		return NULL;
5927
5928	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5929		tr->dir = debugfs_create_dir("tracing", NULL);
5930
5931	if (!tr->dir)
5932		pr_warn_once("Could not create debugfs directory 'tracing'\n");
5933
5934	return tr->dir;
5935}
5936
5937struct dentry *tracing_init_dentry(void)
5938{
5939	return tracing_init_dentry_tr(&global_trace);
5940}
5941
5942static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5943{
5944	struct dentry *d_tracer;
5945
5946	if (tr->percpu_dir)
5947		return tr->percpu_dir;
5948
5949	d_tracer = tracing_init_dentry_tr(tr);
5950	if (!d_tracer)
5951		return NULL;
5952
5953	tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5954
5955	WARN_ONCE(!tr->percpu_dir,
5956		  "Could not create debugfs directory 'per_cpu' (cpu %d)\n", cpu);
5957
5958	return tr->percpu_dir;
5959}
5960
5961static struct dentry *
5962trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5963		      void *data, long cpu, const struct file_operations *fops)
5964{
5965	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5966
5967	if (ret) /* See tracing_get_cpu() */
5968		ret->d_inode->i_cdev = (void *)(cpu + 1);
5969	return ret;
5970}
5971
5972static void
5973tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5974{
5975	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5976	struct dentry *d_cpu;
5977	char cpu_dir[30]; /* 30 characters should be more than enough */
5978
5979	if (!d_percpu)
5980		return;
5981
5982	snprintf(cpu_dir, 30, "cpu%ld", cpu);
5983	d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5984	if (!d_cpu) {
5985		pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5986		return;
5987	}
5988
5989	/* per cpu trace_pipe */
5990	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5991				tr, cpu, &tracing_pipe_fops);
5992
5993	/* per cpu trace */
5994	trace_create_cpu_file("trace", 0644, d_cpu,
5995				tr, cpu, &tracing_fops);
5996
5997	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5998				tr, cpu, &tracing_buffers_fops);
5999
6000	trace_create_cpu_file("stats", 0444, d_cpu,
6001				tr, cpu, &tracing_stats_fops);
6002
6003	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6004				tr, cpu, &tracing_entries_fops);
6005
6006#ifdef CONFIG_TRACER_SNAPSHOT
6007	trace_create_cpu_file("snapshot", 0644, d_cpu,
6008				tr, cpu, &snapshot_fops);
6009
6010	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6011				tr, cpu, &snapshot_raw_fops);
6012#endif
6013}
6014
6015#ifdef CONFIG_FTRACE_SELFTEST
6016/* Let selftest have access to static functions in this file */
6017#include "trace_selftest.c"
6018#endif
6019
6020struct trace_option_dentry {
6021	struct tracer_opt		*opt;
6022	struct tracer_flags		*flags;
6023	struct trace_array		*tr;
6024	struct dentry			*entry;
6025};
6026
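/*
 * Reading a tracer option file returns "1\n" or "0\n" depending on
 * whether the option bit is set; writing 0 or 1 clears or sets it via
 * __set_tracer_option() under trace_types_lock.
 */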
6027static ssize_t
6028trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6029			loff_t *ppos)
6030{
6031	struct trace_option_dentry *topt = filp->private_data;
6032	char *buf;
6033
6034	if (topt->flags->val & topt->opt->bit)
6035		buf = "1\n";
6036	else
6037		buf = "0\n";
6038
6039	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6040}
6041
6042static ssize_t
6043trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6044			 loff_t *ppos)
6045{
6046	struct trace_option_dentry *topt = filp->private_data;
6047	unsigned long val;
6048	int ret;
6049
6050	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6051	if (ret)
6052		return ret;
6053
6054	if (val != 0 && val != 1)
6055		return -EINVAL;
6056
6057	if (!!(topt->flags->val & topt->opt->bit) != val) {
6058		mutex_lock(&trace_types_lock);
6059		ret = __set_tracer_option(topt->tr, topt->flags,
6060					  topt->opt, !val);
6061		mutex_unlock(&trace_types_lock);
6062		if (ret)
6063			return ret;
6064	}
6065
6066	*ppos += cnt;
6067
6068	return cnt;
6069}
6070
6072static const struct file_operations trace_options_fops = {
6073	.open = tracing_open_generic,
6074	.read = trace_options_read,
6075	.write = trace_options_write,
6076	.llseek	= generic_file_llseek,
6077};
6078
6079static ssize_t
6080trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6081			loff_t *ppos)
6082{
6083	long index = (long)filp->private_data;
6084	char *buf;
6085
6086	if (trace_flags & (1 << index))
6087		buf = "1\n";
6088	else
6089		buf = "0\n";
6090
6091	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6092}
6093
6094static ssize_t
6095trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6096			 loff_t *ppos)
6097{
6098	struct trace_array *tr = &global_trace;
6099	long index = (long)filp->private_data;
6100	unsigned long val;
6101	int ret;
6102
6103	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6104	if (ret)
6105		return ret;
6106
6107	if (val != 0 && val != 1)
6108		return -EINVAL;
6109
6110	mutex_lock(&trace_types_lock);
6111	ret = set_tracer_flag(tr, 1 << index, val);
6112	mutex_unlock(&trace_types_lock);
6113
6114	if (ret < 0)
6115		return ret;
6116
6117	*ppos += cnt;
6118
6119	return cnt;
6120}
6121
6122static const struct file_operations trace_options_core_fops = {
6123	.open = tracing_open_generic,
6124	.read = trace_options_core_read,
6125	.write = trace_options_core_write,
6126	.llseek = generic_file_llseek,
6127};
6128
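/*
 * Wrapper around debugfs_create_file() that warns when the file could
 * not be created, so callers do not have to check for failure themselves.
 */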
6129struct dentry *trace_create_file(const char *name,
6130				 umode_t mode,
6131				 struct dentry *parent,
6132				 void *data,
6133				 const struct file_operations *fops)
6134{
6135	struct dentry *ret;
6136
6137	ret = debugfs_create_file(name, mode, parent, data, fops);
6138	if (!ret)
6139		pr_warning("Could not create debugfs '%s' entry\n", name);
6140
6141	return ret;
6142}
6143
6145static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6146{
6147	struct dentry *d_tracer;
6148
6149	if (tr->options)
6150		return tr->options;
6151
6152	d_tracer = tracing_init_dentry_tr(tr);
6153	if (!d_tracer)
6154		return NULL;
6155
6156	tr->options = debugfs_create_dir("options", d_tracer);
6157	if (!tr->options) {
6158		pr_warning("Could not create debugfs directory 'options'\n");
6159		return NULL;
6160	}
6161
6162	return tr->options;
6163}
6164
6165static void
6166create_trace_option_file(struct trace_array *tr,
6167			 struct trace_option_dentry *topt,
6168			 struct tracer_flags *flags,
6169			 struct tracer_opt *opt)
6170{
6171	struct dentry *t_options;
6172
6173	t_options = trace_options_init_dentry(tr);
6174	if (!t_options)
6175		return;
6176
6177	topt->flags = flags;
6178	topt->opt = opt;
6179	topt->tr = tr;
6180
6181	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6182				    &trace_options_fops);
6184}
6185
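/*
 * Create one file under "options" for every flag the tracer exports.
 * The returned array is terminated by an entry with a NULL ->opt and
 * is later freed by destroy_trace_option_files().
 */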
6186static struct trace_option_dentry *
6187create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6188{
6189	struct trace_option_dentry *topts;
6190	struct tracer_flags *flags;
6191	struct tracer_opt *opts;
6192	int cnt;
6193
6194	if (!tracer)
6195		return NULL;
6196
6197	flags = tracer->flags;
6198
6199	if (!flags || !flags->opts)
6200		return NULL;
6201
6202	opts = flags->opts;
6203
6204	for (cnt = 0; opts[cnt].name; cnt++)
6205		;
6206
6207	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6208	if (!topts)
6209		return NULL;
6210
6211	for (cnt = 0; opts[cnt].name; cnt++)
6212		create_trace_option_file(tr, &topts[cnt], flags,
6213					 &opts[cnt]);
6214
6215	return topts;
6216}
6217
6218static void
6219destroy_trace_option_files(struct trace_option_dentry *topts)
6220{
6221	int cnt;
6222
6223	if (!topts)
6224		return;
6225
6226	for (cnt = 0; topts[cnt].opt; cnt++)
6227		debugfs_remove(topts[cnt].entry);
6228
6229	kfree(topts);
6230}
6231
6232static struct dentry *
6233create_trace_option_core_file(struct trace_array *tr,
6234			      const char *option, long index)
6235{
6236	struct dentry *t_options;
6237
6238	t_options = trace_options_init_dentry(tr);
6239	if (!t_options)
6240		return NULL;
6241
6242	return trace_create_file(option, 0644, t_options, (void *)index,
6243				    &trace_options_core_fops);
6244}
6245
6246static __init void create_trace_options_dir(struct trace_array *tr)
6247{
6248	struct dentry *t_options;
6249	int i;
6250
6251	t_options = trace_options_init_dentry(tr);
6252	if (!t_options)
6253		return;
6254
6255	for (i = 0; trace_options[i]; i++)
6256		create_trace_option_core_file(tr, trace_options[i], i);
6257}
6258
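/*
 * Back the "tracing_on" file: reading reports whether the ring buffer
 * is currently recording, writing 0 or non-zero turns recording off or
 * on and invokes the current tracer's stop()/start() callbacks.
 */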
6259static ssize_t
6260rb_simple_read(struct file *filp, char __user *ubuf,
6261	       size_t cnt, loff_t *ppos)
6262{
6263	struct trace_array *tr = filp->private_data;
6264	char buf[64];
6265	int r;
6266
6267	r = tracer_tracing_is_on(tr);
6268	r = sprintf(buf, "%d\n", r);
6269
6270	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6271}
6272
6273static ssize_t
6274rb_simple_write(struct file *filp, const char __user *ubuf,
6275		size_t cnt, loff_t *ppos)
6276{
6277	struct trace_array *tr = filp->private_data;
6278	struct ring_buffer *buffer = tr->trace_buffer.buffer;
6279	unsigned long val;
6280	int ret;
6281
6282	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6283	if (ret)
6284		return ret;
6285
6286	if (buffer) {
6287		mutex_lock(&trace_types_lock);
6288		if (val) {
6289			tracer_tracing_on(tr);
6290			if (tr->current_trace->start)
6291				tr->current_trace->start(tr);
6292		} else {
6293			tracer_tracing_off(tr);
6294			if (tr->current_trace->stop)
6295				tr->current_trace->stop(tr);
6296		}
6297		mutex_unlock(&trace_types_lock);
6298	}
6299
6300	(*ppos)++;
6301
6302	return cnt;
6303}
6304
6305static const struct file_operations rb_simple_fops = {
6306	.open		= tracing_open_generic_tr,
6307	.read		= rb_simple_read,
6308	.write		= rb_simple_write,
6309	.release	= tracing_release_generic_tr,
6310	.llseek		= default_llseek,
6311};
6312
6313struct dentry *trace_instance_dir;
6314
6315static void
6316init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
6317
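/*
 * Allocate the ring buffer and per-cpu data for one trace buffer.  The
 * ring buffer's overwrite mode follows the current TRACE_ITER_OVERWRITE
 * setting at allocation time.
 */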
6318static int
6319allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6320{
6321	enum ring_buffer_flags rb_flags;
6322
6323	rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6324
6325	buf->tr = tr;
6326
6327	buf->buffer = ring_buffer_alloc(size, rb_flags);
6328	if (!buf->buffer)
6329		return -ENOMEM;
6330
6331	buf->data = alloc_percpu(struct trace_array_cpu);
6332	if (!buf->data) {
6333		ring_buffer_free(buf->buffer);
6334		return -ENOMEM;
6335	}
6336
6337	/* Allocate the first page for all buffers */
6338	set_buffer_entries(&tr->trace_buffer,
6339			   ring_buffer_size(tr->trace_buffer.buffer, 0));
6340
6341	return 0;
6342}
6343
6344static int allocate_trace_buffers(struct trace_array *tr, int size)
6345{
6346	int ret;
6347
6348	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6349	if (ret)
6350		return ret;
6351
6352#ifdef CONFIG_TRACER_MAX_TRACE
6353	ret = allocate_trace_buffer(tr, &tr->max_buffer,
6354				    allocate_snapshot ? size : 1);
6355	if (WARN_ON(ret)) {
6356		ring_buffer_free(tr->trace_buffer.buffer);
6357		free_percpu(tr->trace_buffer.data);
6358		return -ENOMEM;
6359	}
6360	tr->allocated_snapshot = allocate_snapshot;
6361
6362	/*
6363	 * Only the top level trace array gets its snapshot allocated
6364	 * from the kernel command line.
6365	 */
6366	allocate_snapshot = false;
6367#endif
6368	return 0;
6369}
6370
6371static void free_trace_buffer(struct trace_buffer *buf)
6372{
6373	if (buf->buffer) {
6374		ring_buffer_free(buf->buffer);
6375		buf->buffer = NULL;
6376		free_percpu(buf->data);
6377		buf->data = NULL;
6378	}
6379}
6380
6381static void free_trace_buffers(struct trace_array *tr)
6382{
6383	if (!tr)
6384		return;
6385
6386	free_trace_buffer(&tr->trace_buffer);
6387
6388#ifdef CONFIG_TRACER_MAX_TRACE
6389	free_trace_buffer(&tr->max_buffer);
6390#endif
6391}
6392
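/*
 * Create a new trace instance; this is what backs
 *   mkdir /sys/kernel/debug/tracing/instances/<name>
 * Each instance gets its own ring buffer, event directory and the
 * usual per-instance control files.
 */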
6393static int new_instance_create(const char *name)
6394{
6395	struct trace_array *tr;
6396	int ret;
6397
6398	mutex_lock(&trace_types_lock);
6399
6400	ret = -EEXIST;
6401	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6402		if (tr->name && strcmp(tr->name, name) == 0)
6403			goto out_unlock;
6404	}
6405
6406	ret = -ENOMEM;
6407	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6408	if (!tr)
6409		goto out_unlock;
6410
6411	tr->name = kstrdup(name, GFP_KERNEL);
6412	if (!tr->name)
6413		goto out_free_tr;
6414
6415	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6416		goto out_free_tr;
6417
6418	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6419
6420	raw_spin_lock_init(&tr->start_lock);
6421
6422	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6423
6424	tr->current_trace = &nop_trace;
6425
6426	INIT_LIST_HEAD(&tr->systems);
6427	INIT_LIST_HEAD(&tr->events);
6428
6429	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6430		goto out_free_tr;
6431
6432	tr->dir = debugfs_create_dir(name, trace_instance_dir);
6433	if (!tr->dir)
6434		goto out_free_tr;
6435
6436	ret = event_trace_add_tracer(tr->dir, tr);
6437	if (ret) {
6438		debugfs_remove_recursive(tr->dir);
6439		goto out_free_tr;
6440	}
6441
6442	init_tracer_debugfs(tr, tr->dir);
6443
6444	list_add(&tr->list, &ftrace_trace_arrays);
6445
6446	mutex_unlock(&trace_types_lock);
6447
6448	return 0;
6449
6450 out_free_tr:
6451	free_trace_buffers(tr);
6452	free_cpumask_var(tr->tracing_cpumask);
6453	kfree(tr->name);
6454	kfree(tr);
6455
6456 out_unlock:
6457	mutex_unlock(&trace_types_lock);
6458
6459	return ret;
6461}
6462
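/*
 * Tear down an instance created above; this backs
 *   rmdir /sys/kernel/debug/tracing/instances/<name>
 * The removal fails with -EBUSY while something still holds a
 * reference on the instance.
 */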
6463static int instance_delete(const char *name)
6464{
6465	struct trace_array *tr;
6466	int found = 0;
6467	int ret;
6468
6469	mutex_lock(&trace_types_lock);
6470
6471	ret = -ENODEV;
6472	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6473		if (tr->name && strcmp(tr->name, name) == 0) {
6474			found = 1;
6475			break;
6476		}
6477	}
6478	if (!found)
6479		goto out_unlock;
6480
6481	ret = -EBUSY;
6482	if (tr->ref)
6483		goto out_unlock;
6484
6485	list_del(&tr->list);
6486
6487	tracing_set_nop(tr);
6488	event_trace_del_tracer(tr);
6489	ftrace_destroy_function_files(tr);
6490	debugfs_remove_recursive(tr->dir);
6491	free_trace_buffers(tr);
6492
6493	kfree(tr->name);
6494	kfree(tr);
6495
6496	ret = 0;
6497
6498 out_unlock:
6499	mutex_unlock(&trace_types_lock);
6500
6501	return ret;
6502}
6503
static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6505{
6506	struct dentry *parent;
6507	int ret;
6508
6509	/* Paranoid: Make sure the parent is the "instances" directory */
6510	parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6511	if (WARN_ON_ONCE(parent != trace_instance_dir))
6512		return -ENOENT;
6513
6514	/*
6515	 * The inode mutex is locked, but debugfs_create_dir() will also
6516	 * take the mutex. As the instances directory can not be destroyed
6517	 * or changed in any other way, it is safe to unlock it, and
6518	 * let the dentry try. If two users try to make the same dir at
6519	 * the same time, then the new_instance_create() will determine the
6520	 * winner.
6521	 */
6522	mutex_unlock(&inode->i_mutex);
6523
6524	ret = new_instance_create(dentry->d_iname);
6525
6526	mutex_lock(&inode->i_mutex);
6527
6528	return ret;
6529}
6530
6531static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6532{
6533	struct dentry *parent;
6534	int ret;
6535
6536	/* Paranoid: Make sure the parent is the "instances" directory */
6537	parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6538	if (WARN_ON_ONCE(parent != trace_instance_dir))
6539		return -ENOENT;
6540
6541	/* The caller did a dget() on dentry */
6542	mutex_unlock(&dentry->d_inode->i_mutex);
6543
	/*
	 * The inode mutex is locked, but debugfs_remove_recursive() will
	 * also take the mutex. As the instances directory can not be
	 * destroyed or changed in any other way, it is safe to unlock it,
	 * and let the dentry try. If two users try to delete the same dir
	 * at the same time, then the instance_delete() will determine the
	 * winner.
	 */
6552	mutex_unlock(&inode->i_mutex);
6553
6554	ret = instance_delete(dentry->d_iname);
6555
6556	mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6557	mutex_lock(&dentry->d_inode->i_mutex);
6558
6559	return ret;
6560}
6561
6562static const struct inode_operations instance_dir_inode_operations = {
6563	.lookup		= simple_lookup,
6564	.mkdir		= instance_mkdir,
6565	.rmdir		= instance_rmdir,
6566};
6567
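/*
 * Create the "instances" directory and override its inode operations
 * so that mkdir and rmdir inside it create and delete trace instances.
 */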
6568static __init void create_trace_instances(struct dentry *d_tracer)
6569{
6570	trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6571	if (WARN_ON(!trace_instance_dir))
6572		return;
6573
6574	/* Hijack the dir inode operations, to allow mkdir */
6575	trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6576}
6577
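/*
 * Create the per trace array control files (trace, trace_pipe,
 * buffer_size_kb, tracing_on, ...) and the per_cpu directories.
 * Used for both the top level directory and for each instance.
 */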
6578static void
6579init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6580{
6581	int cpu;
6582
6583	trace_create_file("available_tracers", 0444, d_tracer,
6584			tr, &show_traces_fops);
6585
6586	trace_create_file("current_tracer", 0644, d_tracer,
6587			tr, &set_tracer_fops);
6588
6589	trace_create_file("tracing_cpumask", 0644, d_tracer,
6590			  tr, &tracing_cpumask_fops);
6591
6592	trace_create_file("trace_options", 0644, d_tracer,
6593			  tr, &tracing_iter_fops);
6594
6595	trace_create_file("trace", 0644, d_tracer,
6596			  tr, &tracing_fops);
6597
6598	trace_create_file("trace_pipe", 0444, d_tracer,
6599			  tr, &tracing_pipe_fops);
6600
6601	trace_create_file("buffer_size_kb", 0644, d_tracer,
6602			  tr, &tracing_entries_fops);
6603
6604	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6605			  tr, &tracing_total_entries_fops);
6606
6607	trace_create_file("free_buffer", 0200, d_tracer,
6608			  tr, &tracing_free_buffer_fops);
6609
6610	trace_create_file("trace_marker", 0220, d_tracer,
6611			  tr, &tracing_mark_fops);
6612
6613	trace_create_file("saved_tgids", 0444, d_tracer,
6614			  tr, &tracing_saved_tgids_fops);
6615
6616	trace_create_file("trace_clock", 0644, d_tracer, tr,
6617			  &trace_clock_fops);
6618
6619	trace_create_file("tracing_on", 0644, d_tracer,
6620			  tr, &rb_simple_fops);
6621
6622#ifdef CONFIG_TRACER_MAX_TRACE
6623	trace_create_file("tracing_max_latency", 0644, d_tracer,
6624			&tr->max_latency, &tracing_max_lat_fops);
6625#endif
6626
6627	if (ftrace_create_function_files(tr, d_tracer))
6628		WARN(1, "Could not allocate function filter files");
6629
6630#ifdef CONFIG_TRACER_SNAPSHOT
6631	trace_create_file("snapshot", 0644, d_tracer,
6632			  tr, &snapshot_fops);
6633#endif
6634
6635	for_each_tracing_cpu(cpu)
6636		tracing_init_debugfs_percpu(tr, cpu);
6638}
6639
6640static __init int tracer_init_debugfs(void)
6641{
6642	struct dentry *d_tracer;
6643
6644	trace_access_lock_init();
6645
6646	d_tracer = tracing_init_dentry();
6647	if (!d_tracer)
6648		return 0;
6649
6650	init_tracer_debugfs(&global_trace, d_tracer);
6651
6652	trace_create_file("tracing_thresh", 0644, d_tracer,
6653			&global_trace, &tracing_thresh_fops);
6654
6655	trace_create_file("README", 0444, d_tracer,
6656			NULL, &tracing_readme_fops);
6657
6658	trace_create_file("saved_cmdlines", 0444, d_tracer,
6659			NULL, &tracing_saved_cmdlines_fops);
6660
6661	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
6662			  NULL, &tracing_saved_cmdlines_size_fops);
6663
6664#ifdef CONFIG_DYNAMIC_FTRACE
6665	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6666			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6667#endif
6668
6669	create_trace_instances(d_tracer);
6670
6671	create_trace_options_dir(&global_trace);
6672
6673	return 0;
6674}
6675
6676static int trace_panic_handler(struct notifier_block *this,
6677			       unsigned long event, void *unused)
6678{
6679	if (ftrace_dump_on_oops)
6680		ftrace_dump(ftrace_dump_on_oops);
6681	return NOTIFY_OK;
6682}
6683
6684static struct notifier_block trace_panic_notifier = {
6685	.notifier_call  = trace_panic_handler,
6686	.next           = NULL,
6687	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
6688};
6689
6690static int trace_die_handler(struct notifier_block *self,
6691			     unsigned long val,
6692			     void *data)
6693{
6694	switch (val) {
6695	case DIE_OOPS:
6696		if (ftrace_dump_on_oops)
6697			ftrace_dump(ftrace_dump_on_oops);
6698		break;
6699	default:
6700		break;
6701	}
6702	return NOTIFY_OK;
6703}
6704
6705static struct notifier_block trace_die_notifier = {
6706	.notifier_call = trace_die_handler,
6707	.priority = 200
6708};
6709
/*
 * printk is limited to a maximum of 1024 bytes; we really don't need
 * it that big. Nothing should be printing 1000 characters anyway.
 */
6714#define TRACE_MAX_PRINT		1000
6715
6716/*
6717 * Define here KERN_TRACE so that we have one place to modify
6718 * it if we decide to change what log level the ftrace dump
6719 * should be at.
6720 */
6721#define KERN_TRACE		KERN_EMERG
6722
6723void
6724trace_printk_seq(struct trace_seq *s)
6725{
6726	/* Probably should print a warning here. */
6727	if (s->len >= TRACE_MAX_PRINT)
6728		s->len = TRACE_MAX_PRINT;
6729
	/* should be zero terminated, but we are paranoid. */
6731	s->buffer[s->len] = 0;
6732
6733	printk(KERN_TRACE "%s", s->buffer);
6734
6735	trace_seq_init(s);
6736}
6737
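/*
 * Set up an iterator over the global trace buffer; used by
 * ftrace_dump() below to walk the buffers without any allocation.
 */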
6738void trace_init_global_iter(struct trace_iterator *iter)
6739{
6740	iter->tr = &global_trace;
6741	iter->trace = iter->tr->current_trace;
6742	iter->cpu_file = RING_BUFFER_ALL_CPUS;
6743	iter->trace_buffer = &global_trace.trace_buffer;
6744
6745	if (iter->trace && iter->trace->open)
6746		iter->trace->open(iter);
6747
6748	/* Annotate start of buffers if we had overruns */
6749	if (ring_buffer_overruns(iter->trace_buffer->buffer))
6750		iter->iter_flags |= TRACE_FILE_ANNOTATE;
6751
6752	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6753	if (trace_clocks[iter->tr->clock_id].in_ns)
6754		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6755}
6756
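/*
 * Dump the contents of the trace buffers to the console.  This is
 * called from the panic and die notifiers below when ftrace_dump_on_oops
 * is set, and can also be triggered manually (e.g. via sysrq-z).
 * Tracing is turned off on entry and stays off afterwards.
 */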
6757void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6758{
6759	/* use static because iter can be a bit big for the stack */
6760	static struct trace_iterator iter;
6761	static atomic_t dump_running;
6762	unsigned int old_userobj;
6763	unsigned long flags;
6764	int cnt = 0, cpu;
6765
6766	/* Only allow one dump user at a time. */
6767	if (atomic_inc_return(&dump_running) != 1) {
6768		atomic_dec(&dump_running);
6769		return;
6770	}
6771
6772	/*
6773	 * Always turn off tracing when we dump.
6774	 * We don't need to show trace output of what happens
6775	 * between multiple crashes.
6776	 *
6777	 * If the user does a sysrq-z, then they can re-enable
6778	 * tracing with echo 1 > tracing_on.
6779	 */
6780	tracing_off();
6781
6782	local_irq_save(flags);
6783
6784	/* Simulate the iterator */
6785	trace_init_global_iter(&iter);
6786
6787	for_each_tracing_cpu(cpu) {
6788		atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6789	}
6790
6791	old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6792
6793	/* don't look at user memory in panic mode */
6794	trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6795
6796	switch (oops_dump_mode) {
6797	case DUMP_ALL:
6798		iter.cpu_file = RING_BUFFER_ALL_CPUS;
6799		break;
6800	case DUMP_ORIG:
6801		iter.cpu_file = raw_smp_processor_id();
6802		break;
6803	case DUMP_NONE:
6804		goto out_enable;
6805	default:
6806		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6807		iter.cpu_file = RING_BUFFER_ALL_CPUS;
6808	}
6809
6810	printk(KERN_TRACE "Dumping ftrace buffer:\n");
6811
6812	/* Did function tracer already get disabled? */
6813	if (ftrace_is_dead()) {
6814		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6815		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6816	}
6817
	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all that we can read,
	 * and then release the locks again.
	 */
6824
6825	while (!trace_empty(&iter)) {
6826
6827		if (!cnt)
6828			printk(KERN_TRACE "---------------------------------\n");
6829
6830		cnt++;
6831
6832		/* reset all but tr, trace, and overruns */
6833		memset(&iter.seq, 0,
6834		       sizeof(struct trace_iterator) -
6835		       offsetof(struct trace_iterator, seq));
6836		iter.iter_flags |= TRACE_FILE_LAT_FMT;
6837		iter.pos = -1;
6838
6839		if (trace_find_next_entry_inc(&iter) != NULL) {
6840			int ret;
6841
6842			ret = print_trace_line(&iter);
6843			if (ret != TRACE_TYPE_NO_CONSUME)
6844				trace_consume(&iter);
6845		}
6846		touch_nmi_watchdog();
6847
6848		trace_printk_seq(&iter.seq);
6849	}
6850
6851	if (!cnt)
6852		printk(KERN_TRACE "   (ftrace buffer empty)\n");
6853	else
6854		printk(KERN_TRACE "---------------------------------\n");
6855
6856 out_enable:
6857	trace_flags |= old_userobj;
6858
6859	for_each_tracing_cpu(cpu) {
6860		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6861	}
	atomic_dec(&dump_running);
6863	local_irq_restore(flags);
6864}
6865EXPORT_SYMBOL_GPL(ftrace_dump);
6866
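/*
 * Early boot initialization: allocate the global ring buffer and the
 * cpumasks, register the nop tracer and hook up the panic/die
 * notifiers so the buffers can be dumped on a crash.
 */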
6867__init static int tracer_alloc_buffers(void)
6868{
6869	int ring_buf_size;
6870	int ret = -ENOMEM;
6871
6873	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6874		goto out;
6875
6876	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6877		goto out_free_buffer_mask;
6878
6879	/* Only allocate trace_printk buffers if a trace_printk exists */
6880	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
		/* Must be called before the global_trace buffers are allocated */
6882		trace_printk_init_buffers();
6883
6884	/* To save memory, keep the ring buffer size to its minimum */
6885	if (ring_buffer_expanded)
6886		ring_buf_size = trace_buf_size;
6887	else
6888		ring_buf_size = 1;
6889
6890	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6891	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6892
6893	raw_spin_lock_init(&global_trace.start_lock);
6894
6895	/* Used for event triggers */
6896	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6897	if (!temp_buffer)
6898		goto out_free_cpumask;
6899
6900	if (trace_create_savedcmd() < 0)
6901		goto out_free_temp_buffer;
6902
6903	/* TODO: make the number of buffers hot pluggable with CPUS */
6904	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6905		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6906		WARN_ON(1);
6907		goto out_free_savedcmd;
6908	}
6909
6910	if (global_trace.buffer_disabled)
6911		tracing_off();
6912
6913	if (trace_boot_clock) {
6914		ret = tracing_set_clock(&global_trace, trace_boot_clock);
6915		if (ret < 0)
6916			pr_warning("Trace clock %s not defined, going back to default\n",
6917				   trace_boot_clock);
6918	}
6919
6920	/*
6921	 * register_tracer() might reference current_trace, so it
6922	 * needs to be set before we register anything. This is
6923	 * just a bootstrap of current_trace anyway.
6924	 */
6925	global_trace.current_trace = &nop_trace;
6926
6927	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6928
6929	ftrace_init_global_array_ops(&global_trace);
6930
6931	register_tracer(&nop_trace);
6932
6933	/* All seems OK, enable tracing */
6934	tracing_disabled = 0;
6935
6936	atomic_notifier_chain_register(&panic_notifier_list,
6937				       &trace_panic_notifier);
6938
6939	register_die_notifier(&trace_die_notifier);
6940
6941	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6942
6943	INIT_LIST_HEAD(&global_trace.systems);
6944	INIT_LIST_HEAD(&global_trace.events);
6945	list_add(&global_trace.list, &ftrace_trace_arrays);
6946
6947	while (trace_boot_options) {
6948		char *option;
6949
6950		option = strsep(&trace_boot_options, ",");
6951		trace_set_options(&global_trace, option);
6952	}
6953
6954	register_snapshot_cmd();
6955
6956	return 0;
6957
6958out_free_savedcmd:
6959	free_saved_cmdlines_buffer(savedcmd);
6960out_free_temp_buffer:
6961	ring_buffer_free(temp_buffer);
6962out_free_cpumask:
6963	free_cpumask_var(global_trace.tracing_cpumask);
6964out_free_buffer_mask:
6965	free_cpumask_var(tracing_buffer_mask);
6966out:
6967	return ret;
6968}
6969
6970__init static int clear_boot_tracer(void)
6971{
	/*
	 * The default bootup tracer name lives in an init section that
	 * is about to be freed. This function runs at late_initcall
	 * time; if the boot tracer was never found and registered,
	 * clear the pointer so that a later tracer registration does
	 * not access the freed buffer.
	 */
6979	if (!default_bootup_tracer)
6980		return 0;
6981
6982	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6983	       default_bootup_tracer);
6984	default_bootup_tracer = NULL;
6985
6986	return 0;
6987}
6988
6989early_initcall(tracer_alloc_buffers);
6990fs_initcall(tracer_init_debugfs);
6991late_initcall(clear_boot_tracer);
6992