1/*
2 * Copyright (c) 2013 Ben Noordhuis <info@bnoordhuis.nl>
3 * Copyright (c) 2013-2015 Dmitry V. Levin <ldv@altlinux.org>
4 * Copyright (c) 2016 Eugene Syromyatnikov <evgsyr@gmail.com>
5 * Copyright (c) 2015-2017 The strace developers.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 *    derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "defs.h"
32
33#include "perf_event_struct.h"
34
35#include "xlat/hw_breakpoint_len.h"
36#include "xlat/hw_breakpoint_type.h"
37#include "xlat/perf_attr_size.h"
38#include "xlat/perf_branch_sample_type.h"
39#include "xlat/perf_event_open_flags.h"
40#include "xlat/perf_event_read_format.h"
41#include "xlat/perf_event_sample_format.h"
42#include "xlat/perf_hw_cache_id.h"
43#include "xlat/perf_hw_cache_op_id.h"
44#include "xlat/perf_hw_cache_op_result_id.h"
45#include "xlat/perf_hw_id.h"
46#include "xlat/perf_sw_ids.h"
47#include "xlat/perf_type_id.h"
48
/*
 * Descriptor kept as tcb private data between syscall entry and exit:
 * a local copy of the tracee's perf_event_attr plus the number of bytes
 * of it that were requested from tracee memory.
 */
struct pea_desc {
	/* Heap-allocated copy of the attribute; released by free_pea_desc(). */
	struct perf_event_attr *attr;
	/* Number of bytes of *attr that were read from the tracee. */
	uint32_t size;
};
53
54static void
55free_pea_desc(void *pea_desc_ptr)
56{
57	struct pea_desc *desc = pea_desc_ptr;
58
59	free(desc->attr);
60	free(desc);
61}
62
63static int
64fetch_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr)
65{
66	struct pea_desc *desc;
67	struct perf_event_attr *attr;
68	uint32_t size;
69
70	if (umove(tcp, addr + offsetof(struct perf_event_attr, size), &size)) {
71		printaddr(addr);
72		return 1;
73	}
74
75	if (size > sizeof(*attr))
76		size = sizeof(*attr);
77
78	if (!size)
79		size = PERF_ATTR_SIZE_VER0;
80
81	/*
82	 * Kernel (rightfully) deems invalid attribute structures with size less
83	 * than first published format size, and we do the same.
84	 */
85	if (size < PERF_ATTR_SIZE_VER0) {
86		printaddr(addr);
87		return 1;
88	}
89
90	if (abbrev(tcp))
91		size = offsetofend(struct perf_event_attr, config);
92
93	/* Size should be multiple of 8, but kernel doesn't check for it */
94	/* size &= ~7; */
95
96	attr = xcalloc(1, sizeof(*attr));
97
98	if (umoven_or_printaddr(tcp, addr, size, attr)) {
99		free(attr);
100
101		return 1;
102	}
103
104	desc = xmalloc(sizeof(*desc));
105
106	desc->attr = attr;
107	desc->size = size;
108
109	set_tcb_priv_data(tcp, desc, free_pea_desc);
110
111	return 0;
112}
113
/*
 * Print the string prefix with tprints(), then pass xlat, x and dflt on to
 * printxval_search() to print the value itself.
 * Wrapped in do { } while (0) so that the expansion is a single statement.
 */
#define PRINT_XLAT(prefix, xlat, x, dflt) \
	do { \
		tprints(prefix); \
		printxval_search(xlat, x, dflt); \
	} while (0)
119
120static void
121print_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr)
122{
123	static const char *precise_ip_desc[] = {
124		"arbitrary skid",
125		"constant skid",
126		"requested to have 0 skid",
127		"must have 0 skid",
128	};
129
130	struct pea_desc *desc;
131	struct perf_event_attr *attr;
132	uint32_t size;
133	uint32_t new_size;
134	int use_new_size = 0;
135
136	/*
137	 * Amusingly, kernel accepts structures with only part of the field
138	 * present, so we making check like this (instead of checking
139	 * offsetofend against size) in order to print fields as kernel sees
140	 * them. This also should work great on big endian architectures.
141	 */
142	#define _PERF_CHECK_FIELD(_field) \
143		do { \
144			if (offsetof(struct perf_event_attr, _field) >= size) \
145				goto print_perf_event_attr_out; \
146		} while (0)
147
148	desc = get_tcb_priv_data(tcp);
149
150	attr = desc->attr;
151	size = desc->size;
152
153	/* The only error which expected to change size field currently */
154	if (tcp->u_error == E2BIG) {
155		if (umove(tcp, addr + offsetof(struct perf_event_attr, size),
156		    &new_size))
157			use_new_size = -1;
158		else
159			use_new_size = 1;
160	}
161
162	PRINT_XLAT("{type=", perf_type_id, attr->type, "PERF_TYPE_???");
163	tprints(", size=");
164	printxval(perf_attr_size, attr->size, "PERF_ATTR_SIZE_???");
165
166	if (use_new_size) {
167		tprints(" => ");
168
169		if (use_new_size > 0)
170			printxval(perf_attr_size, new_size,
171				  "PERF_ATTR_SIZE_???");
172		else
173			tprints("???");
174	}
175
176	switch (attr->type) {
177	case PERF_TYPE_HARDWARE:
178		PRINT_XLAT(", config=", perf_hw_id, attr->config,
179			   "PERF_COUNT_HW_???");
180		break;
181	case PERF_TYPE_SOFTWARE:
182		PRINT_XLAT(", config=", perf_sw_ids, attr->config,
183			   "PERF_COUNT_SW_???");
184		break;
185	case PERF_TYPE_TRACEPOINT:
186		/*
187		 * "The value to use in config can be obtained from under
188		 * debugfs tracing/events/../../id if ftrace is enabled
189		 * in the kernel."
190		 */
191		tprintf(", config=%" PRIu64, attr->config);
192		break;
193	case PERF_TYPE_HW_CACHE:
194		/*
195		 * (perf_hw_cache_id) | (perf_hw_cache_op_id << 8) |
196		 * (perf_hw_cache_op_result_id << 16)
197		 */
198		PRINT_XLAT(", config=", perf_hw_cache_id, attr->config & 0xFF,
199			   "PERF_COUNT_HW_CACHE_???");
200		PRINT_XLAT("|", perf_hw_cache_op_id, (attr->config >> 8) & 0xFF,
201			   "PERF_COUNT_HW_CACHE_OP_???");
202		/*
203		 * Current code (see set_ext_hw_attr in arch/x86/events/core.c,
204		 * tile_map_cache_event in arch/tile/kernel/perf_event.c,
205		 * arc_pmu_cache_event in arch/arc/kernel/perf_event.c,
206		 * hw_perf_cache_event in arch/blackfin/kernel/perf_event.c,
207		 * _hw_perf_cache_event in arch/metag/kernel/perf/perf_event.c,
208		 * mipspmu_map_cache_event in arch/mips/kernel/perf_event_mipsxx.c,
209		 * hw_perf_cache_event in arch/powerpc/perf/core-book3s.c,
210		 * hw_perf_cache_event in arch/powerpc/perf/core-fsl-emb.c,
211		 * hw_perf_cache_event in arch/sh/kernel/perf_event.c,
212		 * sparc_map_cache_event in arch/sparc/kernel/perf_event.c,
213		 * xtensa_pmu_cache_event in arch/xtensa/kernel/perf_event.c,
214		 * armpmu_map_cache_event in drivers/perf/arm_pmu.c) assumes
215		 * that cache result is 8 bits in size.
216		 */
217		PRINT_XLAT("<<8|", perf_hw_cache_op_result_id,
218			   (attr->config >> 16) & 0xFF,
219			   "PERF_COUNT_HW_CACHE_RESULT_???");
220		tprints("<<16");
221		if (attr->config >> 24) {
222			tprintf("|%#" PRIx64 "<<24", attr->config >> 24);
223			tprints_comment("PERF_COUNT_HW_CACHE_???");
224		}
225		break;
226	case PERF_TYPE_RAW:
227		/*
228		 * "If type is PERF_TYPE_RAW, then a custom "raw" config
229		 * value is needed. Most CPUs support events that are not
230		 * covered by the "generalized" events. These are
231		 * implementation defined; see your CPU manual (for example the
232		 * Intel Volume 3B documentation or the AMD BIOS and Kernel
233		 * Developer Guide). The libpfm4 library can be used to
234		 * translate from the name in the architectural manuals
235		 * to the raw hex value perf_event_open() expects in this
236		 * field."
237		 */
238	case PERF_TYPE_BREAKPOINT:
239		/*
240		 * "If type is PERF_TYPE_BREAKPOINT, then leave config set
241		 * to zero. Its parameters are set in other places."
242		 */
243	default:
244		tprintf(", config=%#" PRIx64, attr->config);
245		break;
246	}
247
248	if (abbrev(tcp))
249		goto print_perf_event_attr_out;
250
251	if (attr->freq)
252		tprintf(", sample_freq=%" PRIu64, attr->sample_freq);
253	else
254		tprintf(", sample_period=%" PRIu64, attr->sample_period);
255
256	tprints(", sample_type=");
257	printflags64(perf_event_sample_format, attr->sample_type,
258		"PERF_SAMPLE_???");
259
260	tprints(", read_format=");
261	printflags64(perf_event_read_format, attr->read_format,
262		"PERF_FORMAT_???");
263
264	tprintf(", disabled=%u"
265		", inherit=%u"
266		", pinned=%u"
267		", exclusive=%u"
268		", exclusive_user=%u"
269		", exclude_kernel=%u"
270		", exclude_hv=%u"
271		", exclude_idle=%u"
272		", mmap=%u"
273		", comm=%u"
274		", freq=%u"
275		", inherit_stat=%u"
276		", enable_on_exec=%u"
277		", task=%u"
278		", watermark=%u"
279		", precise_ip=%u",
280		attr->disabled,
281		attr->inherit,
282		attr->pinned,
283		attr->exclusive,
284		attr->exclude_user,
285		attr->exclude_kernel,
286		attr->exclude_hv,
287		attr->exclude_idle,
288		attr->mmap,
289		attr->comm,
290		attr->freq,
291		attr->inherit_stat,
292		attr->enable_on_exec,
293		attr->task,
294		attr->watermark,
295		attr->precise_ip);
296	tprints_comment(precise_ip_desc[attr->precise_ip]);
297	tprintf(", mmap_data=%u"
298		", sample_id_all=%u"
299		", exclude_host=%u"
300		", exclude_guest=%u"
301		", exclude_callchain_kernel=%u"
302		", exclude_callchain_user=%u"
303		", mmap2=%u"
304		", comm_exec=%u"
305		", use_clockid=%u"
306		", context_switch=%u"
307		", write_backward=%u",
308		attr->mmap_data,
309		attr->sample_id_all,
310		attr->exclude_host,
311		attr->exclude_guest,
312		attr->exclude_callchain_kernel,
313		attr->exclude_callchain_user,
314		attr->mmap2,
315		attr->comm_exec,
316		attr->use_clockid,
317		attr->context_switch,
318		attr->write_backward);
319
320	/*
321	 * Print it only in case it is non-zero, since it may contain flags we
322	 * are not aware about.
323	 */
324	if (attr->__reserved_1) {
325		tprintf(", __reserved_1=%#" PRIx64,
326			(uint64_t) attr->__reserved_1);
327		tprints_comment("Bits 63..28");
328	}
329
330	if (attr->watermark)
331		tprintf(", wakeup_watermark=%u", attr->wakeup_watermark);
332	else
333		tprintf(", wakeup_events=%u", attr->wakeup_events);
334
335	if (attr->type == PERF_TYPE_BREAKPOINT)
336		/* Any combination of R/W with X is deemed invalid */
337		PRINT_XLAT(", bp_type=", hw_breakpoint_type, attr->bp_type,
338			   (attr->bp_type <=
339				   (HW_BREAKPOINT_X | HW_BREAKPOINT_RW)) ?
340					   "HW_BREAKPOINT_INVALID" :
341					   "HW_BREAKPOINT_???");
342
343	if (attr->type == PERF_TYPE_BREAKPOINT)
344		tprintf(", bp_addr=%#" PRIx64, attr->bp_addr);
345	else
346		tprintf(", config1=%#" PRIx64, attr->config1);
347
348	/*
349	 * Fields after bp_addr/config1 are optional and may not present; check
350	 * against size is needed.
351	 */
352
353	_PERF_CHECK_FIELD(bp_len);
354	if (attr->type == PERF_TYPE_BREAKPOINT)
355		tprintf(", bp_len=%" PRIu64, attr->bp_len);
356	else
357		tprintf(", config2=%#" PRIx64, attr->config2);
358
359	_PERF_CHECK_FIELD(branch_sample_type);
360	if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
361		tprints(", branch_sample_type=");
362		printflags64(perf_branch_sample_type, attr->branch_sample_type,
363			     "PERF_SAMPLE_BRANCH_???");
364	}
365
366	_PERF_CHECK_FIELD(sample_regs_user);
367	/*
368	 * "This bit mask defines the set of user CPU registers to dump on
369	 * samples. The layout of the register mask is architecture-specific and
370	 * described in the kernel header
371	 * arch/ARCH/include/uapi/asm/perf_regs.h."
372	 */
373	tprintf(", sample_regs_user=%#" PRIx64, attr->sample_regs_user);
374
375	_PERF_CHECK_FIELD(sample_stack_user);
376	/*
377	 * "size of the user stack to dump if PERF_SAMPLE_STACK_USER is
378	 * specified."
379	 */
380	if (attr->sample_type & PERF_SAMPLE_STACK_USER)
381		tprintf(", sample_stack_user=%#" PRIx32,
382			attr->sample_stack_user);
383
384	if (attr->use_clockid) {
385		_PERF_CHECK_FIELD(clockid);
386		tprints(", clockid=");
387		printxval(clocknames, attr->clockid, "CLOCK_???");
388	}
389
390	_PERF_CHECK_FIELD(sample_regs_intr);
391	tprintf(", sample_regs_intr=%#" PRIx64, attr->sample_regs_intr);
392
393	_PERF_CHECK_FIELD(aux_watermark);
394	tprintf(", aux_watermark=%" PRIu32, attr->aux_watermark);
395
396	_PERF_CHECK_FIELD(sample_max_stack);
397	tprintf(", sample_max_stack=%" PRIu16, attr->sample_max_stack);
398
399	/* _PERF_CHECK_FIELD(__reserved_2);
400	tprintf(", __reserved2=%" PRIu16, attr->__reserved_2); */
401
402print_perf_event_attr_out:
403	if ((attr->size && (attr->size > size)) ||
404	    (!attr->size && (size < PERF_ATTR_SIZE_VER0)))
405		tprints(", ...");
406
407	tprints("}");
408}
409
410SYS_FUNC(perf_event_open)
411{
412	/*
413	 * We try to copy out the whole structure on entering in order to check
414	 * size value on exiting. We do not check the rest of the fields because
415	 * they shouldn't be changed, but copy the whole structure instead
416	 * of just size field because they could.
417	 */
418	if (entering(tcp)) {
419		if (!fetch_perf_event_attr(tcp, tcp->u_arg[0]))
420			return 0;
421	} else {
422		print_perf_event_attr(tcp, tcp->u_arg[0]);
423	}
424
425	tprintf(", %d, %d, %d, ",
426		(int) tcp->u_arg[1],
427		(int) tcp->u_arg[2],
428		(int) tcp->u_arg[3]);
429	printflags64(perf_event_open_flags, tcp->u_arg[4], "PERF_FLAG_???");
430
431	return RVAL_DECODED | RVAL_FD;
432}
433