/*
 * Copyright (c) 2013 Ben Noordhuis <info@bnoordhuis.nl>
 * Copyright (c) 2013-2015 Dmitry V. Levin <ldv@altlinux.org>
 * Copyright (c) 2016 Eugene Syromyatnikov <evgsyr@gmail.com>
 * Copyright (c) 2015-2017 The strace developers.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "defs.h"

#include "perf_event_struct.h"

#include "xlat/hw_breakpoint_len.h"
#include "xlat/hw_breakpoint_type.h"
#include "xlat/perf_attr_size.h"
#include "xlat/perf_branch_sample_type.h"
#include "xlat/perf_event_open_flags.h"
#include "xlat/perf_event_read_format.h"
#include "xlat/perf_event_sample_format.h"
#include "xlat/perf_hw_cache_id.h"
#include "xlat/perf_hw_cache_op_id.h"
#include "xlat/perf_hw_cache_op_result_id.h"
#include "xlat/perf_hw_id.h"
#include "xlat/perf_sw_ids.h"
#include "xlat/perf_type_id.h"

struct pea_desc {
	struct perf_event_attr *attr;
	uint32_t size;
};

static void
free_pea_desc(void *pea_desc_ptr)
{
	struct pea_desc *desc = pea_desc_ptr;

	free(desc->attr);
	free(desc);
}

/*
 * Fetch struct perf_event_attr from tracee memory on syscall entry and stash
 * it in the tcb's private data so that it can be printed on syscall exit.
 * Returns non-zero if only the address has been printed.
 */
static int
fetch_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr)
{
	struct pea_desc *desc;
	struct perf_event_attr *attr;
	uint32_t size;

	if (umove(tcp, addr + offsetof(struct perf_event_attr, size), &size)) {
		printaddr(addr);
		return 1;
	}

	if (size > sizeof(*attr))
		size = sizeof(*attr);

	if (!size)
		size = PERF_ATTR_SIZE_VER0;

	/*
	 * The kernel (rightfully) deems attribute structures with a size less
	 * than the first published format size invalid, and we do the same.
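	 *
	 * (PERF_ATTR_SIZE_VER0 is the size of the first published
	 * struct perf_event_attr, 64 bytes.)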
	 */
	if (size < PERF_ATTR_SIZE_VER0) {
		printaddr(addr);
		return 1;
	}

	if (abbrev(tcp))
		size = offsetofend(struct perf_event_attr, config);

	/* Size should be a multiple of 8, but the kernel doesn't check for it */
	/* size &= ~7; */

	attr = xcalloc(1, sizeof(*attr));

	if (umoven_or_printaddr(tcp, addr, size, attr)) {
		free(attr);

		return 1;
	}

	desc = xmalloc(sizeof(*desc));

	desc->attr = attr;
	desc->size = size;

	set_tcb_priv_data(tcp, desc, free_pea_desc);

	return 0;
}

#define PRINT_XLAT(prefix, xlat, x, dflt) \
	do { \
		tprints(prefix); \
		printxval_search(xlat, x, dflt); \
	} while (0)

static void
print_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr)
{
	static const char *precise_ip_desc[] = {
		"arbitrary skid",
		"constant skid",
		"requested to have 0 skid",
		"must have 0 skid",
	};

	struct pea_desc *desc;
	struct perf_event_attr *attr;
	uint32_t size;
	uint32_t new_size;
	int use_new_size = 0;

	/*
	 * Amusingly, the kernel accepts structures in which only part of a
	 * field is present, so we check like this (instead of checking
	 * offsetofend against size) in order to print fields as the kernel
	 * sees them.  This should also work fine on big-endian architectures.
	 */
	#define _PERF_CHECK_FIELD(_field) \
		do { \
			if (offsetof(struct perf_event_attr, _field) >= size) \
				goto print_perf_event_attr_out; \
		} while (0)

	desc = get_tcb_priv_data(tcp);

	attr = desc->attr;
	size = desc->size;

	/* Currently the only error that is expected to change the size field */
	if (tcp->u_error == E2BIG) {
		if (umove(tcp, addr + offsetof(struct perf_event_attr, size),
			  &new_size))
			use_new_size = -1;
		else
			use_new_size = 1;
	}

	PRINT_XLAT("{type=", perf_type_id, attr->type, "PERF_TYPE_???");
	tprints(", size=");
	printxval(perf_attr_size, attr->size, "PERF_ATTR_SIZE_???");

	if (use_new_size) {
		tprints(" => ");

		if (use_new_size > 0)
			printxval(perf_attr_size, new_size,
				  "PERF_ATTR_SIZE_???");
		else
			tprints("???");
	}

	switch (attr->type) {
	case PERF_TYPE_HARDWARE:
		PRINT_XLAT(", config=", perf_hw_id, attr->config,
			   "PERF_COUNT_HW_???");
		break;
	case PERF_TYPE_SOFTWARE:
		PRINT_XLAT(", config=", perf_sw_ids, attr->config,
			   "PERF_COUNT_SW_???");
		break;
	case PERF_TYPE_TRACEPOINT:
		/*
		 * "The value to use in config can be obtained from under
		 * debugfs tracing/events/../../id if ftrace is enabled
		 * in the kernel."
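		 *
		 * For instance, the id of the sched:sched_switch tracepoint
		 * can typically be read from
		 * /sys/kernel/debug/tracing/events/sched/sched_switch/id.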
		 */
		tprintf(", config=%" PRIu64, attr->config);
		break;
	case PERF_TYPE_HW_CACHE:
		/*
		 * (perf_hw_cache_id) | (perf_hw_cache_op_id << 8) |
		 * (perf_hw_cache_op_result_id << 16)
		 */
		PRINT_XLAT(", config=", perf_hw_cache_id, attr->config & 0xFF,
			   "PERF_COUNT_HW_CACHE_???");
		PRINT_XLAT("|", perf_hw_cache_op_id, (attr->config >> 8) & 0xFF,
			   "PERF_COUNT_HW_CACHE_OP_???");
		/*
		 * Current code (see set_ext_hw_attr in arch/x86/events/core.c,
		 * tile_map_cache_event in arch/tile/kernel/perf_event.c,
		 * arc_pmu_cache_event in arch/arc/kernel/perf_event.c,
		 * hw_perf_cache_event in arch/blackfin/kernel/perf_event.c,
		 * _hw_perf_cache_event in arch/metag/kernel/perf/perf_event.c,
		 * mipspmu_map_cache_event in arch/mips/kernel/perf_event_mipsxx.c,
		 * hw_perf_cache_event in arch/powerpc/perf/core-book3s.c,
		 * hw_perf_cache_event in arch/powerpc/perf/core-fsl-emb.c,
		 * hw_perf_cache_event in arch/sh/kernel/perf_event.c,
		 * sparc_map_cache_event in arch/sparc/kernel/perf_event.c,
		 * xtensa_pmu_cache_event in arch/xtensa/kernel/perf_event.c,
		 * armpmu_map_cache_event in drivers/perf/arm_pmu.c) assumes
		 * that cache result is 8 bits in size.
		 */
		PRINT_XLAT("<<8|", perf_hw_cache_op_result_id,
			   (attr->config >> 16) & 0xFF,
			   "PERF_COUNT_HW_CACHE_RESULT_???");
		tprints("<<16");
		if (attr->config >> 24) {
			tprintf("|%#" PRIx64 "<<24", attr->config >> 24);
			tprints_comment("PERF_COUNT_HW_CACHE_???");
		}
		break;
	case PERF_TYPE_RAW:
		/*
		 * "If type is PERF_TYPE_RAW, then a custom "raw" config
		 * value is needed. Most CPUs support events that are not
		 * covered by the "generalized" events. These are
		 * implementation defined; see your CPU manual (for example the
		 * Intel Volume 3B documentation or the AMD BIOS and Kernel
		 * Developer Guide). The libpfm4 library can be used to
		 * translate from the name in the architectural manuals
		 * to the raw hex value perf_event_open() expects in this
		 * field."
		 */
	case PERF_TYPE_BREAKPOINT:
		/*
		 * "If type is PERF_TYPE_BREAKPOINT, then leave config set
		 * to zero. Its parameters are set in other places."
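		 *
		 * Both PERF_TYPE_RAW and PERF_TYPE_BREAKPOINT fall through to
		 * the default case below, which prints config in hexadecimal.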
		 */
	default:
		tprintf(", config=%#" PRIx64, attr->config);
		break;
	}

	if (abbrev(tcp))
		goto print_perf_event_attr_out;

	if (attr->freq)
		tprintf(", sample_freq=%" PRIu64, attr->sample_freq);
	else
		tprintf(", sample_period=%" PRIu64, attr->sample_period);

	tprints(", sample_type=");
	printflags64(perf_event_sample_format, attr->sample_type,
		     "PERF_SAMPLE_???");

	tprints(", read_format=");
	printflags64(perf_event_read_format, attr->read_format,
		     "PERF_FORMAT_???");

	tprintf(", disabled=%u"
		", inherit=%u"
		", pinned=%u"
		", exclusive=%u"
		", exclude_user=%u"
		", exclude_kernel=%u"
		", exclude_hv=%u"
		", exclude_idle=%u"
		", mmap=%u"
		", comm=%u"
		", freq=%u"
		", inherit_stat=%u"
		", enable_on_exec=%u"
		", task=%u"
		", watermark=%u"
		", precise_ip=%u",
		attr->disabled,
		attr->inherit,
		attr->pinned,
		attr->exclusive,
		attr->exclude_user,
		attr->exclude_kernel,
		attr->exclude_hv,
		attr->exclude_idle,
		attr->mmap,
		attr->comm,
		attr->freq,
		attr->inherit_stat,
		attr->enable_on_exec,
		attr->task,
		attr->watermark,
		attr->precise_ip);
	tprints_comment(precise_ip_desc[attr->precise_ip]);
	tprintf(", mmap_data=%u"
		", sample_id_all=%u"
		", exclude_host=%u"
		", exclude_guest=%u"
		", exclude_callchain_kernel=%u"
		", exclude_callchain_user=%u"
		", mmap2=%u"
		", comm_exec=%u"
		", use_clockid=%u"
		", context_switch=%u"
		", write_backward=%u",
		attr->mmap_data,
		attr->sample_id_all,
		attr->exclude_host,
		attr->exclude_guest,
		attr->exclude_callchain_kernel,
		attr->exclude_callchain_user,
		attr->mmap2,
		attr->comm_exec,
		attr->use_clockid,
		attr->context_switch,
		attr->write_backward);

	/*
	 * Print it only in case it is non-zero, since it may contain flags we
	 * are not aware of.
	 */
	if (attr->__reserved_1) {
		tprintf(", __reserved_1=%#" PRIx64,
			(uint64_t) attr->__reserved_1);
		tprints_comment("Bits 63..28");
	}

	if (attr->watermark)
		tprintf(", wakeup_watermark=%u", attr->wakeup_watermark);
	else
		tprintf(", wakeup_events=%u", attr->wakeup_events);

	if (attr->type == PERF_TYPE_BREAKPOINT)
		/* Any combination of R/W with X is deemed invalid */
		PRINT_XLAT(", bp_type=", hw_breakpoint_type, attr->bp_type,
			   (attr->bp_type <=
				   (HW_BREAKPOINT_X | HW_BREAKPOINT_RW)) ?
				   "HW_BREAKPOINT_INVALID" :
				   "HW_BREAKPOINT_???");

	if (attr->type == PERF_TYPE_BREAKPOINT)
		tprintf(", bp_addr=%#" PRIx64, attr->bp_addr);
	else
		tprintf(", config1=%#" PRIx64, attr->config1);

	/*
	 * Fields after bp_addr/config1 are optional and may not be present;
	 * a check against size is needed.
	 */

	_PERF_CHECK_FIELD(bp_len);
	if (attr->type == PERF_TYPE_BREAKPOINT)
		tprintf(", bp_len=%" PRIu64, attr->bp_len);
	else
		tprintf(", config2=%#" PRIx64, attr->config2);

	_PERF_CHECK_FIELD(branch_sample_type);
	if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
		tprints(", branch_sample_type=");
		printflags64(perf_branch_sample_type, attr->branch_sample_type,
			     "PERF_SAMPLE_BRANCH_???");
	}

	_PERF_CHECK_FIELD(sample_regs_user);
	/*
	 * "This bit mask defines the set of user CPU registers to dump on
	 * samples. The layout of the register mask is architecture-specific
	 * and described in the kernel header
	 * arch/ARCH/include/uapi/asm/perf_regs.h."
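	 *
	 * For example, on x86 bit 0 of the mask selects PERF_REG_X86_AX
	 * (see arch/x86/include/uapi/asm/perf_regs.h).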
	 */
	tprintf(", sample_regs_user=%#" PRIx64, attr->sample_regs_user);

	_PERF_CHECK_FIELD(sample_stack_user);
	/*
	 * "size of the user stack to dump if PERF_SAMPLE_STACK_USER is
	 * specified."
	 */
	if (attr->sample_type & PERF_SAMPLE_STACK_USER)
		tprintf(", sample_stack_user=%#" PRIx32,
			attr->sample_stack_user);

	if (attr->use_clockid) {
		_PERF_CHECK_FIELD(clockid);
		tprints(", clockid=");
		printxval(clocknames, attr->clockid, "CLOCK_???");
	}

	_PERF_CHECK_FIELD(sample_regs_intr);
	tprintf(", sample_regs_intr=%#" PRIx64, attr->sample_regs_intr);

	_PERF_CHECK_FIELD(aux_watermark);
	tprintf(", aux_watermark=%" PRIu32, attr->aux_watermark);

	_PERF_CHECK_FIELD(sample_max_stack);
	tprintf(", sample_max_stack=%" PRIu16, attr->sample_max_stack);

	/* _PERF_CHECK_FIELD(__reserved_2);
	tprintf(", __reserved2=%" PRIu16, attr->__reserved_2); */

print_perf_event_attr_out:
	if ((attr->size && (attr->size > size)) ||
	    (!attr->size && (size < PERF_ATTR_SIZE_VER0)))
		tprints(", ...");

	tprints("}");
}

/*
 * perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
 *                 int group_fd, unsigned long flags)
 */
SYS_FUNC(perf_event_open)
{
	/*
	 * We try to copy out the whole structure on entering in order to check
	 * the size value on exiting.  We do not check the rest of the fields
	 * because they shouldn't be changed, but we copy the whole structure
	 * instead of just the size field because they could be.
	 */
	if (entering(tcp)) {
		if (!fetch_perf_event_attr(tcp, tcp->u_arg[0]))
			return 0;
	} else {
		print_perf_event_attr(tcp, tcp->u_arg[0]);
	}

	tprintf(", %d, %d, %d, ",
		(int) tcp->u_arg[1],
		(int) tcp->u_arg[2],
		(int) tcp->u_arg[3]);
	printflags64(perf_event_open_flags, tcp->u_arg[4], "PERF_FLAG_???");

	return RVAL_DECODED | RVAL_FD;
}