1#!/usr/bin/env python 2# 3# Copyright (C) 2017 The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16# 17 18"""pprof_proto_generator.py: read perf.data, generate pprof.profile, which can be 19 used by pprof. 20 21 Example: 22 python app_profiler.py 23 python pprof_proto_generator.py 24 pprof -text pprof.profile 25""" 26 27from __future__ import print_function 28import argparse 29import os 30import os.path 31import profile_pb2 32import re 33import shutil 34import sys 35import time 36 37from annotate import Addr2Line 38from simpleperf_report_lib import * 39from utils import * 40 41 42def load_pprof_profile(filename): 43 profile = profile_pb2.Profile() 44 with open(filename, "rb") as f: 45 profile.ParseFromString(f.read()) 46 return profile 47 48 49def store_pprof_profile(filename, profile): 50 with open(filename, 'wb') as f: 51 f.write(profile.SerializeToString()) 52 53 54class PprofProfilePrinter(object): 55 56 def __init__(self, profile): 57 self.profile = profile 58 self.string_table = profile.string_table 59 60 def show(self): 61 p = self.profile 62 sub_space = ' ' 63 print('Profile {') 64 print('%d sample_types' % len(p.sample_type)) 65 for i in range(len(p.sample_type)): 66 print('sample_type[%d] = ' % i, end='') 67 self.show_value_type(p.sample_type[i]) 68 print('%d samples' % len(p.sample)) 69 for i in range(len(p.sample)): 70 print('sample[%d]:' % i) 71 self.show_sample(p.sample[i], sub_space) 72 print('%d mappings' % len(p.mapping)) 73 for i in range(len(p.mapping)): 74 print('mapping[%d]:' % i) 75 self.show_mapping(p.mapping[i], sub_space) 76 print('%d locations' % len(p.location)) 77 for i in range(len(p.location)): 78 print('location[%d]:' % i) 79 self.show_location(p.location[i], sub_space) 80 for i in range(len(p.function)): 81 print('function[%d]:' % i) 82 self.show_function(p.function[i], sub_space) 83 print('%d strings' % len(p.string_table)) 84 for i in range(len(p.string_table)): 85 print('string[%d]: %s' % (i, p.string_table[i])) 86 print('drop_frames: %s' % self.string(p.drop_frames)) 87 print('keep_frames: %s' % self.string(p.keep_frames)) 88 print('time_nanos: %u' % p.time_nanos) 89 print('duration_nanos: %u' % p.duration_nanos) 90 print('period_type: ', end='') 91 self.show_value_type(p.period_type) 92 print('period: %u' % p.period) 93 for i in range(len(p.comment)): 94 print('comment[%d] = %s' % (i, self.string(p.comment[i]))) 95 print('default_sample_type: %d' % p.default_sample_type) 96 print('} // Profile') 97 print() 98 99 def show_value_type(self, value_type, space=''): 100 print('%sValueType(typeID=%d, unitID=%d, type=%s, unit=%s)' % 101 (space, value_type.type, value_type.unit, 102 self.string(value_type.type), self.string(value_type.unit))) 103 104 def show_sample(self, sample, space=''): 105 sub_space = space + ' ' 106 for i in range(len(sample.location_id)): 107 print('%slocation_id[%d]: id %d' % (space, i, sample.location_id[i])) 108 self.show_location_id(sample.location_id[i], sub_space) 109 for i in range(len(sample.value)): 110 print('%svalue[%d] = %d' % (space, i, sample.value[i])) 111 for i in range(len(sample.label)): 112 print('%slabel[%d] = ', (space, i)) 113 114 def show_location_id(self, location_id, space=''): 115 location = self.profile.location[location_id - 1] 116 self.show_location(location, space) 117 118 def show_location(self, location, space=''): 119 sub_space = space + ' ' 120 print('%sid: %d' % (space, location.id)) 121 print('%smapping_id: %d' % (space, location.mapping_id)) 122 self.show_mapping_id(location.mapping_id, sub_space) 123 print('%saddress: %x' % (space, location.address)) 124 for i in range(len(location.line)): 125 print('%sline[%d]:' % (space, i)) 126 self.show_line(location.line[i], sub_space) 127 128 def show_mapping_id(self, mapping_id, space=''): 129 mapping = self.profile.mapping[mapping_id - 1] 130 self.show_mapping(mapping, space) 131 132 def show_mapping(self, mapping, space=''): 133 print('%sid: %d' % (space, mapping.id)) 134 print('%smemory_start: %x' % (space, mapping.memory_start)) 135 print('%smemory_limit: %x' % (space, mapping.memory_limit)) 136 print('%sfile_offset: %x' % (space, mapping.file_offset)) 137 print('%sfilename: %s(%d)' % (space, self.string(mapping.filename), 138 mapping.filename)) 139 print('%sbuild_id: %s(%d)' % (space, self.string(mapping.build_id), 140 mapping.build_id)) 141 print('%shas_functions: %s' % (space, mapping.has_functions)) 142 print('%shas_filenames: %s' % (space, mapping.has_filenames)) 143 print('%shas_line_numbers: %s' % (space, mapping.has_line_numbers)) 144 print('%shas_inline_frames: %s' % (space, mapping.has_inline_frames)) 145 146 def show_line(self, line, space=''): 147 sub_space = space + ' ' 148 print('%sfunction_id: %d' % (space, line.function_id)) 149 self.show_function_id(line.function_id, sub_space) 150 print('%sline: %d' % (space, line.line)) 151 152 def show_function_id(self, function_id, space=''): 153 function = self.profile.function[function_id - 1] 154 self.show_function(function, space) 155 156 def show_function(self, function, space=''): 157 print('%sid: %d' % (space, function.id)) 158 print('%sname: %s' % (space, self.string(function.name))) 159 print('%ssystem_name: %s' % (space, self.string(function.system_name))) 160 print('%sfilename: %s' % (space, self.string(function.filename))) 161 print('%sstart_line: %d' % (space, function.start_line)) 162 163 def show_label(self, label, space=''): 164 print('%sLabel(%s =', space, self.string(label.key), end='') 165 if label.HasField('str'): 166 print('%s)' % self.get_string(label.str)) 167 else: 168 print('%d)' % label.num) 169 170 def string(self, id): 171 return self.string_table[id] 172 173 174class Sample(object): 175 176 def __init__(self): 177 self.location_ids = [] 178 self.values = {} 179 180 def add_location_id(self, location_id): 181 self.location_ids.append(location_id) 182 183 def add_value(self, id, value): 184 self.values[id] = self.values.get(id, 0) + value 185 186 def add_values(self, values): 187 for id in values.keys(): 188 value = values[id] 189 self.add_value(id, value) 190 191 @property 192 def key(self): 193 return tuple(self.location_ids) 194 195 196class Location(object): 197 198 def __init__(self, mapping_id, address, vaddr_in_dso): 199 self.id = -1 # unset 200 self.mapping_id = mapping_id 201 self.address = address 202 self.vaddr_in_dso = vaddr_in_dso 203 self.lines = [] 204 205 @property 206 def key(self): 207 return (self.mapping_id, self.address) 208 209 210class Line(object): 211 212 def __init__(self): 213 self.function_id = 0 214 self.line = 0 215 216 217class Mapping(object): 218 219 def __init__(self, start, end, pgoff, filename_id, build_id_id): 220 self.id = -1 # unset 221 self.memory_start = start 222 self.memory_limit = end 223 self.file_offset = pgoff 224 self.filename_id = filename_id 225 self.build_id_id = build_id_id 226 227 @property 228 def key(self): 229 return ( 230 self.memory_start, 231 self.memory_limit, 232 self.file_offset, 233 self.filename_id, 234 self.build_id_id) 235 236 237class Function(object): 238 239 def __init__(self, name_id, dso_name_id, vaddr_in_dso): 240 self.id = -1 # unset 241 self.name_id = name_id 242 self.dso_name_id = dso_name_id 243 self.vaddr_in_dso = vaddr_in_dso 244 self.source_filename_id = 0 245 self.start_line = 0 246 247 @property 248 def key(self): 249 return (self.name_id, self.dso_name_id) 250 251 252class PprofProfileGenerator(object): 253 254 def __init__(self, config): 255 self.config = config 256 self.lib = ReportLib() 257 258 config['binary_cache_dir'] = 'binary_cache' 259 if not os.path.isdir(config['binary_cache_dir']): 260 config['binary_cache_dir'] = None 261 else: 262 self.lib.SetSymfs(config['binary_cache_dir']) 263 if config.get('record_file'): 264 self.lib.SetRecordFile(config['record_file']) 265 kallsyms = 'binary_cache/kallsyms' 266 if os.path.isfile(kallsyms): 267 self.lib.SetKallsymsFile(kallsyms) 268 self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None 269 if config.get('pid_filters'): 270 self.pid_filter = {int(x) for x in config['pid_filters']} 271 else: 272 self.pid_filter = None 273 if config.get('tid_filters'): 274 self.tid_filter = {int(x) for x in config['tid_filters']} 275 else: 276 self.tid_filter = None 277 self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None 278 279 def gen(self): 280 self.profile = profile_pb2.Profile() 281 self.profile.string_table.append('') 282 self.string_table = {} 283 self.sample_types = {} 284 self.sample_map = {} 285 self.sample_list = [] 286 self.location_map = {} 287 self.location_list = [] 288 self.mapping_map = {} 289 self.mapping_list = [] 290 self.function_map = {} 291 self.function_list = [] 292 293 # 1. Process all samples in perf.data, aggregate samples. 294 while True: 295 report_sample = self.lib.GetNextSample() 296 if report_sample is None: 297 self.lib.Close() 298 break 299 event = self.lib.GetEventOfCurrentSample() 300 symbol = self.lib.GetSymbolOfCurrentSample() 301 callchain = self.lib.GetCallChainOfCurrentSample() 302 303 if not self._filter_report_sample(report_sample): 304 continue 305 306 sample_type_id = self.get_sample_type_id(event.name) 307 sample = Sample() 308 sample.add_value(sample_type_id, 1) 309 sample.add_value(sample_type_id + 1, report_sample.period) 310 if self._filter_symbol(symbol): 311 location_id = self.get_location_id(symbol.vaddr_in_file, symbol) 312 sample.add_location_id(location_id) 313 for i in range(callchain.nr): 314 entry = callchain.entries[i] 315 if self._filter_symbol(symbol): 316 location_id = self.get_location_id(entry.ip, entry.symbol) 317 sample.add_location_id(location_id) 318 if sample.location_ids: 319 self.add_sample(sample) 320 321 # 2. Generate line info for locations and functions. 322 self.gen_source_lines() 323 324 # 3. Produce samples/locations/functions in profile 325 for sample in self.sample_list: 326 self.gen_profile_sample(sample) 327 for mapping in self.mapping_list: 328 self.gen_profile_mapping(mapping) 329 for location in self.location_list: 330 self.gen_profile_location(location) 331 for function in self.function_list: 332 self.gen_profile_function(function) 333 334 return self.profile 335 336 def _filter_report_sample(self, sample): 337 """Return true if the sample can be used.""" 338 if self.comm_filter: 339 if sample.thread_comm not in self.comm_filter: 340 return False 341 if self.pid_filter: 342 if sample.pid not in self.pid_filter: 343 return False 344 if self.tid_filter: 345 if sample.tid not in self.tid_filter: 346 return False 347 return True 348 349 def _filter_symbol(self, symbol): 350 if not self.dso_filter or symbol.dso_name in self.dso_filter: 351 return True 352 return False 353 354 def get_string_id(self, str): 355 if len(str) == 0: 356 return 0 357 id = self.string_table.get(str) 358 if id is not None: 359 return id 360 id = len(self.string_table) + 1 361 self.string_table[str] = id 362 self.profile.string_table.append(str) 363 return id 364 365 def get_string(self, string_id): 366 return self.profile.string_table[string_id] 367 368 def get_sample_type_id(self, name): 369 id = self.sample_types.get(name) 370 if id is not None: 371 return id 372 id = len(self.profile.sample_type) 373 sample_type = self.profile.sample_type.add() 374 sample_type.type = self.get_string_id('event_' + name + '_samples') 375 sample_type.unit = self.get_string_id('count') 376 sample_type = self.profile.sample_type.add() 377 sample_type.type = self.get_string_id('event_' + name + '_count') 378 sample_type.unit = self.get_string_id('count') 379 self.sample_types[name] = id 380 return id 381 382 def get_location_id(self, ip, symbol): 383 mapping_id = self.get_mapping_id(symbol.mapping[0], symbol.dso_name) 384 location = Location(mapping_id, ip, symbol.vaddr_in_file) 385 function_id = self.get_function_id(symbol.symbol_name, symbol.dso_name, 386 symbol.symbol_addr) 387 if function_id: 388 # Add Line only when it has a valid function id, see http://b/36988814. 389 # Default line info only contains the function name 390 line = Line() 391 line.function_id = function_id 392 location.lines.append(line) 393 394 exist_location = self.location_map.get(location.key) 395 if exist_location: 396 return exist_location.id 397 # location_id starts from 1 398 location.id = len(self.location_list) + 1 399 self.location_list.append(location) 400 self.location_map[location.key] = location 401 return location.id 402 403 def get_mapping_id(self, report_mapping, filename): 404 filename_id = self.get_string_id(filename) 405 build_id = self.lib.GetBuildIdForPath(filename) 406 if build_id and build_id[0:2] == "0x": 407 build_id = build_id[2:] 408 build_id_id = self.get_string_id(build_id) 409 mapping = Mapping(report_mapping.start, report_mapping.end, 410 report_mapping.pgoff, filename_id, build_id_id) 411 exist_mapping = self.mapping_map.get(mapping.key) 412 if exist_mapping: 413 return exist_mapping.id 414 # mapping_id starts from 1 415 mapping.id = len(self.mapping_list) + 1 416 self.mapping_list.append(mapping) 417 self.mapping_map[mapping.key] = mapping 418 return mapping.id 419 420 def get_mapping(self, mapping_id): 421 return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None 422 423 def get_function_id(self, name, dso_name, vaddr_in_file): 424 if name == 'unknown': 425 return 0 426 function = Function(self.get_string_id(name), self.get_string_id(dso_name), vaddr_in_file) 427 exist_function = self.function_map.get(function.key) 428 if exist_function: 429 return exist_function.id 430 # function_id starts from 1 431 function.id = len(self.function_list) + 1 432 self.function_list.append(function) 433 self.function_map[function.key] = function 434 return function.id 435 436 def get_function(self, function_id): 437 return self.function_list[function_id - 1] if function_id > 0 else None 438 439 def add_sample(self, sample): 440 exist_sample = self.sample_map.get(sample.key) 441 if exist_sample: 442 exist_sample.add_values(sample.values) 443 else: 444 self.sample_list.append(sample) 445 self.sample_map[sample.key] = sample 446 447 def gen_source_lines(self): 448 # 1. Create Addr2line instance 449 if not self.config.get('binary_cache_dir'): 450 log_info("Can't generate line information because binary_cache is missing.") 451 return 452 if not self.config['addr2line_path'] or not is_executable_available( 453 self.config['addr2line_path']): 454 if not find_tool_path('addr2line'): 455 log_info("Can't generate line information because can't find addr2line.") 456 return 457 458 addr2line = Addr2Line(self.config['addr2line_path'], self.config['binary_cache_dir']) 459 460 # 2. Put all needed addresses to it. 461 for location in self.location_list: 462 mapping = self.get_mapping(location.mapping_id) 463 dso_name = self.get_string(mapping.filename_id) 464 addr2line.add_addr(dso_name, location.vaddr_in_dso) 465 for function in self.function_list: 466 dso_name = self.get_string(function.dso_name_id) 467 addr2line.add_addr(dso_name, function.vaddr_in_dso) 468 469 # 3. Generate source lines. 470 addr2line.convert_addrs_to_lines() 471 472 # 4. Annotate locations and functions. 473 for location in self.location_list: 474 mapping = self.get_mapping(location.mapping_id) 475 dso_name = self.get_string(mapping.filename_id) 476 sources = addr2line.get_sources(dso_name, location.vaddr_in_dso) 477 source_id = 0 478 for source in sources: 479 if source.file and source.function and source.line: 480 function_id = self.get_function_id(source.function, dso_name, 0) 481 if function_id == 0: 482 continue 483 if source_id == 0: 484 # Clear default line info 485 location.lines = [] 486 location.lines.append(self.add_line(source, dso_name, function_id)) 487 source_id += 1 488 489 for function in self.function_list: 490 dso_name = self.get_string(function.dso_name_id) 491 if function.vaddr_in_dso: 492 sources = addr2line.get_sources(dso_name, function.vaddr_in_dso) 493 source = sources[0] if sources else None 494 if source and source.file: 495 function.source_filename_id = self.get_string_id(source.file) 496 if source.line: 497 function.start_line = source.line 498 499 def add_line(self, source, dso_name, function_id): 500 line = Line() 501 function = self.get_function(function_id) 502 function.source_filename_id = self.get_string_id(source.file) 503 line.function_id = function_id 504 line.line = source.line 505 return line 506 507 def gen_profile_sample(self, sample): 508 profile_sample = self.profile.sample.add() 509 profile_sample.location_id.extend(sample.location_ids) 510 sample_type_count = len(self.sample_types) * 2 511 values = [0] * sample_type_count 512 for id in sample.values.keys(): 513 values[id] = sample.values[id] 514 profile_sample.value.extend(values) 515 516 def gen_profile_mapping(self, mapping): 517 profile_mapping = self.profile.mapping.add() 518 profile_mapping.id = mapping.id 519 profile_mapping.memory_start = mapping.memory_start 520 profile_mapping.memory_limit = mapping.memory_limit 521 profile_mapping.file_offset = mapping.file_offset 522 profile_mapping.filename = mapping.filename_id 523 profile_mapping.build_id = mapping.build_id_id 524 profile_mapping.has_filenames = True 525 profile_mapping.has_functions = True 526 if self.config.get('binary_cache_dir'): 527 profile_mapping.has_line_numbers = True 528 profile_mapping.has_inline_frames = True 529 else: 530 profile_mapping.has_line_numbers = False 531 profile_mapping.has_inline_frames = False 532 533 def gen_profile_location(self, location): 534 profile_location = self.profile.location.add() 535 profile_location.id = location.id 536 profile_location.mapping_id = location.mapping_id 537 profile_location.address = location.address 538 for i in range(len(location.lines)): 539 line = profile_location.line.add() 540 line.function_id = location.lines[i].function_id 541 line.line = location.lines[i].line 542 543 def gen_profile_function(self, function): 544 profile_function = self.profile.function.add() 545 profile_function.id = function.id 546 profile_function.name = function.name_id 547 profile_function.system_name = function.name_id 548 profile_function.filename = function.source_filename_id 549 profile_function.start_line = function.start_line 550 551 552def main(): 553 parser = argparse.ArgumentParser(description='Generate pprof profile data in pprof.profile.') 554 parser.add_argument('--show', nargs='?', action='append', help='print existing pprof.profile.') 555 parser.add_argument('-i', '--perf_data_path', default='perf.data', help= 556"""The path of profiling data.""") 557 parser.add_argument('-o', '--output_file', default='pprof.profile', help= 558"""The path of generated pprof profile data.""") 559 parser.add_argument('--comm', nargs='+', action='append', help= 560"""Use samples only in threads with selected names.""") 561 parser.add_argument('--pid', nargs='+', action='append', help= 562"""Use samples only in processes with selected process ids.""") 563 parser.add_argument('--tid', nargs='+', action='append', help= 564"""Use samples only in threads with selected thread ids.""") 565 parser.add_argument('--dso', nargs='+', action='append', help= 566"""Use samples only in selected binaries.""") 567 parser.add_argument('--addr2line', help= 568"""Set the path of addr2line.""") 569 570 args = parser.parse_args() 571 if args.show: 572 show_file = args.show[0] if args.show[0] else 'pprof.profile' 573 profile = load_pprof_profile(show_file) 574 printer = PprofProfilePrinter(profile) 575 printer.show() 576 return 577 578 config = {} 579 config['perf_data_path'] = args.perf_data_path 580 config['output_file'] = args.output_file 581 config['comm_filters'] = flatten_arg_list(args.comm) 582 config['pid_filters'] = flatten_arg_list(args.pid) 583 config['tid_filters'] = flatten_arg_list(args.tid) 584 config['dso_filters'] = flatten_arg_list(args.dso) 585 config['addr2line_path'] = args.addr2line 586 generator = PprofProfileGenerator(config) 587 profile = generator.gen() 588 store_pprof_profile(config['output_file'], profile) 589 590 591if __name__ == '__main__': 592 main() 593