#!/usr/bin/env python
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""annotate.py: annotate source files based on perf.data.
"""


import argparse
import os
import os.path
import shutil
import subprocess
import sys

from simpleperf_report_lib import *
from utils import *


def _is_path_suffix(suffix, path):
    """Return True if `suffix` is `path` itself or a whole-component tail of it.

    A plain substring test would treat 'b/foo.cpp' as part of 'lib/foo.cpp';
    here the character in front of the matched tail must be a path separator
    ('/' or os.sep). An empty `suffix` never matches.
    """
    if not suffix or not path.endswith(suffix):
        return False
    if len(suffix) == len(path):
        return True
    return path[len(path) - len(suffix) - 1] in ('/', os.sep)


class SourceLine(object):
    """One source location: a file, a function name and a line number.

    Inside Addr2Line the `file` field holds an integer file id; the objects
    returned by Addr2Line.get_sources() hold the file name instead.
    """
    def __init__(self, file, function, line):
        self.file = file
        self.function = function
        self.line = line

    @property
    def file_key(self):
        """Key identifying the file of this location."""
        return self.file

    @property
    def function_key(self):
        """Key identifying the (file, function) pair of this location."""
        return (self.file, self.function)

    @property
    def line_key(self):
        """Key identifying the (file, line) pair of this location."""
        return (self.file, self.line)


# TODO: using addr2line can't convert from function_start_address to
# source_file:line very well for java code. Because in .debug_line section,
# there is some distance between function_start_address and the address
# of the first instruction which can be mapped to source line.
class Addr2Line(object):
    """collect information of how to map [dso_name,vaddr] to [source_file:line].

    Usage: register addresses with add_addr(), run convert_addrs_to_lines()
    once, then query with get_sources().
    """
    def __init__(self, addr2line_path, symfs_dir=None):
        """Locate the addr2line tool.

        addr2line_path: preferred addr2line executable; when it is empty or
            not usable, find_tool_path('addr2line') is consulted and the
            script exits through log_exit() if that fails too.
        symfs_dir: optional directory that is searched first for binaries.
        """
        # map from dso_name to {addr: None or list of SourceLine}
        self.dso_dict = dict()
        # Rebuilt by convert_addrs_to_lines(); created here so that
        # get_sources() does not fail when called before conversion.
        self.file_list = ['']
        self.file_dict = {'': 0}
        if addr2line_path and is_executable_available(addr2line_path):
            self.addr2line_path = addr2line_path
        else:
            self.addr2line_path = find_tool_path('addr2line')
            if not self.addr2line_path:
                log_exit("Can't find addr2line.")
        self.symfs_dir = symfs_dir

    def add_addr(self, dso_name, addr):
        """Register `addr` of `dso_name` for a later conversion."""
        dso = self.dso_dict.get(dso_name)
        if dso is None:
            self.dso_dict[dso_name] = dso = dict()
        if addr not in dso:
            dso[addr] = None

    def convert_addrs_to_lines(self):
        """Run addr2line for every registered dso and merge file names."""
        # store a list of source files
        self.file_list = []
        # map from file to id with file_list[id] == file
        self.file_dict = {}
        # id 0 is reserved for "unknown file" and maps to the empty name.
        self.file_list.append('')
        self.file_dict[''] = 0

        for dso_name in self.dso_dict.keys():
            self._convert_addrs_to_lines(dso_name, self.dso_dict[dso_name])
        self._combine_source_files()

    def _convert_addrs_to_lines(self, dso_name, dso):
        """Fill `dso` ({addr: ...}) with lists of SourceLine for each address.

        The addresses are fed to `addr2line -e <binary> -aifC` on stdin. The
        output is parsed as one "0x..." row per address followed by pairs of
        rows (function name, then "file:line"). When the binary can't be
        found, `dso` is emptied and a warning is logged.
        """
        dso_path = self._find_dso_path(dso_name)
        if dso_path is None:
            log_warning("can't find dso '%s'" % dso_name)
            dso.clear()
            return
        addrs = sorted(dso.keys())
        addr_str = '\n'.join(['0x%x' % addr for addr in addrs])
        subproc = subprocess.Popen([self.addr2line_path, '-e', dso_path, '-aifC'],
                                   stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        (stdoutdata, _) = subproc.communicate(str_to_bytes(addr_str))
        stdoutdata = bytes_to_str(stdoutdata)
        # Drop a trailing '\r' so CRLF output does not leak into names.
        stdoutdata = [row.rstrip('\r') for row in stdoutdata.strip().split('\n')]
        if len(stdoutdata) < len(addrs):
            log_fatal("addr2line didn't output enough lines")
        addr_pos = 0
        out_pos = 0
        while addr_pos < len(addrs) and out_pos < len(stdoutdata):
            addr_line = stdoutdata[out_pos]
            out_pos += 1
            assert addr_line[:2] == "0x"
            assert out_pos < len(stdoutdata)
            source_lines = []
            while out_pos < len(stdoutdata) and stdoutdata[out_pos][:2] != "0x":
                function = stdoutdata[out_pos]
                out_pos += 1
                assert out_pos < len(stdoutdata)
                # Handle lines like "C:\Users\...\file:32".
                items = stdoutdata[out_pos].rsplit(':', 1)
                if len(items) != 2:
                    # Skip the malformed location row too; otherwise it would
                    # be read as the function name of the next frame and the
                    # rest of the output would be parsed out of step.
                    out_pos += 1
                    continue
                (file_name, line_text) = items
                out_pos += 1
                # Remove comments after line number. Nothing after the colon
                # is treated as an unknown line.
                line_fields = line_text.split()
                line_text = line_fields[0] if line_fields else '?'
                if file_name.find('?') != -1:
                    file_id = 0
                else:
                    file_id = self._get_file_id(file_name)
                if line_text.find('?') != -1:
                    line = 0
                else:
                    line = int(line_text)
                source_lines.append(SourceLine(file_id, function, line))
            dso[addrs[addr_pos]] = source_lines
            addr_pos += 1
        assert addr_pos == len(addrs)

    def _get_file_id(self, file):
        """Return the id of `file`, allocating a new id on first use."""
        file_id = self.file_dict.get(file)
        if file_id is None:
            file_id = len(self.file_list)
            self.file_list.append(file)
            self.file_dict[file] = file_id
        return file_id

    def _combine_source_files(self):
        """It is possible that addr2line gives us different names for the same
           file, like:
            /usr/local/.../src/main/jni/sudo-game-jni.cpp
            sudo-game-jni.cpp
           We'd better combine these two files. We can do it by combining
           source files with no conflicts in path.

           A name is merged into the longest other name that ends with it on
           a path component boundary. Merging rewrites file_list[id] only;
           file_dict keeps the names as reported by addr2line.
        """
        # Collect files having the same filename.
        filename_dict = dict()
        for file_path in self.file_list:
            index = max(file_path.rfind('/'), file_path.rfind(os.sep))
            filename_dict.setdefault(file_path[index+1:], []).append(file_path)

        # Combine files having the same filename and having no conflicts in path.
        for files in filename_dict.values():
            if len(files) == 1:
                continue
            for file_path in files:
                to_file = file_path
                # Test if we can merge file_path with another file having
                # longer path. The empty placeholder name never matches, so
                # id 0 stays "unknown".
                for candidate in files:
                    if (len(candidate) > len(to_file) and
                            _is_path_suffix(file_path, candidate)):
                        to_file = candidate
                if to_file != file_path:
                    from_id = self.file_dict[file_path]
                    to_id = self.file_dict[to_file]
                    self.file_list[from_id] = self.file_list[to_id]

    def get_sources(self, dso_name, addr):
        """Return a list of SourceLine (with file names) for an address.

        The list is empty when the dso or the address is unknown, or when the
        address has not been converted.
        """
        dso = self.dso_dict.get(dso_name)
        if dso is None:
            return []
        # A registered but unconverted address is stored as None.
        item = dso.get(addr) or []
        return [SourceLine(self.file_list[source.file], source.function, source.line)
                for source in item]

    def _find_dso_path(self, dso):
        """Return the host path of binary `dso`, or None if it isn't found.

        Only names starting with '/' (except '//anon') are looked up, first
        under symfs_dir and then as given.
        """
        if not dso or dso[0] != '/' or dso == '//anon':
            return None
        if self.symfs_dir:
            dso_path = os.path.join(self.symfs_dir, dso[1:])
            if os.path.isfile(dso_path):
                return dso_path
        if os.path.isfile(dso):
            return dso
        return None


class Period(object):
    """event count information. It can be used to represent event count
       of a line, a function, a source file, or a binary. It contains two
       parts: period and acc_period.
       When used for a line, period is the event count occurred when running
       that line, acc_period is the accumulated event count occurred when
       running that line and functions called by that line. Same thing applies
       when it is used for a function, a source file, or a binary.
    """
    def __init__(self, period=0, acc_period=0):
        self.period = period
        self.acc_period = acc_period

    def __iadd__(self, other):
        """Add both counters of `other` to this object in place."""
        self.period += other.period
        self.acc_period += other.acc_period
        return self


class DsoPeriod(object):
    """Period for each shared library"""
    def __init__(self, dso_name):
        self.dso_name = dso_name
        self.period = Period()

    def add_period(self, period):
        """Accumulate `period` into the total of this binary."""
        self.period += period


class FilePeriod(object):
    """Period for each source file"""
    def __init__(self, file):
        self.file = file
        self.period = Period()
        # Period for each line in the file.
        self.line_dict = {}
        # Period for each function in the source file.
        # Values are [function_start_line, Period].
        self.function_dict = {}

    def add_period(self, period):
        """Accumulate `period` into the total of this file."""
        self.period += period

    def add_line_period(self, line, period):
        """Accumulate `period` into the entry of line number `line`."""
        line_period = self.line_dict.get(line)
        if line_period is None:
            self.line_dict[line] = line_period = Period()
        line_period += period

    def add_function_period(self, function_name, function_start_line, period):
        """Accumulate `period` into the entry of `function_name`.

        The start line is recorded when the function is first seen; None is
        stored as -1.
        """
        entry = self.function_dict.get(function_name)
        if not entry:
            if function_start_line is None:
                function_start_line = -1
            self.function_dict[function_name] = entry = [function_start_line, Period()]
        entry[1] += period


class SourceFileAnnotator(object):
    """group code for annotating source files"""
    def __init__(self, config):
        """Validate `config` and prepare the output directory.

        config: dict with the keys 'perf_data_list', 'source_dirs',
            'comm_filters', 'pid_filters', 'tid_filters', 'dso_filters' and
            'addr2line_path'. The key 'annotate_dest_dir' is set here.

        Exits through log_exit() when a key is missing, a source directory
        is not a directory, or no source directory is given. An existing
        'annotated_files' directory is removed and recreated.
        """
        # check config variables
        config_names = ['perf_data_list', 'source_dirs', 'comm_filters',
                        'pid_filters', 'tid_filters', 'dso_filters', 'addr2line_path']
        for name in config_names:
            if name not in config:
                log_exit('config [%s] is missing' % name)
        symfs_dir = 'binary_cache'
        if not os.path.isdir(symfs_dir):
            symfs_dir = None
        kallsyms = 'binary_cache/kallsyms'
        if not os.path.isfile(kallsyms):
            kallsyms = None
        source_dirs = config['source_dirs']
        for source_dir in source_dirs:
            if not os.path.isdir(source_dir):
                log_exit('[source_dirs] "%s" is not a dir' % source_dir)
        if not config['source_dirs']:
            log_exit('Please set source directories.')

        # init member variables
        self.config = config
        self.symfs_dir = symfs_dir
        self.kallsyms = kallsyms
        self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
        if config.get('pid_filters'):
            self.pid_filter = {int(x) for x in config['pid_filters']}
        else:
            self.pid_filter = None
        if config.get('tid_filters'):
            self.tid_filter = {int(x) for x in config['tid_filters']}
        else:
            self.tid_filter = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None

        config['annotate_dest_dir'] = 'annotated_files'
        output_dir = config['annotate_dest_dir']
        if os.path.isdir(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        self.addr2line = Addr2Line(self.config['addr2line_path'], symfs_dir)

    def annotate(self):
        """Run the whole pipeline: collect, convert, summarize, annotate."""
        self._collect_addrs()
        self._convert_addrs_to_lines()
        self._generate_periods()
        self._write_summary()
        self._collect_source_files()
        self._annotate_files()

    def _open_report_lib(self, perf_data):
        """Return a ReportLib reading `perf_data`, set up with symfs/kallsyms."""
        lib = ReportLib()
        lib.SetRecordFile(perf_data)
        if self.symfs_dir:
            lib.SetSymfs(self.symfs_dir)
        if self.kallsyms:
            lib.SetKallsymsFile(self.kallsyms)
        return lib

    @staticmethod
    def _get_sample_symbols(lib):
        """Return the symbol of the current sample followed by its callchain."""
        symbols = [lib.GetSymbolOfCurrentSample()]
        callchain = lib.GetCallChainOfCurrentSample()
        for i in range(callchain.nr):
            symbols.append(callchain.entries[i].symbol)
        return symbols

    def _collect_addrs(self):
        """Read perf.data, collect all addresses we need to convert to
           source file:line.
        """
        for perf_data in self.config['perf_data_list']:
            lib = self._open_report_lib(perf_data)
            try:
                while True:
                    sample = lib.GetNextSample()
                    if sample is None:
                        break
                    if not self._filter_sample(sample):
                        continue
                    for symbol in self._get_sample_symbols(lib):
                        if self._filter_symbol(symbol):
                            self.addr2line.add_addr(symbol.dso_name, symbol.vaddr_in_file)
                            self.addr2line.add_addr(symbol.dso_name, symbol.symbol_addr)
            finally:
                # Close the report lib even when processing a sample fails.
                lib.Close()

    def _filter_sample(self, sample):
        """Return true if the sample can be used."""
        if self.comm_filter:
            if sample.thread_comm not in self.comm_filter:
                return False
        if self.pid_filter:
            if sample.pid not in self.pid_filter:
                return False
        if self.tid_filter:
            if sample.tid not in self.tid_filter:
                return False
        return True

    def _filter_symbol(self, symbol):
        """Return true if the symbol's binary passes the dso filter."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def _convert_addrs_to_lines(self):
        """Convert all collected addresses to source lines."""
        self.addr2line.convert_addrs_to_lines()

    def _generate_periods(self):
        """read perf.data, collect Period for all types:
            binaries, source files, functions, lines.
        """
        self.period = 0
        self.dso_periods = dict()
        self.file_periods = dict()
        for perf_data in self.config['perf_data_list']:
            lib = self._open_report_lib(perf_data)
            try:
                while True:
                    sample = lib.GetNextSample()
                    if sample is None:
                        break
                    if not self._filter_sample(sample):
                        continue
                    self._add_sample_periods(sample, self._get_sample_symbols(lib))
            finally:
                # Close the report lib even when processing a sample fails.
                lib.Close()

    def _add_sample_periods(self, sample, symbols):
        """Add the period of one sample to every entity hit by `symbols`.

        symbols: the sample's own symbol followed by its callchain symbols.
        """
        # Each sample has a callchain, but its period is only used once
        # to add period for each function/source_line/source_file/binary.
        # For example, if more than one entry in the callchain hits a
        # function, the event count of that function is only increased once.
        # Otherwise, we may get periods > 100%.
        is_sample_used = False
        used_dso_dict = dict()
        used_file_dict = dict()
        used_function_dict = dict()
        used_line_dict = dict()
        period = Period(sample.period, sample.period)
        for i, symbol in enumerate(symbols):
            if i == 1:
                # Callchain entries only contribute to acc_period.
                period = Period(0, sample.period)
            if not self._filter_symbol(symbol):
                continue
            is_sample_used = True
            # Add period to dso.
            self._add_dso_period(symbol.dso_name, period, used_dso_dict)
            # Add period to source file.
            sources = self.addr2line.get_sources(symbol.dso_name, symbol.vaddr_in_file)
            for source in sources:
                if source.file:
                    self._add_file_period(source, period, used_file_dict)
                    # Add period to line.
                    if source.line:
                        self._add_line_period(source, period, used_line_dict)
            # Add period to function.
            sources = self.addr2line.get_sources(symbol.dso_name, symbol.symbol_addr)
            for source in sources:
                if source.file:
                    self._add_file_period(source, period, used_file_dict)
                    if source.function:
                        self._add_function_period(source, period, used_function_dict)

        if is_sample_used:
            self.period += sample.period

    def _add_dso_period(self, dso_name, period, used_dso_dict):
        """Add `period` to binary `dso_name` unless already done for this sample."""
        if dso_name not in used_dso_dict:
            used_dso_dict[dso_name] = True
            dso_period = self.dso_periods.get(dso_name)
            if dso_period is None:
                dso_period = self.dso_periods[dso_name] = DsoPeriod(dso_name)
            dso_period.add_period(period)

    def _add_file_period(self, source, period, used_file_dict):
        """Add `period` to the file of `source` unless already done for this sample."""
        if source.file_key not in used_file_dict:
            used_file_dict[source.file_key] = True
            file_period = self.file_periods.get(source.file)
            if file_period is None:
                file_period = self.file_periods[source.file] = FilePeriod(source.file)
            file_period.add_period(period)

    def _add_line_period(self, source, period, used_line_dict):
        """Add `period` to the line of `source` unless already done for this sample.

        The FilePeriod of source.file must already exist.
        """
        if source.line_key not in used_line_dict:
            used_line_dict[source.line_key] = True
            file_period = self.file_periods[source.file]
            file_period.add_line_period(source.line, period)

    def _add_function_period(self, source, period, used_function_dict):
        """Add `period` to the function of `source` unless already done for this sample.

        The FilePeriod of source.file must already exist.
        """
        if source.function_key not in used_function_dict:
            used_function_dict[source.function_key] = True
            file_period = self.file_periods[source.file]
            file_period.add_function_period(source.function, source.line, period)

    def _write_summary(self):
        """Write the 'summary' file in the output directory.

        It lists the total period, then binaries and files sorted by
        accumulated period, then per-file function and line details.
        """
        summary = os.path.join(self.config['annotate_dest_dir'], 'summary')
        with open(summary, 'w') as f:
            f.write('total period: %d\n\n' % self.period)
            dso_periods = sorted(self.dso_periods.values(),
                                 key=lambda x: x.period.acc_period, reverse=True)
            for dso_period in dso_periods:
                f.write('dso %s: %s\n' % (dso_period.dso_name,
                                          self._get_percentage_str(dso_period.period)))
            f.write('\n')

            file_periods = sorted(self.file_periods.values(),
                                  key=lambda x: x.period.acc_period, reverse=True)
            for file_period in file_periods:
                f.write('file %s: %s\n' % (file_period.file,
                                           self._get_percentage_str(file_period.period)))
            for file_period in file_periods:
                f.write('\n\n%s: %s\n' % (file_period.file,
                                          self._get_percentage_str(file_period.period)))
                values = []
                for func_name in file_period.function_dict.keys():
                    func_start_line, period = file_period.function_dict[func_name]
                    values.append((func_name, func_start_line, period))
                values = sorted(values, key=lambda x: x[2].acc_period, reverse=True)
                for value in values:
                    f.write('\tfunction (%s): line %d, %s\n' % (
                        value[0], value[1], self._get_percentage_str(value[2])))
                f.write('\n')
                for line in sorted(file_period.line_dict.keys()):
                    f.write('\tline %d: %s\n' % (
                        line, self._get_percentage_str(file_period.line_dict[line])))

    def _get_percentage_str(self, period, short=False):
        """Format `period` as percentages of the total period.

        short: use the abbreviated labels 'acc_p'/'p'.
        """
        s = 'acc_p: %f%%, p: %f%%' if short else 'accumulated_period: %f%%, period: %f%%'
        return s % self._get_percentage(period)

    def _get_percentage(self, period):
        """Return (acc_period %, period %) of the total, or (0, 0) if the total is 0."""
        if self.period == 0:
            return (0, 0)
        acc_p = 100.0 * period.acc_period / self.period
        p = 100.0 * period.period / self.period
        return (acc_p, p)

    def _collect_source_files(self):
        """Index source files under the source dirs by base name.

        Builds self.source_file_dict: {file name: [paths]} for files whose
        extension is one of h, c, cpp, cc, java, kt.
        """
        self.source_file_dict = dict()
        source_file_suffix = ['h', 'c', 'cpp', 'cc', 'java', 'kt']
        for source_dir in self.config['source_dirs']:
            for root, _, files in os.walk(source_dir):
                for filename in files:
                    # A name without '.' has no extension; don't let a file
                    # literally named 'c' or 'h' count as a source file.
                    if '.' not in filename:
                        continue
                    if filename.rsplit('.', 1)[1] in source_file_suffix:
                        self.source_file_dict.setdefault(filename, []).append(
                            os.path.join(root, filename))

    def _find_source_file(self, file):
        """Return a collected source path ending with `file`, or None.

        `file` must match on a path component boundary. When several paths
        match, the last one is returned and a warning is logged.
        """
        filename = file[file.rfind(os.sep)+1:]
        source_files = self.source_file_dict.get(filename)
        if source_files is None:
            return None
        match_count = 0
        result = None
        for path in source_files:
            if _is_path_suffix(file, path):
                match_count += 1
                result = path
        if match_count > 1:
            log_warning('multiple source for %s, select %s' % (file, result))
        return result

    def _annotate_files(self):
        """Annotate Source files: add acc_period/period for each source file.
           1. Annotate java source files, which have $JAVA_SRC_ROOT prefix.
           2. Annotate c++ source files.
        """
        dest_dir = self.config['annotate_dest_dir']
        for key in self.file_periods:
            is_java = False
            if key.startswith('$JAVA_SRC_ROOT/'):
                path = key[len('$JAVA_SRC_ROOT/'):]
                items = path.split('/')
                path = os.sep.join(items)
                from_path = self._find_source_file(path)
                to_path = os.path.join(dest_dir, 'java', path)
                is_java = True
            elif key.startswith('/') and os.path.isfile(key):
                path = key
                from_path = path
                to_path = os.path.join(dest_dir, path[1:])
            elif is_windows() and key.find(':\\') != -1 and os.path.isfile(key):
                from_path = key
                to_path = os.path.join(dest_dir, key.replace(':\\', '\\'))
            else:
                path = key[1:] if key.startswith('/') else key
                # Change path on device to path on host
                path = os.sep.join(path.split('/'))
                from_path = self._find_source_file(path)
                to_path = os.path.join(dest_dir, path)
            if from_path is None:
                log_warning("can't find source file for path %s" % key)
                continue
            self._annotate_file(from_path, to_path, self.file_periods[key], is_java)

    def _annotate_file(self, from_path, to_path, file_period, is_java):
        """Annotate a source file.

        Annotate a source file in three steps:
          1. In the first line, show periods of this file.
          2. For each function, show periods of this function.
          3. For each line not hitting the same line as functions, show
             line periods.

        from_path: source file to read.
        to_path: annotated file to write; missing parent dirs are created.
        file_period: FilePeriod holding the counts for this file.
        is_java: place function annotations one line above the recorded line.
        """
        log_info('annotate file %s' % from_path)
        with open(from_path, 'r') as rf:
            lines = rf.readlines()

        # map from 1-based line number to annotation text
        annotates = dict()
        for line, line_period in file_period.line_dict.items():
            annotates[line] = self._get_percentage_str(line_period, True)
        for func_start_line, period in file_period.function_dict.values():
            if func_start_line == -1:
                continue
            # NOTE(review): the Java offset presumably compensates for the
            # recorded line being after the declaration - confirm.
            line = func_start_line - 1 if is_java else func_start_line
            annotates[line] = '[func] ' + self._get_percentage_str(period, True)
        annotates[1] = '[file] ' + self._get_percentage_str(file_period.period, True)

        max_annotate_cols = max(len(text) for text in annotates.values())

        # Same width as an annotation wrapped in '/* ' and ' */'.
        empty_annotate = ' ' * (max_annotate_cols + 6)

        dirname = os.path.dirname(to_path)
        if dirname and not os.path.isdir(dirname):
            os.makedirs(dirname)
        with open(to_path, 'w') as wf:
            for line, text in enumerate(lines, 1):
                annotate = annotates.get(line)
                if annotate is None:
                    # Blank lines stay blank instead of being padded.
                    annotate = empty_annotate if text.strip() else ''
                else:
                    annotate = '/* ' + annotate + (
                        ' ' * (max_annotate_cols - len(annotate))) + ' */'
                wf.write(annotate)
                wf.write(text)


def main():
    """Parse command line options and annotate the source files."""
    parser = argparse.ArgumentParser(description=
"""Annotate source files based on profiling data. It reads line information from
binary_cache generated by app_profiler.py or binary_cache_builder.py, and
generate annotated source files in annotated_files directory.""")
    parser.add_argument('-i', '--perf_data_list', nargs='+', action='append', help=
"""The paths of profiling data. Default is perf.data.""")
    parser.add_argument('-s', '--source_dirs', nargs='+', action='append', help=
"""Directories to find source files.""")
    parser.add_argument('--comm', nargs='+', action='append', help=
"""Use samples only in threads with selected names.""")
    parser.add_argument('--pid', nargs='+', action='append', help=
"""Use samples only in processes with selected process ids.""")
    parser.add_argument('--tid', nargs='+', action='append', help=
"""Use samples only in threads with selected thread ids.""")
    parser.add_argument('--dso', nargs='+', action='append', help=
"""Use samples only in selected binaries.""")
    parser.add_argument('--addr2line', help=
"""Set the path of addr2line.""")

    args = parser.parse_args()
    config = {}
    config['perf_data_list'] = flatten_arg_list(args.perf_data_list)
    if not config['perf_data_list']:
        config['perf_data_list'].append('perf.data')
    config['source_dirs'] = flatten_arg_list(args.source_dirs)
    config['comm_filters'] = flatten_arg_list(args.comm)
    config['pid_filters'] = flatten_arg_list(args.pid)
    config['tid_filters'] = flatten_arg_list(args.tid)
    config['dso_filters'] = flatten_arg_list(args.dso)
    config['addr2line_path'] = args.addr2line

    annotator = SourceFileAnnotator(config)
    annotator.annotate()
    log_info('annotate finish successfully, please check result in annotated_files/.')


if __name__ == '__main__':
    main()