summarize_hot_blocks.py revision f2a3ef46f75d2196a93d3ed27f4d1fcf22b54fbe
1# Copyright 2011 Google Inc. All Rights Reserved. 2"""Summarize hottest basic blocks found while doing a ChromeOS FDO build. 3 4Here is an example execution: 5 6 summarize_hot_blocks.py 7 --data_dir=~/chromeos/chroot/var/cache/chromeos-chrome/ --cutoff=10000 8 --output_dir=/home/x/y 9 10With the cutoff, it will ignore any basic blocks that have a count less 11than what is specified (in this example 10000). 12The script looks inside the directory (this is typically a directory where 13the object files are generated) for files with *.profile and *.optimized 14suffixes. To get these, the following flags were added to the compiler 15invocation within vanilla_vs_fdo.py in the profile-use phase. 16 17 "-fdump-tree-optimized-blocks-lineno " 18 "-fdump-ipa-profile-blocks-lineno " 19 20Here is an example of the *.profile and *.optimized files' contents: 21 22# BLOCK 7 freq:3901 count:60342, starting at line 92 23# PRED: 6 [39.0%] count:60342 (true,exec) 24 [url_canon_internal.cc : 92:28] MEM[(const char * *)source_6(D) + 16B] = 25 D.28080_17; 26 [url_canon_internal.cc : 93:41] MEM[(struct Component *)parsed_4(D) + 16B] = 27 MEM[(const struct Component &)repl_1(D) + 80]; 28# SUCC: 8 [100.0%] count:60342 (fallthru,exec) 29# BLOCK 8 freq:10000 count:154667, starting at line 321 30# PRED: 7 [100.0%] count:60342 (fallthru,exec) 6 [61.0%] count:94325 31(false,exec) 32 [url_canon_internal.cc : 321:51] # DEBUG D#10 => 33 [googleurl/src/url_canon_internal.cc : 321] &parsed_4(D)->host 34 35This script finds the blocks with the highest count and shows the first line 36of each block so that it is easy to identify the origin of the basic block. 37 38""" 39 40__author__ = 'llozano@google.com (Luis Lozano)' 41 42import optparse 43import os 44import re 45import shutil 46import sys 47import tempfile 48 49from utils import command_executer 50 51 52# Given a line, check if it has a block count and return it. 
# Pre-compiled patterns, hoisted so they are not recompiled per line.
# Raw strings avoid invalid-escape warnings for \d / \s on modern Python.
# Block header, e.g. "# BLOCK 7 freq:3901 count:60342, starting at line 92".
_BLOCK_COUNT_RE = re.compile(r'.*# BLOCK \d+ .*count:(\d+)')
# Source-location line, e.g. "  [url_canon_internal.cc : 92:28] ...".
_LINENO_RE = re.compile(r'^\s*\[.*: \d*:\d*]')


def GetBlockCount(line):
  """Return the basic-block count embedded in line, or -1 if there is none."""
  match_obj = _BLOCK_COUNT_RE.match(line)
  if match_obj:
    return int(match_obj.group(1))
  return -1


class Collector(object):
  """Finds hot basic blocks in FDO dump files and writes sorted summaries."""

  def __init__(self, data_dir, cutoff, output_dir, tempdir):
    self._data_dir = data_dir      # Root searched for *.profile/*.optimized.
    self._cutoff = cutoff          # Minimum block count worth reporting.
    self._output_dir = output_dir  # Where the final summaries are written.
    self._tempdir = tempdir        # Scratch directory for file lists.
    self._ce = command_executer.GetCommandExecuter()

  def CollectFileList(self, file_exp, list_file):
    """Write the list of files under data_dir matching file_exp.

    Args:
      file_exp: shell glob pattern, e.g. '*.profile'.
      list_file: name of the list file to create inside tempdir.

    Raises:
      Exception: if the find command fails.
    """
    command = ("find %s -type f -name '%s' > %s" %
               (self._data_dir, file_exp,
                os.path.join(self._tempdir, list_file)))
    ret = self._ce.RunCommand(command)
    if ret:
      raise Exception('Failed: %s' % command)

  def SummarizeLines(self, data_file):
    """Return summary lines for blocks in data_file whose count >= cutoff.

    Each summary line has the form
      '<count>:<file name>: <block header> <first source-location line>'
    so that downstream 'sort -nr -t: -k1' can order entries by count.

    Args:
      data_file: an iterable of lines with a .name attribute (an open file).
    """
    sum_lines = []
    search_lno = False
    sum_line = ''
    sum_count = 0
    for line in data_file:
      count = GetBlockCount(line)
      if count != -1:
        # A new block header: only hunt for its location line when it is
        # hot.  Resetting on cold blocks too prevents misattributing a
        # cold block's location line to the previous hot block when that
        # hot block had no location line of its own.
        search_lno = count >= self._cutoff
        if search_lno:
          sum_line = line.strip()
          sum_count = count
      elif search_lno and _LINENO_RE.match(line):
        # First line with source-location info after a hot block header.
        search_lno = False
        sum_lines.append('%d:%s: %s %s' %
                         (sum_count, data_file.name, sum_line, line))
    return sum_lines

  def SummarizeFile(self, data_file, sum_file):
    """Write a summary of data_file's hottest blocks, sorted by count.

    Looks for blocks with a count larger than the cutoff and generates a
    reverse-sorted (hottest first) summary file.
    """
    with open(data_file, 'r') as f:
      sum_lines = self.SummarizeLines(f)

    # Sort hottest-first in place; GetBlockCount re-extracts the count from
    # the block header embedded in each summary line.
    sum_lines.sort(key=GetBlockCount, reverse=True)

    with open(sum_file, 'w') as sf:
      sf.write(''.join(sum_lines))

    print('Generated file Summary: %s' % sum_file)

  def SummarizeList(self, list_file, summary_file):
    """Summarize every file named in list_file, then merge-sort the results.

    Args:
      list_file: name (inside tempdir) of a file listing data files.
      summary_file: path of the final merged summary.

    Raises:
      Exception: if the sort/merge command fails.
    """
    with open(os.path.join(self._tempdir, list_file)) as f:
      sort_list = []
      for file_name in f:
        file_name = file_name.strip()
        sum_file = '%s.sum' % file_name
        # NUL-terminated names, as expected by sort --files0-from.
        sort_list.append('%s%s' % (sum_file, chr(0)))
        self.SummarizeFile(file_name, sum_file)

    tmp_list_file = os.path.join(self._tempdir, 'file_list.dat')
    with open(tmp_list_file, 'w') as file_list_file:
      file_list_file.write(''.join(sort_list))

    # Each per-file summary is already sorted, so a numeric reverse merge
    # on the leading '<count>:' field yields a globally sorted summary.
    merge_command = ('sort -nr -t: -k1 --merge --files0-from=%s > %s ' %
                     (tmp_list_file, summary_file))

    ret = self._ce.RunCommand(merge_command)
    if ret:
      raise Exception('Failed: %s' % merge_command)
    print('Generated general summary: %s' % summary_file)

  def SummarizePreOptimized(self, summary_file):
    """Summarize the pre-optimization (*.profile) dumps into summary_file."""
    self.CollectFileList('*.profile', 'chrome.profile.list')
    self.SummarizeList('chrome.profile.list',
                      os.path.join(self._output_dir, summary_file))

  def SummarizeOptimized(self, summary_file):
    """Summarize the post-optimization (*.optimized) dumps into summary_file."""
    self.CollectFileList('*.optimized', 'chrome.optimized.list')
    self.SummarizeList('chrome.optimized.list',
                      os.path.join(self._output_dir, summary_file))


def Main(argv):
  """Command-line entry point: parse flags and generate both summaries.

  Returns:
    0 on success (exits with status 1 on missing required flags).
  """
  command_executer.InitCommandExecuter()
  usage = ('usage: %prog --data_dir=<dir> --cutoff=<value> '
           '--output_dir=<dir> [--keep_tmp]')
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--data_dir',
                    dest='data_dir',
                    help=('directory where the FDO (*.profile and '
                          '*.optimized) files are located'))
  parser.add_option('--cutoff',
                    dest='cutoff',
                    help='Minimum count to consider for each basic block')
  parser.add_option('--output_dir',
                    dest='output_dir',
                    help=('directory where summary data will be generated'
                          '(pre_optimized.txt, optimized.txt)'))
  parser.add_option('--keep_tmp',
                    action='store_true',
                    dest='keep_tmp',
                    default=False,
                    help=('Keep directory with temporary files'
                          '(for debugging purposes)'))
  options = parser.parse_args(argv)[0]
  # data_dir, cutoff and output_dir are all required.
  if not all((options.data_dir, options.cutoff, options.output_dir)):
    parser.print_help()
    sys.exit(1)

  tempdir = tempfile.mkdtemp()

  co = Collector(options.data_dir, int(options.cutoff), options.output_dir,
                 tempdir)
  co.SummarizePreOptimized('pre_optimized.txt')
  co.SummarizeOptimized('optimized.txt')

  if not options.keep_tmp:
    shutil.rmtree(tempdir, ignore_errors=True)

  return 0


if __name__ == '__main__':
  retval = Main(sys.argv)
  sys.exit(retval)