# summarize_hot_blocks.py revision a8af9a7a2462b00e72deff99327bdb452a715277
1# Copyright 2011 Google Inc. All Rights Reserved.
2"""Summarize hottest basic blocks found while doing a ChromeOS FDO build.
3
4Here is an example execution:
5
6  summarize_hot_blocks.py
7   --data_dir=~/chromeos/chroot/var/cache/chromeos-chrome/ --cutoff=10000
8   --output_dir=/home/x/y
9
10With the cutoff, it will ignore any basic blocks that have a count less
11than what is specified (in this example 10000)
12The script looks inside the directory (this is typically a directory where
13the object files are generated) for files with *.profile and *.optimized
14suffixes. To get these, the following flags were added to the compiler
invocation within vanilla_vs_fdo.py in the profile-use phase.
16
17              "-fdump-tree-optimized-blocks-lineno "
18              "-fdump-ipa-profile-blocks-lineno "
19
20Here is an example of the *.profile and *.optimized files contents:
21
22# BLOCK 7 freq:3901 count:60342, starting at line 92
23# PRED: 6 [39.0%]  count:60342 (true,exec)
24  [url_canon_internal.cc : 92:28] MEM[(const char * *)source_6(D) + 16B] =
25  D.28080_17;
26  [url_canon_internal.cc : 93:41] MEM[(struct Component *)parsed_4(D) + 16B] =
27  MEM[(const struct Component &)repl_1(D) + 80];
28# SUCC: 8 [100.0%]  count:60342 (fallthru,exec)
29# BLOCK 8 freq:10000 count:154667, starting at line 321
30# PRED: 7 [100.0%]  count:60342 (fallthru,exec) 6 [61.0%]  count:94325
31(false,exec)
32  [url_canon_internal.cc : 321:51] # DEBUG D#10 =>
33  [googleurl/src/url_canon_internal.cc : 321] &parsed_4(D)->host
34
35this script finds the blocks with highest count and shows the first line
36of each block so that it is easy to identify the origin of the basic block.
37
38"""
39
40__author__ = 'llozano@google.com (Luis Lozano)'
41
42import optparse
43import os
44import re
45import shutil
46import sys
47import tempfile
48
49from cros_utils import command_executer
50
51
# Pattern for a block header such as '# BLOCK 7 freq:3901 count:60342, ...'.
# Compiled once at module load: GetBlockCount runs for every line of every
# dump file and is also used as a sort key, so per-call re.match compilation
# lookups are pure overhead.  Raw string avoids invalid-escape warnings for
# '\d' on modern Python.
_BLOCK_COUNT_RE = re.compile(r'.*# BLOCK \d+ .*count:(\d+)')


def GetBlockCount(line):
  """Returns the basic-block count found on line, or -1 if there is none."""
  match_obj = _BLOCK_COUNT_RE.match(line)
  if match_obj:
    return int(match_obj.group(1))
  return -1
60
61
class Collector(object):
  """Collects the hottest basic blocks found in compiler FDO dump files.

  Scans *.profile / *.optimized dump files for '# BLOCK ... count:N' headers,
  keeps the blocks whose count is at least the cutoff, and produces one sorted
  summary per dump file plus a merged overall summary.
  """

  def __init__(self, data_dir, cutoff, output_dir, tempdir):
    """Initializes the collector.

    Args:
      data_dir: Directory searched (recursively) for dump files.
      cutoff: Minimum block count for a block to be reported.
      output_dir: Directory where the final summaries are written.
      tempdir: Scratch directory for intermediate file lists.
    """
    self._data_dir = data_dir
    self._cutoff = cutoff
    self._output_dir = output_dir
    self._tempdir = tempdir
    self._ce = command_executer.GetCommandExecuter()

  def CollectFileList(self, file_exp, list_file):
    """Writes the list of data_dir files matching file_exp to tempdir/list_file.

    Raises:
      Exception: If the find command fails.
    """
    command = ("find %s -type f -name '%s' > %s" %
               (self._data_dir, file_exp,
                os.path.join(self._tempdir, list_file)))
    ret = self._ce.RunCommand(command)
    if ret:
      raise Exception('Failed: %s' % command)

  def SummarizeLines(self, data_file):
    """Extracts one summary line per hot basic block found in data_file.

    For every block header whose count is >= the cutoff, the first subsequent
    line carrying line-number information ('[file : line:col]') is recorded
    together with the count, the data file name and the header itself, so the
    origin of the block is easy to identify.

    Args:
      data_file: An open file (iterable of lines) with a .name attribute.

    Returns:
      A list of 'count:filename: header first_line' strings.
    """
    sum_lines = []
    search_lno = False
    sum_line = ''
    sum_count = 0
    for line in data_file:
      count = GetBlockCount(line)
      if count != -1:
        if count >= self._cutoff:
          search_lno = True
          sum_line = line.strip()
          sum_count = count
        else:
          # Fix: a below-cutoff block header must end the search; otherwise
          # the first line of this cold block would be misattributed to the
          # previous hot block's count and header.
          search_lno = False
      # look for a line that starts with line number information
      elif search_lno and re.match(r'^\s*\[.*: \d*:\d*]', line):
        search_lno = False
        sum_lines.append('%d:%s: %s %s' %
                         (sum_count, data_file.name, sum_line, line))
    return sum_lines

  # Look for blocks in the data file that have a count larger than the cutoff
  # and generate a sorted summary file of the hottest blocks.
  def SummarizeFile(self, data_file, sum_file):
    """Summarizes one dump file into sum_file, hottest blocks first."""
    with open(data_file, 'r') as f:
      sum_lines = self.SummarizeLines(f)

    # sort reverse the list in place by the block count number
    sum_lines.sort(key=GetBlockCount, reverse=True)

    with open(sum_file, 'w') as sf:
      sf.write(''.join(sum_lines))

    # Single-argument print() is valid (and prints the same) on both
    # Python 2 and Python 3, unlike the old print statement.
    print('Generated file Summary: %s' % sum_file)

  # Find hottest blocks in the list of files, generate a sorted summary for
  # each file and then do a sorted merge of all the summaries.
  def SummarizeList(self, list_file, summary_file):
    """Summarizes every file named in list_file, then merges the summaries.

    Each per-file summary is already sorted, so the overall summary can be
    produced with a single 'sort --merge' pass over all of them.

    Raises:
      Exception: If the merging sort command fails.
    """
    sort_list = []
    with open(os.path.join(self._tempdir, list_file)) as f:
      for file_name in f:
        file_name = file_name.strip()
        sum_file = '%s.sum' % file_name
        # NUL-terminated names, as expected by sort --files0-from.
        sort_list.append('%s%s' % (sum_file, chr(0)))
        self.SummarizeFile(file_name, sum_file)

    tmp_list_file = os.path.join(self._tempdir, 'file_list.dat')
    with open(tmp_list_file, 'w') as file_list_file:
      for x in sort_list:
        file_list_file.write(x)

    merge_command = ('sort -nr -t: -k1 --merge --files0-from=%s > %s ' %
                     (tmp_list_file, summary_file))

    ret = self._ce.RunCommand(merge_command)
    if ret:
      raise Exception('Failed: %s' % merge_command)
    print('Generated general summary: %s' % summary_file)

  def SummarizePreOptimized(self, summary_file):
    """Summarizes the pre-optimization (*.profile) dumps into output_dir."""
    self.CollectFileList('*.profile', 'chrome.profile.list')
    self.SummarizeList('chrome.profile.list',
                       os.path.join(self._output_dir, summary_file))

  def SummarizeOptimized(self, summary_file):
    """Summarizes the post-optimization (*.optimized) dumps into output_dir."""
    self.CollectFileList('*.optimized', 'chrome.optimized.list')
    self.SummarizeList('chrome.optimized.list',
                       os.path.join(self._output_dir, summary_file))
143
144
def Main(argv):
  """Parses the command line and summarizes the hottest basic blocks.

  Generates pre_optimized.txt and optimized.txt in --output_dir from the
  *.profile and *.optimized dump files found under --data_dir.

  Args:
    argv: Full process argument list (argv[0] is the program name).

  Returns:
    0 on success.  Exits with status 1 if a required option is missing.
  """
  command_executer.InitCommandExecuter()
  usage = ('usage: %prog --data_dir=<dir> --cutoff=<value> '
           '--output_dir=<dir> [--keep_tmp]')
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--data_dir',
                    dest='data_dir',
                    help=('directory where the FDO (*.profile and '
                          '*.optimized) files are located'))
  parser.add_option('--cutoff',
                    dest='cutoff',
                    help='Minimum count to consider for each basic block')
  parser.add_option('--output_dir',
                    dest='output_dir',
                    help=('directory where summary data will be generated'
                          '(pre_optimized.txt, optimized.txt)'))
  parser.add_option('--keep_tmp',
                    action='store_true',
                    dest='keep_tmp',
                    default=False,
                    help=('Keep directory with temporary files'
                          '(for debugging purposes)'))
  options = parser.parse_args(argv)[0]
  if not all((options.data_dir, options.cutoff, options.output_dir)):
    parser.print_help()
    sys.exit(1)

  tempdir = tempfile.mkdtemp()
  try:
    co = Collector(options.data_dir, int(options.cutoff), options.output_dir,
                   tempdir)
    co.SummarizePreOptimized('pre_optimized.txt')
    co.SummarizeOptimized('optimized.txt')
  finally:
    # Fix: previously the scratch directory leaked whenever summarizing
    # raised.  Clean it up on any exit path, unless the user explicitly
    # asked to keep it for debugging.
    if not options.keep_tmp:
      shutil.rmtree(tempdir, ignore_errors=True)

  return 0
183
184
if __name__ == '__main__':
  # Propagate Main's return value as the process exit status.
  sys.exit(Main(sys.argv))
188