summarize_hot_blocks.py revision f81680c018729fd4499e1e200d04b48c4b90127c
1#!/usr/bin/python2.6
2#
3# Copyright 2011 Google Inc. All Rights Reserved.
4
5"""Summarize hottest basic blocks found while doing a ChromeOS FDO build.
6
7Here is an example execution:
8
9  summarize_hot_blocks.py
10   --data_dir=~/chromeos/chroot/var/cache/chromeos-chrome/ --cutoff=10000
11   --output_dir=/home/x/y
12
13With the cutoff, it will ignore any basic blocks that have a count less
14than what is specified (in this example 10000)
15The script looks inside the directory (this is typically a directory where
16the object files are generated) for files with *.profile and *.optimized
17suffixes. To get these, the following flags were added to the compiler
invocation within vanilla_vs_fdo.py in the profile-use phase.
19
20              "-fdump-tree-optimized-blocks-lineno "
21              "-fdump-ipa-profile-blocks-lineno "
22
23Here is an example of the *.profile and *.optimized files contents:
24
25# BLOCK 7 freq:3901 count:60342, starting at line 92
26# PRED: 6 [39.0%]  count:60342 (true,exec)
27  [url_canon_internal.cc : 92:28] MEM[(const char * *)source_6(D) + 16B] = D.28080_17;
28  [url_canon_internal.cc : 93:41] MEM[(struct Component *)parsed_4(D) + 16B] = MEM[(const struct Component &)repl_1(D) + 80];
29# SUCC: 8 [100.0%]  count:60342 (fallthru,exec)
30# BLOCK 8 freq:10000 count:154667, starting at line 321
31# PRED: 7 [100.0%]  count:60342 (fallthru,exec) 6 [61.0%]  count:94325 (false,exec)
32  [url_canon_internal.cc : 321:51] # DEBUG D#10 => [googleurl/src/url_canon_internal.cc : 321] &parsed_4(D)->host
33
34this script finds the blocks with highest count and shows the first line
35of each block so that it is easy to identify the origin of the basic block.
36
37"""
38
39__author__ = "llozano@google.com (Luis Lozano)"
40
41import optparse
42import os
43import re
44import shutil
45import sys
46import tempfile
47
48from utils import command_executer
49
50
def GetBlockCount(line):
  """Return the basic-block count embedded in a line, or -1 if absent.

  Matches GCC dump lines such as
    "# BLOCK 7 freq:3901 count:60342, starting at line 92"
  as well as summary lines that embed the same text after a prefix.
  """
  # Raw string so that \d is a regex escape, not a (deprecated) string escape.
  match_obj = re.match(r".*# BLOCK \d+ .*count:(\d+)", line)
  if match_obj:
    return int(match_obj.group(1))
  return -1
59
60
61class Collector(object):
62  def __init__(self, data_dir, cutoff, output_dir, tempdir):
63    self._data_dir = data_dir
64    self._cutoff = cutoff
65    self._output_dir = output_dir
66    self._tempdir = tempdir
67    self._ce = command_executer.GetCommandExecuter()
68
69  def CollectFileList(self, file_exp, list_file):
70    command = ("find %s -type f -name '%s' > %s" %
71               (self._data_dir, file_exp,
72                os.path.join(self._tempdir, list_file)))
73    ret = self._ce.RunCommand(command)
74    if ret:
75      raise Exception("Failed: %s" % command)
76
77  def SummarizeLines(self, data_file):
78    sum_lines = []
79    search_lno = False
80    for line in data_file:
81      count = GetBlockCount(line)
82      if count != -1:
83        if count >= self._cutoff:
84          search_lno = True
85          sum_line = line.strip()
86          sum_count = count
87      # look for a line that starts with line number information
88      elif search_lno and re.match("^\s*\[.*: \d*:\d*]", line):
89        search_lno = False
90        sum_lines.append("%d:%s: %s %s" %
91                         (sum_count, data_file.name, sum_line, line))
92    return sum_lines
93
94  # Look for blocks in the data file that have a count larger than the cutoff
95  # and generate a sorted summary file of the hottest blocks.
96  def SummarizeFile(self, data_file, sum_file):
97    with open(data_file, "r") as f:
98      sum_lines = self.SummarizeLines(f)
99
100    # sort reverse the list in place by the block count number
101    sum_lines.sort(key=GetBlockCount, reverse=True)
102
103    with open(sum_file, "w") as sf:
104      sf.write("".join(sum_lines))
105
106    print "Generated file Summary: ", sum_file
107
108  # Find hottest blocks in the list of files, generate a sorted summary for
109  # each file and then do a sorted merge of all the summaries.
110  def SummarizeList(self, list_file, summary_file):
111    with open(os.path.join(self._tempdir, list_file)) as f:
112      sort_list = []
113      for file_name in f:
114        file_name = file_name.strip()
115        sum_file = "%s.sum" % file_name
116        sort_list.append("%s%s" % (sum_file, chr(0)))
117        self.SummarizeFile(file_name, sum_file)
118
119    tmp_list_file = os.path.join(self._tempdir, "file_list.dat")
120    with open(tmp_list_file, "w") as file_list_file:
121      for x in sort_list:
122        file_list_file.write(x)
123
124    merge_command = ("sort -nr -t: -k1 --merge --files0-from=%s > %s " %
125                     (tmp_list_file, summary_file))
126
127    ret = self._ce.RunCommand(merge_command)
128    if ret:
129      raise Exception("Failed: %s" % merge_command)
130    print "Generated general summary: ", summary_file
131
132  def SummarizePreOptimized(self, summary_file):
133    self.CollectFileList("*.profile", "chrome.profile.list")
134    self.SummarizeList("chrome.profile.list",
135                       os.path.join(self._output_dir, summary_file))
136
137  def SummarizeOptimized(self, summary_file):
138    self.CollectFileList("*.optimized", "chrome.optimized.list")
139    self.SummarizeList("chrome.optimized.list",
140                       os.path.join(self._output_dir, summary_file))
141
142
def Main(argv):
  """Parse command-line flags and generate the hot-block summaries.

  Returns 0 on success; exits with status 1 when a required flag is missing.
  """
  command_executer.InitCommandExecuter()
  usage = ("usage: %prog --data_dir=<dir> --cutoff=<value> "
           "--output_dir=<dir> [--keep_tmp]")
  parser = optparse.OptionParser(usage=usage)
  parser.add_option("--data_dir",
                    dest="data_dir",
                    help=("directory where the FDO (*.profile and "
                          "*.optimized) files are located"))
  parser.add_option("--cutoff",
                    dest="cutoff",
                    help="Minimum count to consider for each basic block")
  parser.add_option("--output_dir",
                    dest="output_dir",
                    help=("directory where summary data will be generated "
                          "(pre_optimized.txt, optimized.txt)"))
  parser.add_option("--keep_tmp",
                    action="store_true",
                    dest="keep_tmp",
                    default=False,
                    help=("Keep directory with temporary files "
                          "(for debugging purposes)"))
  options = parser.parse_args(argv)[0]
  if not all((options.data_dir, options.cutoff, options.output_dir)):
    parser.print_help()
    sys.exit(1)

  tempdir = tempfile.mkdtemp()
  try:
    co = Collector(options.data_dir, int(options.cutoff), options.output_dir,
                   tempdir)
    co.SummarizePreOptimized("pre_optimized.txt")
    co.SummarizeOptimized("optimized.txt")
  finally:
    # Clean up the scratch directory even if summarization raised,
    # unless the user asked to keep it for debugging.
    if not options.keep_tmp:
      shutil.rmtree(tempdir, ignore_errors=True)

  return 0
181
if __name__ == "__main__":
  # Propagate Main's return value as the process exit status.
  sys.exit(Main(sys.argv))
185