perf_diff.py revision a8af9a7a2462b00e72deff99327bdb452a715277
1#!/usr/bin/python2
2# Copyright 2012 Google Inc. All Rights Reserved.
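"""Compare perf report files and print the differences as tables.

Parses one or more perf report files and prints, side by side, the event
count of every section and the sample counts of the top functions in each
section.
"""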
7
8from __future__ import print_function
9
10__author__ = 'asharif@google.com (Ahmad Sharif)'
11
12import argparse
13import re
14import sys
15
16import misc
17import tabulator
18
# Key under which GetPerfDictFromReport stores, per section, the number of
# functions whose percentage is greater than 1.
ROWS_TO_SHOW = 'Rows_to_show_in_the_perf_table'
# Key under which GetPerfDictFromReport stores, per section, the sum of all
# function counts.
TOTAL_EVENTS = 'Total_events_of_this_profile'
21
22
def GetPerfDictFromReport(report_file):
  """Summarise a perf report file as nested dictionaries.

  Args:
    report_file: path of the perf report to parse with PerfReport.

  Returns:
    A dict keyed by section name. Each value maps every function name in that
    section to its count, plus two bookkeeping keys: TOTAL_EVENTS (sum of the
    function counts) and ROWS_TO_SHOW (number of functions above 1 percent).
  """
  report = PerfReport(report_file)
  result = {}
  for section_name, section in report.sections.items():
    entry = result.setdefault(section_name, {})
    entry[ROWS_TO_SHOW] = 0
    entry[TOTAL_EVENTS] = 0
    for fn in section.functions:
      entry['%s' % fn.name] = fn.count
      entry[TOTAL_EVENTS] += fn.count
      if fn.percent > 1:
        entry[ROWS_TO_SHOW] += 1
  return result
38
39
40def _SortDictionaryByValue(d):
41  l = [(k, v) for (k, v) in d.iteritems()]
42
43  def GetFloat(x):
44    if misc.IsFloat(x):
45      return float(x)
46    else:
47      return x
48
49  sorted_l = sorted(l, key=lambda x: GetFloat(x[1]))
50  sorted_l.reverse()
51  return [f[0] for f in sorted_l]
52
53
class Tabulator(object):
  """Prints groups of dictionaries as simple text tables."""

  def __init__(self, all_dicts):
    """Remember the groups to print.

    Args:
      all_dicts: a sequence of groups; each group is a sequence of
        dictionaries that is rendered as one table with a column per
        dictionary.
    """
    self._all_dicts = all_dicts

  def PrintTable(self):
    """Print one table for every group of dictionaries."""
    for group in self._all_dicts:
      self.PrintTableHelper(group)

  def PrintTableHelper(self, dicts):
    """Print a single table with one row per key and one column per dict.

    Rows are ordered by the largest value each key has in any of the
    dictionaries; a key missing from a dictionary is shown as '0'.

    Args:
      dicts: the dictionaries that make up the columns of the table.
    """
    # Largest value seen for each key; it determines the row order.
    largest = {}
    for column in dicts:
      for key, value in column.items():
        if key in largest:
          largest[key] = max(largest[key], value)
        else:
          largest[key] = value

    # Header row: a label column followed by the index of every dictionary.
    table = [['name'] + list(range(len(dicts)))]
    for key in _SortDictionaryByValue(largest):
      table.append([key] + [column.get(key, '0') for column in dicts])

    print(tabulator.GetSimpleTable(table))
92
93
class Function(object):
  """Plain record for one function line of a perf report section."""

  def __init__(self):
    # All fields start empty; the code that parses a report line fills them.
    self.name = ''
    self.percent = 0
    self.count = 0
101
102
class Section(object):
  """One event section of a perf report, parsed from its raw text.

  Attributes:
    name: the text following the event count on the 'Events:' line, or ''
      when the contents hold no such line.
    raw_contents: the unparsed text the section was built from.
    count: the event count token converted by misc.UnitToNumber, or 0 when
      the contents hold no 'Events:' line.
    functions: list of Function objects, one per parsed function line.
  """

  def __init__(self, contents):
    """Store the raw text and parse it.

    Args:
      contents: the raw text of a single section.
    """
    self.name = ''
    self.raw_contents = contents
    # Set defaults up front so that a section without an 'Events:' line still
    # exposes count and functions instead of raising AttributeError later.
    self.count = 0
    self.functions = []
    self._ParseSection()

  def _ParseSection(self):
    """Fill in name, count and functions from raw_contents.

    Leaves the defaults untouched when there is no 'Events:' line. Asserts
    that the contents hold at most one such line.
    """
    matches = re.findall(r'Events: (\w+)\s+(.*)', self.raw_contents)
    assert len(matches) <= 1, 'More than one event found in 1 section'
    if not matches:
      return
    count_text, self.name = matches[0]
    self.count = misc.UnitToNumber(count_text)

    self.functions = []
    for line in self.raw_contents.splitlines():
      # Function lines contain a percentage and are not '#' comment lines;
      # blank lines fail the '%' test as well.
      if '%' not in line or line.startswith('#'):
        continue
      fields = [f for f in line.split(' ') if f]
      function = Function()
      function.percent = float(fields[0].strip('%'))
      function.count = int(fields[1])
      function.name = ' '.join(fields[2:])
      self.functions.append(function)
133
134
class PerfReport(object):
  """A perf report file split into a header and per-event sections.

  Attributes:
    perf_file: path of the report file.
    sections: dict mapping section name to Section.
    metadata: dict of the 'key: value' pairs found in the header.
    event_counts: dict reset by _ParseSections; never filled in here.
  """

  def __init__(self, perf_file):
    """Read and parse the given report file.

    Args:
      perf_file: path of the perf report to read.
    """
    self.perf_file = perf_file
    self._ReadFile()
    self.sections = {}
    self.metadata = {}
    self._section_contents = []
    self._section_header = ''
    self._SplitSections()
    self._ParseSections()
    self._ParseSectionHeader()

  def _ParseSectionHeader(self):
    """Parse a header of a perf report file."""
    # The "captured on" field is inaccurate - this actually refers to when the
    # report was generated, not when the data was captured.
    for line in self._section_header.splitlines():
      # Drop the first two characters of the line (presumably the leading
      # '# ' comment marker - confirm against real reports).
      line = line[2:]
      if ':' not in line:
        continue
      key, val = line.strip().split(':', 1)
      self.metadata[key.strip()] = val.strip()

  def _ReadFile(self):
    """Load the whole report into memory, closing the file afterwards."""
    with open(self.perf_file) as report:
      self._perf_contents = report.read()

  def _ParseSections(self):
    """Build a Section for every chunk found by _SplitSections."""
    self.event_counts = {}
    self.sections = {}
    for section_content in self._section_contents:
      section = Section(section_content)
      section.name = self._GetHumanReadableName(section.name)
      self.sections[section.name] = section

  # TODO(asharif): Do this better.
  def _GetHumanReadableName(self, section_name):
    """Translate a raw section name using the report header.

    Names that do not contain 'raw' are returned unchanged. Otherwise the last
    space-separated token of the name is searched for in the header, and the
    sixth space-separated token of the first usable header line that contains
    it is returned.

    Args:
      section_name: the name parsed from the section's 'Events:' line.

    Returns:
      The translated name, or section_name itself when the header offers no
      translation (rather than None, which would hide the section's name).
    """
    if 'raw' not in section_name:
      return section_name
    raw_number = section_name.strip().split(' ')[-1]
    for line in self._section_header.splitlines():
      if raw_number not in line:
        continue
      tokens = line.strip().split(' ')
      # Skip header lines too short to hold a name instead of raising
      # IndexError.
      if len(tokens) > 5:
        return tokens[5]
    return section_name

  def _SplitSections(self):
    """Split the report text into a header and one chunk per section."""
    indices = [m.start() for m in re.finditer('# Events:', self._perf_contents)]
    # Sentinel: the end of the file closes the last section, and doubles as
    # the end of the header when the file has no sections at all.
    indices.append(len(self._perf_contents))
    self._section_contents = [
        self._perf_contents[start:end]
        for start, end in zip(indices[:-1], indices[1:])
    ]
    self._section_header = self._perf_contents[0:indices[0]]
192
193
class PerfDiffer(object):
  """Diffs the sections and top functions of several perf reports."""

  def __init__(self, reports, num_symbols, common_only):
    """Store the reports and the diff options.

    Args:
      reports: report objects exposing `perf_file` and a `sections` dict keyed
        by section name.
      num_symbols: how many leading functions of each section to include.
      common_only: when true, also emit scaled counts for the functions that
        appear in every report containing the section.
    """
    self._reports = reports
    self._num_symbols = num_symbols
    self._common_only = common_only
    # Maps section name -> set of function names shared by every report that
    # contains that section; filled in by _FindCommonFunctions.
    self._common_function_names = {}

  def DoDiff(self):
    """Print the file names, the section counts and per-section diffs."""
    section_names = self._FindAllSections()

    filename_dicts = []
    summary_dicts = []
    for report in self._reports:
      filename_dicts.append({'file': report.perf_file})
      summary_dicts.append(dict(
          (name, report.sections[name].count)
          for name in section_names if name in report.sections))

    all_dicts = [filename_dicts, summary_dicts]
    for section_name in section_names:
      all_dicts.append(self._DiffSection(section_name))

    Tabulator(all_dicts).PrintTable()

  def _DiffSection(self, section_name):
    """Return one dict per report with the top function counts of a section."""
    # A set makes the per-function membership test constant time.
    top_names = set(self._GetTopFunctions(section_name, self._num_symbols))
    self._FindCommonFunctions(section_name)

    dicts = []
    for report in self._reports:
      d = {}
      if section_name in report.sections:
        section = report.sections[section_name]
        if self._common_only:
          # One scaling factor per report, shared by all of its functions.
          common_names = self._common_function_names[section.name]
          common_scaling_factor = self._GetCommonScalingFactor(section)
        for function in section.functions:
          if function.name not in top_names:
            continue
          key = '%s %s' % (section.name, function.name)
          d[key] = function.count
          if self._common_only and function.name in common_names:
            d[key + ' scaled'] = common_scaling_factor * function.count
      dicts.append(d)
    return dicts

  def _FindAllSections(self):
    """Return all section names, ordered by their largest count."""
    sections = {}
    for report in self._reports:
      for section in report.sections.values():
        if section.name not in sections:
          sections[section.name] = section.count
        else:
          sections[section.name] = max(sections[section.name], section.count)
    return _SortDictionaryByValue(sections)

  def _GetCommonScalingFactor(self, section):
    """Return 100 divided by the summed count of the common functions.

    _FindCommonFunctions must have been called for the section's name first.

    Args:
      section: the section whose common functions are counted.

    Returns:
      The scaling factor as a float, or 0.0 when the common functions have no
      counts at all (which used to raise ZeroDivisionError).
    """
    common_names = self._common_function_names[section.name]
    common_count = self._GetCount(section, lambda name: name in common_names)
    if not common_count:
      return 0.0
    return 100.0 / common_count

  def _GetCount(self, section, filter_fun=None):
    """Sum the counts of the section's functions accepted by filter_fun.

    Args:
      section: the section whose functions are counted.
      filter_fun: optional predicate on the function name; every function is
        counted when it is omitted.

    Returns:
      The summed count as an int.
    """
    return sum(
        int(function.count) for function in section.functions
        if not filter_fun or filter_fun(function.name))

  def _FindCommonFunctions(self, section_name):
    """Record the function names shared by all reports with this section.

    The result is stored in self._common_function_names[section_name]; it is
    an empty set when no report contains the section.
    """
    name_sets = [
        set(f.name for f in report.sections[section_name].functions)
        for report in self._reports if section_name in report.sections
    ]
    # set.intersection needs at least one operand, hence the empty fallback.
    self._common_function_names[section_name] = (
        set.intersection(*name_sets) if name_sets else set())

  def _GetTopFunctions(self, section_name, num_functions):
    """Return the names of the first num_functions functions of each report.

    Names are ordered by the largest count they have in any report.
    """
    all_functions = {}
    for report in self._reports:
      if section_name in report.sections:
        section = report.sections[section_name]
        for f in section.functions[:num_functions]:
          if f.name in all_functions:
            all_functions[f.name] = max(all_functions[f.name], f.count)
          else:
            all_functions[f.name] = f.count
    # FIXME(asharif): Don't really need to sort these...
    return _SortDictionaryByValue(all_functions)

  def _GetFunctionsDict(self, section, function_names):
    """Map the name of each listed function in the section to its count."""
    return dict(
        (function.name, function.count) for function in section.functions
        if function.name in function_names)
299
300
def Main(argv):
  """Parse the command line, diff the given perf reports and print tables.

  Args:
    argv: the full argument vector, program name included. Every argument
      after the program name that is not a recognised option is treated as
      the path of a perf report.

  Returns:
    0 on success.
  """
  parser = argparse.ArgumentParser()
  # type=int lets argparse reject a non-numeric value with a usage error
  # instead of a ValueError traceback later on.
  parser.add_argument('-n',
                      '--num_symbols',
                      dest='num_symbols',
                      type=int,
                      default=5,
                      help='The number of symbols to show.')
  parser.add_argument('-c',
                      '--common_only',
                      dest='common_only',
                      action='store_true',
                      default=False,
                      help='Diff common symbols only.')

  options, args = parser.parse_known_args(argv)

  # args[0] is the program name, which argparse hands back as a leftover.
  reports = [PerfReport(report_file) for report_file in args[1:]]
  PerfDiffer(reports, options.num_symbols, options.common_only).DoDiff()

  return 0
329
330
# Script entry point: run Main on the full argument vector (program name
# included) and use its return value as the process exit status.
if __name__ == '__main__':
  sys.exit(Main(sys.argv))
333