process_hot_functions.py revision a78d63f8571cce07e39fc3ad50d8a49979413b9f
1#!/usr/bin/python2
2
3# Copyright 2016 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6"""Processes the functions from the pprof(go/pprof) files and CWP(go/cwp) data.
7
8The pprof output files should have the format given by the output of the
9pprof --top command. A line containing a statistic should include the flat,
10flat%, sum%, cum, cum%, function name and file name, separated by a space.
11
12The CWP hot functions should be specified in a CSV file that should contain the
13fields for the function name, the file and the object where that function is
14declared and the inclusive count value.
15
16For each pprof output file, the tool will output a file that contains the hot
17functions present also in the CWP hot functions file. Afterwards, it extracts
18the functions that are present in the CWP functions file and not in the
19pprof output files.
20
21Optionally, it will organize the extra CWP functions in groups that have to
22represent a ChromeOS component. A function belongs to a group that is defined
23by a given file path if it is declared in a file that shares that path.
24"""
25
26import argparse
27import csv
28import os
29import re
30import sys
31
32
class HotFunctionsProcessor(object):
  """Does the pprof and CWP output processing.

  Extracts the common and extra functions from the pprof output files, based on
  the provided CWP functions.
  """

  # Constants used to identify if a function is common in the pprof and CWP
  # files.
  COMMON_FUNCTION = 1
  NOT_COMMON_FUNCTION = 0

  def __init__(self, pprof_path, cwp_functions_file, common_functions_path,
               extra_cwp_functions_file, cwp_function_groups_file,
               cwp_function_groups_statistics_file,
               cwp_function_groups_file_prefix):
    """Initializes the HotFunctionsProcessor.

    Args:
      pprof_path: The directory containing the pprof output files.
      cwp_functions_file: The file containing the CWP data.
      common_functions_path: The directory where the files with the CWP and
        pprof common functions should be stored.
      extra_cwp_functions_file: The file where should be stored the CWP
        functions that are not in the given pprof output files.
      cwp_function_groups_file: The name of the file containing the groups of
        functions.
      cwp_function_groups_statistics_file: The name of the file containing the
        statistics for the function groups.
      cwp_function_groups_file_prefix: The prefix of the files that will store
        the function statistics for each function group.
    """
    self._pprof_path = pprof_path
    self._cwp_functions_file = cwp_functions_file
    self._common_functions_path = common_functions_path
    self._extra_cwp_functions_file = extra_cwp_functions_file
    self._cwp_function_groups_file = cwp_function_groups_file
    self._cwp_function_groups_statistics_file = \
        cwp_function_groups_statistics_file
    self._cwp_function_groups_file_prefix = cwp_function_groups_file_prefix

  @staticmethod
  def _GetInclusiveCount(csv_line):
    """Returns the integer stored in the last comma separated field."""
    return int(csv_line.rsplit(',', 1)[-1])

  def ProcessHotFunctions(self):
    """Does the processing of the hot functions.

    Writes the common functions files and the extra CWP functions file. The
    grouping of the extra functions is done only if the groups file, the
    groups statistics file and the groups file prefix were all provided.
    """
    cwp_statistics = \
      self.ExtractCommonFunctions(self._pprof_path,
                                  self._common_functions_path,
                                  self._cwp_functions_file)

    self.ExtractExtraFunctions(cwp_statistics, self._extra_cwp_functions_file)
    if all([self._cwp_function_groups_file,
            self._cwp_function_groups_statistics_file,
            self._cwp_function_groups_file_prefix]):
      self.GroupExtraFunctions(cwp_statistics,
                               self._cwp_function_groups_file_prefix,
                               self._cwp_function_groups_file,
                               self._cwp_function_groups_statistics_file)

  def ParseCWPStatistics(self, cwp_statistics_file_name):
    """Parses the contents of the file containing the CWP data.

    The file is read as CSV with a header row that names the 'function',
    'file', 'dso' and 'inclusive_count' columns.

    Args:
      cwp_statistics_file_name: The name of the file containing the CWP data
        in CSV format.

    Returns:
      A dict containing the CWP statistics. The key is the function name and
      the file name, comma separated. The value is a tuple made of the string
      'dso,inclusive_count' and a marker to identify if the function is
      present in one of the pprof files (initially NOT_COMMON_FUNCTION).
    """
    cwp_statistics = {}

    with open(cwp_statistics_file_name) as cwp_statistics_file:
      statistics_reader = csv.DictReader(cwp_statistics_file, delimiter=',')

      for statistic in statistics_reader:
        function_name = statistic['function']
        file_name = statistic['file']
        dso_name = statistic['dso']
        inclusive_count = statistic['inclusive_count']

        # We ignore the lines that have empty fields(i.e they specify only the
        # addresses of the functions and the inclusive counts values).
        if all([function_name, file_name, dso_name, inclusive_count]):
          key = '%s,%s' % (function_name, file_name)
          # All the functions are marked as NOT_COMMON_FUNCTION.
          cwp_statistics[key] = ('%s,%s' % (dso_name, inclusive_count),
                                 self.NOT_COMMON_FUNCTION)

    return cwp_statistics

  def ExtractCommonFunctions(self, pprof_path, common_functions_path,
                             cwp_functions_file):
    """Extracts the common functions of the pprof files and the CWP file.

    For each pprof file, it creates a separate file with the same name
    containing the common functions, that will be placed in the
    common_functions_path directory.

    The resulting file is CSV format, containing the following fields:
    function name, file name, object, inclusive count, flat, flat%, sum%, cum,
    cum%.

    It builds a dict of the CWP statistics and if a function is common, it is
    marked as a COMMON_FUNCTION.

    Entries of pprof_path that are not regular files and lines that do not
    look like a pprof statistic are skipped.

    Args:
      pprof_path: The directory with the pprof files.
      common_functions_path: The directory with the common functions files.
      cwp_functions_file: The file with the CWP data.

    Returns:
      A dict containing the CWP statistics with the common functions marked as
      COMMON_FUNCTION.
    """
    cwp_statistics = self.ParseCWPStatistics(cwp_functions_file)
    function_statistic_regex = re.compile(r'\S+\s+\S+%\s+\S+%\s+\S+\s+\S+%')
    function_regex = re.compile(r'[a-zA-Z0-9-/_:.~\[\]]+[ a-zA-Z0-9-/_~:.]*')

    for pprof_file in os.listdir(pprof_path):
      pprof_file_path = os.path.join(pprof_path, pprof_file)
      # Sub-directories cannot be opened as pprof output files.
      if not os.path.isfile(pprof_file_path):
        continue

      # The first six lines of a pprof output file are skipped; they are
      # presumably the header of the pprof --top output.
      with open(pprof_file_path, 'r') as input_file:
        pprof_statistics = input_file.readlines()[6:]
      output_lines = \
        ['function,file,dso,inclusive_count,flat,flat%,sum%,cum,cum%']

      for pprof_statistic in pprof_statistics:
        function_statistic_match = \
          function_statistic_regex.search(pprof_statistic)
        # Blank or otherwise malformed lines do not carry any statistic.
        if function_statistic_match is None:
          continue
        function_match = function_regex.search(
            pprof_statistic[function_statistic_match.end():])
        if function_match is None:
          continue

        function_statistic = \
          ','.join(function_statistic_match.group(0).split())
        # The function name and the file name are joined with a comma to
        # obtain the same key format as the one of the CWP statistics.
        function = ','.join(function_match.group(0).split())

        if function in cwp_statistics:
          cwp_statistic = cwp_statistics[function]
          output_lines.append(','.join([function, cwp_statistic[0],
                                        function_statistic]))
          cwp_statistics[function] = (cwp_statistic[0], self.COMMON_FUNCTION)

      with open(os.path.join(common_functions_path, pprof_file), 'w') \
        as output_file:
        output_file.write('\n'.join(output_lines))

    return cwp_statistics

  @staticmethod
  def ParseFunctionGroups(cwp_function_groups_lines):
    """Parses the contents of the function groups file.

    A line is made of the group name and the file path, separated by
    whitespace. Blank lines are ignored.

    Args:
      cwp_function_groups_lines: A list of the lines contained in the CWP
        function groups file.

    Returns:
      A list of tuples containing the group name, the file path, the total
      number of inclusive count values for that group (initially 0), a list
      that will contain the CWP statistics of the functions declared in files
      that share the file path (initially empty).

    Raises:
      ValueError: If a non-blank line does not contain exactly two fields.
    """
    cwp_function_groups = []

    for line in cwp_function_groups_lines:
      if not line.strip():
        continue
      group_name, file_path = line.split()
      cwp_function_groups.append((group_name, file_path, 0, []))

    return cwp_function_groups

  def GroupExtraFunctions(self, cwp_statistics, cwp_function_groups_file_prefix,
                          cwp_function_groups_file,
                          cwp_function_groups_statistics_file):
    """Groups the functions that are in the CWP statistics and not in the pprof
    output. A function belongs to a group that is defined by a given file path
    if it is declared in a file that shares that path.

    Writes the data of the functions that belong to a group in a file, sorted
    by their inclusive count value, in descendant order. The file name is
    composed by the cwp_function_groups_file_prefix and the name of the group.
    The file is in CSV format, containing the fields: function name, file name,
    object name, inclusive count. When a group is defined by several paths,
    the functions matched by all of its paths are written in the same file.
    No file is written for a group without functions.

    It creates a CSV file containing the name of the groups, their
    common path, the total inclusive count value of all the functions declared
    in files that share the common path, sorted in descendant order by the
    inclusive count value.

    Args:
      cwp_statistics: A dict containing the CWP statistics.
      cwp_function_groups_file_prefix: The prefix used for naming the files that
        store the function data for a specific group.
      cwp_function_groups_file: The name of the file containing the groups of
        functions.
      cwp_function_groups_statistics_file: The name of the file that will
        contain the statistics for the function groups.
    """
    with open(cwp_function_groups_file, 'r') as input_file:
      cwp_function_groups = self.ParseFunctionGroups(input_file.readlines())

    for function, statistics in cwp_statistics.items():
      if statistics[1] == self.COMMON_FUNCTION:
        continue
      # The key has the format 'function,file' and the statistic has the
      # format 'dso,inclusive_count'. The last field is used because a
      # function name may contain commas itself.
      file_name = function.rsplit(',', 1)[-1]
      function_inclusive_count = self._GetInclusiveCount(statistics[0])

      # The order of the groups mentioned in the cwp_functions_groups
      # matters. A function declared in a file will belong to the first
      # mentioned group that matches its path to the one of the file.
      # It is possible to have multiple paths that belong to the same group.
      for i, group in enumerate(cwp_function_groups):
        group_name, group_common_path, group_inclusive_count, group_lines = \
            group
        if group_common_path in file_name:
          group_lines.append(','.join([function, statistics[0]]))
          cwp_function_groups[i] = (
              group_name, group_common_path,
              group_inclusive_count + function_inclusive_count, group_lines)
          break

    # Gather the lines by group name, so that a group defined by multiple
    # paths does not overwrite its own output file.
    lines_by_group_name = {}
    for group_name, _, _, group_lines in cwp_function_groups:
      lines_by_group_name.setdefault(group_name, []).extend(group_lines)

    for group_name, group_lines in lines_by_group_name.items():
      if not group_lines:
        continue
      # Sort the output in descendant order based on the inclusive_count
      # value.
      group_lines.sort(key=self._GetInclusiveCount, reverse=True)
      group_lines.insert(0, 'function,file,dso,inclusive_count')
      group_file_name = cwp_function_groups_file_prefix + group_name

      with open(group_file_name, 'w') as output_file:
        output_file.write('\n'.join(group_lines))

    # Sort on the numeric field of the tuples instead of parsing it back from
    # the formatted line.
    sorted_groups = sorted(
        cwp_function_groups, key=lambda group: group[2], reverse=True)
    group_statistics_lines = ['group,shared_path,inclusive_count']
    group_statistics_lines.extend(
        ','.join([group_name, group_path, str(group_inclusive_count)])
        for group_name, group_path, group_inclusive_count, _ in sorted_groups)

    with open(cwp_function_groups_statistics_file, 'w') as output_file:
      output_file.write('\n'.join(group_statistics_lines))

  def ExtractExtraFunctions(self, cwp_statistics, extra_cwp_functions_file):
    """Gets the functions that are in the CWP file, but not in the pprof output.

    Writes the functions and their statistics in the extra_cwp_functions_file
    file. The output is sorted in descendant order based on the
    inclusive_count value. The file is in CSV format, containing the fields:
    function name, file name, object name, inclusive count.

    Args:
      cwp_statistics: A dict containing the CWP statistics.
      extra_cwp_functions_file: The file where should be stored the CWP
        functions and statistics that are marked as NOT_COMMON_FUNCTION.
    """
    output_lines = [
        ','.join([function, statistics[0]])
        for function, statistics in cwp_statistics.items()
        if statistics[1] == self.NOT_COMMON_FUNCTION
    ]
    output_lines.sort(key=self._GetInclusiveCount, reverse=True)
    output_lines.insert(0, 'function,file,dso,inclusive_count')

    with open(extra_cwp_functions_file, 'w') as output_file:
      output_file.write('\n'.join(output_lines))
306
307
def ParseArguments(arguments):
  """Parses the command line arguments of the tool.

  The pprof path, the CWP hot functions file, the common functions path and
  the extra CWP functions file are required. The three options related to the
  function groups are optional and default to None.

  Args:
    arguments: The list of the command line arguments, without the name of
      the program.

  Returns:
    The argparse namespace containing the parsed options.
  """
  parser = argparse.ArgumentParser()

  parser.add_argument(
      '-p',
      '--pprof_path',
      dest='pprof_path',
      required=True,
      help='The directory containing the pprof output files.')
  parser.add_argument(
      '-w',
      '--cwp_hot_functions_file',
      dest='cwp_hot_functions_file',
      required=True,
      help='The CSV file containing the CWP hot functions. The '
      'file should include the name of the functions, the '
      'file names with the definition, the object file '
      'and the CWP inclusive count values, comma '
      'separated.')
  parser.add_argument(
      '-c',
      '--common_functions_path',
      dest='common_functions_path',
      required=True,
      help='The directory containing the files with the pprof '
      'and CWP common functions. A file will contain all '
      'the hot functions from a pprof output file that '
      'are also included in the CWP hot functions file. '
      'The files with the common functions will have the '
      'same names with the corresponding pprof output '
      'files.')
  parser.add_argument(
      '-e',
      '--extra_cwp_functions_file',
      dest='extra_cwp_functions_file',
      required=True,
      help='The file that will contain the CWP hot functions '
      'that are not in any of the pprof output files. '
      'The file should include the name of the functions, '
      'the file names with the definition, the object '
      'file and the CWP inclusive count values, comma '
      'separated.')
  # The groups file is an input of the tool: it is only read, never written.
  parser.add_argument(
      '-g',
      '--cwp_function_groups_file',
      dest='cwp_function_groups_file',
      help='The file containing the CWP function groups. '
      'A line consists of the group name and a file path. A group must '
      'represent a ChromeOS component.')
  parser.add_argument(
      '-s',
      '--cwp_function_groups_statistics_file',
      dest='cwp_function_groups_statistics_file',
      help='The file that will contain the total inclusive count values of CWP '
      'function groups in CSV format. A line will contain the name of the '
      'group, the common path, the total inclusive count value of all the '
      'functions declared in files that share the common path.')
  parser.add_argument(
      '-x',
      '--cwp_function_groups_file_prefix',
      dest='cwp_function_groups_file_prefix',
      help='The prefix of the files that will store the function statistics '
      'for each function group.')

  return parser.parse_args(arguments)
375
376
def Main(argv):
  """Runs the processing of the hot functions for the given arguments.

  Args:
    argv: The list of the command line arguments, without the name of the
      program.
  """
  parsed_options = ParseArguments(argv)

  # Every option is forwarded by name to the matching constructor parameter.
  processor = HotFunctionsProcessor(
      pprof_path=parsed_options.pprof_path,
      cwp_functions_file=parsed_options.cwp_hot_functions_file,
      common_functions_path=parsed_options.common_functions_path,
      extra_cwp_functions_file=parsed_options.extra_cwp_functions_file,
      cwp_function_groups_file=parsed_options.cwp_function_groups_file,
      cwp_function_groups_statistics_file=(
          parsed_options.cwp_function_groups_statistics_file),
      cwp_function_groups_file_prefix=(
          parsed_options.cwp_function_groups_file_prefix))
  processor.ProcessHotFunctions()
387
388
if __name__ == '__main__':
  # The name of the program is dropped; Main expects only the options.
  command_line_arguments = sys.argv[1:]
  Main(command_line_arguments)
391