process_hot_functions.py revision a78d63f8571cce07e39fc3ad50d8a49979413b9f
#!/usr/bin/python2

# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Processes the functions from the pprof(go/pprof) files and CWP(go/cwp) data.

The pprof output files should have the format given by the output of the
pprof --top command. A line containing a statistic should include the flat,
flat%, sum%, cum, cum%, function name and file name, separated by a space.

The CWP hot functions should be specified in a CSV file that should contain the
fields for the function name, the file and the object where that function is
declared and the inclusive count value.

For each pprof output file, the tool will output a file that contains the hot
functions present also in the CWP hot functions file. Afterwards, it extracts
the functions that are present in the CWP functions file and not in the
pprof output files.

Optionally, it will organize the extra CWP functions in groups that have to
represent a ChromeOS component. A function belongs to a group that is defined
by a given file path if it is declared in a file that shares that path.
"""

import argparse
import csv
import os
import re
import sys


class HotFunctionsProcessor(object):
  """Does the pprof and CWP output processing.

  Extracts the common and extra functions from the pprof output files, based on
  the provided CWP functions.
  """

  # Constants used to identify if a function is common in the pprof and CWP
  # files.
  COMMON_FUNCTION = 1
  NOT_COMMON_FUNCTION = 0

  # Number of leading lines of a pprof output file that are skipped before
  # the per-function statistics are parsed.
  _PPROF_HEADER_LINES = 6

  # Matches the flat, flat%, sum%, cum and cum% columns of a statistic line.
  _FUNCTION_STATISTIC_REGEX = re.compile(r'\S+\s+\S+%\s+\S+%\s+\S+\s+\S+%')
  # Matches the function name and the file name that follow the statistics.
  _FUNCTION_REGEX = re.compile(
      r'[a-zA-Z0-9-/_:.~\[\]]+[ a-zA-Z0-9-/_~:.]*')

  def __init__(self, pprof_path, cwp_functions_file, common_functions_path,
               extra_cwp_functions_file, cwp_function_groups_file,
               cwp_function_groups_statistics_file,
               cwp_function_groups_file_prefix):
    """Initializes the HotFunctionsProcessor.

    Args:
      pprof_path: The directory containing the pprof output files.
      cwp_functions_file: The file containing the CWP data.
      common_functions_path: The directory where the files with the CWP and
        pprof common functions should be stored.
      extra_cwp_functions_file: The file where the CWP functions that are not
        in the given pprof output files should be stored.
      cwp_function_groups_file: The name of the file containing the groups of
        functions.
      cwp_function_groups_statistics_file: The name of the file containing the
        statistics for the function groups.
      cwp_function_groups_file_prefix: The prefix of the files that will store
        the function statistics for each function group.
    """
    self._pprof_path = pprof_path
    self._cwp_functions_file = cwp_functions_file
    self._common_functions_path = common_functions_path
    self._extra_cwp_functions_file = extra_cwp_functions_file
    self._cwp_function_groups_file = cwp_function_groups_file
    self._cwp_function_groups_statistics_file = \
        cwp_function_groups_statistics_file
    self._cwp_function_groups_file_prefix = cwp_function_groups_file_prefix

  def ProcessHotFunctions(self):
    """Does the processing of the hot functions.

    The grouping step only runs when the groups file, the groups statistics
    file and the groups file prefix have all been provided.
    """
    cwp_statistics = \
        self.ExtractCommonFunctions(self._pprof_path,
                                    self._common_functions_path,
                                    self._cwp_functions_file)

    self.ExtractExtraFunctions(cwp_statistics, self._extra_cwp_functions_file)
    if all([self._cwp_function_groups_file,
            self._cwp_function_groups_statistics_file,
            self._cwp_function_groups_file_prefix]):
      self.GroupExtraFunctions(cwp_statistics,
                               self._cwp_function_groups_file_prefix,
                               self._cwp_function_groups_file,
                               self._cwp_function_groups_statistics_file)

  def ParseCWPStatistics(self, cwp_statistics_file_name):
    """Parses the contents of the file containing the CWP data.

    A line contains the name of the function, the corresponding filenames, the
    object files and their inclusive count values in CSV format.

    Args:
      cwp_statistics_file_name: The name of the file containing the CWP data
        in CSV format.

    Returns:
      A dict containing the CWP statistics. The key contains the name of the
      functions with the file name comma separated. The value represents a
      tuple with the statistics and a marker to identify if the function is
      present in one of the pprof files.
    """
    cwp_statistics = {}

    with open(cwp_statistics_file_name) as cwp_statistics_file:
      statistics_reader = csv.DictReader(cwp_statistics_file, delimiter=',')

      for statistic in statistics_reader:
        function_name = statistic['function']
        file_name = statistic['file']
        dso_name = statistic['dso']
        inclusive_count = statistic['inclusive_count']

        # We ignore the lines that have empty fields(i.e they specify only the
        # addresses of the functions and the inclusive counts values).
        if all([function_name, file_name, dso_name, inclusive_count]):
          key = '%s,%s' % (function_name, file_name)
          value = \
              ('%s,%s' % (dso_name, inclusive_count), self.NOT_COMMON_FUNCTION)
          # All the functions are marked as NOT_COMMON_FUNCTION.
          cwp_statistics[key] = value

    return cwp_statistics

  @classmethod
  def _ParsePprofStatistic(cls, pprof_statistic):
    """Splits one pprof statistic line into its function key and statistics.

    Args:
      pprof_statistic: A line of a pprof output file.

    Returns:
      A tuple (function, function_statistic) where function is the function
      name and the file name comma separated and function_statistic is the
      flat, flat%, sum%, cum and cum% values comma separated. None if the line
      does not contain both the statistics and a function.
    """
    function_statistic_match = \
        cls._FUNCTION_STATISTIC_REGEX.search(pprof_statistic)
    if function_statistic_match is None:
      return None

    function_match = cls._FUNCTION_REGEX.search(
        pprof_statistic[function_statistic_match.end():])
    if function_match is None:
      return None

    function_statistic = ','.join(function_statistic_match.group(0).split())
    function = ','.join(function_match.group(0).split())
    return function, function_statistic

  def ExtractCommonFunctions(self, pprof_path, common_functions_path,
                             cwp_functions_file):
    """Extracts the common functions of the pprof files and the CWP file.

    For each pprof file, it creates a separate file with the same name
    containing the common functions, that will be placed in the
    common_functions_path directory.

    The resulting file is CSV format, containing the following fields:
    function name, file name, object, inclusive count, flat, flat%, sum%, cum,
    cum%.

    It builds a dict of the CWP statistics and if a function is common, it is
    marked as a COMMON_FUNCTION.

    Lines of a pprof file that do not contain a statistic followed by a
    function (e.g. blank lines) are skipped.

    Args:
      pprof_path: The directory with the pprof files.
      common_functions_path: The directory with the common functions files.
      cwp_functions_file: The file with the CWP data.

    Returns:
      A dict containing the CWP statistics with the common functions marked as
      COMMON_FUNCTION.
    """
    # Get the list of pprof files from the given path.
    pprof_files = os.listdir(pprof_path)
    cwp_statistics = self.ParseCWPStatistics(cwp_functions_file)

    for pprof_file in pprof_files:
      # The first _PPROF_HEADER_LINES lines of the pprof output are skipped,
      # the statistics of the functions are read from the lines after them.
      with open(os.path.join(pprof_path, pprof_file), 'r') as input_file:
        pprof_statistics = input_file.readlines()[self._PPROF_HEADER_LINES:]
      output_lines = \
          ['function,file,dso,inclusive_count,flat,flat%,sum%,cum,cum%']

      for pprof_statistic in pprof_statistics:
        parsed_statistic = self._ParsePprofStatistic(pprof_statistic)
        if parsed_statistic is None:
          # Not a statistic line, there is nothing to compare against CWP.
          continue
        function, function_statistic = parsed_statistic

        if function in cwp_statistics:
          cwp_statistic = cwp_statistics[function]
          output_lines.append(','.join([function, cwp_statistic[0],
                                        function_statistic]))
          cwp_statistics[function] = (cwp_statistic[0], self.COMMON_FUNCTION)

      with open(os.path.join(common_functions_path, pprof_file), 'w') \
          as output_file:
        output_file.write('\n'.join(output_lines))

    return cwp_statistics

  @staticmethod
  def ParseFunctionGroups(cwp_function_groups_lines):
    """Parses the contents of the function groups file.

    Blank lines are ignored. Every other line must consist of exactly two
    whitespace separated fields: the group name and the file path.

    Args:
      cwp_function_groups_lines: A list of the lines contained in the CWP
        function groups file.

    Returns:
      A list of tuples containing the group name, the file path, the total
      number of inclusive count values for that group, a list that will contain
      the CWP statistics of the functions declared in files that share the file
      path.

    Raises:
      ValueError: A non-blank line does not have exactly two fields.
    """
    cwp_function_groups = []

    for line in cwp_function_groups_lines:
      if not line.strip():
        # A blank (e.g. trailing) line does not define a group.
        continue
      group_name, file_path = line.split()
      cwp_function_groups.append((group_name, file_path, 0, []))

    return cwp_function_groups

  def GroupExtraFunctions(self, cwp_statistics, cwp_function_groups_file_prefix,
                          cwp_function_groups_file,
                          cwp_function_groups_statistics_file):
    """Groups the functions that are in the CWP statistics and not in pprof.

    A function belongs to a group that is defined by a given file path if it
    is declared in a file that shares that path.

    Writes the data of the functions that belong to a group in a file, sorted
    by their inclusive count value, in descending order. The file name is
    composed by the cwp_function_groups_file_prefix and the name of the group.
    The file is in CSV format, containing the fields: function name, file name,
    object name, inclusive count.

    It creates a CSV file containing the name of the groups, their
    common path, the total inclusive count value of all the functions declared
    in files that share the common path, sorted in descending order by the
    inclusive count value.

    Args:
      cwp_statistics: A dict containing the CWP statistics.
      cwp_function_groups_file_prefix: The prefix used for naming the files
        that store the function data for a specific group.
      cwp_function_groups_file: The name of the file containing the groups of
        functions.
      cwp_function_groups_statistics_file: The name of the file that will
        contain the statistics for the function groups.
    """
    with open(cwp_function_groups_file, 'r') as input_file:
      cwp_function_groups = self.ParseFunctionGroups(input_file.readlines())

    for function, statistics in cwp_statistics.items():
      if statistics[1] == self.COMMON_FUNCTION:
        continue
      # The key is 'function,file' and the statistics are 'dso,count'. The
      # split is done from the right because a function name may itself
      # contain commas (e.g. C++ template arguments).
      file_name = function.rsplit(',', 1)[1]
      function_inclusive_count = int(statistics[0].rsplit(',', 1)[1])
      for i, group in enumerate(cwp_function_groups):
        group_name, group_common_path, group_inclusive_count, group_lines = \
            group

        # The order of the groups mentioned in the cwp_functions_groups
        # matters. A function declared in a file will belong to the first
        # mentioned group that matches its path to the one of the file.
        # It is possible to have multiple paths that belong to the same group.
        if group_common_path in file_name:
          group_lines.append(','.join([function, statistics[0]]))
          cwp_function_groups[i] = (
              group_name, group_common_path,
              group_inclusive_count + function_inclusive_count, group_lines)
          break

    for group_name, _, _, group_lines in cwp_function_groups:
      if group_lines:
        # Sort the output in descending order based on the inclusive_count
        # value.
        group_lines.sort(
            key=lambda x: int(x.rsplit(',', 1)[-1]), reverse=True)
        group_lines.insert(0, 'function,file,dso,inclusive_count')
        group_file_name = cwp_function_groups_file_prefix + group_name

        with open(group_file_name, 'w') as output_file:
          output_file.write('\n'.join(group_lines))

    # Sort on the numeric total before formatting, so that the order does not
    # depend on re-parsing the generated CSV lines.
    sorted_groups = sorted(
        cwp_function_groups, key=lambda group: group[2], reverse=True)
    group_statistics_lines = ['group,shared_path,inclusive_count']
    group_statistics_lines.extend(
        ','.join([group_name, group_path, str(group_inclusive_count)])
        for group_name, group_path, group_inclusive_count, _ in sorted_groups)

    with open(cwp_function_groups_statistics_file, 'w') as output_file:
      output_file.write('\n'.join(group_statistics_lines))

  def ExtractExtraFunctions(self, cwp_statistics, extra_cwp_functions_file):
    """Gets the functions that are in the CWP file, but not in the pprof output.

    Writes the functions and their statistics in the extra_cwp_functions_file
    file. The output is sorted in descending order based on the
    inclusive_count value. The file is in CSV format, containing the fields:
    function name, file name, object name, inclusive count.

    Args:
      cwp_statistics: A dict containing the CWP statistics.
      extra_cwp_functions_file: The file where the CWP functions and
        statistics that are marked as NOT_COMMON_FUNCTION should be stored.
    """
    output_lines = [
        ','.join([function, statistics[0]])
        for function, statistics in cwp_statistics.items()
        if statistics[1] == self.NOT_COMMON_FUNCTION
    ]
    output_lines.sort(key=lambda x: int(x.rsplit(',', 1)[-1]), reverse=True)
    output_lines.insert(0, 'function,file,dso,inclusive_count')

    with open(extra_cwp_functions_file, 'w') as output_file:
      output_file.write('\n'.join(output_lines))


def ParseArguments(arguments):
  """Parses the command line arguments of the tool.

  Args:
    arguments: The list of command line arguments, without the program name.

  Returns:
    The argparse namespace holding the parsed options. The options for the
    function groups are optional and are None when they are not given.
  """
  parser = argparse.ArgumentParser()

  parser.add_argument(
      '-p',
      '--pprof_path',
      dest='pprof_path',
      required=True,
      help='The directory containing the pprof output files.')
  parser.add_argument(
      '-w',
      '--cwp_hot_functions_file',
      dest='cwp_hot_functions_file',
      required=True,
      help='The CSV file containing the CWP hot functions. The '
      'file should include the name of the functions, the '
      'file names with the definition, the object file '
      'and the CWP inclusive count values, comma '
      'separated.')
  parser.add_argument(
      '-c',
      '--common_functions_path',
      dest='common_functions_path',
      required=True,
      help='The directory containing the files with the pprof '
      'and CWP common functions. A file will contain all '
      'the hot functions from a pprof output file that '
      'are also included in the CWP hot functions file. '
      'The files with the common functions will have the '
      'same names with the corresponding pprof output '
      'files.')
  parser.add_argument(
      '-e',
      '--extra_cwp_functions_file',
      dest='extra_cwp_functions_file',
      required=True,
      help='The file that will contain the CWP hot functions '
      'that are not in any of the pprof output files. '
      'The file should include the name of the functions, '
      'the file names with the definition, the object '
      'file and the CWP inclusive count values, comma '
      'separated.')
  parser.add_argument(
      '-g',
      '--cwp_function_groups_file',
      dest='cwp_function_groups_file',
      help='The file that will contain the CWP function groups. '
      'A line consists in the group name and a file path. A group must '
      'represent a ChromeOS component.')
  parser.add_argument(
      '-s',
      '--cwp_function_groups_statistics_file',
      dest='cwp_function_groups_statistics_file',
      help='The file that will contain the total inclusive count values of CWP '
      'function groups in CSV format. A line will contain the name of the '
      'group, the common path, the total inclusive count value of all the '
      'functions declared in files that share the common path.')
  parser.add_argument(
      '-x',
      '--cwp_function_groups_file_prefix',
      dest='cwp_function_groups_file_prefix',
      help='The prefix of the files that will store the function statistics '
      'for each function group.')

  options = parser.parse_args(arguments)

  return options


def Main(argv):
  """Parses the arguments and runs the hot functions processing.

  Args:
    argv: The list of command line arguments, without the program name.
  """
  options = ParseArguments(argv)

  hot_functions_processor = HotFunctionsProcessor(
      options.pprof_path, options.cwp_hot_functions_file,
      options.common_functions_path, options.extra_cwp_functions_file,
      options.cwp_function_groups_file,
      options.cwp_function_groups_statistics_file,
      options.cwp_function_groups_file_prefix)

  hot_functions_processor.ProcessHotFunctions()


if __name__ == '__main__':
  Main(sys.argv[1:])