1#!/usr/bin/python2 2 3# Copyright 2016 The Chromium OS Authors. All rights reserved. 4# Use of this source code is governed by a BSD-style license that can be 5# found in the LICENSE file. 6"""Selects the optimal set of benchmarks. 7 8For each benchmark, there is a file with the common functions, as extracted by 9the process_hot_functions module. 10 11The script receives as input the CSV file with the CWP inclusive count values, 12the file with Chrome OS groups and the path containing a file with common 13functions for every benchmark. 14 15It extracts for every benchmark and for the CWP data all the functions that 16match the given Chrome OS groups. 17 18It generates all possible combinations of benchmark sets of a given size and 19it computes for every set a metric. 20It outputs the optimal sets, based on which ones have the best metric. 21 22Three different metrics have been used: function count, distance 23variation and score. 24 25For the function count metric, we count the unique functions covered by a 26set of benchmarks. Besides the number of unique functions, we compute also 27the fraction of unique functions out of the amount of CWP functions from the 28given groups. The benchmark set with the highest amount of unique functions 29that belong to all the given groups is considered better. 30 31For the distance variation metric, we compute the sum of the distance variations 32of the functions covered by a set of benchmarks. We define the distance 33variation as the difference between the distance value of a function and the 34ideal distance value (1.0). If a function appears in multiple common functions 35files, we consider only the minimum value. We compute also the distance 36variation per function. The set that has the smaller value for the 37distance variation per function is considered better. 38 39For the score metric, we compute the sum of the scores of the functions from a 40set of benchmarks. If a function appears in multiple common functions files, 41we consider only the maximum value. We compute also the fraction of this sum 42from the sum of all the scores of the functions from the CWP data covering the 43given groups, in the ideal case (the ideal score of a function is 1.0). 44 45We compute the metrics in the same manner for individual Chrome OS groups. 46""" 47 48from collections import defaultdict 49 50import argparse 51import csv 52import itertools 53import json 54import operator 55import os 56import sys 57 58import benchmark_metrics 59import utils 60 61 62class BenchmarkSet(object): 63 """Selects the optimal set of benchmarks of given size.""" 64 65 # Constants that specify the metric type. 66 FUNCTION_COUNT_METRIC = 'function_count' 67 DISTANCE_METRIC = 'distance_variation' 68 SCORE_METRIC = 'score_fraction' 69 70 def __init__(self, benchmark_set_size, benchmark_set_output_file, 71 benchmark_set_common_functions_path, cwp_inclusive_count_file, 72 cwp_function_groups_file, metric): 73 """Initializes the BenchmarkSet. 74 75 Args: 76 benchmark_set_size: Constant representing the size of a benchmark set. 77 benchmark_set_output_file: The output file that will contain the set of 78 optimal benchmarks with the metric values. 79 benchmark_set_common_functions_path: The directory containing the files 80 with the common functions for the list of benchmarks. 81 cwp_inclusive_count_file: The CSV file containing the CWP functions with 82 their inclusive count values. 83 cwp_function_groups_file: The file that contains the CWP function groups. 84 metric: The type of metric used for the analysis. 85 """ 86 self._benchmark_set_size = int(benchmark_set_size) 87 self._benchmark_set_output_file = benchmark_set_output_file 88 self._benchmark_set_common_functions_path = \ 89 benchmark_set_common_functions_path 90 self._cwp_inclusive_count_file = cwp_inclusive_count_file 91 self._cwp_function_groups_file = cwp_function_groups_file 92 self._metric = metric 93 94 @staticmethod 95 def OrganizeCWPFunctionsInGroups(cwp_inclusive_count_statistics, 96 cwp_function_groups): 97 """Selects the CWP functions that match the given Chrome OS groups. 98 99 Args: 100 cwp_inclusive_count_statistics: A dict with the CWP functions. 101 cwp_function_groups: A list with the CWP function groups. 102 103 Returns: 104 A dict having as a key the name of the groups and as a value the list of 105 CWP functions that match an individual group. 106 """ 107 cwp_functions_grouped = defaultdict(list) 108 for function_key in cwp_inclusive_count_statistics: 109 _, file_name = function_key.split(',') 110 for group_name, file_path in cwp_function_groups: 111 if file_path not in file_name: 112 continue 113 cwp_functions_grouped[group_name].append(function_key) 114 break 115 return cwp_functions_grouped 116 117 @staticmethod 118 def OrganizeBenchmarkSetFunctionsInGroups(benchmark_set_files, 119 benchmark_set_common_functions_path, 120 cwp_function_groups): 121 """Selects the benchmark functions that match the given Chrome OS groups. 122 123 Args: 124 benchmark_set_files: The list of common functions files corresponding to a 125 benchmark. 126 benchmark_set_common_functions_path: The directory containing the files 127 with the common functions for the list of benchmarks. 128 cwp_function_groups: A list with the CWP function groups. 129 130 Returns: 131 A dict having as a key the name of a common functions file. The value is 132 a dict having as a key the name of a group and as value a list of 133 functions that match the given group. 134 """ 135 136 benchmark_set_functions_grouped = {} 137 for benchmark_file_name in benchmark_set_files: 138 benchmark_full_file_path = \ 139 os.path.join(benchmark_set_common_functions_path, 140 benchmark_file_name) 141 with open(benchmark_full_file_path) as input_file: 142 statistics_reader = \ 143 csv.DictReader(input_file, delimiter=',') 144 benchmark_functions_grouped = defaultdict(dict) 145 for statistic in statistics_reader: 146 function_name = statistic['function'] 147 file_name = statistic['file'] 148 for group_name, file_path in cwp_function_groups: 149 if file_path not in file_name: 150 continue 151 function_key = ','.join([function_name, file_name]) 152 distance = float(statistic['distance']) 153 score = float(statistic['score']) 154 benchmark_functions_grouped[group_name][function_key] = \ 155 (distance, score) 156 break 157 benchmark_set_functions_grouped[benchmark_file_name] = \ 158 benchmark_functions_grouped 159 return benchmark_set_functions_grouped 160 161 @staticmethod 162 def SelectOptimalBenchmarkSetBasedOnMetric(all_benchmark_combinations_sets, 163 benchmark_set_functions_grouped, 164 cwp_functions_grouped, 165 metric_function_for_set, 166 metric_comparison_operator, 167 metric_default_value, 168 metric_string): 169 """Generic method that selects the optimal benchmark set based on a metric. 170 171 The reason of implementing a generic function is to avoid logic duplication 172 for selecting a benchmark set based on the three different metrics. 173 174 Args: 175 all_benchmark_combinations_sets: The list with all the sets of benchmark 176 combinations. 177 benchmark_set_functions_grouped: A dict with benchmark functions as 178 returned by OrganizeBenchmarkSetFunctionsInGroups. 179 cwp_functions_grouped: A dict with the CWP functions as returned by 180 OrganizeCWPFunctionsInGroups. 181 metric_function_for_set: The method used to compute the metric for a given 182 benchmark set. 183 metric_comparison_operator: A comparison operator used to compare two 184 values of the same metric (i.e: operator.lt or operator.gt). 185 metric_default_value: The default value for the metric. 186 metric_string: A tuple of strings used in the JSON output for the pair of 187 the values of the metric. 188 189 Returns: 190 A list of tuples containing for each optimal benchmark set. A tuple 191 contains the list of benchmarks from the set, the pair of metric values 192 and a dictionary with the metrics for each group. 193 """ 194 optimal_sets = [([], metric_default_value, {})] 195 196 for benchmark_combination_set in all_benchmark_combinations_sets: 197 function_metrics = [benchmark_set_functions_grouped[benchmark] 198 for benchmark in benchmark_combination_set] 199 set_metrics, set_groups_metrics = \ 200 metric_function_for_set(function_metrics, cwp_functions_grouped, 201 metric_string) 202 optimal_value = optimal_sets[0][1][0] 203 if metric_comparison_operator(set_metrics[0], optimal_value): 204 optimal_sets = \ 205 [(benchmark_combination_set, set_metrics, set_groups_metrics)] 206 elif set_metrics[0] == optimal_sets[0][1][0]: 207 optimal_sets.append( 208 (benchmark_combination_set, set_metrics, set_groups_metrics)) 209 210 return optimal_sets 211 212 def SelectOptimalBenchmarkSet(self): 213 """Selects the optimal benchmark sets and writes them in JSON format. 214 215 Parses the CWP inclusive count statistics and benchmark common functions 216 files. Organizes the functions into groups. For every optimal benchmark 217 set, the method writes in the self._benchmark_set_output_file the list of 218 benchmarks, the pair of metrics and a dictionary with the pair of 219 metrics for each group covered by the benchmark set. 220 """ 221 222 benchmark_set_files = os.listdir(self._benchmark_set_common_functions_path) 223 all_benchmark_combinations_sets = \ 224 itertools.combinations(benchmark_set_files, self._benchmark_set_size) 225 226 with open(self._cwp_function_groups_file) as input_file: 227 cwp_function_groups = utils.ParseFunctionGroups(input_file.readlines()) 228 229 cwp_inclusive_count_statistics = \ 230 utils.ParseCWPInclusiveCountFile(self._cwp_inclusive_count_file) 231 cwp_functions_grouped = self.OrganizeCWPFunctionsInGroups( 232 cwp_inclusive_count_statistics, cwp_function_groups) 233 benchmark_set_functions_grouped = \ 234 self.OrganizeBenchmarkSetFunctionsInGroups( 235 benchmark_set_files, self._benchmark_set_common_functions_path, 236 cwp_function_groups) 237 238 if self._metric == self.FUNCTION_COUNT_METRIC: 239 metric_function_for_benchmark_set = \ 240 benchmark_metrics.ComputeFunctionCountForBenchmarkSet 241 metric_comparison_operator = operator.gt 242 metric_default_value = (0, 0.0) 243 metric_string = ('function_count', 'function_count_fraction') 244 elif self._metric == self.DISTANCE_METRIC: 245 metric_function_for_benchmark_set = \ 246 benchmark_metrics.ComputeDistanceForBenchmarkSet 247 metric_comparison_operator = operator.lt 248 metric_default_value = (float('inf'), float('inf')) 249 metric_string = \ 250 ('distance_variation_per_function', 'total_distance_variation') 251 elif self._metric == self.SCORE_METRIC: 252 metric_function_for_benchmark_set = \ 253 benchmark_metrics.ComputeScoreForBenchmarkSet 254 metric_comparison_operator = operator.gt 255 metric_default_value = (0.0, 0.0) 256 metric_string = ('score_fraction', 'total_score') 257 else: 258 raise ValueError("Invalid metric") 259 260 optimal_benchmark_sets = \ 261 self.SelectOptimalBenchmarkSetBasedOnMetric( 262 all_benchmark_combinations_sets, benchmark_set_functions_grouped, 263 cwp_functions_grouped, metric_function_for_benchmark_set, 264 metric_comparison_operator, metric_default_value, metric_string) 265 266 json_output = [] 267 268 for benchmark_set in optimal_benchmark_sets: 269 json_entry = { 270 'benchmark_set': 271 list(benchmark_set[0]), 272 'metrics': { 273 metric_string[0]: benchmark_set[1][0], 274 metric_string[1]: benchmark_set[1][1] 275 }, 276 'groups': 277 dict(benchmark_set[2]) 278 } 279 json_output.append(json_entry) 280 281 with open(self._benchmark_set_output_file, 'w') as output_file: 282 json.dump(json_output, output_file) 283 284 285def ParseArguments(arguments): 286 parser = argparse.ArgumentParser() 287 288 parser.add_argument( 289 '--benchmark_set_common_functions_path', 290 required=True, 291 help='The directory containing the CSV files with the common functions ' 292 'of the benchmark profiles and CWP data. A file will contain all the hot ' 293 'functions from a pprof top output file that are also included in the ' 294 'file containing the cwp inclusive count values. The CSV fields are: the ' 295 'function name, the file and the object where the function is declared, ' 296 'the CWP inclusive count and inclusive count fraction values, the ' 297 'cumulative and average distance, the cumulative and average score. The ' 298 'files with the common functions will have the same names with the ' 299 'corresponding pprof output files.') 300 parser.add_argument( 301 '--cwp_inclusive_count_file', 302 required=True, 303 help='The CSV file containing the CWP hot functions with their ' 304 'inclusive_count values. The CSV fields include the name of the ' 305 'function, the file and the object with the definition, the inclusive ' 306 'count value and the inclusive count fraction out of the total amount of ' 307 'inclusive count values.') 308 parser.add_argument( 309 '--benchmark_set_size', 310 required=True, 311 help='The size of the benchmark sets.') 312 parser.add_argument( 313 '--benchmark_set_output_file', 314 required=True, 315 help='The JSON output file containing optimal benchmark sets with their ' 316 'metrics. For every optimal benchmark set, the file contains the list of ' 317 'benchmarks, the pair of metrics and a dictionary with the pair of ' 318 'metrics for each group covered by the benchmark set.') 319 parser.add_argument( 320 '--metric', 321 required=True, 322 help='The metric used to select the optimal benchmark set. The possible ' 323 'values are: distance_variation, function_count and score_fraction.') 324 parser.add_argument( 325 '--cwp_function_groups_file', 326 required=True, 327 help='The file that contains the CWP function groups. A line consists in ' 328 'the group name and a file path describing the group. A group must ' 329 'represent a Chrome OS component.') 330 331 options = parser.parse_args(arguments) 332 333 return options 334 335 336def Main(argv): 337 options = ParseArguments(argv) 338 benchmark_set = BenchmarkSet(options.benchmark_set_size, 339 options.benchmark_set_output_file, 340 options.benchmark_set_common_functions_path, 341 options.cwp_inclusive_count_file, 342 options.cwp_function_groups_file, options.metric) 343 benchmark_set.SelectOptimalBenchmarkSet() 344 345 346if __name__ == '__main__': 347 Main(sys.argv[1:]) 348