benchmark_metrics_experiment.py revision 523b2ae25b5b98512babb5051b6f8f4dd92ef7cf
#!/usr/bin/python2
#
# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Runs an experiment with the benchmark metrics on a pair of CWP data sets.

A data set should contain the files with the pairwise inclusive and the
inclusive statistics. The pairwise inclusive file contains pairs of
parent and child functions with their inclusive count fractions out of the
total amount of inclusive count values and the files of the child functions.
The inclusive file contains the functions with their inclusive count fraction
out of the total amount of inclusive count values and the file name of the
function. The input data should be collected using the scripts
collect_experiment_data.sh or collect_experiment_data_odd_even_session.sh

For every function, this script computes the distance and the score values.
The output is stored in the file cwp_functions_statistics_file.

For every Chrome OS component, this script computes a set of metrics consisting
in the number of functions, the average and cumulative distance and score of
the functions matching the group. The output is stored in the file
cwp_function_groups_statistics_file.
"""

import argparse
from collections import defaultdict
import csv
import os
import sys
import benchmark_metrics


class MetricsExperiment(object):
  """Runs an experiment with the benchmark metrics on a pair of data sets."""

  def __init__(self, cwp_pairwise_inclusive_reference,
               cwp_pairwise_inclusive_test, cwp_inclusive_reference,
               cwp_inclusive_test, cwp_function_groups_file,
               cwp_function_groups_statistics_file,
               cwp_function_statistics_file):
    """Initializes the MetricsExperiment class.

    Args:
      cwp_pairwise_inclusive_reference: The CSV file containing the pairwise
        inclusive values from the reference data set.
      cwp_pairwise_inclusive_test: The CSV file containing the pairwise
        inclusive values from the test data set.
      cwp_inclusive_reference: The CSV file containing the inclusive values
        from the reference data set.
      cwp_inclusive_test: The CSV file containing the inclusive values from
        the test data set.
      cwp_function_groups_file: The file containing the groups of functions.
      cwp_function_groups_statistics_file: The output CSV file that will
        contain the metrics for the function groups.
      cwp_function_statistics_file: The output CSV file that will contain the
        metrics for the CWP functions.
    """
    self._cwp_pairwise_inclusive_reference = cwp_pairwise_inclusive_reference
    self._cwp_pairwise_inclusive_test = cwp_pairwise_inclusive_test
    self._cwp_inclusive_reference = cwp_inclusive_reference
    self._cwp_inclusive_test = cwp_inclusive_test
    self._cwp_function_groups_file = cwp_function_groups_file
    self._cwp_function_groups_statistics_file = \
        cwp_function_groups_statistics_file
    self._cwp_function_statistics_file = cwp_function_statistics_file

  @staticmethod
  def ParsePairwiseInclusiveStatisticsFile(file_name):
    """Parses the pairwise inclusive statistics files.

    A line of the file should contain a pair of a parent and a child function,
    concatenated by a ;;, the name of the file where the child function is
    defined and the inclusive count fractions of the pair of functions out of
    the total amount of inclusive count values.

    Rows with an empty parent name, an empty child name or a zero fraction
    are skipped.

    Args:
      file_name: The file containing the pairwise inclusive statistics of the
        CWP functions. It is read as a CSV file with the columns
        parent_child_functions, child_function_file and
        inclusive_count_fraction.

    Returns:
      A dict containing the statistics of the parent functions and each of
      their child functions. The key of the dict is the name of the parent
      function. The value is a dict having as a key the name of the child
      function with its file name separated by a ',' and as a value the
      inclusive count fraction of the child function.

    Raises:
      ValueError: If a parent_child_functions field does not split into
        exactly two parts on ';;' or a fraction is not a valid float.
    """
    pairwise_inclusive_statistics = defaultdict(lambda: defaultdict(float))

    with open(file_name) as pairwise_inclusive_statistics_file:
      statistics_reader = csv.DictReader(
          pairwise_inclusive_statistics_file, delimiter=',')

      for statistic in statistics_reader:
        parent_function_name, child_function_name = \
            statistic['parent_child_functions'].split(';;')
        child_function_file_name = \
            os.path.normpath(statistic['child_function_file'])
        inclusive_count_fraction = \
            float(statistic['inclusive_count_fraction'])

        if not all([parent_function_name, child_function_name,
                    inclusive_count_fraction]):
          continue

        # There might be situations where a child function appears in
        # multiple files or objects. Such situations can occur when in the
        # Dremel queries there are not specified the Chrome OS version and the
        # name of the board (i.e the files can belong to different kernel or
        # library versions), when the child function is a template function
        # that is declared in a header file or there are name collisions
        # between multiple executable objects.
        # If a pair of child and parent functions appears multiple times, we
        # add their inclusive count values.
        child_function_key = ','.join(
            [child_function_name, child_function_file_name])
        pairwise_inclusive_statistics[parent_function_name]\
            [child_function_key] += inclusive_count_fraction

    return pairwise_inclusive_statistics

  @staticmethod
  def ParseInclusiveStatisticsFile(inclusive_statistics_file_name):
    """Parses the inclusive statistics files.

    Rows with an empty function name or a zero fraction are skipped.

    Args:
      inclusive_statistics_file_name: The file containing the inclusive
        statistics of the CWP functions. It is read as a CSV file with the
        columns function, file and inclusive_count_fraction.

    Returns:
      A dict having as a key the function name and file where the function is
      defined separated by a ',' and as a value the inclusive count fraction.

    Raises:
      ValueError: If a fraction is not a valid float.
    """
    inclusive_statistics = defaultdict(float)

    with open(inclusive_statistics_file_name) as inclusive_statistics_file:
      statistics_reader = \
          csv.DictReader(inclusive_statistics_file, delimiter=',')

      for statistic in statistics_reader:
        function_name = statistic['function']
        file_name = os.path.normpath(statistic['file'])
        inclusive_count_fraction = \
            float(statistic['inclusive_count_fraction'])

        # There might be situations where a function appears in multiple files
        # or objects. Such situations can occur when in the Dremel queries there
        # are not specified the Chrome OS version and the name of the board (i.e
        # the files can belong to different kernel or library versions).
        # Entries sharing the same function and file are accumulated.
        if all([function_name, file_name, inclusive_count_fraction]):
          parent_function_key = ','.join([function_name, file_name])
          inclusive_statistics[parent_function_key] += inclusive_count_fraction

    return inclusive_statistics

  def PerformComputation(self):
    """Does the benchmark metrics experimental computation.

    For every function, it is computed a distance based on the sum of the
    differences of the fractions spent in the child functions. Afterwards,
    it is computed a score based on the inclusive values fractions and the
    distance value. The statistics for all the function are written in the file
    self._cwp_function_statistics_file.

    The functions are grouped on Chrome OS components based on the path of the
    file where a function is defined. For every group, there are computed the
    total number of functions matching that group, the cumulative distance, the
    average distance and the cumulative score of the functions. The statistics
    for the groups are written in the file
    self._cwp_function_groups_statistics_file.
    """
    inclusive_statistics_reference = \
        self.ParseInclusiveStatisticsFile(self._cwp_inclusive_reference)
    inclusive_statistics_test = \
        self.ParseInclusiveStatisticsFile(self._cwp_inclusive_test)
    pairwise_inclusive_statistics_reference = \
        self.ParsePairwiseInclusiveStatisticsFile(
            self._cwp_pairwise_inclusive_reference)
    pairwise_inclusive_statistics_test = \
        self.ParsePairwiseInclusiveStatisticsFile(
            self._cwp_pairwise_inclusive_test)
    parent_function_statistics = {}

    with open(self._cwp_function_groups_file, 'r') as input_file:
      # Blank lines carry no group definition, so they are skipped instead of
      # being turned into empty group entries.
      cwp_function_groups = [
          line.split() for line in input_file if line.strip()
      ]

    # items() is used instead of iteritems() so that the loop works on both
    # Python 2 and Python 3.
    for parent_function_key, parent_function_fraction_test \
        in inclusive_statistics_test.items():
      # The key is '<function name>,<file name>'. Splitting only on the last
      # comma keeps function names that contain commas themselves in one
      # piece. NOTE(review): this assumes file paths contain no commas.
      parent_function_name = parent_function_key.rsplit(',', 1)[0]

      parent_function_fraction_reference = \
          inclusive_statistics_reference.get(parent_function_key, 0.0)

      child_functions_statistics_test = \
          pairwise_inclusive_statistics_test.get(parent_function_name, {})

      child_functions_statistics_reference = \
          pairwise_inclusive_statistics_reference.get(parent_function_name, {})

      distance = benchmark_metrics.ComputeDistanceForFunction(
          child_functions_statistics_test, child_functions_statistics_reference)

      parent_function_score_test = benchmark_metrics.ComputeScoreForFunction(
          distance, parent_function_fraction_test,
          parent_function_fraction_reference)

      parent_function_statistics[parent_function_key] = \
          (distance, parent_function_score_test)

    with open(self._cwp_function_statistics_file, 'w') as output_file:
      statistics_lines = ['function,file,distance,score']
      # The key already has the 'function,file' form, so it supplies the first
      # two columns of every row.
      statistics_lines += \
          [','.join([parent_function_key.replace(';;', ','),
                     str(statistic[0]),
                     str(statistic[1])])
           for parent_function_key, statistic
           in parent_function_statistics.items()]
      output_file.write('\n'.join(statistics_lines))

    cwp_groups_statistics_test = benchmark_metrics.ComputeMetricsForComponents(
        cwp_function_groups, parent_function_statistics)

    with open(self._cwp_function_groups_statistics_file, 'w') as output_file:
      group_statistics_lines = \
          ['group,file_path,function_count,distance_cum,distance_avg,score_cum,'
           'score_avg']
      # Each group contributes its name followed by the first six values of
      # its statistic, matching the seven columns of the header.
      group_statistics_lines += \
          [','.join([group_name] +
                    [str(statistic[index]) for index in range(6)])
           for group_name, statistic
           in cwp_groups_statistics_test.items()]
      output_file.write('\n'.join(group_statistics_lines))


def ParseArguments(arguments):
  """Parses the command line arguments of the experiment.

  Args:
    arguments: The list of command line arguments, without the program name.

  Returns:
    The argparse namespace holding the value of every option. All the options
    are required.
  """
  parser = argparse.ArgumentParser(
      description='Runs an experiment with the benchmark metrics on a pair of '
      'CWP data sets.')
  parser.add_argument(
      '--cwp_pairwise_inclusive_reference',
      required=True,
      help='The reference CSV file that will contain a pair of parent and '
      'child functions with their inclusive count fractions out of the total '
      'amount of inclusive count values.')
  parser.add_argument(
      '--cwp_pairwise_inclusive_test',
      required=True,
      help='The test CSV file that will contain a pair of parent and '
      'child functions with their inclusive count fractions out of the total '
      'amount of inclusive count values.')
  parser.add_argument(
      '--cwp_inclusive_reference',
      required=True,
      help='The reference CSV file that will contain a function with its '
      'inclusive count fraction out of the total amount of inclusive count '
      'values.')
  parser.add_argument(
      '--cwp_inclusive_test',
      required=True,
      help='The test CSV file that will contain a function with its '
      'inclusive count fraction out of the total amount of inclusive count '
      'values.')
  parser.add_argument(
      '-g',
      '--cwp_function_groups_file',
      required=True,
      help='The file that will contain the CWP function groups. '
      'A line consists in the group name and a file path. A group must '
      'represent a ChromeOS component.')
  parser.add_argument(
      '-s',
      '--cwp_function_groups_statistics_file',
      required=True,
      help='The output file that will contain the metric statistics for the '
      'CWP function groups in CSV format. A line consists in the group name, '
      'file path, number of functions matching the group, cumulative '
      'distance, average distance, cumulative score and average score '
      'values.')
  parser.add_argument(
      '-f',
      '--cwp_function_statistics_file',
      required=True,
      help='The output file that will contain the metric statistics for the '
      'CWP functions in CSV format. A line consists in the function name, file '
      'name, distance and score values.')

  options = parser.parse_args(arguments)
  return options


def Main(argv):
  """Runs the metrics experiment configured by the command line arguments.

  Args:
    argv: The list of command line arguments, without the program name.
  """
  options = ParseArguments(argv)
  metrics_experiment = MetricsExperiment(
      options.cwp_pairwise_inclusive_reference,
      options.cwp_pairwise_inclusive_test, options.cwp_inclusive_reference,
      options.cwp_inclusive_test, options.cwp_function_groups_file,
      options.cwp_function_groups_statistics_file,
      options.cwp_function_statistics_file)
  metrics_experiment.PerformComputation()


if __name__ == '__main__':
  Main(sys.argv[1:])