benchmark_metrics_experiment.py revision 45a52fb3586d4fbaf6ffc5f3595ae417b5d148be
1#!/usr/bin/python2
2#
3# Copyright 2016 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6"""Runs an experiment with the benchmark metrics on a pair of CWP data sets.
7
8A data set should contain the files with the pairwise inclusive and the
9inclusive statistics. The pairwise inclusive file contains pairs of
10parent and child functions with their inclusive count fractions out of the
11total amount of inclusive count values and the files of the child functions.
12The inclusive file contains the functions with their inclusive count fraction
13out of the total amount of inclusive count values and the file name of the
14function. The input data should be collected using the scripts
15collect_experiment_data.sh or collect_experiment_data_odd_even_session.sh
16
For every function, this script computes the distance and the score values.
The output is stored in the file cwp_function_statistics_file.
19
For every Chrome OS component, this script computes a set of metrics consisting
of the number of functions, the average and cumulative distance and score of
the functions matching the group. The output is stored in the file
cwp_function_groups_statistics_file.
24"""
25
26import argparse
27import sys
28
29import benchmark_metrics
30import utils
31
32
class MetricsExperiment(object):
  """Runs an experiment with the benchmark metrics on a pair of data sets."""

  def __init__(self, cwp_pairwise_inclusive_reference,
               cwp_pairwise_inclusive_test, cwp_inclusive_reference,
               cwp_inclusive_test, cwp_function_groups_file,
               cwp_function_groups_statistics_file,
               cwp_function_statistics_file):
    """Initializes the MetricsExperiment class.

    Args:
      cwp_pairwise_inclusive_reference: The CSV file containing the pairwise
        inclusive values from the reference data set.
      cwp_pairwise_inclusive_test: The CSV file containing the pairwise
        inclusive values from the test data set.
      cwp_inclusive_reference: The CSV file containing the inclusive values
        from the reference data set.
      cwp_inclusive_test: The CSV file containing the inclusive values from
        the test data set.
      cwp_function_groups_file: The CSV file containing the groups of functions.
      cwp_function_groups_statistics_file: The output CSV file that will
        contain the metrics for the function groups.
      cwp_function_statistics_file: The output CSV file that will contain the
        metrics for the CWP functions.
    """
    self._cwp_pairwise_inclusive_reference = cwp_pairwise_inclusive_reference
    self._cwp_pairwise_inclusive_test = cwp_pairwise_inclusive_test
    self._cwp_inclusive_reference = cwp_inclusive_reference
    self._cwp_inclusive_test = cwp_inclusive_test
    self._cwp_function_groups_file = cwp_function_groups_file
    self._cwp_function_groups_statistics_file = \
        cwp_function_groups_statistics_file
    self._cwp_function_statistics_file = cwp_function_statistics_file

  @staticmethod
  def _GetFunctionName(function_key):
    """Extracts the function name from a function identification key.

    The key is split on its last comma only, so that a function name which
    itself contains commas (e.g. a C++ template signature) does not break the
    unpacking.

    Args:
      function_key: A string of the form '<function name>,<suffix>'. The
        suffix is presumably the file name, since the key is written into the
        function and file columns of the statistics file.

    Returns:
      The part of the key before the last comma.

    Raises:
      ValueError: If the key does not contain a comma.
    """
    function_name, _ = function_key.rsplit(',', 1)
    return function_name

  def PerformComputation(self):
    """Does the benchmark metrics experimental computation.

    For every function of the test data set, a distance is computed from the
    fractions of its child functions in the test and the reference data sets.
    Afterwards, a score is computed from the distance and the inclusive
    fractions of the function in both data sets. The statistics of all the
    functions are written in the file self._cwp_function_statistics_file.

    The per-function statistics are then aggregated per function group by
    benchmark_metrics.ComputeMetricsForComponents and the result is written in
    the file self._cwp_function_groups_statistics_file.

    Both output files are written without a trailing newline.
    """

    inclusive_statistics_reference = \
        utils.ParseCWPInclusiveCountFile(self._cwp_inclusive_reference)
    inclusive_statistics_cum_reference = \
        utils.ComputeCWPCummulativeInclusiveStatistics(
            inclusive_statistics_reference)
    inclusive_statistics_test = \
        utils.ParseCWPInclusiveCountFile(self._cwp_inclusive_test)
    inclusive_statistics_cum_test = \
        utils.ComputeCWPCummulativeInclusiveStatistics(
            inclusive_statistics_test)
    pairwise_inclusive_statistics_reference = \
        utils.ParseCWPPairwiseInclusiveCountFile(
            self._cwp_pairwise_inclusive_reference)
    pairwise_inclusive_fractions_reference = \
        utils.ComputeCWPChildFunctionsFractions(
            inclusive_statistics_cum_reference,
            pairwise_inclusive_statistics_reference)
    pairwise_inclusive_statistics_test = \
        utils.ParseCWPPairwiseInclusiveCountFile(
            self._cwp_pairwise_inclusive_test)
    pairwise_inclusive_fractions_test = \
        utils.ComputeCWPChildFunctionsFractions(
            inclusive_statistics_cum_test,
            pairwise_inclusive_statistics_test)
    parent_function_statistics = {}

    with open(self._cwp_function_groups_file) as input_file:
      cwp_function_groups = utils.ParseFunctionGroups(input_file.readlines())

    # items() is used instead of iteritems() so that the loop runs under both
    # Python 2 and Python 3.
    for parent_function_key, parent_function_statistics_test \
        in inclusive_statistics_test.items():
      parent_function_name = self._GetFunctionName(parent_function_key)
      # Index 2 is presumably the inclusive count fraction of the function;
      # verify against utils.ParseCWPInclusiveCountFile.
      parent_function_fraction_test = parent_function_statistics_test[2]

      # NOTE(review): this lookup fails with KeyError for a function missing
      # from the reference data set, unless the parsed statistics provide a
      # default value for missing keys -- confirm against utils.
      parent_function_fraction_reference = \
          inclusive_statistics_reference[parent_function_key][2]

      # A function without recorded children is compared using an empty set
      # of child fractions.
      child_functions_fractions_test = \
          pairwise_inclusive_fractions_test.get(parent_function_name, {})

      child_functions_fractions_reference = \
          pairwise_inclusive_fractions_reference.get(parent_function_name, {})

      distance = benchmark_metrics.ComputeDistanceForFunction(
          child_functions_fractions_test, child_functions_fractions_reference)

      parent_function_score_test = benchmark_metrics.ComputeScoreForFunction(
          distance, parent_function_fraction_test,
          parent_function_fraction_reference)

      parent_function_statistics[parent_function_key] = \
          (distance, parent_function_score_test)

    with open(self._cwp_function_statistics_file, 'w') as output_file:
      statistics_lines = ['function,file,distance,score']
      # NOTE(review): the key is split on ',' above, yet ';;' is replaced
      # here; presumably a leftover from another key format -- confirm.
      statistics_lines += \
          [','.join([parent_function_key.replace(';;', ','),
                     str(statistic[0]),
                     str(statistic[1])])
           for parent_function_key, statistic
           in parent_function_statistics.items()]
      output_file.write('\n'.join(statistics_lines))

    cwp_groups_statistics_test = benchmark_metrics.ComputeMetricsForComponents(
        cwp_function_groups, parent_function_statistics)

    with open(self._cwp_function_groups_statistics_file, 'w') as output_file:
      group_statistics_lines = \
          ['group,file_path,function_count,distance_cum,distance_avg,score_cum,'
           'score_avg']
      # Every group contributes its name followed by the first six values of
      # its statistic, matching the seven columns of the header.
      group_statistics_lines += \
          [','.join([group_name] +
                    [str(statistic[index]) for index in range(6)])
           for group_name, statistic
           in cwp_groups_statistics_test.items()]
      output_file.write('\n'.join(group_statistics_lines))
163
164
def ParseArguments(arguments):
  """Parses the command line arguments of the experiment.

  All the options are required; argparse exits the program when one of them
  is missing.

  Args:
    arguments: The list of command line arguments, without the program name.

  Returns:
    The argparse namespace with one attribute per command line option.
  """
  parser = argparse.ArgumentParser(
      description='Runs an experiment with the benchmark metrics on a pair of '
      'CWP data sets.')
  parser.add_argument(
      '--cwp_pairwise_inclusive_reference',
      required=True,
      help='The reference CSV file containing pairs of parent and child '
      'functions with their inclusive count fractions out of the total '
      'amount of inclusive count values.')
  parser.add_argument(
      '--cwp_pairwise_inclusive_test',
      required=True,
      help='The test CSV file containing pairs of parent and child '
      'functions with their inclusive count fractions out of the total '
      'amount of inclusive count values.')
  parser.add_argument(
      '--cwp_inclusive_reference',
      required=True,
      help='The reference CSV file containing the functions with their '
      'inclusive count fraction out of the total amount of inclusive count '
      'values.')
  parser.add_argument(
      '--cwp_inclusive_test',
      required=True,
      help='The test CSV file containing the functions with their '
      'inclusive count fraction out of the total amount of inclusive count '
      'values.')
  parser.add_argument(
      '-g',
      '--cwp_function_groups_file',
      required=True,
      help='The file containing the CWP function groups. '
      'A line consists of the group name and a file path. A group must '
      'represent a ChromeOS component.')
  # The column descriptions below follow the CSV headers written by
  # MetricsExperiment.PerformComputation.
  parser.add_argument(
      '-s',
      '--cwp_function_groups_statistics_file',
      required=True,
      help='The output file that will contain the metric statistics for the '
      'CWP function groups in CSV format. A line consists of the group name, '
      'file path, number of functions matching the group, cumulative '
      'distance, average distance, cumulative score and average score '
      'values.')
  parser.add_argument(
      '-f',
      '--cwp_function_statistics_file',
      required=True,
      help='The output file that will contain the metric statistics for the '
      'CWP functions in CSV format. A line consists of the function name, file '
      'name, distance and score values.')

  return parser.parse_args(arguments)
219
220
def Main(argv):
  """Parses the command line and runs the benchmark metrics experiment.

  Args:
    argv: The command line arguments, without the program name.
  """
  args = ParseArguments(argv)
  experiment = MetricsExperiment(
      args.cwp_pairwise_inclusive_reference,
      args.cwp_pairwise_inclusive_test,
      args.cwp_inclusive_reference,
      args.cwp_inclusive_test,
      args.cwp_function_groups_file,
      args.cwp_function_groups_statistics_file,
      args.cwp_function_statistics_file)
  experiment.PerformComputation()
230
231
# Run the experiment only when the file is executed as a script, forwarding
# the command line arguments without the program name.
if __name__ == '__main__':
  Main(sys.argv[1:])
234