benchmark_metrics_experiment.py revision 523b2ae25b5b98512babb5051b6f8f4dd92ef7cf
1#!/usr/bin/python2
2#
3# Copyright 2016 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6"""Runs an experiment with the benchmark metrics on a pair of CWP data sets.
7
8A data set should contain the files with the pairwise inclusive and the
9inclusive statistics. The pairwise inclusive file contains pairs of
10parent and child functions with their inclusive count fractions out of the
11total amount of inclusive count values and the files of the child functions.
12The inclusive file contains the functions with their inclusive count fraction
13out of the total amount of inclusive count values and the file name of the
14function. The input data should be collected using the scripts
15collect_experiment_data.sh or collect_experiment_data_odd_even_session.sh
16
For every function, this script computes the distance and the score values.
The output is stored in the file cwp_function_statistics_file.
19
For every Chrome OS component, this script computes a set of metrics consisting
of the number of functions, the average and cumulative distance and score of
the functions matching the group. The output is stored in the file
cwp_function_groups_statistics_file.
24"""
25
26import argparse
27from collections import defaultdict
28import csv
29import os
30import sys
31import benchmark_metrics
32
33
class MetricsExperiment(object):
  """Runs an experiment with the benchmark metrics on a pair of data sets.

  The experiment reads the inclusive and the pairwise inclusive statistics of
  a reference and of a test data set, computes a distance and a score for every
  function of the test data set and aggregates these values per function group.
  The results are written in two CSV files.
  """

  def __init__(self, cwp_pairwise_inclusive_reference,
               cwp_pairwise_inclusive_test, cwp_inclusive_reference,
               cwp_inclusive_test, cwp_function_groups_file,
               cwp_function_groups_statistics_file,
               cwp_function_statistics_file):
    """Initializes the MetricsExperiment class.

    Args:
      cwp_pairwise_inclusive_reference: The CSV file containing the pairwise
        inclusive values from the reference data set.
      cwp_pairwise_inclusive_test: The CSV file containing the pairwise
        inclusive values from the test data set.
      cwp_inclusive_reference: The CSV file containing the inclusive values
        from the reference data set.
      cwp_inclusive_test: The CSV file containing the inclusive values from
        the test data set.
      cwp_function_groups_file: The file containing the groups of functions.
        Every line is split on whitespace when it is read.
      cwp_function_groups_statistics_file: The output CSV file that will
        contain the metrics for the function groups.
      cwp_function_statistics_file: The output CSV file that will contain the
        metrics for the CWP functions.
    """
    self._cwp_pairwise_inclusive_reference = cwp_pairwise_inclusive_reference
    self._cwp_pairwise_inclusive_test = cwp_pairwise_inclusive_test
    self._cwp_inclusive_reference = cwp_inclusive_reference
    self._cwp_inclusive_test = cwp_inclusive_test
    self._cwp_function_groups_file = cwp_function_groups_file
    self._cwp_function_groups_statistics_file = \
        cwp_function_groups_statistics_file
    self._cwp_function_statistics_file = cwp_function_statistics_file

  @staticmethod
  def ParsePairwiseInclusiveStatisticsFile(file_name):
    """Parses the pairwise inclusive statistics files.

    The file is a CSV file with a header line naming the columns
    parent_child_functions, child_function_file and inclusive_count_fraction.
    The parent_child_functions column holds a parent and a child function
    concatenated by a ;;. The child_function_file column holds the name of the
    file where the child function is defined and is normalized with
    os.path.normpath.

    Rows having an empty parent function name, an empty child function name or
    an inclusive count fraction equal to zero are ignored.

    Args:
      file_name: The file containing the pairwise inclusive statistics of the
        CWP functions.

    Returns:
      A dict containing the statistics of the parent functions and each of
      their child functions. The key of the dict is the name of the parent
      function. The value is a dict having as a key the name of the child
      function with its file name separated by a ',' and as a value the
      inclusive count fraction of the child function.

    Raises:
      ValueError: If the parent_child_functions value does not consist of
        exactly two parts separated by ;; or if the inclusive count fraction
        is not a valid float.
    """
    pairwise_inclusive_statistics = defaultdict(lambda: defaultdict(float))

    with open(file_name) as pairwise_inclusive_statistics_file:
      statistics_reader = csv.DictReader(
          pairwise_inclusive_statistics_file, delimiter=',')
      for statistic in statistics_reader:
        parent_function_name, child_function_name = \
            statistic['parent_child_functions'].split(';;')
        child_function_file_name = \
            os.path.normpath(statistic['child_function_file'])
        inclusive_count_fraction = \
            float(statistic['inclusive_count_fraction'])

        if not all([parent_function_name, child_function_name,
                    inclusive_count_fraction]):
          continue

        # There might be situations where a child function appears in
        # multiple files or objects. Such situations can occur when in the
        # Dremel queries there are not specified the Chrome OS version and the
        # name of the board (i.e the files can belong to different kernel or
        # library versions), when the child function is a template function
        # that is declared in a header file or there are name collisions
        # between multiple executable objects.
        # If a pair of child and parent functions appears multiple times, we
        # add their inclusive count values.
        child_function_key = ','.join([child_function_name,
                                       child_function_file_name])
        pairwise_inclusive_statistics[parent_function_name]\
            [child_function_key] += inclusive_count_fraction

    return pairwise_inclusive_statistics

  @staticmethod
  def ParseInclusiveStatisticsFile(inclusive_statistics_file_name):
    """Parses the inclusive statistics files.

    The file is a CSV file with a header line naming the columns function,
    file and inclusive_count_fraction. The file column is normalized with
    os.path.normpath.

    Rows having an empty function name or an inclusive count fraction equal to
    zero are ignored.

    Args:
      inclusive_statistics_file_name: The file containing the inclusive
        statistics of the CWP functions.

    Returns:
      A dict having as a key the function name and file where the function is
      defined separated by a ',' and as a value the inclusive count fraction.

    Raises:
      ValueError: If an inclusive count fraction is not a valid float.
    """
    inclusive_statistics = defaultdict(float)

    with open(inclusive_statistics_file_name) as inclusive_statistics_file:
      statistics_reader = \
          csv.DictReader(inclusive_statistics_file, delimiter=',')

      for statistic in statistics_reader:
        function_name = statistic['function']
        file_name = os.path.normpath(statistic['file'])
        inclusive_count_fraction = \
            float(statistic['inclusive_count_fraction'])

        # NOTE(review): os.path.normpath('') returns '.', so file_name is
        # always truthy here and rows with an empty file column are kept with
        # the file name '.'. Confirm whether such rows were meant to be
        # skipped before changing this.
        if not all([function_name, file_name, inclusive_count_fraction]):
          continue

        # There might be situations where a function appears in multiple files
        # or objects. Such situations can occur when in the Dremel queries there
        # are not specified the Chrome OS version and the name of the board (i.e
        # the files can belong to different kernel or library versions).
        # The fractions of the repeated entries are added together.
        parent_function_key = ','.join([function_name, file_name])
        inclusive_statistics[parent_function_key] += inclusive_count_fraction

    return inclusive_statistics

  @staticmethod
  def _GetFunctionName(function_key):
    """Extracts the function name from a 'function_name,file_name' key.

    Only the last ',' is treated as the separator, so function names that
    contain commas themselves (e.g. C++ template instantiations) are handled.
    File names containing a ',' are not supported.

    Args:
      function_key: A key built by ParseInclusiveStatisticsFile.

    Returns:
      The function name part of the key.
    """
    return function_key.rsplit(',', 1)[0]

  def PerformComputation(self):
    """Does the benchmark metrics experimental computation.

    For every function of the test data set, a distance is computed by
    benchmark_metrics.ComputeDistanceForFunction from the child function
    statistics of the test and of the reference data sets. Afterwards, a score
    is computed by benchmark_metrics.ComputeScoreForFunction from the distance
    and the inclusive count fractions of the function in the two data sets. A
    function missing from the reference data set gets a reference fraction of
    0.0 and an empty dict of reference child functions. The statistics for all
    the functions are written in the file self._cwp_function_statistics_file.

    The group statistics are computed by
    benchmark_metrics.ComputeMetricsForComponents from the function groups and
    the function statistics, and are written in the file
    self._cwp_function_groups_statistics_file.
    """
    inclusive_statistics_reference = \
        self.ParseInclusiveStatisticsFile(self._cwp_inclusive_reference)
    inclusive_statistics_test = \
        self.ParseInclusiveStatisticsFile(self._cwp_inclusive_test)
    pairwise_inclusive_statistics_reference = \
        self.ParsePairwiseInclusiveStatisticsFile(
            self._cwp_pairwise_inclusive_reference)
    pairwise_inclusive_statistics_test = \
        self.ParsePairwiseInclusiveStatisticsFile(
            self._cwp_pairwise_inclusive_test)
    parent_function_statistics = {}

    with open(self._cwp_function_groups_file, 'r') as input_file:
      cwp_function_groups = [line.split() for line in input_file]

    # items() is used instead of iteritems() so that the code runs under both
    # Python 2 and Python 3.
    for parent_function_key, parent_function_fraction_test in \
        inclusive_statistics_test.items():
      # The pairwise statistics are keyed by the function name only.
      parent_function_name = self._GetFunctionName(parent_function_key)

      parent_function_fraction_reference = \
          inclusive_statistics_reference.get(parent_function_key, 0.0)

      # get() is used so that no empty entries are added to the defaultdicts.
      child_functions_statistics_test = \
          pairwise_inclusive_statistics_test.get(parent_function_name, {})

      child_functions_statistics_reference = \
          pairwise_inclusive_statistics_reference.get(parent_function_name, {})

      distance = benchmark_metrics.ComputeDistanceForFunction(
          child_functions_statistics_test, child_functions_statistics_reference)

      parent_function_score_test = benchmark_metrics.ComputeScoreForFunction(
          distance, parent_function_fraction_test,
          parent_function_fraction_reference)

      parent_function_statistics[parent_function_key] = \
          (distance, parent_function_score_test)

    with open(self._cwp_function_statistics_file, 'w') as output_file:
      statistics_lines = ['function,file,distance,score']
      # The key already has the 'function,file' form, so the replace() below
      # is normally a no-op. It is kept to leave the output unchanged.
      statistics_lines += \
          [','.join([parent_function_key.replace(';;', ','),
                     str(statistic[0]),
                     str(statistic[1])])
           for parent_function_key, statistic
           in parent_function_statistics.items()]
      output_file.write('\n'.join(statistics_lines))

    cwp_groups_statistics_test = benchmark_metrics.ComputeMetricsForComponents(
        cwp_function_groups, parent_function_statistics)

    with open(self._cwp_function_groups_statistics_file, 'w') as output_file:
      group_statistics_lines = \
          ['group,file_path,function_count,distance_cum,distance_avg,score_cum,'
           'score_avg']
      # Every group statistic is expected to have six values, one for each
      # column following the group name.
      group_statistics_lines += \
          [','.join([group_name] + [str(statistic[index])
                                    for index in range(6)])
           for group_name, statistic
           in cwp_groups_statistics_test.items()]
      output_file.write('\n'.join(group_statistics_lines))
238
def ParseArguments(arguments):
  """Parses the command line arguments of the experiment script.

  All the options are required. If one of them is missing, argparse reports
  the error and exits the program.

  Args:
    arguments: The list of command line arguments, without the program name.

  Returns:
    The argparse namespace holding the value of every option.
  """
  parser = argparse.ArgumentParser(
      description='Runs an experiment with the benchmark metrics on a pair of '
      'CWP data sets.')
  parser.add_argument(
      '--cwp_pairwise_inclusive_reference',
      required=True,
      help='The reference CSV file that will contain a pair of parent and '
      'child functions with their inclusive count fractions out of the total '
      'amount of inclusive count values.')
  parser.add_argument(
      '--cwp_pairwise_inclusive_test',
      required=True,
      help='The test CSV file that will contain a pair of parent and '
      'child functions with their inclusive count fractions out of the total '
      'amount of inclusive count values.')
  parser.add_argument(
      '--cwp_inclusive_reference',
      required=True,
      help='The reference CSV file that will contain a function with its '
      'inclusive count fraction out of the total amount of inclusive count '
      'values.')
  parser.add_argument(
      '--cwp_inclusive_test',
      required=True,
      help='The test CSV file that will contain a function with its '
      'inclusive count fraction out of the total amount of inclusive count '
      'values.')
  # The first help fragment ends with a space so that the two sentences do not
  # run together when the string literals are concatenated.
  parser.add_argument(
      '-g',
      '--cwp_function_groups_file',
      required=True,
      help='The file that will contain the CWP function groups. '
      'A line consists of the group name and a file path. A group must '
      'represent a ChromeOS component.')
  # The columns listed below match the header written for the groups output:
  # group,file_path,function_count,distance_cum,distance_avg,score_cum,
  # score_avg.
  parser.add_argument(
      '-s',
      '--cwp_function_groups_statistics_file',
      required=True,
      help='The output file that will contain the metric statistics for the '
      'CWP function groups in CSV format. A line consists of the group name, '
      'file path, number of functions matching the group, cumulative '
      'distance, average distance, cumulative score and average score '
      'values.')
  # The columns listed below match the header written for the functions
  # output: function,file,distance,score.
  parser.add_argument(
      '-f',
      '--cwp_function_statistics_file',
      required=True,
      help='The output file that will contain the metric statistics for the '
      'CWP functions in CSV format. A line consists of the function name, file '
      'name, distance and score values.')

  options = parser.parse_args(arguments)
  return options
293
294
def Main(argv):
  """Runs the benchmark metrics experiment described by the arguments.

  Args:
    argv: The list of command line arguments, without the program name.
  """
  parsed_options = ParseArguments(argv)
  # The values are listed in the order of the MetricsExperiment constructor
  # parameters.
  constructor_arguments = (
      parsed_options.cwp_pairwise_inclusive_reference,
      parsed_options.cwp_pairwise_inclusive_test,
      parsed_options.cwp_inclusive_reference,
      parsed_options.cwp_inclusive_test,
      parsed_options.cwp_function_groups_file,
      parsed_options.cwp_function_groups_statistics_file,
      parsed_options.cwp_function_statistics_file,
  )
  MetricsExperiment(*constructor_arguments).PerformComputation()
304
305
# Run the experiment only when the file is executed as a script. The program
# name (sys.argv[0]) is dropped before the arguments are parsed.
if __name__ == '__main__':
  Main(sys.argv[1:])
308