1#!/usr/bin/env python
2# Copyright (c) 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6""" Generate bench_expectations file from a given set of bench data files. """
7
8import argparse
9import bench_util
10import json
11import os
12import re
13import sys
14import urllib2
15
# Parameters for calculating bench ranges. A bench's alert range is derived
# from the observed [min, max] spread, widened by these ratios/absolutes.
RANGE_RATIO_UPPER = 1.5  # Ratio of range for upper bounds.
RANGE_RATIO_LOWER = 2.0  # Ratio of range for lower bounds.
ERR_RATIO = 0.08  # Further widens the range by the ratio of average value.
ERR_UB = 1.0  # Adds an absolute upper error to cope with small benches.
ERR_LB = 1.5  # Adds an absolute lower error to cope with small benches.

# List of bench configs to monitor. Ignore all other configs.
CONFIGS_TO_INCLUDE = ['simple_viewport_1000x1000',
                      'simple_viewport_1000x1000_angle',
                      'simple_viewport_1000x1000_gpu',
                      'simple_viewport_1000x1000_scalar_1.100000',
                      'simple_viewport_1000x1000_scalar_1.100000_gpu',
                     ]

# List of flaky entries that should be excluded. Each entry is defined by a list
# of 3 strings, corresponding to the substrings of [bench, config, builder] to
# search for. A bench expectations line is excluded when each of the 3 strings
# in the list is a substring of the corresponding element of the given line. For
# instance, ['desk_yahooanswers', 'gpu', 'Ubuntu'] will skip expectation entries
# of SKP benchs whose name contains 'desk_yahooanswers' on all gpu-related
# configs of all Ubuntu builders.
ENTRIES_TO_EXCLUDE = [
                     ]

# URL template for fetching a builder's bench data file from the
# chromium-skia-gm Google Storage bucket; interpolates (builder, file_name).
_GS_CLOUD_FORMAT = 'http://storage.googleapis.com/chromium-skia-gm/perfdata/%s/%s'
42
def compute_ranges(benches, more_benches=None):
  """Calculate the alert range for a list of bench values.

  Args:
    benches: a list of float bench values.
    more_benches: a tuple of lists of additional bench values.
      The first value of each tuple is the number of commits before the current
      one that set of values is at, and the second value is a list of
      bench results.
      NOTE(review): this parameter is currently unused by the computation;
      it is accepted (and passed by callers) for interface compatibility.

  Returns:
    a list of float [lower_bound, upper_bound].
  """
  lowest, highest = min(benches), max(benches)
  spread = highest - lowest
  mean = sum(benches) / len(benches)
  # Widen the observed spread by the configured ratios plus absolute slack.
  lower_bound = lowest - spread * RANGE_RATIO_LOWER - mean * ERR_RATIO - ERR_LB
  upper_bound = highest + spread * RANGE_RATIO_UPPER + mean * ERR_RATIO + ERR_UB
  return [lower_bound, upper_bound]
63
64
def create_expectations_dict(revision_data_points, builder, extra_data=None):
  """Convert list of bench data points into a dictionary of expectations data.

  Args:
    revision_data_points: a list of BenchDataPoint objects.
    builder: string of the corresponding buildbot builder name.
    extra_data: optional list of (idx, data_points) tuples, where idx is the
      number of commits back and data_points is the list of BenchDataPoint
      objects parsed for that earlier revision. May be None.

  Returns:
    a dictionary of this form:
        keys = tuple of (config, bench) strings.
        values = list of float [expected, lower_bound, upper_bound] for the key.

  Raises:
    Exception: if two data points produce the same (config, bench) key.
  """
  bench_dict = {}
  for point in revision_data_points:
    if (point.time_type or  # Not walltime which has time_type ''
        point.config not in CONFIGS_TO_INCLUDE):
      continue
    to_skip = False
    for bench_substr, config_substr, builder_substr in ENTRIES_TO_EXCLUDE:
      if (bench_substr in point.bench and config_substr in point.config and
          builder_substr in builder):
        to_skip = True
        break
    if to_skip:
      continue
    key = (point.config, point.bench)

    # Collect matching per-iteration times from earlier revisions; guard
    # against the default of None (bug fix: the old code iterated extra_data
    # unconditionally, so omitting the argument raised TypeError).
    extras = []
    for idx, dataset in (extra_data or []):
      for data in dataset:
        if (data.bench == point.bench and data.config == point.config and
              data.time_type == point.time_type and data.per_iter_time):
          extras.append((idx, data.per_iter_time))

    if key in bench_dict:
      raise Exception('Duplicate bench entry: ' + str(key))
    bench_dict[key] = [point.time] + compute_ranges(point.per_iter_time, extras)

  return bench_dict
104
105
def get_parent_commits(start_hash, num_back):
  """Returns a list of commits that are the parent of the commit passed in.

  Args:
    start_hash: git hash of the commit to start the ancestry walk from.
    num_back: number of commits to request from the Gitiles log.

  Returns:
    a list of commit-hash strings, most recent first.
  """
  response = urllib2.urlopen(
      'https://skia.googlesource.com/skia/+log/%s?format=json&n=%d' %
      (start_hash, num_back))
  try:
    raw = response.read()
  finally:
    response.close()  # Bug fix: the response was never closed.
  # Gitiles prepends an XSSI-protection prefix ()]}' plus a newline) to JSON
  # responses. Slice from the first '{' rather than assuming a fixed prefix
  # length (the old [4:] left a stray newline and breaks if the prefix varies).
  json_data = json.loads(raw[raw.index('{'):])
  return [revision['commit'] for revision in json_data['log']]
116
117
def get_file_suffixes(commit_hash, directory):
  """Gets all the suffixes available in the directory.

  Args:
    commit_hash: git hash embedded in the bench data file names.
    directory: path whose entries are scanned.

  Returns:
    a list of the suffix portion of every file name that matches the
    'bench_<hash>_data_' prefix.
  """
  prefix = 'bench_' + commit_hash + '_data_'
  suffixes = []
  for file_name in os.listdir(directory):
    if file_name.startswith(prefix):
      suffixes.append(file_name[len(prefix):])
  return suffixes
124
125
def download_bench_data(builder, commit_hash, suffixes, directory):
  """Downloads data, returns the number successfully downloaded.

  Skips any file already present in the directory; HTTP errors for
  individual files are ignored (best-effort download).
  """
  existing = set(os.listdir(directory))
  downloaded = 0
  for suffix in suffixes:
    file_name = 'bench_%s_data_%s' % (commit_hash, suffix)
    if file_name in existing:
      continue  # Already fetched on a previous run.
    try:
      src = urllib2.urlopen(_GS_CLOUD_FORMAT % (builder, file_name))
      with open(os.path.join(directory, file_name), 'w') as dest:
        dest.writelines(src)
        downloaded += 1
    except urllib2.HTTPError:
      # Missing files on the server are expected; keep going.
      pass
  return downloaded
142
143
144def main():
145    """Reads bench data points, then calculate and export expectations.
146    """
147    parser = argparse.ArgumentParser()
148    parser.add_argument(
149        '-a', '--representation_alg', default='25th',
150        help='bench representation algorithm to use, see bench_util.py.')
151    parser.add_argument(
152        '-b', '--builder', required=True,
153        help='name of the builder whose bench ranges we are computing.')
154    parser.add_argument(
155        '-d', '--input_dir', required=True,
156        help='a directory containing bench data files.')
157    parser.add_argument(
158        '-o', '--output_file', required=True,
159        help='file path and name for storing the output bench expectations.')
160    parser.add_argument(
161        '-r', '--git_revision', required=True,
162        help='the git hash to indicate the revision of input data to use.')
163    parser.add_argument(
164        '-t', '--back_track', required=False, default=10,
165        help='the number of commit hashes backwards to look to include' +
166             'in the calculations.')
167    parser.add_argument(
168        '-m', '--max_commits', required=False, default=1,
169        help='the number of commit hashes to include in the calculations.')
170    args = parser.parse_args()
171
172    builder = args.builder
173
174    data_points = bench_util.parse_skp_bench_data(
175        args.input_dir, args.git_revision, args.representation_alg)
176
177    parent_commits = get_parent_commits(args.git_revision, args.back_track)
178    print "Using commits: {}".format(parent_commits)
179    suffixes = get_file_suffixes(args.git_revision, args.input_dir)
180    print "Using suffixes: {}".format(suffixes)
181
182    # TODO(kelvinly): Find a better approach to than directly copying from
183    # the GS server?
184    downloaded_commits = []
185    for idx, commit in enumerate(parent_commits):
186      num_downloaded = download_bench_data(
187          builder, commit, suffixes, args.input_dir)
188      if num_downloaded > 0:
189        downloaded_commits.append((num_downloaded, idx, commit))
190
191    if len(downloaded_commits) < args.max_commits:
192      print ('Less than desired number of commits found. Please increase'
193            '--back_track in later runs')
194    trunc_commits = sorted(downloaded_commits, reverse=True)[:args.max_commits]
195    extra_data = []
196    for _, idx, commit in trunc_commits:
197      extra_data.append((idx, bench_util.parse_skp_bench_data(
198          args.input_dir, commit, args.representation_alg)))
199
200    expectations_dict = create_expectations_dict(data_points, builder,
201                                                 extra_data)
202
203    out_lines = []
204    keys = expectations_dict.keys()
205    keys.sort()
206    for (config, bench) in keys:
207      (expected, lower_bound, upper_bound) = expectations_dict[(config, bench)]
208      out_lines.append('%(bench)s_%(config)s_,%(builder)s-%(representation)s,'
209          '%(expected)s,%(lower_bound)s,%(upper_bound)s' % {
210              'bench': bench,
211              'config': config,
212              'builder': builder,
213              'representation': args.representation_alg,
214              'expected': expected,
215              'lower_bound': lower_bound,
216              'upper_bound': upper_bound})
217
218    with open(args.output_file, 'w') as file_handle:
219      file_handle.write('\n'.join(out_lines))
220
221
# Entry point: run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
224