1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6# For instructions see:
7# http://www.chromium.org/developers/tree-sheriffs/perf-sheriffs
8
9import hashlib
10import math
11import optparse
12import os
13import re
14import subprocess
15import sys
16import time
17import urllib2
18
19
20try:
21  import json
22except ImportError:
23  import simplejson as json
24
25
__version__ = '1.0'
# Directory containing this script; the default config path is resolved
# relative to it.
EXPECTATIONS_DIR = os.path.dirname(os.path.abspath(__file__))
DEFAULT_CONFIG_FILE = os.path.join(EXPECTATIONS_DIR,
                                   'chromium_perf_expectations.cfg')
# Fractional slack applied to regress/improve bounds when an expectation
# entry does not specify its own 'tolerance'.
DEFAULT_TOLERANCE = 0.05
USAGE = ''
32
33
34def ReadFile(filename):
35  try:
36    file = open(filename, 'rb')
37  except IOError, e:
38    print >> sys.stderr, ('I/O Error reading file %s(%s): %s' %
39                          (filename, e.errno, e.strerror))
40    raise e
41  contents = file.read()
42  file.close()
43  return contents
44
45
46def ConvertJsonIntoDict(string):
47  """Read a JSON string and convert its contents into a Python datatype."""
48  if len(string) == 0:
49    print >> sys.stderr, ('Error could not parse empty string')
50    raise Exception('JSON data missing')
51
52  try:
53    jsondata = json.loads(string)
54  except ValueError, e:
55    print >> sys.stderr, ('Error parsing string: "%s"' % string)
56    raise e
57  return jsondata
58
59
60# Floating point representation of last time we fetched a URL.
61last_fetched_at = None
62def FetchUrlContents(url):
63  global last_fetched_at
64  if last_fetched_at and ((time.time() - last_fetched_at) <= 0.5):
65    # Sleep for half a second to avoid overloading the server.
66    time.sleep(0.5)
67  try:
68    last_fetched_at = time.time()
69    connection = urllib2.urlopen(url)
70  except urllib2.HTTPError, e:
71    if e.code == 404:
72      return None
73    raise e
74  text = connection.read().strip()
75  connection.close()
76  return text
77
78
def GetRowData(data, key):
  """Serialize the known fields of data[key] as JSON fragment strings.

  Field order is fixed: revisions first, then string-valued fields, then
  the numeric bounds.  Unknown fields (e.g. 'sha1') are omitted.
  """
  row = data[key]
  unquoted = '"%s": %s'    # numbers and revisions are emitted bare
  quoted = '"%s": "%s"'    # string values are emitted in quotes
  plan = [('reva', unquoted), ('revb', unquoted),
          ('type', quoted), ('better', quoted),
          ('improve', unquoted), ('regress', unquoted),
          ('tolerance', unquoted)]
  return [fmt % (name, row[name]) for name, fmt in plan if name in row]
94
95
96def GetRowDigest(rowdata, key):
97  sha1 = hashlib.sha1()
98  rowdata = [str(possibly_unicode_string).encode('ascii')
99             for possibly_unicode_string in rowdata]
100  sha1.update(str(rowdata) + key)
101  return sha1.hexdigest()[0:8]
102
103
104def WriteJson(filename, data, keys, calculate_sha1=True):
105  """Write a list of |keys| in |data| to the file specified in |filename|."""
106  try:
107    file = open(filename, 'wb')
108  except IOError, e:
109    print >> sys.stderr, ('I/O Error writing file %s(%s): %s' %
110                          (filename, e.errno, e.strerror))
111    return False
112  jsondata = []
113  for key in keys:
114    rowdata = GetRowData(data, key)
115    if calculate_sha1:
116      # Include an updated checksum.
117      rowdata.append('"sha1": "%s"' % GetRowDigest(rowdata, key))
118    else:
119      if 'sha1' in data[key]:
120        rowdata.append('"sha1": "%s"' % (data[key]['sha1']))
121    jsondata.append('"%s": {%s}' % (key, ', '.join(rowdata)))
122  jsondata.append('"load": true')
123  jsontext = '{%s\n}' % ',\n '.join(jsondata)
124  file.write(jsontext + '\n')
125  file.close()
126  return True
127
128
def FloatIsInt(f):
  """Return True if |f| is within a tiny epsilon of its truncated integer."""
  # int() truncates toward zero; a tolerance of 1e-10 absorbs float noise.
  return abs(f - int(f)) <= 1.0e-10
132
133
# The expectation key whose heading was last printed; lets OutputMessage
# group several messages under a single key heading.
last_key_printed = None
def Main(args):
  """Update perf expectation bounds from the server's summary data.

  Reads the config file, fetches each expectation's graph summary,
  recomputes regress/improve bounds over the [reva, revb] revision range
  (widened by the tolerance), and rewrites the expectations file if
  anything changed.  Returns an exit code: 0 on success, 1 on error or
  (with --checksum) when any checksum is out of date.
  """
  def OutputMessage(message, verbose_message=True):
    # NOTE: closes over |key| (the loop variable below) and |options|
    # from the enclosing scope.  verbose_message=True output is
    # suppressed unless --verbose was given.
    global last_key_printed
    if not options.verbose and verbose_message:
      return

    if key != last_key_printed:
      last_key_printed = key
      print '\n' + key + ':'
    print '  %s' % message

  parser = optparse.OptionParser(usage=USAGE, version=__version__)
  parser.add_option('-v', '--verbose', action='store_true', default=False,
                    help='enable verbose output')
  parser.add_option('-s', '--checksum', action='store_true',
                    help='test if any changes are pending')
  parser.add_option('-c', '--config', dest='config_file',
                    default=DEFAULT_CONFIG_FILE,
                    help='set the config file to FILE', metavar='FILE')
  options, args = parser.parse_args(args)

  if options.verbose:
    print 'Verbose output enabled.'

  config = ConvertJsonIntoDict(ReadFile(options.config_file))

  # Get the list of summaries for a test.
  base_url = config['base_url']
  # Make the perf expectations file relative to the path of the config file.
  perf_file = os.path.join(
    os.path.dirname(options.config_file), config['perf_file'])
  perf = ConvertJsonIntoDict(ReadFile(perf_file))

  # Fetch graphs.dat for this combination.
  perfkeys = perf.keys()
  # In perf_expectations.json, ignore the 'load' key.
  perfkeys.remove('load')
  perfkeys.sort()

  write_new_expectations = False
  found_checksum_mismatch = False
  for key in perfkeys:
    value = perf[key]
    tolerance = value.get('tolerance', DEFAULT_TOLERANCE)
    better = value.get('better', None)

    # Verify the checksum.  'sha1' is removed first because the digest is
    # computed over the row without it.
    original_checksum = value.get('sha1', '')
    if 'sha1' in value:
      del value['sha1']
    rowdata = GetRowData(perf, key)
    computed_checksum = GetRowDigest(rowdata, key)
    if original_checksum == computed_checksum:
      OutputMessage('checksum matches, skipping')
      continue
    elif options.checksum:
      # --checksum mode only reports staleness; don't recompute bounds.
      found_checksum_mismatch = True
      continue

    # Skip expectations that are missing a reva or revb.  We can't generate
    # expectations for those.
    if not(value.has_key('reva') and value.has_key('revb')):
      OutputMessage('missing revision range, skipping')
      continue
    revb = int(value['revb'])
    reva = int(value['reva'])

    # Ensure that reva is less than revb.
    if reva > revb:
      temp = reva
      reva = revb
      revb = temp

    # Get the system/test/graph/tracename and reftracename for the current key.
    matchData = re.match(r'^([^/]+)\/([^/]+)\/([^/]+)\/([^/]+)$', key)
    if not matchData:
      OutputMessage('cannot parse key, skipping')
      continue
    system = matchData.group(1)
    test = matchData.group(2)
    graph = matchData.group(3)
    tracename = matchData.group(4)
    reftracename = tracename + '_ref'

    # Create the summary_url and get the json data for that URL.
    # FetchUrlContents() may sleep to avoid overloading the server with
    # requests.
    summary_url = '%s/%s/%s/%s-summary.dat' % (base_url, system, test, graph)
    summaryjson = FetchUrlContents(summary_url)
    if not summaryjson:
      OutputMessage('ERROR: cannot find json data, please verify',
                    verbose_message=False)
      return 0

    # Set value's type to 'relative' by default.
    value_type = value.get('type', 'relative')

    # One JSON dict per line; 'relative' expectations also need the
    # matching '_ref' trace for the delta computation below.
    summarylist = summaryjson.split('\n')
    trace_values = {}
    traces = [tracename]
    if value_type == 'relative':
      traces += [reftracename]
    for trace in traces:
      trace_values.setdefault(trace, {})

    # Find the high and low values for each of the traces.
    # NOTE(review): the scanning/break logic below relies on summary
    # lines being ordered from newest revision to oldest.
    scanning = False
    for line in summarylist:
      jsondata = ConvertJsonIntoDict(line)

      # TODO(iannucci): Remove this once http://crbug.com/336471 is resolved.
      if 'Force the Chro' in jsondata['rev']:
        continue

      if int(jsondata['rev']) <= revb:
        scanning = True
      if int(jsondata['rev']) < reva:
        break

      # We found the upper revision in the range.  Scan for trace data until we
      # find the lower revision in the range.
      if scanning:
        for trace in traces:
          if trace not in jsondata['traces']:
            OutputMessage('trace %s missing' % trace)
            continue
          if type(jsondata['traces'][trace]) != type([]):
            OutputMessage('trace %s format not recognized' % trace)
            continue
          try:
            tracevalue = float(jsondata['traces'][trace][0])
          except ValueError:
            OutputMessage('trace %s value error: %s' % (
                trace, str(jsondata['traces'][trace][0])))
            continue

          # Seed both bounds with the first value seen, then widen them.
          for bound in ['high', 'low']:
            trace_values[trace].setdefault(bound, tracevalue)

          trace_values[trace]['high'] = max(trace_values[trace]['high'],
                                            tracevalue)
          trace_values[trace]['low'] = min(trace_values[trace]['low'],
                                           tracevalue)

    if 'high' not in trace_values[tracename]:
      OutputMessage('no suitable traces matched, skipping')
      continue

    if value_type == 'relative':
      # Calculate assuming high deltas are regressions and low deltas are
      # improvements.
      regress = (float(trace_values[tracename]['high']) -
                 float(trace_values[reftracename]['low']))
      improve = (float(trace_values[tracename]['low']) -
                 float(trace_values[reftracename]['high']))
    elif value_type == 'absolute':
      # Calculate assuming high absolutes are regressions and low absolutes are
      # improvements.
      regress = float(trace_values[tracename]['high'])
      improve = float(trace_values[tracename]['low'])

    # So far we've assumed better is lower (regress > improve).  If the actual
    # values for regress and improve are equal, though, and better was not
    # specified, alert the user so we don't let them create a new file with
    # ambiguous rules.
    if better == None and regress == improve:
      OutputMessage('regress (%s) is equal to improve (%s), and "better" is '
                    'unspecified, please fix by setting "better": "lower" or '
                    '"better": "higher" in this perf trace\'s expectation' % (
                    regress, improve), verbose_message=False)
      return 1

    # If the existing values assume regressions are low deltas relative to
    # improvements, swap our regress and improve.  This value must be a
    # scores-like result.
    if 'regress' in perf[key] and 'improve' in perf[key]:
      if perf[key]['regress'] < perf[key]['improve']:
        assert(better != 'lower')
        better = 'higher'
        temp = regress
        regress = improve
        improve = temp
      else:
        # Sometimes values are equal, e.g., when they are both 0,
        # 'better' may still be set to 'higher'.
        assert(better != 'higher' or
               perf[key]['regress'] == perf[key]['improve'])
        better = 'lower'

    # If both were ints keep as int, otherwise use the float version.
    originally_ints = False
    if FloatIsInt(regress) and FloatIsInt(improve):
      originally_ints = True

    # Widen both bounds outward by |tolerance|, rounding away from the
    # value when the originals were integral.
    if better == 'higher':
      if originally_ints:
        regress = int(math.floor(regress - abs(regress*tolerance)))
        improve = int(math.ceil(improve + abs(improve*tolerance)))
      else:
        regress = regress - abs(regress*tolerance)
        improve = improve + abs(improve*tolerance)
    else:
      if originally_ints:
        improve = int(math.floor(improve - abs(improve*tolerance)))
        regress = int(math.ceil(regress + abs(regress*tolerance)))
      else:
        improve = improve - abs(improve*tolerance)
        regress = regress + abs(regress*tolerance)

    # Calculate the new checksum to test if this is the only thing that may have
    # changed.
    checksum_rowdata = GetRowData(perf, key)
    new_checksum = GetRowDigest(checksum_rowdata, key)

    if ('regress' in perf[key] and 'improve' in perf[key] and
        perf[key]['regress'] == regress and perf[key]['improve'] == improve and
        original_checksum == new_checksum):
      OutputMessage('no change')
      continue

    write_new_expectations = True
    OutputMessage('traces: %s' % trace_values, verbose_message=False)
    OutputMessage('before: %s' % perf[key], verbose_message=False)
    perf[key]['regress'] = regress
    perf[key]['improve'] = improve
    OutputMessage('after: %s' % perf[key], verbose_message=False)

  if options.checksum:
    if found_checksum_mismatch:
      return 1
    else:
      return 0

  if write_new_expectations:
    print '\nWriting expectations... ',
    WriteJson(perf_file, perf, perfkeys)
    print 'done'
  else:
    if options.verbose:
      print ''
    print 'No changes.'
  return 0
377
378
# Entry point.  Note that the full argv (including the program name) is
# passed to Main(); the extra positional argument is harmless because
# Main never reads |args| after option parsing.
if __name__ == '__main__':
  sys.exit(Main(sys.argv))
381