#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Regenerates perf expectation values from buildbot summary data.

For instructions see:
http://www.chromium.org/developers/tree-sheriffs/perf-sheriffs

Ported from Python 2 to Python 3: print statements, ``except E, e``,
``urllib2`` and ``dict.has_key`` replaced with their modern equivalents.
The sha1 row digests remain byte-compatible with the old implementation
for ASCII row data, so existing checksums stay valid.
"""

import hashlib
import json
import math
import optparse
import os
import re
import subprocess
import sys
import time
import urllib.error
import urllib.request


__version__ = '1.0'
EXPECTATIONS_DIR = os.path.dirname(os.path.abspath(__file__))
DEFAULT_CONFIG_FILE = os.path.join(EXPECTATIONS_DIR,
                                   'chromium_perf_expectations.cfg')
# Fraction applied to widen the [improve, regress] band around observed values.
DEFAULT_TOLERANCE = 0.05
USAGE = ''


def ReadFile(filename):
  """Returns the entire contents of |filename| as bytes.

  Raises:
    IOError: re-raised after printing a diagnostic to stderr.
  """
  try:
    # 'with' guarantees the handle is closed even if read() fails.
    with open(filename, 'rb') as f:
      return f.read()
  except IOError as e:
    print('I/O Error reading file %s(%s): %s' %
          (filename, e.errno, e.strerror), file=sys.stderr)
    raise


def ConvertJsonIntoDict(string):
  """Read a JSON string and convert its contents into a Python datatype."""
  if len(string) == 0:
    print('Error could not parse empty string', file=sys.stderr)
    raise Exception('JSON data missing')

  try:
    jsondata = json.loads(string)
  except ValueError:
    print('Error parsing string: "%s"' % string, file=sys.stderr)
    raise
  return jsondata


# Floating point representation of last time we fetched a URL.
last_fetched_at = None


def FetchUrlContents(url):
  """Fetches |url|, throttled to avoid more than ~2 requests per second.

  Returns:
    The stripped response body as a str, or None if the server returned
    HTTP 404.  Other HTTP errors are re-raised.
  """
  global last_fetched_at
  if last_fetched_at and ((time.time() - last_fetched_at) <= 0.5):
    # Sleep for half a second to avoid overloading the server.
    time.sleep(0.5)
  try:
    last_fetched_at = time.time()
    connection = urllib.request.urlopen(url)
  except urllib.error.HTTPError as e:
    if e.code == 404:
      return None
    raise
  text = connection.read().strip().decode('utf-8')
  connection.close()
  return text


def GetRowData(data, key):
  """Returns |data[key]| as a list of '"subkey": value' JSON fragments.

  Subkeys are emitted in a fixed order so the resulting checksum is stable.
  """
  rowdata = []
  # reva and revb always come first.
  for subkey in ['reva', 'revb']:
    if subkey in data[key]:
      rowdata.append('"%s": %s' % (subkey, data[key][subkey]))
  # Strings, like type, come next.
  for subkey in ['type', 'better']:
    if subkey in data[key]:
      rowdata.append('"%s": "%s"' % (subkey, data[key][subkey]))
  # Finally the main numbers come last.
  for subkey in ['improve', 'regress', 'tolerance']:
    if subkey in data[key]:
      rowdata.append('"%s": %s' % (subkey, data[key][subkey]))
  return rowdata


def GetRowDigest(rowdata, key):
  """Returns the first 8 hex chars of sha1(repr(rowdata) + key).

  For ASCII row data this matches the digest the Python 2 version produced,
  so checksums already stored in perf_expectations files remain valid.
  """
  sha1 = hashlib.sha1()
  rowdata = [str(entry) for entry in rowdata]
  sha1.update((str(rowdata) + key).encode('utf-8'))
  return sha1.hexdigest()[0:8]


def WriteJson(filename, data, keys, calculate_sha1=True):
  """Write a list of |keys| in |data| to the file specified in |filename|.

  Args:
    filename: destination path.
    data: dict of key -> row dict (see GetRowData).
    keys: ordered list of keys to emit.
    calculate_sha1: if True, append a freshly computed "sha1" entry to each
        row; otherwise preserve any existing "sha1" value.

  Returns:
    True on success, False if the file could not be opened.
  """
  try:
    outfile = open(filename, 'w')
  except IOError as e:
    print('I/O Error writing file %s(%s): %s' %
          (filename, e.errno, e.strerror), file=sys.stderr)
    return False
  with outfile:
    jsondata = []
    for key in keys:
      rowdata = GetRowData(data, key)
      if calculate_sha1:
        # Include an updated checksum.
        rowdata.append('"sha1": "%s"' % GetRowDigest(rowdata, key))
      elif 'sha1' in data[key]:
        rowdata.append('"sha1": "%s"' % (data[key]['sha1']))
      jsondata.append('"%s": {%s}' % (key, ', '.join(rowdata)))
    jsondata.append('"load": true')
    jsontext = '{%s\n}' % ',\n '.join(jsondata)
    outfile.write(jsontext + '\n')
  return True


def FloatIsInt(f):
  """Returns True if |f| is within epsilon of an integer value."""
  epsilon = 1.0e-10
  return abs(f - int(f)) <= epsilon


# Last expectation key for which OutputMessage printed a header line.
last_key_printed = None


def Main(args):
  """Recomputes regress/improve expectations for each perf expectation key.

  Reads the config file, fetches summary data for every key whose stored
  checksum is stale, widens the observed [low, high] band by the tolerance,
  and (unless --checksum) rewrites the expectations file if anything changed.

  Returns:
    Process exit code: 0 on success; 1 on error, or (with --checksum) when
    any stored checksum is out of date.
  """
  def OutputMessage(message, verbose_message=True):
    # |key| is the loop variable of the enclosing for-loop (closure).
    global last_key_printed
    if not options.verbose and verbose_message:
      return

    if key != last_key_printed:
      last_key_printed = key
      print('\n' + key + ':')
    print('  %s' % message)

  parser = optparse.OptionParser(usage=USAGE, version=__version__)
  parser.add_option('-v', '--verbose', action='store_true', default=False,
                    help='enable verbose output')
  parser.add_option('-s', '--checksum', action='store_true',
                    help='test if any changes are pending')
  parser.add_option('-c', '--config', dest='config_file',
                    default=DEFAULT_CONFIG_FILE,
                    help='set the config file to FILE', metavar='FILE')
  options, args = parser.parse_args(args)

  if options.verbose:
    print('Verbose output enabled.')

  config = ConvertJsonIntoDict(ReadFile(options.config_file))

  # Get the list of summaries for a test.
  base_url = config['base_url']
  # Make the perf expectations file relative to the path of the config file.
  perf_file = os.path.join(
      os.path.dirname(options.config_file), config['perf_file'])
  perf = ConvertJsonIntoDict(ReadFile(perf_file))

  # Fetch graphs.dat for this combination.
  perfkeys = list(perf.keys())
  # In perf_expectations.json, ignore the 'load' key.
  perfkeys.remove('load')
  perfkeys.sort()

  write_new_expectations = False
  found_checksum_mismatch = False
  for key in perfkeys:
    value = perf[key]
    tolerance = value.get('tolerance', DEFAULT_TOLERANCE)
    better = value.get('better', None)

    # Verify the checksum.
    original_checksum = value.get('sha1', '')
    if 'sha1' in value:
      del value['sha1']
    rowdata = GetRowData(perf, key)
    computed_checksum = GetRowDigest(rowdata, key)
    if original_checksum == computed_checksum:
      OutputMessage('checksum matches, skipping')
      continue
    elif options.checksum:
      found_checksum_mismatch = True
      continue

    # Skip expectations that are missing a reva or revb.  We can't generate
    # expectations for those.
    if not ('reva' in value and 'revb' in value):
      OutputMessage('missing revision range, skipping')
      continue
    revb = int(value['revb'])
    reva = int(value['reva'])

    # Ensure that reva is less than revb.
    if reva > revb:
      reva, revb = revb, reva

    # Get the system/test/graph/tracename and reftracename for the current
    # key.
    matchData = re.match(r'^([^/]+)\/([^/]+)\/([^/]+)\/([^/]+)$', key)
    if not matchData:
      OutputMessage('cannot parse key, skipping')
      continue
    system = matchData.group(1)
    test = matchData.group(2)
    graph = matchData.group(3)
    tracename = matchData.group(4)
    reftracename = tracename + '_ref'

    # Create the summary_url and get the json data for that URL.
    # FetchUrlContents() may sleep to avoid overloading the server with
    # requests.
    summary_url = '%s/%s/%s/%s-summary.dat' % (base_url, system, test, graph)
    summaryjson = FetchUrlContents(summary_url)
    if not summaryjson:
      OutputMessage('ERROR: cannot find json data, please verify',
                    verbose_message=False)
      return 0

    # Set value's type to 'relative' by default.
    value_type = value.get('type', 'relative')

    summarylist = summaryjson.split('\n')
    trace_values = {}
    traces = [tracename]
    if value_type == 'relative':
      traces += [reftracename]
    for trace in traces:
      trace_values.setdefault(trace, {})

    # Find the high and low values for each of the traces.
    scanning = False
    for line in summarylist:
      jsondata = ConvertJsonIntoDict(line)

      # TODO(iannucci): Remove this once http://crbug.com/336471 is resolved.
      if 'Force the Chro' in jsondata['rev']:
        continue

      if int(jsondata['rev']) <= revb:
        scanning = True
      if int(jsondata['rev']) < reva:
        break

      # We found the upper revision in the range.  Scan for trace data until
      # we find the lower revision in the range.
      if scanning:
        for trace in traces:
          if trace not in jsondata['traces']:
            OutputMessage('trace %s missing' % trace)
            continue
          if type(jsondata['traces'][trace]) != type([]):
            OutputMessage('trace %s format not recognized' % trace)
            continue
          try:
            tracevalue = float(jsondata['traces'][trace][0])
          except ValueError:
            OutputMessage('trace %s value error: %s' % (
                trace, str(jsondata['traces'][trace][0])))
            continue

          # Seed both bounds on first sight, then widen.
          for bound in ['high', 'low']:
            trace_values[trace].setdefault(bound, tracevalue)

          trace_values[trace]['high'] = max(trace_values[trace]['high'],
                                            tracevalue)
          trace_values[trace]['low'] = min(trace_values[trace]['low'],
                                           tracevalue)

    if 'high' not in trace_values[tracename]:
      OutputMessage('no suitable traces matched, skipping')
      continue

    if value_type == 'relative':
      # Calculate assuming high deltas are regressions and low deltas are
      # improvements.
      regress = (float(trace_values[tracename]['high']) -
                 float(trace_values[reftracename]['low']))
      improve = (float(trace_values[tracename]['low']) -
                 float(trace_values[reftracename]['high']))
    elif value_type == 'absolute':
      # Calculate assuming high absolutes are regressions and low absolutes
      # are improvements.
      regress = float(trace_values[tracename]['high'])
      improve = float(trace_values[tracename]['low'])

    # So far we've assumed better is lower (regress > improve).  If the
    # actual values for regress and improve are equal, though, and better
    # was not specified, alert the user so we don't let them create a new
    # file with ambiguous rules.
    if better is None and regress == improve:
      OutputMessage('regress (%s) is equal to improve (%s), and "better" is '
                    'unspecified, please fix by setting "better": "lower" or '
                    '"better": "higher" in this perf trace\'s expectation' % (
                        regress, improve), verbose_message=False)
      return 1

    # If the existing values assume regressions are low deltas relative to
    # improvements, swap our regress and improve.  This value must be a
    # scores-like result.
    if 'regress' in perf[key] and 'improve' in perf[key]:
      if perf[key]['regress'] < perf[key]['improve']:
        assert better != 'lower'
        better = 'higher'
        regress, improve = improve, regress
      else:
        # Sometimes values are equal, e.g., when they are both 0,
        # 'better' may still be set to 'higher'.
        assert (better != 'higher' or
                perf[key]['regress'] == perf[key]['improve'])
        better = 'lower'

    # If both were ints keep as int, otherwise use the float version.
    originally_ints = False
    if FloatIsInt(regress) and FloatIsInt(improve):
      originally_ints = True

    # Widen the band by the tolerance, rounding outward when the stored
    # expectations were integers.
    if better == 'higher':
      if originally_ints:
        regress = int(math.floor(regress - abs(regress * tolerance)))
        improve = int(math.ceil(improve + abs(improve * tolerance)))
      else:
        regress = regress - abs(regress * tolerance)
        improve = improve + abs(improve * tolerance)
    else:
      if originally_ints:
        improve = int(math.floor(improve - abs(improve * tolerance)))
        regress = int(math.ceil(regress + abs(regress * tolerance)))
      else:
        improve = improve - abs(improve * tolerance)
        regress = regress + abs(regress * tolerance)

    # Calculate the new checksum to test if this is the only thing that may
    # have changed.
    checksum_rowdata = GetRowData(perf, key)
    new_checksum = GetRowDigest(checksum_rowdata, key)

    if ('regress' in perf[key] and 'improve' in perf[key] and
        perf[key]['regress'] == regress and
        perf[key]['improve'] == improve and
        original_checksum == new_checksum):
      OutputMessage('no change')
      continue

    write_new_expectations = True
    OutputMessage('traces: %s' % trace_values, verbose_message=False)
    OutputMessage('before: %s' % perf[key], verbose_message=False)
    perf[key]['regress'] = regress
    perf[key]['improve'] = improve
    OutputMessage('after: %s' % perf[key], verbose_message=False)

  if options.checksum:
    if found_checksum_mismatch:
      return 1
    else:
      return 0

  if write_new_expectations:
    print('\nWriting expectations... ', end='')
    WriteJson(perf_file, perf, perfkeys)
    print('done')
  else:
    if options.verbose:
      print('')
    print('No changes.')
  return 0


if __name__ == '__main__':
  sys.exit(Main(sys.argv))