'''
Created on May 19, 2011

@author: bungeman

Utilities for parsing textual bench output into BenchDataPoint objects and
for fitting a simple linear regression over (x, y) points.
'''

import os
import re
import math

# bench representation algorithm constant names
ALGORITHM_AVERAGE = 'avg'
ALGORITHM_MEDIAN = 'med'
ALGORITHM_MINIMUM = 'min'
ALGORITHM_25TH_PERCENTILE = '25th'

# Regular expressions used throughout.
# Raw strings are used so that regex escapes such as \s and \d are not
# interpreted (and warned about) as Python string escapes; the resulting
# pattern text is unchanged.
PER_SETTING_RE = r'([^\s=]+)(?:=(\S+))?'
SETTINGS_RE = r'skia bench:((?:\s+' + PER_SETTING_RE + r')*)'
BENCH_RE = r'running bench (?:\[\d+ \d+\] )?\s*(\S+)'
TIME_RE = r'(?:(\w*)msecs = )?\s*((?:\d+\.\d+)(?:,\s*\d+\.\d+)*)'
# non-per-tile benches have configs that don't end with ']' or '>'
CONFIG_RE = r'(\S+[^\]>]):\s+((?:' + TIME_RE + r'\s+)+)'
# per-tile bench lines are in the following format. Note that there are
# non-averaged bench numbers in separate lines, which we ignore now due to
# their inaccuracy.
TILE_RE = (r' tile_(\S+): tile \[\d+,\d+\] out of \[\d+,\d+\] <averaged>:'
           r' ((?:' + TIME_RE + r'\s+)+)')
# for extracting tile layout
TILE_LAYOUT_RE = r' out of \[(\d+),(\d+)\] <averaged>: '

PER_SETTING_RE_COMPILED = re.compile(PER_SETTING_RE)
SETTINGS_RE_COMPILED = re.compile(SETTINGS_RE)
BENCH_RE_COMPILED = re.compile(BENCH_RE)
TIME_RE_COMPILED = re.compile(TIME_RE)
CONFIG_RE_COMPILED = re.compile(CONFIG_RE)
TILE_RE_COMPILED = re.compile(TILE_RE)
TILE_LAYOUT_RE_COMPILED = re.compile(TILE_LAYOUT_RE)


class BenchDataPoint:
    """A single data point produced by bench.

    per_tile_values and per_iter_time default to a fresh empty list for each
    instance when omitted (or passed as None), so instances never share a
    default list.
    """
    def __init__(self, bench, config, time_type, time, settings,
                 tile_layout='', per_tile_values=None, per_iter_time=None):
        # string name of the benchmark to measure
        self.bench = bench
        # string name of the configurations to run
        self.config = config
        # type of the timer in string: '' (walltime), 'c' (cpu) or 'g' (gpu)
        self.time_type = time_type
        # float number of the bench time value
        self.time = time
        # dictionary of the run settings
        self.settings = settings
        # how tiles cover the whole picture: '5x3' means 5 columns and 3 rows
        self.tile_layout = tile_layout
        # list of float for per_tile bench values, if applicable
        self.per_tile_values = (
            [] if per_tile_values is None else per_tile_values)
        # list of float for per-iteration bench time, if applicable
        self.per_iter_time = [] if per_iter_time is None else per_iter_time

    def __repr__(self):
        return "BenchDataPoint(%s, %s, %s, %s, %s)" % (
                   str(self.bench),
                   str(self.config),
                   str(self.time_type),
                   str(self.time),
                   str(self.settings),
               )


class _ExtremeType(object):
    """Instances of this class compare greater or less than other objects.

    cmpr is the comparison result reported against any other object: 1 makes
    the instance compare greater than everything, -1 less than everything.
    Two instances with the same cmpr compare equal. rep is the string
    returned by repr().
    """
    def __init__(self, cmpr, rep):
        object.__init__(self)
        self._cmpr = cmpr
        self._rep = rep

    def __cmp__(self, other):
        # Python 2 comparison protocol; also the single source of truth for
        # the rich comparison methods below.
        if isinstance(other, self.__class__) and other._cmpr == self._cmpr:
            return 0
        return self._cmpr

    # Python 3 ignores __cmp__, so the rich comparisons are spelled out
    # explicitly. Without them max(Min, x) / min(Max, x) raise TypeError.
    def __eq__(self, other):
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        return self.__cmp__(other) != 0

    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0

    def __hash__(self):
        # Defining __eq__ removes the default hash on Python 3; keep the
        # instances hashable and consistent with __eq__.
        return hash((self.__class__, self._cmpr))

    def __repr__(self):
        return self._rep


Max = _ExtremeType(1, "Max")
Min = _ExtremeType(-1, "Min")


class _ListAlgorithm(object):
    """Algorithm for selecting the representation value from a given list.

    representation is one of the ALGORITHM_XXX representation types; a falsy
    value selects ALGORITHM_AVERAGE.

    Note that for every representation other than the average, data is
    sorted in place, so the caller's list is reordered.

    Raises Exception for an unknown representation name.
    """
    def __init__(self, data, representation=None):
        if not representation:
            representation = ALGORITHM_AVERAGE  # default algorithm
        self._data = data
        self._len = len(data)
        if representation == ALGORITHM_AVERAGE:
            self._rep = sum(self._data) / self._len
        else:
            self._data.sort()
            if representation == ALGORITHM_MINIMUM:
                self._rep = self._data[0]
            else:
                # for percentiles, we use the value below which x% of values
                # are found, which allows for better detection of quantum
                # behaviors.
                # NOTE: round() breaks exact .5 ties differently on Python 2
                # (away from zero) and Python 3 (to even), so the selected
                # element can differ between interpreters in tie cases.
                if representation == ALGORITHM_MEDIAN:
                    x = int(round(0.5 * self._len + 0.5))
                elif representation == ALGORITHM_25TH_PERCENTILE:
                    x = int(round(0.25 * self._len + 0.5))
                else:
                    raise Exception("invalid representation algorithm %s!" %
                                    representation)
                self._rep = self._data[x - 1]

    def compute(self):
        """Returns the representation value selected at construction."""
        return self._rep


def _ParseAndStoreTimes(config_re_compiled, is_per_tile, line, bench,
                        value_dic, layout_dic):
    """Parses given bench time line with regex and adds data to value_dic.

    config_re_compiled: precompiled regular expression for parsing the config
        line.
    is_per_tile: boolean indicating whether this is a per-tile bench.
        If so, we add tile layout into layout_dic as well.
    line: input string line to parse.
    bench: name of bench for the time values.
    value_dic: dictionary to store bench values. See bench_dic in parse() below.
    layout_dic: dictionary to store tile layouts. See parse() for descriptions.
    """

    for config in config_re_compiled.finditer(line):
        current_config = config.group(1)
        tile_layout = ''
        if is_per_tile:  # per-tile bench, add name prefix
            current_config = 'tile_' + current_config
            layouts = TILE_LAYOUT_RE_COMPILED.search(line)
            if layouts and len(layouts.groups()) == 2:
                tile_layout = '%sx%s' % layouts.groups()
        times = config.group(2)
        for new_time in TIME_RE_COMPILED.finditer(times):
            current_time_type = new_time.group(1)
            iters = [float(i) for i in
                     new_time.group(2).strip().split(',')]
            # Each matched time entry appends one list of per-iteration
            # values under [bench][config][time_type].
            value_dic.setdefault(bench, {}).setdefault(
                current_config, {}).setdefault(current_time_type, []).append(
                    iters)
            # Only the first layout seen for a given key is kept.
            layout_dic.setdefault(bench, {}).setdefault(
                current_config, {}).setdefault(current_time_type, tile_layout)


def parse_skp_bench_data(directory, revision, rep, default_settings=None):
    """Parses all the skp bench data in the given directory.

    Args:
      directory: string of path to input data directory.
      revision: git hash revision that matches the data to process.
      rep: bench representation algorithm, see bench_util.py.
      default_settings: dictionary of other run settings. See writer.option() in
          bench/benchmain.cpp.

    Returns:
      A list of BenchDataPoint objects.
    """
    revision_data_points = []
    file_list = os.listdir(directory)
    file_list.sort()
    wanted_prefix = 'bench_' + revision + '_data_'
    for bench_file in file_list:
        # Skips non skp bench files.
        if not bench_file.startswith(wanted_prefix):
            continue

        # Scalar type, if any, is in the bench filename after 'scalar_'.
        scalar_type = None
        if bench_file.find('scalar_') > 0:
            components = bench_file.split('_')
            scalar_type = components[components.index('scalar') + 1]

        with open(os.path.join(directory, bench_file), 'r') as file_handle:
            # Copy so that the caller's default_settings is never modified.
            settings = dict(default_settings or {})
            settings['scalar'] = scalar_type
            revision_data_points.extend(parse(settings, file_handle, rep))

    return revision_data_points


# TODO(bensong): switch to reading JSON output when available. This way we don't
# need the RE complexities.
def parse(settings, lines, representation=None):
    """Parses bench output into a useful data structure.

    ({str:str}, __iter__ -> str) -> [BenchDataPoint]
    representation is one of the ALGORITHM_XXX types.

    The settings argument is not modified: it is copied whenever a settings
    line is found. All returned data points reference the settings dictionary
    that is current once every line has been read."""

    benches = []
    current_bench = None
    # [bench][config][time_type] -> [[per-iter values]] where per-tile config
    # has per-iter value list for each tile [[<tile1_iter1>,<tile1_iter2>,...],
    # [<tile2_iter1>,<tile2_iter2>,...],...], while non-per-tile config only
    # contains one list of iterations [[iter1, iter2, ...]].
    bench_dic = {}
    # [bench][config][time_type] -> tile_layout
    layout_dic = {}

    for line in lines:

        # see if this line is a settings line
        settingsMatch = SETTINGS_RE_COMPILED.search(line)
        if settingsMatch:
            settings = dict(settings)
            for settingMatch in PER_SETTING_RE_COMPILED.finditer(
                    settingsMatch.group(1)):
                if settingMatch.group(2):
                    settings[settingMatch.group(1)] = settingMatch.group(2)
                else:
                    # a setting without '=value' is recorded as a flag
                    settings[settingMatch.group(1)] = True

        # see if this line starts a new bench
        new_bench = BENCH_RE_COMPILED.search(line)
        if new_bench:
            current_bench = new_bench.group(1)

        # add configs on this line to the bench_dic
        if current_bench:
            if line.startswith(' tile_'):
                _ParseAndStoreTimes(TILE_RE_COMPILED, True, line,
                                    current_bench, bench_dic, layout_dic)
            else:
                _ParseAndStoreTimes(CONFIG_RE_COMPILED, False, line,
                                    current_bench, bench_dic, layout_dic)

    # append benches to list
    for bench, configs in bench_dic.items():
        for config, time_types in configs.items():
            for time_type, value_lists in time_types.items():
                tile_layout = ''
                per_tile_values = []  # empty for non-per-tile configs
                per_iter_time = []  # empty for per-tile configs
                bench_summary = None  # a single final bench value
                if len(value_lists) > 1:
                    # per-tile config; compute representation for each tile
                    per_tile_values = [
                        _ListAlgorithm(iters, representation).compute()
                        for iters in value_lists]
                    # use sum of each tile representation for total bench
                    # value
                    bench_summary = sum(per_tile_values)
                    # extract tile layout
                    tile_layout = layout_dic[bench][config][time_type]
                else:
                    # get the list of per-iteration values
                    per_iter_time = value_lists[0]
                    bench_summary = _ListAlgorithm(
                        per_iter_time, representation).compute()
                benches.append(BenchDataPoint(
                    bench,
                    config,
                    time_type,
                    bench_summary,
                    settings,
                    tile_layout,
                    per_tile_values,
                    per_iter_time))

    return benches


class LinearRegression:
    """Linear regression data based on a set of data points.

    ([(Number,Number)])
    There must be at least two points for this to make sense.

    Attributes set by the constructor: slope, intercept, serror,
    serror_slope, serror_intercept, max_x and min_x. When all x values are
    equal the slope is reported as 0.0. The standard errors stay 0 unless
    there are at least three points with distinct x values."""
    def __init__(self, points):
        n = len(points)
        # Start from the sentinels so that the first real x replaces them.
        max_x = Min
        min_x = Max

        Sx = 0.0
        Sy = 0.0
        Sxx = 0.0
        Sxy = 0.0
        Syy = 0.0
        for point in points:
            x = point[0]
            y = point[1]
            max_x = max(max_x, x)
            min_x = min(min_x, x)

            Sx += x
            Sy += y
            Sxx += x*x
            Sxy += x*y
            Syy += y*y

        denom = n*Sxx - Sx*Sx
        if denom != 0.0:
            B = (n*Sxy - Sx*Sy) / denom
        else:
            # all x values identical: no slope can be determined
            B = 0.0
        a = (1.0/n)*(Sy - B*Sx)

        se2 = 0
        sB2 = 0
        sa2 = 0
        if n >= 3 and denom != 0.0:
            se2 = (1.0/(n*(n-2)) * (n*Syy - Sy*Sy - B*B*denom))
            sB2 = (n*se2) / denom
            sa2 = sB2 * (1.0/n) * Sxx

        self.slope = B
        self.intercept = a
        # max(0, ...) guards against tiny negative values before sqrt
        self.serror = math.sqrt(max(0, se2))
        self.serror_slope = math.sqrt(max(0, sB2))
        self.serror_intercept = math.sqrt(max(0, sa2))
        self.max_x = max_x
        self.min_x = min_x

    def __repr__(self):
        return "LinearRegression(%s, %s, %s, %s, %s)" % (
                   str(self.slope),
                   str(self.intercept),
                   str(self.serror),
                   str(self.serror_slope),
                   str(self.serror_intercept),
               )

    def find_min_slope(self):
        """Finds the minimal slope given one standard deviation."""
        slope = self.slope
        intercept = self.intercept
        error = self.serror
        regr_start = self.min_x
        regr_end = self.max_x
        regr_width = regr_end - regr_start

        if slope < 0:
            lower_left_y = slope*regr_start + intercept - error
            upper_right_y = slope*regr_end + intercept + error
            return min(0, (upper_right_y - lower_left_y) / regr_width)

        elif slope > 0:
            upper_left_y = slope*regr_start + intercept + error
            lower_right_y = slope*regr_end + intercept - error
            return max(0, (lower_right_y - upper_left_y) / regr_width)

        return 0


def CreateRevisionLink(revision_number):
    """Returns HTML displaying the given revision number and linking to
    that revision's change page at code.google.com, e.g.
    http://code.google.com/p/skia/source/detail?r=2056
    """
    return '<a href="http://code.google.com/p/skia/source/detail?r=%s">%s</a>'%(
        revision_number, revision_number)


def main():
    """Smoke test: builds a regression over points that share one x value."""
    foo = [[0.0, 0.0], [0.0, 1.0], [0.0, 2.0], [0.0, 3.0]]
    LinearRegression(foo)


if __name__ == "__main__":
    main()