1''' 2Created on May 19, 2011 3 4@author: bungeman 5''' 6 7import re 8import math 9 10# bench representation algorithm constant names 11ALGORITHM_AVERAGE = 'avg' 12ALGORITHM_MEDIAN = 'med' 13ALGORITHM_MINIMUM = 'min' 14ALGORITHM_25TH_PERCENTILE = '25th' 15 16# Regular expressions used throughout. 17PER_SETTING_RE = '([^\s=]+)(?:=(\S+))?' 18SETTINGS_RE = 'skia bench:((?:\s+' + PER_SETTING_RE + ')*)' 19BENCH_RE = 'running bench (?:\[\d+ \d+\] )?\s*(\S+)' 20TIME_RE = '(?:(\w*)msecs = )?\s*((?:\d+\.\d+)(?:,\s*\d+\.\d+)*)' 21# non-per-tile benches have configs that don't end with ']' or '>' 22CONFIG_RE = '(\S+[^\]>]):\s+((?:' + TIME_RE + '\s+)+)' 23# per-tile bench lines are in the following format. Note that there are 24# non-averaged bench numbers in separate lines, which we ignore now due to 25# their inaccuracy. 26TILE_RE = (' tile_(\S+): tile \[\d+,\d+\] out of \[\d+,\d+\] <averaged>:' 27 ' ((?:' + TIME_RE + '\s+)+)') 28# for extracting tile layout 29TILE_LAYOUT_RE = ' out of \[(\d+),(\d+)\] <averaged>: ' 30 31PER_SETTING_RE_COMPILED = re.compile(PER_SETTING_RE) 32SETTINGS_RE_COMPILED = re.compile(SETTINGS_RE) 33BENCH_RE_COMPILED = re.compile(BENCH_RE) 34TIME_RE_COMPILED = re.compile(TIME_RE) 35CONFIG_RE_COMPILED = re.compile(CONFIG_RE) 36TILE_RE_COMPILED = re.compile(TILE_RE) 37TILE_LAYOUT_RE_COMPILED = re.compile(TILE_LAYOUT_RE) 38 39class BenchDataPoint: 40 """A single data point produced by bench. 41 42 (str, str, str, float, {str:str}, str, [floats])""" 43 def __init__(self, bench, config, time_type, time, settings, 44 tile_layout='', per_tile_values=[]): 45 self.bench = bench 46 self.config = config 47 self.time_type = time_type 48 self.time = time 49 self.settings = settings 50 # how tiles cover the whole picture. '5x3' means 5 columns and 3 rows. 51 self.tile_layout = tile_layout 52 # list of per_tile bench values, if applicable 53 self.per_tile_values = per_tile_values 54 55 def __repr__(self): 56 return "BenchDataPoint(%s, %s, %s, %s, %s)" % ( 57 str(self.bench), 58 str(self.config), 59 str(self.time_type), 60 str(self.time), 61 str(self.settings), 62 ) 63 64class _ExtremeType(object): 65 """Instances of this class compare greater or less than other objects.""" 66 def __init__(self, cmpr, rep): 67 object.__init__(self) 68 self._cmpr = cmpr 69 self._rep = rep 70 71 def __cmp__(self, other): 72 if isinstance(other, self.__class__) and other._cmpr == self._cmpr: 73 return 0 74 return self._cmpr 75 76 def __repr__(self): 77 return self._rep 78 79Max = _ExtremeType(1, "Max") 80Min = _ExtremeType(-1, "Min") 81 82class _ListAlgorithm(object): 83 """Algorithm for selecting the representation value from a given list. 84 representation is one of the ALGORITHM_XXX representation types.""" 85 def __init__(self, data, representation=None): 86 if not representation: 87 representation = ALGORITHM_AVERAGE # default algorithm 88 self._data = data 89 self._len = len(data) 90 if representation == ALGORITHM_AVERAGE: 91 self._rep = sum(self._data) / self._len 92 else: 93 self._data.sort() 94 if representation == ALGORITHM_MINIMUM: 95 self._rep = self._data[0] 96 else: 97 # for percentiles, we use the value below which x% of values are 98 # found, which allows for better detection of quantum behaviors. 99 if representation == ALGORITHM_MEDIAN: 100 x = int(round(0.5 * self._len + 0.5)) 101 elif representation == ALGORITHM_25TH_PERCENTILE: 102 x = int(round(0.25 * self._len + 0.5)) 103 else: 104 raise Exception("invalid representation algorithm %s!" % 105 representation) 106 self._rep = self._data[x - 1] 107 108 def compute(self): 109 return self._rep 110 111def _ParseAndStoreTimes(config_re_compiled, is_per_tile, line, bench, 112 value_dic, layout_dic, representation=None): 113 """Parses given bench time line with regex and adds data to value_dic. 114 115 config_re_compiled: precompiled regular expression for parsing the config 116 line. 117 is_per_tile: boolean indicating whether this is a per-tile bench. 118 If so, we add tile layout into layout_dic as well. 119 line: input string line to parse. 120 bench: name of bench for the time values. 121 value_dic: dictionary to store bench values. See bench_dic in parse() below. 122 layout_dic: dictionary to store tile layouts. See parse() for descriptions. 123 representation: should match one of the ALGORITHM_XXX types.""" 124 125 for config in config_re_compiled.finditer(line): 126 current_config = config.group(1) 127 tile_layout = '' 128 if is_per_tile: # per-tile bench, add name prefix 129 current_config = 'tile_' + current_config 130 layouts = TILE_LAYOUT_RE_COMPILED.search(line) 131 if layouts and len(layouts.groups()) == 2: 132 tile_layout = '%sx%s' % layouts.groups() 133 times = config.group(2) 134 for new_time in TIME_RE_COMPILED.finditer(times): 135 current_time_type = new_time.group(1) 136 iters = [float(i) for i in 137 new_time.group(2).strip().split(',')] 138 value_dic.setdefault(bench, {}).setdefault( 139 current_config, {}).setdefault(current_time_type, []).append( 140 _ListAlgorithm(iters, representation).compute()) 141 layout_dic.setdefault(bench, {}).setdefault( 142 current_config, {}).setdefault(current_time_type, tile_layout) 143 144# TODO(bensong): switch to reading JSON output when available. This way we don't 145# need the RE complexities. 146def parse(settings, lines, representation=None): 147 """Parses bench output into a useful data structure. 148 149 ({str:str}, __iter__ -> str) -> [BenchDataPoint] 150 representation is one of the ALGORITHM_XXX types.""" 151 152 benches = [] 153 current_bench = None 154 bench_dic = {} # [bench][config][time_type] -> [list of bench values] 155 # [bench][config][time_type] -> tile_layout 156 layout_dic = {} 157 158 for line in lines: 159 160 # see if this line is a settings line 161 settingsMatch = SETTINGS_RE_COMPILED.search(line) 162 if (settingsMatch): 163 settings = dict(settings) 164 for settingMatch in PER_SETTING_RE_COMPILED.finditer(settingsMatch.group(1)): 165 if (settingMatch.group(2)): 166 settings[settingMatch.group(1)] = settingMatch.group(2) 167 else: 168 settings[settingMatch.group(1)] = True 169 170 # see if this line starts a new bench 171 new_bench = BENCH_RE_COMPILED.search(line) 172 if new_bench: 173 current_bench = new_bench.group(1) 174 175 # add configs on this line to the bench_dic 176 if current_bench: 177 if line.startswith(' tile_') : 178 _ParseAndStoreTimes(TILE_RE_COMPILED, True, line, current_bench, 179 bench_dic, layout_dic, representation) 180 else: 181 _ParseAndStoreTimes(CONFIG_RE_COMPILED, False, line, 182 current_bench, 183 bench_dic, layout_dic, representation) 184 185 # append benches to list, use the total time as final bench value. 186 for bench in bench_dic: 187 for config in bench_dic[bench]: 188 for time_type in bench_dic[bench][config]: 189 tile_layout = '' 190 per_tile_values = [] 191 if len(bench_dic[bench][config][time_type]) > 1: 192 # per-tile values, extract tile_layout 193 per_tile_values = bench_dic[bench][config][time_type] 194 tile_layout = layout_dic[bench][config][time_type] 195 benches.append(BenchDataPoint( 196 bench, 197 config, 198 time_type, 199 sum(bench_dic[bench][config][time_type]), 200 settings, 201 tile_layout, 202 per_tile_values)) 203 204 return benches 205 206class LinearRegression: 207 """Linear regression data based on a set of data points. 208 209 ([(Number,Number)]) 210 There must be at least two points for this to make sense.""" 211 def __init__(self, points): 212 n = len(points) 213 max_x = Min 214 min_x = Max 215 216 Sx = 0.0 217 Sy = 0.0 218 Sxx = 0.0 219 Sxy = 0.0 220 Syy = 0.0 221 for point in points: 222 x = point[0] 223 y = point[1] 224 max_x = max(max_x, x) 225 min_x = min(min_x, x) 226 227 Sx += x 228 Sy += y 229 Sxx += x*x 230 Sxy += x*y 231 Syy += y*y 232 233 denom = n*Sxx - Sx*Sx 234 if (denom != 0.0): 235 B = (n*Sxy - Sx*Sy) / denom 236 else: 237 B = 0.0 238 a = (1.0/n)*(Sy - B*Sx) 239 240 se2 = 0 241 sB2 = 0 242 sa2 = 0 243 if (n >= 3 and denom != 0.0): 244 se2 = (1.0/(n*(n-2)) * (n*Syy - Sy*Sy - B*B*denom)) 245 sB2 = (n*se2) / denom 246 sa2 = sB2 * (1.0/n) * Sxx 247 248 249 self.slope = B 250 self.intercept = a 251 self.serror = math.sqrt(max(0, se2)) 252 self.serror_slope = math.sqrt(max(0, sB2)) 253 self.serror_intercept = math.sqrt(max(0, sa2)) 254 self.max_x = max_x 255 self.min_x = min_x 256 257 def __repr__(self): 258 return "LinearRegression(%s, %s, %s, %s, %s)" % ( 259 str(self.slope), 260 str(self.intercept), 261 str(self.serror), 262 str(self.serror_slope), 263 str(self.serror_intercept), 264 ) 265 266 def find_min_slope(self): 267 """Finds the minimal slope given one standard deviation.""" 268 slope = self.slope 269 intercept = self.intercept 270 error = self.serror 271 regr_start = self.min_x 272 regr_end = self.max_x 273 regr_width = regr_end - regr_start 274 275 if slope < 0: 276 lower_left_y = slope*regr_start + intercept - error 277 upper_right_y = slope*regr_end + intercept + error 278 return min(0, (upper_right_y - lower_left_y) / regr_width) 279 280 elif slope > 0: 281 upper_left_y = slope*regr_start + intercept + error 282 lower_right_y = slope*regr_end + intercept - error 283 return max(0, (lower_right_y - upper_left_y) / regr_width) 284 285 return 0 286 287def CreateRevisionLink(revision_number): 288 """Returns HTML displaying the given revision number and linking to 289 that revision's change page at code.google.com, e.g. 290 http://code.google.com/p/skia/source/detail?r=2056 291 """ 292 return '<a href="http://code.google.com/p/skia/source/detail?r=%s">%s</a>'%( 293 revision_number, revision_number) 294 295def main(): 296 foo = [[0.0, 0.0], [0.0, 1.0], [0.0, 2.0], [0.0, 3.0]] 297 LinearRegression(foo) 298 299if __name__ == "__main__": 300 main() 301