1'''
2Created on May 19, 2011
3
4@author: bungeman
5'''
6
7import re
8import math
9
10# bench representation algorithm constant names
11ALGORITHM_AVERAGE = 'avg'
12ALGORITHM_MEDIAN = 'med'
13ALGORITHM_MINIMUM = 'min'
14ALGORITHM_25TH_PERCENTILE = '25th'
15
16# Regular expressions used throughout.
17PER_SETTING_RE = '([^\s=]+)(?:=(\S+))?'
18SETTINGS_RE = 'skia bench:((?:\s+' + PER_SETTING_RE + ')*)'
19BENCH_RE = 'running bench (?:\[\d+ \d+\] )?\s*(\S+)'
20TIME_RE = '(?:(\w*)msecs = )?\s*((?:\d+\.\d+)(?:,\s*\d+\.\d+)*)'
21# non-per-tile benches have configs that don't end with ']' or '>'
22CONFIG_RE = '(\S+[^\]>]):\s+((?:' + TIME_RE + '\s+)+)'
23# per-tile bench lines are in the following format. Note that there are
24# non-averaged bench numbers in separate lines, which we ignore now due to
25# their inaccuracy.
26TILE_RE = ('  tile_(\S+): tile \[\d+,\d+\] out of \[\d+,\d+\] <averaged>:'
27           ' ((?:' + TIME_RE + '\s+)+)')
28# for extracting tile layout
29TILE_LAYOUT_RE = ' out of \[(\d+),(\d+)\] <averaged>: '
30
31PER_SETTING_RE_COMPILED = re.compile(PER_SETTING_RE)
32SETTINGS_RE_COMPILED = re.compile(SETTINGS_RE)
33BENCH_RE_COMPILED = re.compile(BENCH_RE)
34TIME_RE_COMPILED = re.compile(TIME_RE)
35CONFIG_RE_COMPILED = re.compile(CONFIG_RE)
36TILE_RE_COMPILED = re.compile(TILE_RE)
37TILE_LAYOUT_RE_COMPILED = re.compile(TILE_LAYOUT_RE)
38
39class BenchDataPoint:
40    """A single data point produced by bench.
41
42    (str, str, str, float, {str:str}, str, [floats])"""
43    def __init__(self, bench, config, time_type, time, settings,
44                 tile_layout='', per_tile_values=[]):
45        self.bench = bench
46        self.config = config
47        self.time_type = time_type
48        self.time = time
49        self.settings = settings
50        # how tiles cover the whole picture. '5x3' means 5 columns and 3 rows.
51        self.tile_layout = tile_layout
52        # list of per_tile bench values, if applicable
53        self.per_tile_values = per_tile_values
54
55    def __repr__(self):
56        return "BenchDataPoint(%s, %s, %s, %s, %s)" % (
57                   str(self.bench),
58                   str(self.config),
59                   str(self.time_type),
60                   str(self.time),
61                   str(self.settings),
62               )
63
64class _ExtremeType(object):
65    """Instances of this class compare greater or less than other objects."""
66    def __init__(self, cmpr, rep):
67        object.__init__(self)
68        self._cmpr = cmpr
69        self._rep = rep
70
71    def __cmp__(self, other):
72        if isinstance(other, self.__class__) and other._cmpr == self._cmpr:
73            return 0
74        return self._cmpr
75
76    def __repr__(self):
77        return self._rep
78
79Max = _ExtremeType(1, "Max")
80Min = _ExtremeType(-1, "Min")
81
82class _ListAlgorithm(object):
83    """Algorithm for selecting the representation value from a given list.
84    representation is one of the ALGORITHM_XXX representation types."""
85    def __init__(self, data, representation=None):
86        if not representation:
87            representation = ALGORITHM_AVERAGE  # default algorithm
88        self._data = data
89        self._len = len(data)
90        if representation == ALGORITHM_AVERAGE:
91            self._rep = sum(self._data) / self._len
92        else:
93            self._data.sort()
94            if representation == ALGORITHM_MINIMUM:
95                self._rep = self._data[0]
96            else:
97                # for percentiles, we use the value below which x% of values are
98                # found, which allows for better detection of quantum behaviors.
99                if representation == ALGORITHM_MEDIAN:
100                    x = int(round(0.5 * self._len + 0.5))
101                elif representation == ALGORITHM_25TH_PERCENTILE:
102                    x = int(round(0.25 * self._len + 0.5))
103                else:
104                    raise Exception("invalid representation algorithm %s!" %
105                                    representation)
106                self._rep = self._data[x - 1]
107
108    def compute(self):
109        return self._rep
110
111def _ParseAndStoreTimes(config_re_compiled, is_per_tile, line, bench,
112                        value_dic, layout_dic, representation=None):
113    """Parses given bench time line with regex and adds data to value_dic.
114
115    config_re_compiled: precompiled regular expression for parsing the config
116        line.
117    is_per_tile: boolean indicating whether this is a per-tile bench.
118        If so, we add tile layout into layout_dic as well.
119    line: input string line to parse.
120    bench: name of bench for the time values.
121    value_dic: dictionary to store bench values. See bench_dic in parse() below.
122    layout_dic: dictionary to store tile layouts. See parse() for descriptions.
123    representation: should match one of the ALGORITHM_XXX types."""
124
125    for config in config_re_compiled.finditer(line):
126        current_config = config.group(1)
127        tile_layout = ''
128        if is_per_tile:  # per-tile bench, add name prefix
129            current_config = 'tile_' + current_config
130            layouts = TILE_LAYOUT_RE_COMPILED.search(line)
131            if layouts and len(layouts.groups()) == 2:
132              tile_layout = '%sx%s' % layouts.groups()
133        times = config.group(2)
134        for new_time in TIME_RE_COMPILED.finditer(times):
135            current_time_type = new_time.group(1)
136            iters = [float(i) for i in
137                     new_time.group(2).strip().split(',')]
138            value_dic.setdefault(bench, {}).setdefault(
139                current_config, {}).setdefault(current_time_type, []).append(
140                    _ListAlgorithm(iters, representation).compute())
141            layout_dic.setdefault(bench, {}).setdefault(
142                current_config, {}).setdefault(current_time_type, tile_layout)
143
144# TODO(bensong): switch to reading JSON output when available. This way we don't
145# need the RE complexities.
146def parse(settings, lines, representation=None):
147    """Parses bench output into a useful data structure.
148
149    ({str:str}, __iter__ -> str) -> [BenchDataPoint]
150    representation is one of the ALGORITHM_XXX types."""
151
152    benches = []
153    current_bench = None
154    bench_dic = {}  # [bench][config][time_type] -> [list of bench values]
155    # [bench][config][time_type] -> tile_layout
156    layout_dic = {}
157
158    for line in lines:
159
160        # see if this line is a settings line
161        settingsMatch = SETTINGS_RE_COMPILED.search(line)
162        if (settingsMatch):
163            settings = dict(settings)
164            for settingMatch in PER_SETTING_RE_COMPILED.finditer(settingsMatch.group(1)):
165                if (settingMatch.group(2)):
166                    settings[settingMatch.group(1)] = settingMatch.group(2)
167                else:
168                    settings[settingMatch.group(1)] = True
169
170        # see if this line starts a new bench
171        new_bench = BENCH_RE_COMPILED.search(line)
172        if new_bench:
173            current_bench = new_bench.group(1)
174
175        # add configs on this line to the bench_dic
176        if current_bench:
177            if line.startswith('  tile_') :
178                _ParseAndStoreTimes(TILE_RE_COMPILED, True, line, current_bench,
179                                    bench_dic, layout_dic, representation)
180            else:
181                _ParseAndStoreTimes(CONFIG_RE_COMPILED, False, line,
182                                    current_bench,
183                                    bench_dic, layout_dic, representation)
184
185    # append benches to list, use the total time as final bench value.
186    for bench in bench_dic:
187        for config in bench_dic[bench]:
188            for time_type in bench_dic[bench][config]:
189                tile_layout = ''
190                per_tile_values = []
191                if len(bench_dic[bench][config][time_type]) > 1:
192                    # per-tile values, extract tile_layout
193                    per_tile_values = bench_dic[bench][config][time_type]
194                    tile_layout = layout_dic[bench][config][time_type]
195                benches.append(BenchDataPoint(
196                    bench,
197                    config,
198                    time_type,
199                    sum(bench_dic[bench][config][time_type]),
200                    settings,
201                    tile_layout,
202                    per_tile_values))
203
204    return benches
205
206class LinearRegression:
207    """Linear regression data based on a set of data points.
208
209    ([(Number,Number)])
210    There must be at least two points for this to make sense."""
211    def __init__(self, points):
212        n = len(points)
213        max_x = Min
214        min_x = Max
215
216        Sx = 0.0
217        Sy = 0.0
218        Sxx = 0.0
219        Sxy = 0.0
220        Syy = 0.0
221        for point in points:
222            x = point[0]
223            y = point[1]
224            max_x = max(max_x, x)
225            min_x = min(min_x, x)
226
227            Sx += x
228            Sy += y
229            Sxx += x*x
230            Sxy += x*y
231            Syy += y*y
232
233        denom = n*Sxx - Sx*Sx
234        if (denom != 0.0):
235            B = (n*Sxy - Sx*Sy) / denom
236        else:
237            B = 0.0
238        a = (1.0/n)*(Sy - B*Sx)
239
240        se2 = 0
241        sB2 = 0
242        sa2 = 0
243        if (n >= 3 and denom != 0.0):
244            se2 = (1.0/(n*(n-2)) * (n*Syy - Sy*Sy - B*B*denom))
245            sB2 = (n*se2) / denom
246            sa2 = sB2 * (1.0/n) * Sxx
247
248
249        self.slope = B
250        self.intercept = a
251        self.serror = math.sqrt(max(0, se2))
252        self.serror_slope = math.sqrt(max(0, sB2))
253        self.serror_intercept = math.sqrt(max(0, sa2))
254        self.max_x = max_x
255        self.min_x = min_x
256
257    def __repr__(self):
258        return "LinearRegression(%s, %s, %s, %s, %s)" % (
259                   str(self.slope),
260                   str(self.intercept),
261                   str(self.serror),
262                   str(self.serror_slope),
263                   str(self.serror_intercept),
264               )
265
266    def find_min_slope(self):
267        """Finds the minimal slope given one standard deviation."""
268        slope = self.slope
269        intercept = self.intercept
270        error = self.serror
271        regr_start = self.min_x
272        regr_end = self.max_x
273        regr_width = regr_end - regr_start
274
275        if slope < 0:
276            lower_left_y = slope*regr_start + intercept - error
277            upper_right_y = slope*regr_end + intercept + error
278            return min(0, (upper_right_y - lower_left_y) / regr_width)
279
280        elif slope > 0:
281            upper_left_y = slope*regr_start + intercept + error
282            lower_right_y = slope*regr_end + intercept - error
283            return max(0, (lower_right_y - upper_left_y) / regr_width)
284
285        return 0
286
287def CreateRevisionLink(revision_number):
288    """Returns HTML displaying the given revision number and linking to
289    that revision's change page at code.google.com, e.g.
290    http://code.google.com/p/skia/source/detail?r=2056
291    """
292    return '<a href="http://code.google.com/p/skia/source/detail?r=%s">%s</a>'%(
293        revision_number, revision_number)
294
295def main():
296    foo = [[0.0, 0.0], [0.0, 1.0], [0.0, 2.0], [0.0, 3.0]]
297    LinearRegression(foo)
298
299if __name__ == "__main__":
300    main()
301