# Copyright (c) 2009, Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# WebKit's Python module for interacting with WebKit's buildbot.

try:
    import json
except ImportError:
    # Python 2.5 compatibility
    import webkitpy.thirdparty.simplejson as json

import re
import urllib
import urllib2

from webkitpy.common.net.failuremap import FailureMap
from webkitpy.common.net.layouttestresults import LayoutTestResults
from webkitpy.common.net.regressionwindow import RegressionWindow
from webkitpy.common.net.testoutputset import TestOutputSet
from webkitpy.common.system.logutils import get_logger
from webkitpy.common.system.zipfileset import ZipFileSet
from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup
from webkitpy.thirdparty.autoinstalled.mechanize import Browser

_log = get_logger(__file__)


class Builder(object):
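    """A single builder on the buildbot master.

    Fetches builds lazily through the parent BuildBot and caches them by
    build number.
    """
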
    def __init__(self, name, buildbot):
        self._name = name
        self._buildbot = buildbot
        self._builds_cache = {}
        self._revision_to_build_number = None
        self._browser = Browser()
        self._browser.set_handle_robots(False)  # The builder pages are excluded by robots.txt.

    def name(self):
        return self._name

    def results_url(self):
        return "http://%s/results/%s" % (self._buildbot.buildbot_host, self.url_encoded_name())

    def url_encoded_name(self):
        return urllib.quote(self._name)

    def url(self):
        return "http://%s/builders/%s" % (self._buildbot.buildbot_host, self.url_encoded_name())

    # This provides a single place to mock.
    def _fetch_build(self, build_number):
        build_dictionary = self._buildbot._fetch_build_dictionary(self, build_number)
        if not build_dictionary:
            return None
        return Build(self,
            build_number=int(build_dictionary['number']),
            revision=int(build_dictionary['sourceStamp']['revision']),
            is_green=(build_dictionary['results'] == 0)  # Undocumented; 0 seems to mean "pass".
        )

    def build(self, build_number):
        if not build_number:
            return None
        cached_build = self._builds_cache.get(build_number)
        if cached_build:
            return cached_build

        build = self._fetch_build(build_number)
        self._builds_cache[build_number] = build
        return build

    def latest_cached_build(self):
        revision_build_pairs = self.revision_build_pairs_with_results()
        revision_build_pairs.sort(key=lambda i: i[1])
        latest_build_number = revision_build_pairs[-1][1]
        return self.build(latest_build_number)

    def force_build(self, username="webkit-patch", comments=None):
        def predicate(form):
            try:
                return form.find_control("username")
            except Exception:
                return False
        self._browser.open(self.url())
        self._browser.select_form(predicate=predicate)
        self._browser["username"] = username
        if comments:
            self._browser["comments"] = comments
        return self._browser.submit()

    file_name_regexp = re.compile(r"r(?P<revision>\d+) \((?P<build_number>\d+)\)")

    def _revision_and_build_for_filename(self, filename):
        # Example: "r47483 (1)/" or "r47483 (1).zip"
        match = self.file_name_regexp.match(filename)
        return (int(match.group("revision")), int(match.group("build_number")))

    def _fetch_revision_to_build_map(self):
        # All _fetch requests go through _buildbot for easier mocking.
        # FIXME: This should use NetworkTransaction's 404 handling instead.
        try:
            # FIXME: This method is horribly slow due to the huge network load.
            # FIXME: This is a poor way to do revision -> build mapping.
            # Better would be to ask buildbot through some sort of API.
            print "Loading revision/build list from %s." % self.results_url()
            print "This may take a while..."
            result_files = self._buildbot._fetch_twisted_directory_listing(self.results_url())
        except urllib2.HTTPError, error:
            if error.code != 404:
                raise
            result_files = []

        # This assumes there was only one build per revision, which is false, but we don't care for now.
        return dict([self._revision_and_build_for_filename(file_info["filename"]) for file_info in result_files])

    def _revision_to_build_map(self):
        if not self._revision_to_build_number:
            self._revision_to_build_number = self._fetch_revision_to_build_map()
        return self._revision_to_build_number

    def revision_build_pairs_with_results(self):
        return self._revision_to_build_map().items()

    # This assumes there can be only one build per revision, which is false, but we don't care for now.
    def build_for_revision(self, revision, allow_failed_lookups=False):
        # NOTE: This lookup will fail if that exact revision was never built.
        build_number = self._revision_to_build_map().get(int(revision))
        if not build_number:
            return None
        build = self.build(build_number)
        if not build and allow_failed_lookups:
            # Builds for old revisions will fail to look up via buildbot's json api.
            build = Build(self,
                build_number=build_number,
                revision=revision,
                is_green=False,
            )
        return build

    def find_regression_window(self, red_build, look_back_limit=30):
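        # Walk backwards from red_build, intersecting the sets of failing
        # tests, until we hit a green build, run out of common failures, or
        # exhaust look_back_limit.  The returned RegressionWindow brackets
        # the build where the common failures first appeared.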
        if not red_build or red_build.is_green():
            return RegressionWindow(None, None)
        common_failures = None
        current_build = red_build
        build_after_current_build = None
        look_back_count = 0
        while current_build:
            if current_build.is_green():
                # current_build can't possibly have any failures in common
                # with red_build because it's green.
                break
            results = current_build.layout_test_results()
            # We treat a lack of results as if all the tests failed.
            # This occurs, for example, when we can't compile at all.
            if results:
                failures = set(results.failing_tests())
                if common_failures is None:
                    common_failures = failures
                else:
                    common_failures = common_failures.intersection(failures)
                    if not common_failures:
                        # current_build doesn't have any failures in common with
                        # the red build we're worried about.  We assume that any
                        # failures in current_build were due to flakiness.
                        break
            look_back_count += 1
            if look_back_count > look_back_limit:
                return RegressionWindow(None, current_build, failing_tests=common_failures)
            build_after_current_build = current_build
            current_build = current_build.previous_build()
        # We must iterate at least once because red_build is red.
        assert build_after_current_build
        # current_build must either be green or have no failures in common
        # with red_build, so we've found our failure transition.
        return RegressionWindow(current_build, build_after_current_build, failing_tests=common_failures)

    def find_blameworthy_regression_window(self, red_build_number, look_back_limit=30, avoid_flakey_tests=True):
        red_build = self.build(red_build_number)
        regression_window = self.find_regression_window(red_build, look_back_limit)
        if not regression_window.build_before_failure():
            return None  # We ran off the limit of our search.
        # If avoid_flakey_tests, require at least 2 bad builds before we
        # suspect a real failure transition.
        if avoid_flakey_tests and regression_window.failing_build() == red_build:
            return None
        return regression_window


class Build(object):
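    """A single build of a Builder, identified by its build number and the
    revision it built."""
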
    def __init__(self, builder, build_number, revision, is_green):
        self._builder = builder
        self._number = build_number
        self._revision = revision
        self._is_green = is_green
        self._layout_test_results = None

    @staticmethod
    def build_url(builder, build_number):
        return "%s/builds/%s" % (builder.url(), build_number)

    def url(self):
        return self.build_url(self.builder(), self._number)

    def results_url(self):
        results_directory = "r%s (%s)" % (self.revision(), self._number)
        return "%s/%s" % (self._builder.results_url(), urllib.quote(results_directory))

    def results_zip_url(self):
        return "%s.zip" % self.results_url()

    def results(self):
        return TestOutputSet(self._builder.name(), None, ZipFileSet(self.results_zip_url()), include_expected=False)

    def _fetch_results_html(self):
        results_html = "%s/results.html" % (self.results_url())
        # FIXME: This should use NetworkTransaction's 404 handling instead.
        try:
            # It seems this can return None if the url redirects and then returns 404.
            return urllib2.urlopen(results_html)
        except urllib2.HTTPError, error:
            if error.code != 404:
                raise

    def layout_test_results(self):
        if not self._layout_test_results:
            # FIXME: This should cache that the result was a 404 and stop hitting the network.
            self._layout_test_results = LayoutTestResults.results_from_string(self._fetch_results_html())
        return self._layout_test_results

    def builder(self):
        return self._builder

    def revision(self):
        return self._revision

    def is_green(self):
        return self._is_green

    def previous_build(self):
        # previous_build() allows callers to avoid assuming build numbers are sequential.
        # They may not be sequential across all master changes, or when non-trunk builds are made.
        return self._builder.build(self._number - 1)


class BuildBot(object):
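    """Frontend to a buildbot master.

    Fetches and parses status pages from buildbot_host and hands out
    Builder objects.
    """
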
    # FIXME: This should move into common.config.urls.
    default_host = "build.webkit.org"

    def __init__(self, host=default_host):
        self.buildbot_host = host
        self._builder_by_name = {}

        # If any core builder is red we should not be landing patches.  Other
        # builders should be added to this list once they are known to be
        # reliable.
        # See https://bugs.webkit.org/show_bug.cgi?id=33296 and related bugs.
        self.core_builder_names_regexps = [
            r"SnowLeopard.*Build",
            r"SnowLeopard.*\(Test",
            r"SnowLeopard.*\(WebKit2 Test",
            r"Leopard.*",
            r"Windows.*Build",
            r"Windows.*\(Test",
            r"WinCairo",
            r"WinCE",
            r"EFL",
            r"GTK.*32",
            r"GTK.*64.*Debug",  # Disallow the 64-bit Release bot, which is broken.
            r"Qt",
            r"Chromium.*Release$",
        ]

    def _parse_last_build_cell(self, builder, cell):
        status_link = cell.find('a')
        if status_link:
            # Will be either a revision number or a build number.
            revision_string = status_link.string
            # If revision_string has non-digits, assume it's not a revision number.
            builder['built_revision'] = int(revision_string) \
                                        if not re.match(r'\D', revision_string) \
                                        else None

            # FIXME: We treat a lost slave as green, even though it isn't, to
            # work around the Qt bot being on a broken internet connection.
            # The real fix is https://bugs.webkit.org/show_bug.cgi?id=37099
            builder['is_green'] = not re.search('fail', cell.renderContents()) or \
                                  bool(re.search('lost', cell.renderContents()))

            status_link_regexp = r"builders/(?P<builder_name>.*)/builds/(?P<build_number>\d+)"
            link_match = re.match(status_link_regexp, status_link['href'])
            builder['build_number'] = int(link_match.group("build_number"))
        else:
            # We failed to find a link in the first cell, so just give up.  This
            # can happen when a builder was just added and the first cell
            # contains only "no build".
            # Other parts of the code depend on is_green being present.
            builder['is_green'] = False
            builder['built_revision'] = None
            builder['build_number'] = None

    def _parse_current_build_cell(self, builder, cell):
        activity_lines = cell.renderContents().split("<br />")
        builder["activity"] = activity_lines[0]  # Normally "building" or "idle".
        # The middle lines document how long is left for any current builds.
        match = re.match(r"(?P<pending_builds>\d+) pending", activity_lines[-1])
        builder["pending_builds"] = int(match.group("pending_builds")) if match else 0

    def _parse_builder_status_from_row(self, status_row):
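        # Parses one <tr> from /one_box_per_builder into a status dictionary
        # with "name", last-build, and current-activity fields.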
        status_cells = status_row.findAll('td')
        builder = {}

        # The first cell is the name.
        name_link = status_cells[0].find('a')
        builder["name"] = unicode(name_link.string)

        self._parse_last_build_cell(builder, status_cells[1])
        self._parse_current_build_cell(builder, status_cells[2])
        return builder

    def _matches_regexps(self, builder_name, name_regexps):
        for name_regexp in name_regexps:
            if re.match(name_regexp, builder_name):
                return True
        return False

    # FIXME: Should move onto Builder.
    def _is_core_builder(self, builder_name):
        return self._matches_regexps(builder_name, self.core_builder_names_regexps)

    # FIXME: This method needs to die, but is used by a unit test at the moment.
    def _builder_statuses_with_names_matching_regexps(self, builder_statuses, name_regexps):
        return [builder for builder in builder_statuses if self._matches_regexps(builder["name"], name_regexps)]

    def red_core_builders(self):
        return [builder for builder in self.core_builder_statuses() if not builder["is_green"]]

    def red_core_builders_names(self):
        return [builder["name"] for builder in self.red_core_builders()]

    def idle_red_core_builders(self):
        return [builder for builder in self.red_core_builders() if builder["activity"] == "idle"]

    def core_builders_are_green(self):
        return not self.red_core_builders()

    # FIXME: These _fetch methods should move to a networking class.
    def _fetch_build_dictionary(self, builder, build_number):
        try:
            base = "http://%s" % self.buildbot_host
            path = urllib.quote("json/builders/%s/builds/%s" % (builder.name(),
                                                                build_number))
            url = "%s/%s" % (base, path)
            jsondata = urllib2.urlopen(url)
            return json.load(jsondata)
        except urllib2.URLError, err:
            build_url = Build.build_url(builder, build_number)
            _log.error("Error fetching data for %s build %s (%s): %s" % (builder.name(), build_number, build_url, err))
            return None
        except ValueError, err:
            build_url = Build.build_url(builder, build_number)
            _log.error("Error decoding json data from %s: %s" % (build_url, err))
            return None

    def _fetch_one_box_per_builder(self):
        build_status_url = "http://%s/one_box_per_builder" % self.buildbot_host
        return urllib2.urlopen(build_status_url)

    def _file_cell_text(self, file_cell):
        """Traverses down through firstChild elements until one containing a string is found, then returns that string."""
        element = file_cell
        while element.string is None and element.contents:
            element = element.contents[0]
        return element.string

    def _parse_twisted_file_row(self, file_row):
        string_or_empty = lambda string: unicode(string) if string else u""
        file_cells = file_row.findAll('td')
        return {
            "filename": string_or_empty(self._file_cell_text(file_cells[0])),
            "size": string_or_empty(self._file_cell_text(file_cells[1])),
            "type": string_or_empty(self._file_cell_text(file_cells[2])),
            "encoding": string_or_empty(self._file_cell_text(file_cells[3])),
        }

    def _parse_twisted_directory_listing(self, page):
        soup = BeautifulSoup(page)
        # HACK: Match only table rows with a class attribute to ignore twisted header/footer rows.
        file_rows = soup.find('table').findAll('tr', {'class': re.compile(r'\b(?:directory|file)\b')})
        return [self._parse_twisted_file_row(file_row) for file_row in file_rows]

    # FIXME: There should be a better way to get this information directly from twisted.
    def _fetch_twisted_directory_listing(self, url):
        return self._parse_twisted_directory_listing(urllib2.urlopen(url))

    def builders(self):
        return [self.builder_with_name(status["name"]) for status in self.builder_statuses()]

    # This method pulls from /one_box_per_builder as an efficient way to get status information about all builders in a single request.
    def builder_statuses(self):
        soup = BeautifulSoup(self._fetch_one_box_per_builder())
        return [self._parse_builder_status_from_row(status_row) for status_row in soup.find('table').findAll('tr')]

    def core_builder_statuses(self):
        return [builder for builder in self.builder_statuses() if self._is_core_builder(builder["name"])]

    def builder_with_name(self, name):
        builder = self._builder_by_name.get(name)
        if not builder:
            builder = Builder(name, self)
            self._builder_by_name[name] = builder
        return builder

    def failure_map(self, only_core_builders=True):
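        # Builds a FailureMap of blameworthy regression windows for every
        # builder that is currently red.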
        builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
        failure_map = FailureMap()
        for builder_status in builder_statuses:
            if builder_status["is_green"]:
                continue
            builder = self.builder_with_name(builder_status["name"])
            regression_window = builder.find_blameworthy_regression_window(builder_status["build_number"])
            if regression_window:
                failure_map.add_regression_window(builder, regression_window)
        return failure_map

    # This makes fewer requests than calling Builder.latest_cached_build on each builder would.  It grabs all
    # builder statuses in one request using self.builder_statuses (fetching /one_box_per_builder instead of builder pages).
    def _latest_builds_from_builders(self, only_core_builders=True):
        builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
        return [self.builder_with_name(status["name"]).build(status["build_number"]) for status in builder_statuses]

    def _build_at_or_before_revision(self, build, revision):
        while build:
            if build.revision() <= revision:
                return build
            build = build.previous_build()
        return None

    def last_green_revision(self, only_core_builders=True):
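        # Starting from each builder's latest build, walk all the builders
        # backwards in lockstep until they agree on a revision and that
        # revision is green everywhere; that revision is the answer.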
        builds = self._latest_builds_from_builders(only_core_builders)
        target_revision = builds[0].revision()
        # An alternate way to do this would be to start at one revision and walk backwards
        # checking builder.build_for_revision; however, build_for_revision is very slow on first load.
        while True:
            # Make the builds agree on a revision.
            builds = [self._build_at_or_before_revision(build, target_revision) for build in builds]
            if None in builds:  # One of the builds failed to load from the server.
                return None
            min_revision = min(build.revision() for build in builds)
            if min_revision != target_revision:
                target_revision = min_revision
                continue  # The builds don't all agree on a revision; keep searching.
            # Check to make sure they're all green.
            all_are_green = all(build.is_green() for build in builds)
            if not all_are_green:
                target_revision -= 1
                continue
            return min_revision