1#!/usr/bin/env python
2# Copyright (C) 2010 Google Inc. All rights reserved.
3# Copyright (C) 2010 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#     * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#     * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15#     * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31"""
The TestRunner class runs a series of tests (TestType interface) against a set
of test files.  If a test file fails a TestType, it returns a list of
TestFailure objects to the TestRunner.  The TestRunner then aggregates the
TestFailures to create a final report.
36"""
37
38from __future__ import with_statement
39
40import copy
41import errno
42import logging
43import math
44import Queue
45import random
46import sys
47import time
48
49from webkitpy.layout_tests.layout_package import json_layout_results_generator
50from webkitpy.layout_tests.layout_package import json_results_generator
51from webkitpy.layout_tests.layout_package import printing
52from webkitpy.layout_tests.layout_package import test_expectations
53from webkitpy.layout_tests.layout_package import test_failures
54from webkitpy.layout_tests.layout_package import test_results
55from webkitpy.layout_tests.layout_package import test_results_uploader
56from webkitpy.layout_tests.layout_package.result_summary import ResultSummary
57from webkitpy.layout_tests.layout_package.test_input import TestInput
58
59from webkitpy.thirdparty import simplejson
60from webkitpy.tool import grammar
61
62_log = logging.getLogger("webkitpy.layout_tests.run_webkit_tests")
63
# Base URL of the builder where the archived test results are stored.
65BUILDER_BASE_URL = "http://build.chromium.org/buildbot/layout_test_results/"
66
67TestExpectationsFile = test_expectations.TestExpectationsFile
68
69
70def summarize_results(port_obj, expectations, result_summary, retry_summary, test_timings, only_unexpected):
71    """Summarize any unexpected results as a dict.
72
73    FIXME: split this data structure into a separate class?
74
75    Args:
76        port_obj: interface to port-specific hooks
77        expectations: test_expectations.TestExpectations object
78        result_summary: summary object from initial test runs
79        retry_summary: summary object from final test run of retried tests
80        test_timings: a list of TestResult objects which contain test runtimes in seconds
81        only_unexpected: whether to return a summary only for the unexpected results
82    Returns:
        A dictionary summarizing the results of the run, with the following
        fields:
        'version': a version indicator (1 in this version)
        'fixable': # of fixable tests (NOW - PASS)
        'skipped': # of skipped tests (NOW & SKIP)
88        'num_regressions': # of non-flaky failures
89        'num_flaky': # of flaky failures
90        'num_passes': # of unexpected passes
91        'tests': a dict of tests -> {'expected': '...', 'actual': '...', 'time_ms': ...}
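
        Example 'tests' entry (test name and values are illustrative only):
            'fast/css/example.html': {'expected': 'PASS', 'actual': 'TEXT',
                                      'time_ms': 123, 'has_stderr': False}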
92    """
93    results = {}
94    results['version'] = 1
95
96    test_timings_map = dict((test_result.filename, test_result.test_run_time) for test_result in test_timings)
97
98    tbe = result_summary.tests_by_expectation
99    tbt = result_summary.tests_by_timeline
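    # tests_by_timeline and tests_by_expectation map enum values to sets of
    # test names, so the counts below are plain set arithmetic.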
100    results['fixable'] = len(tbt[test_expectations.NOW] -
101                                tbe[test_expectations.PASS])
102    results['skipped'] = len(tbt[test_expectations.NOW] &
103                                tbe[test_expectations.SKIP])
104
105    num_passes = 0
106    num_flaky = 0
107    num_regressions = 0
108    keywords = {}
    for expectation_string, expectation_enum in TestExpectationsFile.EXPECTATIONS.iteritems():
        keywords[expectation_enum] = expectation_string.upper()
111
112    for modifier_string, modifier_enum in TestExpectationsFile.MODIFIERS.iteritems():
113        keywords[modifier_enum] = modifier_string.upper()
114
115    tests = {}
116    original_results = result_summary.unexpected_results if only_unexpected else result_summary.results
117
118    for filename, result in original_results.iteritems():
119        # Note that if a test crashed in the original run, we ignore
120        # whether or not it crashed when we retried it (if we retried it),
121        # and always consider the result not flaky.
122        test = port_obj.relative_test_filename(filename)
123        expected = expectations.get_expectations_string(filename)
124        result_type = result.type
125        actual = [keywords[result_type]]
126
127        if result_type == test_expectations.PASS:
128            num_passes += 1
129        elif result_type == test_expectations.CRASH:
130            num_regressions += 1
131        elif filename in result_summary.unexpected_results:
132            if filename not in retry_summary.unexpected_results:
133                actual.extend(expectations.get_expectations_string(filename).split(" "))
134                num_flaky += 1
135            else:
136                retry_result_type = retry_summary.unexpected_results[filename].type
137                if result_type != retry_result_type:
138                    actual.append(keywords[retry_result_type])
139                    num_flaky += 1
140                else:
141                    num_regressions += 1
142
143        tests[test] = {}
144        tests[test]['expected'] = expected
145        tests[test]['actual'] = " ".join(actual)
146        # FIXME: Set this correctly once https://webkit.org/b/37739 is fixed
147        # and only set it if there actually is stderr data.
148        tests[test]['has_stderr'] = False
149
150        failure_types = [type(f) for f in result.failures]
151        if test_failures.FailureMissingAudio in failure_types:
152            tests[test]['is_missing_audio'] = True
153
154        if test_failures.FailureReftestMismatch in failure_types:
155            tests[test]['is_reftest'] = True
156
161        if test_failures.FailureReftestMismatchDidNotOccur in failure_types:
162            tests[test]['is_mismatch_reftest'] = True
163
164        if test_failures.FailureMissingResult in failure_types:
165            tests[test]['is_missing_text'] = True
166
167        if test_failures.FailureMissingImage in failure_types or test_failures.FailureMissingImageHash in failure_types:
168            tests[test]['is_missing_image'] = True
169
170        if filename in test_timings_map:
171            time_seconds = test_timings_map[filename]
172            tests[test]['time_ms'] = int(1000 * time_seconds)
173
174    results['tests'] = tests
175    results['num_passes'] = num_passes
176    results['num_flaky'] = num_flaky
177    results['num_regressions'] = num_regressions
178    # FIXME: If non-chromium ports start using an expectations file,
179    # we should make this check more robust.
    results['uses_expectations_file'] = 'chromium' in port_obj.name()
181    results['layout_tests_dir'] = port_obj.layout_tests_dir()
182    results['has_wdiff'] = port_obj.wdiff_available()
183    results['has_pretty_patch'] = port_obj.pretty_patch_available()
184
185    return results
186
187
188class TestRunInterruptedException(Exception):
189    """Raised when a test run should be stopped immediately."""
190    def __init__(self, reason):
191        self.reason = reason
192
193    def __reduce__(self):
194        return self.__class__, (self.reason,)
195
196
197class TestRunner:
198    """A class for managing running a series of tests on a series of layout
199    test files."""
200
201
202    # The per-test timeout in milliseconds, if no --time-out-ms option was
203    # given to run_webkit_tests. This should correspond to the default timeout
204    # in DumpRenderTree.
205    DEFAULT_TEST_TIMEOUT_MS = 6 * 1000
206
207    def __init__(self, port, options, printer):
208        """Initialize test runner data structures.
209
210        Args:
          port: an object implementing port-specific functionality
          options: command line options (accessed as attributes)
          printer: a Printer object to record updates to.
214        """
215        self._port = port
216        self._fs = port._filesystem
217        self._options = options
218        self._printer = printer
219        self._message_broker = None
220
221        self.HTTP_SUBDIR = self._fs.join('', 'http', '')
222        self.WEBSOCKET_SUBDIR = self._fs.join('', 'websocket', '')
223        self.LAYOUT_TESTS_DIRECTORY = "LayoutTests" + self._fs.sep
224
225
        # The secure WebSocket (wss) server is disabled until pyOpenSSL is
        # installed on the buildbots.
227        # self._websocket_secure_server = websocket_server.PyWebSocket(
228        #        options.results_directory, use_tls=True, port=9323)
229
230        # a set of test files, and the same tests as a list
231        self._test_files = set()
232        self._test_files_list = None
233        self._result_queue = Queue.Queue()
234        self._retrying = False
235        self._results_directory = self._port.results_directory()
236
237    def collect_tests(self, args, last_unexpected_results):
238        """Find all the files to test.
239
240        Args:
241          args: list of test arguments from the command line
242          last_unexpected_results: list of unexpected results to retest, if any
243
244        """
245        paths = self._strip_test_dir_prefixes(args)
246        paths += last_unexpected_results
247        if self._options.test_list:
248            paths += self._strip_test_dir_prefixes(read_test_files(self._fs, self._options.test_list))
249        self._test_files = self._port.tests(paths)
250
251    def _strip_test_dir_prefixes(self, paths):
252        return [self._strip_test_dir_prefix(path) for path in paths if path]
253
254    def _strip_test_dir_prefix(self, path):
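        # For example (illustrative path, POSIX separators):
        #   "LayoutTests/fast/css/foo.html" -> "fast/css/foo.html"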
255        if path.startswith(self.LAYOUT_TESTS_DIRECTORY):
256            return path[len(self.LAYOUT_TESTS_DIRECTORY):]
257        return path
258
259    def lint(self):
260        lint_failed = False
261        for test_configuration in self._port.all_test_configurations():
262            try:
263                self.lint_expectations(test_configuration)
264            except test_expectations.ParseError:
265                lint_failed = True
266                self._printer.write("")
267
268        if lint_failed:
269            _log.error("Lint failed.")
270            return -1
271
272        _log.info("Lint succeeded.")
273        return 0
274
275    def lint_expectations(self, config):
276        port = self._port
277        test_expectations.TestExpectations(
278            port,
279            None,
280            port.test_expectations(),
281            config,
282            self._options.lint_test_files,
283            port.test_expectations_overrides())
284
285    def parse_expectations(self):
286        """Parse the expectations from the test_list files and return a data
287        structure holding them. Throws an error if the test_list files have
288        invalid syntax."""
289        port = self._port
290        self._expectations = test_expectations.TestExpectations(
291            port,
292            self._test_files,
293            port.test_expectations(),
294            port.test_configuration(),
295            self._options.lint_test_files,
296            port.test_expectations_overrides())
297
298    # FIXME: This method is way too long and needs to be broken into pieces.
299    def prepare_lists_and_print_output(self):
300        """Create appropriate subsets of test lists and returns a
301        ResultSummary object. Also prints expected test counts.
302        """
303
304        # Remove skipped - both fixable and ignored - files from the
305        # top-level list of files to test.
306        num_all_test_files = len(self._test_files)
307        self._printer.print_expected("Found:  %d tests" %
308                                     (len(self._test_files)))
309        if not num_all_test_files:
310            _log.critical('No tests to run.')
311            return None
312
313        skipped = set()
314        if num_all_test_files > 1 and not self._options.force:
315            skipped = self._expectations.get_tests_with_result_type(
316                           test_expectations.SKIP)
317            self._test_files -= skipped
318
319        # Create a sorted list of test files so the subset chunk,
320        # if used, contains alphabetically consecutive tests.
321        self._test_files_list = list(self._test_files)
322        if self._options.randomize_order:
323            random.shuffle(self._test_files_list)
324        else:
325            self._test_files_list.sort()
326
327        # If the user specifies they just want to run a subset of the tests,
328        # just grab a subset of the non-skipped tests.
329        if self._options.run_chunk or self._options.run_part:
330            chunk_value = self._options.run_chunk or self._options.run_part
331            test_files = self._test_files_list
332            try:
333                (chunk_num, chunk_len) = chunk_value.split(":")
334                chunk_num = int(chunk_num)
335                assert(chunk_num >= 0)
336                test_size = int(chunk_len)
337                assert(test_size > 0)
            except (ValueError, AssertionError):
339                _log.critical("invalid chunk '%s'" % chunk_value)
340                return None
341
342            # Get the number of tests
343            num_tests = len(test_files)
344
345            # Get the start offset of the slice.
346            if self._options.run_chunk:
347                chunk_len = test_size
                # In this case chunk_num can be very large; wrap the slice
                # start around so it fits within the current number of tests.
350                slice_start = (chunk_num * chunk_len) % num_tests
351            else:
352                # Validate the data.
353                assert(test_size <= num_tests)
354                assert(chunk_num <= test_size)
355
                # To compute chunk_len without skipping any tests, round the
                # number of tests up to the next value that divides evenly
                # into test_size parts.
359                rounded_tests = num_tests
360                if rounded_tests % test_size != 0:
361                    rounded_tests = (num_tests + test_size -
362                                     (num_tests % test_size))
363
364                chunk_len = rounded_tests / test_size
365                slice_start = chunk_len * (chunk_num - 1)
                # It is fine if this slice runs past the end of the list; the
                # end offset is clamped below.
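                # Worked example (hypothetical numbers): with run_part "2:3"
                # (part 2 of 3) and 10 tests, rounded_tests is 12, chunk_len
                # is 4, and this part gets the slice [4:8]; part 3 would get
                # [8:10] and be padded from the front of the list below.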
367
368            # Get the end offset of the slice.
369            slice_end = min(num_tests, slice_start + chunk_len)
370
371            files = test_files[slice_start:slice_end]
372
373            tests_run_msg = 'Running: %d tests (chunk slice [%d:%d] of %d)' % (
374                (slice_end - slice_start), slice_start, slice_end, num_tests)
375            self._printer.print_expected(tests_run_msg)
376
377            # If we reached the end and we don't have enough tests, we run some
378            # from the beginning.
379            if slice_end - slice_start < chunk_len:
380                extra = chunk_len - (slice_end - slice_start)
381                extra_msg = ('   last chunk is partial, appending [0:%d]' %
382                            extra)
383                self._printer.print_expected(extra_msg)
384                tests_run_msg += "\n" + extra_msg
385                files.extend(test_files[0:extra])
386            tests_run_filename = self._fs.join(self._results_directory, "tests_run.txt")
387            self._fs.write_text_file(tests_run_filename, tests_run_msg)
388
389            len_skip_chunk = int(len(files) * len(skipped) /
390                                 float(len(self._test_files)))
391            skip_chunk_list = list(skipped)[0:len_skip_chunk]
392            skip_chunk = set(skip_chunk_list)
393
394            # Update expectations so that the stats are calculated correctly.
395            # We need to pass a list that includes the right # of skipped files
396            # to ParseExpectations so that ResultSummary() will get the correct
397            # stats. So, we add in the subset of skipped files, and then
398            # subtract them back out.
399            self._test_files_list = files + skip_chunk_list
400            self._test_files = set(self._test_files_list)
401
402            self.parse_expectations()
403
404            self._test_files = set(files)
405            self._test_files_list = files
406        else:
407            skip_chunk = skipped
408
409        result_summary = ResultSummary(self._expectations,
410            self._test_files | skip_chunk)
411        self._print_expected_results_of_type(result_summary,
412            test_expectations.PASS, "passes")
413        self._print_expected_results_of_type(result_summary,
414            test_expectations.FAIL, "failures")
415        self._print_expected_results_of_type(result_summary,
416            test_expectations.FLAKY, "flaky")
417        self._print_expected_results_of_type(result_summary,
418            test_expectations.SKIP, "skipped")
419
420        if self._options.force:
421            self._printer.print_expected('Running all tests, including '
422                                         'skips (--force)')
423        else:
424            # Note that we don't actually run the skipped tests (they were
425            # subtracted out of self._test_files, above), but we stub out the
426            # results here so the statistics can remain accurate.
427            for test in skip_chunk:
428                result = test_results.TestResult(test)
429                result.type = test_expectations.SKIP
430                result_summary.add(result, expected=True)
431        self._printer.print_expected('')
432
433        # Check to make sure we didn't filter out all of the tests.
434        if not len(self._test_files):
435            _log.info("All tests are being skipped")
436            return None
437
438        return result_summary
439
440    def _get_dir_for_test_file(self, test_file):
441        """Returns the highest-level directory by which to shard the given
442        test file."""
443        index = test_file.rfind(self._fs.sep + self.LAYOUT_TESTS_DIRECTORY)
444
445        test_file = test_file[index + len(self.LAYOUT_TESTS_DIRECTORY):]
446        test_file_parts = test_file.split(self._fs.sep, 1)
447        directory = test_file_parts[0]
448        test_file = test_file_parts[1]
449
        # The http tests are very stable on mac/linux.
        # TODO(ojan): Make the http server on Windows be apache so we can
        # then shard the http tests there as well. Switching to apache is
        # what made them stable on linux/mac.
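        # For example (illustrative paths, POSIX separators):
        #   .../LayoutTests/fast/css/foo.html          -> "fast/css"
        #   .../LayoutTests/http/tests/security/a.html -> "http/tests/security"
        #       on mac/linux, but just "http" on other platforms.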
454        return_value = directory
455        while ((directory != 'http' or sys.platform in ('darwin', 'linux2'))
456                and test_file.find(self._fs.sep) >= 0):
457            test_file_parts = test_file.split(self._fs.sep, 1)
458            directory = test_file_parts[0]
459            return_value = self._fs.join(return_value, directory)
460            test_file = test_file_parts[1]
461
462        return return_value
463
464    def _get_test_input_for_file(self, test_file):
465        """Returns the appropriate TestInput object for the file. Mostly this
466        is used for looking up the timeout value (in ms) to use for the given
467        test."""
468        if self._test_is_slow(test_file):
469            return TestInput(test_file, self._options.slow_time_out_ms)
470        return TestInput(test_file, self._options.time_out_ms)
471
472    def _test_requires_lock(self, test_file):
473        """Return True if the test needs to be locked when
474        running multiple copies of NRWTs."""
475        split_path = test_file.split(self._port._filesystem.sep)
476        return 'http' in split_path or 'websocket' in split_path
477
478    def _test_is_slow(self, test_file):
479        return self._expectations.has_modifier(test_file,
480                                               test_expectations.SLOW)
481
482    def _shard_tests(self, test_files, use_real_shards):
483        """Groups tests into batches.
484        This helps ensure that tests that depend on each other (aka bad tests!)
485        continue to run together as most cross-tests dependencies tend to
486        occur within the same directory. If use_real_shards is False, we
487        put each (non-HTTP/websocket) test into its own shard for maximum
488        concurrency instead of trying to do any sort of real sharding.
489
        Return:
            A list of (shard_name, list of TestInput objects) tuples.
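            For example (illustrative), with use_real_shards=True and some
            http tests present, the result looks like:
                [("tests_to_http_lock", [TestInput, ...]),
                 ("fast/css", [TestInput, ...]), ...]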
492        """
        # FIXME: when we added http locking, we changed how this works such
        # that we always lump all of the HTTP tests into a single shard.
        # That will slow down experimental-fully-parallel, but it's unclear
        # what the best alternative is, short of completely revamping how we
        # track when to grab the lock.
498
499        test_lists = []
500        tests_to_http_lock = []
501        if not use_real_shards:
502            for test_file in test_files:
503                test_input = self._get_test_input_for_file(test_file)
504                if self._test_requires_lock(test_file):
505                    tests_to_http_lock.append(test_input)
506                else:
507                    test_lists.append((".", [test_input]))
508        else:
509            tests_by_dir = {}
510            for test_file in test_files:
511                directory = self._get_dir_for_test_file(test_file)
512                test_input = self._get_test_input_for_file(test_file)
513                if self._test_requires_lock(test_file):
514                    tests_to_http_lock.append(test_input)
515                else:
516                    tests_by_dir.setdefault(directory, [])
517                    tests_by_dir[directory].append(test_input)
518            # Sort by the number of tests in the dir so that the ones with the
519            # most tests get run first in order to maximize parallelization.
520            # Number of tests is a good enough, but not perfect, approximation
521            # of how long that set of tests will take to run. We can't just use
522            # a PriorityQueue until we move to Python 2.6.
523            for directory in tests_by_dir:
524                test_list = tests_by_dir[directory]
525                test_list_tuple = (directory, test_list)
526                test_lists.append(test_list_tuple)
527            test_lists.sort(lambda a, b: cmp(len(b[1]), len(a[1])))
528
529        # Put the http tests first. There are only a couple hundred of them,
530        # but each http test takes a very long time to run, so sorting by the
531        # number of tests doesn't accurately capture how long they take to run.
532        if tests_to_http_lock:
533            test_lists.insert(0, ("tests_to_http_lock", tests_to_http_lock))
534
535        return test_lists
536
537    def _contains_tests(self, subdir):
538        for test_file in self._test_files:
539            if test_file.find(subdir) >= 0:
540                return True
541        return False
542
543    def _num_workers(self, num_shards):
544        num_workers = min(int(self._options.child_processes), num_shards)
545        driver_name = self._port.driver_name()
546        if num_workers == 1:
547            self._printer.print_config("Running 1 %s over %s" %
548                (driver_name, grammar.pluralize('shard', num_shards)))
549        else:
550            self._printer.print_config("Running %d %ss in parallel over %d shards" %
551                (num_workers, driver_name, num_shards))
552        return num_workers
553
554    def _run_tests(self, file_list, result_summary):
555        """Runs the tests in the file_list.
556
557        Return: A tuple (interrupted, keyboard_interrupted, thread_timings,
558            test_timings, individual_test_timings)
559            interrupted is whether the run was interrupted
560            keyboard_interrupted is whether the interruption was because someone
561              typed Ctrl^C
562            thread_timings is a list of dicts with the total runtime
563              of each thread with 'name', 'num_tests', 'total_time' properties
564            test_timings is a list of timings for each sharded subdirectory
565              of the form [time, directory_name, num_tests]
566            individual_test_timings is a list of run times for each test
567              in the form {filename:filename, test_run_time:test_run_time}
568            result_summary: summary object to populate with the results
569        """
570        raise NotImplementedError()
571
572    def update(self):
573        self.update_summary(self._current_result_summary)
574
575    def _collect_timing_info(self, threads):
576        test_timings = {}
577        individual_test_timings = []
578        thread_timings = []
579
580        for thread in threads:
581            thread_timings.append({'name': thread.getName(),
582                                   'num_tests': thread.get_num_tests(),
583                                   'total_time': thread.get_total_time()})
584            test_timings.update(thread.get_test_group_timing_stats())
585            individual_test_timings.extend(thread.get_test_results())
586
587        return (thread_timings, test_timings, individual_test_timings)
588
589    def needs_http(self):
590        """Returns whether the test runner needs an HTTP server."""
591        return self._contains_tests(self.HTTP_SUBDIR)
592
593    def needs_websocket(self):
594        """Returns whether the test runner needs a WEBSOCKET server."""
595        return self._contains_tests(self.WEBSOCKET_SUBDIR)
596
597    def set_up_run(self):
598        """Configures the system to be ready to run tests.
599
600        Returns a ResultSummary object if we should continue to run tests,
601        or None if we should abort.
602
603        """
604        # This must be started before we check the system dependencies,
605        # since the helper may do things to make the setup correct.
606        self._printer.print_update("Starting helper ...")
607        self._port.start_helper()
608
609        # Check that the system dependencies (themes, fonts, ...) are correct.
610        if not self._options.nocheck_sys_deps:
611            self._printer.print_update("Checking system dependencies ...")
612            if not self._port.check_sys_deps(self.needs_http()):
613                self._port.stop_helper()
614                return None
615
616        if self._options.clobber_old_results:
617            self._clobber_old_results()
618
619        # Create the output directory if it doesn't already exist.
620        self._port.maybe_make_directory(self._results_directory)
621
622        self._port.setup_test_run()
623
624        self._printer.print_update("Preparing tests ...")
625        result_summary = self.prepare_lists_and_print_output()
626        if not result_summary:
627            return None
628
629        return result_summary
630
631    def run(self, result_summary):
632        """Run all our tests on all our test files.
633
634        For each test file, we run each test type. If there are any failures,
635        we collect them for reporting.
636
637        Args:
638          result_summary: a summary object tracking the test results.
639
640        Return:
641          The number of unexpected results (0 == success)
642        """
643        # gather_test_files() must have been called first to initialize us.
644        # If we didn't find any files to test, we've errored out already in
645        # prepare_lists_and_print_output().
646        assert(len(self._test_files))
647
648        start_time = time.time()
649
650        interrupted, keyboard_interrupted, thread_timings, test_timings, \
651            individual_test_timings = (
652            self._run_tests(self._test_files_list, result_summary))
653
654        # We exclude the crashes from the list of results to retry, because
655        # we want to treat even a potentially flaky crash as an error.
656        failures = self._get_failures(result_summary, include_crashes=False)
657        retry_summary = result_summary
658        while (len(failures) and self._options.retry_failures and
659            not self._retrying and not interrupted):
660            _log.info('')
661            _log.info("Retrying %d unexpected failure(s) ..." % len(failures))
662            _log.info('')
663            self._retrying = True
664            retry_summary = ResultSummary(self._expectations, failures.keys())
665            # Note that we intentionally ignore the return value here.
666            self._run_tests(failures.keys(), retry_summary)
667            failures = self._get_failures(retry_summary, include_crashes=True)
668
669        end_time = time.time()
670
671        self._print_timing_statistics(end_time - start_time,
672                                      thread_timings, test_timings,
673                                      individual_test_timings,
674                                      result_summary)
675
676        self._print_result_summary(result_summary)
677
678        sys.stdout.flush()
679        sys.stderr.flush()
680
681        self._printer.print_one_line_summary(result_summary.total,
682                                             result_summary.expected,
683                                             result_summary.unexpected)
684
685        unexpected_results = summarize_results(self._port,
686            self._expectations, result_summary, retry_summary, individual_test_timings, only_unexpected=True)
687        self._printer.print_unexpected_results(unexpected_results)
688
689        # FIXME: remove record_results. It's just used for testing. There's no need
690        # for it to be a commandline argument.
691        if (self._options.record_results and not self._options.dry_run and
692            not keyboard_interrupted):
693            # Write the same data to log files and upload generated JSON files
694            # to appengine server.
695            summarized_results = summarize_results(self._port,
696                self._expectations, result_summary, retry_summary, individual_test_timings, only_unexpected=False)
697            self._upload_json_files(unexpected_results, summarized_results, result_summary,
698                                    individual_test_timings)
699
700        # Write the summary to disk (results.html) and display it if requested.
701        if not self._options.dry_run:
702            self._copy_results_html_file()
703            if self._options.show_results:
704                self._show_results_html_file(result_summary)
705
706        # Now that we've completed all the processing we can, we re-raise
707        # a KeyboardInterrupt if necessary so the caller can handle it.
708        if keyboard_interrupted:
709            raise KeyboardInterrupt
710
711        # Ignore flaky failures and unexpected passes so we don't turn the
712        # bot red for those.
713        return unexpected_results['num_regressions']
714
715    def clean_up_run(self):
716        """Restores the system after we're done running tests."""
717
718        _log.debug("flushing stdout")
719        sys.stdout.flush()
720        _log.debug("flushing stderr")
721        sys.stderr.flush()
722        _log.debug("stopping helper")
723        self._port.stop_helper()
724
725    def update_summary(self, result_summary):
726        """Update the summary and print results with any completed tests."""
727        while True:
728            try:
729                result = test_results.TestResult.loads(self._result_queue.get_nowait())
730            except Queue.Empty:
731                return
732
733            self._update_summary_with_result(result_summary, result)
734
735    def _update_summary_with_result(self, result_summary, result):
736        expected = self._expectations.matches_an_expected_result(
737            result.filename, result.type, self._options.pixel_tests)
738        result_summary.add(result, expected)
739        exp_str = self._expectations.get_expectations_string(
740            result.filename)
741        got_str = self._expectations.expectation_to_string(result.type)
742        self._printer.print_test_result(result, expected, exp_str, got_str)
743        self._printer.print_progress(result_summary, self._retrying,
744                                        self._test_files_list)
745
746        def interrupt_if_at_failure_limit(limit, count, message):
747            if limit and count >= limit:
748                raise TestRunInterruptedException(message % count)
749
750        interrupt_if_at_failure_limit(
751            self._options.exit_after_n_failures,
752            result_summary.unexpected_failures,
753            "Aborting run since %d failures were reached")
754        interrupt_if_at_failure_limit(
755            self._options.exit_after_n_crashes_or_timeouts,
756            result_summary.unexpected_crashes_or_timeouts,
757            "Aborting run since %d crashes or timeouts were reached")
758
759    def _clobber_old_results(self):
760        # Just clobber the actual test results directories since the other
761        # files in the results directory are explicitly used for cross-run
762        # tracking.
763        self._printer.print_update("Clobbering old results in %s" %
764                                   self._results_directory)
765        layout_tests_dir = self._port.layout_tests_dir()
766        possible_dirs = self._port.test_dirs()
767        for dirname in possible_dirs:
768            if self._fs.isdir(self._fs.join(layout_tests_dir, dirname)):
769                self._fs.rmtree(self._fs.join(self._results_directory, dirname))
770
771    def _get_failures(self, result_summary, include_crashes):
772        """Filters a dict of results and returns only the failures.
773
774        Args:
775          result_summary: the results of the test run
776          include_crashes: whether crashes are included in the output.
777            We use False when finding the list of failures to retry
778            to see if the results were flaky. Although the crashes may also be
779            flaky, we treat them as if they aren't so that they're not ignored.
780        Returns:
781          a dict of files -> results
782        """
783        failed_results = {}
784        for test, result in result_summary.unexpected_results.iteritems():
            if (result.type == test_expectations.PASS or
                (result.type == test_expectations.CRASH and
                 not include_crashes)):
787                continue
788            failed_results[test] = result.type
789
790        return failed_results
791
792    def _char_for_result(self, result):
793        result = result.lower()
794        if result in TestExpectationsFile.EXPECTATIONS:
795            result_enum_value = TestExpectationsFile.EXPECTATIONS[result]
796        else:
797            result_enum_value = TestExpectationsFile.MODIFIERS[result]
798        return json_layout_results_generator.JSONLayoutResultsGenerator.FAILURE_TO_CHAR[result_enum_value]
799
800    def _upload_json_files(self, unexpected_results, summarized_results, result_summary,
801                           individual_test_timings):
802        """Writes the results of the test run as JSON files into the results
803        dir and upload the files to the appengine server.
804
805        There are three different files written into the results dir:
806          unexpected_results.json: A short list of any unexpected results.
807            This is used by the buildbots to display results.
808          expectations.json: This is used by the flakiness dashboard.
809          results.json: A full list of the results - used by the flakiness
810            dashboard and the aggregate results dashboard.
811
812        Args:
813          unexpected_results: dict of unexpected results
814          summarized_results: dict of results
815          result_summary: full summary object
816          individual_test_timings: list of test times (used by the flakiness
817            dashboard).
818        """
819        _log.debug("Writing JSON files in %s." % self._results_directory)
820
821        unexpected_json_path = self._fs.join(self._results_directory, "unexpected_results.json")
822        json_results_generator.write_json(self._fs, unexpected_results, unexpected_json_path)
823
824        full_results_path = self._fs.join(self._results_directory, "full_results.json")
825        json_results_generator.write_json(self._fs, summarized_results, full_results_path)
826
827        # Write a json file of the test_expectations.txt file for the layout
828        # tests dashboard.
829        expectations_path = self._fs.join(self._results_directory, "expectations.json")
830        expectations_json = \
831            self._expectations.get_expectations_json_for_all_platforms()
832        self._fs.write_text_file(expectations_path,
833                                 u"ADD_EXPECTATIONS(%s);" % expectations_json)
834
835        generator = json_layout_results_generator.JSONLayoutResultsGenerator(
836            self._port, self._options.builder_name, self._options.build_name,
837            self._options.build_number, self._results_directory,
838            BUILDER_BASE_URL, individual_test_timings,
839            self._expectations, result_summary, self._test_files_list,
840            self._options.test_results_server,
841            "layout-tests",
842            self._options.master_name)
843
844        _log.debug("Finished writing JSON files.")
845
846        json_files = ["expectations.json", "incremental_results.json", "full_results.json"]
847
848        generator.upload_json_files(json_files)
849
850    def _print_config(self):
851        """Prints the configuration for the test run."""
852        p = self._printer
853        p.print_config("Using port '%s'" % self._port.name())
854        p.print_config("Test configuration: %s" % self._port.test_configuration())
855        p.print_config("Placing test results in %s" % self._results_directory)
856        if self._options.new_baseline:
857            p.print_config("Placing new baselines in %s" %
858                           self._port.baseline_path())
859        p.print_config("Using %s build" % self._options.configuration)
860        if self._options.pixel_tests:
861            p.print_config("Pixel tests enabled")
862        else:
863            p.print_config("Pixel tests disabled")
864
865        p.print_config("Regular timeout: %s, slow test timeout: %s" %
866                       (self._options.time_out_ms,
867                        self._options.slow_time_out_ms))
868
869        p.print_config('Command line: ' +
870                       ' '.join(self._port.driver_cmd_line()))
871        p.print_config("Worker model: %s" % self._options.worker_model)
872        p.print_config("")
873
874    def _print_expected_results_of_type(self, result_summary,
875                                        result_type, result_type_str):
876        """Print the number of the tests in a given result class.
877
878        Args:
879          result_summary - the object containing all the results to report on
880          result_type - the particular result type to report in the summary.
881          result_type_str - a string description of the result_type.
882        """
883        tests = self._expectations.get_tests_with_result_type(result_type)
884        now = result_summary.tests_by_timeline[test_expectations.NOW]
885        wontfix = result_summary.tests_by_timeline[test_expectations.WONTFIX]
886
887        # We use a fancy format string in order to print the data out in a
888        # nicely-aligned table.
889        fmtstr = ("Expect: %%5d %%-8s (%%%dd now, %%%dd wontfix)"
890                  % (self._num_digits(now), self._num_digits(wontfix)))
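        # For example, if both counts have up to three digits, fmtstr becomes
        # "Expect: %5d %-8s (%3d now, %3d wontfix)".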
891        self._printer.print_expected(fmtstr %
892            (len(tests), result_type_str, len(tests & now), len(tests & wontfix)))
893
894    def _num_digits(self, num):
895        """Returns the number of digits needed to represent the length of a
896        sequence."""
897        ndigits = 1
898        if len(num):
899            ndigits = int(math.log10(len(num))) + 1
900        return ndigits
901
902    def _print_timing_statistics(self, total_time, thread_timings,
903                               directory_test_timings, individual_test_timings,
904                               result_summary):
905        """Record timing-specific information for the test run.
906
907        Args:
908          total_time: total elapsed time (in seconds) for the test run
909          thread_timings: wall clock time each thread ran for
910          directory_test_timings: timing by directory
911          individual_test_timings: timing by file
912          result_summary: summary object for the test run
913        """
914        self._printer.print_timing("Test timing:")
915        self._printer.print_timing("  %6.2f total testing time" % total_time)
916        self._printer.print_timing("")
917        self._printer.print_timing("Thread timing:")
918        cuml_time = 0
919        for t in thread_timings:
920            self._printer.print_timing("    %10s: %5d tests, %6.2f secs" %
921                  (t['name'], t['num_tests'], t['total_time']))
922            cuml_time += t['total_time']
923        self._printer.print_timing("   %6.2f cumulative, %6.2f optimal" %
924              (cuml_time, cuml_time / int(self._options.child_processes)))
925        self._printer.print_timing("")
926
927        self._print_aggregate_test_statistics(individual_test_timings)
928        self._print_individual_test_times(individual_test_timings,
929                                          result_summary)
930        self._print_directory_timings(directory_test_timings)
931
932    def _print_aggregate_test_statistics(self, individual_test_timings):
933        """Prints aggregate statistics (e.g. median, mean, etc.) for all tests.
934        Args:
935          individual_test_timings: List of TestResults for all tests.
936        """
937        times_for_dump_render_tree = [test_stats.test_run_time for test_stats in individual_test_timings]
938        self._print_statistics_for_test_timings("PER TEST TIME IN TESTSHELL (seconds):",
939                                                times_for_dump_render_tree)
940
941    def _print_individual_test_times(self, individual_test_timings,
942                                  result_summary):
943        """Prints the run times for slow, timeout and crash tests.
944        Args:
          individual_test_timings: List of TestResult objects for all tests.
946          result_summary: summary object for test run
947        """
948        # Reverse-sort by the time spent in DumpRenderTree.
949        individual_test_timings.sort(lambda a, b:
950            cmp(b.test_run_time, a.test_run_time))
951
952        num_printed = 0
953        slow_tests = []
954        timeout_or_crash_tests = []
955        unexpected_slow_tests = []
956        for test_tuple in individual_test_timings:
957            filename = test_tuple.filename
958            is_timeout_crash_or_slow = False
959            if self._test_is_slow(filename):
960                is_timeout_crash_or_slow = True
961                slow_tests.append(test_tuple)
962
963            if filename in result_summary.failures:
964                result = result_summary.results[filename].type
965                if (result == test_expectations.TIMEOUT or
966                    result == test_expectations.CRASH):
967                    is_timeout_crash_or_slow = True
968                    timeout_or_crash_tests.append(test_tuple)
969
970            if (not is_timeout_crash_or_slow and
971                num_printed < printing.NUM_SLOW_TESTS_TO_LOG):
972                num_printed = num_printed + 1
973                unexpected_slow_tests.append(test_tuple)
974
975        self._printer.print_timing("")
976        self._print_test_list_timing("%s slowest tests that are not "
977            "marked as SLOW and did not timeout/crash:" %
978            printing.NUM_SLOW_TESTS_TO_LOG, unexpected_slow_tests)
979        self._printer.print_timing("")
980        self._print_test_list_timing("Tests marked as SLOW:", slow_tests)
981        self._printer.print_timing("")
982        self._print_test_list_timing("Tests that timed out or crashed:",
983                                     timeout_or_crash_tests)
984        self._printer.print_timing("")
985
986    def _print_test_list_timing(self, title, test_list):
987        """Print timing info for each test.
988
989        Args:
990          title: section heading
991          test_list: tests that fall in this section
992        """
993        if self._printer.disabled('slowest'):
994            return
995
996        self._printer.print_timing(title)
997        for test_tuple in test_list:
998            filename = test_tuple.filename[len(
999                self._port.layout_tests_dir()) + 1:]
1000            filename = filename.replace('\\', '/')
1001            test_run_time = round(test_tuple.test_run_time, 1)
1002            self._printer.print_timing("  %s took %s seconds" %
1003                                       (filename, test_run_time))
1004
1005    def _print_directory_timings(self, directory_test_timings):
1006        """Print timing info by directory for any directories that
1007        take > 10 seconds to run.
1008
1009        Args:
          directory_test_timings: time info for each directory
1011        """
1012        timings = []
1013        for directory in directory_test_timings:
1014            num_tests, time_for_directory = directory_test_timings[directory]
1015            timings.append((round(time_for_directory, 1), directory,
1016                            num_tests))
1017        timings.sort()
1018
1019        self._printer.print_timing("Time to process slowest subdirectories:")
1020        min_seconds_to_print = 10
1021        for timing in timings:
1022            if timing[0] > min_seconds_to_print:
1023                self._printer.print_timing(
1024                    "  %s took %s seconds to run %s tests." % (timing[1],
1025                    timing[0], timing[2]))
1026        self._printer.print_timing("")
1027
1028    def _print_statistics_for_test_timings(self, title, timings):
1029        """Prints the median, mean and standard deviation of the values in
1030        timings.
1031
1032        Args:
1033          title: Title for these timings.
1034          timings: A list of floats representing times.
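
        For example (illustrative), timings of [1.0, 2.0, 4.0] would print a
        median of 2.0, a mean of ~2.33, and a standard deviation of ~1.25.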
1035        """
1036        self._printer.print_timing(title)
1037        timings.sort()
1038
1039        num_tests = len(timings)
1040        if not num_tests:
1041            return
1042        percentile90 = timings[int(.9 * num_tests)]
1043        percentile99 = timings[int(.99 * num_tests)]
1044
        if num_tests % 2 == 1:
            median = timings[(num_tests - 1) / 2]
1047        else:
1048            lower = timings[num_tests / 2 - 1]
1049            upper = timings[num_tests / 2]
1050            median = (float(lower + upper)) / 2
1051
1052        mean = sum(timings) / num_tests
1053
        sum_of_deviations = 0
        for timing in timings:
            sum_of_deviations += math.pow(timing - mean, 2)

        std_deviation = math.sqrt(sum_of_deviations / num_tests)
1058        self._printer.print_timing("  Median:          %6.3f" % median)
1059        self._printer.print_timing("  Mean:            %6.3f" % mean)
1060        self._printer.print_timing("  90th percentile: %6.3f" % percentile90)
1061        self._printer.print_timing("  99th percentile: %6.3f" % percentile99)
1062        self._printer.print_timing("  Standard dev:    %6.3f" % std_deviation)
1063        self._printer.print_timing("")
1064
1065    def _print_result_summary(self, result_summary):
1066        """Print a short summary about how many tests passed.
1067
1068        Args:
1069          result_summary: information to log
1070        """
1071        failed = len(result_summary.failures)
1072        skipped = len(
1073            result_summary.tests_by_expectation[test_expectations.SKIP])
1074        total = result_summary.total
1075        passed = total - failed - skipped
1076        pct_passed = 0.0
1077        if total > 0:
1078            pct_passed = float(passed) * 100 / total
1079
1080        self._printer.print_actual("")
1081        self._printer.print_actual("=> Results: %d/%d tests passed (%.1f%%)" %
1082                     (passed, total, pct_passed))
1083        self._printer.print_actual("")
1084        self._print_result_summary_entry(result_summary,
1085            test_expectations.NOW, "Tests to be fixed")
1086
1087        self._printer.print_actual("")
1088        self._print_result_summary_entry(result_summary,
1089            test_expectations.WONTFIX,
1090            "Tests that will only be fixed if they crash (WONTFIX)")
1091        self._printer.print_actual("")
1092
1093    def _print_result_summary_entry(self, result_summary, timeline,
1094                                    heading):
1095        """Print a summary block of results for a particular timeline of test.
1096
1097        Args:
1098          result_summary: summary to print results for
          timeline: the timeline to print results for (NOW, WONTFIX, etc.)
1100          heading: a textual description of the timeline
1101        """
1102        total = len(result_summary.tests_by_timeline[timeline])
1103        not_passing = (total -
1104           len(result_summary.tests_by_expectation[test_expectations.PASS] &
1105               result_summary.tests_by_timeline[timeline]))
1106        self._printer.print_actual("=> %s (%d):" % (heading, not_passing))
1107
1108        for result in TestExpectationsFile.EXPECTATION_ORDER:
1109            if result == test_expectations.PASS:
1110                continue
1111            results = (result_summary.tests_by_expectation[result] &
1112                       result_summary.tests_by_timeline[timeline])
1113            desc = TestExpectationsFile.EXPECTATION_DESCRIPTIONS[result]
1114            if not_passing and len(results):
1115                pct = len(results) * 100.0 / not_passing
1116                self._printer.print_actual("  %5d %-24s (%4.1f%%)" %
1117                    (len(results), desc[len(results) != 1], pct))
1118
1119    def _copy_results_html_file(self):
1120        base_dir = self._port.path_from_webkit_base('Tools', 'Scripts', 'webkitpy', 'layout_tests', 'layout_package')
1121        results_file = self._fs.join(base_dir, 'json_results.html')
1122        # FIXME: What should we do if this doesn't exist (e.g., in unit tests)?
1123        if self._fs.exists(results_file):
1124            self._fs.copyfile(results_file, self._fs.join(self._results_directory, "results.html"))
1125
1126    def _show_results_html_file(self, result_summary):
1127        """Shows the results.html page."""
1128        if self._options.full_results_html:
1129            test_files = result_summary.failures.keys()
1130        else:
1131            unexpected_failures = self._get_failures(result_summary, include_crashes=True)
1132            test_files = unexpected_failures.keys()
1133
1134        if not len(test_files):
1135            return
1136
1137        results_filename = self._fs.join(self._results_directory, "results.html")
1138        self._port.show_results_html_file(results_filename)
1139
1140
1141def read_test_files(fs, files):
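    """Returns the list of tests named in the given --test-list files,
    one test per line; comments and blank lines are ignored."""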
1142    tests = []
    for filename in files:
        try:
            file_contents = fs.read_text_file(filename).split('\n')
            for line in file_contents:
                line = test_expectations.strip_comments(line)
                if line:
                    tests.append(line)
        except IOError, e:
            if e.errno == errno.ENOENT:
                _log.critical('')
                _log.critical('--test-list file "%s" not found' % filename)
1154            raise
1155    return tests
1156