# Copyright (C) 2011 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import logging
import re
import time

from webkitpy.layout_tests.controllers import repaint_overlay
from webkitpy.layout_tests.controllers import test_result_writer
from webkitpy.layout_tests.port.driver import DeviceFailure, DriverInput, DriverOutput
from webkitpy.layout_tests.models import test_expectations
from webkitpy.layout_tests.models import test_failures
from webkitpy.layout_tests.models.test_results import TestResult
from webkitpy.layout_tests.models import testharness_results


# Module-level logger shared by run_single_test() and SingleTestRunner.
_log = logging.getLogger(__name__)


def run_single_test(port, options, results_directory, worker_name, driver, test_input, stop_when_done):
    """Run one test with a SingleTestRunner and return its TestResult.

    All arguments are forwarded unchanged to the SingleTestRunner constructor.
    If running the test raises DeviceFailure, the error is logged and a
    TestResult for test_input.test_name with device_failed=True is returned
    instead of letting the exception propagate.
    """
    runner = SingleTestRunner(port, options, results_directory, worker_name, driver, test_input, stop_when_done)
    try:
        return runner.run()
    except DeviceFailure as e:
        _log.error("device failed: %s", str(e))
        return TestResult(test_input.test_name, device_failed=True)


5501b3bc768461bd303bff39f8cd1663682254e407joi@chromium.orgclass SingleTestRunner(object):
5601b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org    (ALONGSIDE_TEST, PLATFORM_DIR, VERSION_DIR, UPDATE) = ('alongside', 'platform', 'version', 'update')
5701b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
5801b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org    def __init__(self, port, options, results_directory, worker_name, driver, test_input, stop_when_done):
5901b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        self._port = port
6001b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        self._filesystem = port.host.filesystem
6101b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        self._options = options
6201b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        self._results_directory = results_directory
6301b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        self._driver = driver
6401b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        self._timeout = test_input.timeout
6501b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        self._worker_name = worker_name
6601b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        self._test_name = test_input.test_name
6701b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        self._should_run_pixel_test = test_input.should_run_pixel_test
6801b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        self._reference_files = test_input.reference_files
6901b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        self._should_add_missing_baselines = test_input.should_add_missing_baselines
7001b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        self._stop_when_done = stop_when_done
7101b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
7201b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        if self._reference_files:
7301b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            # Detect and report a test which has a wrong combination of expectation files.
7401b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            # For example, if 'foo.html' has two expectation files, 'foo-expected.html' and
7501b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            # 'foo-expected.txt', we should warn users. One test file must be used exclusively
7601b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            # in either layout tests or reftests, but not in both.
7701b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            for suffix in ('.txt', '.png', '.wav'):
7801b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org                expected_filename = self._port.expected_filename(self._test_name, suffix)
797b854f86ce62131d507041a6ac1072babd03a6c6joaodasilva@chromium.org                if self._filesystem.exists(expected_filename):
8001b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org                    _log.error('%s is a reftest, but has an unused expectation file. Please remove %s.',
8101b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org                        self._test_name, expected_filename)
8201b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
8301b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org    def _expected_driver_output(self):
8401b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        return DriverOutput(self._port.expected_text(self._test_name),
8501b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org                                 self._port.expected_image(self._test_name),
8601b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org                                 self._port.expected_checksum(self._test_name),
8701b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org                                 self._port.expected_audio(self._test_name))
8801b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
8901b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org    def _should_fetch_expected_checksum(self):
90ee2aae98b1aba3315f768fe9667d245ad46f3c9cjoaodasilva@chromium.org        return self._should_run_pixel_test and not (self._options.new_baseline or self._options.reset_results)
91ee2aae98b1aba3315f768fe9667d245ad46f3c9cjoaodasilva@chromium.org
92ee2aae98b1aba3315f768fe9667d245ad46f3c9cjoaodasilva@chromium.org    def _driver_input(self):
93ee2aae98b1aba3315f768fe9667d245ad46f3c9cjoaodasilva@chromium.org        # The image hash is used to avoid doing an image dump if the
94ee2aae98b1aba3315f768fe9667d245ad46f3c9cjoaodasilva@chromium.org        # checksums match, so it should be set to a blank value if we
95ee2aae98b1aba3315f768fe9667d245ad46f3c9cjoaodasilva@chromium.org        # are generating a new baseline.  (Otherwise, an image from a
9601b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        # previous run will be copied into the baseline."""
9701b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        image_hash = None
9801b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        if self._should_fetch_expected_checksum():
9901b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            image_hash = self._port.expected_checksum(self._test_name)
10001b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
10101b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        test_base = self._port.lookup_virtual_test_base(self._test_name)
10201b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        if test_base:
10301b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            # If the file actually exists under the virtual dir, we want to use it (largely for virtual references),
10401b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            # but we want to use the extra command line args either way.
10501b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            if self._filesystem.exists(self._port.abspath_for_test(self._test_name)):
10601b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org                test_name = self._test_name
10701b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            else:
10801b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org                test_name = test_base
10901b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            args = self._port.lookup_virtual_test_args(self._test_name)
11001b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        else:
11101b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            test_name = self._test_name
11201b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            args = self._port.lookup_physical_test_args(self._test_name)
11301b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        return DriverInput(test_name, self._timeout, image_hash, self._should_run_pixel_test, args)
11401b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
11501b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org    def run(self):
11601b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        if self._options.enable_sanitizer:
11701b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            return self._run_sanitized_test()
11801b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        if self._reference_files:
11901b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            if self._options.reset_results:
12001b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org                reftest_type = set([reference_file[0] for reference_file in self._reference_files])
121786be15cb6c0f7d78f3eab9fda9d4266619ddbf0joaodasilva@chromium.org                result = TestResult(self._test_name, reftest_type=reftest_type)
12201b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org                result.type = test_expectations.SKIP
12301b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org                return result
12401b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            return self._run_reftest()
12501b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        if self._options.reset_results:
12601b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            return self._run_rebaseline()
12701b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        return self._run_compare_test()
12801b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
1294c53b9ff18aff36dd4eb63ae7099a388be4afb6bjoaodasilva@chromium.org    def _run_sanitized_test(self):
13001b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        # running a sanitized test means that we ignore the actual test output and just look
13101b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        # for timeouts and crashes (real or forced by the driver). Most crashes should
13201b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        # indicate problems found by a sanitizer (ASAN, LSAN, etc.), but we will report
13301b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        # on other crashes and timeouts as well in order to detect at least *some* basic failures.
13401b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        driver_output = self._driver.run_test(self._driver_input(), self._stop_when_done)
13501b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        expected_driver_output = self._expected_driver_output()
13601b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        failures = self._handle_error(driver_output)
13701b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        test_result = TestResult(self._test_name, failures, driver_output.test_time, driver_output.has_stderr(),
13801b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org                                 pid=driver_output.pid)
13901b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        test_result_writer.write_test_result(self._filesystem, self._port, self._results_directory, self._test_name, driver_output, expected_driver_output, test_result.failures)
14001b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        return test_result
14101b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
14201b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
14301b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org    def _run_compare_test(self):
14401b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        driver_output = self._driver.run_test(self._driver_input(), self._stop_when_done)
14501b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        expected_driver_output = self._expected_driver_output()
14601b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
14701b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        test_result = self._compare_output(expected_driver_output, driver_output)
14801b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        if self._should_add_missing_baselines:
14901b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org            self._add_missing_baselines(test_result, driver_output)
15001b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        test_result_writer.write_test_result(self._filesystem, self._port, self._results_directory, self._test_name, driver_output, expected_driver_output, test_result.failures)
15101b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        return test_result
15201b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
15301b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org    def _run_rebaseline(self):
15401b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org        driver_output = self._driver.run_test(self._driver_input(), self._stop_when_done)
155        failures = self._handle_error(driver_output)
156        test_result_writer.write_test_result(self._filesystem, self._port, self._results_directory, self._test_name, driver_output, None, failures)
157        # FIXME: It the test crashed or timed out, it might be better to avoid
158        # to write new baselines.
159        self._overwrite_baselines(driver_output)
160        return TestResult(self._test_name, failures, driver_output.test_time, driver_output.has_stderr(),
161                          pid=driver_output.pid)
162
163    _render_tree_dump_pattern = re.compile(r"^layer at \(\d+,\d+\) size \d+x\d+\n")
164
165    def _add_missing_baselines(self, test_result, driver_output):
166        missingImage = test_result.has_failure_matching_types(test_failures.FailureMissingImage, test_failures.FailureMissingImageHash)
167        if test_result.has_failure_matching_types(test_failures.FailureMissingResult):
168            self._save_baseline_data(driver_output.text, '.txt', self._location_for_new_baseline(driver_output.text, '.txt'))
169        if test_result.has_failure_matching_types(test_failures.FailureMissingAudio):
170            self._save_baseline_data(driver_output.audio, '.wav', self._location_for_new_baseline(driver_output.audio, '.wav'))
171        if missingImage:
172            self._save_baseline_data(driver_output.image, '.png', self._location_for_new_baseline(driver_output.image, '.png'))
173
174    def _location_for_new_baseline(self, data, extension):
175        if self._options.add_platform_exceptions:
176            return self.VERSION_DIR
177        if extension == '.png':
178            return self.PLATFORM_DIR
179        if extension == '.wav':
180            return self.ALONGSIDE_TEST
181        if extension == '.txt' and self._render_tree_dump_pattern.match(data):
182            return self.PLATFORM_DIR
183        return self.ALONGSIDE_TEST
184
185    def _overwrite_baselines(self, driver_output):
186        location = self.VERSION_DIR if self._options.add_platform_exceptions else self.UPDATE
187        self._save_baseline_data(driver_output.text, '.txt', location)
188        self._save_baseline_data(driver_output.audio, '.wav', location)
189        if self._should_run_pixel_test:
190            self._save_baseline_data(driver_output.image, '.png', location)
191
192    def _save_baseline_data(self, data, extension, location):
193        if data is None:
194            return
195        port = self._port
196        fs = self._filesystem
197        if location == self.ALONGSIDE_TEST:
198            output_dir = fs.dirname(port.abspath_for_test(self._test_name))
199        elif location == self.VERSION_DIR:
200            output_dir = fs.join(port.baseline_version_dir(), fs.dirname(self._test_name))
201        elif location == self.PLATFORM_DIR:
202            output_dir = fs.join(port.baseline_platform_dir(), fs.dirname(self._test_name))
203        elif location == self.UPDATE:
204            output_dir = fs.dirname(port.expected_filename(self._test_name, extension))
205        else:
206            raise AssertionError('unrecognized baseline location: %s' % location)
207
208        fs.maybe_make_directory(output_dir)
209        output_basename = fs.basename(fs.splitext(self._test_name)[0] + "-expected" + extension)
210        output_path = fs.join(output_dir, output_basename)
211        _log.info('Writing new expected result "%s"' % port.relative_test_filename(output_path))
212        port.update_baseline(output_path, data)
213
214    def _handle_error(self, driver_output, reference_filename=None):
215        """Returns test failures if some unusual errors happen in driver's run.
216
217        Args:
218          driver_output: The output from the driver.
219          reference_filename: The full path to the reference file which produced the driver_output.
220              This arg is optional and should be used only in reftests until we have a better way to know
221              which html file is used for producing the driver_output.
222        """
223        failures = []
224        fs = self._filesystem
225        if driver_output.timeout:
226            failures.append(test_failures.FailureTimeout(bool(reference_filename)))
227
228        if reference_filename:
229            testname = self._port.relative_test_filename(reference_filename)
230        else:
231            testname = self._test_name
232
233        if driver_output.crash:
234            failures.append(test_failures.FailureCrash(bool(reference_filename),
235                                                       driver_output.crashed_process_name,
236                                                       driver_output.crashed_pid,
237                                                       bool('No crash log found' not in driver_output.crash_log)))
238            if driver_output.error:
239                _log.debug("%s %s crashed, (stderr lines):" % (self._worker_name, testname))
240            else:
241                _log.debug("%s %s crashed, (no stderr)" % (self._worker_name, testname))
242        elif driver_output.leak:
243            failures.append(test_failures.FailureLeak(bool(reference_filename),
244                                                      driver_output.leak_log))
245            _log.debug("%s %s leaked" % (self._worker_name, testname))
246        elif driver_output.error:
247            _log.debug("%s %s output stderr lines:" % (self._worker_name, testname))
248        for line in driver_output.error.splitlines():
249            _log.debug("  %s" % line)
250        return failures
251
252    def _compare_output(self, expected_driver_output, driver_output):
253        failures = []
254        failures.extend(self._handle_error(driver_output))
255
256        if driver_output.crash:
257            # Don't continue any more if we already have a crash.
258            # In case of timeouts, we continue since we still want to see the text and image output.
259            return TestResult(self._test_name, failures, driver_output.test_time, driver_output.has_stderr(),
260                              pid=driver_output.pid)
261
262        is_testharness_test, testharness_failures = self._compare_testharness_test(driver_output, expected_driver_output)
263        if is_testharness_test:
264            failures.extend(testharness_failures)
265        else:
266            failures.extend(self._compare_text(expected_driver_output.text, driver_output.text))
267            failures.extend(self._compare_audio(expected_driver_output.audio, driver_output.audio))
268            if self._should_run_pixel_test:
269                failures.extend(self._compare_image(expected_driver_output, driver_output))
270        has_repaint_overlay = (repaint_overlay.result_contains_repaint_rects(expected_driver_output.text) or
271                               repaint_overlay.result_contains_repaint_rects(driver_output.text))
272        return TestResult(self._test_name, failures, driver_output.test_time, driver_output.has_stderr(),
273                          pid=driver_output.pid, has_repaint_overlay=has_repaint_overlay)
274
275    def _compare_testharness_test(self, driver_output, expected_driver_output):
276        if expected_driver_output.image or expected_driver_output.audio or expected_driver_output.text:
277            return False, []
278
279        if driver_output.image or driver_output.audio or self._is_render_tree(driver_output.text):
280            return False, []
281
282        text = driver_output.text or ''
283
284        if not testharness_results.is_testharness_output(text):
285            return False, []
286        if not testharness_results.is_testharness_output_passing(text):
287            return True, [test_failures.FailureTestHarnessAssertion()]
288        return True, []
289
290    def _is_render_tree(self, text):
291        return text and "layer at (0,0) size 800x600" in text
292
293    def _compare_text(self, expected_text, actual_text):
294        failures = []
295        if (expected_text and actual_text and
296            # Assuming expected_text is already normalized.
297            self._port.do_text_results_differ(expected_text, self._get_normalized_output_text(actual_text))):
298            failures.append(test_failures.FailureTextMismatch())
299        elif actual_text and not expected_text:
300            failures.append(test_failures.FailureMissingResult())
301        return failures
302
303    def _compare_audio(self, expected_audio, actual_audio):
304        failures = []
305        if (expected_audio and actual_audio and
306            self._port.do_audio_results_differ(expected_audio, actual_audio)):
307            failures.append(test_failures.FailureAudioMismatch())
308        elif actual_audio and not expected_audio:
309            failures.append(test_failures.FailureMissingAudio())
310        return failures
311
312    def _get_normalized_output_text(self, output):
313        """Returns the normalized text output, i.e. the output in which
314        the end-of-line characters are normalized to "\n"."""
315        # Running tests on Windows produces "\r\n".  The "\n" part is helpfully
316        # changed to "\r\n" by our system (Python/Cygwin), resulting in
317        # "\r\r\n", when, in fact, we wanted to compare the text output with
318        # the normalized text expectation files.
319        return output.replace("\r\r\n", "\r\n").replace("\r\n", "\n")
320
321    # FIXME: This function also creates the image diff. Maybe that work should
322    # be handled elsewhere?
323    def _compare_image(self, expected_driver_output, driver_output):
324        failures = []
325        # If we didn't produce a hash file, this test must be text-only.
326        if driver_output.image_hash is None:
327            return failures
328        if not expected_driver_output.image:
329            failures.append(test_failures.FailureMissingImage())
330        elif not expected_driver_output.image_hash:
331            failures.append(test_failures.FailureMissingImageHash())
332        elif driver_output.image_hash != expected_driver_output.image_hash:
333            diff, err_str = self._port.diff_image(expected_driver_output.image, driver_output.image)
334            if err_str:
335                _log.warning('  %s : %s' % (self._test_name, err_str))
336                failures.append(test_failures.FailureImageHashMismatch())
337                driver_output.error = (driver_output.error or '') + err_str
338            else:
339                driver_output.image_diff = diff
340                if driver_output.image_diff:
341                    failures.append(test_failures.FailureImageHashMismatch())
342                else:
343                    # See https://bugs.webkit.org/show_bug.cgi?id=69444 for why this isn't a full failure.
344                    _log.warning('  %s -> pixel hash failed (but diff passed)' % self._test_name)
345        return failures
346
347    def _run_reftest(self):
348        test_output = self._driver.run_test(self._driver_input(), self._stop_when_done)
349        total_test_time = 0
350        reference_output = None
351        test_result = None
352
353        # If the test crashed, or timed out, there's no point in running the reference at all.
354        # This can save a lot of execution time if we have a lot of crashes or timeouts.
355        if test_output.crash or test_output.timeout:
356            expected_driver_output = DriverOutput(text=None, image=None, image_hash=None, audio=None)
357            return self._compare_output(expected_driver_output, test_output)
358
359        # A reftest can have multiple match references and multiple mismatch references;
360        # the test fails if any mismatch matches and all of the matches don't match.
361        # To minimize the number of references we have to check, we run all of the mismatches first,
362        # then the matches, and short-circuit out as soon as we can.
363        # Note that sorting by the expectation sorts "!=" before "==" so this is easy to do.
364
365        putAllMismatchBeforeMatch = sorted
366        reference_test_names = []
367        for expectation, reference_filename in putAllMismatchBeforeMatch(self._reference_files):
368            if self._port.lookup_virtual_test_base(self._test_name):
369                args = self._port.lookup_virtual_test_args(self._test_name)
370            else:
371                args = self._port.lookup_physical_test_args(self._test_name)
372            reference_test_name = self._port.relative_test_filename(reference_filename)
373            reference_test_names.append(reference_test_name)
374            driver_input = DriverInput(reference_test_name, self._timeout, image_hash=None, should_run_pixel_test=True, args=args)
375            reference_output = self._driver.run_test(driver_input, self._stop_when_done)
376            test_result = self._compare_output_with_reference(reference_output, test_output, reference_filename, expectation == '!=')
377
378            if (expectation == '!=' and test_result.failures) or (expectation == '==' and not test_result.failures):
379                break
380            total_test_time += test_result.test_run_time
381
382        assert(reference_output)
383        test_result_writer.write_test_result(self._filesystem, self._port, self._results_directory, self._test_name, test_output, reference_output, test_result.failures)
384
385        # FIXME: We don't really deal with a mix of reftest types properly. We pass in a set() to reftest_type
386        # and only really handle the first of the references in the result.
387        reftest_type = list(set([reference_file[0] for reference_file in self._reference_files]))
388        return TestResult(self._test_name, test_result.failures, total_test_time + test_result.test_run_time,
389                          test_result.has_stderr, reftest_type=reftest_type, pid=test_result.pid,
390                          references=reference_test_names)
391
392    def _compare_output_with_reference(self, reference_driver_output, actual_driver_output, reference_filename, mismatch):
393        total_test_time = reference_driver_output.test_time + actual_driver_output.test_time
394        has_stderr = reference_driver_output.has_stderr() or actual_driver_output.has_stderr()
395        failures = []
396        failures.extend(self._handle_error(actual_driver_output))
397        if failures:
398            # Don't continue any more if we already have crash or timeout.
399            return TestResult(self._test_name, failures, total_test_time, has_stderr)
400        failures.extend(self._handle_error(reference_driver_output, reference_filename=reference_filename))
401        if failures:
402            return TestResult(self._test_name, failures, total_test_time, has_stderr, pid=actual_driver_output.pid)
403
404        if not reference_driver_output.image_hash and not actual_driver_output.image_hash:
405            failures.append(test_failures.FailureReftestNoImagesGenerated(reference_filename))
406        elif mismatch:
407            if reference_driver_output.image_hash == actual_driver_output.image_hash:
408                diff, err_str = self._port.diff_image(reference_driver_output.image, actual_driver_output.image)
409                if not diff:
410                    failures.append(test_failures.FailureReftestMismatchDidNotOccur(reference_filename))
411                elif err_str:
412                    _log.error(err_str)
413                else:
414                    _log.warning("  %s -> ref test hashes matched but diff failed" % self._test_name)
415
416        elif reference_driver_output.image_hash != actual_driver_output.image_hash:
417            diff, err_str = self._port.diff_image(reference_driver_output.image, actual_driver_output.image)
418            if diff:
419                failures.append(test_failures.FailureReftestMismatch(reference_filename))
420            elif err_str:
421                _log.error(err_str)
422            else:
423                _log.warning("  %s -> ref test hashes didn't match but diff passed" % self._test_name)
424
425        return TestResult(self._test_name, failures, total_test_time, has_stderr, pid=actual_driver_output.pid)
426