#!/usr/bin/env python
# Copyright (C) 2010 Google Inc. All rights reserved.
# Copyright (C) 2010 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
The TestRunner class runs a series of tests (TestType interface) against a set
of test files. If a test file fails a TestType, it returns a list of TestFailure
objects to the TestRunner. The TestRunner then aggregates the TestFailures to
create a final report.
"""

from __future__ import with_statement

import copy
import errno
import logging
import math
import Queue
import random
import sys
import time

from webkitpy.layout_tests.layout_package import json_layout_results_generator
from webkitpy.layout_tests.layout_package import json_results_generator
from webkitpy.layout_tests.layout_package import printing
from webkitpy.layout_tests.layout_package import test_expectations
from webkitpy.layout_tests.layout_package import test_failures
from webkitpy.layout_tests.layout_package import test_results
from webkitpy.layout_tests.layout_package import test_results_uploader
from webkitpy.layout_tests.layout_package.result_summary import ResultSummary
from webkitpy.layout_tests.layout_package.test_input import TestInput

from webkitpy.thirdparty import simplejson
from webkitpy.tool import grammar

_log = logging.getLogger("webkitpy.layout_tests.run_webkit_tests")

# Builder base URL where we have the archived test results.
BUILDER_BASE_URL = "http://build.chromium.org/buildbot/layout_test_results/"

TestExpectationsFile = test_expectations.TestExpectationsFile


def summarize_results(port_obj, expectations, result_summary, retry_summary, test_timings, only_unexpected):
    """Summarize any unexpected results as a dict.

    FIXME: split this data structure into a separate class?

    Args:
        port_obj: interface to port-specific hooks
        expectations: test_expectations.TestExpectations object
        result_summary: summary object from initial test runs
        retry_summary: summary object from final test run of retried tests
        test_timings: a list of TestResult objects which contain test runtimes in seconds
        only_unexpected: whether to return a summary only for the unexpected results
    Returns:
        A dictionary containing a summary of the unexpected results from the
        run, with the following fields:
        'version': a version indicator (1 in this version)
        'fixable': # of fixable tests (NOW - PASS)
        'skipped': # of skipped tests (NOW & SKIPPED)
        'num_regressions': # of non-flaky failures
        'num_flaky': # of flaky failures
        'num_passes': # of unexpected passes
        'tests': a dict of tests -> {'expected': '...', 'actual': '...', 'time_ms': ...}
    """
    results = {}
    results['version'] = 1

    test_timings_map = dict((test_result.filename, test_result.test_run_time) for test_result in test_timings)

    tbe = result_summary.tests_by_expectation
    tbt = result_summary.tests_by_timeline
    results['fixable'] = len(tbt[test_expectations.NOW] -
                             tbe[test_expectations.PASS])
    results['skipped'] = len(tbt[test_expectations.NOW] &
                             tbe[test_expectations.SKIP])

    num_passes = 0
    num_flaky = 0
    num_regressions = 0
    keywords = {}
    for expectation_string, expectation_enum in TestExpectationsFile.EXPECTATIONS.iteritems():
        keywords[expectation_enum] = expectation_string.upper()

    for modifier_string, modifier_enum in TestExpectationsFile.MODIFIERS.iteritems():
        keywords[modifier_enum] = modifier_string.upper()

    tests = {}
    original_results = result_summary.unexpected_results if only_unexpected else result_summary.results

    for filename, result in original_results.iteritems():
        # Note that if a test crashed in the original run, we ignore
        # whether or not it crashed when we retried it (if we retried it),
        # and always consider the result not flaky.
        test = port_obj.relative_test_filename(filename)
        expected = expectations.get_expectations_string(filename)
        result_type = result.type
        actual = [keywords[result_type]]

        if result_type == test_expectations.PASS:
            num_passes += 1
        elif result_type == test_expectations.CRASH:
            num_regressions += 1
        elif filename in result_summary.unexpected_results:
            if filename not in retry_summary.unexpected_results:
                actual.extend(expectations.get_expectations_string(filename).split(" "))
                num_flaky += 1
            else:
                retry_result_type = retry_summary.unexpected_results[filename].type
                if result_type != retry_result_type:
                    actual.append(keywords[retry_result_type])
                    num_flaky += 1
                else:
                    num_regressions += 1

        tests[test] = {}
        tests[test]['expected'] = expected
        tests[test]['actual'] = " ".join(actual)
        # FIXME: Set this correctly once https://webkit.org/b/37739 is fixed
        # and only set it if there actually is stderr data.
        tests[test]['has_stderr'] = False

        failure_types = [type(f) for f in result.failures]
        if test_failures.FailureMissingAudio in failure_types:
            tests[test]['is_missing_audio'] = True

        if test_failures.FailureReftestMismatch in failure_types:
            tests[test]['is_reftest'] = True

        for f in result.failures:
            # Some failure objects may carry an 'is_reftest' flag; honor it if
            # present (assumed; not all failure types define this attribute).
            if getattr(f, 'is_reftest', False):
                tests[test]['is_reftest'] = True

        if test_failures.FailureReftestMismatchDidNotOccur in failure_types:
            tests[test]['is_mismatch_reftest'] = True

        if test_failures.FailureMissingResult in failure_types:
            tests[test]['is_missing_text'] = True

        if test_failures.FailureMissingImage in failure_types or test_failures.FailureMissingImageHash in failure_types:
            tests[test]['is_missing_image'] = True

        if filename in test_timings_map:
            time_seconds = test_timings_map[filename]
            tests[test]['time_ms'] = int(1000 * time_seconds)

    results['tests'] = tests
    results['num_passes'] = num_passes
    results['num_flaky'] = num_flaky
    results['num_regressions'] = num_regressions
    # FIXME: If non-chromium ports start using an expectations file,
    # we should make this check more robust.
    results['uses_expectations_file'] = port_obj.name().find('chromium') != -1
    results['layout_tests_dir'] = port_obj.layout_tests_dir()
    results['has_wdiff'] = port_obj.wdiff_available()
    results['has_pretty_patch'] = port_obj.pretty_patch_available()

    return results
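
# Illustrative sketch of the dict returned by summarize_results(); the keys are
# the ones documented in its docstring, but every value below is hypothetical:
#
#   {
#       'version': 1,
#       'fixable': 12,
#       'skipped': 340,
#       'num_regressions': 2,
#       'num_flaky': 1,
#       'num_passes': 3,
#       'uses_expectations_file': True,
#       'layout_tests_dir': '/path/to/LayoutTests',
#       'has_wdiff': False,
#       'has_pretty_patch': False,
#       'tests': {
#           'fast/css/example.html': {
#               'expected': 'PASS',
#               'actual': 'TEXT',
#               'has_stderr': False,
#               'time_ms': 42,
#           },
#       },
#   }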


class TestRunInterruptedException(Exception):
    """Raised when a test run should be stopped immediately."""
    def __init__(self, reason):
        self.reason = reason

    def __reduce__(self):
        return self.__class__, (self.reason,)


class TestRunner:
    """A class for managing running a series of tests on a series of layout
    test files."""

    # The per-test timeout in milliseconds, if no --time-out-ms option was
    # given to run_webkit_tests. This should correspond to the default timeout
    # in DumpRenderTree.
    DEFAULT_TEST_TIMEOUT_MS = 6 * 1000

    def __init__(self, port, options, printer):
        """Initialize test runner data structures.

        Args:
            port: an object implementing port-specific functionality
            options: a dictionary of command line options
            printer: a Printer object to record updates to.
        """
        self._port = port
        self._fs = port._filesystem
        self._options = options
        self._printer = printer
        self._message_broker = None

        self.HTTP_SUBDIR = self._fs.join('', 'http', '')
        self.WEBSOCKET_SUBDIR = self._fs.join('', 'websocket', '')
        self.LAYOUT_TESTS_DIRECTORY = "LayoutTests" + self._fs.sep

        # disable wss server. need to install pyOpenSSL on buildbots.
        # self._websocket_secure_server = websocket_server.PyWebSocket(
        #     options.results_directory, use_tls=True, port=9323)

        # a set of test files, and the same tests as a list
        self._test_files = set()
        self._test_files_list = None
        self._result_queue = Queue.Queue()
        self._retrying = False
        self._results_directory = self._port.results_directory()

    def collect_tests(self, args, last_unexpected_results):
        """Find all the files to test.

        Args:
            args: list of test arguments from the command line
            last_unexpected_results: list of unexpected results to retest, if any

        """
        paths = self._strip_test_dir_prefixes(args)
        paths += last_unexpected_results
        if self._options.test_list:
            paths += self._strip_test_dir_prefixes(read_test_files(self._fs, self._options.test_list))
        self._test_files = self._port.tests(paths)

    def _strip_test_dir_prefixes(self, paths):
        return [self._strip_test_dir_prefix(path) for path in paths if path]

    def _strip_test_dir_prefix(self, path):
        if path.startswith(self.LAYOUT_TESTS_DIRECTORY):
            return path[len(self.LAYOUT_TESTS_DIRECTORY):]
        return path

    def lint(self):
        lint_failed = False
        for test_configuration in self._port.all_test_configurations():
            try:
                self.lint_expectations(test_configuration)
            except test_expectations.ParseError:
                lint_failed = True
                self._printer.write("")

        if lint_failed:
            _log.error("Lint failed.")
            return -1

        _log.info("Lint succeeded.")
        return 0

    def lint_expectations(self, config):
        port = self._port
        test_expectations.TestExpectations(
            port,
            None,
            port.test_expectations(),
            config,
            self._options.lint_test_files,
            port.test_expectations_overrides())

    def parse_expectations(self):
        """Parse the expectations from the test_list files and return a data
        structure holding them. Throws an error if the test_list files have
        invalid syntax."""
        port = self._port
        self._expectations = test_expectations.TestExpectations(
            port,
            self._test_files,
            port.test_expectations(),
            port.test_configuration(),
            self._options.lint_test_files,
            port.test_expectations_overrides())

    # FIXME: This method is way too long and needs to be broken into pieces.
    def prepare_lists_and_print_output(self):
        """Create appropriate subsets of the test lists and return a
        ResultSummary object. Also print the expected test counts.
        """

        # Remove skipped - both fixable and ignored - files from the
        # top-level list of files to test.
        num_all_test_files = len(self._test_files)
        self._printer.print_expected("Found: %d tests" %
                                     (len(self._test_files)))
        if not num_all_test_files:
            _log.critical('No tests to run.')
            return None

        skipped = set()
        if num_all_test_files > 1 and not self._options.force:
            skipped = self._expectations.get_tests_with_result_type(
                test_expectations.SKIP)
            self._test_files -= skipped

        # Create a sorted list of test files so the subset chunk,
        # if used, contains alphabetically consecutive tests.
        self._test_files_list = list(self._test_files)
        if self._options.randomize_order:
            random.shuffle(self._test_files_list)
        else:
            self._test_files_list.sort()

        # If the user specifies they just want to run a subset of the tests,
        # just grab a subset of the non-skipped tests.
        if self._options.run_chunk or self._options.run_part:
            chunk_value = self._options.run_chunk or self._options.run_part
            test_files = self._test_files_list
            try:
                (chunk_num, chunk_len) = chunk_value.split(":")
                chunk_num = int(chunk_num)
                assert(chunk_num >= 0)
                test_size = int(chunk_len)
                assert(test_size > 0)
            except:
                _log.critical("invalid chunk '%s'" % chunk_value)
                return None

            # Get the number of tests
            num_tests = len(test_files)

            # Get the start offset of the slice.
            if self._options.run_chunk:
                chunk_len = test_size
                # In this case chunk_num can be really large. We need
                # to make the slave fit within the current number of tests.
                slice_start = (chunk_num * chunk_len) % num_tests
            else:
                # Validate the data.
                assert(test_size <= num_tests)
                assert(chunk_num <= test_size)

                # To count the chunk_len, and make sure we don't skip
                # some tests, we round to the next value that fits exactly
                # all the parts.
                rounded_tests = num_tests
                if rounded_tests % test_size != 0:
                    rounded_tests = (num_tests + test_size -
                                     (num_tests % test_size))

                chunk_len = rounded_tests / test_size
                slice_start = chunk_len * (chunk_num - 1)
                # It does not matter if we go over test_size.

            # Get the end offset of the slice.
            slice_end = min(num_tests, slice_start + chunk_len)

            files = test_files[slice_start:slice_end]

            tests_run_msg = 'Running: %d tests (chunk slice [%d:%d] of %d)' % (
                (slice_end - slice_start), slice_start, slice_end, num_tests)
            self._printer.print_expected(tests_run_msg)

            # If we reached the end and we don't have enough tests, we run some
            # from the beginning.
            if slice_end - slice_start < chunk_len:
                extra = chunk_len - (slice_end - slice_start)
                extra_msg = (' last chunk is partial, appending [0:%d]' %
                             extra)
                self._printer.print_expected(extra_msg)
                tests_run_msg += "\n" + extra_msg
                files.extend(test_files[0:extra])
            tests_run_filename = self._fs.join(self._results_directory, "tests_run.txt")
            self._fs.write_text_file(tests_run_filename, tests_run_msg)

            len_skip_chunk = int(len(files) * len(skipped) /
                                 float(len(self._test_files)))
            skip_chunk_list = list(skipped)[0:len_skip_chunk]
            skip_chunk = set(skip_chunk_list)

            # Update expectations so that the stats are calculated correctly.
            # We need to pass a list that includes the right # of skipped files
            # to ParseExpectations so that ResultSummary() will get the correct
            # stats. So, we add in the subset of skipped files, and then
            # subtract them back out.
            self._test_files_list = files + skip_chunk_list
            self._test_files = set(self._test_files_list)

            self.parse_expectations()

            self._test_files = set(files)
            self._test_files_list = files
        else:
            skip_chunk = skipped

        result_summary = ResultSummary(self._expectations,
            self._test_files | skip_chunk)
        self._print_expected_results_of_type(result_summary,
            test_expectations.PASS, "passes")
        self._print_expected_results_of_type(result_summary,
            test_expectations.FAIL, "failures")
        self._print_expected_results_of_type(result_summary,
            test_expectations.FLAKY, "flaky")
        self._print_expected_results_of_type(result_summary,
            test_expectations.SKIP, "skipped")

        if self._options.force:
            self._printer.print_expected('Running all tests, including '
                                         'skips (--force)')
        else:
            # Note that we don't actually run the skipped tests (they were
            # subtracted out of self._test_files, above), but we stub out the
            # results here so the statistics can remain accurate.
            for test in skip_chunk:
                result = test_results.TestResult(test)
                result.type = test_expectations.SKIP
                result_summary.add(result, expected=True)
        self._printer.print_expected('')

        # Check to make sure we didn't filter out all of the tests.
        if not len(self._test_files):
            _log.info("All tests are being skipped")
            return None

        return result_summary
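
    # Illustrative sketch of the chunk math in prepare_lists_and_print_output()
    # (hypothetical numbers): with 25 non-skipped tests, --run-chunk=3:10 gives
    # chunk_len = 10 and slice_start = (3 * 10) % 25 = 5, so tests[5:15] run;
    # --run-part=2:4 rounds 25 up to 28, giving chunk_len = 28 / 4 = 7 and
    # slice_start = 7 * (2 - 1) = 7, so tests[7:14] run.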

    def _get_dir_for_test_file(self, test_file):
        """Returns the highest-level directory by which to shard the given
        test file."""
        index = test_file.rfind(self._fs.sep + self.LAYOUT_TESTS_DIRECTORY)

        test_file = test_file[index + len(self.LAYOUT_TESTS_DIRECTORY):]
        test_file_parts = test_file.split(self._fs.sep, 1)
        directory = test_file_parts[0]
        test_file = test_file_parts[1]

        # The http tests are very stable on mac/linux.
        # TODO(ojan): Make the http server on Windows be apache so we can
        # turn on sharding the http tests there as well. Switching to apache
        # is what made them stable on linux/mac.
        return_value = directory
        while ((directory != 'http' or sys.platform in ('darwin', 'linux2'))
               and test_file.find(self._fs.sep) >= 0):
            test_file_parts = test_file.split(self._fs.sep, 1)
            directory = test_file_parts[0]
            return_value = self._fs.join(return_value, directory)
            test_file = test_file_parts[1]

        return return_value

    def _get_test_input_for_file(self, test_file):
        """Returns the appropriate TestInput object for the file. Mostly this
        is used for looking up the timeout value (in ms) to use for the given
        test."""
        if self._test_is_slow(test_file):
            return TestInput(test_file, self._options.slow_time_out_ms)
        return TestInput(test_file, self._options.time_out_ms)

    def _test_requires_lock(self, test_file):
        """Return True if the test needs to be locked when
        running multiple copies of NRWTs."""
        split_path = test_file.split(self._port._filesystem.sep)
        return 'http' in split_path or 'websocket' in split_path

    def _test_is_slow(self, test_file):
        return self._expectations.has_modifier(test_file,
                                               test_expectations.SLOW)

    def _shard_tests(self, test_files, use_real_shards):
        """Groups tests into batches.
        This helps ensure that tests that depend on each other (aka bad tests!)
        continue to run together, as most cross-test dependencies tend to
        occur within the same directory. If use_real_shards is False, we
        put each (non-HTTP/websocket) test into its own shard for maximum
        concurrency instead of trying to do any sort of real sharding.

        Return:
            A list of (shard name, [TestInput, ...]) tuples, where the shard
            name is usually the directory the tests share.
        """
        # FIXME: when we added http locking, we changed how this works such
        # that we always lump all of the HTTP threads into a single shard.
        # That will slow down experimental-fully-parallel, but it's unclear
        # what the best alternative is, short of completely revamping how we
        # track when to grab the lock.

        test_lists = []
        tests_to_http_lock = []
        if not use_real_shards:
            for test_file in test_files:
                test_input = self._get_test_input_for_file(test_file)
                if self._test_requires_lock(test_file):
                    tests_to_http_lock.append(test_input)
                else:
                    test_lists.append((".", [test_input]))
        else:
            tests_by_dir = {}
            for test_file in test_files:
                directory = self._get_dir_for_test_file(test_file)
                test_input = self._get_test_input_for_file(test_file)
                if self._test_requires_lock(test_file):
                    tests_to_http_lock.append(test_input)
                else:
                    tests_by_dir.setdefault(directory, [])
                    tests_by_dir[directory].append(test_input)
            # Sort by the number of tests in the dir so that the ones with the
            # most tests get run first in order to maximize parallelization.
            # Number of tests is a good enough, but not perfect, approximation
            # of how long that set of tests will take to run. We can't just use
            # a PriorityQueue until we move to Python 2.6.
            for directory in tests_by_dir:
                test_list = tests_by_dir[directory]
                test_list_tuple = (directory, test_list)
                test_lists.append(test_list_tuple)
            test_lists.sort(lambda a, b: cmp(len(b[1]), len(a[1])))

        # Put the http tests first. There are only a couple hundred of them,
        # but each http test takes a very long time to run, so sorting by the
        # number of tests doesn't accurately capture how long they take to run.
        if tests_to_http_lock:
            test_lists.insert(0, ("tests_to_http_lock", tests_to_http_lock))

        return test_lists
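
    # Illustrative sketch of what _shard_tests() returns (paths and timeouts
    # are hypothetical): with use_real_shards=True it could look like
    #   [("tests_to_http_lock", [TestInput(<http test>, 6000), ...]),
    #    ("fast/css", [TestInput(<fast/css test>, 6000), ...])]
    # while with use_real_shards=False each non-HTTP/websocket test gets its
    # own (".", [TestInput(...)]) entry and the HTTP tests still share the
    # single "tests_to_http_lock" shard.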

    def _contains_tests(self, subdir):
        for test_file in self._test_files:
            if test_file.find(subdir) >= 0:
                return True
        return False

    def _num_workers(self, num_shards):
        num_workers = min(int(self._options.child_processes), num_shards)
        driver_name = self._port.driver_name()
        if num_workers == 1:
            self._printer.print_config("Running 1 %s over %s" %
                (driver_name, grammar.pluralize('shard', num_shards)))
        else:
            self._printer.print_config("Running %d %ss in parallel over %d shards" %
                (num_workers, driver_name, num_shards))
        return num_workers

    def _run_tests(self, file_list, result_summary):
        """Runs the tests in the file_list.

        Return: A tuple (interrupted, keyboard_interrupted, thread_timings,
            test_timings, individual_test_timings)
            interrupted is whether the run was interrupted
            keyboard_interrupted is whether the interruption was because someone
            typed Ctrl^C
            thread_timings is a list of dicts with the total runtime
            of each thread with 'name', 'num_tests', 'total_time' properties
            test_timings is a list of timings for each sharded subdirectory
            of the form [time, directory_name, num_tests]
            individual_test_timings is a list of run times for each test
            in the form {filename:filename, test_run_time:test_run_time}
            result_summary: summary object to populate with the results
        """
        raise NotImplementedError()

    def update(self):
        self.update_summary(self._current_result_summary)

    def _collect_timing_info(self, threads):
        test_timings = {}
        individual_test_timings = []
        thread_timings = []

        for thread in threads:
            thread_timings.append({'name': thread.getName(),
                                   'num_tests': thread.get_num_tests(),
                                   'total_time': thread.get_total_time()})
            test_timings.update(thread.get_test_group_timing_stats())
            individual_test_timings.extend(thread.get_test_results())

        return (thread_timings, test_timings, individual_test_timings)

    def needs_http(self):
        """Returns whether the test runner needs an HTTP server."""
        return self._contains_tests(self.HTTP_SUBDIR)

    def needs_websocket(self):
        """Returns whether the test runner needs a WEBSOCKET server."""
        return self._contains_tests(self.WEBSOCKET_SUBDIR)

    def set_up_run(self):
        """Configures the system to be ready to run tests.

        Returns a ResultSummary object if we should continue to run tests,
        or None if we should abort.

        """
        # This must be started before we check the system dependencies,
        # since the helper may do things to make the setup correct.
        self._printer.print_update("Starting helper ...")
        self._port.start_helper()

        # Check that the system dependencies (themes, fonts, ...) are correct.
        if not self._options.nocheck_sys_deps:
            self._printer.print_update("Checking system dependencies ...")
            if not self._port.check_sys_deps(self.needs_http()):
                self._port.stop_helper()
                return None

        if self._options.clobber_old_results:
            self._clobber_old_results()

        # Create the output directory if it doesn't already exist.
        self._port.maybe_make_directory(self._results_directory)

        self._port.setup_test_run()

        self._printer.print_update("Preparing tests ...")
        result_summary = self.prepare_lists_and_print_output()
        if not result_summary:
            return None

        return result_summary

    def run(self, result_summary):
        """Run all our tests on all our test files.

        For each test file, we run each test type. If there are any failures,
        we collect them for reporting.

        Args:
            result_summary: a summary object tracking the test results.

        Return:
            The number of unexpected results (0 == success)
        """
        # collect_tests() must have been called first to initialize us.
        # If we didn't find any files to test, we've errored out already in
        # prepare_lists_and_print_output().
        assert(len(self._test_files))

        start_time = time.time()

        interrupted, keyboard_interrupted, thread_timings, test_timings, \
            individual_test_timings = (
            self._run_tests(self._test_files_list, result_summary))

        # We exclude the crashes from the list of results to retry, because
        # we want to treat even a potentially flaky crash as an error.
        failures = self._get_failures(result_summary, include_crashes=False)
        retry_summary = result_summary
        while (len(failures) and self._options.retry_failures and
               not self._retrying and not interrupted):
            _log.info('')
            _log.info("Retrying %d unexpected failure(s) ..." % len(failures))
            _log.info('')
            self._retrying = True
            retry_summary = ResultSummary(self._expectations, failures.keys())
            # Note that we intentionally ignore the return value here.
            self._run_tests(failures.keys(), retry_summary)
            failures = self._get_failures(retry_summary, include_crashes=True)

        end_time = time.time()

        self._print_timing_statistics(end_time - start_time,
                                      thread_timings, test_timings,
                                      individual_test_timings,
                                      result_summary)

        self._print_result_summary(result_summary)

        sys.stdout.flush()
        sys.stderr.flush()

        self._printer.print_one_line_summary(result_summary.total,
                                             result_summary.expected,
                                             result_summary.unexpected)

        unexpected_results = summarize_results(self._port,
            self._expectations, result_summary, retry_summary, individual_test_timings, only_unexpected=True)
        self._printer.print_unexpected_results(unexpected_results)

        # FIXME: remove record_results. It's just used for testing. There's no need
        # for it to be a commandline argument.
        if (self._options.record_results and not self._options.dry_run and
                not keyboard_interrupted):
            # Write the same data to log files and upload generated JSON files
            # to appengine server.
            summarized_results = summarize_results(self._port,
                self._expectations, result_summary, retry_summary, individual_test_timings, only_unexpected=False)
            self._upload_json_files(unexpected_results, summarized_results, result_summary,
                                    individual_test_timings)

        # Write the summary to disk (results.html) and display it if requested.
        if not self._options.dry_run:
            self._copy_results_html_file()
            if self._options.show_results:
                self._show_results_html_file(result_summary)

        # Now that we've completed all the processing we can, we re-raise
        # a KeyboardInterrupt if necessary so the caller can handle it.
        if keyboard_interrupted:
            raise KeyboardInterrupt

        # Ignore flaky failures and unexpected passes so we don't turn the
        # bot red for those.
        return unexpected_results['num_regressions']

    def clean_up_run(self):
        """Restores the system after we're done running tests."""

        _log.debug("flushing stdout")
        sys.stdout.flush()
        _log.debug("flushing stderr")
        sys.stderr.flush()
        _log.debug("stopping helper")
        self._port.stop_helper()

    def update_summary(self, result_summary):
        """Update the summary and print results with any completed tests."""
        while True:
            try:
                result = test_results.TestResult.loads(self._result_queue.get_nowait())
            except Queue.Empty:
                return

            self._update_summary_with_result(result_summary, result)

    def _update_summary_with_result(self, result_summary, result):
        expected = self._expectations.matches_an_expected_result(
            result.filename, result.type, self._options.pixel_tests)
        result_summary.add(result, expected)
        exp_str = self._expectations.get_expectations_string(
            result.filename)
        got_str = self._expectations.expectation_to_string(result.type)
        self._printer.print_test_result(result, expected, exp_str, got_str)
        self._printer.print_progress(result_summary, self._retrying,
                                     self._test_files_list)

        def interrupt_if_at_failure_limit(limit, count, message):
            if limit and count >= limit:
                raise TestRunInterruptedException(message % count)

        interrupt_if_at_failure_limit(
            self._options.exit_after_n_failures,
            result_summary.unexpected_failures,
            "Aborting run since %d failures were reached")
        interrupt_if_at_failure_limit(
            self._options.exit_after_n_crashes_or_timeouts,
            result_summary.unexpected_crashes_or_timeouts,
            "Aborting run since %d crashes or timeouts were reached")

    def _clobber_old_results(self):
        # Just clobber the actual test results directories since the other
        # files in the results directory are explicitly used for cross-run
        # tracking.
        self._printer.print_update("Clobbering old results in %s" %
                                   self._results_directory)
        layout_tests_dir = self._port.layout_tests_dir()
        possible_dirs = self._port.test_dirs()
        for dirname in possible_dirs:
            if self._fs.isdir(self._fs.join(layout_tests_dir, dirname)):
                self._fs.rmtree(self._fs.join(self._results_directory, dirname))

    def _get_failures(self, result_summary, include_crashes):
        """Filters a dict of results and returns only the failures.

        Args:
            result_summary: the results of the test run
            include_crashes: whether crashes are included in the output.
                We use False when finding the list of failures to retry
                to see if the results were flaky. Although the crashes may also be
                flaky, we treat them as if they aren't so that they're not ignored.
        Returns:
            a dict of files -> results
        """
        failed_results = {}
        for test, result in result_summary.unexpected_results.iteritems():
            if (result.type == test_expectations.PASS or
                (result.type == test_expectations.CRASH and not include_crashes)):
                continue
            failed_results[test] = result.type

        return failed_results

    def _char_for_result(self, result):
        result = result.lower()
        if result in TestExpectationsFile.EXPECTATIONS:
            result_enum_value = TestExpectationsFile.EXPECTATIONS[result]
        else:
            result_enum_value = TestExpectationsFile.MODIFIERS[result]
        return json_layout_results_generator.JSONLayoutResultsGenerator.FAILURE_TO_CHAR[result_enum_value]

    def _upload_json_files(self, unexpected_results, summarized_results, result_summary,
                           individual_test_timings):
        """Writes the results of the test run as JSON files into the results
        dir and uploads the files to the appengine server.

        There are three different files written into the results dir:
            unexpected_results.json: A short list of any unexpected results.
                This is used by the buildbots to display results.
            expectations.json: This is used by the flakiness dashboard.
            results.json: A full list of the results - used by the flakiness
                dashboard and the aggregate results dashboard.

        Args:
            unexpected_results: dict of unexpected results
            summarized_results: dict of results
            result_summary: full summary object
            individual_test_timings: list of test times (used by the flakiness
                dashboard).
        """
        _log.debug("Writing JSON files in %s." % self._results_directory)

        unexpected_json_path = self._fs.join(self._results_directory, "unexpected_results.json")
        json_results_generator.write_json(self._fs, unexpected_results, unexpected_json_path)

        full_results_path = self._fs.join(self._results_directory, "full_results.json")
        json_results_generator.write_json(self._fs, summarized_results, full_results_path)

        # Write a json file of the test_expectations.txt file for the layout
        # tests dashboard.
        expectations_path = self._fs.join(self._results_directory, "expectations.json")
        expectations_json = \
            self._expectations.get_expectations_json_for_all_platforms()
        self._fs.write_text_file(expectations_path,
                                 u"ADD_EXPECTATIONS(%s);" % expectations_json)

        generator = json_layout_results_generator.JSONLayoutResultsGenerator(
            self._port, self._options.builder_name, self._options.build_name,
            self._options.build_number, self._results_directory,
            BUILDER_BASE_URL, individual_test_timings,
            self._expectations, result_summary, self._test_files_list,
            self._options.test_results_server,
            "layout-tests",
            self._options.master_name)

        _log.debug("Finished writing JSON files.")

        json_files = ["expectations.json", "incremental_results.json", "full_results.json"]

        generator.upload_json_files(json_files)

    def _print_config(self):
        """Prints the configuration for the test run."""
        p = self._printer
        p.print_config("Using port '%s'" % self._port.name())
        p.print_config("Test configuration: %s" % self._port.test_configuration())
        p.print_config("Placing test results in %s" % self._results_directory)
        if self._options.new_baseline:
            p.print_config("Placing new baselines in %s" %
                           self._port.baseline_path())
        p.print_config("Using %s build" % self._options.configuration)
        if self._options.pixel_tests:
            p.print_config("Pixel tests enabled")
        else:
            p.print_config("Pixel tests disabled")

        p.print_config("Regular timeout: %s, slow test timeout: %s" %
                       (self._options.time_out_ms,
                        self._options.slow_time_out_ms))

        p.print_config('Command line: ' +
                       ' '.join(self._port.driver_cmd_line()))
        p.print_config("Worker model: %s" % self._options.worker_model)
        p.print_config("")

    def _print_expected_results_of_type(self, result_summary,
                                        result_type, result_type_str):
        """Print the number of the tests in a given result class.

        Args:
            result_summary - the object containing all the results to report on
            result_type - the particular result type to report in the summary.
            result_type_str - a string description of the result_type.
        """
        tests = self._expectations.get_tests_with_result_type(result_type)
        now = result_summary.tests_by_timeline[test_expectations.NOW]
        wontfix = result_summary.tests_by_timeline[test_expectations.WONTFIX]

        # We use a fancy format string in order to print the data out in a
        # nicely-aligned table.
        fmtstr = ("Expect: %%5d %%-8s (%%%dd now, %%%dd wontfix)"
                  % (self._num_digits(now), self._num_digits(wontfix)))
        self._printer.print_expected(fmtstr %
            (len(tests), result_type_str, len(tests & now), len(tests & wontfix)))

    def _num_digits(self, num):
        """Returns the number of digits needed to represent the length of a
        sequence."""
        ndigits = 1
        if len(num):
            ndigits = int(math.log10(len(num))) + 1
        return ndigits

    def _print_timing_statistics(self, total_time, thread_timings,
                                 directory_test_timings, individual_test_timings,
                                 result_summary):
        """Record timing-specific information for the test run.

        Args:
            total_time: total elapsed time (in seconds) for the test run
            thread_timings: wall clock time each thread ran for
            directory_test_timings: timing by directory
            individual_test_timings: timing by file
            result_summary: summary object for the test run
        """
        self._printer.print_timing("Test timing:")
        self._printer.print_timing(" %6.2f total testing time" % total_time)
        self._printer.print_timing("")
        self._printer.print_timing("Thread timing:")
        cuml_time = 0
        for t in thread_timings:
            self._printer.print_timing(" %10s: %5d tests, %6.2f secs" %
                                       (t['name'], t['num_tests'], t['total_time']))
            cuml_time += t['total_time']
        self._printer.print_timing(" %6.2f cumulative, %6.2f optimal" %
                                   (cuml_time, cuml_time / int(self._options.child_processes)))
        self._printer.print_timing("")

        self._print_aggregate_test_statistics(individual_test_timings)
        self._print_individual_test_times(individual_test_timings,
                                          result_summary)
        self._print_directory_timings(directory_test_timings)

    def _print_aggregate_test_statistics(self, individual_test_timings):
        """Prints aggregate statistics (e.g. median, mean, etc.) for all tests.
        Args:
            individual_test_timings: List of TestResults for all tests.
        """
        times_for_dump_render_tree = [test_stats.test_run_time for test_stats in individual_test_timings]
        self._print_statistics_for_test_timings("PER TEST TIME IN TESTSHELL (seconds):",
                                                times_for_dump_render_tree)

    def _print_individual_test_times(self, individual_test_timings,
                                     result_summary):
        """Prints the run times for slow, timeout and crash tests.
        Args:
            individual_test_timings: List of TestStats for all tests.
            result_summary: summary object for test run
        """
        # Reverse-sort by the time spent in DumpRenderTree.
        individual_test_timings.sort(lambda a, b:
            cmp(b.test_run_time, a.test_run_time))

        num_printed = 0
        slow_tests = []
        timeout_or_crash_tests = []
        unexpected_slow_tests = []
        for test_tuple in individual_test_timings:
            filename = test_tuple.filename
            is_timeout_crash_or_slow = False
            if self._test_is_slow(filename):
                is_timeout_crash_or_slow = True
                slow_tests.append(test_tuple)

            if filename in result_summary.failures:
                result = result_summary.results[filename].type
                if (result == test_expectations.TIMEOUT or
                    result == test_expectations.CRASH):
                    is_timeout_crash_or_slow = True
                    timeout_or_crash_tests.append(test_tuple)

            if (not is_timeout_crash_or_slow and
                num_printed < printing.NUM_SLOW_TESTS_TO_LOG):
                num_printed = num_printed + 1
                unexpected_slow_tests.append(test_tuple)

        self._printer.print_timing("")
        self._print_test_list_timing("%s slowest tests that are not "
            "marked as SLOW and did not timeout/crash:" %
            printing.NUM_SLOW_TESTS_TO_LOG, unexpected_slow_tests)
        self._printer.print_timing("")
        self._print_test_list_timing("Tests marked as SLOW:", slow_tests)
        self._printer.print_timing("")
        self._print_test_list_timing("Tests that timed out or crashed:",
                                     timeout_or_crash_tests)
        self._printer.print_timing("")

    def _print_test_list_timing(self, title, test_list):
        """Print timing info for each test.

        Args:
            title: section heading
            test_list: tests that fall in this section
        """
        if self._printer.disabled('slowest'):
            return

        self._printer.print_timing(title)
        for test_tuple in test_list:
            filename = test_tuple.filename[len(
                self._port.layout_tests_dir()) + 1:]
            filename = filename.replace('\\', '/')
            test_run_time = round(test_tuple.test_run_time, 1)
            self._printer.print_timing(" %s took %s seconds" %
                                       (filename, test_run_time))

    def _print_directory_timings(self, directory_test_timings):
        """Print timing info by directory for any directories that
        take > 10 seconds to run.

        Args:
            directory_test_timings: time info for each directory
        """
        timings = []
        for directory in directory_test_timings:
            num_tests, time_for_directory = directory_test_timings[directory]
            timings.append((round(time_for_directory, 1), directory,
                            num_tests))
        timings.sort()

        self._printer.print_timing("Time to process slowest subdirectories:")
        min_seconds_to_print = 10
        for timing in timings:
            if timing[0] > min_seconds_to_print:
                self._printer.print_timing(
                    " %s took %s seconds to run %s tests." % (timing[1],
                    timing[0], timing[2]))
        self._printer.print_timing("")

    def _print_statistics_for_test_timings(self, title, timings):
        """Prints the median, mean and standard deviation of the values in
        timings.

        Args:
            title: Title for these timings.
            timings: A list of floats representing times.
        """
        self._printer.print_timing(title)
        timings.sort()

        num_tests = len(timings)
        if not num_tests:
            return
        percentile90 = timings[int(.9 * num_tests)]
        percentile99 = timings[int(.99 * num_tests)]

        if num_tests % 2 == 1:
            # For an odd count the median is the middle element.
            median = timings[(num_tests - 1) / 2]
        else:
            lower = timings[num_tests / 2 - 1]
            upper = timings[num_tests / 2]
            median = (float(lower + upper)) / 2

        mean = sum(timings) / num_tests

        # Accumulate the squared deviations before taking the square root.
        sum_of_deviations = 0
        for time in timings:
            sum_of_deviations += math.pow(time - mean, 2)

        std_deviation = math.sqrt(sum_of_deviations / num_tests)
        self._printer.print_timing(" Median: %6.3f" % median)
        self._printer.print_timing(" Mean: %6.3f" % mean)
        self._printer.print_timing(" 90th percentile: %6.3f" % percentile90)
        self._printer.print_timing(" 99th percentile: %6.3f" % percentile99)
        self._printer.print_timing(" Standard dev: %6.3f" % std_deviation)
        self._printer.print_timing("")

    def _print_result_summary(self, result_summary):
        """Print a short summary about how many tests passed.

        Args:
            result_summary: information to log
        """
        failed = len(result_summary.failures)
        skipped = len(
            result_summary.tests_by_expectation[test_expectations.SKIP])
        total = result_summary.total
        passed = total - failed - skipped
        pct_passed = 0.0
        if total > 0:
            pct_passed = float(passed) * 100 / total

        self._printer.print_actual("")
        self._printer.print_actual("=> Results: %d/%d tests passed (%.1f%%)" %
                                   (passed, total, pct_passed))
        self._printer.print_actual("")
        self._print_result_summary_entry(result_summary,
            test_expectations.NOW, "Tests to be fixed")

        self._printer.print_actual("")
        self._print_result_summary_entry(result_summary,
            test_expectations.WONTFIX,
            "Tests that will only be fixed if they crash (WONTFIX)")
        self._printer.print_actual("")

    def _print_result_summary_entry(self, result_summary, timeline,
                                    heading):
        """Print a summary block of results for a particular timeline of test.

        Args:
            result_summary: summary to print results for
            timeline: the timeline to print results for (NOW, WONTFIX, etc.)
            heading: a textual description of the timeline
        """
        total = len(result_summary.tests_by_timeline[timeline])
        not_passing = (total -
            len(result_summary.tests_by_expectation[test_expectations.PASS] &
                result_summary.tests_by_timeline[timeline]))
        self._printer.print_actual("=> %s (%d):" % (heading, not_passing))

        for result in TestExpectationsFile.EXPECTATION_ORDER:
            if result == test_expectations.PASS:
                continue
            results = (result_summary.tests_by_expectation[result] &
                       result_summary.tests_by_timeline[timeline])
            desc = TestExpectationsFile.EXPECTATION_DESCRIPTIONS[result]
            if not_passing and len(results):
                pct = len(results) * 100.0 / not_passing
                self._printer.print_actual(" %5d %-24s (%4.1f%%)" %
                    (len(results), desc[len(results) != 1], pct))

    def _copy_results_html_file(self):
        base_dir = self._port.path_from_webkit_base('Tools', 'Scripts', 'webkitpy', 'layout_tests', 'layout_package')
        results_file = self._fs.join(base_dir, 'json_results.html')
        # FIXME: What should we do if this doesn't exist (e.g., in unit tests)?
        if self._fs.exists(results_file):
            self._fs.copyfile(results_file, self._fs.join(self._results_directory, "results.html"))

    def _show_results_html_file(self, result_summary):
        """Shows the results.html page."""
        if self._options.full_results_html:
            test_files = result_summary.failures.keys()
        else:
            unexpected_failures = self._get_failures(result_summary, include_crashes=True)
            test_files = unexpected_failures.keys()

        if not len(test_files):
            return

        results_filename = self._fs.join(self._results_directory, "results.html")
        self._port.show_results_html_file(results_filename)


def read_test_files(fs, files):
    tests = []
    for file in files:
        try:
            file_contents = fs.read_text_file(file).split('\n')
            for line in file_contents:
                line = test_expectations.strip_comments(line)
                if line:
                    tests.append(line)
        except IOError, e:
            if e.errno == errno.ENOENT:
                _log.critical('')
                _log.critical('--test-list file "%s" not found' % file)
            raise
    return tests
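

# Illustrative sketch of a --test-list file consumed by read_test_files():
# each non-blank line that survives test_expectations.strip_comments() is
# treated as one test path, e.g. (hypothetical paths):
#
#   fast/css/example-a.html
#   http/tests/misc/example-b.html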