# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import bz2
import glob
import json
import logging
import os
import re
import shutil
import tempfile
import time
import xml.etree.ElementTree as et
from autotest_lib.client.bin import test, utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.cros import cros_logging, service_stopper
from autotest_lib.client.cros.graphics import graphics_utils

RERUN_RATIO = 0.02  # Ratio of failing tests to rerun in hasty mode.


class graphics_dEQP(graphics_utils.GraphicsTest):
    """Run the drawElements Quality Program test suite."""
    version = 1
    _services = None
    _hasty = False
    _hasty_batch_size = 100  # Batch size in hasty mode.
    _shard_number = 0
    _shard_count = 1
    _board = None
    _cpu_type = None
    _gpu_type = None
    _surface = None
    _filter = None
    _width = 256  # Use smallest width for which all tests run/pass.
    _height = 256  # Use smallest height for which all tests run/pass.
    _timeout = 70  # Larger than twice the dEQP watchdog timeout of 30s.
    _test_names = None
    _test_names_file = None
    _log_path = None  # Location for detailed test output logs (in /tmp/).
    _debug = False  # Analyze kernel messages.
    _log_reader = None  # Reader to analyze (kernel) messages log.
    _log_filter = re.compile('.* .* kernel:')  # Kernel messages filter.
    _env = None  # Environment for test processes.
    DEQP_MODULES = {
        'dEQP-EGL': 'egl',
        'dEQP-GLES2': 'gles2',
        'dEQP-GLES3': 'gles3',
        'dEQP-GLES31': 'gles31',
        'dEQP-VK': 'vk',
    }
    # We do not consider these results to be failures.
    TEST_RESULT_FILTER = [
        'pass', 'notsupported', 'internalerror', 'qualitywarning',
        'compatibilitywarning', 'skipped'
    ]

    def initialize(self):
        super(graphics_dEQP, self).initialize()
        self._api_helper = graphics_utils.GraphicsApiHelper()
        self._board = utils.get_board()
        self._cpu_type = utils.get_cpu_soc_family()
        self._gpu_type = utils.get_gpu_family()

        # dEQP may depend on libraries that are present only on test images.
        # Those libraries are installed in /usr/local.
        self._env = os.environ.copy()
        old_ld_path = self._env.get('LD_LIBRARY_PATH', '')
        if old_ld_path:
            self._env['LD_LIBRARY_PATH'] = ('/usr/local/lib:/usr/local/lib64:'
                                            + old_ld_path)
        else:
            self._env['LD_LIBRARY_PATH'] = '/usr/local/lib:/usr/local/lib64'

        self._services = service_stopper.ServiceStopper(['ui', 'powerd'])
        # Valid choices are fbo and pbuffer. The latter avoids dEQP assumptions.
        self._surface = 'pbuffer'
        self._services.stop_services()

    def cleanup(self):
        if self._services:
            self._services.restore_services()
        super(graphics_dEQP, self).cleanup()

    def _parse_test_results(self, result_filename,
                            test_results=None, failing_test=None):
        """Handles result files with one or more test results.

        @param result_filename: log file to parse.
        @param test_results: Parsed results will be appended to it.
        @param failing_test: Tests considered failed will be appended to it.

        @return: dictionary of parsed test results.
        """
        xml = ''
        xml_start = False
        xml_complete = False
        xml_bad = False
        result = 'ParseTestResultFail'

        if test_results is None:
            test_results = {}

        if not os.path.isfile(result_filename):
            return test_results

        with open(result_filename) as result_file:
            for line in result_file.readlines():
                # If the test terminates early, the XML will be incomplete
                # and should not be parsed.
                if line.startswith('#terminateTestCaseResult'):
                    result = line.strip().split()[1]
                    xml_bad = True
                # Will only see #endTestCaseResult if the test does not
                # terminate early.
                elif line.startswith('#endTestCaseResult'):
                    xml_complete = True
                elif xml_start:
                    xml += line
                elif line.startswith('#beginTestCaseResult'):
                    # If we see another begin before an end then something is
                    # wrong.
                    if xml_start:
                        xml_bad = True
                    else:
                        xml_start = True

                if xml_complete or xml_bad:
                    if xml_complete:
                        myparser = et.XMLParser(encoding='ISO-8859-1')
                        root = et.fromstring(xml, parser=myparser)
                        test_case = root.attrib['CasePath']
                        result = root.find('Result').get('StatusCode').strip()
                        xml_complete = False
                    test_results[result] = test_results.get(result, 0) + 1
                    if (result.lower() not in self.TEST_RESULT_FILTER and
                            failing_test is not None):
                        failing_test.append(test_case)
                    xml_bad = False
                    xml_start = False
                    result = 'ParseTestResultFail'
                    xml = ''

        return test_results

    def _load_not_passing_cases(self, test_filter):
        """Load all test cases that are in non-'Pass' expectations."""
        not_passing_cases = []
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_spec = '%s.*' % test_filter
        subset_paths = glob.glob(os.path.join(expectations_dir, subset_spec))
        for subset_file in subset_paths:
            # Filter against extra hasty failures only in hasty mode.
            if ('.Pass.bz2' not in subset_file and
                    (self._hasty or '.hasty.' not in subset_file)):
                not_passing_cases.extend(
                    bz2.BZ2File(subset_file).read().splitlines())
        not_passing_cases.sort()
        return not_passing_cases

    def _translate_name_to_api(self, name):
        """Translate test_names or test_filter to api."""
        test_prefix = name.split('.')[0]
        if test_prefix in self.DEQP_MODULES:
            api = self.DEQP_MODULES[test_prefix]
        else:
            raise error.TestFail('Failed: Invalid test name: %s' % name)
        return api

    def _get_executable(self, api):
        """Return the executable path of the api."""
        return self._api_helper.get_deqp_executable(api)

    def _can_run(self, api):
        """Check if the specific api is supported on this board."""
        return api in self._api_helper.get_supported_apis()

    def _bootstrap_new_test_cases(self, test_filter):
        """Ask dEQP for all test cases and remove non-'Pass'ing ones.

        This function will query dEQP for test cases and remove all cases that
        are not in 'Pass'ing expectations from the list. This can be used for
        incrementally updating failing/hanging tests over several runs.

        @param test_filter: string like 'dEQP-GLES2.info', 'dEQP-GLES3.stress'.

        @return: List of dEQP tests to run.
        """
        test_cases = []
        api = self._translate_name_to_api(test_filter)
        if self._can_run(api):
            executable = self._get_executable(api)
        else:
            return test_cases

        # Must be in the executable directory when running for it to find its
        # test data files!
        os.chdir(os.path.dirname(executable))

        not_passing_cases = self._load_not_passing_cases(test_filter)
        # We did not find passing cases in expectations. Assume everything else
        # that is there should not be run this time.
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_spec = '%s.*' % test_filter
        subset_paths = glob.glob(os.path.join(expectations_dir, subset_spec))
        for subset_file in subset_paths:
            # Filter against hasty failures only in hasty mode.
            if self._hasty or '.hasty.' not in subset_file:
                not_passing_cases.extend(
                    bz2.BZ2File(subset_file).read().splitlines())

        # Now ask the dEQP executable nicely for the whole list of tests. It
        # needs to be run in the executable directory. The output is a plain
        # text file named e.g. 'dEQP-GLES2-cases.txt'.
        command = ('%s '
                   '--deqp-runmode=txt-caselist '
                   '--deqp-surface-type=%s '
                   '--deqp-gl-config-name=rgba8888d24s8ms0 ' % (executable,
                                                                self._surface))
        logging.info('Running command %s', command)
        utils.run(command,
                  env=self._env,
                  timeout=60,
                  stderr_is_expected=False,
                  ignore_status=False,
                  stdin=None)

        # Now read this caselist file.
        caselist_name = '%s-cases.txt' % test_filter.split('.')[0]
        caselist_file = os.path.join(os.path.dirname(executable), caselist_name)
        if not os.path.isfile(caselist_file):
            raise error.TestFail('Failed: No caselist file at %s!' %
                                 caselist_file)

        # And remove non-'Pass'ing expectations from the caselist.
        caselist = open(caselist_file).read().splitlines()
        # Contains lines like "TEST: dEQP-GLES2.capability".
        test_cases = []
        match = 'TEST: %s' % test_filter
        logging.info('Bootstrapping test cases matching "%s".', match)
        for case in caselist:
            if case.startswith(match):
                case = case.split('TEST: ')[1]
                test_cases.append(case)

        test_cases = list(set(test_cases) - set(not_passing_cases))
        if not test_cases:
            raise error.TestFail('Failed: Unable to bootstrap %s!' % test_filter)

        test_cases.sort()
        return test_cases

    def _get_test_cases_from_names_file(self):
        if os.path.exists(self._test_names_file):
            file_path = self._test_names_file
            test_cases = [line.rstrip('\n') for line in open(file_path)]
            return [test for test in test_cases if test and not test.isspace()]
        return []

    def _get_test_cases(self, test_filter, subset):
        """Gets the test cases for 'Pass', 'Fail' etc. expectations.

        This function supports bootstrapping of new GPU families and dEQP
        binaries. In particular, if no 'Pass' expectations are found for this
        GPU family, it will query the dEQP executable for a list of all
        available tests. It will then remove known non-'Pass'ing tests from
        this list to avoid getting into hangs/crashes etc.

        @param test_filter: string like 'dEQP-GLES2.info', 'dEQP-GLES3.stress'.
        @param subset: string from 'Pass', 'Fail', 'Timeout' etc.

        @return: List of dEQP tests to run.
        """
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_name = '%s.%s.bz2' % (test_filter, subset)
        subset_path = os.path.join(expectations_dir, subset_name)
        if not os.path.isfile(subset_path):
            if subset == 'NotPass':
                # TODO(ihf): Running hasty and NotPass together is an invitation
                # for trouble (stability). Decide if it should be disallowed.
                return self._load_not_passing_cases(test_filter)
            if subset != 'Pass':
                raise error.TestFail('Failed: No subset file found for %s!' %
                                     subset_path)
            # Ask dEQP for all cases and remove the failing ones.
            return self._bootstrap_new_test_cases(test_filter)

        test_cases = bz2.BZ2File(subset_path).read().splitlines()
        if not test_cases:
            raise error.TestFail(
                'Failed: No test cases found in subset file %s!' % subset_path)
        return test_cases

    def _run_tests_individually(self, test_cases, failing_test=None):
        """Runs tests isolated from each other, but slowly.

        This function runs each test case separately as a command.
        This means a new context for each test etc. Failures will be more
        isolated, but the runtime is quite high due to overhead.

        @param test_cases: List of dEQP test case strings.
        @param failing_test: Tests considered failed will be appended to it.

        @return: dictionary of test results.
        """
        test_results = {}
        width = self._width
        height = self._height

        i = 0
        for test_case in test_cases:
            i += 1
            logging.info('[%d/%d] TestCase: %s', i, len(test_cases), test_case)
            result_prefix = os.path.join(self._log_path, test_case)
            log_file = '%s.log' % result_prefix
            debug_file = '%s.debug' % result_prefix
            api = self._translate_name_to_api(test_case)
            if not self._can_run(api):
                result = 'Skipped'
                logging.info('Skipping on %s: %s', self._gpu_type, test_case)
            else:
                executable = self._get_executable(api)
                command = ('%s '
                           '--deqp-case=%s '
                           '--deqp-surface-type=%s '
                           '--deqp-gl-config-name=rgba8888d24s8ms0 '
                           '--deqp-log-images=disable '
                           '--deqp-watchdog=enable '
                           '--deqp-surface-width=%d '
                           '--deqp-surface-height=%d '
                           '--deqp-log-filename=%s' % (
                               executable,
                               test_case,
                               self._surface,
                               width,
                               height,
                               log_file))
                logging.debug('Running single: %s', command)

                # Must be in the executable directory when running for it to
                # find its test data files!
                os.chdir(os.path.dirname(executable))

                # Must initialize because some errors don't repopulate
                # run_result, leaving old results.
                run_result = {}
                start_time = time.time()
                try:
                    run_result = utils.run(command,
                                           env=self._env,
                                           timeout=self._timeout,
                                           stderr_is_expected=False,
                                           ignore_status=True)
                    result_counts = self._parse_test_results(
                        log_file,
                        failing_test=failing_test)
                    if result_counts:
                        result = result_counts.keys()[0]
                    else:
                        result = 'Unknown'
                except error.CmdTimeoutError:
                    result = 'TestTimeout'
                except error.CmdError:
                    result = 'CommandFailed'
                except Exception:
                    result = 'UnexpectedError'
                end_time = time.time()

                if self._debug:
                    # Collect debug info and save it to a json file.
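                    # The <test_case>.debug file holds the start/end
                    # timestamps, the dEQP stdout/stderr and the kernel log
                    # lines captured while the test ran, serialized as
                    # pretty-printed JSON.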
                    output_msgs = {
                        'start_time': start_time,
                        'end_time': end_time,
                        'stdout': [],
                        'stderr': [],
                        'dmesg': []
                    }
                    logs = self._log_reader.get_logs()
                    self._log_reader.set_start_by_current()
                    output_msgs['dmesg'] = [
                        msg for msg in logs.splitlines()
                        if self._log_filter.match(msg)
                    ]
                    if run_result:
                        output_msgs['stdout'] = run_result.stdout.splitlines()
                        output_msgs['stderr'] = run_result.stderr.splitlines()
                    with open(debug_file, 'w') as fd:
                        json.dump(
                            output_msgs,
                            fd,
                            indent=4,
                            separators=(',', ' : '),
                            sort_keys=True)

            logging.info('Result: %s', result)
            test_results[result] = test_results.get(result, 0) + 1

        return test_results

    def _run_tests_hasty(self, test_cases, failing_test=None):
        """Runs tests as quickly as possible.

        This function runs all the test cases, but does not isolate tests and
        may take shortcuts/not run all tests to provide maximum coverage at
        minimum runtime.

        @param test_cases: List of dEQP test case strings.
        @param failing_test: Tests considered failed will be appended to it.

        @return: dictionary of test results.
        """
        # TODO(ihf): It saves half the test time to use 32*32 but a few tests
        # fail as they need surfaces larger than 200*200.
        width = self._width
        height = self._height
        results = {}

        # All tests combined take less than 1h in hasty mode.
        batch_timeout = min(3600, self._timeout * self._hasty_batch_size)
        num_test_cases = len(test_cases)

        # We are dividing the number of tests into several shards but run them
        # in smaller batches. We start and end at multiples of batch_size
        # boundaries.
        shard_start = self._hasty_batch_size * (
            (self._shard_number * (num_test_cases / self._shard_count)) /
            self._hasty_batch_size)
        shard_end = self._hasty_batch_size * ((
            (self._shard_number + 1) * (num_test_cases / self._shard_count)) /
            self._hasty_batch_size)
        # The last shard will be slightly larger than the others. Extend it to
        # cover all test cases, avoiding rounding problems with the integer
        # arithmetic used to compute shard_start and shard_end.
        if self._shard_number + 1 == self._shard_count:
            shard_end = num_test_cases

        for batch in xrange(shard_start, shard_end, self._hasty_batch_size):
            batch_to = min(batch + self._hasty_batch_size, shard_end)
            batch_cases = '\n'.join(test_cases[batch:batch_to])
            # This assumes all tests in the batch are kicked off via the same
            # executable.
            api = self._translate_name_to_api(test_cases[batch])
            if not self._can_run(api):
                logging.info('Skipping tests on %s: %s', self._gpu_type,
                             batch_cases)
            else:
                executable = self._get_executable(api)
                log_file = os.path.join(
                    self._log_path, '%s_hasty_%d.log' % (self._filter, batch))
                command = ('%s '
                           '--deqp-stdin-caselist '
                           '--deqp-surface-type=%s '
                           '--deqp-gl-config-name=rgba8888d24s8ms0 '
                           '--deqp-log-images=disable '
                           '--deqp-visibility=hidden '
                           '--deqp-watchdog=enable '
                           '--deqp-surface-width=%d '
                           '--deqp-surface-height=%d '
                           '--deqp-log-filename=%s' % (
                               executable,
                               self._surface,
                               width,
                               height,
                               log_file))

                logging.info('Running tests %d...%d out of %d:\n%s\n%s',
                             batch + 1, batch_to, num_test_cases, command,
                             batch_cases)

                # Must be in the executable directory when running for it to
                # find its test data files!
                os.chdir(os.path.dirname(executable))

                try:
                    utils.run(command,
                              env=self._env,
                              timeout=batch_timeout,
                              stderr_is_expected=False,
                              ignore_status=False,
                              stdin=batch_cases)
                except Exception:
                    pass
                # We are trying to handle all errors by parsing the log file.
                results = self._parse_test_results(log_file, results,
                                                   failing_test)
                logging.info(results)
        return results

    def _run_once(self, test_cases):
        """Run dEQP test_cases in individual/hasty mode.

        @param test_cases: test cases to run.
        """
        failing_test = []
        if self._hasty:
            logging.info('Running in hasty mode.')
            test_results = self._run_tests_hasty(test_cases, failing_test)
        else:
            logging.info('Running each test individually.')
            test_results = self._run_tests_individually(test_cases,
                                                        failing_test)
        return test_results, failing_test

    def run_once(self, opts=None):
        options = dict(filter='',
                       test_names='',  # e.g., dEQP-GLES3.info.version,
                                       # dEQP-GLES2.functional,
                                       # dEQP-GLES3.accuracy.texture, etc.
                       test_names_file='',
                       timeout=self._timeout,
                       subset_to_run='Pass',  # Pass, Fail, Timeout, NotPass...
                       hasty='False',
                       shard_number='0',
                       shard_count='1',
                       debug='False',
                       perf_failure_description=None)
        if opts is None:
            opts = []
        options.update(utils.args_to_dict(opts))
        logging.info('Test Options: %s', options)

        self._hasty = (options['hasty'] == 'True')
        self._timeout = int(options['timeout'])
        self._test_names_file = options['test_names_file']
        self._test_names = options['test_names']
        self._shard_number = int(options['shard_number'])
        self._shard_count = int(options['shard_count'])
        self._debug = (options['debug'] == 'True')
        if not (self._test_names_file or self._test_names):
            self._filter = options['filter']
            if not self._filter:
                raise error.TestFail('Failed: No dEQP test filter specified')
        if options['perf_failure_description']:
            self._test_failure_description = options['perf_failure_description']
        else:
            # Do not report failure if failure description is not specified.
            self._test_failure_report_enable = False

        # Some information to help post-process logs into blacklists later.
        logging.info('ChromeOS BOARD = %s', self._board)
        logging.info('ChromeOS CPU family = %s', self._cpu_type)
        logging.info('ChromeOS GPU family = %s', self._gpu_type)

        # Create a place to put detailed test output logs.
        filter_name = self._filter or os.path.basename(self._test_names_file)
        logging.info('dEQP test filter = %s', filter_name)
        self._log_path = os.path.join(tempfile.gettempdir(),
                                      '%s-logs' % filter_name)
        shutil.rmtree(self._log_path, ignore_errors=True)
        os.mkdir(self._log_path)

        # Load tests specified by either test_names_file, test_names or filter.
        test_cases = []
        if self._test_names_file:
            test_cases = self._get_test_cases_from_names_file()
        elif self._test_names:
            test_cases = []
            for name in self._test_names.split(','):
                test_cases.extend(self._get_test_cases(name, 'Pass'))
        elif self._filter:
            test_cases = self._get_test_cases(self._filter,
                                              options['subset_to_run'])

        if self._debug:
            # LogReader works on /var/log/messages by default.
            self._log_reader = cros_logging.LogReader()
            self._log_reader.set_start_by_current()

        # Assume all tests failed at the beginning.
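        # Passing tests are removed from the failure records again after the
        # run (and any hasty rerun) below, so a test that never produces a
        # parseable result still gets counted as a failure.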
        for test_case in test_cases:
            self.add_failures(test_case)

        test_results, failing_test = self._run_once(test_cases)
        # Rerun failing tests individually if we are in hasty mode.
        if self._hasty and len(failing_test) > 0:
            if len(failing_test) < sum(test_results.values()) * RERUN_RATIO:
                logging.info('Because we are in hasty mode, we will rerun the '
                             'failing tests one at a time.')
                rerun_results, failing_test = self._run_once(failing_test)
                # Reset the failure counts in test_results and replace them
                # with the rerun results.
                for result in test_results:
                    if result.lower() not in self.TEST_RESULT_FILTER:
                        test_results[result] = 0
                for result in rerun_results:
                    test_results[result] = (test_results.get(result, 0) +
                                            rerun_results[result])
            else:
                logging.info('There are too many failing tests. It would '
                             'take too long to rerun them. Giving up.')

        # Update the failing tests in the chrome perf dashboard records.
        for test_case in test_cases:
            if test_case not in failing_test:
                self.remove_failures(test_case)

        logging.info('Test results:')
        logging.info(test_results)
        logging.debug('Test Failed: %s', failing_test)
        self.write_perf_keyval(test_results)

        test_count = 0
        test_failures = 0
        test_passes = 0
        test_skipped = 0
        for result in test_results:
            test_count += test_results[result]
            if result.lower() in ['pass']:
                test_passes += test_results[result]
            if result.lower() not in self.TEST_RESULT_FILTER:
                test_failures += test_results[result]
            if result.lower() in ['skipped']:
                test_skipped += test_results[result]
        # The text "Completed all tests." is used by the process_log.py script
        # and should always appear at the end of a completed test run.
        logging.info(
            'Completed all tests. Saw %d tests, %d passes and %d failures.',
            test_count, test_passes, test_failures)

        if (self._filter and test_count == 0 and
                options['subset_to_run'] != 'NotPass'):
            logging.warning('No test cases found for filter: %s!', self._filter)

        if test_failures:
            raise error.TestFail('Failed: on %s %d/%d tests failed.' %
                                 (self._gpu_type, test_failures, test_count))
        if test_skipped > 0:
            logging.info('On %s %d tests skipped, %d passes',
                         self._gpu_type, test_skipped, test_passes)