graphics_dEQP.py revision af271023c914bbf0870ae4d3a866ae647f754ed3
# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import bz2
import glob
import json
import logging
import os
import re
import shutil
import tempfile
import time
import xml.etree.ElementTree as et
from autotest_lib.client.bin import test, utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.cros import cros_logging, service_stopper
from autotest_lib.client.cros.graphics import graphics_utils

# In hasty mode, rerun failing tests only if they are fewer than this fraction
# of all tests run.
RERUN_RATIO = 0.02


class graphics_dEQP(test.test):
    """Run the drawElements Quality Program test suite.
    """
    version = 1
    _services = None
    _hasty = False
    _hasty_batch_size = 100  # Batch size in hasty mode.
    _shard_number = 0
    _shard_count = 1
    _board = None
    _cpu_type = None
    _gpu_type = None
    _surface = None
    _filter = None
    _width = 256  # Use smallest width for which all tests run/pass.
    _height = 256  # Use smallest height for which all tests run/pass.
    _timeout = 70  # Larger than twice the dEQP watchdog timeout at 30s.
    _test_names = None
    _test_names_file = None
    _log_path = None  # Location for detailed test output logs (in /tmp/).
    _debug = False  # Analyze kernel messages.
    _log_reader = None  # Reader to analyze (kernel) messages log.
    _log_filter = re.compile('.* .* kernel:')  # Kernel messages filter.
    _env = None  # Environment for test processes.
    DEQP_MODULES = {
        'dEQP-EGL': 'egl',
        'dEQP-GLES2': 'gles2',
        'dEQP-GLES3': 'gles3',
        'dEQP-GLES31': 'gles31',
        'dEQP-VK': 'vk',
    }

    TEST_RESULT_FILTER = [
        'pass', 'notsupported', 'internalerror', 'qualitywarning',
        'compatibilitywarning', 'skipped'
    ]

    def initialize(self):
        self._api_helper = graphics_utils.GraphicsApiHelper()
        self._board = utils.get_board()
        self._cpu_type = utils.get_cpu_soc_family()
        self._gpu_type = utils.get_gpu_family()

        # dEQP may depend on libraries that are present only on test images.
        # Those libraries are installed in /usr/local.
        self._env = os.environ.copy()
        old_ld_path = self._env.get('LD_LIBRARY_PATH', '')
        if old_ld_path:
            self._env['LD_LIBRARY_PATH'] = '/usr/local/lib:/usr/local/lib64:' + old_ld_path
        else:
            self._env['LD_LIBRARY_PATH'] = '/usr/local/lib:/usr/local/lib64'

        self._services = service_stopper.ServiceStopper(['ui', 'powerd'])
        # Valid choices are fbo and pbuffer. The latter avoids dEQP assumptions.
        self._surface = 'pbuffer'

    def cleanup(self):
        if self._services:
            self._services.restore_services()

    def _parse_test_results(self, result_filename,
                            test_results=None, failing_test=None):
        """Handles result files with one or more test results.

        @param result_filename: Log file to parse.
        @param test_results: Parsed results will be added to it.
        @param failing_test: Tests considered failed will be appended to it.

        @return: dictionary of parsed test results.
        """
        xml = ''
        xml_start = False
        xml_complete = False
        xml_bad = False
        result = 'ParseTestResultFail'

        if test_results is None:
            test_results = {}

        if not os.path.isfile(result_filename):
            return test_results

        with open(result_filename) as result_file:
            for line in result_file.readlines():
                # If the test terminates early, the XML will be incomplete
                # and should not be parsed.
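                # Sketch of the record this parser expects (inferred from the
                # markers and XML fields handled below; the exact formatting of
                # the qpa log may differ):
                #   #beginTestCaseResult <case>
                #   <TestCaseResult CasePath="..."> <Result StatusCode="..."/> ...
                #   #endTestCaseResult
                # or, when the test is cut short:
                #   #terminateTestCaseResult <result>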
                if line.startswith('#terminateTestCaseResult'):
                    result = line.strip().split()[1]
                    xml_bad = True
                # Will only see #endTestCaseResult if the test does not
                # terminate early.
                elif line.startswith('#endTestCaseResult'):
                    xml_complete = True
                elif xml_start:
                    xml += line
                elif line.startswith('#beginTestCaseResult'):
                    # If we see another begin before an end then something is
                    # wrong.
                    if xml_start:
                        xml_bad = True
                    else:
                        xml_start = True

                if xml_complete or xml_bad:
                    if xml_complete:
                        myparser = et.XMLParser(encoding='ISO-8859-1')
                        root = et.fromstring(xml, parser=myparser)
                        test_case = root.attrib['CasePath']
                        result = root.find('Result').get('StatusCode').strip()
                        xml_complete = False
                    test_results[result] = test_results.get(result, 0) + 1
                    if (result.lower() not in self.TEST_RESULT_FILTER and
                            failing_test is not None):
                        failing_test.append(test_case)
                    xml_bad = False
                    xml_start = False
                    result = 'ParseTestResultFail'
                    xml = ''

        return test_results

    def _load_not_passing_cases(self, test_filter):
        """Load all test cases that are in non-'Pass' expectations."""
        not_passing_cases = []
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_spec = '%s.*' % test_filter
        subset_paths = glob.glob(os.path.join(expectations_dir, subset_spec))
        for subset_file in subset_paths:
            # Filter against extra hasty failures only in hasty mode.
            if ('.Pass.bz2' not in subset_file and
                    (self._hasty or '.hasty.' not in subset_file)):
                not_passing_cases.extend(
                    bz2.BZ2File(subset_file).read().splitlines())
        not_passing_cases.sort()
        return not_passing_cases

    def _translate_name_to_api(self, name):
        """Translate test_names or test_filter to an api."""
        test_prefix = name.split('.')[0]
        if test_prefix in self.DEQP_MODULES:
            api = self.DEQP_MODULES[test_prefix]
        else:
            raise error.TestFail('Failed: Invalid test name: %s' % name)
        return api

    def _get_executable(self, api):
        """Return the executable path of the api."""
        return self._api_helper.get_deqp_executable(api)

    def _can_run(self, api):
        """Check if the specific api is supported on this board."""
        return api in self._api_helper.get_supported_apis()

    def _bootstrap_new_test_cases(self, test_filter):
        """Asks dEQP for all test cases and removes non-'Pass'ing ones.

        This function will query dEQP for test cases and remove all cases that
        are not in 'Pass'ing expectations from the list. This can be used for
        incrementally updating failing/hanging tests over several runs.

        @param test_filter: string like 'dEQP-GLES2.info', 'dEQP-GLES3.stress'.

        @return: List of dEQP tests to run.
        """
        test_cases = []
        api = self._translate_name_to_api(test_filter)
        if self._can_run(api):
            executable = self._get_executable(api)
        else:
            return test_cases

        # Must be in the executable directory when running for it to find its
        # test data files!
        os.chdir(os.path.dirname(executable))

        not_passing_cases = self._load_not_passing_cases(test_filter)
        # We did not find passing cases in expectations. Assume everything else
        # that is there should not be run this time.
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_spec = '%s.*' % test_filter
        subset_paths = glob.glob(os.path.join(expectations_dir, subset_spec))
        for subset_file in subset_paths:
            # Filter against hasty failures only in hasty mode.
            if self._hasty or '.hasty.' not in subset_file:
                not_passing_cases.extend(
                    bz2.BZ2File(subset_file).read().splitlines())

        # Now ask the dEQP executable nicely for the whole list of tests.
        # Needs to be run in the executable directory. The output file is a
        # plain text file named e.g. 'dEQP-GLES2-cases.txt'.
        command = ('%s '
                   '--deqp-runmode=txt-caselist '
                   '--deqp-surface-type=%s '
                   '--deqp-gl-config-name=rgba8888d24s8ms0 ' % (executable,
                                                                self._surface))
        logging.info('Running command %s', command)
        utils.run(command,
                  env=self._env,
                  timeout=60,
                  stderr_is_expected=False,
                  ignore_status=False,
                  stdin=None)

        # Now read this caselist file.
        caselist_name = '%s-cases.txt' % test_filter.split('.')[0]
        caselist_file = os.path.join(os.path.dirname(executable), caselist_name)
        if not os.path.isfile(caselist_file):
            raise error.TestFail('Failed: No caselist file at %s!' %
                                 caselist_file)

        # And remove non-'Pass'ing expectations from the caselist.
        caselist = open(caselist_file).read().splitlines()
        # Contains lines like "TEST: dEQP-GLES2.capability".
        test_cases = []
        match = 'TEST: %s' % test_filter
        logging.info('Bootstrapping test cases matching "%s".', match)
        for case in caselist:
            if case.startswith(match):
                case = case.split('TEST: ')[1]
                test_cases.append(case)

        test_cases = list(set(test_cases) - set(not_passing_cases))
        if not test_cases:
            raise error.TestFail('Failed: Unable to bootstrap %s!' % test_filter)

        test_cases.sort()
        return test_cases

    def _get_test_cases_from_names_file(self):
        # Assume the master file comes with a '-master.txt' suffix,
        # e.g. gles2-master.txt, gles31-master.txt, vk-master.txt.
        basename = os.path.basename(self._test_names_file)
        groups = re.match(r'(.*)-master\.txt$', basename)
        if groups is None:
            logging.warning('File name \'%s\' could not be recognized.',
                            basename)
        else:
            api = groups.group(1)
            if api not in self._api_helper.get_supported_apis():
                logging.warning('\'%s\' is not supported on this board.', api)
                return []

        file_path = os.path.join(self.bindir, self._test_names_file)
        test_cases = [line.rstrip('\n') for line in open(file_path)]
        return [test for test in test_cases if test and not test.isspace()]

    def _get_test_cases(self, test_filter, subset):
        """Gets the test cases for 'Pass', 'Fail' etc. expectations.

        This function supports bootstrapping of new GPU families and dEQP
        binaries. In particular, if no 'Pass' expectations are found for this
        GPU family, it will query the dEQP executable for a list of all
        available tests. It will then remove known non-'Pass'ing tests from
        this list to avoid running into hangs/crashes etc.

        @param test_filter: string like 'dEQP-GLES2.info', 'dEQP-GLES3.stress'.
        @param subset: string from 'Pass', 'Fail', 'Timeout' etc.

        @return: List of dEQP tests to run.
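
        Note: expectation files are read from expectations/<gpu_type>/ under
        bindir and are named '<test_filter>.<subset>.bz2'; an illustrative,
        hypothetical example would be 'dEQP-GLES2.functional.Pass.bz2'.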
284 """ 285 expectations_dir = os.path.join(self.bindir, 'expectations', 286 self._gpu_type) 287 subset_name = '%s.%s.bz2' % (test_filter, subset) 288 subset_path = os.path.join(expectations_dir, subset_name) 289 if not os.path.isfile(subset_path): 290 if subset == 'NotPass': 291 # TODO(ihf): Running hasty and NotPass together is an invitation 292 # for trouble (stability). Decide if it should be disallowed. 293 return self._load_not_passing_cases(test_filter) 294 if subset != 'Pass': 295 raise error.TestFail('Failed: No subset file found for %s!' % 296 subset_path) 297 # Ask dEQP for all cases and remove the failing ones. 298 return self._bootstrap_new_test_cases(test_filter) 299 300 test_cases = bz2.BZ2File(subset_path).read().splitlines() 301 if not test_cases: 302 raise error.TestFail( 303 'Failed: No test cases found in subset file %s!' % subset_path) 304 return test_cases 305 306 def run_tests_individually(self, test_cases): 307 """Runs tests as isolated from each other, but slowly. 308 309 This function runs each test case separately as a command. 310 This means a new context for each test etc. Failures will be more 311 isolated, but runtime quite high due to overhead. 312 313 @param test_cases: List of dEQP test case strings. 314 315 @return: dictionary of test results. 316 """ 317 test_results = {} 318 width = self._width 319 height = self._height 320 321 i = 0 322 for test_case in test_cases: 323 i += 1 324 logging.info('[%d/%d] TestCase: %s', i, len(test_cases), test_case) 325 result_prefix = os.path.join(self._log_path, test_case) 326 log_file = '%s.log' % result_prefix 327 debug_file = '%s.debug' % result_prefix 328 api = self._translate_name_to_api(test_case) 329 if not self._can_run(api): 330 result = 'Skipped' 331 logging.info('Skipping on %s: %s', self._gpu_type, test_case) 332 else: 333 executable = self._get_executable(api) 334 command = ('%s ' 335 '--deqp-case=%s ' 336 '--deqp-surface-type=%s ' 337 '--deqp-gl-config-name=rgba8888d24s8ms0 ' 338 '--deqp-log-images=disable ' 339 '--deqp-watchdog=enable ' 340 '--deqp-surface-width=%d ' 341 '--deqp-surface-height=%d ' 342 '--deqp-log-filename=%s' % ( 343 executable, 344 test_case, 345 self._surface, 346 width, 347 height, 348 log_file) 349 ) 350 logging.debug('Running single: %s', command) 351 352 # Must be in the executable directory when running for it to find it's 353 # test data files! 354 os.chdir(os.path.dirname(executable)) 355 356 # Must initialize because some errors don't repopulate 357 # run_result, leaving old results. 358 run_result = {} 359 start_time = time.time() 360 try: 361 run_result = utils.run(command, 362 env=self._env, 363 timeout=self._timeout, 364 stderr_is_expected=False, 365 ignore_status=True) 366 result_counts = self._parse_test_results(log_file) 367 if result_counts: 368 result = result_counts.keys()[0] 369 else: 370 result = 'Unknown' 371 except error.CmdTimeoutError: 372 result = 'TestTimeout' 373 except error.CmdError: 374 result = 'CommandFailed' 375 except Exception: 376 result = 'UnexpectedError' 377 end_time = time.time() 378 379 if self._debug: 380 # Collect debug info and save to json file. 
                    output_msgs = {
                        'start_time': start_time,
                        'end_time': end_time,
                        'stdout': [],
                        'stderr': [],
                        'dmesg': []
                    }
                    logs = self._log_reader.get_logs()
                    self._log_reader.set_start_by_current()
                    output_msgs['dmesg'] = [
                        msg for msg in logs.splitlines()
                        if self._log_filter.match(msg)
                    ]
                    if run_result:
                        output_msgs['stdout'] = run_result.stdout.splitlines()
                        output_msgs['stderr'] = run_result.stderr.splitlines()
                    with open(debug_file, 'w') as fd:
                        json.dump(
                            output_msgs,
                            fd,
                            indent=4,
                            separators=(',', ' : '),
                            sort_keys=True)

            logging.info('Result: %s', result)
            test_results[result] = test_results.get(result, 0) + 1

        return test_results

    def run_tests_hasty(self, test_cases, failing_test=None):
        """Runs tests as quickly as possible.

        This function runs all the test cases, but does not isolate tests and
        may take shortcuts/not run all tests to provide maximum coverage at
        minimum runtime.

        @param test_cases: List of dEQP test case strings.
        @param failing_test: Tests considered failed will be appended to it.

        @return: dictionary of test results.
        """
        # TODO(ihf): It saves half the test time to use 32*32 but a few tests
        # fail as they need surfaces larger than 200*200.
        width = self._width
        height = self._height
        results = {}

        # All tests combined take less than 1h in hasty mode.
        batch_timeout = min(3600, self._timeout * self._hasty_batch_size)
        num_test_cases = len(test_cases)

        # We are dividing the number of tests into several shards but run them
        # in smaller batches. We start and end at multiples of batch_size
        # boundaries.
        shard_start = self._hasty_batch_size * (
            (self._shard_number * (num_test_cases / self._shard_count)) /
            self._hasty_batch_size)
        shard_end = self._hasty_batch_size * ((
            (self._shard_number + 1) * (num_test_cases / self._shard_count)) /
            self._hasty_batch_size)
        # The last shard will be slightly larger than the others. Extend it to
        # cover all test cases, avoiding rounding problems with the integer
        # arithmetic done to compute shard_start and shard_end.
        if self._shard_number + 1 == self._shard_count:
            shard_end = num_test_cases

        for batch in xrange(shard_start, shard_end, self._hasty_batch_size):
            batch_to = min(batch + self._hasty_batch_size, shard_end)
            batch_cases = '\n'.join(test_cases[batch:batch_to])
            # This assumes all tests in the batch are kicked off via the same
            # executable.
            api = self._translate_name_to_api(test_cases[batch])
            if not self._can_run(api):
                logging.info('Skipping tests on %s: %s', self._gpu_type,
                             batch_cases)
            else:
                executable = self._get_executable(api)
                log_file = os.path.join(self._log_path,
                                        '%s_hasty_%d.log' % (self._filter, batch))
                command = ('%s '
                           '--deqp-stdin-caselist '
                           '--deqp-surface-type=%s '
                           '--deqp-gl-config-name=rgba8888d24s8ms0 '
                           '--deqp-log-images=disable '
                           '--deqp-visibility=hidden '
                           '--deqp-watchdog=enable '
                           '--deqp-surface-width=%d '
                           '--deqp-surface-height=%d '
                           '--deqp-log-filename=%s' % (
                               executable,
                               self._surface,
                               width,
                               height,
                               log_file)
                          )

                logging.info('Running tests %d...%d out of %d:\n%s\n%s',
                             batch + 1, batch_to, num_test_cases, command,
                             batch_cases)

                # Must be in the executable directory when running for it to
                # find its test data files!
                os.chdir(os.path.dirname(executable))

                try:
                    utils.run(command,
                              env=self._env,
                              timeout=batch_timeout,
                              stderr_is_expected=False,
                              ignore_status=False,
                              stdin=batch_cases)
                except Exception:
                    pass
                # We are trying to handle all errors by parsing the log file.
                results = self._parse_test_results(log_file, results,
                                                   failing_test)
                logging.info(results)
        return results

    def run_once(self, opts=None):
        options = dict(filter='',
                       test_names='',  # e.g., dEQP-GLES3.info.version,
                                       # dEQP-GLES2.functional,
                                       # dEQP-GLES3.accuracy.texture, etc.
                       test_names_file='',
                       timeout=self._timeout,
                       subset_to_run='Pass',  # Pass, Fail, Timeout, NotPass...
                       hasty='False',
                       shard_number='0',
                       shard_count='1',
                       debug='False')
        if opts is None:
            opts = []
        options.update(utils.args_to_dict(opts))
        logging.info('Test Options: %s', options)

        self._hasty = (options['hasty'] == 'True')
        self._timeout = int(options['timeout'])
        self._test_names_file = options['test_names_file']
        self._test_names = options['test_names']
        self._shard_number = int(options['shard_number'])
        self._shard_count = int(options['shard_count'])
        self._debug = (options['debug'] == 'True')
        if not (self._test_names_file or self._test_names):
            self._filter = options['filter']
            if not self._filter:
                raise error.TestFail('Failed: No dEQP test filter specified')

        # Some information to help post-process logs into blacklists later.
        logging.info('ChromeOS BOARD = %s', self._board)
        logging.info('ChromeOS CPU family = %s', self._cpu_type)
        logging.info('ChromeOS GPU family = %s', self._gpu_type)

        # Create a place to put detailed test output logs.
        filter_name = self._filter or os.path.basename(self._test_names_file)
        logging.info('dEQP test filter = %s', filter_name)
        self._log_path = os.path.join(tempfile.gettempdir(), '%s-logs' %
                                      filter_name)
        shutil.rmtree(self._log_path, ignore_errors=True)
        os.mkdir(self._log_path)

        self._services.stop_services()
        # Load the tests specified by test_names_file, test_names or filter.
        test_cases = []
        if self._test_names_file:
            test_cases = self._get_test_cases_from_names_file()
        elif self._test_names:
            test_cases = []
            for name in self._test_names.split(','):
                test_cases.extend(self._get_test_cases(name, 'Pass'))
        elif self._filter:
            test_cases = self._get_test_cases(self._filter,
                                              options['subset_to_run'])

        if self._debug:
            # LogReader works on /var/log/messages by default.
            self._log_reader = cros_logging.LogReader()
            self._log_reader.set_start_by_current()

        test_results = {}
        if self._hasty:
            logging.info('Running in hasty mode.')
            failing_test = []
            test_results = self.run_tests_hasty(test_cases, failing_test)

            logging.info('Failing Tests: %s', str(failing_test))
            if len(failing_test) > 0:
                if len(failing_test) < sum(test_results.values()) * RERUN_RATIO:
                    logging.info('Because we are in hasty mode, we will rerun '
                                 'the failing tests one at a time')
                    rerun_results = self.run_tests_individually(failing_test)
                    # Zero out the failing results in test_results, then merge
                    # in the rerun results.
                    for result in test_results:
                        if result.lower() not in self.TEST_RESULT_FILTER:
                            test_results[result] = 0
                    for result in rerun_results:
                        test_results[result] = (test_results.get(result, 0) +
                                                rerun_results[result])
                else:
                    logging.info('There are too many failing tests. It would '
                                 'take too long to rerun them. Giving up.')
        else:
            logging.info('Running each test individually.')
            test_results = self.run_tests_individually(test_cases)

        logging.info('Test results:')
        logging.info(test_results)
        self.write_perf_keyval(test_results)

        test_count = 0
        test_failures = 0
        test_passes = 0
        test_skipped = 0
        for result in test_results:
            test_count += test_results[result]
            if result.lower() in ['pass']:
                test_passes += test_results[result]
            if result.lower() not in self.TEST_RESULT_FILTER:
                test_failures += test_results[result]
            if result.lower() in ['skipped']:
                test_skipped += test_results[result]
        # The text "Completed all tests." is used by the process_log.py script
        # and should always appear at the end of a completed test run.
        logging.info(
            'Completed all tests. Saw %d tests, %d passes and %d failures.',
            test_count, test_passes, test_failures)

        if (self._filter and test_count == 0 and
                options['subset_to_run'] != 'NotPass'):
            logging.warning('No test cases found for filter: %s!', self._filter)

        if test_failures:
            raise error.TestFail('Failed: on %s %d/%d tests failed.' %
                                 (self._gpu_type, test_failures, test_count))
        if test_skipped > 0:
            raise error.TestFail('Failed: on %s %d tests skipped, %d passes' %
                                 (self._gpu_type, test_skipped, test_passes))
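
# A minimal usage sketch (hypothetical values): autotest control files normally
# invoke this test via job.run_test() and pass options through 'opts', which
# run_once() parses with utils.args_to_dict(). For example:
#
#   job.run_test('graphics_dEQP',
#                opts=['filter=dEQP-GLES2.functional',
#                      'subset_to_run=Pass',
#                      'hasty=True',
#                      'shard_number=0',
#                      'shard_count=4'])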