graphics_dEQP.py revision af271023c914bbf0870ae4d3a866ae647f754ed3
# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import bz2
import glob
import json
import logging
import os
import re
import shutil
import tempfile
import time
import xml.etree.ElementTree as et
from autotest_lib.client.bin import test, utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.cros import cros_logging, service_stopper
from autotest_lib.client.cros.graphics import graphics_utils

# In hasty mode, failing tests are rerun individually only if they are fewer
# than this fraction of all tests run.
RERUN_RATIO = 0.02


class graphics_dEQP(test.test):
    """Run the drawElements Quality Program test suite.
    """
    version = 1
    _services = None
    _hasty = False
    _hasty_batch_size = 100  # Batch size in hasty mode.
    _shard_number = 0
    _shard_count = 1
    _board = None
    _cpu_type = None
    _gpu_type = None
    _surface = None
    _filter = None
    _width = 256  # Use smallest width for which all tests run/pass.
    _height = 256  # Use smallest height for which all tests run/pass.
    _timeout = 70  # Larger than twice the dEQP watchdog timeout at 30s.
    _test_names = None
    _test_names_file = None
    _log_path = None  # Location for detailed test output logs (in /tmp/).
    _debug = False  # Analyze kernel messages.
    _log_reader = None  # Reader to analyze (kernel) messages log.
    _log_filter = re.compile('.* .* kernel:')  # kernel messages filter.
    _env = None  # environment for test processes
    DEQP_MODULES = {
        'dEQP-EGL': 'egl',
        'dEQP-GLES2': 'gles2',
        'dEQP-GLES3': 'gles3',
        'dEQP-GLES31': 'gles31',
        'dEQP-VK': 'vk',
    }

    TEST_RESULT_FILTER = [
        'pass', 'notsupported', 'internalerror', 'qualitywarning',
        'compatibilitywarning', 'skipped'
    ]
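    # Any result status code not in this list (e.g. 'fail' or 'crash') is
    # counted as a test failure by the result accounting below.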

    def initialize(self):
        self._api_helper = graphics_utils.GraphicsApiHelper()
        self._board = utils.get_board()
        self._cpu_type = utils.get_cpu_soc_family()
        self._gpu_type = utils.get_gpu_family()

        # deqp may depend on libraries that are present only on test images.
        # Those libraries are installed in /usr/local.
        self._env = os.environ.copy()
        old_ld_path = self._env.get('LD_LIBRARY_PATH', '')
        if old_ld_path:
            self._env['LD_LIBRARY_PATH'] = '/usr/local/lib:/usr/local/lib64:' + old_ld_path
        else:
            self._env['LD_LIBRARY_PATH'] = '/usr/local/lib:/usr/local/lib64'

        self._services = service_stopper.ServiceStopper(['ui', 'powerd'])
        # Valid choices are fbo and pbuffer. The latter avoids dEQP assumptions.
        self._surface = 'pbuffer'

    def cleanup(self):
        if self._services:
            self._services.restore_services()

    def _parse_test_results(self, result_filename,
                            test_results=None, failing_test=None):
        """Handles result files with one or more test results.

        @param result_filename: log file to parse.
        @param test_results: dictionary of result counts; parsed results are
                             added to it.
        @param failing_test: list to which failing test names are appended.

        @return: dictionary of parsed test results.
        """
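        # The log is parsed line by line. Roughly, each test case record is
        # expected to look like this (illustrative sketch of the dEQP log
        # markers handled below):
        #   #beginTestCaseResult <case name>
        #   <TestCaseResult XML with a CasePath attribute and a Result node>
        #   #endTestCaseResult  (or '#terminateTestCaseResult <code>' if the
        #                        case ended early)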
        xml = ''
        xml_start = False
        xml_complete = False
        xml_bad = False
        result = 'ParseTestResultFail'

        if test_results is None:
            test_results = {}

        if not os.path.isfile(result_filename):
            return test_results

        with open(result_filename) as result_file:
            for line in result_file.readlines():
                # If the test terminates early, the XML will be incomplete
                # and should not be parsed.
                if line.startswith('#terminateTestCaseResult'):
                    result = line.strip().split()[1]
                    xml_bad = True
                # We only see #endTestCaseResult if the test does not
                # terminate early.
                elif line.startswith('#endTestCaseResult'):
                    xml_complete = True
                elif xml_start:
                    xml += line
                elif line.startswith('#beginTestCaseResult'):
                    # If we see another begin before an end then something is
                    # wrong.
                    if xml_start:
                        xml_bad = True
                    else:
                        xml_start = True

                if xml_complete or xml_bad:
                    if xml_complete:
                        myparser = et.XMLParser(encoding='ISO-8859-1')
                        root = et.fromstring(xml, parser=myparser)
                        test_case = root.attrib['CasePath']
                        result = root.find('Result').get('StatusCode').strip()
                        xml_complete = False
                    test_results[result] = test_results.get(result, 0) + 1
                    if (result.lower() not in self.TEST_RESULT_FILTER and
                        failing_test is not None):
                        failing_test.append(test_case)
                    xml_bad = False
                    xml_start = False
                    result = 'ParseTestResultFail'
                    xml = ''

        return test_results

    def _load_not_passing_cases(self, test_filter):
        """Load all test cases that are in non-'Pass' expectations."""
        not_passing_cases = []
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_spec = '%s.*' % test_filter
        subset_paths = glob.glob(os.path.join(expectations_dir, subset_spec))
        for subset_file in subset_paths:
            # Filter against extra hasty failures only in hasty mode.
            if ('.Pass.bz2' not in subset_file and
                (self._hasty or '.hasty.' not in subset_file)):
                not_passing_cases.extend(
                    bz2.BZ2File(subset_file).read().splitlines())
        not_passing_cases.sort()
        return not_passing_cases

    def _translate_name_to_api(self, name):
        """Translate test_names or test_filter to api."""
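        # For example, 'dEQP-GLES3.info.version' maps to 'gles3' via
        # DEQP_MODULES.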
        test_prefix = name.split('.')[0]
        if test_prefix in self.DEQP_MODULES:
            api = self.DEQP_MODULES[test_prefix]
        else:
            raise error.TestFail('Failed: Invalid test name: %s' % name)
        return api

    def _get_executable(self, api):
        """Return the executable path of the api."""
        return self._api_helper.get_deqp_executable(api)

    def _can_run(self, api):
        """Check if the specific api is supported on this board."""
        return api in self._api_helper.get_supported_apis()

    def _bootstrap_new_test_cases(self, test_filter):
        """Asks dEQP for all test cases and removes non-Pass'ing ones.

        This function will query dEQP for test cases and remove all cases that
        are not in 'Pass'ing expectations from the list. This can be used to
        incrementally update failing/hanging tests over several runs.

        @param test_filter: string like 'dEQP-GLES2.info', 'dEQP-GLES3.stress'.

        @return: List of dEQP tests to run.
        """
        test_cases = []
        api = self._translate_name_to_api(test_filter)
        if self._can_run(api):
            executable = self._get_executable(api)
        else:
            return test_cases

        # Must be in the executable directory when running for it to find its
        # test data files!
        os.chdir(os.path.dirname(executable))

        not_passing_cases = self._load_not_passing_cases(test_filter)
        # We did not find passing cases in expectations. Assume that any other
        # expectation files list cases that should not be run this time.
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_spec = '%s.*' % test_filter
        subset_paths = glob.glob(os.path.join(expectations_dir, subset_spec))
        for subset_file in subset_paths:
            # Filter against hasty failures only in hasty mode.
            if self._hasty or '.hasty.' not in subset_file:
                not_passing_cases.extend(
                    bz2.BZ2File(subset_file).read().splitlines())

        # Now ask the dEQP executable for the whole list of tests. It needs to
        # be run in the executable directory. The output is a plain text file
        # named e.g. 'dEQP-GLES2-cases.txt'.
        command = ('%s '
                   '--deqp-runmode=txt-caselist '
                   '--deqp-surface-type=%s '
                   '--deqp-gl-config-name=rgba8888d24s8ms0 ' % (executable,
                                                                self._surface))
        logging.info('Running command %s', command)
        utils.run(command,
                  env=self._env,
                  timeout=60,
                  stderr_is_expected=False,
                  ignore_status=False,
                  stdin=None)

        # Now read this caselist file.
        caselist_name = '%s-cases.txt' % test_filter.split('.')[0]
        caselist_file = os.path.join(os.path.dirname(executable), caselist_name)
        if not os.path.isfile(caselist_file):
            raise error.TestFail('Failed: No caselist file at %s!' %
                                 caselist_file)

        # And remove non-Pass'ing expectations from the caselist.
        caselist = open(caselist_file).read().splitlines()
        # Contains lines like "TEST: dEQP-GLES2.capability"
        test_cases = []
        match = 'TEST: %s' % test_filter
        logging.info('Bootstrapping test cases matching "%s".', match)
        for case in caselist:
            if case.startswith(match):
                case = case.split('TEST: ')[1]
                test_cases.append(case)

        test_cases = list(set(test_cases) - set(not_passing_cases))
        if not test_cases:
            raise error.TestFail('Failed: Unable to bootstrap %s!' % test_filter)

        test_cases.sort()
        return test_cases

    def _get_test_cases_from_names_file(self):
        # Assume the master file comes with a '-master.txt' suffix,
        # e.g. gles2-master.txt, gles31-master.txt, vk-master.txt.
        basename = os.path.basename(self._test_names_file)
        groups = re.match(r'(.*)-master\.txt$', basename)
        if groups is None:
            logging.warning('File name \'%s\' could not be recognized.',
                            basename)
            return []
        else:
            api = groups.group(1)
            if api not in self._api_helper.get_supported_apis():
                logging.warning('\'%s\' is not supported on this board', api)
                return []

            file_path = os.path.join(self.bindir, self._test_names_file)
            test_cases = [line.rstrip('\n') for line in open(file_path)]
            return [test for test in test_cases if test and not test.isspace()]

    def _get_test_cases(self, test_filter, subset):
        """Gets the test cases for 'Pass', 'Fail' etc. expectations.

        This function supports bootstrapping of new GPU families and dEQP
        binaries. In particular, if no 'Pass' expectations are found for this
        GPU family it will query the dEQP executable for a list of all
        available tests. It will then remove known non-'Pass'ing tests from
        this list to avoid getting into hangs/crashes etc.

        @param test_filter: string like 'dEQP-GLES2.info', 'dEQP-GLES3.stress'.
        @param subset: string from 'Pass', 'Fail', 'Timeout' etc.

        @return: List of dEQP tests to run.
        """
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_name = '%s.%s.bz2' % (test_filter, subset)
        subset_path = os.path.join(expectations_dir, subset_name)
        if not os.path.isfile(subset_path):
            if subset == 'NotPass':
                # TODO(ihf): Running hasty and NotPass together is an invitation
                # for trouble (stability). Decide if it should be disallowed.
                return self._load_not_passing_cases(test_filter)
            if subset != 'Pass':
                raise error.TestFail('Failed: No subset file found for %s!' %
                                     subset_path)
            # Ask dEQP for all cases and remove the failing ones.
            return self._bootstrap_new_test_cases(test_filter)

        test_cases = bz2.BZ2File(subset_path).read().splitlines()
        if not test_cases:
            raise error.TestFail(
                'Failed: No test cases found in subset file %s!' % subset_path)
        return test_cases

    def run_tests_individually(self, test_cases):
        """Runs tests as isolated from each other, but slowly.

        This function runs each test case separately as a command.
        This means a new context for each test etc. Failures will be more
        isolated, but runtime is quite high due to overhead.

        @param test_cases: List of dEQP test case strings.

        @return: dictionary of test results.
        """
        test_results = {}
        width = self._width
        height = self._height

        i = 0
        for test_case in test_cases:
            i += 1
            logging.info('[%d/%d] TestCase: %s', i, len(test_cases), test_case)
            result_prefix = os.path.join(self._log_path, test_case)
            log_file = '%s.log' % result_prefix
            debug_file = '%s.debug' % result_prefix
            api = self._translate_name_to_api(test_case)
            if not self._can_run(api):
                result = 'Skipped'
                logging.info('Skipping on %s: %s', self._gpu_type, test_case)
            else:
                executable = self._get_executable(api)
                command = ('%s '
                           '--deqp-case=%s '
                           '--deqp-surface-type=%s '
                           '--deqp-gl-config-name=rgba8888d24s8ms0 '
                           '--deqp-log-images=disable '
                           '--deqp-watchdog=enable '
                           '--deqp-surface-width=%d '
                           '--deqp-surface-height=%d '
                           '--deqp-log-filename=%s' % (
                               executable,
                               test_case,
                               self._surface,
                               width,
                               height,
                               log_file)
                           )
                logging.debug('Running single: %s', command)

                # Must be in the executable directory when running for it to
                # find its test data files!
                os.chdir(os.path.dirname(executable))

                # Must initialize because some errors don't repopulate
                # run_result, leaving old results.
                run_result = {}
                start_time = time.time()
                try:
                    run_result = utils.run(command,
                                           env=self._env,
                                           timeout=self._timeout,
                                           stderr_is_expected=False,
                                           ignore_status=True)
                    result_counts = self._parse_test_results(log_file)
                    if result_counts:
                        result = result_counts.keys()[0]
                    else:
                        result = 'Unknown'
                except error.CmdTimeoutError:
                    result = 'TestTimeout'
                except error.CmdError:
                    result = 'CommandFailed'
                except Exception:
                    result = 'UnexpectedError'
                end_time = time.time()

                if self._debug:
                    # Collect debug info and save to json file.
                    output_msgs = {
                        'start_time': start_time,
                        'end_time': end_time,
                        'stdout': [],
                        'stderr': [],
                        'dmesg': []
                    }
                    logs = self._log_reader.get_logs()
                    self._log_reader.set_start_by_current()
                    output_msgs['dmesg'] = [
                        msg for msg in logs.splitlines()
                        if self._log_filter.match(msg)
                    ]
                    if run_result:
                        output_msgs['stdout'] = run_result.stdout.splitlines()
                        output_msgs['stderr'] = run_result.stderr.splitlines()
                    with open(debug_file, 'w') as fd:
                        json.dump(
                            output_msgs,
                            fd,
                            indent=4,
                            separators=(',', ' : '),
                            sort_keys=True)

            logging.info('Result: %s', result)
            test_results[result] = test_results.get(result, 0) + 1

        return test_results

    def run_tests_hasty(self, test_cases, failing_test=None):
        """Runs tests as quickly as possible.

        This function runs all the test cases, but does not isolate tests and
        may take shortcuts/not run all tests to provide maximum coverage at
        minimum runtime.

        @param test_cases: List of dEQP test case strings.
        @param failing_test: list to which failing test names are appended.

        @return: dictionary of test results.
        """
        # TODO(ihf): It saves half the test time to use 32*32 but a few tests
        # fail as they need surfaces larger than 200*200.
        width = self._width
        height = self._height
        results = {}

        # All tests combined should take less than 1h in hasty mode.
        batch_timeout = min(3600, self._timeout * self._hasty_batch_size)
        num_test_cases = len(test_cases)

        # We divide the tests into several shards but run them in smaller
        # batches. Shard boundaries are rounded down to multiples of the
        # batch size.
        shard_start = self._hasty_batch_size * (
            (self._shard_number * (num_test_cases / self._shard_count)) /
            self._hasty_batch_size)
        shard_end = self._hasty_batch_size * ((
            (self._shard_number + 1) * (num_test_cases / self._shard_count)) /
                                              self._hasty_batch_size)
        # The last shard will be slightly larger than the others. Extend it to
        # cover all test cases, avoiding rounding problems with the integer
        # arithmetic done to compute shard_start and shard_end.
        if self._shard_number + 1 == self._shard_count:
            shard_end = num_test_cases
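        # Worked example (illustrative numbers): with 1050 test cases, 4
        # shards and a batch size of 100, num_test_cases / shard_count is 262
        # and the rounded shard ranges are [0, 200), [200, 500), [500, 700)
        # and [700, 1000); the extension above widens the last shard to
        # [700, 1050) so no cases are dropped.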

        for batch in xrange(shard_start, shard_end, self._hasty_batch_size):
            batch_to = min(batch + self._hasty_batch_size, shard_end)
            batch_cases = '\n'.join(test_cases[batch:batch_to])
            # This assumes all tests in the batch are kicked off via the same
            # executable.
            api = self._translate_name_to_api(test_cases[batch])
            if not self._can_run(api):
                logging.info('Skipping tests on %s: %s', self._gpu_type,
                             batch_cases)
            else:
                executable = self._get_executable(api)
                log_file = os.path.join(self._log_path,
                                        '%s_hasty_%d.log' % (self._filter, batch))
                command = ('%s '
                           '--deqp-stdin-caselist '
                           '--deqp-surface-type=%s '
                           '--deqp-gl-config-name=rgba8888d24s8ms0 '
                           '--deqp-log-images=disable '
                           '--deqp-visibility=hidden '
                           '--deqp-watchdog=enable '
                           '--deqp-surface-width=%d '
                           '--deqp-surface-height=%d '
                           '--deqp-log-filename=%s' % (
                               executable,
                               self._surface,
                               width,
                               height,
                               log_file)
                           )

                logging.info('Running tests %d...%d out of %d:\n%s\n%s',
                             batch + 1, batch_to, num_test_cases, command,
                             batch_cases)

                # Must be in the executable directory when running for it to
                # find its test data files!
                os.chdir(os.path.dirname(executable))

                try:
                    utils.run(command,
                              env=self._env,
                              timeout=batch_timeout,
                              stderr_is_expected=False,
                              ignore_status=False,
                              stdin=batch_cases)
                except Exception:
                    pass
                # We are trying to handle all errors by parsing the log file.
                results = self._parse_test_results(log_file, results,
                                                   failing_test)
                logging.info(results)
        return results

    def run_once(self, opts=None):
        options = dict(filter='',
                       test_names='',  # e.g., dEQP-GLES3.info.version,
                                       # dEQP-GLES2.functional,
                                       # dEQP-GLES3.accuracy.texture, etc.
                       test_names_file='',
                       timeout=self._timeout,
                       subset_to_run='Pass',  # Pass, Fail, Timeout, NotPass...
                       hasty='False',
                       shard_number='0',
                       shard_count='1',
                       debug='False')
        if opts is None:
            opts = []
        options.update(utils.args_to_dict(opts))
        logging.info('Test Options: %s', options)
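        # Example (illustrative): opts=['filter=dEQP-GLES2.functional',
        # 'hasty=True', 'shard_number=0', 'shard_count=2'] is parsed by
        # utils.args_to_dict() above into overrides of the defaults.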

        self._hasty = (options['hasty'] == 'True')
        self._timeout = int(options['timeout'])
        self._test_names_file = options['test_names_file']
        self._test_names = options['test_names']
        self._shard_number = int(options['shard_number'])
        self._shard_count = int(options['shard_count'])
        self._debug = (options['debug'] == 'True')
        if not (self._test_names_file or self._test_names):
            self._filter = options['filter']
            if not self._filter:
                raise error.TestFail('Failed: No dEQP test filter specified')

        # Some information to help post-process logs into blacklists later.
        logging.info('ChromeOS BOARD = %s', self._board)
        logging.info('ChromeOS CPU family = %s', self._cpu_type)
        logging.info('ChromeOS GPU family = %s', self._gpu_type)

        # Create a place to put detailed test output logs.
        filter_name = self._filter or os.path.basename(self._test_names_file)
        logging.info('dEQP test filter = %s', filter_name)
        self._log_path = os.path.join(tempfile.gettempdir(), '%s-logs' %
                                                             filter_name)
        shutil.rmtree(self._log_path, ignore_errors=True)
        os.mkdir(self._log_path)

        self._services.stop_services()
        # Load the tests specified by test_names_file, test_names or filter.
        test_cases = []
        if self._test_names_file:
            test_cases = self._get_test_cases_from_names_file()
        elif self._test_names:
            test_cases = []
            for name in self._test_names.split(','):
                test_cases.extend(self._get_test_cases(name, 'Pass'))
        elif self._filter:
            test_cases = self._get_test_cases(self._filter,
                                              options['subset_to_run'])

        if self._debug:
            # LogReader works on /var/log/messages by default.
            self._log_reader = cros_logging.LogReader()
            self._log_reader.set_start_by_current()

        test_results = {}
        if self._hasty:
            logging.info('Running in hasty mode.')
            failing_test = []
            test_results = self.run_tests_hasty(test_cases, failing_test)

            logging.info("Failing Tests: %s", str(failing_test))
            if len(failing_test) > 0:
                if len(failing_test) < sum(test_results.values()) * RERUN_RATIO:
                    logging.info("Because we are in hasty mode, we will rerun "
                                 "the failing tests one at a time")
                    rerun_results = self.run_tests_individually(failing_test)
                    # Reset failing result counts from the hasty run and fold
                    # in the results from the individual reruns.
                    for result in test_results:
                        if result.lower() not in self.TEST_RESULT_FILTER:
                            test_results[result] = 0
                    for result in rerun_results:
                        test_results[result] = (test_results.get(result, 0) +
                                                rerun_results[result])
                else:
                    logging.info("There are too many failing tests. It would "
                                 "take too long to rerun them. Giving up.")
        else:
            logging.info('Running each test individually.')
            test_results = self.run_tests_individually(test_cases)

        logging.info('Test results:')
        logging.info(test_results)
        self.write_perf_keyval(test_results)

        test_count = 0
        test_failures = 0
        test_passes = 0
        test_skipped = 0
        for result in test_results:
            test_count += test_results[result]
            if result.lower() in ['pass']:
                test_passes += test_results[result]
            if result.lower() not in self.TEST_RESULT_FILTER:
                test_failures += test_results[result]
            if result.lower() in ['skipped']:
                test_skipped += test_results[result]
        # The text "Completed all tests." is used by the process_log.py script
        # and should always appear at the end of a completed test run.
        logging.info(
            'Completed all tests. Saw %d tests, %d passes and %d failures.',
            test_count, test_passes, test_failures)

        if self._filter and test_count == 0 and options[
                'subset_to_run'] != 'NotPass':
            logging.warning('No test cases found for filter: %s!', self._filter)

        if test_failures:
            raise error.TestFail('Failed: on %s %d/%d tests failed.' %
                                 (self._gpu_type, test_failures, test_count))
        if test_skipped > 0:
            raise error.TestFail('Failed: on %s %d tests skipped, %d passes' %
                                 (self._gpu_type, test_skipped, test_passes))
618