graphics_dEQP.py revision 4f4e936de19af1ed20a5cbf1b5378bab3bd9c4a1
# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import bz2
import glob
import json
import logging
import os
import re
import shutil
import tempfile
import xml.etree.ElementTree as et
from autotest_lib.client.bin import test, utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.cros import cros_logging, service_stopper
from autotest_lib.client.cros.graphics import graphics_utils


class graphics_dEQP(test.test):
    """Run the drawElements Quality Program test suite.
    """
    version = 1
    _services = None
    _hasty = False
    _hasty_batch_size = 100  # Batch size in hasty mode.
    _shard_number = 0
    _shard_count = 1
    _board = None
    _cpu_type = None
    _gpu_type = None
    _can_run_executables = []
    _filter = None
    _width = 256  # Use smallest width for which all tests run/pass.
    _height = 256  # Use smallest height for which all tests run/pass.
    _timeout = 70  # Larger than twice the dEQP watchdog timeout at 30s.
    _test_names = None
    _test_names_file = None
    _log_path = None  # Location for detailed test output logs (in /tmp/).
    _debug = False  # Analyze kernel messages.
    _log_reader = None  # Reader to analyze (kernel) messages log.
    _log_filter = re.compile('.* .* kernel:')  # kernel messages filter.

    DEQP_BASEDIR = '/usr/local/deqp'
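    # Maps the leading component of a dEQP test name (e.g. 'dEQP-GLES2') to
    # the module subdirectory containing the matching binary; see
    # _get_executable() below.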
    DEQP_MODULES = {
        'dEQP-EGL': 'egl',
        'dEQP-GLES2': 'gles2',
        'dEQP-GLES3': 'gles3',
        'dEQP-GLES31': 'gles31'
    }

    def initialize(self):
        self._board = utils.get_board()
        self._cpu_type = utils.get_cpu_soc_family()
        self._gpu_type = utils.get_gpu_family()
        # Determine which executables can be run. Right now never egl.
        major, minor = graphics_utils.get_gles_version()
        if major is None or minor is None:
            raise error.TestError(
                'Could not get gles version information (%s, %s).' %
                (major, minor))
        logging.info('Found gles%d.%d.', major, minor)
        if major >= 2:
            self._can_run_executables.append('gles2/deqp-gles2')
        if major >= 3:
            self._can_run_executables.append('gles3/deqp-gles3')
            if major > 3 or minor >= 1:
                self._can_run_executables.append('gles31/deqp-gles31')
        self._services = service_stopper.ServiceStopper(['ui', 'powerd'])

    def cleanup(self):
        if self._services:
            self._services.restore_services()

    def _parse_test_results(self, result_filename, test_results=None):
        """Handles result files with one or more test results.

        @param result_filename: log file to parse.
        @param test_results: dictionary of test results to update; a new one
                             is created if None.

        @return: dictionary of parsed test results.
        """
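        # The dEQP log contains one block per test case, roughly of the form
        # (illustrative sketch, reconstructed from the parsing logic below):
        #
        #   #beginTestCaseResult dEQP-GLES2.info.version
        #   <TestCaseResult ...>
        #     ...
        #     <Result StatusCode="Pass">...</Result>
        #   </TestCaseResult>
        #   #endTestCaseResult
        #
        # A '#terminateTestCaseResult <reason>' line replaces the end marker
        # when a test was cut short (e.g. by the watchdog); in that case the
        # XML is incomplete and only the reason is recorded.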
        xml = ''
        xml_start = False
        xml_complete = False
        xml_bad = False
        result = 'ParseTestResultFail'

        if test_results is None:
            test_results = {}

        if not os.path.isfile(result_filename):
            return test_results
        # TODO(ihf): Add names of failing tests to a list in the results.
        with open(result_filename) as result_file:
            for line in result_file.readlines():
                # If the test terminates early, the XML will be incomplete
                # and should not be parsed.
                if line.startswith('#terminateTestCaseResult'):
                    result = line.strip().split()[1]
                    xml_bad = True
                # Will only see #endTestCaseResult if the test does not
                # terminate early.
                elif line.startswith('#endTestCaseResult'):
                    xml_complete = True
                elif xml_start:
                    xml += line
                elif line.startswith('#beginTestCaseResult'):
                    # If we see another begin before an end then something is
                    # wrong.
                    if xml_start:
                        xml_bad = True
                    else:
                        xml_start = True

                if xml_complete or xml_bad:
                    if xml_complete:
                        myparser = et.XMLParser(encoding='ISO-8859-1')
                        root = et.fromstring(xml, parser=myparser)
                        result = root.find('Result').get('StatusCode').strip()
                        xml_complete = False
                    test_results[result] = test_results.get(result, 0) + 1
                    xml_bad = False
                    xml_start = False
                    result = 'ParseTestResultFail'
                    xml = ''

        return test_results

    def _load_not_passing_cases(self, test_filter):
        """Load all test cases that are in non-'Pass' expectations."""
        not_passing_cases = []
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_spec = '%s.*' % test_filter
        subset_paths = glob.glob(os.path.join(expectations_dir, subset_spec))
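        # Expectation files are named '<filter>.<Subset>.bz2', e.g.
        # 'dEQP-GLES2.functional.Fail.bz2' (see _get_test_cases), and contain
        # one full test case name per line.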
        for subset_file in subset_paths:
            # Filter against extra hasty failures only in hasty mode.
            if ('.Pass.bz2' not in subset_file and
                (self._hasty or '.hasty.' not in subset_file)):
                not_passing_cases.extend(
                    bz2.BZ2File(subset_file).read().splitlines())
        not_passing_cases.sort()
        return not_passing_cases

    def _get_executable(self, name):
        # Determine module from test_names or filter.
        test_prefix = name.split('.')[0]
        if test_prefix in self.DEQP_MODULES:
            module = self.DEQP_MODULES[test_prefix]
        else:
            raise error.TestError('Invalid test name: %s' % name)
        executable = os.path.join(self.DEQP_BASEDIR, 'modules', module,
                                  'deqp-%s' % module)
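        # E.g. 'dEQP-GLES2.info.version' resolves to
        # /usr/local/deqp/modules/gles2/deqp-gles2.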
        # Must be in the executable's directory when running so that it can
        # find its test data files!
        os.chdir(os.path.dirname(executable))
        return executable

    def _can_run(self, executable):
        for executable_name in self._can_run_executables:
            if executable_name in executable:
                return True
        return False

    def _bootstrap_new_test_cases(self, test_filter):
        """Asks dEQP for all test cases and removes non-'Pass'ing ones.

        This function will query dEQP for test cases and remove all cases that
        are not in 'Pass'ing expectations from the list. This can be used for
        incrementally updating failing/hanging tests over several runs.

        @param test_filter: string like 'dEQP-GLES2.info', 'dEQP-GLES3.stress'.

        @return: List of dEQP tests to run.
        """
        test_cases = []
        executable = self._get_executable(test_filter)
        if not self._can_run(executable):
            return test_cases

        not_passing_cases = self._load_not_passing_cases(test_filter)
        # We did not find passing cases in expectations. Assume everything else
        # that is there should not be run this time.
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_spec = '%s.*' % test_filter
        subset_paths = glob.glob(os.path.join(expectations_dir, subset_spec))
        for subset_file in subset_paths:
            # Filter against hasty failures only in hasty mode.
            if self._hasty or '.hasty.' not in subset_file:
                not_passing_cases.extend(
                    bz2.BZ2File(subset_file).read().splitlines())

        # Now ask the dEQP executable nicely for the whole list of tests. It
        # needs to be run in the executable directory. The output is a plain
        # text file named e.g. 'dEQP-GLES2-cases.txt'.
        command = ('%s '
                   '--deqp-runmode=txt-caselist '
                   '--deqp-surface-type=fbo '
                   '--deqp-gl-config-name=rgba8888d24s8ms0 ' % executable)
        logging.info('Running command %s', command)
        utils.run(command,
                  timeout=60,
                  stderr_is_expected=False,
                  ignore_status=False,
                  stdin=None,
                  stdout_tee=utils.TEE_TO_LOGS,
                  stderr_tee=utils.TEE_TO_LOGS)

        # Now read this caselist file.
        caselist_name = '%s-cases.txt' % test_filter.split('.')[0]
        caselist_file = os.path.join(os.path.dirname(executable), caselist_name)
        if not os.path.isfile(caselist_file):
            raise error.TestError('No caselist file at %s!' % caselist_file)

        # And remove non-Pass'ing expectations from caselist.
        caselist = open(caselist_file).read().splitlines()
        # Contains lines like "TEST: dEQP-GLES2.capability"
        test_cases = []
        match = 'TEST: %s' % test_filter
        logging.info('Bootstrapping test cases matching "%s".', match)
        for case in caselist:
            if case.startswith(match):
                case = case.split('TEST: ')[1]
                test_cases.append(case)

        test_cases = list(set(test_cases) - set(not_passing_cases))
        if not test_cases:
            raise error.TestError('Unable to bootstrap %s!' % test_filter)

        test_cases.sort()
        return test_cases

    def _get_test_cases(self, test_filter, subset):
        """Gets the test cases for 'Pass', 'Fail' etc. expectations.

        This function supports bootstrapping of new GPU families and dEQP
        binaries. In particular, if no 'Pass' expectations are found for this
        GPU family, it will query the dEQP executable for a list of all
        available tests. It will then remove known non-'Pass'ing tests from
        this list to avoid getting into hangs/crashes etc.

        @param test_filter: string like 'dEQP-GLES2.info', 'dEQP-GLES3.stress'.
        @param subset: string from 'Pass', 'Fail', 'Timeout' etc.

        @return: List of dEQP tests to run.
        """
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_name = '%s.%s.bz2' % (test_filter, subset)
        subset_path = os.path.join(expectations_dir, subset_name)
        if not os.path.isfile(subset_path):
            if subset == 'NotPass':
                # TODO(ihf): Running hasty and NotPass together is an invitation
                # for trouble (stability). Decide if it should be disallowed.
                return self._load_not_passing_cases(test_filter)
            if subset != 'Pass':
                raise error.TestError('No subset file found for %s!' %
                                      subset_path)
            # Ask dEQP for all cases and remove the failing ones.
            return self._bootstrap_new_test_cases(test_filter)

        test_cases = bz2.BZ2File(subset_path).read().splitlines()
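        # Each line of the expectation file is a full test case name, e.g.
        # 'dEQP-GLES2.info.version'.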
        if not test_cases:
            raise error.TestError('No test cases found in subset file %s!' %
                                  subset_path)
        return test_cases

    def run_tests_individually(self, test_cases):
        """Runs tests isolated from each other, but slowly.

        This function runs each test case as a separate command.
        This means a new context for each test etc. Failures will be more
        isolated, but the runtime is quite high due to the overhead.

        @param test_cases: List of dEQP test case strings.

        @return: dictionary of test results.
        """
        test_results = {}
        width = self._width
        height = self._height

        i = 0
        for test_case in test_cases:
            i += 1
            logging.info('[%d/%d] TestCase: %s', i, len(test_cases), test_case)
            result_prefix = os.path.join(self._log_path, test_case)
            log_file = '%s.log' % result_prefix
            debug_file = '%s.debug' % result_prefix
            executable = self._get_executable(test_case)
            command = ('%s '
                       '--deqp-case=%s '
                       '--deqp-surface-type=fbo '
                       '--deqp-gl-config-name=rgba8888d24s8ms0 '
                       '--deqp-log-images=disable '
                       '--deqp-watchdog=enable '
                       '--deqp-surface-width=%d '
                       '--deqp-surface-height=%d '
                       '--deqp-log-filename=%s' %
                       (executable, test_case, width, height, log_file))
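            # For a case such as 'dEQP-GLES2.info.version' this expands to
            # (illustrative, with the default 256x256 surface):
            #   /usr/local/deqp/modules/gles2/deqp-gles2
            #       --deqp-case=dEQP-GLES2.info.version ...
            #       --deqp-surface-width=256 --deqp-surface-height=256
            #       --deqp-log-filename=<self._log_path>/dEQP-GLES2.info.version.log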
            if not self._can_run(executable):
                result = 'Skipped'
                logging.info('Skipping on %s: %s', self._gpu_type, test_case)
            else:
                logging.info('Running single: %s', command)
                # Must initialize because some errors don't repopulate
                # run_result, leaving old results.
                run_result = {}
                try:
                    run_result = utils.run(command,
                                           timeout=self._timeout,
                                           stderr_is_expected=False,
                                           ignore_status=True,
                                           stdout_tee=utils.TEE_TO_LOGS,
                                           stderr_tee=utils.TEE_TO_LOGS)
                    result_counts = self._parse_test_results(log_file)
                    if result_counts:
                        result = result_counts.keys()[0]
                    else:
                        result = 'Unknown'
                except error.CmdTimeoutError:
                    result = 'TestTimeout'
                except error.CmdError:
                    result = 'CommandFailed'
                except Exception:
                    result = 'UnexpectedError'

            if self._debug:
                # Collect debug info and save to json file.
                output_msgs = {'stdout': [], 'stderr': [], 'dmesg': []}
                logs = self._log_reader.get_logs()
                self._log_reader.set_start_by_current()
                output_msgs['dmesg'] = [msg
                                        for msg in logs.splitlines()
                                        if self._log_filter.match(msg)]
                if run_result:
                    output_msgs['stdout'] = run_result.stdout.splitlines()
                    output_msgs['stderr'] = run_result.stderr.splitlines()
                with open(debug_file, 'w') as fd:
                    json.dump(output_msgs,
                              fd,
                              indent=4,
                              separators=(',', ' : '),
                              sort_keys=True)

            logging.info('Result: %s', result)
            test_results[result] = test_results.get(result, 0) + 1

        return test_results

    def run_tests_hasty(self, test_cases):
        """Runs tests as quickly as possible.

        This function runs all the test cases, but does not isolate tests and
        may take shortcuts/not run all tests to provide maximum coverage at
        minimum runtime.

        @param test_cases: List of dEQP test case strings.

        @return: dictionary of test results.
        """
        # TODO(ihf): It saves half the test time to use 32*32 but a few tests
        # fail as they need surfaces larger than 200*200.
        width = self._width
        height = self._height
        results = {}

        # All tests combined should take less than 1h in hasty mode.
        batch_timeout = min(3600, self._timeout * self._hasty_batch_size)
        num_test_cases = len(test_cases)

        # We are dividing the number of tests into several shards but run them
        # in smaller batches. We start and end at multiples of batch_size
        # boundaries.
        shard_start = self._hasty_batch_size * (
            (self._shard_number *
             (num_test_cases / self._shard_count)) / self._hasty_batch_size)
        shard_end = self._hasty_batch_size * (
            ((self._shard_number + 1) *
             (num_test_cases / self._shard_count)) / self._hasty_batch_size)
        # The last shard will be slightly larger than the others. Extend it to
        # cover all test cases, avoiding rounding problems with the integer
        # arithmetic done to compute shard_start and shard_end.
        if self._shard_number + 1 == self._shard_count:
            shard_end = num_test_cases

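        # Worked example (integer arithmetic): with 950 test cases, 2 shards
        # and a batch size of 100, shard 0 covers cases [0, 400) and shard 1
        # covers [400, 900), extended to [400, 950) by the clause above.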
        for batch in xrange(shard_start, shard_end, self._hasty_batch_size):
            batch_to = min(batch + self._hasty_batch_size, shard_end)
            batch_cases = '\n'.join(test_cases[batch:batch_to])
            # This assumes all tests in the batch are kicked off via the same
            # executable.
            executable = self._get_executable(test_cases[batch])
            command = ('%s '
                       '--deqp-stdin-caselist '
                       '--deqp-surface-type=fbo '
                       '--deqp-gl-config-name=rgba8888d24s8ms0 '
                       '--deqp-log-images=disable '
                       '--deqp-visibility=hidden '
                       '--deqp-watchdog=enable '
                       '--deqp-surface-width=%d '
                       '--deqp-surface-height=%d ' % (executable, width,
                                                      height))

            log_file = os.path.join(self._log_path,
                                    '%s_hasty_%d.log' % (self._filter, batch))

            command += '--deqp-log-filename=' + log_file

            if not self._can_run(executable):
                logging.info('Skipping tests on %s: %s', self._gpu_type,
                             batch_cases)
            else:
                logging.info('Running tests %d...%d out of %d:\n%s\n%s',
                             batch + 1, batch_to, num_test_cases, command,
                             batch_cases)
                try:
                    utils.run(command,
                              timeout=batch_timeout,
                              stderr_is_expected=False,
                              ignore_status=False,
                              stdin=batch_cases,
                              stdout_tee=utils.TEE_TO_LOGS,
                              stderr_tee=utils.TEE_TO_LOGS)
                except Exception:
                    pass
                # We are trying to handle all errors by parsing the log file.
                results = self._parse_test_results(log_file, results)
                logging.info(results)
        return results

    def run_once(self, opts=None):
        options = dict(filter='',
                       test_names='',  # e.g., dEQP-GLES3.info.version,
                                       # dEQP-GLES2.functional,
                                       # dEQP-GLES3.accuracy.texture, etc.
                       test_names_file='',
                       timeout=self._timeout,
                       subset_to_run='Pass',  # Pass, Fail, Timeout, NotPass...
                       hasty='False',
                       shard_number='0',
                       shard_count='1',
                       debug='False')
        if opts is None:
            opts = []
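        # opts is expected to be a list of 'key=value' strings from the test
        # invocation; they override the defaults above, e.g.
        # ['filter=dEQP-GLES2.functional', 'hasty=True'].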
        options.update(utils.args_to_dict(opts))
        logging.info('Test Options: %s', options)

        self._hasty = (options['hasty'] == 'True')
        self._timeout = int(options['timeout'])
        self._test_names_file = options['test_names_file']
        self._test_names = options['test_names']
        self._shard_number = int(options['shard_number'])
        self._shard_count = int(options['shard_count'])
        self._debug = (options['debug'] == 'True')
        if not (self._test_names_file or self._test_names):
            self._filter = options['filter']
            if not self._filter:
                raise error.TestError('No dEQP test filter specified')

        # Some information to help postprocess logs into blacklists later.
        logging.info('ChromeOS BOARD = %s', self._board)
        logging.info('ChromeOS CPU family = %s', self._cpu_type)
        logging.info('ChromeOS GPU family = %s', self._gpu_type)

        # Create a place to put detailed test output logs.
        if self._filter:
            logging.info('dEQP test filter = %s', self._filter)
            self._log_path = os.path.join(tempfile.gettempdir(),
                                          '%s-logs' % self._filter)
        else:
            base = os.path.basename(self._test_names_file)
            # TODO(ihf): Clean this up.
            logging.info('dEQP test filter = %s', os.path.splitext(base)[0])
            self._log_path = os.path.join(tempfile.gettempdir(),
                                          '%s-logs' % base)
        shutil.rmtree(self._log_path, ignore_errors=True)
        os.mkdir(self._log_path)

        if self._gpu_type.startswith('tegra'):
            raise error.TestNAError('dEQP not implemented on tegra. '
                                    'crbug.com/543373')

        self._services.stop_services()
        if self._test_names_file:
            test_cases = [line.rstrip('\n')
                          for line in open(os.path.join(self.bindir,
                                                        self._test_names_file))]
            test_cases = [test
                          for test in test_cases if test and not test.isspace()]
        if self._test_names:
            test_cases = []
            for name in self._test_names.split(','):
                test_cases.extend(self._get_test_cases(name, 'Pass'))
        if self._filter:
            test_cases = self._get_test_cases(self._filter,
                                              options['subset_to_run'])

        if self._debug:
            # LogReader works on /var/log/messages by default.
            self._log_reader = cros_logging.LogReader()
            self._log_reader.set_start_by_current()

        test_results = {}
        if self._hasty:
            logging.info('Running in hasty mode.')
            test_results = self.run_tests_hasty(test_cases)
        else:
            logging.info('Running each test individually.')
            test_results = self.run_tests_individually(test_cases)

        logging.info('Test results:')
        logging.info(test_results)
        self.write_perf_keyval(test_results)

        test_count = 0
        test_failures = 0
        test_passes = 0
        test_skipped = 0
        for result in test_results:
            test_count += test_results[result]
            if result.lower() in ['pass']:
                test_passes += test_results[result]
            if result.lower() not in ['pass', 'notsupported', 'internalerror',
                                      'skipped']:
                test_failures += test_results[result]
            if result.lower() in ['skipped']:
                test_skipped += test_results[result]
        # The text "Completed all tests." is used by the process_log.py script
        # and should always appear at the end of a completed test run.
        logging.info(
            'Completed all tests. Saw %d tests, %d passes and %d failures.',
            test_count, test_passes, test_failures)

        if self._filter and test_count == 0 and options[
                'subset_to_run'] != 'NotPass':
            raise error.TestNAError('No test cases found for filter: %s!' %
                                    self._filter)

        if options['subset_to_run'] == 'NotPass':
            if test_passes:
                raise error.TestWarn(
                    '%d formerly failing tests are passing now.' % test_passes)
        elif test_failures:
            raise error.TestFail('%d/%d tests failed.' %
                                 (test_failures, test_count))
        if test_skipped > 0:
            raise error.TestWarn('%d tests skipped, %d passes' %
                                 (test_skipped, test_passes))
550