graphics_dEQP.py revision 939f57efdd21c6dd47f92f6b8b6f2221c6911595
# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import bz2
import glob
import logging
import os
import shutil
import tempfile
import xml.etree.ElementTree as et
from autotest_lib.client.bin import test, utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.cros import service_stopper


class graphics_dEQP(test.test):
18    """Run the drawElements Quality Program test suite.
19    """
20    version = 1
21    _services = None
22    _hasty = False
23    _hasty_batch_size = 100  # Batch size in hasty mode.
24    _shard_number = 0
25    _shard_count = 1
26    _board = None
27    _cpu_type = None
28    _gpu_type = None
29    _filter = None
30    _width = 256  # Use smallest width for which all tests run/pass.
31    _height = 256  # Use smallest height for which all tests run/pass.
32    _timeout = 70  # Larger than twice the dEQP watchdog timeout at 30s.
33    _test_names = None
34
35    DEQP_BASEDIR = '/usr/local/deqp'
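    # Maps a dEQP test name prefix to its module; run_once uses this to locate
    # the module binary at DEQP_BASEDIR/modules/<module>/deqp-<module>.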
    DEQP_MODULES = {
        'dEQP-EGL': 'egl',
        'dEQP-GLES2': 'gles2',
        'dEQP-GLES3': 'gles3',
        'dEQP-GLES31': 'gles31'
    }

    def initialize(self):
        self._gpu_type = utils.get_gpu_family()
        self._cpu_type = utils.get_cpu_soc_family()
        self._board = utils.get_board()
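        # The UI and power manager are stopped while dEQP runs (see run_once)
        # and restored in cleanup() so they cannot interfere with the GPU
        # during testing.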
        self._services = service_stopper.ServiceStopper(['ui', 'powerd'])

    def cleanup(self):
        if self._services:
            self._services.restore_services()

    def _parse_test_results(self, result_filename, test_results=None):
54        """Handles result files with one or more test results.
55
56        @param result_filename: log file to parse.
57
58        @return: dictionary of parsed test results.
59        """
        xml = ''
        xml_start = False
        xml_complete = False
        xml_bad = False
        result = 'ParseTestResultFail'

        if test_results is None:
            test_results = {}

        if not os.path.isfile(result_filename):
            return test_results
        # TODO(ihf): Add names of failing tests to a list in the results.
        with open(result_filename) as result_file:
            for line in result_file.readlines():
                # If the test terminates early, the XML will be incomplete
                # and should not be parsed.
                if line.startswith('#terminateTestCaseResult'):
                    result = line.strip().split()[1]
                    xml_bad = True
                # Will only see #endTestCaseResult if the test does not
                # terminate early.
                elif line.startswith('#endTestCaseResult'):
                    xml_complete = True
                elif xml_start:
                    xml += line
                elif line.startswith('#beginTestCaseResult'):
                    # If we see another begin before an end then something is
                    # wrong.
                    if xml_start:
                        xml_bad = True
                    else:
                        xml_start = True

                if xml_complete or xml_bad:
                    if xml_complete:
                        myparser = et.XMLParser(encoding='ISO-8859-1')
                        root = et.fromstring(xml, parser=myparser)
                        result = root.find('Result').get('StatusCode').strip()
                        xml_complete = False
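                    # Tally one occurrence of this status code (e.g. 'Pass',
                    # 'Fail', 'NotSupported') or of the early-termination
                    # reason captured from #terminateTestCaseResult above.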
                    test_results[result] = test_results.get(result, 0) + 1
                    xml_bad = False
                    xml_start = False
                    result = 'ParseTestResultFail'
                    xml = ''

        return test_results

    def _load_not_passing_cases(self, test_filter):
        """Load all test cases that are in non-'Pass' expectations."""
        not_passing_cases = []
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_spec = '%s.*' % test_filter
        subset_paths = glob.glob(os.path.join(expectations_dir, subset_spec))
        for subset_file in subset_paths:
            # Filter against extra hasty failures only in hasty mode.
            if ('.Pass.bz2' not in subset_file and
                (self._hasty or '.hasty.' not in subset_file)):
                not_passing_cases.extend(
                    bz2.BZ2File(subset_file).read().splitlines())
        not_passing_cases.sort()
        return not_passing_cases

    def _bootstrap_new_test_cases(self, executable, test_filter):
124        """Ask dEQP for all test cases and removes non-Pass'ing ones.
125
126        This function will query dEQP for test cases and remove all cases that
127        are not in 'Pass'ing expectations from the list. This can be used
128        incrementally updating failing/hangin tests over several runs.
129
130        @param executable: location '/usr/local/deqp/modules/gles2/deqp-gles2'.
131        @param test_filter: string like 'dEQP-GLES2.info', 'dEQP-GLES3.stress'.
132
133        @return: List of dEQP tests to run.
134        """
        test_cases = []
        not_passing_cases = self._load_not_passing_cases(test_filter)
        # We did not find passing cases in the expectations, so assume that
        # everything else found there should not be run this time.
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_spec = '%s.*' % test_filter
        subset_paths = glob.glob(os.path.join(expectations_dir, subset_spec))
        for subset_file in subset_paths:
            # Filter against hasty failures only in hasty mode.
            if self._hasty or '.hasty.' not in subset_file:
                not_passing_cases.extend(
                    bz2.BZ2File(subset_file).read().splitlines())

        # Now ask the dEQP executable nicely for the whole list of tests.
        # It needs to be run in the executable's directory. The output is a
        # plain text file named e.g. 'dEQP-GLES2-cases.txt'.
        command = ('%s '
                   '--deqp-runmode=txt-caselist '
                   '--deqp-surface-type=fbo ' % executable)
        logging.info('Running command %s', command)
        utils.run(command,
                  timeout=60,
                  stderr_is_expected=False,
                  ignore_status=False,
                  stdin=None,
                  stdout_tee=utils.TEE_TO_LOGS,
                  stderr_tee=utils.TEE_TO_LOGS)

        # Now read this caselist file.
        caselist_name = '%s-cases.txt' % test_filter.split('.')[0]
        caselist_file = os.path.join(os.path.dirname(executable), caselist_name)
        if not os.path.isfile(caselist_file):
            raise error.TestError('No caselist file at %s!' % caselist_file)

        # And remove non-Pass'ing expectations from the caselist.
        caselist = open(caselist_file).read().splitlines()
        # Contains lines like "TEST: dEQP-GLES2.capability"
        test_cases = []
        match = 'TEST: %s' % test_filter
        logging.info('Bootstrapping test cases matching "%s".', match)
        for case in caselist:
            if case.startswith(match):
                case = case.split('TEST: ')[1]
                test_cases.append(case)

        test_cases = list(set(test_cases) - set(not_passing_cases))
        if not test_cases:
            raise error.TestError('Unable to bootstrap %s!' % test_filter)

        test_cases.sort()
        return test_cases

    def _get_test_cases(self, executable, test_filter, subset):
189        """Gets the test cases for 'Pass', 'Fail' etc. expectations.
190
191        This function supports bootstrapping of new GPU families and dEQP
192        binaries. In particular if there are not 'Pass' expectations found for
193        this GPU family it will query the dEQP executable for a list of all
194        available tests. It will then remove known non-'Pass'ing tests from this
195        list to avoid getting into hangs/crashes etc.
196
197        @param executable: location '/usr/local/deqp/modules/gles2/deqp-gles2'.
198        @param test_filter: string like 'dEQP-GLES2.info', 'dEQP-GLES3.stress'.
199        @param subset: string from 'Pass', 'Fail', 'Timeout' etc.
200
201        @return: List of dEQP tests to run.
202        """
203        expectations_dir = os.path.join(self.bindir, 'expectations',
204                                        self._gpu_type)
205        subset_name = '%s.%s.bz2' % (test_filter, subset)
206        subset_path = os.path.join(expectations_dir, subset_name)
207        if not os.path.isfile(subset_path):
208            if subset == 'NotPass':
209                # TODO(ihf): Running hasty and NotPass together is an invitation
210                # for trouble (stability). Decide if it should be disallowed.
211                return self._load_not_passing_cases(test_filter)
212            if subset != 'Pass':
213                raise error.TestError('No subset file found for %s!' %
214                                      subset_path)
215            # Ask dEQP for all cases and remove the failing ones.
216            return self._bootstrap_new_test_cases(executable, test_filter)
217
218        test_cases = bz2.BZ2File(subset_path).read().splitlines()
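        # Expectation files are bzip2-compressed text files with one dEQP
        # test case name per line.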
        if not test_cases:
            raise error.TestError('No test cases found in subset file %s!' %
                                  subset_path)
        return test_cases

    def run_tests_individually(self, executable, test_cases):
        """Runs each test case in isolation from the others, but slowly.

        This function runs each test case separately as a command.
        This means a new context for each test etc. Failures will be more
        isolated, but the runtime is quite high due to the overhead.

        @param executable: dEQP executable path.
        @param test_cases: List of dEQP test case strings.

        @return: dictionary of test results.
        """
        test_results = {}
        width = self._width
        height = self._height

        log_path = os.path.join(tempfile.gettempdir(), '%s-logs' % self._filter)
        shutil.rmtree(log_path, ignore_errors=True)
        os.mkdir(log_path)

        i = 0
        for test_case in test_cases:
            i += 1
            logging.info('[%d/%d] TestCase: %s', i, len(test_cases), test_case)
            log_file = '%s.log' % os.path.join(log_path, test_case)
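            # Note: --deqp-watchdog=enable below lets dEQP abort a hung test
            # case on its own; the utils.run timeout (self._timeout) remains
            # the outer safety net (see the _timeout comment above).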
            command = ('%s '
                       '--deqp-case=%s '
                       '--deqp-surface-type=fbo '
                       '--deqp-log-images=disable '
                       '--deqp-watchdog=enable '
                       '--deqp-surface-width=%d '
                       '--deqp-surface-height=%d '
                       '--deqp-log-filename=%s' %
                       (executable, test_case, width, height, log_file))

            try:
                logging.info('Running single: %s', command)
                utils.run(command,
                          timeout=self._timeout,
                          stderr_is_expected=False,
                          ignore_status=True,
                          stdout_tee=utils.TEE_TO_LOGS,
                          stderr_tee=utils.TEE_TO_LOGS)
                result_counts = self._parse_test_results(log_file)
                if result_counts:
                    result = result_counts.keys()[0]
                else:
                    result = 'Unknown'
            except error.CmdTimeoutError:
                result = 'TestTimeout'
            except error.CmdError:
                result = 'CommandFailed'
            except Exception:
                result = 'UnexpectedError'

            logging.info('Result: %s', result)
            test_results[result] = test_results.get(result, 0) + 1

        return test_results

    def run_tests_hasty(self, executable, test_cases):
        """Runs tests as quickly as possible.

        This function runs all the test cases, but does not isolate tests and
        may take shortcuts/not run all tests to provide maximum coverage at
        minimum runtime.

        @param executable: dEQP executable path.
        @param test_cases: List of dEQP test case strings.

        @return: dictionary of test results.
        """
        # TODO(ihf): It saves half the test time to use 32*32 but a few tests
        # fail as they need surfaces larger than 200*200.
        width = self._width
        height = self._height
        results = {}

        log_path = os.path.join(tempfile.gettempdir(), '%s-logs' % self._filter)
        shutil.rmtree(log_path, ignore_errors=True)
        os.mkdir(log_path)

        # Cap the batch timeout so that all tests combined take less than 1h
        # in hasty mode.
        batch_timeout = min(3600, self._timeout * self._hasty_batch_size)
        num_test_cases = len(test_cases)

        # We are dividing the number of tests into several shards but run them
        # in smaller batches. We start and end at multiples of batch_size
        # boundaries.
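        # Worked example (integer division): with 1000 test cases,
        # shard_count=4 and a batch size of 100, shard 0 covers cases
        # [0, 200), shard 1 [200, 500), shard 2 [500, 700) and shard 3 the
        # remaining [700, 1000).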
        shard_start = self._hasty_batch_size * (
            (self._shard_number *
             (num_test_cases / self._shard_count)) / self._hasty_batch_size)
        shard_end = self._hasty_batch_size * (
            ((self._shard_number + 1) *
             (num_test_cases / self._shard_count)) / self._hasty_batch_size)
        # The last shard will be slightly larger than the others. Extend it to
        # cover all test cases, avoiding rounding problems with the integer
        # arithmetic done to compute shard_start and shard_end.
        if self._shard_number + 1 == self._shard_count:
            shard_end = num_test_cases

        for batch in xrange(shard_start, shard_end, self._hasty_batch_size):
            batch_to = min(batch + self._hasty_batch_size, shard_end)
            batch_cases = '\n'.join(test_cases[batch:batch_to])
            command = ('%s '
                       '--deqp-stdin-caselist '
                       '--deqp-surface-type=fbo '
                       '--deqp-log-images=disable '
                       '--deqp-visibility=hidden '
                       '--deqp-watchdog=enable '
                       '--deqp-surface-width=%d '
                       '--deqp-surface-height=%d ' % (executable, width,
                                                      height))

            log_file = os.path.join(log_path,
                                    '%s_hasty_%d.log' % (self._filter, batch))

            command += '--deqp-log-filename=' + log_file
            logging.info('Running tests %d...%d out of %d:\n%s\n%s', batch + 1,
                         batch_to, num_test_cases, command, batch_cases)

            try:
                utils.run(command,
                          timeout=batch_timeout,
                          stderr_is_expected=False,
                          ignore_status=False,
                          stdin=batch_cases,
                          stdout_tee=utils.TEE_TO_LOGS,
                          stderr_tee=utils.TEE_TO_LOGS)
            except Exception:
                pass
            # We are trying to handle all errors by parsing the log file.
            results = self._parse_test_results(log_file, results)
            logging.info(results)
        return results

    def run_once(self, opts=None):
        options = dict(filter='',
                       test_names='',  # e.g., dEQP-GLES3.info.version,
                                       # dEQP-GLES2.functional,
                                       # dEQP-GLES3.accuracy.texture, etc.
                       timeout=self._timeout,
                       subset_to_run='Pass',  # Pass, Fail, Timeout, NotPass...
                       hasty='False',
                       shard_number='0',
                       shard_count='1')
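        # opts is a list of 'key=value' strings (parsed by
        # utils.args_to_dict), typically passed in from the control file,
        # e.g. ['filter=dEQP-GLES2.functional', 'hasty=True', 'shard_count=2'].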
        if opts is None:
            opts = []
        options.update(utils.args_to_dict(opts))
        logging.info('Test Options: %s', options)

        self._hasty = (options['hasty'] == 'True')
        self._timeout = int(options['timeout'])
        self._test_names = options['test_names']
        self._shard_number = int(options['shard_number'])
        self._shard_count = int(options['shard_count'])
        if not self._test_names:
            self._filter = options['filter']
            if not self._filter:
                raise error.TestError('No dEQP test filter specified')

        # Some information to help postprocess logs into blacklists later.
        logging.info('ChromeOS BOARD = %s', self._board)
        logging.info('ChromeOS CPU family = %s', self._cpu_type)
        logging.info('ChromeOS GPU family = %s', self._gpu_type)
        logging.info('dEQP test filter = %s', self._filter)

        if self._gpu_type == 'pinetrail':
            raise error.TestNAError('dEQP not implemented on pinetrail. '
                                    'crbug.com/532691')
        if self._gpu_type == 'mali':
            raise error.TestNAError('dEQP not implemented on mali. '
                                    'crbug.com/543372')
        if self._gpu_type == 'tegra':
            raise error.TestNAError('dEQP not implemented on tegra. '
                                    'crbug.com/543373')

        # Determine module from test_names or filter.
        if self._test_names:
            test_prefix = self._test_names.split('.')[0]
            self._filter = '%s.filter_args' % test_prefix
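            # The synthesized '<prefix>.filter_args' value is only used for
            # log naming and messages; the actual cases run come from the
            # explicit test_names list below.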
        elif self._filter:
            test_prefix = self._filter.split('.')[0]
        if test_prefix in self.DEQP_MODULES:
            module = self.DEQP_MODULES[test_prefix]
        elif self._test_names:
            raise error.TestError('Invalid test names: %s' % self._test_names)
        else:
            raise error.TestError('Invalid test filter: %s' % self._filter)

        executable_path = os.path.join(self.DEQP_BASEDIR, 'modules', module)
        executable = os.path.join(executable_path, 'deqp-%s' % module)

        self._services.stop_services()

        # Must be in the executable directory when running for it to find its
        # test data files!
        os.chdir(executable_path)
        if self._test_names:
            test_cases = []
            for name in self._test_names.split(','):
                test_cases.extend(self._get_test_cases(executable, name,
                                                       'Pass'))
        else:
            test_cases = self._get_test_cases(executable, self._filter,
                                              options['subset_to_run'])

        test_results = {}
        if self._hasty:
            logging.info('Running in hasty mode.')
            test_results = self.run_tests_hasty(executable, test_cases)
        else:
            logging.info('Running each test individually.')
            test_results = self.run_tests_individually(executable, test_cases)

        logging.info('Test results:')
        logging.info(test_results)
        self.write_perf_keyval(test_results)

        test_count = 0
        test_failures = 0
        test_passes = 0
        for result in test_results:
            test_count += test_results[result]
            if result.lower() in ['pass']:
                test_passes += test_results[result]
            if result.lower() not in ['pass', 'notsupported', 'internalerror']:
                test_failures += test_results[result]
        # The text "Completed all tests." is used by the process_log.py script
        # and should always appear at the end of a completed test run.
        logging.info(
            'Completed all tests. Saw %d tests, %d passes and %d failures.',
            test_count, test_passes, test_failures)

        if test_count == 0:
            raise error.TestWarn('No test cases found for filter: %s!' %
                                 self._filter)

        if options['subset_to_run'] == 'NotPass':
            if test_passes:
                raise error.TestWarn(
                    '%d formerly failing tests are passing now.' % test_passes)
        elif test_failures:
            raise error.TestFail('%d/%d tests failed.' %
                                 (test_failures, test_count))
469