# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import bz2
import glob
import json
import logging
import os
import re
import shutil
import tempfile
import time
import xml.etree.ElementTree as et
from autotest_lib.client.bin import test, utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.cros import cros_logging, service_stopper
from autotest_lib.client.cros.graphics import graphics_utils

# In hasty mode, rerun failing tests only if they are fewer than this
# fraction of all results.
RERUN_RATIO = 0.02
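# Example (illustrative numbers): with 1000 parsed results and RERUN_RATIO of
# 0.02, failing tests are rerun individually only if fewer than 20 failed.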


class graphics_dEQP(graphics_utils.GraphicsTest):
    """Run the drawElements Quality Program test suite.
    """
    version = 1
    _services = None
    _hasty = False
    _hasty_batch_size = 100  # Batch size in hasty mode.
    _shard_number = 0
    _shard_count = 1
    _board = None
    _cpu_type = None
    _gpu_type = None
    _surface = None
    _filter = None
    _width = 256  # Use smallest width for which all tests run/pass.
    _height = 256  # Use smallest height for which all tests run/pass.
    _timeout = 70  # Larger than twice the dEQP watchdog timeout at 30s.
    _test_names = None
    _test_names_file = None
    _log_path = None  # Location for detailed test output logs (in /tmp/).
    _debug = False  # Analyze kernel messages.
    _log_reader = None  # Reader to analyze (kernel) messages log.
    _log_filter = re.compile('.* .* kernel:')  # Kernel messages filter.
    _env = None  # Environment for test processes.
    DEQP_MODULES = {
        'dEQP-EGL': 'egl',
        'dEQP-GLES2': 'gles2',
        'dEQP-GLES3': 'gles3',
        'dEQP-GLES31': 'gles31',
        'dEQP-VK': 'vk',
    }
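    # DEQP_MODULES maps the test name prefix (e.g. 'dEQP-GLES2') to the api
    # token used via GraphicsApiHelper to look up the matching dEQP executable.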
    # We do not consider these results as failures.
    TEST_RESULT_FILTER = [
        'pass', 'notsupported', 'internalerror', 'qualitywarning',
        'compatibilitywarning', 'skipped'
    ]

    def initialize(self):
        super(graphics_dEQP, self).initialize()
        self._api_helper = graphics_utils.GraphicsApiHelper()
        self._board = utils.get_board()
        self._cpu_type = utils.get_cpu_soc_family()
        self._gpu_type = utils.get_gpu_family()

        # deqp may depend on libraries that are present only on test images.
        # Those libraries are installed in /usr/local.
        self._env = os.environ.copy()
        old_ld_path = self._env.get('LD_LIBRARY_PATH', '')
        if old_ld_path:
            self._env['LD_LIBRARY_PATH'] = '/usr/local/lib:/usr/local/lib64:' + old_ld_path
        else:
            self._env['LD_LIBRARY_PATH'] = '/usr/local/lib:/usr/local/lib64'

        self._services = service_stopper.ServiceStopper(['ui', 'powerd'])
        # Valid choices are fbo and pbuffer. The latter avoids dEQP assumptions.
        self._surface = 'pbuffer'
        self._services.stop_services()

    def cleanup(self):
        if self._services:
            self._services.restore_services()
        super(graphics_dEQP, self).cleanup()

    def _parse_test_results(self, result_filename,
                            test_results=None, failing_test=None):
        """Handles result files with one or more test results.

        @param result_filename: log file to parse.
        @param test_results: Dictionary to which parsed results are added.
        @param failing_test: List to which failing test cases are appended.

        @return: dictionary of parsed test results.
        """
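        # Sketch of the .qpa log blocks consumed below (format inferred from
        # this parser, not an authoritative spec):
        #   #beginTestCaseResult dEQP-GLES2.info.version
        #   <TestCaseResult CasePath="dEQP-GLES2.info.version" ...>
        #     <Result StatusCode="Pass"/>
        #   </TestCaseResult>
        #   #endTestCaseResult
        # On an early exit (crash/watchdog), '#terminateTestCaseResult <reason>'
        # appears instead of '#endTestCaseResult'.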
        xml = ''
        xml_start = False
        xml_complete = False
        xml_bad = False
        result = 'ParseTestResultFail'

        if test_results is None:
            test_results = {}

        if not os.path.isfile(result_filename):
            return test_results

        with open(result_filename) as result_file:
            for line in result_file.readlines():
                # If the test terminates early, the XML will be incomplete
                # and should not be parsed.
                if line.startswith('#terminateTestCaseResult'):
                    result = line.strip().split()[1]
                    xml_bad = True
                # Will only see #endTestCaseResult if the test does not
                # terminate early.
                elif line.startswith('#endTestCaseResult'):
                    xml_complete = True
                elif xml_start:
                    xml += line
                elif line.startswith('#beginTestCaseResult'):
                    # If we see another begin before an end then something is
                    # wrong.
                    if xml_start:
                        xml_bad = True
                    else:
                        xml_start = True

                if xml_complete or xml_bad:
                    if xml_complete:
                        myparser = et.XMLParser(encoding='ISO-8859-1')
                        root = et.fromstring(xml, parser=myparser)
                        test_case = root.attrib['CasePath']
                        result = root.find('Result').get('StatusCode').strip()
                        xml_complete = False
                    test_results[result] = test_results.get(result, 0) + 1
                    if (result.lower() not in self.TEST_RESULT_FILTER and
                        failing_test is not None):
                        failing_test.append(test_case)
                    xml_bad = False
                    xml_start = False
                    result = 'ParseTestResultFail'
                    xml = ''

        return test_results

    def _load_not_passing_cases(self, test_filter):
        """Load all test cases that are in non-'Pass' expectations."""
        not_passing_cases = []
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_spec = '%s.*' % test_filter
        subset_paths = glob.glob(os.path.join(expectations_dir, subset_spec))
        for subset_file in subset_paths:
            # Filter against extra hasty failures only in hasty mode.
            if (not '.Pass.bz2' in subset_file and
               (self._hasty or '.hasty.' not in subset_file)):
                not_passing_cases.extend(
                    bz2.BZ2File(subset_file).read().splitlines())
        not_passing_cases.sort()
        return not_passing_cases

    def _translate_name_to_api(self, name):
        """Translate test_names or test_filter to api."""
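        # e.g. 'dEQP-GLES2.functional.color_clear' maps to 'gles2'.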
        test_prefix = name.split('.')[0]
        if test_prefix in self.DEQP_MODULES:
            api = self.DEQP_MODULES[test_prefix]
        else:
            raise error.TestFail('Failed: Invalid test name: %s' % name)
        return api

    def _get_executable(self, api):
        """Return the path to the dEQP executable for the given api."""
        return self._api_helper.get_deqp_executable(api)

    def _can_run(self, api):
        """Check whether the given api is supported on this board."""
        return api in self._api_helper.get_supported_apis()

    def _bootstrap_new_test_cases(self, test_filter):
        """Ask dEQP for all test cases and remove non-'Pass'ing ones.

        This function will query dEQP for test cases and remove all cases that
        are not in 'Pass'ing expectations from the list. This can be used for
        incrementally updating failing/hanging tests over several runs.

        @param test_filter: string like 'dEQP-GLES2.info', 'dEQP-GLES3.stress'.

        @return: List of dEQP tests to run.
        """
        test_cases = []
        api = self._translate_name_to_api(test_filter)
        if self._can_run(api):
            executable = self._get_executable(api)
        else:
            return test_cases

        # Must be in the executable directory when running for it to find its
        # test data files!
        os.chdir(os.path.dirname(executable))

        not_passing_cases = self._load_not_passing_cases(test_filter)
        # We did not find 'Pass' expectations, so assume that everything else
        # present in the expectations directory should not be run this time.
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_spec = '%s.*' % test_filter
        subset_paths = glob.glob(os.path.join(expectations_dir, subset_spec))
        for subset_file in subset_paths:
            # Filter against hasty failures only in hasty mode.
            if self._hasty or '.hasty.' not in subset_file:
                not_passing_cases.extend(
                    bz2.BZ2File(subset_file).read().splitlines())

        # Now ask the dEQP executable nicely for the whole list of tests. This
        # needs to be run in the executable directory. The output is a plain
        # text file named e.g. 'dEQP-GLES2-cases.txt'.
        command = ('%s '
                   '--deqp-runmode=txt-caselist '
                   '--deqp-surface-type=%s '
                   '--deqp-gl-config-name=rgba8888d24s8ms0 ' % (executable,
                                                                self._surface))
        logging.info('Running command %s', command)
        utils.run(command,
                  env=self._env,
                  timeout=60,
                  stderr_is_expected=False,
                  ignore_status=False,
                  stdin=None)

        # Now read this caselist file.
        caselist_name = '%s-cases.txt' % test_filter.split('.')[0]
        caselist_file = os.path.join(os.path.dirname(executable), caselist_name)
        if not os.path.isfile(caselist_file):
            raise error.TestFail('Failed: No caselist file at %s!' %
                                 caselist_file)

        # And remove non-Pass'ing expectations from caselist.
        caselist = open(caselist_file).read().splitlines()
        # Contains lines like "TEST: dEQP-GLES2.capability"
        test_cases = []
        match = 'TEST: %s' % test_filter
        logging.info('Bootstrapping test cases matching "%s".', match)
        for case in caselist:
            if case.startswith(match):
                case = case.split('TEST: ')[1]
                test_cases.append(case)

        test_cases = list(set(test_cases) - set(not_passing_cases))
        if not test_cases:
            raise error.TestFail('Failed: Unable to bootstrap %s!' % test_filter)

        test_cases.sort()
        return test_cases

    def _get_test_cases_from_names_file(self):
        if os.path.exists(self._test_names_file):
            file_path = self._test_names_file
            test_cases = [line.rstrip('\n') for line in open(file_path)]
            return [test for test in test_cases if test and not test.isspace()]
        return []

    def _get_test_cases(self, test_filter, subset):
        """Gets the test cases for 'Pass', 'Fail' etc. expectations.

        This function supports bootstrapping of new GPU families and dEQP
        binaries. In particular, if no 'Pass' expectations are found for this
        GPU family, it will query the dEQP executable for a list of all
        available tests. It will then remove known non-'Pass'ing tests from
        this list to avoid getting into hangs/crashes etc.

        @param test_filter: string like 'dEQP-GLES2.info', 'dEQP-GLES3.stress'.
        @param subset: string from 'Pass', 'Fail', 'Timeout' etc.

        @return: List of dEQP tests to run.
        """
        expectations_dir = os.path.join(self.bindir, 'expectations',
                                        self._gpu_type)
        subset_name = '%s.%s.bz2' % (test_filter, subset)
        subset_path = os.path.join(expectations_dir, subset_name)
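        # e.g. <bindir>/expectations/<gpu_type>/dEQP-GLES2.functional.Pass.bz2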
        if not os.path.isfile(subset_path):
            if subset == 'NotPass':
                # TODO(ihf): Running hasty and NotPass together is an invitation
                # for trouble (stability). Decide if it should be disallowed.
                return self._load_not_passing_cases(test_filter)
            if subset != 'Pass':
                raise error.TestFail('Failed: No subset file found for %s!' %
                                     subset_path)
            # Ask dEQP for all cases and remove the failing ones.
            return self._bootstrap_new_test_cases(test_filter)

        test_cases = bz2.BZ2File(subset_path).read().splitlines()
        if not test_cases:
            raise error.TestFail(
                'Failed: No test cases found in subset file %s!' % subset_path)
        return test_cases

    def _run_tests_individually(self, test_cases, failing_test=None):
        """Runs tests isolated from each other, but slowly.

        This function runs each test case as a separate command. This means a
        new context for each test etc. Failures will be better isolated, but
        the runtime is quite high due to the per-test overhead.

        @param test_cases: List of dEQP test case strings.
        @param failing_test: Tests considered failed will be appended to it.

        @return: dictionary of test results.
        """
        test_results = {}
        width = self._width
        height = self._height

        i = 0
        for test_case in test_cases:
            i += 1
            logging.info('[%d/%d] TestCase: %s', i, len(test_cases), test_case)
            result_prefix = os.path.join(self._log_path, test_case)
            log_file = '%s.log' % result_prefix
            debug_file = '%s.debug' % result_prefix
            api = self._translate_name_to_api(test_case)
            if not self._can_run(api):
                result = 'Skipped'
                logging.info('Skipping on %s: %s', self._gpu_type, test_case)
            else:
                executable = self._get_executable(api)
                command = ('%s '
                           '--deqp-case=%s '
                           '--deqp-surface-type=%s '
                           '--deqp-gl-config-name=rgba8888d24s8ms0 '
                           '--deqp-log-images=disable '
                           '--deqp-watchdog=enable '
                           '--deqp-surface-width=%d '
                           '--deqp-surface-height=%d '
                           '--deqp-log-filename=%s' % (
                               executable,
                               test_case,
                               self._surface,
                               width,
                               height,
                               log_file)
                           )
                logging.debug('Running single: %s', command)

                # Must be in the executable directory when running for it to
                # find its test data files!
                os.chdir(os.path.dirname(executable))

                # Must initialize because some errors don't repopulate
                # run_result, leaving old results.
                run_result = {}
                start_time = time.time()
                try:
                    run_result = utils.run(command,
                                           env=self._env,
                                           timeout=self._timeout,
                                           stderr_is_expected=False,
                                           ignore_status=True)
                    result_counts = self._parse_test_results(
                        log_file,
                        failing_test=failing_test)
                    if result_counts:
                        result = result_counts.keys()[0]
                    else:
                        result = 'Unknown'
                except error.CmdTimeoutError:
                    result = 'TestTimeout'
                except error.CmdError:
                    result = 'CommandFailed'
                except Exception:
                    result = 'UnexpectedError'
                end_time = time.time()

                if self._debug:
                    # Collect debug info and save to json file.
                    output_msgs = {
                        'start_time': start_time,
                        'end_time': end_time,
                        'stdout': [],
                        'stderr': [],
                        'dmesg': []
                    }
                    logs = self._log_reader.get_logs()
                    self._log_reader.set_start_by_current()
                    output_msgs['dmesg'] = [
                        msg for msg in logs.splitlines()
                        if self._log_filter.match(msg)
                    ]
                    if run_result:
                        output_msgs['stdout'] = run_result.stdout.splitlines()
                        output_msgs['stderr'] = run_result.stderr.splitlines()
                    with open(debug_file, 'w') as fd:
                        json.dump(
                            output_msgs,
                            fd,
                            indent=4,
                            separators=(',', ' : '),
                            sort_keys=True)

            logging.info('Result: %s', result)
            test_results[result] = test_results.get(result, 0) + 1

        return test_results

    def _run_tests_hasty(self, test_cases, failing_test=None):
        """Runs tests as quickly as possible.

        This function runs all the test cases, but does not isolate tests and
        may take shortcuts/not run all tests to provide maximum coverage at
        minimum runtime.

        @param test_cases: List of dEQP test case strings.
        @param failing_test: Tests considered failed will be appended to it.

        @return: dictionary of test results.
        """
        # TODO(ihf): It saves half the test time to use 32*32 but a few tests
        # fail as they need surfaces larger than 200*200.
        width = self._width
        height = self._height
        results = {}

        # All tests combined should take less than 1h in hasty mode.
        batch_timeout = min(3600, self._timeout * self._hasty_batch_size)
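        # e.g. with the default _timeout of 70s and a batch size of 100, the
        # per-batch timeout is capped at 3600s rather than 7000s.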
        num_test_cases = len(test_cases)

        # We are dividing the number of tests into several shards but run them
        # in smaller batches. We start and end at multiples of batch_size
        # boundaries.
        shard_start = self._hasty_batch_size * (
            (self._shard_number * (num_test_cases / self._shard_count)) /
            self._hasty_batch_size)
        shard_end = self._hasty_batch_size * ((
            (self._shard_number + 1) * (num_test_cases / self._shard_count)) /
                                              self._hasty_batch_size)
        # The last shard will be slightly larger than the others. Extend it to
        # cover all test cases, avoiding rounding problems with the integer
        # arithmetic done to compute shard_start and shard_end.
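        # Worked example (illustrative numbers): with 1000 test cases, 4 shards
        # and a batch size of 100, shard 1 covers cases [200, 500) and the last
        # shard (3) is extended below to cover [700, 1000).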
        if self._shard_number + 1 == self._shard_count:
            shard_end = num_test_cases

        for batch in xrange(shard_start, shard_end, self._hasty_batch_size):
            batch_to = min(batch + self._hasty_batch_size, shard_end)
            batch_cases = '\n'.join(test_cases[batch:batch_to])
            # This assumes all tests in the batch are kicked off via the same
            # executable.
            api = self._translate_name_to_api(test_cases[batch])
            if not self._can_run(api):
                logging.info('Skipping tests on %s: %s', self._gpu_type,
                             batch_cases)
            else:
                executable = self._get_executable(api)
                log_file = os.path.join(self._log_path,
                                        '%s_hasty_%d.log' % (self._filter, batch))
                command = ('%s '
                           '--deqp-stdin-caselist '
                           '--deqp-surface-type=%s '
                           '--deqp-gl-config-name=rgba8888d24s8ms0 '
                           '--deqp-log-images=disable '
                           '--deqp-visibility=hidden '
                           '--deqp-watchdog=enable '
                           '--deqp-surface-width=%d '
                           '--deqp-surface-height=%d '
                           '--deqp-log-filename=%s' % (
                               executable,
                               self._surface,
                               width,
                               height,
                               log_file)
                           )

                logging.info('Running tests %d...%d out of %d:\n%s\n%s',
                             batch + 1, batch_to, num_test_cases, command,
                             batch_cases)

                # Must be in the executable directory when running for it to
                # find its test data files!
                os.chdir(os.path.dirname(executable))

                try:
                    utils.run(command,
                              env=self._env,
                              timeout=batch_timeout,
                              stderr_is_expected=False,
                              ignore_status=False,
                              stdin=batch_cases)
                except Exception:
                    pass
                # We are trying to handle all errors by parsing the log file.
                results = self._parse_test_results(log_file, results,
                                                   failing_test)
                logging.info(results)
        return results

    def _run_once(self, test_cases):
        """Run dEQP test_cases in individual/hasty mode.

        @param test_cases: test cases to run.
        """
        failing_test = []
        if self._hasty:
            logging.info('Running in hasty mode.')
            test_results = self._run_tests_hasty(test_cases, failing_test)
        else:
            logging.info('Running each test individually.')
            test_results = self._run_tests_individually(test_cases,
                                                        failing_test)
        return test_results, failing_test

    def run_once(self, opts=None):
        options = dict(filter='',
                       test_names='',  # e.g., dEQP-GLES3.info.version,
                                       # dEQP-GLES2.functional,
                                       # dEQP-GLES3.accuracy.texture, etc.
                       test_names_file='',
                       timeout=self._timeout,
                       subset_to_run='Pass',  # Pass, Fail, Timeout, NotPass...
                       hasty='False',
                       shard_number='0',
                       shard_count='1',
                       debug='False',
                       perf_failure_description=None)
        if opts is None:
            opts = []
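        # 'opts' is a list of 'key=value' strings parsed by args_to_dict below,
        # e.g. (illustrative): ['filter=dEQP-GLES2.functional', 'hasty=True'].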
        options.update(utils.args_to_dict(opts))
        logging.info('Test Options: %s', options)

        self._hasty = (options['hasty'] == 'True')
        self._timeout = int(options['timeout'])
        self._test_names_file = options['test_names_file']
        self._test_names = options['test_names']
        self._shard_number = int(options['shard_number'])
        self._shard_count = int(options['shard_count'])
        self._debug = (options['debug'] == 'True')
        if not (self._test_names_file or self._test_names):
            self._filter = options['filter']
            if not self._filter:
                raise error.TestFail('Failed: No dEQP test filter specified')
        if options['perf_failure_description']:
            self._test_failure_description = options['perf_failure_description']
        else:
            # Do not report failure if failure description is not specified.
            self._test_failure_report_enable = False

        # Some information to help post-process logs into blacklists later.
        logging.info('ChromeOS BOARD = %s', self._board)
        logging.info('ChromeOS CPU family = %s', self._cpu_type)
        logging.info('ChromeOS GPU family = %s', self._gpu_type)

        # Create a place to put detailed test output logs.
        filter_name = self._filter or os.path.basename(self._test_names_file)
        logging.info('dEQP test filter = %s', filter_name)
        self._log_path = os.path.join(tempfile.gettempdir(), '%s-logs' %
                                                             filter_name)
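        # e.g. /tmp/dEQP-GLES2.functional-logs when the filter is
        # 'dEQP-GLES2.functional' (gettempdir() is typically /tmp).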
        shutil.rmtree(self._log_path, ignore_errors=True)
        os.mkdir(self._log_path)

        # Load the tests specified by test_names_file, test_names or filter.
        test_cases = []
        if self._test_names_file:
            test_cases = self._get_test_cases_from_names_file()
        elif self._test_names:
            test_cases = []
            for name in self._test_names.split(','):
                test_cases.extend(self._get_test_cases(name, 'Pass'))
        elif self._filter:
            test_cases = self._get_test_cases(self._filter,
                                              options['subset_to_run'])

        if self._debug:
            # LogReader works on /var/log/messages by default.
            self._log_reader = cros_logging.LogReader()
            self._log_reader.set_start_by_current()

        # Assume all tests failed at the beginning.
        for test_case in test_cases:
            self.add_failures(test_case)

        test_results, failing_test = self._run_once(test_cases)
        # Rerun the failing tests if we are in hasty mode.
        if self._hasty and len(failing_test) > 0:
            if len(failing_test) < sum(test_results.values()) * RERUN_RATIO:
                logging.info("Because we are in hasty mode, we will rerun the "
                             "failing tests one at a time.")
                rerun_results, failing_test = self._run_once(failing_test)
                # Zero out the failing results in test_results, then merge in
                # the results from the rerun.
                for result in test_results:
                    if result.lower() not in self.TEST_RESULT_FILTER:
                        test_results[result] = 0
                for result in rerun_results:
                    test_results[result] = (test_results.get(result, 0) +
                                            rerun_results[result])
            else:
                logging.info("There are too many failing tests. It would "
                             "take too long to rerun them. Giving up.")

        # Remove tests that did not fail from the failure records reported to
        # the chrome perf dashboard.
        for test_case in test_cases:
            if test_case not in failing_test:
                self.remove_failures(test_case)

        logging.info('Test results:')
        logging.info(test_results)
        logging.debug('Test Failed: %s', failing_test)
        self.write_perf_keyval(test_results)

        test_count = 0
        test_failures = 0
        test_passes = 0
        test_skipped = 0
        for result in test_results:
            test_count += test_results[result]
            if result.lower() in ['pass']:
                test_passes += test_results[result]
            if result.lower() not in self.TEST_RESULT_FILTER:
                test_failures += test_results[result]
            if result.lower() in ['skipped']:
                test_skipped += test_results[result]
        # The text "Completed all tests." is used by the process_log.py script
        # and should always appear at the end of a completed test run.
        logging.info(
            'Completed all tests. Saw %d tests, %d passes and %d failures.',
            test_count, test_passes, test_failures)

        if self._filter and test_count == 0 and options[
                'subset_to_run'] != 'NotPass':
            logging.warning('No test cases found for filter: %s!', self._filter)

        if test_failures:
            raise error.TestFail('Failed: on %s %d/%d tests failed.' %
                                 (self._gpu_type, test_failures, test_count))
        if test_skipped > 0:
            logging.info('On %s %d tests skipped, %d passes.',
                         self._gpu_type, test_skipped, test_passes)