# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
Test to generate the AFDO profile for a set of ChromeOS benchmarks.

This will run a pre-determined set of benchmarks on the DUT under
the monitoring of the Linux "perf" tool. The resulting perf.data
file will then be copied to Google Storage (GS) where it can be
used by the AFDO optimized build.

Given that the telemetry benchmarks are quite unstable on ChromeOS at
this point, this test also supports a mode where the benchmarks are
executed outside of the telemetry framework. This is not identical to
executing the benchmarks under telemetry, since no telemetry
measurements are taken, but for the purposes of profiling Chrome it
should be close enough.

Example invocation:
/usr/bin/test_that --debug --board=lumpy <DUT IP>
  --args="ignore_failures=True local=True gs_test_location=True"
  telemetry_AFDOGenerate
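
To run the benchmarks outside of the telemetry framework instead,
pass minimal_telemetry=True in the --args list above.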
24"""

import bz2
import logging
import os
import time

from autotest_lib.client.common_lib import error, utils
from autotest_lib.server import autotest
from autotest_lib.server import profilers
from autotest_lib.server import test
from autotest_lib.server import utils
from autotest_lib.server.cros import telemetry_runner

# List of benchmarks to run to capture profile information. This is
# based on the "superhero" and "perf_v2" lists and other telemetry
# benchmarks. The goal is to have a short list that is as representative
# as possible and takes a short time to execute. At this point the
# list of benchmarks is in flux.
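# Each entry is a tuple: the benchmark name, optionally followed by a
# tuple of extra arguments to pass to the telemetry execution script
# (see how run_once unpacks these entries below).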
TELEMETRY_AFDO_BENCHMARKS = (
    ('page_cycler_v2.typical_25', ('--pageset-repeat=1',)),
    ('page_cycler_v2.intl_ja_zh', ('--pageset-repeat=1',)),
    # Temporarily disable these benchmarks to finish in 60 minutes.
    # ('page_cycler_v2.intl_ar_fa_he', ('--pageset-repeat=1',)),
    # ('page_cycler_v2.intl_es_fr_pt-BR', ('--pageset-repeat=1',)),
    # ('page_cycler_v2.intl_ko_th_vi', ('--pageset-repeat=1',)),
    # ('page_cycler_v2.intl_hi_ru', ('--pageset-repeat=1',)),
    ('octane',),
    ('kraken',),
    ('speedometer',),
    ('dromaeo.domcoreattr',),
    ('dromaeo.domcoremodify',),
    )

# Temporarily disable this benchmark because it is failing a
# lot. Filed chromium:590127
# ('smoothness.tough_webgl_cases',)

# Some benchmarks removed from the profile set:
# 'page_cycler.morejs' -> uninteresting, seems to fail frequently,
# 'page_cycler.moz' -> seems very old.
# 'media.tough_video_cases' -> removed this because it does not bring
#                              any benefit and takes more than 12 mins

# List of boards where this test can be run.  Currently, it needs a
# machine with at least 4GB of memory or 2GB of /tmp.
# This must be consistent with chromite.
GCC_BOARDS = ['lumpy']

# Should be disjoint with GCC_BOARDS.
LLVM_BOARDS = ['chell', 'samus']

class telemetry_AFDOGenerate(test.test):
    """
    Run one or more telemetry benchmarks under the "perf" monitoring
    tool, generate a "perf.data" file and upload it to GS for consumption
    by the AFDO optimized build.
    """
    version = 1

    def run_once(self, host, args):
        """Run a set of telemetry benchmarks.

        @param host: Host machine where test is run
        @param args: A dictionary of the arguments that were passed
                to this test.
        @returns None.
        """
        self._host = host
        host_board = host.get_board().split(':')[1]

        if not (host_board in LLVM_BOARDS or host_board in GCC_BOARDS):
            raise error.TestFail(
                    'This test cannot be run on board %s' % host_board)

        self._parse_args(args)

        if self._minimal_telemetry:
            self._run_tests_minimal_telemetry()
        else:
            self._telemetry_runner = telemetry_runner.TelemetryRunner(
                    self._host, self._local, telemetry_on_dut=False)

            for benchmark_info in TELEMETRY_AFDO_BENCHMARKS:
                benchmark = benchmark_info[0]
                # Named benchmark_args to avoid shadowing the 'args'
                # parameter of run_once.
                benchmark_args = (() if len(benchmark_info) == 1
                                  else benchmark_info[1])
                try:
                    self._run_test_with_retry(benchmark, *benchmark_args)
                except error.TestBaseException:
                    if not self._ignore_failures:
                        raise
                    else:
                        logging.info('Ignoring failure from benchmark %s.',
                                     benchmark)


    def after_run_once(self):
        """After the profile information has been collected, compress it
        and upload it to GS.
        """
        PERF_FILE = 'perf.data'
        COMP_PERF_FILE = 'chromeos-chrome-%s-%s.perf.data'
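        # COMP_PERF_FILE expands to, e.g.,
        # chromeos-chrome-amd64-<chrome-version>.perf.data (before the
        # bz2 compression applied below).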
        perf_data = os.path.join(self.profdir, PERF_FILE)
        comp_data = os.path.join(self.profdir, COMP_PERF_FILE % (
                self._arch, self._version))
        compressed = self._compress_file(perf_data, comp_data)
        self._gs_upload(compressed, os.path.basename(compressed))

        # Also create a copy of this file using "LATEST" as the version
        # so it can be found in case the builder is looking for a version
        # number that does not match. It is ok to use a slightly old
        # version of this file for the optimized build.
        latest_data = COMP_PERF_FILE % (self._arch, 'LATEST')
        latest_compressed = self._get_compressed_name(latest_data)
        self._gs_upload(compressed, latest_compressed)


    def _parse_args(self, args):
        """Parses input arguments to this autotest.

        @param args: Options->values dictionary,
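                e.g. {'ignore_failures': 'True', 'local': 'True'}.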
        @raises error.TestFail if a bad option is passed.
        """

        # Set default values for the options.
        # Architecture for which we are collecting afdo data.
        self._arch = 'amd64'
        # Use an alternate GS location where everyone can write.
        # The default depends on whether this is executing in
        # the lab environment or not.
        self._gs_test_location = not utils.host_is_in_lab_zone(
                self._host.hostname)
        # Ignore individual test failures.
        self._ignore_failures = False
        # Use local copy of telemetry instead of using the dev server copy.
        self._local = False
        # Chrome version to which the AFDO data corresponds.
        self._version, _ = self._host.get_chrome_version()
        # Use only the minimal support from Telemetry instead of the full
        # benchmark framework; intended for when the Telemetry benchmarks
        # are too flaky on ChromeOS. Defaults to False.
        self._minimal_telemetry = False

        for option_name, value in args.iteritems():
            if option_name == 'arch':
                self._arch = value
            elif option_name == 'gs_test_location':
                self._gs_test_location = (value == 'True')
            elif option_name == 'ignore_failures':
                self._ignore_failures = (value == 'True')
            elif option_name == 'local':
                self._local = (value == 'True')
            elif option_name == 'minimal_telemetry':
                self._minimal_telemetry = (value == 'True')
            elif option_name == 'version':
                self._version = value
            else:
                raise error.TestFail('Unknown option passed: %s' % option_name)


    def _run_test(self, benchmark, *args):
        """Run the benchmark using Telemetry.

        @param benchmark: Name of the benchmark to run.
        @param args: Additional arguments to pass to the telemetry execution
                     script.
        @raises error.TestFail if execution of the test failed.
                Also re-raises any exceptions thrown by
                run_telemetry_benchmark.
        """
        try:
            logging.info('Starting run for Telemetry benchmark %s', benchmark)
            start_time = time.time()
            result = self._telemetry_runner.run_telemetry_benchmark(
                    benchmark, None, *args)
            end_time = time.time()
            logging.info('Completed Telemetry benchmark %s in %f seconds',
                         benchmark, end_time - start_time)
        except error.TestBaseException as e:
            end_time = time.time()
            logging.info('Got exception from Telemetry benchmark %s '
                         'after %f seconds. Exception: %s',
                         benchmark, end_time - start_time, str(e))
            raise

        # We don't generate any keyvals for this run. This is not
        # an official run of the benchmark; we are just running it to
        # collect a profile from it.

        if result.status == telemetry_runner.SUCCESS_STATUS:
            logging.info('Benchmark %s succeeded', benchmark)
        else:
            raise error.TestFail('An error occurred while executing'
                                 ' benchmark: %s' % benchmark)


    def _run_test_with_retry(self, benchmark, *args):
        """Run the benchmark using Telemetry. Retry once in case of failure.

        @param benchmark: Name of the benchmark to run.
        @param args: Additional arguments to pass to the telemetry execution
                     script.
        @raises Re-raises any exceptions thrown by _run_test.
        """

        tried = False
        while True:
            try:
                self._run_test(benchmark, *args)
                logging.info('Benchmark %s succeeded on %s try',
                             benchmark,
                             'first' if not tried else 'second')
                break
            except error.TestBaseException:
                if not tried:
                    tried = True
                    logging.info('Benchmark %s failed. Retrying ...',
                                 benchmark)
                else:
                    logging.info('Benchmark %s failed twice. Not retrying',
                                 benchmark)
                    raise


    def _run_tests_minimal_telemetry(self):
        """Run the benchmarks using the minimal support from Telemetry.

        The benchmarks are run using a client side autotest test. This test
        will control Chrome directly using the chrome.Chrome support and it
        will ask Chrome to display the benchmark pages directly instead of
        using the "page sets" and "measurements" support from Telemetry.
        In this way we avoid using the Telemetry benchmark support which is
        not stable on ChromeOS yet.
        """
        AFDO_GENERATE_CLIENT_TEST = 'telemetry_AFDOGenerateClient'

        # We don't want the client test to "inherit" the profiler settings
        # of this test. Doing so would end up with two instances of the
        # profiler (perf) being executed at the same time.
        # Filed a feature request about this. See crbug/342958.

        # Save the current settings for profilers.
        saved_profilers = self.job.profilers
        saved_default_profile_only = self.job.default_profile_only

        # Reset the state of the profilers.
        self.job.default_profile_only = False
        self.job.profilers = profilers.profilers(self.job)

        # Execute the client side test.
        client_at = autotest.Autotest(self._host)
        client_at.run_test(AFDO_GENERATE_CLIENT_TEST, args='')

        # Restore the settings for the profilers.
        self.job.default_profile_only = saved_default_profile_only
        self.job.profilers = saved_profilers


    @staticmethod
    def _get_compressed_name(name):
        """Given a file name, return the bz2 compressed name.

        @param name: Name of uncompressed file.
        @returns Name of compressed file.
        """
        return name + '.bz2'

    @staticmethod
    def _compress_file(unc_file, com_file):
        """Compresses the specified file with bz2.

        @param unc_file: name of file to compress.
        @param com_file: prefix name of compressed file.
        @raises error.TestFail if compression failed.
        @returns Name of compressed file.
        """
        dest = telemetry_AFDOGenerate._get_compressed_name(com_file)
        # Read in binary mode and in fixed-size chunks: perf.data is a
        # binary file, so iterating over it as text lines is not
        # appropriate.
        with open(unc_file, 'rb') as inp:
            with bz2.BZ2File(dest, 'w') as out:
                for data in iter(lambda: inp.read(65536), ''):
                    out.write(data)
        if not os.path.isfile(dest):
            raise error.TestFail('Could not compress %s' % unc_file)
        return dest


    def _gs_upload(self, local_file, remote_basename):
        """Uploads a file to a specific Google Storage location.

        @param local_file: name of file to upload.
        @param remote_basename: basename of remote file.
        @raises error.TestFail if upload failed.
        @returns nothing.
        """
        GS_GCC_DEST = 'gs://chromeos-prebuilt/afdo-job/canonicals/%s'
        GS_LLVM_DEST = 'gs://chromeos-prebuilt/afdo-job/llvm/%s'
        GS_TEST_DEST = 'gs://chromeos-throw-away-bucket/afdo-job/canonicals/%s'
        GS_ACL = 'project-private'
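        # The remote file is gs_dest % remote_basename, e.g. for an
        # LLVM board (illustrative name):
        # gs://chromeos-prebuilt/afdo-job/llvm/chromeos-chrome-amd64-LATEST.perf.data.bz2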

        board = self._host.get_board().split(':')[1]

        if self._gs_test_location:
            gs_dest = GS_TEST_DEST
        elif board in GCC_BOARDS:
            gs_dest = GS_GCC_DEST
        elif board in LLVM_BOARDS:
            gs_dest = GS_LLVM_DEST
        else:
            raise error.TestFail(
                    'This test cannot be run on board %s' % board)

        remote_file = gs_dest % remote_basename

        logging.info('About to upload to GS: %s', remote_file)
        if not utils.gs_upload(local_file,
                               remote_file,
                               GS_ACL, result_dir=self.resultsdir):
            logging.info('Failed upload to GS: %s', remote_file)
            raise error.TestFail('Unable to gs upload %s to %s' %
                                 (local_file, remote_file))

        logging.info('Successful upload to GS: %s', remote_file)