telemetry_AFDOGenerate.py revision 9162c55fe4568d1ef42d1f5e5f57484285038c14
1# Copyright (c) 2013 The Chromium OS Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5""" 6Test to generate the AFDO profile for a set of ChromeOS benchmarks. 7 8This will run a pre-determined set of benchmarks on the DUT under 9the monitoring of the linux "perf" tool. The resulting perf.data 10file will then be copied to Google Storage (GS) where it can be 11used by the AFDO optimized build. 12 13Given that the telemetry benchmarks are quite unstable on ChromeOS at 14this point, this test also supports a mode where the benchmarks are 15executed outside of the telemetry framework. It is not the same as 16executing the benchmarks under telemetry because there is no telemetry 17measurement taken but, for the purposes of profiling Chrome, it should 18be pretty close. 19 20Example invocation: 21/usr/bin/test_that --debug --board=lumpy <DUT IP> 22 --args="ignore_failures=True local=True gs_test_location=True" 23 telemetry_AFDOGenerate 24""" 25 26import bz2 27import logging 28import os 29import time 30 31from autotest_lib.client.common_lib import error, utils 32from autotest_lib.server import autotest 33from autotest_lib.server import profilers 34from autotest_lib.server import test 35from autotest_lib.server import utils 36from autotest_lib.server.cros import telemetry_runner 37 38# List of benchmarks to run to capture profile information. This is 39# based on the "superhero" and "perf_v2" list and other telemetry 40# benchmarks. Goal is to have a short list that is as representative 41# as possible and takes a short time to execute. At this point the 42# list of benchmarks is in flux. 
# List of benchmarks to run to capture profile information. This is
# based on the "superhero" and "perf_v2" list and other telemetry
# benchmarks. Goal is to have a short list that is as representative
# as possible and takes a short time to execute. At this point the
# list of benchmarks is in flux.
# Each entry is either a 1-tuple (benchmark name) or a 2-tuple
# (benchmark name, tuple of extra telemetry arguments).
TELEMETRY_AFDO_BENCHMARKS = (
    ('page_cycler.typical_25', ('--pageset-repeat=1',)),
    ('page_cycler.intl_ja_zh', ('--pageset-repeat=1',)),
    ('page_cycler.intl_ar_fa_he', ('--pageset-repeat=1',)),
    ('page_cycler.intl_es_fr_pt-BR', ('--pageset-repeat=1',)),
    ('page_cycler.intl_ko_th_vi', ('--pageset-repeat=1',)),
    ('page_cycler.intl_hi_ru', ('--pageset-repeat=1',)),
    ('octane',),
    ('kraken',),
    ('speedometer',),
    ('dromaeo.domcoreattr',),
    ('dromaeo.domcoremodify',),
    )

# Temporarily disable this benchmark because it is failing a
# lot. Filed chromium:590127
# ('smoothness.tough_webgl_cases',)

# Some benchmarks removed from the profile set:
# 'page_cycler.morejs' -> uninteresting, seems to fail frequently,
# 'page_cycler.moz' -> seems very old.
# 'media.tough_video_cases' -> removed this because it does not bring
#     any benefit and takes more than 12 mins

# List of boards where this test can be run.
# Currently, this has only been tested on 'sandybridge' boards.
VALID_BOARDS = ['butterfly', 'lumpy', 'parrot', 'stumpy']

class telemetry_AFDOGenerate(test.test):
    """
    Run one or more telemetry benchmarks under the "perf" monitoring
    tool, generate a "perf.data" file and upload to GS for consumption
    by the AFDO optimized build.
    """
    version = 1


    def run_once(self, host, args):
        """Run a set of telemetry benchmarks.

        @param host: Host machine where test is run.
        @param args: A dictionary of the arguments that were passed
                to this test.
        @returns None.
        """
        self._host = host
        # get_board() returns a string of the form '<family>:<board>'.
        host_board = host.get_board().split(':')[1]
        if host_board not in VALID_BOARDS:
            raise error.TestFail(
                    'This test cannot be run on board %s' % host_board)

        self._parse_args(args)

        if self._minimal_telemetry:
            self._run_tests_minimal_telemetry()
        else:
            self._telemetry_runner = telemetry_runner.TelemetryRunner(
                    self._host, self._local)

            for benchmark_info in TELEMETRY_AFDO_BENCHMARKS:
                benchmark = benchmark_info[0]
                # Second tuple element, when present, carries extra
                # arguments for the telemetry execution script.
                bench_args = (() if len(benchmark_info) == 1
                              else benchmark_info[1])
                try:
                    self._run_test_with_retry(benchmark, *bench_args)
                except error.TestBaseException:
                    if not self._ignore_failures:
                        raise
                    else:
                        logging.info('Ignoring failure from benchmark %s.',
                                     benchmark)


    def after_run_once(self):
        """After the profile information has been collected, compress it
        and upload it to GS.
        """
        PERF_FILE = 'perf.data'
        COMP_PERF_FILE = 'chromeos-chrome-%s-%s.perf.data'
        perf_data = os.path.join(self.profdir, PERF_FILE)
        comp_data = os.path.join(self.profdir, COMP_PERF_FILE % (
                self._arch, self._version))
        compressed = self._compress_file(perf_data, comp_data)
        self._gs_upload(compressed, os.path.basename(compressed))

        # Also create copy of this file using "LATEST" as version so
        # it can be found in case the builder is looking for a version
        # number that does not match. It is ok to use a slightly old
        # version of this file for the optimized build.
        latest_data = COMP_PERF_FILE % (self._arch, 'LATEST')
        latest_compressed = self._get_compressed_name(latest_data)
        self._gs_upload(compressed, latest_compressed)


    def _parse_args(self, args):
        """Parses input arguments to this autotest.

        @param args: Options->values dictionary.
        @raises error.TestFail if a bad option is passed.
        """

        # Set default values for the options.
        # Architecture for which we are collecting afdo data.
        self._arch = 'amd64'
        # Use an alternate GS location where everyone can write.
        # Set default depending on whether this is executing in
        # the lab environment or not.
        self._gs_test_location = not utils.host_is_in_lab_zone(
                self._host.hostname)
        # Ignore individual test failures.
        self._ignore_failures = False
        # Use local copy of telemetry instead of using the dev server copy.
        self._local = False
        # Chrome version to which the AFDO data corresponds.
        self._version, _ = self._host.get_chrome_version()
        # Try to use the minimal support from Telemetry. The Telemetry
        # benchmarks in ChromeOS are too flaky at this point. So, initially,
        # this will be set to True by default.
        self._minimal_telemetry = False

        # Boolean options arrive as the strings 'True'/'False' from
        # test_that --args, hence the explicit string comparisons.
        for option_name, value in args.iteritems():
            if option_name == 'arch':
                self._arch = value
            elif option_name == 'gs_test_location':
                self._gs_test_location = (value == 'True')
            elif option_name == 'ignore_failures':
                self._ignore_failures = (value == 'True')
            elif option_name == 'local':
                self._local = (value == 'True')
            elif option_name == 'minimal_telemetry':
                self._minimal_telemetry = (value == 'True')
            elif option_name == 'version':
                self._version = value
            else:
                raise error.TestFail('Unknown option passed: %s' % option_name)


    def _run_test(self, benchmark, *args):
        """Run the benchmark using Telemetry.

        @param benchmark: Name of the benchmark to run.
        @param args: Additional arguments to pass to the telemetry execution
                script.
        @raises Raises error.TestFail if execution of test failed.
                Also re-raise any exceptions thrown by run_telemetry benchmark.
        """
        # Assign start_time before entering the try block so that the
        # except handler can always compute the elapsed time, even if
        # the failure happens on the very first statement inside 'try'.
        start_time = time.time()
        try:
            logging.info('Starting run for Telemetry benchmark %s', benchmark)
            result = self._telemetry_runner.run_telemetry_benchmark(
                    benchmark, None, *args)
            end_time = time.time()
            logging.info('Completed Telemetry benchmark %s in %f seconds',
                         benchmark, end_time - start_time)
        except error.TestBaseException as e:
            end_time = time.time()
            logging.info('Got exception from Telemetry benchmark %s '
                         'after %f seconds. Exception: %s',
                         benchmark, end_time - start_time, str(e))
            raise

        # We dont generate any keyvals for this run. This is not
        # an official run of the benchmark. We are just running it to get
        # a profile from it.

        # Compare with '==', not 'is': SUCCESS_STATUS is a string
        # constant and identity comparison on strings only works by
        # interning accident.
        if result.status == telemetry_runner.SUCCESS_STATUS:
            logging.info('Benchmark %s succeeded', benchmark)
        else:
            raise error.TestFail('An error occurred while executing'
                                 ' benchmark: %s' % benchmark)


    def _run_test_with_retry(self, benchmark, *args):
        """Run the benchmark using Telemetry. Retry in case of failure.

        @param benchmark: Name of the benchmark to run.
        @param args: Additional arguments to pass to the telemetry execution
                script.
        @raises Re-raise any exceptions thrown by _run_test.
        """

        tried = False
        while True:
            try:
                self._run_test(benchmark, *args)
                logging.info('Benchmark %s succeeded on %s try',
                             benchmark,
                             'first' if not tried else 'second')
                break
            except error.TestBaseException:
                if not tried:
                    tried = True
                    logging.info('Benchmark %s failed. Retrying ...',
                                 benchmark)
                else:
                    logging.info('Benchmark %s failed twice. Not retrying',
                                 benchmark)
                    raise


    def _run_tests_minimal_telemetry(self):
        """Run the benchmarks using the minimal support from Telemetry.

        The benchmarks are run using a client side autotest test. This test
        will control Chrome directly using the chrome.Chrome support and it
        will ask Chrome to display the benchmark pages directly instead of
        using the "page sets" and "measurements" support from Telemetry.
        In this way we avoid using Telemetry benchmark support which is not
        stable on ChromeOS yet.
        """
        AFDO_GENERATE_CLIENT_TEST = 'telemetry_AFDOGenerateClient'

        # We dont want to "inherit" the profiler settings for this test
        # to the client test. Doing so will end up in two instances of
        # the profiler (perf) being executed at the same time.
        # Filed a feature request about this. See crbug/342958.

        # Save the current settings for profilers.
        saved_profilers = self.job.profilers
        saved_default_profile_only = self.job.default_profile_only

        # Reset the state of the profilers.
        self.job.default_profile_only = False
        self.job.profilers = profilers.profilers(self.job)

        # Execute the client side test.
        client_at = autotest.Autotest(self._host)
        client_at.run_test(AFDO_GENERATE_CLIENT_TEST, args='')

        # Restore the settings for the profilers.
        # NOTE: this previously assigned to the non-existent attribute
        # 'self.job.profiler' (singular), so the saved profilers were
        # never actually restored.
        self.job.default_profile_only = saved_default_profile_only
        self.job.profilers = saved_profilers


    @staticmethod
    def _get_compressed_name(name):
        """Given a file name, return bz2 compressed name.

        @param name: Name of uncompressed file.
        @returns name of compressed file.
        """
        return name + '.bz2'

    @staticmethod
    def _compress_file(unc_file, com_file):
        """Compresses specified file with bz2.

        @param unc_file: name of file to compress.
        @param com_file: prefix name of compressed file.
        @raises error.TestFail if compression failed.
        @returns Name of compressed file.
        """
        dest = ''
        # perf.data is binary, so open in binary mode ('rb' rather
        # than 'r') to avoid any text-mode translation.
        with open(unc_file, 'rb') as inp:
            dest = telemetry_AFDOGenerate._get_compressed_name(com_file)
            with bz2.BZ2File(dest, 'w') as out:
                for data in inp:
                    out.write(data)
        if not dest or not os.path.isfile(dest):
            raise error.TestFail('Could not compress %s' % unc_file)
        return dest


    def _gs_upload(self, local_file, remote_basename):
        """Uploads file to google storage specific location.

        @param local_file: name of file to upload.
        @param remote_basename: basename of remote file.
        @raises error.TestFail if upload failed.
        @returns nothing.
        """
        GS_DEST = 'gs://chromeos-prebuilt/afdo-job/canonicals/%s'
        GS_TEST_DEST = 'gs://chromeos-throw-away-bucket/afdo-job/canonicals/%s'
        GS_ACL = 'project-private'

        gs_dest = GS_TEST_DEST if self._gs_test_location else GS_DEST
        remote_file = gs_dest % remote_basename

        logging.info('About to upload to GS: %s', remote_file)
        if not utils.gs_upload(local_file,
                               remote_file,
                               GS_ACL, result_dir=self.resultsdir):
            logging.info('Failed upload to GS: %s', remote_file)
            raise error.TestFail('Unable to gs upload %s to %s' %
                                 (local_file, remote_file))

        logging.info('Successfull upload to GS: %s', remote_file)