# benchmark_run.py revision f2a3ef46f75d2196a93d3ed27f4d1fcf22b54fbe
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Thread classes that execute one iteration of a benchmark on one label.

A BenchmarkRun first consults the results cache; on a miss it acquires a
machine, images it, runs the test suite and stores the result back into the
cache.  MockBenchmarkRun swaps in mock cache/result objects for testing.
"""

import datetime
import os
import threading
import time
import traceback

from cros_utils import command_executer
from cros_utils import timeline

from suite_runner import SuiteRunner
from results_cache import MockResult
from results_cache import MockResultsCache
from results_cache import Result
from results_cache import ResultsCache
from results_cache import TelemetryResult

STATUS_FAILED = 'FAILED'
STATUS_SUCCEEDED = 'SUCCEEDED'
STATUS_IMAGING = 'IMAGING'
STATUS_RUNNING = 'RUNNING'
STATUS_WAITING = 'WAITING'
STATUS_PENDING = 'PENDING'


class BenchmarkRun(threading.Thread):
    """A thread that produces the result of one benchmark iteration.

    Progress is recorded on `self.timeline` using the STATUS_* constants
    defined in this module; the final outcome is left in `self.result` and,
    on failure, a description is left in `self.failure_reason`.
    """

    def __init__(self, name, benchmark, label, iteration, cache_conditions,
                 machine_manager, logger_to_use, log_level, share_cache):
        """Store the run parameters and record the initial PENDING status.

        Args:
          name: Name of this run; also used as the thread name.
          benchmark: Benchmark description (provides test_name, test_args,
            suite, perf_args, show_all_results, run_local).
          label: Label description (provides chromeos_image, chromeos_root,
            board, cache_only, chrome_version).
          iteration: Iteration number, passed on to the results cache.
          cache_conditions: Conditions passed on to the results cache.
          machine_manager: Object used to acquire, image and release machines.
          logger_to_use: Logger used for all output of this run.
          log_level: Log level passed to the helpers created here.
          share_cache: Shared-cache setting passed on to the results cache.

        Raises:
          Exception: If benchmark.perf_args does not start with 'record' or
            'stat' (see _GetExtraAutotestArgs).
        """
        threading.Thread.__init__(self)
        self.name = name
        self._logger = logger_to_use
        self.log_level = log_level
        self.benchmark = benchmark
        self.iteration = iteration
        self.label = label
        self.result = None
        self.terminated = False
        self.retval = None
        self.run_completed = False
        self.machine_manager = machine_manager
        self.suite_runner = SuiteRunner(self._logger, self.log_level)
        self.machine = None
        self.cache_conditions = cache_conditions
        self.runs_complete = 0
        self.cache_hit = False
        self.failure_reason = ''
        self.test_args = benchmark.test_args
        self.profiler_args = self._GetExtraAutotestArgs()
        self._ce = command_executer.GetCommandExecuter(
            self._logger, log_level=self.log_level)
        self.timeline = timeline.Timeline()
        self.timeline.Record(STATUS_PENDING)
        self.share_cache = share_cache
        self.cache_has_been_read = False

        # This is used by schedv2.
        self.owner_thread = None

    def ReadCache(self):
        """Create the results cache and try to read a cached result from it.

        Sets `self.cache`, `self.result`, `self.cache_hit` and marks the
        cache as read so that run() does not read it a second time.
        """
        # Just use the first machine for running the cached version,
        # without locking it.
        self.cache = ResultsCache()
        self.cache.Init(self.label.chromeos_image, self.label.chromeos_root,
                        self.benchmark.test_name, self.iteration,
                        self.test_args, self.profiler_args,
                        self.machine_manager, self.machine, self.label.board,
                        self.cache_conditions, self._logger, self.log_level,
                        self.label, self.share_cache, self.benchmark.suite,
                        self.benchmark.show_all_results,
                        self.benchmark.run_local)

        self.result = self.cache.ReadResult()
        self.cache_hit = (self.result is not None)
        self.cache_has_been_read = True

    def run(self):
        """Thread entry point: obtain a result from the cache or a real run.

        Three cases are handled:
          * cache hit: the cached output and error text are logged;
          * cache miss with label.cache_only set: a failing result
            (retval 1) is created without running anything;
          * cache miss otherwise: a machine is acquired, the test is run and
            the result is stored in the cache.

        Any exception is logged and recorded as STATUS_FAILED instead of
        propagating out of the thread.  A locally acquired machine is always
        released at the end.
        """
        try:
            if not self.cache_has_been_read:
                self.ReadCache()

            if self.result:
                self._logger.LogOutput('%s: Cache hit.' % self.name)
                self._logger.LogOutput(self.result.out,
                                       print_to_console=False)
                self._logger.LogError(self.result.err,
                                      print_to_console=False)

            elif self.label.cache_only:
                self._logger.LogOutput('%s: No cache hit.' % self.name)
                output = '%s: No Cache hit.' % self.name
                retval = 1
                err = 'No cache hit.'
                self.result = Result.CreateFromRun(
                    self._logger, self.log_level, self.label, self.machine,
                    output, err, retval, self.benchmark.show_all_results,
                    self.benchmark.test_name, self.benchmark.suite)

            else:
                self._logger.LogOutput('%s: No cache hit.' % self.name)
                self.timeline.Record(STATUS_WAITING)
                # Try to acquire a machine now.
                self.machine = self.AcquireMachine()
                self.cache.machine = self.machine
                self.result = self.RunTest(self.machine)

                self.cache.remote = self.machine.name
                self.label.chrome_version = (
                    self.machine_manager.GetChromeVersion(self.machine))
                self.cache.StoreResult(self.result)

            if self.machine and not self.label.chrome_version:
                self.label.chrome_version = (
                    self.machine_manager.GetChromeVersion(self.machine))

            if self.terminated:
                # Terminate() has already recorded the failure.
                return

            if not self.result.retval:
                self.timeline.Record(STATUS_SUCCEEDED)
            elif self.timeline.GetLastEvent() != STATUS_FAILED:
                self.failure_reason = (
                    'Return value of test suite was non-zero.')
                self.timeline.Record(STATUS_FAILED)

        # Top-level boundary of the thread: catch everything so the failure
        # is recorded on the timeline rather than killing the thread silently.
        except Exception as e:
            self._logger.LogError(
                "Benchmark run: '%s' failed: %s" % (self.name, e))
            traceback.print_exc()
            if self.timeline.GetLastEvent() != STATUS_FAILED:
                self.timeline.Record(STATUS_FAILED)
                self.failure_reason = str(e)
        finally:
            # In schedv2 mode (owner_thread set), we do not lock the machine
            # locally, so there is nothing to release here.
            if self.owner_thread is None and self.machine:
                if not self.machine.IsReachable():
                    self._logger.LogOutput(
                        'Machine %s is not reachable, removing it.' %
                        self.machine.name)
                    self.machine_manager.RemoveMachine(self.machine.name)
                self._logger.LogOutput(
                    'Releasing machine: %s' % self.machine.name)
                self.machine_manager.ReleaseMachine(self.machine)
                self._logger.LogOutput(
                    'Released machine: %s' % self.machine.name)

    def Terminate(self):
        """Flag the run as terminated and stop the suite runner.

        Records STATUS_FAILED with reason 'Thread terminated.' unless a
        failure has already been recorded.
        """
        self.terminated = True
        self.suite_runner.Terminate()
        if self.timeline.GetLastEvent() != STATUS_FAILED:
            self.timeline.Record(STATUS_FAILED)
            self.failure_reason = 'Thread terminated.'

    def AcquireMachine(self):
        """Return a machine to run on, waiting until one is available.

        In schedv2 mode the owner thread's DUT is returned immediately.
        Otherwise the machine manager is polled every 10 seconds.

        Returns:
          The acquired machine object.

        Raises:
          Exception: If the run is terminated while waiting for a machine.
        """
        if self.owner_thread is not None:
            # No need to lock machine locally, DutWorker, which is a thread,
            # is responsible for running br.
            return self.owner_thread.dut()

        sleep_duration = 10
        while True:
            if self.terminated:
                raise Exception(
                    'Thread terminated while trying to acquire machine.')

            machine = self.machine_manager.AcquireMachine(self.label)
            if machine:
                self._logger.LogOutput(
                    '%s: Machine %s acquired at %s' %
                    (self.name, machine.name, datetime.datetime.now()))
                return machine

            time.sleep(sleep_duration)

    def _GetExtraAutotestArgs(self):
        """Build the profiler arguments string from benchmark.perf_args.

        For the 'telemetry' and 'test_that' suites an error is logged and
        benchmark.perf_args is reset to '' (note: this mutates the benchmark
        object).

        Returns:
          '' when no perf_args remain, otherwise the '--profiler=...' and
          '--profiler_args=...' options joined by a space.

        Raises:
          Exception: If perf_args does not start with 'record' or 'stat'.
        """
        if self.benchmark.perf_args and self.benchmark.suite == 'telemetry':
            self._logger.LogError('Telemetry does not support profiler.')
            self.benchmark.perf_args = ''

        if self.benchmark.perf_args and self.benchmark.suite == 'test_that':
            self._logger.LogError('test_that does not support profiler.')
            self.benchmark.perf_args = ''

        if not self.benchmark.perf_args:
            return ''

        perf_args_list = self.benchmark.perf_args.split(' ')
        if perf_args_list[0] not in ('record', 'stat'):
            raise Exception(
                'perf_args must start with either record or stat')

        # Insert '-a' directly after the perf subcommand.
        perf_args_list.insert(1, '-a')
        perf_args = ' '.join(perf_args_list)
        extra_test_args = [
            '--profiler=custom_perf',
            "--profiler_args='perf_options=\"%s\"'" % perf_args,
        ]
        return ' '.join(extra_test_args)

    def RunTest(self, machine):
        """Image the machine (unless in schedv2 mode) and run the benchmark.

        Args:
          machine: The machine to image and run on.

        Returns:
          The object built by Result.CreateFromRun from the suite runner's
          return value, stdout and stderr.  Note that `self.machine`, not the
          `machine` argument, is what gets passed to Result.CreateFromRun.
        """
        self.timeline.Record(STATUS_IMAGING)
        # In schedv2 mode, do not even call ImageMachine. Machine image is
        # guaranteed.
        if self.owner_thread is None:
            self.machine_manager.ImageMachine(machine, self.label)
        self.timeline.Record(STATUS_RUNNING)
        retval, out, err = self.suite_runner.Run(
            machine.name, self.label, self.benchmark, self.test_args,
            self.profiler_args)
        self.run_completed = True
        return Result.CreateFromRun(
            self._logger, self.log_level, self.label, self.machine, out, err,
            retval, self.benchmark.show_all_results, self.benchmark.test_name,
            self.benchmark.suite)

    def SetCacheConditions(self, cache_conditions):
        """Replace the cache conditions used by later ReadCache() calls."""
        self.cache_conditions = cache_conditions

    def __str__(self):
        """For better debugging."""

        return 'BenchmarkRun[name="{}"]'.format(self.name)


class MockBenchmarkRun(BenchmarkRun):
    """Inherited from BenchmarkRun; uses mock cache and result objects."""

    def ReadCache(self):
        """Same as BenchmarkRun.ReadCache but backed by MockResultsCache."""
        # Just use the first machine for running the cached version,
        # without locking it.
        self.cache = MockResultsCache()
        self.cache.Init(self.label.chromeos_image, self.label.chromeos_root,
                        self.benchmark.test_name, self.iteration,
                        self.test_args, self.profiler_args,
                        self.machine_manager, self.machine, self.label.board,
                        self.cache_conditions, self._logger, self.log_level,
                        self.label, self.share_cache, self.benchmark.suite,
                        self.benchmark.show_all_results,
                        self.benchmark.run_local)

        self.result = self.cache.ReadResult()
        self.cache_hit = (self.result is not None)
        # Keep in step with BenchmarkRun.ReadCache so that run() does not
        # read the cache again after an explicit ReadCache() call.
        self.cache_has_been_read = True

    def RunTest(self, machine):
        """Remove Result.CreateFromRun for testing.

        Unlike BenchmarkRun.RunTest this always calls ImageMachine, and it
        returns a MockResult carrying the suite runner's output.
        """
        self.timeline.Record(STATUS_IMAGING)
        self.machine_manager.ImageMachine(machine, self.label)
        self.timeline.Record(STATUS_RUNNING)
        retval, out, err = self.suite_runner.Run(
            machine.name, self.label, self.benchmark, self.test_args,
            self.profiler_args)
        self.run_completed = True
        rr = MockResult('logger', self.label, self.log_level, machine)
        rr.out = out
        rr.err = err
        rr.retval = retval
        return rr