benchmark_run.py revision 7057cf67ba1dbdd4387f53e5fe47b43c955b1a53
#!/usr/bin/python

# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Defines BenchmarkRun, a thread that obtains the result of one benchmark run.

The result comes either from the results cache or from actually running the
benchmark on a machine.  MockBenchmarkRun is a variant that uses the mock
cache and mock result classes.
"""

import datetime
import os
import threading
import time
import traceback

from cros_utils import command_executer
from cros_utils import timeline

from suite_runner import SuiteRunner
from results_cache import MockResult
from results_cache import MockResultsCache
from results_cache import Result
from results_cache import ResultsCache
from results_cache import TelemetryResult


# Status values recorded on a BenchmarkRun's timeline.
STATUS_FAILED = "FAILED"
STATUS_SUCCEEDED = "SUCCEEDED"
STATUS_IMAGING = "IMAGING"
STATUS_RUNNING = "RUNNING"
STATUS_WAITING = "WAITING"
STATUS_PENDING = "PENDING"


class BenchmarkRun(threading.Thread):
    """Thread that produces the result for one (benchmark, label, iteration).

    After run() finishes, the outcome is available through self.result,
    self.cache_hit, self.failure_reason and the events recorded on
    self.timeline.
    """

    def __init__(self, name, benchmark,
                 label,
                 iteration,
                 cache_conditions,
                 machine_manager,
                 logger_to_use,
                 log_level,
                 share_cache):
        """Store the run parameters and record the initial PENDING status.

        Args:
          name: Name of this run; stored as the thread name and used in log
            messages.
          benchmark: Benchmark description; this class reads its test_args,
            test_name, suite, perf_args, show_all_results and run_local
            attributes.
          label: Label description; this class reads its chromeos_image,
            chromeos_root, board, cache_only and chrome_version attributes.
          iteration: Iteration value forwarded to the results cache.
          cache_conditions: Cache conditions forwarded to the results cache.
          machine_manager: Object used to acquire, image and release machines.
          logger_to_use: Logger used for all output of this run.
          log_level: Log level forwarded to the helper objects.
          share_cache: Value forwarded to the results cache.

        Raises:
          Exception: If benchmark.perf_args does not start with "record" or
            "stat" (see _GetExtraAutotestArgs).
        """
        threading.Thread.__init__(self)
        self.name = name
        self._logger = logger_to_use
        self.log_level = log_level
        self.benchmark = benchmark
        self.iteration = iteration
        self.label = label
        self.result = None
        self.terminated = False
        self.retval = None
        self.run_completed = False
        self.machine_manager = machine_manager
        self.suite_runner = SuiteRunner(self._logger, self.log_level)
        self.machine = None
        self.cache_conditions = cache_conditions
        self.runs_complete = 0
        self.cache_hit = False
        self.failure_reason = ""
        self.test_args = benchmark.test_args
        # Needs self.benchmark and self._logger, which are set above.
        self.profiler_args = self._GetExtraAutotestArgs()
        self._ce = command_executer.GetCommandExecuter(
            self._logger, log_level=self.log_level)
        self.timeline = timeline.Timeline()
        self.timeline.Record(STATUS_PENDING)
        self.share_cache = share_cache
        self.cache_has_been_read = False

        # This is used by schedv2.
        self.owner_thread = None

    def ReadCache(self):
        """Create the results cache and try to read a cached result.

        Sets self.cache, self.result (None when nothing was read),
        self.cache_hit and self.cache_has_been_read.
        """
        # Just use the first machine for running the cached version,
        # without locking it.
        self.cache = ResultsCache()
        self.cache.Init(self.label.chromeos_image,
                        self.label.chromeos_root,
                        self.benchmark.test_name,
                        self.iteration,
                        self.test_args,
                        self.profiler_args,
                        self.machine_manager,
                        self.machine,
                        self.label.board,
                        self.cache_conditions,
                        self._logger,
                        self.log_level,
                        self.label,
                        self.share_cache,
                        self.benchmark.suite,
                        self.benchmark.show_all_results,
                        self.benchmark.run_local)

        self.result = self.cache.ReadResult()
        self.cache_hit = (self.result is not None)
        self.cache_has_been_read = True

    def run(self):
        """Thread body: obtain self.result and record the final status.

        The result is taken from the cache when available.  Otherwise, if
        the label is cache-only, a failing result is synthesized; else a
        machine is acquired, the test is run and the result is stored in
        the cache.  Any exception is logged and recorded as a failure rather
        than propagated.  A locally acquired machine is always released.
        """
        try:
            if not self.cache_has_been_read:
                self.ReadCache()

            if self.result:
                self._logger.LogOutput("%s: Cache hit." % self.name)
                self._logger.LogOutput(self.result.out,
                                       print_to_console=False)
                self._logger.LogError(self.result.err,
                                      print_to_console=False)

            elif self.label.cache_only:
                self._logger.LogOutput("%s: No cache hit." % self.name)
                output = "%s: No Cache hit." % self.name
                retval = 1
                err = "No cache hit."
                self.result = Result.CreateFromRun(
                    self._logger, self.log_level,
                    self.label, self.machine,
                    output, err, retval,
                    self.benchmark.show_all_results,
                    self.benchmark.test_name,
                    self.benchmark.suite)

            else:
                self._logger.LogOutput("%s: No cache hit." % self.name)
                self.timeline.Record(STATUS_WAITING)
                # Try to acquire a machine now.
                self.machine = self.AcquireMachine()
                self.cache.machine = self.machine
                self.result = self.RunTest(self.machine)

                self.cache.remote = self.machine.name
                self.label.chrome_version = (
                    self.machine_manager.GetChromeVersion(self.machine))
                self.cache.StoreResult(self.result)

            if self.machine and not self.label.chrome_version:
                self.label.chrome_version = (
                    self.machine_manager.GetChromeVersion(self.machine))

            # Terminate() has already recorded the failure status.
            if self.terminated:
                return

            if not self.result.retval:
                self.timeline.Record(STATUS_SUCCEEDED)
            else:
                if self.timeline.GetLastEvent() != STATUS_FAILED:
                    self.failure_reason = (
                        "Return value of test suite was non-zero.")
                    self.timeline.Record(STATUS_FAILED)

        except Exception as e:
            # Top-level boundary of the thread: log and mark as failed.
            self._logger.LogError("Benchmark run: '%s' failed: %s" %
                                  (self.name, e))
            traceback.print_exc()
            if self.timeline.GetLastEvent() != STATUS_FAILED:
                self.timeline.Record(STATUS_FAILED)
                self.failure_reason = str(e)
        finally:
            if self.owner_thread is not None:
                # In schedv2 mode, we do not lock machine locally. So noop
                # here.
                pass
            elif self.machine:
                if not self.machine.IsReachable():
                    self._logger.LogOutput(
                        "Machine %s is not reachable, removing it."
                        % self.machine.name)
                    self.machine_manager.RemoveMachine(self.machine.name)
                self._logger.LogOutput("Releasing machine: %s" %
                                       self.machine.name)
                self.machine_manager.ReleaseMachine(self.machine)
                self._logger.LogOutput("Released machine: %s" %
                                       self.machine.name)

    def Terminate(self):
        """Flag the run as terminated, stop the suite runner, mark failure."""
        self.terminated = True
        self.suite_runner.Terminate()
        if self.timeline.GetLastEvent() != STATUS_FAILED:
            self.timeline.Record(STATUS_FAILED)
            self.failure_reason = "Thread terminated."

    def AcquireMachine(self):
        """Return a machine to run on, polling until one is available.

        When self.owner_thread is set, the machine is taken from
        self.owner_thread.dut() without asking the machine manager.

        Returns:
          The acquired machine object.

        Raises:
          Exception: If the run is terminated while waiting for a machine.
        """
        if self.owner_thread is not None:
            # No need to lock machine locally, DutWorker, which is a thread,
            # is responsible for running br.
            return self.owner_thread.dut()

        # Seconds to wait between two acquisition attempts.
        sleep_duration = 10
        while True:
            if self.terminated:
                raise Exception(
                    "Thread terminated while trying to acquire machine.")

            machine = self.machine_manager.AcquireMachine(self.label)

            if machine:
                self._logger.LogOutput("%s: Machine %s acquired at %s" %
                                       (self.name,
                                        machine.name,
                                        datetime.datetime.now()))
                break
            time.sleep(sleep_duration)
        return machine

    def _GetExtraAutotestArgs(self):
        """Build the profiler argument string from benchmark.perf_args.

        For the "telemetry" and "test_that" suites an error is logged and
        benchmark.perf_args is reset to "", so no profiler arguments are
        produced.

        Returns:
          A string with the --profiler/--profiler_args options, or "" when
          there are no perf args.

        Raises:
          Exception: If perf_args does not start with "record" or "stat".
        """
        if self.benchmark.perf_args and self.benchmark.suite == "telemetry":
            self._logger.LogError("Telemetry does not support profiler.")
            self.benchmark.perf_args = ""

        if self.benchmark.perf_args and self.benchmark.suite == "test_that":
            self._logger.LogError("test_that does not support profiler.")
            self.benchmark.perf_args = ""

        if self.benchmark.perf_args:
            perf_args_list = self.benchmark.perf_args.split(" ")
            # Insert "-a" right after the perf sub-command.
            perf_args_list = [perf_args_list[0]] + ["-a"] + perf_args_list[1:]
            perf_args = " ".join(perf_args_list)
            if perf_args_list[0] not in ("record", "stat"):
                raise Exception(
                    "perf_args must start with either record or stat")
            extra_test_args = ["--profiler=custom_perf",
                               ("--profiler_args='perf_options=\"%s\"'" %
                                perf_args)]
            return " ".join(extra_test_args)
        else:
            return ""

    def RunTest(self, machine):
        """Image the machine (unless in schedv2 mode) and run the benchmark.

        Args:
          machine: Machine object to run on; its name is passed to the suite
            runner.

        Returns:
          The Result created from the suite runner's return value and
          output.
        """
        self.timeline.Record(STATUS_IMAGING)
        if self.owner_thread is not None:
            # In schedv2 mode, do not even call ImageMachine. Machine image
            # is guaranteed.
            pass
        else:
            self.machine_manager.ImageMachine(machine,
                                              self.label)
        self.timeline.Record(STATUS_RUNNING)
        [retval, out, err] = self.suite_runner.Run(machine.name,
                                                   self.label,
                                                   self.benchmark,
                                                   self.test_args,
                                                   self.profiler_args)
        self.run_completed = True
        return Result.CreateFromRun(self._logger,
                                    self.log_level,
                                    self.label,
                                    self.machine,
                                    out,
                                    err,
                                    retval,
                                    self.benchmark.show_all_results,
                                    self.benchmark.test_name,
                                    self.benchmark.suite)

    def SetCacheConditions(self, cache_conditions):
        """Replace the cache conditions used by the next ReadCache() call."""
        self.cache_conditions = cache_conditions

    def __str__(self):
        """For better debugging."""

        return 'BenchmarkRun[name="{}"]'.format(self.name)


class MockBenchmarkRun(BenchmarkRun):
    """Inherited from BenchmarkRun."""

    def ReadCache(self):
        """Same as BenchmarkRun.ReadCache, but using MockResultsCache."""
        # Just use the first machine for running the cached version,
        # without locking it.
        self.cache = MockResultsCache()
        self.cache.Init(self.label.chromeos_image,
                        self.label.chromeos_root,
                        self.benchmark.test_name,
                        self.iteration,
                        self.test_args,
                        self.profiler_args,
                        self.machine_manager,
                        self.machine,
                        self.label.board,
                        self.cache_conditions,
                        self._logger,
                        self.log_level,
                        self.label,
                        self.share_cache,
                        self.benchmark.suite,
                        self.benchmark.show_all_results,
                        self.benchmark.run_local)

        self.result = self.cache.ReadResult()
        self.cache_hit = (self.result is not None)
        # Keep in step with BenchmarkRun.ReadCache so that run() does not
        # read the cache a second time.
        self.cache_has_been_read = True

    def RunTest(self, machine):
        """Remove Result.CreateFromRun for testing."""
        self.timeline.Record(STATUS_IMAGING)
        self.machine_manager.ImageMachine(machine,
                                          self.label)
        self.timeline.Record(STATUS_RUNNING)
        [retval, out, err] = self.suite_runner.Run(machine.name,
                                                   self.label,
                                                   self.benchmark,
                                                   self.test_args,
                                                   self.profiler_args)
        self.run_completed = True
        rr = MockResult("logger", self.label, self.log_level, machine)
        rr.out = out
        rr.err = err
        rr.retval = retval
        return rr