#!/usr/bin/python

# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# benchmark_run.py revision 0d1a9f32c928e21a72547f3d334d631c5861f027

"""Thread classes that execute one iteration of a benchmark with caching."""

import datetime
import os
import threading
import time
import traceback

from cros_utils import command_executer
from cros_utils import timeline

from suite_runner import SuiteRunner
from results_cache import MockResult
from results_cache import MockResultsCache
from results_cache import Result
from results_cache import ResultsCache
from results_cache import TelemetryResult


# Status values recorded on a run's timeline.
STATUS_FAILED = "FAILED"
STATUS_SUCCEEDED = "SUCCEEDED"
STATUS_IMAGING = "IMAGING"
STATUS_RUNNING = "RUNNING"
STATUS_WAITING = "WAITING"
STATUS_PENDING = "PENDING"


class BenchmarkRun(threading.Thread):
  """Thread that runs a single iteration of a benchmark for one label.

  The run first consults the results cache. On a miss (and unless the label
  is cache-only) it acquires a machine, runs the test suite on it and stores
  the result in the cache. Progress is recorded on self.timeline using the
  STATUS_* constants defined in this module.
  """

  def __init__(self, name, benchmark,
               label,
               iteration,
               cache_conditions,
               machine_manager,
               logger_to_use,
               log_level,
               share_cache):
    """Set up the run; nothing is executed until the thread is started.

    Args:
      name: Name of this run. Assigned to self.name, which is also the
        threading.Thread name.
      benchmark: Benchmark description. This class reads its test_args,
        perf_args, suite, test_name, show_all_results and run_local
        attributes.
      label: Label description. This class reads its chromeos_image,
        chromeos_root, board and cache_only attributes and reads/writes
        chrome_version.
      iteration: Iteration identifier forwarded to the results cache.
      cache_conditions: Cache conditions forwarded to the results cache.
      machine_manager: Object used to acquire, image, release and remove
        machines and to query the Chrome version.
      logger_to_use: Logger providing LogOutput and LogError.
      log_level: Log level forwarded to the helpers created here.
      share_cache: Forwarded to the results cache.

    Raises:
      Exception: If benchmark.perf_args is set but does not start with
        "record" or "stat" (see _GetExtraAutotestArgs).
    """
    threading.Thread.__init__(self)
    self.name = name
    self._logger = logger_to_use
    self.log_level = log_level
    self.benchmark = benchmark
    self.iteration = iteration
    self.label = label
    self.result = None
    self.terminated = False
    self.retval = None
    self.run_completed = False
    self.machine_manager = machine_manager
    self.suite_runner = SuiteRunner(self._logger, self.log_level)
    self.machine = None
    self.cache_conditions = cache_conditions
    self.runs_complete = 0
    self.cache_hit = False
    self.failure_reason = ""
    self.test_args = benchmark.test_args
    # Must come after self.benchmark and self._logger are assigned.
    self.profiler_args = self._GetExtraAutotestArgs()
    self._ce = command_executer.GetCommandExecuter(self._logger,
                                                   log_level=self.log_level)
    self.timeline = timeline.Timeline()
    self.timeline.Record(STATUS_PENDING)
    self.share_cache = share_cache
    self.cache_has_been_read = False

    # This is used by schedv2.
    self.owner_thread = None

  def _ReadCacheWith(self, cache):
    """Initialize |cache| for this run and load any cached result from it.

    Sets self.cache, self.result, self.cache_hit and marks the cache as read
    so that run() does not read it a second time.

    Args:
      cache: A freshly constructed results-cache object exposing Init() and
        ReadResult().
    """
    # Just use the first machine for running the cached version,
    # without locking it.
    self.cache = cache
    self.cache.Init(self.label.chromeos_image,
                    self.label.chromeos_root,
                    self.benchmark.test_name,
                    self.iteration,
                    self.test_args,
                    self.profiler_args,
                    self.machine_manager,
                    self.machine,
                    self.label.board,
                    self.cache_conditions,
                    self._logger,
                    self.log_level,
                    self.label,
                    self.share_cache,
                    self.benchmark.suite,
                    self.benchmark.show_all_results,
                    self.benchmark.run_local)

    self.result = self.cache.ReadResult()
    self.cache_hit = (self.result is not None)
    self.cache_has_been_read = True

  def ReadCache(self):
    """Create the results cache and try to load a cached result."""
    self._ReadCacheWith(ResultsCache())

  def run(self):
    """Thread body: obtain a result from the cache or by running the test.

    Any exception is logged and recorded as STATUS_FAILED on the timeline
    rather than propagated. A locally acquired machine is always released
    in the finally clause.
    """
    try:
      if not self.cache_has_been_read:
        self.ReadCache()

      if self.result:
        self._logger.LogOutput("%s: Cache hit." % self.name)
        self._logger.LogOutput(self.result.out, print_to_console=False)
        self._logger.LogError(self.result.err, print_to_console=False)

      elif self.label.cache_only:
        # Cache-only labels never run on a machine; synthesize a failing
        # result instead.
        self._logger.LogOutput("%s: No cache hit." % self.name)
        output = "%s: No Cache hit." % self.name
        retval = 1
        err = "No cache hit."
        self.result = Result.CreateFromRun(self._logger, self.log_level,
                                           self.label, self.machine,
                                           output, err, retval,
                                           self.benchmark.show_all_results,
                                           self.benchmark.test_name,
                                           self.benchmark.suite)

      else:
        self._logger.LogOutput("%s: No cache hit." % self.name)
        self.timeline.Record(STATUS_WAITING)
        # Try to acquire a machine now.
        self.machine = self.AcquireMachine()
        self.cache.machine = self.machine
        self.result = self.RunTest(self.machine)

        self.cache.remote = self.machine.name
        self.label.chrome_version = self.machine_manager.GetChromeVersion(
            self.machine)
        self.cache.StoreResult(self.result)

      if self.machine and not self.label.chrome_version:
        self.label.chrome_version = self.machine_manager.GetChromeVersion(
            self.machine)

      if self.terminated:
        return

      if not self.result.retval:
        self.timeline.Record(STATUS_SUCCEEDED)
      else:
        if self.timeline.GetLastEvent() != STATUS_FAILED:
          self.failure_reason = "Return value of test suite was non-zero."
          self.timeline.Record(STATUS_FAILED)

    except Exception as e:
      self._logger.LogError("Benchmark run: '%s' failed: %s" % (self.name, e))
      traceback.print_exc()
      if self.timeline.GetLastEvent() != STATUS_FAILED:
        self.timeline.Record(STATUS_FAILED)
        self.failure_reason = str(e)
    finally:
      if self.owner_thread is not None:
        # In schedv2 mode, we do not lock machine locally. So noop here.
        pass
      elif self.machine:
        if not self.machine.IsReachable():
          self._logger.LogOutput("Machine %s is not reachable, removing it."
                                 % self.machine.name)
          self.machine_manager.RemoveMachine(self.machine.name)
        self._logger.LogOutput("Releasing machine: %s" % self.machine.name)
        self.machine_manager.ReleaseMachine(self.machine)
        self._logger.LogOutput("Released machine: %s" % self.machine.name)

  def Terminate(self):
    """Flag the run as terminated, stop the suite runner and record failure."""
    self.terminated = True
    self.suite_runner.Terminate()
    if self.timeline.GetLastEvent() != STATUS_FAILED:
      self.timeline.Record(STATUS_FAILED)
      self.failure_reason = "Thread terminated."

  def AcquireMachine(self):
    """Return a machine to run on, polling the machine manager if needed.

    When an owner thread is set (schedv2), its dut() is returned directly.
    Otherwise the machine manager is polled every 10 seconds until it hands
    out a machine.

    Returns:
      The acquired machine object.

    Raises:
      Exception: If the run is terminated while waiting for a machine.
    """
    if self.owner_thread is not None:
      # No need to lock machine locally, DutWorker, which is a thread, is
      # responsible for running br.
      return self.owner_thread.dut()

    sleep_duration = 10
    while True:
      if self.terminated:
        raise Exception("Thread terminated while trying to acquire machine.")

      machine = self.machine_manager.AcquireMachine(self.label.chromeos_image,
                                                    self.label, throw=False)
      if machine:
        self._logger.LogOutput("%s: Machine %s acquired at %s" %
                               (self.name,
                                machine.name,
                                datetime.datetime.now()))
        return machine

      time.sleep(sleep_duration)

  def _GetExtraAutotestArgs(self):
    """Build the profiler arguments string from benchmark.perf_args.

    For the "telemetry" and "test_that" suites perf_args is cleared (with an
    error logged).

    Returns:
      A string of extra test arguments enabling the custom perf profiler, or
      "" when no perf_args are in effect.

    Raises:
      Exception: If perf_args does not start with "record" or "stat".
    """
    if self.benchmark.perf_args and self.benchmark.suite == "telemetry":
      self._logger.LogError("Telemetry does not support profiler.")
      self.benchmark.perf_args = ""

    if self.benchmark.perf_args and self.benchmark.suite == "test_that":
      self._logger.LogError("test_that does not support profiler.")
      self.benchmark.perf_args = ""

    if not self.benchmark.perf_args:
      return ""

    perf_args_list = self.benchmark.perf_args.split(" ")
    if perf_args_list[0] not in ("record", "stat"):
      raise Exception("perf_args must start with either record or stat")

    # Insert "-a" right after the perf sub-command.
    perf_args_list = [perf_args_list[0]] + ["-a"] + perf_args_list[1:]
    perf_args = " ".join(perf_args_list)
    extra_test_args = ["--profiler=custom_perf",
                       ("--profiler_args='perf_options=\"%s\"'" %
                        perf_args)]
    return " ".join(extra_test_args)

  def RunTest(self, machine):
    """Image |machine| if needed, run the benchmark and wrap the output.

    Args:
      machine: The machine object to run on; its name is passed to the suite
        runner.

    Returns:
      The object produced by Result.CreateFromRun for this run's output.
    """
    self.timeline.Record(STATUS_IMAGING)
    # In schedv2 mode (owner thread set), do not even call ImageMachine.
    # Machine image is guaranteed.
    if self.owner_thread is None:
      self.machine_manager.ImageMachine(machine,
                                        self.label)
    self.timeline.Record(STATUS_RUNNING)
    retval, out, err = self.suite_runner.Run(machine.name,
                                             self.label,
                                             self.benchmark,
                                             self.test_args,
                                             self.profiler_args)
    self.run_completed = True
    return Result.CreateFromRun(self._logger,
                                self.log_level,
                                self.label,
                                self.machine,
                                out,
                                err,
                                retval,
                                self.benchmark.show_all_results,
                                self.benchmark.test_name,
                                self.benchmark.suite)

  def SetCacheConditions(self, cache_conditions):
    """Replace the cache conditions used by subsequent cache reads."""
    self.cache_conditions = cache_conditions

  def __str__(self):
    """For better debugging."""

    return 'BenchmarkRun[name="{}"]'.format(self.name)


class MockBenchmarkRun(BenchmarkRun):
  """Inherited from BenchmarkRun; uses mock cache and result objects."""

  def ReadCache(self):
    """Create a MockResultsCache and try to load a cached result."""
    self._ReadCacheWith(MockResultsCache())

  def RunTest(self, machine):
    """Remove Result.CreateFromRun for testing.

    Args:
      machine: The machine object to run on.

    Returns:
      A MockResult carrying the suite runner's out, err and retval.
    """
    self.timeline.Record(STATUS_IMAGING)
    self.machine_manager.ImageMachine(machine,
                                      self.label)
    self.timeline.Record(STATUS_RUNNING)
    retval, out, err = self.suite_runner.Run(machine.name,
                                             self.label,
                                             self.benchmark,
                                             self.test_args,
                                             self.profiler_args)
    self.run_completed = True
    rr = MockResult("logger", self.label, self.log_level)
    rr.out = out
    rr.err = err
    rr.retval = retval
    return rr