benchmark_run.py revision e066297f07a8d1e1ad3416b4b034b2943f47c648
#!/usr/bin/python

# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Thread classes that run one iteration of a benchmark for one label.

A run first consults a results cache (see ReadCache); only when no cached
result is available does it acquire a machine, image it and run the suite.
"""

import datetime
import os
import threading
import time
import traceback

from utils import command_executer
from utils import timeline

from suite_runner import SuiteRunner
from results_cache import MockResult
from results_cache import MockResultsCache
from results_cache import Result
from results_cache import ResultsCache
from results_cache import TelemetryResult


# Events recorded on a run's timeline as it progresses.
STATUS_FAILED = "FAILED"
STATUS_SUCCEEDED = "SUCCEEDED"
STATUS_IMAGING = "IMAGING"
STATUS_RUNNING = "RUNNING"
STATUS_WAITING = "WAITING"
STATUS_PENDING = "PENDING"


class BenchmarkRun(threading.Thread):
  """A thread that produces the result of one benchmark iteration.

  Callers are expected to call ReadCache() before starting the thread: run()
  relies on self.cache, which only ReadCache() creates.
  """

  def __init__(self, name, benchmark,
               label,
               iteration,
               cache_conditions,
               machine_manager,
               logger_to_use,
               log_level,
               share_cache):
    """Store the run configuration and record the PENDING status.

    Args:
      name: Name of this run; also used as the thread name and in log output.
      benchmark: Benchmark description. This class reads its test_name,
        test_args, perf_args, suite, show_all_results and run_local
        attributes.
      label: Label description. This class reads its chromeos_image,
        chromeos_root, board, cache_only and chrome_version attributes.
      iteration: Iteration identifier, forwarded to the results cache.
      cache_conditions: Cache conditions, forwarded to the results cache.
      machine_manager: Object used to acquire, image and release machines.
      logger_to_use: Logger that receives all output of this run.
      log_level: Log level handed to the helpers created by this run.
      share_cache: Forwarded to the results cache.

    Raises:
      Exception: if benchmark.perf_args is set for a suite that supports the
        profiler but does not start with "record" or "stat".
    """
    threading.Thread.__init__(self)
    self.name = name
    self._logger = logger_to_use
    self.log_level = log_level
    self.benchmark = benchmark
    self.iteration = iteration
    self.label = label
    self.result = None
    self.terminated = False
    self.retval = None
    self.run_completed = False
    self.machine_manager = machine_manager
    self.suite_runner = SuiteRunner(self._logger, self.log_level)
    self.machine = None
    self.cache_conditions = cache_conditions
    self.runs_complete = 0
    self.cache_hit = False
    self.failure_reason = ""
    self.test_args = benchmark.test_args
    # Needs self.benchmark and self._logger, which are set above.
    self.profiler_args = self._GetExtraAutotestArgs()
    self._ce = command_executer.GetCommandExecuter(self._logger,
                                                   log_level=self.log_level)
    self.timeline = timeline.Timeline()
    self.timeline.Record(STATUS_PENDING)
    self.share_cache = share_cache

    # This is used by schedv2.
    self.owner_thread = None

  def ReadCache(self):
    """Create the results cache and try to load a cached result.

    Sets self.cache, self.result (None when nothing was read) and
    self.cache_hit.
    """
    # Just use the first machine for running the cached version,
    # without locking it.
    self.cache = ResultsCache()
    self.cache.Init(self.label.chromeos_image,
                    self.label.chromeos_root,
                    self.benchmark.test_name,
                    self.iteration,
                    self.test_args,
                    self.profiler_args,
                    self.machine_manager,
                    self.machine,
                    self.label.board,
                    self.cache_conditions,
                    self._logger,
                    self.log_level,
                    self.label,
                    self.share_cache,
                    self.benchmark.suite,
                    self.benchmark.show_all_results,
                    self.benchmark.run_local
                   )

    self.result = self.cache.ReadResult()
    self.cache_hit = (self.result is not None)

  def run(self):
    """Thread body: obtain a result and record the final status.

    Three cases are handled:
      * a cached result is already present: it is only logged;
      * the label is cache-only: a failing result (retval 1) is synthesized;
      * otherwise a machine is acquired, the test is run and the result is
        stored in the cache.

    Any exception is logged, recorded as STATUS_FAILED and not propagated.
    A locally acquired machine is always released on the way out.
    """
    try:
      if self.result:
        self._logger.LogOutput("%s: Cache hit." % self.name)
        self._logger.LogOutput(self.result.out, print_to_console=False)
        self._logger.LogError(self.result.err, print_to_console=False)

      elif self.label.cache_only:
        self._logger.LogOutput("%s: No cache hit." % self.name)
        output = "%s: No Cache hit." % self.name
        retval = 1
        err = "No cache hit."
        self.result = Result.CreateFromRun(self._logger, self.log_level,
                                           self.label, self.machine,
                                           output, err, retval,
                                           self.benchmark.show_all_results,
                                           self.benchmark.test_name,
                                           self.benchmark.suite)

      else:
        self._logger.LogOutput("%s: No cache hit." % self.name)
        self.timeline.Record(STATUS_WAITING)
        # Try to acquire a machine now.
        self.machine = self.AcquireMachine()
        self.cache.machine = self.machine
        self.result = self.RunTest(self.machine)

        self.cache.remote = self.machine.name
        self.label.chrome_version = self.machine_manager.GetChromeVersion(
            self.machine)
        self.cache.StoreResult(self.result)

      if self.machine and not self.label.chrome_version:
        self.label.chrome_version = self.machine_manager.GetChromeVersion(
            self.machine)

      # Terminate() has already recorded the failure on the timeline.
      if self.terminated:
        return

      if not self.result.retval:
        self.timeline.Record(STATUS_SUCCEEDED)
      else:
        if self.timeline.GetLastEvent() != STATUS_FAILED:
          self.failure_reason = "Return value of test suite was non-zero."
          self.timeline.Record(STATUS_FAILED)

    # "as" form works on Python 2.6+ and 3; the old comma form is 2-only.
    except Exception as e:
      self._logger.LogError("Benchmark run: '%s' failed: %s" % (self.name, e))
      traceback.print_exc()
      if self.timeline.GetLastEvent() != STATUS_FAILED:
        self.timeline.Record(STATUS_FAILED)
        self.failure_reason = str(e)
    finally:
      # In schedv2 mode (owner_thread set), we do not lock machine locally,
      # so there is nothing to release here.
      if self.owner_thread is None and self.machine:
        if not self.machine.IsReachable():
          self._logger.LogOutput("Machine %s is not reachable, removing it."
                                 % self.machine.name)
          self.machine_manager.RemoveMachine(self.machine.name)
        self._logger.LogOutput("Releasing machine: %s" % self.machine.name)
        self.machine_manager.ReleaseMachine(self.machine)
        self._logger.LogOutput("Released machine: %s" % self.machine.name)

  def Terminate(self):
    """Flag the run as terminated, stop the suite runner, record failure."""
    self.terminated = True
    self.suite_runner.Terminate()
    if self.timeline.GetLastEvent() != STATUS_FAILED:
      self.timeline.Record(STATUS_FAILED)
      self.failure_reason = "Thread terminated."

  def AcquireMachine(self):
    """Return a machine to run on, polling until one is available.

    In schedv2 mode (owner_thread set) the owner thread's machine is returned
    immediately. Otherwise the machine manager is asked repeatedly, sleeping
    10 seconds between attempts.

    Returns:
      The acquired machine object.

    Raises:
      Exception: if the run is terminated while waiting for a machine.
    """
    if self.owner_thread is not None:
      # No need to lock machine locally, DutWorker, which is a thread, is
      # responsible for running br.
      return self.owner_thread.dut()

    sleep_duration = 10
    while True:
      if self.terminated:
        raise Exception("Thread terminated while trying to acquire machine.")

      machine = self.machine_manager.AcquireMachine(self.label.chromeos_image,
                                                    self.label, throw=False)

      if machine:
        self._logger.LogOutput("%s: Machine %s acquired at %s" %
                               (self.name,
                                machine.name,
                                datetime.datetime.now()))
        return machine

      time.sleep(sleep_duration)

  def _GetExtraAutotestArgs(self):
    """Build the profiler arguments string from benchmark.perf_args.

    For the "telemetry" and "test_that" suites an error is logged and
    benchmark.perf_args is cleared, since they do not support the profiler.

    Returns:
      The extra test arguments enabling the custom perf profiler, or "" when
      no perf_args are in effect.

    Raises:
      Exception: if perf_args does not start with "record" or "stat".
    """
    if self.benchmark.perf_args and self.benchmark.suite == "telemetry":
      self._logger.LogError("Telemetry does not support profiler.")
      self.benchmark.perf_args = ""

    if self.benchmark.perf_args and self.benchmark.suite == "test_that":
      self._logger.LogError("test_that does not support profiler.")
      self.benchmark.perf_args = ""

    if self.benchmark.perf_args:
      perf_args_list = self.benchmark.perf_args.split(" ")
      # Insert "-a" right after the perf sub-command.
      perf_args_list = [perf_args_list[0]] + ["-a"] + perf_args_list[1:]
      perf_args = " ".join(perf_args_list)
      if perf_args_list[0] not in ("record", "stat"):
        raise Exception("perf_args must start with either record or stat")
      extra_test_args = ["--profiler=custom_perf",
                         ("--profiler_args='perf_options=\"%s\"'" %
                          perf_args)]
      return " ".join(extra_test_args)
    else:
      return ""

  def RunTest(self, machine):
    """Image the machine (unless in schedv2 mode) and run the benchmark.

    Args:
      machine: The machine to run on; its name is passed to the suite runner.

    Returns:
      The object built by Result.CreateFromRun from the suite runner's
      return value, output and error output.
    """
    self.timeline.Record(STATUS_IMAGING)
    # In schedv2 mode, do not even call ImageMachine. Machine image is
    # guaranteed.
    if self.owner_thread is None:
      self.machine_manager.ImageMachine(machine,
                                        self.label)
    self.timeline.Record(STATUS_RUNNING)
    retval, out, err = self.suite_runner.Run(machine.name,
                                             self.label,
                                             self.benchmark,
                                             self.test_args,
                                             self.profiler_args)
    self.run_completed = True
    return Result.CreateFromRun(self._logger,
                                self.log_level,
                                self.label,
                                self.machine,
                                out,
                                err,
                                retval,
                                self.benchmark.show_all_results,
                                self.benchmark.test_name,
                                self.benchmark.suite)

  def SetCacheConditions(self, cache_conditions):
    """Replace the cache conditions used by subsequent ReadCache() calls."""
    self.cache_conditions = cache_conditions

  def __str__(self):
    """For better debugging."""

    return 'BenchmarkRun[name="{}"]'.format(self.name)


class MockBenchmarkRun(BenchmarkRun):
  """Inherited from BenchmarkRun; uses the mock cache and result classes."""

  def ReadCache(self):
    """Same as BenchmarkRun.ReadCache, but backed by MockResultsCache."""
    # Just use the first machine for running the cached version,
    # without locking it.
    self.cache = MockResultsCache()
    self.cache.Init(self.label.chromeos_image,
                    self.label.chromeos_root,
                    self.benchmark.test_name,
                    self.iteration,
                    self.test_args,
                    self.profiler_args,
                    self.machine_manager,
                    self.machine,
                    self.label.board,
                    self.cache_conditions,
                    self._logger,
                    self.log_level,
                    self.label,
                    self.share_cache,
                    self.benchmark.suite,
                    self.benchmark.show_all_results,
                    self.benchmark.run_local
                   )

    self.result = self.cache.ReadResult()
    self.cache_hit = (self.result is not None)

  def RunTest(self, machine):
    """Remove Result.CreateFromRun for testing.

    Unlike BenchmarkRun.RunTest, this always calls ImageMachine and returns
    a MockResult carrying the suite runner's output, error and return value.
    """
    self.timeline.Record(STATUS_IMAGING)
    self.machine_manager.ImageMachine(machine,
                                      self.label)
    self.timeline.Record(STATUS_RUNNING)
    retval, out, err = self.suite_runner.Run(machine.name,
                                             self.label,
                                             self.benchmark,
                                             self.test_args,
                                             self.profiler_args)
    self.run_completed = True
    rr = MockResult("logger", self.label, self.log_level)
    rr.out = out
    rr.err = err
    rr.retval = retval
    return rr