benchmark_run.py revision e066297f07a8d1e1ad3416b4b034b2943f47c648
1#!/usr/bin/python
2
3# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7import datetime
8import os
9import threading
10import time
11import traceback
12
13from utils import command_executer
14from utils import timeline
15
16from suite_runner import SuiteRunner
17from results_cache import MockResult
18from results_cache import MockResultsCache
19from results_cache import Result
20from results_cache import ResultsCache
21from results_cache import TelemetryResult
22
23
# Timeline status markers recorded by BenchmarkRun, listed in the order a
# run normally moves through them.
STATUS_PENDING = "PENDING"      # Recorded when the run object is created.
STATUS_WAITING = "WAITING"      # Cache miss; waiting to acquire a machine.
STATUS_IMAGING = "IMAGING"      # Recorded at the start of RunTest.
STATUS_RUNNING = "RUNNING"      # The test suite is being executed.
# Terminal states.
STATUS_SUCCEEDED = "SUCCEEDED"
STATUS_FAILED = "FAILED"
30
class BenchmarkRun(threading.Thread):
  """Thread that performs one iteration of a benchmark for one label.

  run() first uses a previously loaded cached result, if any. On a cache
  miss it acquires a machine, images it, runs the test suite and stores the
  result in the cache. Progress is recorded on self.timeline using the
  STATUS_* constants.

  ReadCache() must be called before run(): run() reads self.result and, on a
  cache miss, uses self.cache.
  """

  def __init__(self, name, benchmark,
               label,
               iteration,
               cache_conditions,
               machine_manager,
               logger_to_use,
               log_level,
               share_cache):
    """Initializes the run and records STATUS_PENDING on its timeline.

    Args:
      name: Name of this run; used as the thread name and in log messages.
      benchmark: Benchmark description; test_name, test_args, suite,
        perf_args, show_all_results and run_local are read from it.
      label: Label description; chromeos_image, chromeos_root, board,
        cache_only and chrome_version are read from it.
      iteration: Iteration number, forwarded to the results cache.
      cache_conditions: Cache conditions, forwarded to the results cache.
      machine_manager: Used to acquire, image, query and release machines.
      logger_to_use: Logger providing LogOutput and LogError.
      log_level: Log level forwarded to helpers created by this run.
      share_cache: Forwarded to the results cache.

    Raises:
      Exception: If benchmark.perf_args does not start with "record" or
        "stat" (see _GetExtraAutotestArgs).
    """
    threading.Thread.__init__(self)
    self.name = name
    self._logger = logger_to_use
    self.log_level = log_level
    self.benchmark = benchmark
    self.iteration = iteration
    self.label = label
    self.result = None
    # Populated by ReadCache(); defined here so the attribute always exists.
    self.cache = None
    self.terminated = False
    self.retval = None
    self.run_completed = False
    self.machine_manager = machine_manager
    self.suite_runner = SuiteRunner(self._logger, self.log_level)
    self.machine = None
    self.cache_conditions = cache_conditions
    self.runs_complete = 0
    self.cache_hit = False
    self.failure_reason = ""
    self.test_args = benchmark.test_args
    # Needs self._logger and self.benchmark, which are set above.
    self.profiler_args = self._GetExtraAutotestArgs()
    self._ce = command_executer.GetCommandExecuter(self._logger,
                                                   log_level=self.log_level)
    self.timeline = timeline.Timeline()
    self.timeline.Record(STATUS_PENDING)
    self.share_cache = share_cache

    # This is used by schedv2.
    self.owner_thread = None

  def ReadCache(self):
    """Creates the results cache and tries to load a cached result.

    Sets self.cache, self.result (None when the cache returns nothing) and
    self.cache_hit.
    """
    # Just use the first machine for running the cached version,
    # without locking it.
    self.cache = ResultsCache()
    self.cache.Init(self.label.chromeos_image,
                    self.label.chromeos_root,
                    self.benchmark.test_name,
                    self.iteration,
                    self.test_args,
                    self.profiler_args,
                    self.machine_manager,
                    self.machine,
                    self.label.board,
                    self.cache_conditions,
                    self._logger,
                    self.log_level,
                    self.label,
                    self.share_cache,
                    self.benchmark.suite,
                    self.benchmark.show_all_results,
                    self.benchmark.run_local)

    self.result = self.cache.ReadResult()
    self.cache_hit = (self.result is not None)

  def run(self):
    """Thread body: obtains a result and records the final status.

    Three cases are handled:
      * self.result is already set (cache hit): its output is logged.
      * label.cache_only is set: a failing result (retval 1) is synthesized
        without running anything.
      * otherwise a machine is acquired, the test is run and the result is
        stored in the cache.

    Any exception is logged and turned into STATUS_FAILED rather than
    propagated. Outside schedv2 mode (owner_thread is None), a machine held
    in self.machine is always released in the finally clause.
    """
    try:
      if self.result:
        self._logger.LogOutput("%s: Cache hit." % self.name)
        self._logger.LogOutput(self.result.out, print_to_console=False)
        self._logger.LogError(self.result.err, print_to_console=False)

      elif self.label.cache_only:
        self._logger.LogOutput("%s: No cache hit." % self.name)
        output = "%s: No Cache hit." % self.name
        retval = 1
        err = "No cache hit."
        self.result = Result.CreateFromRun(self._logger, self.log_level,
                                           self.label, self.machine,
                                           output, err, retval,
                                           self.benchmark.show_all_results,
                                           self.benchmark.test_name,
                                           self.benchmark.suite)

      else:
        self._logger.LogOutput("%s: No cache hit." % self.name)
        self.timeline.Record(STATUS_WAITING)
        # Try to acquire a machine now.
        self.machine = self.AcquireMachine()
        self.cache.machine = self.machine
        self.result = self.RunTest(self.machine)

        self.cache.remote = self.machine.name
        self.label.chrome_version = self.machine_manager.GetChromeVersion(
            self.machine)
        self.cache.StoreResult(self.result)

      if self.machine and not self.label.chrome_version:
        self.label.chrome_version = self.machine_manager.GetChromeVersion(
            self.machine)

      # Terminate() has already recorded the failure; nothing more to record.
      if self.terminated:
        return

      if not self.result.retval:
        self.timeline.Record(STATUS_SUCCEEDED)
      elif self.timeline.GetLastEvent() != STATUS_FAILED:
        self.failure_reason = "Return value of test suite was non-zero."
        self.timeline.Record(STATUS_FAILED)

    except Exception as e:
      # "except ... as" is valid on both Python 2.6+ and Python 3.
      self._logger.LogError("Benchmark run: '%s' failed: %s" % (self.name, e))
      traceback.print_exc()
      if self.timeline.GetLastEvent() != STATUS_FAILED:
        self.timeline.Record(STATUS_FAILED)
        self.failure_reason = str(e)
    finally:
      # In schedv2 mode (owner_thread set) the machine is not locked locally,
      # so there is nothing to release here.
      if self.owner_thread is None and self.machine:
        if not self.machine.IsReachable():
          self._logger.LogOutput("Machine %s is not reachable, removing it."
                                 % self.machine.name)
          self.machine_manager.RemoveMachine(self.machine.name)
        self._logger.LogOutput("Releasing machine: %s" % self.machine.name)
        self.machine_manager.ReleaseMachine(self.machine)
        self._logger.LogOutput("Released machine: %s" % self.machine.name)

  def Terminate(self):
    """Marks the run as terminated and asks the suite runner to stop.

    Records STATUS_FAILED with reason "Thread terminated." unless the
    timeline's last event is already STATUS_FAILED.
    """
    self.terminated = True
    self.suite_runner.Terminate()
    if self.timeline.GetLastEvent() != STATUS_FAILED:
      self.timeline.Record(STATUS_FAILED)
      self.failure_reason = "Thread terminated."

  def AcquireMachine(self):
    """Returns a machine to run on, polling until one is available.

    In schedv2 mode (owner_thread set) the owner thread's dut() is returned
    immediately. Otherwise the machine manager is polled every 10 seconds.

    Returns:
      The acquired machine object.

    Raises:
      Exception: If the run is terminated while waiting for a machine.
    """
    if self.owner_thread is not None:
      # No need to lock machine locally, DutWorker, which is a thread, is
      # responsible for running br.
      return self.owner_thread.dut()

    sleep_duration = 10
    while True:
      if self.terminated:
        raise Exception("Thread terminated while trying to acquire machine.")

      machine = self.machine_manager.AcquireMachine(self.label.chromeos_image,
                                                    self.label, throw=False)
      if machine:
        self._logger.LogOutput("%s: Machine %s acquired at %s" %
                               (self.name,
                                machine.name,
                                datetime.datetime.now()))
        return machine

      time.sleep(sleep_duration)

  def _GetExtraAutotestArgs(self):
    """Builds the profiler arguments string from benchmark.perf_args.

    For the "telemetry" and "test_that" suites an error is logged and
    benchmark.perf_args is reset to "". Otherwise "-a" is inserted right
    after the perf sub-command.

    Returns:
      The extra test arguments as one string, or "" when no perf_args apply.

    Raises:
      Exception: If perf_args does not start with "record" or "stat".
    """
    if self.benchmark.perf_args and self.benchmark.suite == "telemetry":
      self._logger.LogError("Telemetry does not support profiler.")
      self.benchmark.perf_args = ""

    if self.benchmark.perf_args and self.benchmark.suite == "test_that":
      self._logger.LogError("test_that does not support profiler.")
      self.benchmark.perf_args = ""

    if not self.benchmark.perf_args:
      return ""

    perf_args_list = self.benchmark.perf_args.split(" ")
    # Validate the sub-command before building anything from it.
    if perf_args_list[0] not in ("record", "stat"):
      raise Exception("perf_args must start with either record or stat")

    perf_args_list = [perf_args_list[0]] + ["-a"] + perf_args_list[1:]
    perf_args = " ".join(perf_args_list)
    extra_test_args = ["--profiler=custom_perf",
                       ("--profiler_args='perf_options=\"%s\"'" %
                        perf_args)]
    return " ".join(extra_test_args)

  def RunTest(self, machine):
    """Images the machine (unless in schedv2 mode) and runs the benchmark.

    Records STATUS_IMAGING and then STATUS_RUNNING on the timeline and sets
    self.run_completed once the suite runner returns.

    Args:
      machine: The machine to run on; its name is passed to the suite runner.

    Returns:
      The object built by Result.CreateFromRun from the suite runner's
      return value, stdout and stderr.
    """
    self.timeline.Record(STATUS_IMAGING)
    # In schedv2 mode, do not even call ImageMachine. Machine image is
    # guaranteed.
    if self.owner_thread is None:
      self.machine_manager.ImageMachine(machine,
                                        self.label)
    self.timeline.Record(STATUS_RUNNING)
    retval, out, err = self.suite_runner.Run(machine.name,
                                             self.label,
                                             self.benchmark,
                                             self.test_args,
                                             self.profiler_args)
    self.run_completed = True
    return Result.CreateFromRun(self._logger,
                                self.log_level,
                                self.label,
                                self.machine,
                                out,
                                err,
                                retval,
                                self.benchmark.show_all_results,
                                self.benchmark.test_name,
                                self.benchmark.suite)

  def SetCacheConditions(self, cache_conditions):
    """Replaces the cache conditions stored on this run."""
    self.cache_conditions = cache_conditions

  def __str__(self):
    """For better debugging."""

    return 'BenchmarkRun[name="{}"]'.format(self.name)
248
249
class MockBenchmarkRun(BenchmarkRun):
  """BenchmarkRun variant for testing, built on the mock cache/result types."""

  def ReadCache(self):
    """Creates a MockResultsCache and tries to load a cached result from it.

    Sets self.cache, self.result (None when the cache returns nothing) and
    self.cache_hit.
    """
    # Just use the first machine for running the cached version,
    # without locking it.
    self.cache = MockResultsCache()
    init_args = (
        self.label.chromeos_image,
        self.label.chromeos_root,
        self.benchmark.test_name,
        self.iteration,
        self.test_args,
        self.profiler_args,
        self.machine_manager,
        self.machine,
        self.label.board,
        self.cache_conditions,
        self._logger,
        self.log_level,
        self.label,
        self.share_cache,
        self.benchmark.suite,
        self.benchmark.show_all_results,
        self.benchmark.run_local,
    )
    self.cache.Init(*init_args)

    cached = self.cache.ReadResult()
    self.result = cached
    self.cache_hit = cached is not None

  def RunTest(self, machine):
    """Images the machine, runs the benchmark and returns a MockResult.

    Unlike the base class, the result is not built with
    Result.CreateFromRun; a MockResult is filled in directly from the suite
    runner's return value, stdout and stderr.

    Args:
      machine: The machine to image and run on.

    Returns:
      A MockResult with out, err and retval set.
    """
    self.timeline.Record(STATUS_IMAGING)
    self.machine_manager.ImageMachine(machine, self.label)
    self.timeline.Record(STATUS_RUNNING)
    retval, out, err = self.suite_runner.Run(machine.name,
                                             self.label,
                                             self.benchmark,
                                             self.test_args,
                                             self.profiler_args)
    self.run_completed = True

    mock_result = MockResult("logger", self.label, self.log_level)
    mock_result.out = out
    mock_result.err = err
    mock_result.retval = retval
    return mock_result
297