benchmark_run.py revision 0d1a9f32c928e21a72547f3d334d631c5861f027
1#!/usr/bin/python
2
3# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7import datetime
8import os
9import threading
10import time
11import traceback
12
13from cros_utils import command_executer
14from cros_utils import timeline
15
16from suite_runner import SuiteRunner
17from results_cache import MockResult
18from results_cache import MockResultsCache
19from results_cache import Result
20from results_cache import ResultsCache
21from results_cache import TelemetryResult
22
23
# Status labels recorded on a BenchmarkRun's timeline (via
# timeline.Record) as the run moves through its life cycle.
STATUS_FAILED = "FAILED"
STATUS_SUCCEEDED = "SUCCEEDED"
STATUS_IMAGING = "IMAGING"
STATUS_RUNNING = "RUNNING"
STATUS_WAITING = "WAITING"
STATUS_PENDING = "PENDING"
30
class BenchmarkRun(threading.Thread):
  """Thread that obtains the result of one benchmark iteration for one label.

  The result is looked up in the results cache first. On a cache miss (and
  unless the label is cache-only) a machine is acquired, the test is run
  through the SuiteRunner and the fresh result is stored back in the cache.
  Progress is recorded on self.timeline using the STATUS_* constants.
  """

  def __init__(self, name, benchmark,
               label,
               iteration,
               cache_conditions,
               machine_manager,
               logger_to_use,
               log_level,
               share_cache):
    """Sets up the run and records STATUS_PENDING on its timeline.

    Args:
      name: Name of this run; also used as the thread name and in log lines.
      benchmark: Benchmark description. This class reads its test_name,
        test_args, perf_args, suite, show_all_results and run_local
        attributes.
      label: Label description. This class reads its chromeos_image,
        chromeos_root, board, cache_only and chrome_version attributes.
      iteration: Iteration identifier, forwarded to the results cache.
      cache_conditions: Cache conditions, forwarded to the results cache.
      machine_manager: Object used to acquire, image and release machines.
      logger_to_use: Logger used for all output of this run.
      log_level: Log level forwarded to the helpers created here.
      share_cache: Shared-cache setting, forwarded to the results cache.

    Raises:
      Exception: If benchmark.perf_args does not start with "record" or
        "stat" (see _GetExtraAutotestArgs).
    """
    threading.Thread.__init__(self)
    self.name = name
    self._logger = logger_to_use
    self.log_level = log_level
    self.benchmark = benchmark
    self.iteration = iteration
    self.label = label
    self.result = None
    self.terminated = False
    self.retval = None
    self.run_completed = False
    self.machine_manager = machine_manager
    self.suite_runner = SuiteRunner(self._logger, self.log_level)
    self.machine = None
    # Created by ReadCache(); defined here so the attribute always exists.
    self.cache = None
    self.cache_conditions = cache_conditions
    self.runs_complete = 0
    self.cache_hit = False
    self.failure_reason = ""
    self.test_args = benchmark.test_args
    # Needs self.benchmark and self._logger, which are set above.
    self.profiler_args = self._GetExtraAutotestArgs()
    self._ce = command_executer.GetCommandExecuter(self._logger,
                                                   log_level=self.log_level)
    self.timeline = timeline.Timeline()
    self.timeline.Record(STATUS_PENDING)
    self.share_cache = share_cache
    self.cache_has_been_read = False

    # This is used by schedv2.
    self.owner_thread = None

  def ReadCache(self):
    """Creates the results cache and tries to read a cached result from it.

    Sets self.cache, self.result, self.cache_hit and marks the cache as read
    so that run() does not read it a second time.
    """
    # Just use the first machine for running the cached version,
    # without locking it.
    self.cache = ResultsCache()
    self.cache.Init(self.label.chromeos_image,
                    self.label.chromeos_root,
                    self.benchmark.test_name,
                    self.iteration,
                    self.test_args,
                    self.profiler_args,
                    self.machine_manager,
                    self.machine,
                    self.label.board,
                    self.cache_conditions,
                    self._logger,
                    self.log_level,
                    self.label,
                    self.share_cache,
                    self.benchmark.suite,
                    self.benchmark.show_all_results,
                    self.benchmark.run_local
                   )

    self.result = self.cache.ReadResult()
    self.cache_hit = (self.result is not None)
    self.cache_has_been_read = True

  def run(self):
    """Thread entry point: produces self.result and records the final status.

    Any exception is logged and turned into a STATUS_FAILED timeline event
    with self.failure_reason set; it is not re-raised. Unless the run is
    owned by a schedv2 worker (self.owner_thread is set), an acquired machine
    is always released at the end.
    """
    try:
      if not self.cache_has_been_read:
        self.ReadCache()

      if self.result:
        self._logger.LogOutput("%s: Cache hit." % self.name)
        self._logger.LogOutput(self.result.out, print_to_console=False)
        self._logger.LogError(self.result.err, print_to_console=False)

      elif self.label.cache_only:
        # Cache-only labels never run the test; synthesize a failed result.
        self._logger.LogOutput("%s: No cache hit." % self.name)
        output = "%s: No Cache hit." % self.name
        retval = 1
        err = "No cache hit."
        self.result = Result.CreateFromRun(self._logger, self.log_level,
                                           self.label, self.machine,
                                           output, err, retval,
                                           self.benchmark.show_all_results,
                                           self.benchmark.test_name,
                                           self.benchmark.suite)

      else:
        self._logger.LogOutput("%s: No cache hit." % self.name)
        self.timeline.Record(STATUS_WAITING)
        # Try to acquire a machine now.
        self.machine = self.AcquireMachine()
        self.cache.machine = self.machine
        self.result = self.RunTest(self.machine)

        self.cache.remote = self.machine.name
        self.label.chrome_version = self.machine_manager.GetChromeVersion(
            self.machine)
        self.cache.StoreResult(self.result)

      if self.machine and not self.label.chrome_version:
        self.label.chrome_version = self.machine_manager.GetChromeVersion(
            self.machine)

      if self.terminated:
        # Terminate() already recorded the failure on the timeline.
        return

      if not self.result.retval:
        self.timeline.Record(STATUS_SUCCEEDED)
      else:
        if self.timeline.GetLastEvent() != STATUS_FAILED:
          self.failure_reason = "Return value of test suite was non-zero."
          self.timeline.Record(STATUS_FAILED)

    except Exception as e:
      self._logger.LogError("Benchmark run: '%s' failed: %s" % (self.name, e))
      traceback.print_exc()
      if self.timeline.GetLastEvent() != STATUS_FAILED:
        self.timeline.Record(STATUS_FAILED)
        self.failure_reason = str(e)
    finally:
      if self.owner_thread is not None:
        # In schedv2 mode, we do not lock machine locally. So noop here.
        pass
      elif self.machine:
        if not self.machine.IsReachable():
          self._logger.LogOutput("Machine %s is not reachable, removing it."
                                 % self.machine.name)
          self.machine_manager.RemoveMachine(self.machine.name)
        self._logger.LogOutput("Releasing machine: %s" % self.machine.name)
        self.machine_manager.ReleaseMachine(self.machine)
        self._logger.LogOutput("Released machine: %s" % self.machine.name)

  def Terminate(self):
    """Marks the run as terminated and asks the suite runner to terminate.

    Records STATUS_FAILED (with a failure reason) unless the timeline's last
    event is already STATUS_FAILED.
    """
    self.terminated = True
    self.suite_runner.Terminate()
    if self.timeline.GetLastEvent() != STATUS_FAILED:
      self.timeline.Record(STATUS_FAILED)
      self.failure_reason = "Thread terminated."

  def AcquireMachine(self):
    """Returns a machine to run on, polling the machine manager if needed.

    In schedv2 mode (self.owner_thread is set) the owner's DUT is returned
    right away. Otherwise the machine manager is polled until it hands out a
    machine.

    Returns:
      The acquired machine.

    Raises:
      Exception: If the run is terminated while waiting for a machine.
    """
    if self.owner_thread is not None:
      # No need to lock machine locally, DutWorker, which is a thread, is
      # responsible for running br.
      return self.owner_thread.dut()

    # Seconds to wait between two attempts to acquire a machine.
    sleep_duration = 10
    while True:
      if self.terminated:
        raise Exception("Thread terminated while trying to acquire machine.")

      machine = self.machine_manager.AcquireMachine(self.label.chromeos_image,
                                                    self.label, throw=False)
      if machine:
        self._logger.LogOutput("%s: Machine %s acquired at %s" %
                               (self.name,
                                machine.name,
                                datetime.datetime.now()))
        return machine

      time.sleep(sleep_duration)

  def _GetExtraAutotestArgs(self):
    """Builds the extra test arguments that enable the perf profiler.

    For the "telemetry" and "test_that" suites profiling is not supported: an
    error is logged and self.benchmark.perf_args is cleared.

    Returns:
      The profiler arguments as a single string, or "" when no perf_args are
      set (or they were cleared).

    Raises:
      Exception: If perf_args does not start with "record" or "stat".
    """
    if self.benchmark.perf_args and self.benchmark.suite == "telemetry":
      self._logger.LogError("Telemetry does not support profiler.")
      self.benchmark.perf_args = ""

    if self.benchmark.perf_args and self.benchmark.suite == "test_that":
      self._logger.LogError("test_that does not support profiler.")
      self.benchmark.perf_args = ""

    if not self.benchmark.perf_args:
      return ""

    perf_args_list = self.benchmark.perf_args.split(" ")
    # Validate the perf sub-command before building anything from it.
    if perf_args_list[0] not in ["record", "stat"]:
      raise Exception("perf_args must start with either record or stat")

    # Insert "-a" right after the perf sub-command.
    perf_args = " ".join([perf_args_list[0], "-a"] + perf_args_list[1:])
    extra_test_args = ["--profiler=custom_perf",
                       ("--profiler_args='perf_options=\"%s\"'" %
                        perf_args)]
    return " ".join(extra_test_args)

  def RunTest(self, machine):
    """Images the machine (unless in schedv2 mode) and runs the benchmark.

    Args:
      machine: The machine to run on; its name is passed to the suite runner.

    Returns:
      A Result created from the suite runner's return value, stdout and
      stderr.
    """
    self.timeline.Record(STATUS_IMAGING)
    if self.owner_thread is not None:
      # In schedv2 mode, do not even call ImageMachine. Machine image is
      # guaranteed.
      pass
    else:
      self.machine_manager.ImageMachine(machine,
                                        self.label)
    self.timeline.Record(STATUS_RUNNING)
    [retval, out, err] = self.suite_runner.Run(machine.name,
                                               self.label,
                                               self.benchmark,
                                               self.test_args,
                                               self.profiler_args)
    self.run_completed = True
    return Result.CreateFromRun(self._logger,
                                self.log_level,
                                self.label,
                                self.machine,
                                out,
                                err,
                                retval,
                                self.benchmark.show_all_results,
                                self.benchmark.test_name,
                                self.benchmark.suite)

  def SetCacheConditions(self, cache_conditions):
    """Replaces the cache conditions used by a later ReadCache() call."""
    self.cache_conditions = cache_conditions

  def __str__(self):
    """For better debugging."""

    return 'BenchmarkRun[name="{}"]'.format(self.name)
252
253
class MockBenchmarkRun(BenchmarkRun):
  """BenchmarkRun variant that uses MockResultsCache and MockResult."""

  def ReadCache(self):
    """Creates a MockResultsCache and tries to read a cached result from it.

    Sets self.cache, self.result, self.cache_hit and marks the cache as read,
    matching BenchmarkRun.ReadCache, so that run() does not read it again.
    """
    # Just use the first machine for running the cached version,
    # without locking it.
    self.cache = MockResultsCache()
    self.cache.Init(self.label.chromeos_image,
                    self.label.chromeos_root,
                    self.benchmark.test_name,
                    self.iteration,
                    self.test_args,
                    self.profiler_args,
                    self.machine_manager,
                    self.machine,
                    self.label.board,
                    self.cache_conditions,
                    self._logger,
                    self.log_level,
                    self.label,
                    self.share_cache,
                    self.benchmark.suite,
                    self.benchmark.show_all_results,
                    self.benchmark.run_local
                   )

    self.result = self.cache.ReadResult()
    self.cache_hit = (self.result is not None)
    self.cache_has_been_read = True

  def RunTest(self, machine):
    """Runs the benchmark but returns a MockResult.

    Same sequence as BenchmarkRun.RunTest except that the machine is always
    imaged and Result.CreateFromRun is replaced by a MockResult filled with
    the suite runner's output.

    Args:
      machine: The machine to run on; its name is passed to the suite runner.

    Returns:
      A MockResult carrying the run's out, err and retval.
    """
    self.timeline.Record(STATUS_IMAGING)
    self.machine_manager.ImageMachine(machine,
                                      self.label)
    self.timeline.Record(STATUS_RUNNING)
    [retval, out, err] = self.suite_runner.Run(machine.name,
                                               self.label,
                                               self.benchmark,
                                               self.test_args,
                                               self.profiler_args)
    self.run_completed = True
    rr = MockResult("logger", self.label, self.log_level)
    rr.out = out
    rr.err = err
    rr.retval = retval
    return rr
301