# benchmark_run.py revision f2a3ef46f75d2196a93d3ed27f4d1fcf22b54fbe

# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
5
6import datetime
7import os
8import threading
9import time
10import traceback
11
12from cros_utils import command_executer
13from cros_utils import timeline
14
15from suite_runner import SuiteRunner
16from results_cache import MockResult
17from results_cache import MockResultsCache
18from results_cache import Result
19from results_cache import ResultsCache
20from results_cache import TelemetryResult
21
# Status labels recorded on a BenchmarkRun's timeline, listed in the order a
# run normally passes through them.
STATUS_PENDING = 'PENDING'  # Recorded when the BenchmarkRun is constructed.
STATUS_WAITING = 'WAITING'  # Recorded just before trying to acquire a machine.
STATUS_IMAGING = 'IMAGING'  # Recorded at the start of RunTest.
STATUS_RUNNING = 'RUNNING'  # Recorded right before the suite runner is invoked.
# Terminal states.
STATUS_SUCCEEDED = 'SUCCEEDED'  # The result's retval was falsy.
STATUS_FAILED = 'FAILED'  # Non-zero retval, an exception, or Terminate().
28
29
class BenchmarkRun(threading.Thread):
  """Thread that runs one iteration of a benchmark for one label.

  When run, the thread first consults the results cache. On a cache miss (and
  unless the label is cache-only) it acquires a machine, images it, runs the
  benchmark through the suite runner and stores the result in the cache.
  Progress is recorded on self.timeline using the STATUS_* labels.
  """

  def __init__(self, name, benchmark, label, iteration, cache_conditions,
               machine_manager, logger_to_use, log_level, share_cache):
    """Stores the run configuration and records the PENDING status.

    Args:
      name: Name of this run; also used as the thread name.
      benchmark: Benchmark description. This class reads its test_name,
        test_args, perf_args, suite, show_all_results and run_local.
      label: Label description. This class reads its chromeos_image,
        chromeos_root, board and cache_only, and writes chrome_version.
      iteration: Iteration value passed through to the results cache.
      cache_conditions: Cache conditions passed through to the results cache.
      machine_manager: Object used to acquire, image and release machines.
      logger_to_use: Logger providing LogOutput and LogError.
      log_level: Log level forwarded to the helpers created here.
      share_cache: Value passed through to the results cache.

    Raises:
      Exception: If benchmark.perf_args is set but does not start with
        'record' or 'stat' (see _GetExtraAutotestArgs).
    """
    threading.Thread.__init__(self)
    self.name = name
    self._logger = logger_to_use
    self.log_level = log_level
    self.benchmark = benchmark
    self.iteration = iteration
    self.label = label
    self.result = None
    self.terminated = False
    self.retval = None
    self.run_completed = False
    self.machine_manager = machine_manager
    self.suite_runner = SuiteRunner(self._logger, self.log_level)
    self.machine = None
    # Created by ReadCache(); declared here so the attribute always exists.
    self.cache = None
    self.cache_conditions = cache_conditions
    self.runs_complete = 0
    self.cache_hit = False
    self.failure_reason = ''
    self.test_args = benchmark.test_args
    self.profiler_args = self._GetExtraAutotestArgs()
    self._ce = command_executer.GetCommandExecuter(self._logger,
                                                   log_level=self.log_level)
    self.timeline = timeline.Timeline()
    self.timeline.Record(STATUS_PENDING)
    self.share_cache = share_cache
    self.cache_has_been_read = False

    # This is used by schedv2.
    self.owner_thread = None

  def ReadCache(self):
    """Creates the results cache and tries to read a cached result.

    Sets self.cache, self.result, self.cache_hit and marks the cache as read
    so that run() does not read it again.
    """
    # Just use the first machine for running the cached version,
    # without locking it.
    self.cache = ResultsCache()
    self.cache.Init(self.label.chromeos_image, self.label.chromeos_root,
                    self.benchmark.test_name, self.iteration, self.test_args,
                    self.profiler_args, self.machine_manager, self.machine,
                    self.label.board, self.cache_conditions, self._logger,
                    self.log_level, self.label, self.share_cache,
                    self.benchmark.suite, self.benchmark.show_all_results,
                    self.benchmark.run_local)

    self.result = self.cache.ReadResult()
    self.cache_hit = (self.result is not None)
    self.cache_has_been_read = True

  def run(self):
    """Thread body: obtains a result from the cache or by running the test.

    Any exception is logged, its traceback printed, and the run is marked
    FAILED instead of propagating. A locally acquired machine is always
    released on the way out.
    """
    try:
      if not self.cache_has_been_read:
        self.ReadCache()

      if self.result:
        self._logger.LogOutput('%s: Cache hit.' % self.name)
        self._logger.LogOutput(self.result.out, print_to_console=False)
        self._logger.LogError(self.result.err, print_to_console=False)

      elif self.label.cache_only:
        # Cache-only labels never touch a machine: synthesize a failed result.
        self._logger.LogOutput('%s: No cache hit.' % self.name)
        output = '%s: No Cache hit.' % self.name
        retval = 1
        err = 'No cache hit.'
        self.result = Result.CreateFromRun(
            self._logger, self.log_level, self.label, self.machine, output, err,
            retval, self.benchmark.show_all_results, self.benchmark.test_name,
            self.benchmark.suite)

      else:
        self._logger.LogOutput('%s: No cache hit.' % self.name)
        self.timeline.Record(STATUS_WAITING)
        # Try to acquire a machine now.
        self.machine = self.AcquireMachine()
        self.cache.machine = self.machine
        self.result = self.RunTest(self.machine)

        self.cache.remote = self.machine.name
        self.label.chrome_version = self.machine_manager.GetChromeVersion(
            self.machine)
        self.cache.StoreResult(self.result)

      if self.machine and not self.label.chrome_version:
        self.label.chrome_version = self.machine_manager.GetChromeVersion(
            self.machine)

      if self.terminated:
        # Terminate() has already recorded the failure on the timeline.
        return

      if not self.result.retval:
        self.timeline.Record(STATUS_SUCCEEDED)
      elif self.timeline.GetLastEvent() != STATUS_FAILED:
        self.failure_reason = 'Return value of test suite was non-zero.'
        self.timeline.Record(STATUS_FAILED)

    # 'except ... as' is valid on Python 2.6+ and Python 3; the older
    # 'except Exception, e' form is a syntax error on Python 3.
    except Exception as e:  # pylint: disable=broad-except
      self._logger.LogError("Benchmark run: '%s' failed: %s" % (self.name, e))
      traceback.print_exc()
      if self.timeline.GetLastEvent() != STATUS_FAILED:
        self.timeline.Record(STATUS_FAILED)
        self.failure_reason = str(e)
    finally:
      # In schedv2 mode (owner_thread set), we do not lock the machine
      # locally, so there is nothing to release here.
      if self.owner_thread is None and self.machine:
        if not self.machine.IsReachable():
          self._logger.LogOutput('Machine %s is not reachable, removing it.' %
                                 self.machine.name)
          self.machine_manager.RemoveMachine(self.machine.name)
        self._logger.LogOutput('Releasing machine: %s' % self.machine.name)
        self.machine_manager.ReleaseMachine(self.machine)
        self._logger.LogOutput('Released machine: %s' % self.machine.name)

  def Terminate(self):
    """Flags the run as terminated, stops the suite runner, records FAILED."""
    self.terminated = True
    self.suite_runner.Terminate()
    if self.timeline.GetLastEvent() != STATUS_FAILED:
      self.timeline.Record(STATUS_FAILED)
      self.failure_reason = 'Thread terminated.'

  def AcquireMachine(self):
    """Returns a machine to run on, polling the machine manager if needed.

    Returns:
      The owner thread's DUT in schedv2 mode; otherwise the first machine the
      machine manager hands out for self.label.

    Raises:
      Exception: If the run is terminated while still waiting for a machine.
    """
    if self.owner_thread is not None:
      # No need to lock machine locally, DutWorker, which is a thread, is
      # responsible for running br.
      return self.owner_thread.dut()

    # Seconds to wait between two acquisition attempts.
    sleep_duration = 10
    while True:
      if self.terminated:
        raise Exception('Thread terminated while trying to acquire machine.')

      machine = self.machine_manager.AcquireMachine(self.label)
      if machine:
        self._logger.LogOutput('%s: Machine %s acquired at %s' %
                               (self.name, machine.name,
                                datetime.datetime.now()))
        return machine
      time.sleep(sleep_duration)

  def _GetExtraAutotestArgs(self):
    """Builds the extra profiler arguments derived from benchmark.perf_args.

    For the 'telemetry' and 'test_that' suites an error is logged and
    benchmark.perf_args is reset to '' because the profiler is unsupported.

    Returns:
      '' when there are no perf args; otherwise the '--profiler=custom_perf'
      option string, with '-a' inserted right after the perf sub-command.

    Raises:
      Exception: If perf_args does not start with 'record' or 'stat'.
    """
    unsupported_suites = (
        ('telemetry', 'Telemetry does not support profiler.'),
        ('test_that', 'test_that does not support profiler.'),
    )
    for suite, message in unsupported_suites:
      if self.benchmark.perf_args and self.benchmark.suite == suite:
        self._logger.LogError(message)
        self.benchmark.perf_args = ''

    if not self.benchmark.perf_args:
      return ''

    perf_args_list = self.benchmark.perf_args.split(' ')
    # Validate the sub-command before building anything from it.
    if perf_args_list[0] not in ('record', 'stat'):
      raise Exception('perf_args must start with either record or stat')
    perf_args = ' '.join([perf_args_list[0], '-a'] + perf_args_list[1:])
    extra_test_args = ['--profiler=custom_perf',
                       ("--profiler_args='perf_options=\"%s\"'" % perf_args)]
    return ' '.join(extra_test_args)

  def RunTest(self, machine):
    """Images the machine (outside schedv2 mode) and runs the benchmark on it.

    Args:
      machine: Machine to run on; its name is passed to the suite runner.

    Returns:
      The object built by Result.CreateFromRun from the suite runner's
      return value, stdout and stderr.
    """
    self.timeline.Record(STATUS_IMAGING)
    # In schedv2 mode, do not even call ImageMachine. Machine image is
    # guaranteed.
    if self.owner_thread is None:
      self.machine_manager.ImageMachine(machine, self.label)
    self.timeline.Record(STATUS_RUNNING)
    retval, out, err = self.suite_runner.Run(machine.name, self.label,
                                             self.benchmark, self.test_args,
                                             self.profiler_args)
    self.run_completed = True
    return Result.CreateFromRun(self._logger, self.log_level, self.label,
                                self.machine, out, err, retval,
                                self.benchmark.show_all_results,
                                self.benchmark.test_name, self.benchmark.suite)

  def SetCacheConditions(self, cache_conditions):
    """Replaces the cache conditions used by the next ReadCache() call."""
    self.cache_conditions = cache_conditions

  def __str__(self):
    """For better debugging."""

    return 'BenchmarkRun[name="{}"]'.format(self.name)
221
222
class MockBenchmarkRun(BenchmarkRun):
  """BenchmarkRun variant for testing.

  Uses MockResultsCache in place of ResultsCache and MockResult in place of
  Result.CreateFromRun; everything else is inherited from BenchmarkRun.
  """

  def ReadCache(self):
    """Creates a MockResultsCache and tries to read a cached result from it.

    Sets self.cache, self.result and self.cache_hit, and marks the cache as
    read (as the base-class ReadCache does) so that run() does not build and
    read the cache a second time.
    """
    # Just use the first machine for running the cached version,
    # without locking it.
    self.cache = MockResultsCache()
    self.cache.Init(self.label.chromeos_image, self.label.chromeos_root,
                    self.benchmark.test_name, self.iteration, self.test_args,
                    self.profiler_args, self.machine_manager, self.machine,
                    self.label.board, self.cache_conditions, self._logger,
                    self.log_level, self.label, self.share_cache,
                    self.benchmark.suite, self.benchmark.show_all_results,
                    self.benchmark.run_local)

    self.result = self.cache.ReadResult()
    self.cache_hit = (self.result is not None)
    self.cache_has_been_read = True

  def RunTest(self, machine):
    """Runs the benchmark, packaging the output in a MockResult.

    Same flow as the base RunTest but without Result.CreateFromRun; note that
    the machine is imaged unconditionally here.

    Args:
      machine: Machine to run on; its name is passed to the suite runner.

    Returns:
      A MockResult carrying the suite runner's out, err and retval.
    """
    self.timeline.Record(STATUS_IMAGING)
    self.machine_manager.ImageMachine(machine, self.label)
    self.timeline.Record(STATUS_RUNNING)
    retval, out, err = self.suite_runner.Run(machine.name, self.label,
                                             self.benchmark, self.test_args,
                                             self.profiler_args)
    self.run_completed = True
    mock_result = MockResult('logger', self.label, self.log_level, machine)
    mock_result.out = out
    mock_result.err = err
    mock_result.retval = retval
    return mock_result
255