experiment.py revision e066297f07a8d1e1ad3416b4b034b2943f47c648
1#!/usr/bin/python
2
3# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""The experiment setting module."""
8
9import os
10import time
11
12import afe_lock_machine
13from threading import Lock
14
15from utils import logger
16from utils import misc
17
18import benchmark_run
19from machine_manager import MachineManager
20from machine_manager import MockMachineManager
21import test_flag
22
23
24class Experiment(object):
25  """Class representing an Experiment to be run."""
26
27  def __init__(self, name, remote, working_directory,
28               chromeos_root, cache_conditions, labels, benchmarks,
29               experiment_file, email_to, acquire_timeout, log_dir,
30               log_level, share_cache, results_directory, locks_directory):
31    self.name = name
32    self.working_directory = working_directory
33    self.remote = remote
34    self.chromeos_root = chromeos_root
35    self.cache_conditions = cache_conditions
36    self.experiment_file = experiment_file
37    self.email_to = email_to
38    if not results_directory:
39      self.results_directory = os.path.join(self.working_directory,
40                                            self.name + "_results")
41    else:
42      self.results_directory = misc.CanonicalizePath(results_directory)
43    self.log_dir = log_dir
44    self.log_level = log_level
45    self.labels = labels
46    self.benchmarks = benchmarks
47    self.num_complete = 0
48    self.num_run_complete = 0
49    self.share_cache = share_cache
50    # If locks_directory (self.lock_dir) not blank, we will use the file
51    # locking mechanism; if it is blank then we will use the AFE server
52    # locking mechanism.
53    self.locks_dir = locks_directory
54    self.locked_machines = []
55
56    # We need one chromeos_root to run the benchmarks in, but it doesn't
57    # matter where it is, unless the ABIs are different.
58    if not chromeos_root:
59      for label in self.labels:
60        if label.chromeos_root:
61          chromeos_root = label.chromeos_root
62    if not chromeos_root:
63      raise Exception("No chromeos_root given and could not determine one from "
64                      "the image path.")
65
66    if test_flag.GetTestMode():
67      self.machine_manager = MockMachineManager(chromeos_root, acquire_timeout,
68                                                log_level, locks_directory)
69    else:
70      self.machine_manager = MachineManager(chromeos_root, acquire_timeout,
71                                            log_level, locks_directory)
72    self.l = logger.GetLogger(log_dir)
73
74    for machine in self.remote:
75      # machine_manager.AddMachine only adds reachable machines.
76      self.machine_manager.AddMachine(machine)
77    # Now machine_manager._all_machines contains a list of reachable
78    # machines. This is a subset of self.remote. We make both lists the same.
79    self.remote = [m.name for m in self.machine_manager._all_machines]
80
81    for label in labels:
82      # We filter out label remotes that are not reachable (not in
83      # self.remote). So each label.remote is a sublist of experiment.remote.
84      label.remote = filter(lambda x: x in self.remote, label.remote)
85      self.machine_manager.ComputeCommonCheckSum(label)
86      self.machine_manager.ComputeCommonCheckSumString(label)
87
88    self.start_time = None
89    self.benchmark_runs = self._GenerateBenchmarkRuns()
90
91    self._schedv2 = None
92    self._internal_counter_lock = Lock()
93
94  def set_schedv2(self, schedv2):
95      self._schedv2 = schedv2
96
97  def schedv2(self):
98      return self._schedv2
99
100  def _GenerateBenchmarkRuns(self):
101    """Generate benchmark runs from labels and benchmark defintions."""
102    benchmark_runs = []
103    for label in self.labels:
104      for benchmark in self.benchmarks:
105        for iteration in range(1, benchmark.iterations + 1):
106
107          benchmark_run_name = "%s: %s (%s)" % (label.name, benchmark.name,
108                                                iteration)
109          full_name = "%s_%s_%s" % (label.name, benchmark.name, iteration)
110          logger_to_use = logger.Logger(self.log_dir,
111                                        "run.%s" % (full_name),
112                                        True)
113          benchmark_runs.append(benchmark_run.BenchmarkRun(
114              benchmark_run_name,
115              benchmark,
116              label,
117              iteration,
118              self.cache_conditions,
119              self.machine_manager,
120              logger_to_use,
121              self.log_level,
122              self.share_cache))
123
124    return benchmark_runs
125
126  def Build(self):
127    pass
128
129  def Terminate(self):
130    if self._schedv2 is not None:
131      self._schedv2.terminate()
132    else:
133      for t in self.benchmark_runs:
134        if t.isAlive():
135          self.l.LogError("Terminating run: '%s'." % t.name)
136          t.Terminate()
137
138  def IsComplete(self):
139    if self._schedv2:
140      return self._schedv2.is_complete()
141    if self.active_threads:
142      for t in self.active_threads:
143        if t.isAlive():
144          t.join(0)
145        if not t.isAlive():
146          self.num_complete += 1
147          if not t.cache_hit:
148            self.num_run_complete += 1
149          self.active_threads.remove(t)
150      return False
151    return True
152
153  def BenchmarkRunFinished(self, br):
154      """Update internal counters after br finishes.
155
156      Note this is only used by schedv2 and is called by multiple threads.
157      Never throw any exception here.
158      """
159
160      assert self._schedv2 is not None
161      with self._internal_counter_lock:
162          self.num_complete += 1
163          if not br.cache_hit:
164            self.num_run_complete += 1
165
166  def Run(self):
167    self.start_time = time.time()
168    if self._schedv2 is not None:
169      self._schedv2.run_sched()
170    else:
171      self.active_threads = []
172      for benchmark_run in self.benchmark_runs:
173        # Set threads to daemon so program exits when ctrl-c is pressed.
174        benchmark_run.daemon = True
175        benchmark_run.start()
176        self.active_threads.append(benchmark_run)
177
178  def SetCacheConditions(self, cache_conditions):
179    for benchmark_run in self.benchmark_runs:
180      benchmark_run.SetCacheConditions(cache_conditions)
181
182  def Cleanup(self):
183    """Make sure all machines are unlocked."""
184    if self.locks_dir:
185      # We are using the file locks mechanism, so call machine_manager.Cleanup
186      # to unlock everything.
187      self.machine_manager.Cleanup()
188    else:
189      all_machines = self.locked_machines
190      if not all_machines:
191        return
192
193      # If we locked any machines earlier, make sure we unlock them now.
194      lock_mgr = afe_lock_machine.AFELockManager(all_machines, "",
195                                                 self.labels[0].chromeos_root,
196                                                 None)
197      machine_states = lock_mgr.GetMachineStates("unlock")
198      for k, state in machine_states.iteritems():
199        if state["locked"]:
200          lock_mgr.UpdateLockInAFE(False, k)
201