experiment.py revision ddde50532281f7f796dd7dc44b562b29d25ab381
#!/usr/bin/python

# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""The experiment setting module."""

import os
import time

import afe_lock_machine
from threading import Lock

from utils import logger
from utils import misc

import benchmark_run
from machine_manager import MachineManager
from machine_manager import MockMachineManager
import test_flag


class Experiment(object):
  """Class representing an Experiment to be run.

  An Experiment owns a machine manager, the list of labels and benchmarks,
  and one BenchmarkRun per (label, benchmark, iteration) combination. Runs
  are either started directly by Run() or delegated to a scheduler object
  installed with set_schedv2().
  """

  def __init__(self, name, remote, working_directory,
               chromeos_root, cache_conditions, labels, benchmarks,
               experiment_file, email_to, acquire_timeout, log_dir,
               log_level, share_cache, results_directory, locks_directory):
    """Set up the experiment, its machine manager and its benchmark runs.

    Args:
      name: Experiment name; also used to derive the default results
        directory name.
      remote: Iterable of machines handed to machine_manager.AddMachine().
        After construction self.remote holds only the names of the machines
        the machine manager actually kept.
      working_directory: Base directory for the default results directory.
      chromeos_root: ChromeOS root given to the machine manager. If empty,
        one is taken from the labels.
      cache_conditions: Passed through to every BenchmarkRun.
      labels: Label objects; each provides name, chromeos_root and remote.
      benchmarks: Benchmark objects; each provides name and iterations.
      experiment_file: Stored on the instance as-is.
      email_to: Stored on the instance as-is.
      acquire_timeout: Passed through to the machine manager.
      log_dir: Directory used for the experiment and per-run loggers.
      log_level: Passed to the machine manager and to every BenchmarkRun.
      share_cache: Passed through to every BenchmarkRun.
      results_directory: Explicit results directory. If empty, defaults to
        <working_directory>/<name>_results.
      locks_directory: Passed to the machine manager. Non-blank selects the
        file locking mechanism in Cleanup(); blank selects AFE locking.

    Raises:
      Exception: If no chromeos_root was given and no label supplies one.
    """
    self.name = name
    self.working_directory = working_directory
    self.remote = remote
    self.chromeos_root = chromeos_root
    self.cache_conditions = cache_conditions
    self.experiment_file = experiment_file
    self.email_to = email_to
    if not results_directory:
      self.results_directory = os.path.join(self.working_directory,
                                            self.name + "_results")
    else:
      self.results_directory = misc.CanonicalizePath(results_directory)
    self.log_dir = log_dir
    self.log_level = log_level
    self.labels = labels
    self.benchmarks = benchmarks
    self.num_complete = 0
    self.num_run_complete = 0
    self.share_cache = share_cache
    # If locks_directory (self.locks_dir) is not blank, we will use the file
    # locking mechanism; if it is blank then we will use the AFE server
    # locking mechanism.
    self.locks_dir = locks_directory
    self.locked_machines = []

    # We need one chromeos_root to run the benchmarks in, but it doesn't
    # matter where it is, unless the ABIs are different.
    if not chromeos_root:
      # No early exit: the last label that has a chromeos_root wins.
      for label in self.labels:
        if label.chromeos_root:
          chromeos_root = label.chromeos_root
    if not chromeos_root:
      raise Exception("No chromeos_root given and could not determine one from "
                      "the image path.")

    if test_flag.GetTestMode():
      self.machine_manager = MockMachineManager(chromeos_root, acquire_timeout,
                                                log_level, locks_directory)
    else:
      self.machine_manager = MachineManager(chromeos_root, acquire_timeout,
                                            log_level, locks_directory)
    self.l = logger.GetLogger(log_dir)

    for machine in self.remote:
      # machine_manager.AddMachine only adds reachable machines.
      self.machine_manager.AddMachine(machine)
    # Now machine_manager._all_machines contains a list of reachable
    # machines. This is a subset of self.remote. We make both lists the same.
    self.remote = [m.name for m in self.machine_manager._all_machines]

    for label in labels:
      # We filter out label remotes that are not reachable (not in
      # self.remote). So each label.remote is a sublist of experiment.remote.
      label.remote = [r for r in label.remote if r in self.remote]
      self.machine_manager.ComputeCommonCheckSum(label)
      self.machine_manager.ComputeCommonCheckSumString(label)

    self.start_time = None
    self.benchmark_runs = self._GenerateBenchmarkRuns()

    self._schedv2 = None
    self._internal_counter_lock = Lock()

  def set_schedv2(self, schedv2):
    """Install the scheduler object that Run/Terminate/IsComplete defer to."""
    self._schedv2 = schedv2

  def schedv2(self):
    """Return the installed scheduler object, or None if there is none."""
    return self._schedv2

  def _GenerateBenchmarkRuns(self):
    """Generate benchmark runs from labels and benchmark definitions.

    Returns:
      A list with one BenchmarkRun per (label, benchmark, iteration), where
      iteration counts from 1 to benchmark.iterations inclusive.
    """
    benchmark_runs = []
    for label in self.labels:
      for benchmark in self.benchmarks:
        for iteration in range(1, benchmark.iterations + 1):

          benchmark_run_name = "%s: %s (%s)" % (label.name, benchmark.name,
                                                iteration)
          full_name = "%s_%s_%s" % (label.name, benchmark.name, iteration)
          # Each run gets its own logger, named after the run.
          logger_to_use = logger.Logger(self.log_dir,
                                        "run.%s" % (full_name),
                                        True)
          benchmark_runs.append(benchmark_run.BenchmarkRun(
              benchmark_run_name,
              benchmark,
              label,
              iteration,
              self.cache_conditions,
              self.machine_manager,
              logger_to_use,
              self.log_level,
              self.share_cache))

    return benchmark_runs

  def Build(self):
    """Placeholder; currently does nothing."""
    pass

  def Terminate(self):
    """Stop the experiment.

    Delegates to the scheduler when one is installed; otherwise logs and
    terminates every benchmark run that is still alive.
    """
    if self._schedv2 is not None:
      self._schedv2.terminate()
    else:
      for t in self.benchmark_runs:
        if t.isAlive():
          self.l.LogError("Terminating run: '%s'." % t.name)
          t.Terminate()

  def IsComplete(self):
    """Report whether the experiment has finished, reaping finished runs.

    With a scheduler installed, its is_complete() result is returned.
    Otherwise every run in self.active_threads (populated by Run()) is
    polled; finished runs are counted and removed from the list.

    Returns:
      True when there is nothing left in self.active_threads (or the
      scheduler reports completion). False if any run was still tracked at
      the start of this call, even if all of them were reaped during it; the
      next call then returns True.
    """
    if self._schedv2:
      return self._schedv2.is_complete()
    if self.active_threads:
      # Iterate over a snapshot: finished runs are removed from
      # self.active_threads inside the loop, and removing from a list while
      # iterating over that same list makes the loop skip the element that
      # follows each removed one.
      for t in list(self.active_threads):
        if t.isAlive():
          # Non-blocking join; gives a just-finished thread a chance to be
          # seen as not alive by the check below.
          t.join(0)
        if not t.isAlive():
          self.num_complete += 1
          if not t.cache_hit:
            self.num_run_complete += 1
          self.active_threads.remove(t)
      return False
    return True

  def BenchmarkRunFinished(self, br):
    """Update internal counters after br finishes.

    Note this is only used by schedv2 and is called by multiple threads.
    Never throw any exception here.

    Args:
      br: The finished benchmark run; its cache_hit attribute decides
        whether num_run_complete is incremented.
    """

    assert self._schedv2 is not None
    with self._internal_counter_lock:
      self.num_complete += 1
      if not br.cache_hit:
        self.num_run_complete += 1

  def Run(self):
    """Record the start time and start the experiment.

    Delegates to the scheduler when one is installed; otherwise starts every
    benchmark run and tracks it in self.active_threads.
    """
    self.start_time = time.time()
    if self._schedv2 is not None:
      self._schedv2.run_sched()
    else:
      self.active_threads = []
      for run in self.benchmark_runs:
        # Set threads to daemon so program exits when ctrl-c is pressed.
        run.daemon = True
        run.start()
        self.active_threads.append(run)

  def SetCacheConditions(self, cache_conditions):
    """Forward cache_conditions to every benchmark run."""
    for run in self.benchmark_runs:
      run.SetCacheConditions(cache_conditions)

  def Cleanup(self):
    """Make sure all machines are unlocked."""
    if self.locks_dir:
      # We are using the file locks mechanism, so call machine_manager.Cleanup
      # to unlock everything.
      self.machine_manager.Cleanup()
    else:
      all_machines = self.locked_machines
      if not all_machines:
        return

      # If we locked any machines earlier, make sure we unlock them now.
      lock_mgr = afe_lock_machine.AFELockManager(all_machines, "",
                                                 self.labels[0].chromeos_root,
                                                 None)
      machine_states = lock_mgr.GetMachineStates("unlock")
      for k, state in machine_states.iteritems():
        if state["locked"]:
          lock_mgr.UpdateLockInAFE(False, k)