autotest_run.py revision f81680c018729fd4499e1e200d04b48c4b90127c
1import datetime
2import getpass
3import glob
4import os
5import pickle
6import re
7import threading
8import time
9import image_chromeos
10import machine_manager_singleton
11import table_formatter
12from utils import command_executer
13from utils import logger
14
15
# Per-user directory under which cached test results are kept.
SCRATCH_DIR = "/home/" + getpass.getuser() + "/cros_scratch"
# File name of the pickled (retval, out, err) triple inside a cache directory.
PICKLE_FILE = "pickle.txt"
# Suffix that GetCacheHashBase() mixes into every cache key.
VERSION = "1"
19
20
def ConvertToFilename(text):
  """Return text with characters that are awkward in file names rewritten.

  Substitutions are applied in order: "/" becomes "__", a space becomes "_",
  and "=" and double quotes are removed.

  Args:
    text: The string to convert.

  Returns:
    The converted string.
  """
  substitutions = (
      ("/", "__"),
      (" ", "_"),
      ("=", ""),
      ("\"", ""),
  )
  for pattern, replacement in substitutions:
    text = re.sub(pattern, replacement, text)
  return text
28
29
30class AutotestRun(threading.Thread):
31  def __init__(self, autotest, chromeos_root="", chromeos_image="",
32               board="", remote="", iteration=0, image_checksum="",
33               exact_remote=False, rerun=False, rerun_if_failed=False):
34    self.autotest = autotest
35    self.chromeos_root = chromeos_root
36    self.chromeos_image = chromeos_image
37    self.board = board
38    self.remote = remote
39    self.iteration = iteration
40    l = logger.GetLogger()
41    l.LogFatalIf(not image_checksum, "Checksum shouldn't be None")
42    self.image_checksum = image_checksum
43    self.results = {}
44    threading.Thread.__init__(self)
45    self.terminate = False
46    self.retval = None
47    self.status = "PENDING"
48    self.run_completed = False
49    self.exact_remote = exact_remote
50    self.rerun = rerun
51    self.rerun_if_failed = rerun_if_failed
52    self.results_dir = None
53    self.full_name = None
54
55  @staticmethod
56  def MeanExcludingSlowest(array):
57    mean = sum(array) / len(array)
58    array2 = []
59
60    for v in array:
61      if mean != 0 and abs(v - mean)/mean < 0.2:
62        array2.append(v)
63
64    if array2:
65      return sum(array2) / len(array2)
66    else:
67      return mean
68
69  @staticmethod
70  def AddComposite(results_dict):
71    composite_keys = []
72    composite_dict = {}
73    for key in results_dict:
74      mo = re.match("(.*){\d+}", key)
75      if mo:
76        composite_keys.append(mo.group(1))
77    for key in results_dict:
78      for composite_key in composite_keys:
79        if (key.count(composite_key) != 0 and
80            table_formatter.IsFloat(results_dict[key])):
81          if composite_key not in composite_dict:
82            composite_dict[composite_key] = []
83          composite_dict[composite_key].append(float(results_dict[key]))
84          break
85
86    for composite_key in composite_dict:
87      v = composite_dict[composite_key]
88      results_dict["%s[c]" % composite_key] = sum(v) / len(v)
89      mean_excluding_slowest = AutotestRun.MeanExcludingSlowest(v)
90      results_dict["%s[ce]" % composite_key] = mean_excluding_slowest
91
92    return results_dict
93
94  def ParseOutput(self):
95    p = re.compile("^-+.*?^-+", re.DOTALL|re.MULTILINE)
96    matches = p.findall(self.out)
97    for i in range(len(matches)):
98      results = matches[i]
99      results_dict = {}
100      for line in results.splitlines()[1:-1]:
101        mo = re.match("(.*\S)\s+\[\s+(PASSED|FAILED)\s+\]", line)
102        if mo:
103          results_dict[mo.group(1)] = mo.group(2)
104          continue
105        mo = re.match("(.*\S)\s+(.*)", line)
106        if mo:
107          results_dict[mo.group(1)] = mo.group(2)
108
109      # Add a composite keyval for tests like startup.
110      results_dict = AutotestRun.AddComposite(results_dict)
111
112      self.results = results_dict
113
114      # This causes it to not parse the table again
115      # Autotest recently added a secondary table
116      # That reports errors and screws up the final pretty output.
117      break
118    mo = re.search("Results placed in (\S+)", self.out)
119    if mo:
120      self.results_dir = mo.group(1)
121      self.full_name = os.path.basename(self.results_dir)
122
123  def GetCacheHashBase(self):
124    ret = ("%s %s %s" %
125           (self.image_checksum, self.autotest.name, self.iteration))
126    if self.autotest.args:
127      ret += " %s" % self.autotest.args
128    ret += "-%s" % VERSION
129    return ret
130
131  def GetLabel(self):
132    ret = "%s %s remote:%s" % (self.chromeos_image, self.autotest.name,
133                               self.remote)
134    return ret
135
136  def TryToLoadFromCache(self):
137    base = self.GetCacheHashBase()
138    if self.exact_remote:
139      if not self.remote:
140        return False
141      cache_dir_glob = "%s_%s" % (ConvertToFilename(base), self.remote)
142    else:
143      cache_dir_glob = "%s*" % ConvertToFilename(base)
144    cache_path_glob = os.path.join(SCRATCH_DIR, cache_dir_glob)
145    matching_dirs = glob.glob(cache_path_glob)
146    if matching_dirs:
147      matching_dir = matching_dirs[0]
148      cache_file = os.path.join(matching_dir, PICKLE_FILE)
149      assert os.path.isfile(cache_file)
150      self._logger.LogOutput("Trying to read from cache file: %s" % cache_file)
151      return self.ReadFromCache(cache_file)
152    self._logger.LogOutput("Cache miss. AM going to run: %s for: %s" %
153                           (self.autotest.name, self.chromeos_image))
154    return False
155
156  def ReadFromCache(self, cache_file):
157    with open(cache_file, "rb") as f:
158      self.retval = pickle.load(f)
159      self.out = pickle.load(f)
160      self.err = pickle.load(f)
161      self._logger.LogOutput(self.out)
162      return True
163    return False
164
165  def StoreToCache(self):
166    base = self.GetCacheHashBase()
167    self.cache_dir = os.path.join(SCRATCH_DIR, "%s_%s" % (
168        ConvertToFilename(base),
169        self.remote))
170    cache_file = os.path.join(self.cache_dir, PICKLE_FILE)
171    command = "mkdir -p %s" % os.path.dirname(cache_file)
172    ret = self._ce.RunCommand(command)
173    assert ret == 0, "Couldn't create cache dir"
174    with open(cache_file, "wb") as f:
175      pickle.dump(self.retval, f)
176      pickle.dump(self.out, f)
177      pickle.dump(self.err, f)
178
179  def run(self):
180    self._logger = logger.Logger(
181        os.path.dirname(__file__),
182        "%s.%s" % (os.path.basename(__file__),
183                   self.name), True)
184    self._ce = command_executer.GetCommandExecuter(self._logger)
185    self.RunCached()
186
187  def RunCached(self):
188    self.status = "WAITING"
189    cache_hit = False
190    if not self.rerun:
191      cache_hit = self.TryToLoadFromCache()
192    else:
193      self._logger.LogOutput("--rerun passed. Not using cached results.")
194    if self.rerun_if_failed and self.retval:
195      self._logger.LogOutput("--rerun_if_failed passed and existing test "
196                             "failed. Rerunning...")
197      cache_hit = False
198    if not cache_hit:
199      # Get machine
200      while True:
201        if self.terminate:
202          return 1
203        self.machine = (
204            machine_manager_singleton.MachineManagerSingleton().AcquireMachine(self.image_checksum))
205        if self.machine:
206          self._logger.LogOutput("%s: Machine %s acquired at %s" %
207                                 (self.name,
208                                  self.machine.name,
209                                  datetime.datetime.now()))
210          break
211        else:
212          sleep_duration = 10
213          time.sleep(sleep_duration)
214      try:
215        self.remote = self.machine.name
216
217        if self.machine.checksum != self.image_checksum:
218          self.retval = self.ImageTo(self.machine.name)
219          if self.retval: return self.retval
220          self.machine.checksum = self.image_checksum
221          self.machine.image = self.chromeos_image
222        self.status = "RUNNING: %s" % self.autotest.name
223        [self.retval, self.out, self.err] = self.RunTestOn(self.machine.name)
224        self.run_completed = True
225
226      finally:
227        self._logger.LogOutput("Releasing machine: %s" % self.machine.name)
228        machine_manager_singleton.MachineManagerSingleton().ReleaseMachine(self.machine)
229        self._logger.LogOutput("Released machine: %s" % self.machine.name)
230
231      self.StoreToCache()
232
233    if not self.retval:
234      self.status = "SUCCEEDED"
235    else:
236      self.status = "FAILED"
237
238    self.ParseOutput()
239    # Copy results directory to the scratch dir
240    if (not cache_hit and not self.retval and self.autotest.args and
241        "--profile" in self.autotest.args):
242      results_dir = os.path.join(self.chromeos_root, "chroot",
243                                 self.results_dir.lstrip("/"))
244      tarball = os.path.join(
245          self.cache_dir,
246          os.path.basename(os.path.dirname(self.results_dir)))
247      command = ("cd %s && tar cjf %s.tbz2 ." % (results_dir, tarball))
248      self._ce.RunCommand(command)
249      perf_data_file = os.path.join(self.results_dir, self.full_name,
250                                    "profiling/iteration.1/perf.data")
251
252      # Attempt to build a perf report and keep it with the results.
253      command = ("cd %s/src/scripts &&"
254                 " cros_sdk -- /usr/sbin/perf report --symfs=/build/%s"
255                 " -i %s --stdio" % (self.chromeos_root, self.board,
256                                     perf_data_file))
257      ret, out, err = self._ce.RunCommand(command, return_output=True)
258      with open(os.path.join(self.cache_dir, "perf.report"), "wb") as f:
259        f.write(out)
260    return self.retval
261
262  def ImageTo(self, machine_name):
263    image_args = [image_chromeos.__file__,
264                  "--chromeos_root=%s" % self.chromeos_root,
265                  "--image=%s" % self.chromeos_image,
266                  "--remote=%s" % machine_name]
267    if self.board:
268      image_args.append("--board=%s" % self.board)
269
270###    devserver_port = 8080
271###    mo = re.search("\d+", self.name)
272###    if mo:
273###      to_add = int(mo.group(0))
274###      assert to_add < 100, "Too many threads launched!"
275###      devserver_port += to_add
276
277###    # I tried --noupdate_stateful, but that still fails when run in parallel.
278###    image_args.append("--image_to_live_args=\"--devserver_port=%s"
279###                      " --noupdate_stateful\"" % devserver_port)
280###    image_args.append("--image_to_live_args=--devserver_port=%s" %
281###                      devserver_port)
282
283    # Currently can't image two machines at once.
284    # So have to serialized on this lock.
285    self.status = "WAITING ON IMAGE_LOCK"
286    with machine_manager_singleton.MachineManagerSingleton().image_lock:
287      self.status = "IMAGING"
288      retval = self._ce.RunCommand(" ".join(["python"] + image_args))
289      machine_manager_singleton.MachineManagerSingleton().num_reimages += 1
290      if retval:
291        self.status = "ABORTED DUE TO IMAGE FAILURE"
292    return retval
293
294  def DoPowerdHack(self):
295    command = "sudo initctl stop powerd"
296    self._ce.CrosRunCommand(command, machine=self.machine.name,
297                            chromeos_root=self.chromeos_root)
298
299  def RunTestOn(self, machine_name):
300    command = "cd %s/src/scripts" % self.chromeos_root
301    options = ""
302    if self.board:
303      options += " --board=%s" % self.board
304    if self.autotest.args:
305      options += " %s" % self.autotest.args
306    if "tegra2" in self.board:
307      self.DoPowerdHack()
308    command += ("&& cros_sdk -- ./run_remote_tests.sh --remote=%s %s %s" %
309                (machine_name,
310                 options,
311                 self.autotest.name))
312    return self._ce.RunCommand(command, True)
313