results_cache.py revision c490e073c811f60d6fdfec5c193e7f042302b5e8
1#!/usr/bin/python
2
3# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Module to deal with result cache."""
8
9import getpass
10import glob
11import hashlib
12import os
13import pickle
14import re
15import tempfile
16import json
17import sys
18
19from utils import command_executer
20from utils import misc
21
22from image_checksummer import ImageChecksummer
23
# Template for a user's scratch directory; "%s" is replaced by a user name.
SCRATCH_BASE = "/home/%s/cros_scratch"
# Scratch directory of the user running this module.
SCRATCH_DIR = SCRATCH_BASE % (getpass.getuser(),)

# File names used by the result/cache classes in this module.
RESULTS_FILE = "results.txt"        # pickled out/err/retval of a run
MACHINE_FILE = "machine.txt"        # machine checksum string
AUTOTEST_TARBALL = "autotest.tbz2"  # tarball of the results directory
PERF_RESULTS_FILE = "perf-results.txt"
30
31class Result(object):
32  """ This class manages what exactly is stored inside the cache without knowing
33  what the key of the cache is. For runs with perf, it stores perf.data,
34  perf.report, etc. The key generation is handled by the ResultsCache class.
35  """
36
37  def __init__(self, logger, label, log_level, cmd_exec=None):
38    self._chromeos_root = label.chromeos_root
39    self._logger = logger
40    self._ce = cmd_exec or command_executer.GetCommandExecuter(self._logger,
41                                                   log_level=log_level)
42    self._temp_dir = None
43    self.label = label
44    self.results_dir = None
45    self.log_level = log_level
46    self.perf_data_files = []
47    self.perf_report_files = []
48
49  def _CopyFilesTo(self, dest_dir, files_to_copy):
50    file_index = 0
51    for file_to_copy in files_to_copy:
52      if not os.path.isdir(dest_dir):
53        command = "mkdir -p %s" % dest_dir
54        self._ce.RunCommand(command)
55      dest_file = os.path.join(dest_dir,
56                               ("%s.%s" % (os.path.basename(file_to_copy),
57                                           file_index)))
58      ret = self._ce.CopyFiles(file_to_copy,
59                               dest_file,
60                               recursive=False)
61      if ret:
62        raise Exception("Could not copy results file: %s" % file_to_copy)
63
64  def CopyResultsTo(self, dest_dir):
65    self._CopyFilesTo(dest_dir, self.perf_data_files)
66    self._CopyFilesTo(dest_dir, self.perf_report_files)
67    if len(self.perf_data_files) or len(self.perf_report_files):
68      self._logger.LogOutput("Perf results files stored in %s." % dest_dir)
69
70  def _GetNewKeyvals(self, keyvals_dict):
71    # Initialize 'units' dictionary.
72    units_dict = {}
73    for k in keyvals_dict:
74      units_dict[k] = ""
75    results_files = self._GetDataMeasurementsFiles()
76    for f in results_files:
77      # Make sure we can find the results file
78      if os.path.exists(f):
79        data_filename = f
80      else:
81        # Otherwise get the base filename and create the correct
82        # path for it.
83        f_dir, f_base = misc.GetRoot(f)
84        data_filename = os.path.join(self._chromeos_root, "chroot/tmp",
85                                     self._temp_dir, f_base)
86      if os.path.exists(data_filename):
87        with open(data_filename, "r") as data_file:
88          lines = data_file.readlines()
89          for line in lines:
90            tmp_dict = json.loads(line)
91            key = tmp_dict["graph"] + "__" + tmp_dict["description"]
92            keyvals_dict[key] = tmp_dict["value"]
93            units_dict[key] = tmp_dict["units"]
94
95    return keyvals_dict, units_dict
96
97
98  def _AppendTelemetryUnits(self, keyvals_dict, units_dict):
99    """
100    keyvals_dict is the dictionary of key-value pairs that is used for
101    generating Crosperf reports.
102
103    units_dict is a dictionary of the units for the return values in
104    keyvals_dict.  We need to associate the units with the return values,
105    for Telemetry tests, so that we can include the units in the reports.
106    This function takes each value in keyvals_dict, finds the corresponding
107    unit in the units_dict, and replaces the old value with a list of the
108    old value and the units.  This later gets properly parsed in the
109    ResultOrganizer class, for generating the reports.
110
111    """
112
113    results_dict = {}
114    for k in keyvals_dict:
115      # We don't want these lines in our reports; they add no useful data.
116      if k == "" or k == "telemetry_Crosperf":
117        continue
118      val = keyvals_dict[k]
119      units = units_dict[k]
120      new_val = [ val, units ]
121      results_dict[k] = new_val
122    return results_dict
123
124  def _GetKeyvals(self, show_all):
125    results_in_chroot = os.path.join(self._chromeos_root,
126                                     "chroot", "tmp")
127    if not self._temp_dir:
128      self._temp_dir = tempfile.mkdtemp(dir=results_in_chroot)
129      command = "cp -r {0}/* {1}".format(self.results_dir, self._temp_dir)
130      self._ce.RunCommand(command)
131
132    command = ("python generate_test_report --no-color --csv %s" %
133               (os.path.join("/tmp", os.path.basename(self._temp_dir))))
134    [_, out, _] = self._ce.ChrootRunCommand(self._chromeos_root,
135                                            command,
136                                            return_output=True)
137    keyvals_dict = {}
138    tmp_dir_in_chroot = misc.GetInsideChrootPath(self._chromeos_root,
139                                                 self._temp_dir)
140    for line in out.splitlines():
141      tokens = re.split("=|,", line)
142      key = tokens[-2]
143      if key.startswith(tmp_dir_in_chroot):
144        key = key[len(tmp_dir_in_chroot) + 1:]
145      value = tokens[-1]
146      keyvals_dict[key] = value
147
148    # Check to see if there is a perf_measurements file and get the
149    # data from it if so.
150    keyvals_dict, units_dict = self._GetNewKeyvals(keyvals_dict)
151    if self.suite == "telemetry_Crosperf":
152      # For telemtry_Crosperf results, append the units to the return
153      # results, for use in generating the reports.
154      keyvals_dict = self._AppendTelemetryUnits(keyvals_dict,
155                                                units_dict)
156    return keyvals_dict
157
158  def _GetResultsDir(self):
159    mo = re.search(r"Results placed in (\S+)", self.out)
160    if mo:
161      result = mo.group(1)
162      return result
163    raise Exception("Could not find results directory.")
164
165  def _FindFilesInResultsDir(self, find_args):
166    if not self.results_dir:
167      return None
168
169    command = "find %s %s" % (self.results_dir,
170                              find_args)
171    ret, out, _ = self._ce.RunCommand(command, return_output=True)
172    if ret:
173      raise Exception("Could not run find command!")
174    return out
175
176  def _GetPerfDataFiles(self):
177    return self._FindFilesInResultsDir("-name perf.data").splitlines()
178
179  def _GetPerfReportFiles(self):
180    return self._FindFilesInResultsDir("-name perf.data.report").splitlines()
181
182  def _GetDataMeasurementsFiles(self):
183    return self._FindFilesInResultsDir("-name perf_measurements").splitlines()
184
185  def _GeneratePerfReportFiles(self):
186    perf_report_files = []
187    for perf_data_file in self.perf_data_files:
188      # Generate a perf.report and store it side-by-side with the perf.data
189      # file.
190      chroot_perf_data_file = misc.GetInsideChrootPath(self._chromeos_root,
191                                                       perf_data_file)
192      perf_report_file = "%s.report" % perf_data_file
193      if os.path.exists(perf_report_file):
194        raise Exception("Perf report file already exists: %s" %
195                        perf_report_file)
196      chroot_perf_report_file = misc.GetInsideChrootPath(self._chromeos_root,
197                                                         perf_report_file)
198      perf_path = os.path.join (self._chromeos_root,
199                                "chroot",
200                                "usr/bin/perf")
201
202      perf_file = "/usr/sbin/perf"
203      if os.path.exists(perf_path):
204        perf_file = "/usr/bin/perf"
205
206      # The following is a hack, to use the perf.static binary that
207      # was given to us by Stephane Eranian, until he can figure out
208      # why "normal" perf cannot properly symbolize ChromeOS perf.data files.
209      # Get the directory containing the 'crosperf' script.
210      dirname, _ = misc.GetRoot(sys.argv[0])
211      perf_path = os.path.join (dirname, "..", "perf.static")
212      if os.path.exists(perf_path):
213        # copy the executable into the chroot so that it can be found.
214        src_path = perf_path
215        dst_path = os.path.join (self._chromeos_root, "chroot",
216                                 "tmp/perf.static")
217        command = "cp %s %s" % (src_path,dst_path)
218        self._ce.RunCommand (command)
219        perf_file = "/tmp/perf.static"
220
221      command = ("%s report "
222                 "-n "
223                 "--symfs /build/%s "
224                 "--vmlinux /build/%s/usr/lib/debug/boot/vmlinux "
225                 "--kallsyms /build/%s/boot/System.map-* "
226                 "-i %s --stdio "
227                 "> %s" %
228                 (perf_file,
229                  self._board,
230                  self._board,
231                  self._board,
232                  chroot_perf_data_file,
233                  chroot_perf_report_file))
234      self._ce.ChrootRunCommand(self._chromeos_root,
235                                command)
236
237      # Add a keyval to the dictionary for the events captured.
238      perf_report_files.append(
239          misc.GetOutsideChrootPath(self._chromeos_root,
240                                    chroot_perf_report_file))
241    return perf_report_files
242
243  def _GatherPerfResults(self):
244    report_id = 0
245    for perf_report_file in self.perf_report_files:
246      with open(perf_report_file, "r") as f:
247        report_contents = f.read()
248        for group in re.findall(r"Events: (\S+) (\S+)", report_contents):
249          num_events = group[0]
250          event_name = group[1]
251          key = "perf_%s_%s" % (report_id, event_name)
252          value = str(misc.UnitToNumber(num_events))
253          self.keyvals[key] = value
254
255  def _PopulateFromRun(self, out, err, retval, show_all, test, suite):
256    self._board = self.label.board
257    self.out = out
258    self.err = err
259    self.retval = retval
260    self.test_name = test
261    self.suite = suite
262    self.chroot_results_dir = self._GetResultsDir()
263    self.results_dir = misc.GetOutsideChrootPath(self._chromeos_root,
264                                                 self.chroot_results_dir)
265    self.perf_data_files = self._GetPerfDataFiles()
266    # Include all perf.report data in table.
267    self.perf_report_files = self._GeneratePerfReportFiles()
268    # TODO(asharif): Do something similar with perf stat.
269
270    # Grab keyvals from the directory.
271    self._ProcessResults(show_all)
272
273  def _ProcessResults(self, show_all):
274    # Note that this function doesn't know anything about whether there is a
275    # cache hit or miss. It should process results agnostic of the cache hit
276    # state.
277    self.keyvals = self._GetKeyvals(show_all)
278    self.keyvals["retval"] = self.retval
279    # Generate report from all perf.data files.
280    # Now parse all perf report files and include them in keyvals.
281    self._GatherPerfResults()
282
283  def _PopulateFromCacheDir(self, cache_dir, show_all, test, suite):
284    self.test_name = test
285    self.suite = suite
286    # Read in everything from the cache directory.
287    with open(os.path.join(cache_dir, RESULTS_FILE), "r") as f:
288      self.out = pickle.load(f)
289      self.err = pickle.load(f)
290      self.retval = pickle.load(f)
291
292    # Untar the tarball to a temporary directory
293    self._temp_dir = tempfile.mkdtemp(dir=os.path.join(self._chromeos_root,
294                                                       "chroot", "tmp"))
295
296    command = ("cd %s && tar xf %s" %
297               (self._temp_dir,
298                os.path.join(cache_dir, AUTOTEST_TARBALL)))
299    ret = self._ce.RunCommand(command)
300    if ret:
301      raise Exception("Could not untar cached tarball")
302    self.results_dir = self._temp_dir
303    self.perf_data_files = self._GetPerfDataFiles()
304    self.perf_report_files = self._GetPerfReportFiles()
305    self._ProcessResults(show_all)
306
307  def CleanUp(self, rm_chroot_tmp):
308    if rm_chroot_tmp and self.results_dir:
309      dirname, basename = misc.GetRoot(self.results_dir)
310      if basename.find("test_that_results_") != -1:
311        command = "rm -rf %s" % self.results_dir
312      else:
313        command = "rm -rf %s" % dirname
314      self._ce.RunCommand(command)
315    if self._temp_dir:
316      command = "rm -rf %s" % self._temp_dir
317      self._ce.RunCommand(command)
318
319  def StoreToCacheDir(self, cache_dir, machine_manager):
320    # Create the dir if it doesn't exist.
321    temp_dir = tempfile.mkdtemp()
322
323    # Store to the temp directory.
324    with open(os.path.join(temp_dir, RESULTS_FILE), "w") as f:
325      pickle.dump(self.out, f)
326      pickle.dump(self.err, f)
327      pickle.dump(self.retval, f)
328
329    if self.results_dir:
330      tarball = os.path.join(temp_dir, AUTOTEST_TARBALL)
331      command = ("cd %s && "
332                 "tar "
333                 "--exclude=var/spool "
334                 "--exclude=var/log "
335                 "-cjf %s ." % (self.results_dir, tarball))
336      ret = self._ce.RunCommand(command)
337      if ret:
338        raise Exception("Couldn't store autotest output directory.")
339    # Store machine info.
340    # TODO(asharif): Make machine_manager a singleton, and don't pass it into
341    # this function.
342    with open(os.path.join(temp_dir, MACHINE_FILE), "w") as f:
343      f.write(machine_manager.machine_checksum_string[self.label.name])
344
345    if os.path.exists(cache_dir):
346      command = "rm -rf {0}".format(cache_dir)
347      self._ce.RunCommand(command)
348
349    command = "mkdir -p {0} && ".format(os.path.dirname(cache_dir))
350    command += "chmod g+x {0} && ".format(temp_dir)
351    command += "mv {0} {1}".format(temp_dir, cache_dir)
352    ret = self._ce.RunCommand(command)
353    if ret:
354      command = "rm -rf {0}".format(temp_dir)
355      self._ce.RunCommand(command)
356      raise Exception("Could not move dir %s to dir %s" %
357                      (temp_dir, cache_dir))
358
359  @classmethod
360  def CreateFromRun(cls, logger, log_level, label, out, err, retval, show_all,
361                    test, suite="telemetry_Crosperf"):
362    if suite == "telemetry":
363      result = TelemetryResult(logger, label, log_level)
364    else:
365      result = cls(logger, label, log_level)
366    result._PopulateFromRun(out, err, retval, show_all, test, suite)
367    return result
368
369  @classmethod
370  def CreateFromCacheHit(cls, logger, log_level, label, cache_dir,
371                         show_all, test, suite="telemetry_Crosperf"):
372    if suite == "telemetry":
373      result = TelemetryResult(logger, label)
374    else:
375      result = cls(logger, label, log_level)
376    try:
377      result._PopulateFromCacheDir(cache_dir, show_all, test, suite)
378
379    except Exception as e:
380      logger.LogError("Exception while using cache: %s" % e)
381      return None
382    return result
383
384
class TelemetryResult(Result):
  """Result of a "telemetry" suite run.

  The key/value pairs are parsed straight from the comma-separated text
  output of the run; no results directory is involved.
  """

  def __init__(self, logger, label, log_level, cmd_exec=None):
    super(TelemetryResult, self).__init__(logger, label, log_level, cmd_exec)

  def _PopulateFromRun(self, out, err, retval, show_all, test, suite):
    """Record the output of a fresh run and parse it into keyvals."""
    self.test_name = test
    self.suite = suite
    self.out = out
    self.err = err
    self.retval = retval

    self._ProcessResults(show_all)

  def _ProcessResults(self, show_all=None):
    """Parse self.out into self.keyvals.

    Args:
      show_all: accepted for compatibility with Result._ProcessResults;
        not used.
    """
    # The output is:
    # url,average_commit_time (ms),...
    # www.google.com,33.4,21.2,...
    # We need to convert to this format:
    # {"www.google.com average_commit_time (ms)": "33.4",
    #  "www.google.com ...": "21.2"}
    # Added note:  Occasionally the output comes back
    # with "JSON.stringify(window.automation.GetResults())" on
    # the first line, and then the rest of the output as
    # described above.
    lines = self.out.splitlines()
    if lines and lines[0].startswith("JSON.stringify"):
      lines = lines[1:]

    self.keyvals = {}
    if lines:
      labels = lines[0].split(",")
      for line in lines[1:]:
        fields = line.split(",")
        # Rows that do not line up with the header cannot be labelled.
        if len(fields) != len(labels):
          continue
        for i in range(1, len(labels)):
          key = "%s %s" % (fields[0], labels[i])
          self.keyvals[key] = fields[i]
    # Always record the return value, even when there was no output to
    # parse, as Result._ProcessResults does.
    self.keyvals["retval"] = self.retval

  def _PopulateFromCacheDir(self, cache_dir, show_all=None, test=None,
                            suite=None):
    """Fill in this result from a cache entry.

    The extra arguments mirror Result._PopulateFromCacheDir so that
    Result.CreateFromCacheHit can call either class the same way.

    Args:
      cache_dir: directory of the cache entry to read.
      show_all: passed on to _ProcessResults.
      test: name of the test.
      suite: name of the suite the test belongs to.
    """
    self.test_name = test
    self.suite = suite
    # Pickle data is binary, so the file is opened in binary mode.
    # NOTE(review): unpickling executes whatever the file describes; only
    # read cache entries from trusted locations.
    with open(os.path.join(cache_dir, RESULTS_FILE), "rb") as f:
      self.out = pickle.load(f)
      self.err = pickle.load(f)
      self.retval = pickle.load(f)
    self._ProcessResults(show_all)
435
436
class CacheConditions(object):
  """Conditions that decide whether a cached result counts as a hit."""

  # Hit only if the result file exists.
  CACHE_FILE_EXISTS = 0

  # Hit if the checksum of cpuinfo and totalmem of the cached result
  # matches that of the new run.
  MACHINES_MATCH = 1

  # Hit if the image checksum of the cached result matches that of the
  # new run.
  CHECKSUMS_MATCH = 2

  # Hit only if the cached run was successful.
  RUN_SUCCEEDED = 3

  # Never a hit.
  FALSE = 4

  # Hit if the image path matches the cached image path.
  IMAGE_PATH_MATCH = 5

  # Hit if the uuid of the hard disk matches the cached one.
  SAME_MACHINE_MATCH = 6
460
461
462class ResultsCache(object):
463
464  """ This class manages the key of the cached runs without worrying about what
  is exactly stored (value). The value generation is handled by the Result
466  class.
467  """
468  CACHE_VERSION = 6
469
470  def Init(self, chromeos_image, chromeos_root, test_name, iteration,
471           test_args, profiler_args, machine_manager, board, cache_conditions,
472           logger_to_use, log_level, label, share_users, suite,
473           show_all_results):
474    self.chromeos_image = chromeos_image
475    self.chromeos_root = chromeos_root
476    self.test_name = test_name
477    self.iteration = iteration
478    self.test_args = test_args
479    self.profiler_args = profiler_args
480    self.board = board
481    self.cache_conditions = cache_conditions
482    self.machine_manager = machine_manager
483    self._logger = logger_to_use
484    self._ce = command_executer.GetCommandExecuter(self._logger,
485                                                   log_level=log_level)
486    self.label = label
487    self.share_users = share_users
488    self.suite = suite
489    self.log_level = log_level
490    self.show_all = show_all_results
491
492  def _GetCacheDirForRead(self):
493    matching_dirs = []
494    for glob_path in self._FormCacheDir(self._GetCacheKeyList(True)):
495      matching_dirs += glob.glob(glob_path)
496
497    if matching_dirs:
498      # Cache file found.
499      return matching_dirs[0]
500    else:
501      return None
502
503  def _GetCacheDirForWrite(self):
504    return self._FormCacheDir(self._GetCacheKeyList(False))[0]
505
506  def _FormCacheDir(self, list_of_strings):
507    cache_key = " ".join(list_of_strings)
508    cache_dir = misc.GetFilenameFromString(cache_key)
509    if self.label.cache_dir:
510      cache_home = os.path.abspath(os.path.expanduser(self.label.cache_dir))
511      cache_path = [os.path.join(cache_home, cache_dir)]
512    else:
513      cache_path = [os.path.join(SCRATCH_DIR, cache_dir)]
514
515    if len(self.share_users):
516      for i in [x.strip() for x in self.share_users.split(",")]:
517        path = SCRATCH_BASE % i
518        cache_path.append(os.path.join(path, cache_dir))
519
520    return cache_path
521
522  def _GetCacheKeyList(self, read):
523    if read and CacheConditions.MACHINES_MATCH not in self.cache_conditions:
524      machine_checksum = "*"
525    else:
526      machine_checksum = self.machine_manager.machine_checksum[self.label.name]
527    if read and CacheConditions.CHECKSUMS_MATCH not in self.cache_conditions:
528      checksum = "*"
529    elif self.label.image_type == "trybot":
530      checksum = hashlib.md5(self.label.chromeos_image).hexdigest()
531    elif self.label.image_type == "official":
532      checksum = "*"
533    else:
534      checksum = ImageChecksummer().Checksum(self.label, self.log_level)
535
536    if read and CacheConditions.IMAGE_PATH_MATCH not in self.cache_conditions:
537      image_path_checksum = "*"
538    else:
539      image_path_checksum = hashlib.md5(self.chromeos_image).hexdigest()
540
541    machine_id_checksum = ""
542    if read and CacheConditions.SAME_MACHINE_MATCH not in self.cache_conditions:
543      machine_id_checksum = "*"
544    else:
545      for machine in self.machine_manager.GetMachines(self.label):
546        if machine.name == self.label.remote[0]:
547          machine_id_checksum = machine.machine_id_checksum
548          break
549
550    temp_test_args = "%s %s" % (self.test_args, self.profiler_args)
551    test_args_checksum = hashlib.md5(
552        "".join(temp_test_args)).hexdigest()
553    return (image_path_checksum,
554            self.test_name, str(self.iteration),
555            test_args_checksum,
556            checksum,
557            machine_checksum,
558            machine_id_checksum,
559            str(self.CACHE_VERSION))
560
561  def ReadResult(self):
562    if CacheConditions.FALSE in self.cache_conditions:
563      cache_dir = self._GetCacheDirForWrite()
564      command = "rm -rf {0}".format(cache_dir)
565      self._ce.RunCommand(command)
566      return None
567    cache_dir = self._GetCacheDirForRead()
568
569    if not cache_dir:
570      return None
571
572    if not os.path.isdir(cache_dir):
573      return None
574
575    self._logger.LogOutput("Trying to read from cache dir: %s" % cache_dir)
576    result = Result.CreateFromCacheHit(self._logger,
577                                       self.log_level,
578                                       self.label,
579                                       cache_dir,
580                                       self.show_all,
581                                       self.test_name,
582                                       self.suite)
583    if not result:
584      return None
585
586    if (result.retval == 0 or
587        CacheConditions.RUN_SUCCEEDED not in self.cache_conditions):
588      return result
589
590    return None
591
592  def StoreResult(self, result):
593    cache_dir = self._GetCacheDirForWrite()
594    result.StoreToCacheDir(cache_dir, self.machine_manager)
595
596
class MockResultsCache(ResultsCache):
  """A results cache that never holds anything."""

  def Init(self, *args):
    """Accept and ignore any positional arguments."""

  def ReadResult(self):
    """Always report a cache miss."""
    return None

  def StoreResult(self, result):
    """Discard the result."""
606
607
class MockResult(Result):
  """A result that only records the raw output of a run."""

  def _PopulateFromRun(self, out, err, retval, show_all, test, suite):
    """Store out, err and retval; the other arguments are ignored."""
    self.out, self.err, self.retval = out, err, retval
613