results_cache.py revision e77818d606f46e84a592702272d73715b321a773
1#!/usr/bin/python
2
3# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Module to deal with result cache."""
8
9import getpass
10import glob
11import hashlib
12import os
13import pickle
14import re
15import tempfile
16import json
17import sys
18
19from utils import command_executer
20from utils import misc
21
22from image_checksummer import ImageChecksummer
23
# Per-user scratch directory template; "%s" is filled in with a user name.
SCRATCH_BASE = "/home/%s/cros_scratch"
# Scratch directory of the user running this process.
SCRATCH_DIR = SCRATCH_BASE % getpass.getuser()
# File names used inside a single cache directory.
RESULTS_FILE = "results.txt"  # pickled out, err and retval of the run
MACHINE_FILE = "machine.txt"  # machine checksum string for the label
AUTOTEST_TARBALL = "autotest.tbz2"  # tarball of the results directory
PERF_RESULTS_FILE = "perf-results.txt"
# JSON file, looked up next to sys.argv[0], that lists the default result
# fields to report for each Telemetry benchmark.
TELEMETRY_RESULT_DEFAULTS_FILE = "default-telemetry-results.json"
31
class Result(object):
  """Manage what is stored inside the cache, independent of the cache key.

  For runs with perf, this stores perf.data, perf.report, etc. The key
  generation is handled by the ResultsCache class.
  """

  def __init__(self, logger, label):
    """Create an empty result bound to a label.

    Args:
      logger: logger handed to the command executer.
      label: label object; its chromeos_root attribute is read here and its
        board and name attributes are read later.
    """
    self._chromeos_root = label.chromeos_root
    self._logger = logger
    self._ce = command_executer.GetCommandExecuter(self._logger)
    self._temp_dir = None
    self.label = label
    self.results_dir = None
    self.perf_data_files = []
    self.perf_report_files = []

  def _CopyFilesTo(self, dest_dir, files_to_copy):
    """Copy each file into dest_dir as "<basename>.<index>".

    The index is the position of the file in files_to_copy, so that several
    files sharing a basename (e.g. many perf.data files) do not overwrite
    each other in dest_dir.

    Args:
      dest_dir: destination directory; created on demand.
      files_to_copy: list of source file paths.

    Raises:
      Exception: if the command executer reports a failed copy.
    """
    for file_index, file_to_copy in enumerate(files_to_copy):
      if not os.path.isdir(dest_dir):
        command = "mkdir -p %s" % dest_dir
        self._ce.RunCommand(command)
      dest_name = "%s.%s" % (os.path.basename(file_to_copy), file_index)
      dest_file = os.path.join(dest_dir, dest_name)
      ret = self._ce.CopyFiles(file_to_copy,
                               dest_file,
                               recursive=False)
      if ret:
        raise Exception("Could not copy results file: %s" % file_to_copy)

  def CopyResultsTo(self, dest_dir):
    """Copy all perf.data and perf report files into dest_dir."""
    self._CopyFilesTo(dest_dir, self.perf_data_files)
    self._CopyFilesTo(dest_dir, self.perf_report_files)

  def _GetNewKeyvals(self, keyvals_dict):
    """Merge perf_measurements data into keyvals_dict.

    Each non-blank line of a perf_measurements file is parsed as a JSON
    object with "graph", "description", "value" and "units" entries.

    Args:
      keyvals_dict: dictionary of results; updated in place.

    Returns:
      A (keyvals_dict, units_dict) tuple. units_dict has an entry for every
      key of keyvals_dict; keys without a known unit map to "".
    """
    # Initialize 'units' dictionary.
    units_dict = {}
    for k in keyvals_dict:
      units_dict[k] = ""
    results_files = self._GetDataMeasurementsFiles()
    for f in results_files:
      # Make sure we can find the results file
      if os.path.exists(f):
        data_filename = f
      else:
        # Otherwise get the base filename and create the correct
        # path for it.
        # NOTE(review): os.path.join drops everything before an absolute
        # component, so "/tmp" (and self._temp_dir, if absolute) override
        # self._chromeos_root here -- confirm the intended path.
        _, f_base = misc.GetRoot(f)
        data_filename = os.path.join(self._chromeos_root, "/tmp",
                                     self._temp_dir, f_base)
      if os.path.exists(data_filename):
        with open(data_filename, "r") as data_file:
          for line in data_file:
            # A blank (e.g. trailing) line is not a JSON document.
            if not line.strip():
              continue
            tmp_dict = json.loads(line)
            key = tmp_dict["graph"] + "__" + tmp_dict["description"]
            keyvals_dict[key] = tmp_dict["value"]
            units_dict[key] = tmp_dict["units"]

    return keyvals_dict, units_dict

  def _GetTelemetryResultsKeyvals(self, keyvals_dict, units_dict):
    """Reduce keyvals_dict to the default fields of the current benchmark.

    keyvals_dict is the dictionary of key-value pairs that is used for
    generating Crosperf reports.

    Telemetry tests return many values (fields) that are not of
    interest, so we have created a json file that indicates, for each
    Telemetry benchmark, what the default return fields of interest
    are.

    units_dict is a dictionary of the units for the return values in
    keyvals_dict.  After looking for the keys in the keyvals_dict in
    the json file of "interesting" default return fields, we append
    the units to the name of the field, to make the report easier to
    understand.  We don't append the units to the results name earlier,
    because the units are not part of the field names in the json file.

    This function reads that file into a dictionary, and finds the
    entry for the current benchmark (if it exists).  The entry
    contains a list of return fields to use in the report.  For each
    field in the default list, we look for the field in the input
    keyvals_dict, and if we find it we copy the entry into our results
    dictionary. We then return the results dictionary, which gets used
    for actually generating the report.

    If telemetry_Crosperf did not PASS, keyvals_dict is returned unchanged
    (without units appended to the keys).
    """
    # Check to see if telemetry_Crosperf succeeded; if not, there's no point
    # in going further...
    if keyvals_dict.get("telemetry_Crosperf") != "PASS":
      return keyvals_dict

    # Find the Crosperf directory, and look there for the telemetry
    # results defaults file, if it exists.
    results_dict = {}
    dirname, _ = misc.GetRoot(sys.argv[0])
    fullname = os.path.join(dirname, TELEMETRY_RESULT_DEFAULTS_FILE)
    if os.path.exists(fullname):
      # Slurp the file into a dictionary.  The keys in the dictionary are
      # the benchmark names.  The value for a key is a list containing the
      # names of all the result fields that should be returned in a 'default'
      # report.
      with open(fullname) as defaults_file:
        result_defaults = json.load(defaults_file)
      # Check to see if the current benchmark test actually has an entry in
      # the dictionary.
      if self.test_name and self.test_name in result_defaults:
        # Look for each default result in the dictionary of actual result
        # fields returned. If found, add the field and its value (with the
        # units appended to the key name) to our final results dictionary.
        for r in result_defaults[self.test_name]:
          if r in keyvals_dict:
            newkey = r + " (" + units_dict[r] + ")"
            results_dict[newkey] = keyvals_dict[r]
    if not results_dict:
      # We did not find/create any new entries.  Therefore use the keyvals_dict
      # that was passed in, but update the entry names to have the units.
      for k in keyvals_dict:
        newkey = k + " (" + units_dict[k] + ")"
        results_dict[newkey] = keyvals_dict[k]
    return results_dict

  def _GetKeyvals(self, show_all):
    """Run generate_test_report in the chroot and parse its CSV output.

    Args:
      show_all: if false and the suite is "telemetry_Crosperf", only the
        default result fields are kept.

    Returns:
      Dictionary mapping result names to values.
    """
    results_in_chroot = os.path.join(self._chromeos_root,
                                     "chroot", "tmp")
    if not self._temp_dir:
      # Copy the results into the chroot's tmp so the report tool sees them.
      self._temp_dir = tempfile.mkdtemp(dir=results_in_chroot)
      command = "cp -r {0}/* {1}".format(self.results_dir, self._temp_dir)
      self._ce.RunCommand(command)

    command = ("python generate_test_report --no-color --csv %s" %
               (os.path.join("/tmp", os.path.basename(self._temp_dir))))
    [_, out, _] = self._ce.ChrootRunCommand(self._chromeos_root,
                                            command,
                                            return_output=True)
    keyvals_dict = {}
    tmp_dir_in_chroot = misc.GetInsideChrootPath(self._chromeos_root,
                                                 self._temp_dir)
    for line in out.splitlines():
      tokens = re.split("=|,", line)
      # Lines without a separator (e.g. blank lines) carry no key/value pair.
      if len(tokens) < 2:
        continue
      key = tokens[-2]
      if key.startswith(tmp_dir_in_chroot):
        key = key[len(tmp_dir_in_chroot) + 1:]
      value = tokens[-1]
      keyvals_dict[key] = value

    # Check to see if there is a perf_measurements file and get the
    # data from it if so.
    keyvals_dict, units_dict = self._GetNewKeyvals(keyvals_dict)
    if not show_all and self.suite == "telemetry_Crosperf":
      # We're running telemetry tests and the user did not ask to
      # see all the results, so get the default results, to be used
      # for generating the report.
      keyvals_dict = self._GetTelemetryResultsKeyvals(keyvals_dict,
                                                      units_dict)
    return keyvals_dict

  def _GetResultsDir(self):
    """Extract the results directory from the run output.

    Raises:
      Exception: if self.out has no "Results placed in <dir>" text.
    """
    mo = re.search(r"Results placed in (\S+)", self.out)
    if mo:
      return mo.group(1)
    raise Exception("Could not find results directory.")

  def _FindFilesInResultsDir(self, find_args):
    """Run "find <results_dir> <find_args>" and return its raw output.

    Returns:
      The output of find, or "" when no results directory is set (so callers
      can always call splitlines() on the result).

    Raises:
      Exception: if the find command fails.
    """
    if not self.results_dir:
      return ""

    command = "find %s %s" % (self.results_dir,
                              find_args)
    ret, out, _ = self._ce.RunCommand(command, return_output=True)
    if ret:
      raise Exception("Could not run find command!")
    return out

  def _GetPerfDataFiles(self):
    """Return the paths of all perf.data files in the results directory."""
    return self._FindFilesInResultsDir("-name perf.data").splitlines()

  def _GetPerfReportFiles(self):
    """Return the paths of all perf.data.report files in the results dir."""
    return self._FindFilesInResultsDir("-name perf.data.report").splitlines()

  def _GetDataMeasurementsFiles(self):
    """Return the paths of all perf_measurements files in the results dir."""
    return self._FindFilesInResultsDir("-name perf_measurements").splitlines()

  def _GeneratePerfReportFiles(self):
    """Run "perf report" in the chroot for every perf.data file.

    Returns:
      List of generated report paths (outside-chroot paths).

    Raises:
      Exception: if a report file already exists next to a perf.data file.
    """
    # Pick the perf binary once: use /usr/bin/perf if the chroot has it,
    # otherwise fall back to /usr/sbin/perf.
    perf_path = os.path.join(self._chromeos_root,
                             "chroot",
                             "usr/bin/perf")
    perf_file = "/usr/sbin/perf"
    if os.path.exists(perf_path):
      perf_file = "/usr/bin/perf"

    perf_report_files = []
    for perf_data_file in self.perf_data_files:
      # Generate a perf.report and store it side-by-side with the perf.data
      # file.
      chroot_perf_data_file = misc.GetInsideChrootPath(self._chromeos_root,
                                                       perf_data_file)
      perf_report_file = "%s.report" % perf_data_file
      if os.path.exists(perf_report_file):
        raise Exception("Perf report file already exists: %s" %
                        perf_report_file)
      chroot_perf_report_file = misc.GetInsideChrootPath(self._chromeos_root,
                                                         perf_report_file)

      command = ("%s report "
                 "-n "
                 "--symfs /build/%s "
                 "--vmlinux /build/%s/usr/lib/debug/boot/vmlinux "
                 "--kallsyms /build/%s/boot/System.map-* "
                 "-i %s --stdio "
                 "> %s" %
                 (perf_file,
                  self._board,
                  self._board,
                  self._board,
                  chroot_perf_data_file,
                  chroot_perf_report_file))
      self._ce.ChrootRunCommand(self._chromeos_root,
                                command)

      # Remember where the report ended up, as seen from outside the chroot.
      perf_report_files.append(
          misc.GetOutsideChrootPath(self._chromeos_root,
                                    chroot_perf_report_file))
    return perf_report_files

  def _GatherPerfResults(self):
    """Add "perf_<report index>_<event>" entries to self.keyvals.

    Every "Events: <count> <name>" match in a perf report file becomes one
    entry; the report index keeps entries of different reports apart.
    """
    for report_id, perf_report_file in enumerate(self.perf_report_files):
      with open(perf_report_file, "r") as f:
        report_contents = f.read()
      for num_events, event_name in re.findall(r"Events: (\S+) (\S+)",
                                               report_contents):
        key = "perf_%s_%s" % (report_id, event_name)
        value = str(misc.UnitToNumber(num_events))
        self.keyvals[key] = value

  def _PopulateFromRun(self, out, err, retval, show_all, test, suite):
    """Fill in this result from the output of a fresh run."""
    self._board = self.label.board
    self.out = out
    self.err = err
    self.retval = retval
    self.test_name = test
    self.suite = suite
    self.chroot_results_dir = self._GetResultsDir()
    self.results_dir = misc.GetOutsideChrootPath(self._chromeos_root,
                                                 self.chroot_results_dir)
    self.perf_data_files = self._GetPerfDataFiles()
    # Include all perf.report data in table.
    self.perf_report_files = self._GeneratePerfReportFiles()
    # TODO(asharif): Do something similar with perf stat.

    # Grab keyvals from the directory.
    self._ProcessResults(show_all)

  def _ProcessResults(self, show_all):
    """Compute self.keyvals from the results directory and perf reports."""
    # Note that this function doesn't know anything about whether there is a
    # cache hit or miss. It should process results agnostic of the cache hit
    # state.
    self.keyvals = self._GetKeyvals(show_all)
    self.keyvals["retval"] = self.retval
    # Now parse all perf report files and include them in keyvals.
    self._GatherPerfResults()

  def _PopulateFromCacheDir(self, cache_dir, show_all, test, suite):
    """Fill in this result from a cache directory.

    Raises:
      Exception: if the cached tarball cannot be extracted.
    """
    self.test_name = test
    self.suite = suite
    # Read in everything from the cache directory.
    # NOTE: pickle.load can execute arbitrary code from the file it reads;
    # cache directories must only come from trusted users.
    # Pickle data is binary, hence "rb".
    with open(os.path.join(cache_dir, RESULTS_FILE), "rb") as f:
      self.out = pickle.load(f)
      self.err = pickle.load(f)
      self.retval = pickle.load(f)

    # Untar the tarball to a temporary directory
    self._temp_dir = tempfile.mkdtemp(dir=os.path.join(self._chromeos_root,
                                                       "chroot", "tmp"))

    command = ("cd %s && tar xf %s" %
               (self._temp_dir,
                os.path.join(cache_dir, AUTOTEST_TARBALL)))
    ret = self._ce.RunCommand(command)
    if ret:
      raise Exception("Could not untar cached tarball")
    self.results_dir = self._temp_dir
    self.perf_data_files = self._GetPerfDataFiles()
    self.perf_report_files = self._GetPerfReportFiles()
    self._ProcessResults(show_all)

  def CleanUp(self, rm_chroot_tmp):
    """Remove the temporary directory and, if asked, the results directory."""
    if rm_chroot_tmp and self.results_dir:
      command = "rm -rf %s" % self.results_dir
      self._ce.RunCommand(command)
    if self._temp_dir:
      command = "rm -rf %s" % self._temp_dir
      self._ce.RunCommand(command)

  def StoreToCacheDir(self, cache_dir, machine_manager):
    """Write this result to cache_dir, replacing any existing directory.

    Args:
      cache_dir: final location of the cache entry.
      machine_manager: object whose machine_checksum_string mapping is
        indexed by the label name.

    Raises:
      Exception: if the results cannot be tarred or moved into place.
    """
    # Stage everything in a temporary directory, then move it into place.
    temp_dir = tempfile.mkdtemp()

    # Pickle data is binary, hence "wb".
    with open(os.path.join(temp_dir, RESULTS_FILE), "wb") as f:
      pickle.dump(self.out, f)
      pickle.dump(self.err, f)
      pickle.dump(self.retval, f)

    if self.results_dir:
      tarball = os.path.join(temp_dir, AUTOTEST_TARBALL)
      command = ("cd %s && "
                 "tar "
                 "--exclude=var/spool "
                 "--exclude=var/log "
                 "-cjf %s ." % (self.results_dir, tarball))
      ret = self._ce.RunCommand(command)
      if ret:
        # Do not leave the staging directory behind on failure.
        self._ce.RunCommand("rm -rf {0}".format(temp_dir))
        raise Exception("Couldn't store autotest output directory.")
    # Store machine info.
    # TODO(asharif): Make machine_manager a singleton, and don't pass it into
    # this function.
    with open(os.path.join(temp_dir, MACHINE_FILE), "w") as f:
      f.write(machine_manager.machine_checksum_string[self.label.name])

    if os.path.exists(cache_dir):
      command = "rm -rf {0}".format(cache_dir)
      self._ce.RunCommand(command)

    command = "mkdir -p {0} && ".format(os.path.dirname(cache_dir))
    command += "chmod g+x {0} && ".format(temp_dir)
    command += "mv {0} {1}".format(temp_dir, cache_dir)
    ret = self._ce.RunCommand(command)
    if ret:
      command = "rm -rf {0}".format(temp_dir)
      self._ce.RunCommand(command)
      raise Exception("Could not move dir %s to dir %s" %
                      (temp_dir, cache_dir))

  @classmethod
  def CreateFromRun(cls, logger, label, out, err, retval, show_all, test,
                    suite="pyauto"):
    """Build a result (TelemetryResult for suite "telemetry") from a run."""
    if suite == "telemetry":
      result = TelemetryResult(logger, label)
    else:
      result = cls(logger, label)
    result._PopulateFromRun(out, err, retval, show_all, test, suite)
    return result

  @classmethod
  def CreateFromCacheHit(cls, logger, label, cache_dir, show_all, test,
                         suite="pyauto"):
    """Build a result from a cache directory.

    Returns:
      The populated result, or None if the cache entry could not be used
      (the error is logged).
    """
    if suite == "telemetry":
      result = TelemetryResult(logger, label)
    else:
      result = cls(logger, label)
    try:
      result._PopulateFromCacheDir(cache_dir, show_all, test, suite)

    except Exception as e:
      # A broken cache entry is treated as a cache miss, not a fatal error.
      logger.LogError("Exception while using cache: %s" % e)
      return None
    return result
412
413
class TelemetryResult(Result):
  """Result of a "telemetry" suite run, parsed from comma-separated output.

  The private methods accept the same arguments as their Result
  counterparts so that Result.CreateFromRun and Result.CreateFromCacheHit
  can call them interchangeably.
  """

  def __init__(self, logger, label):
    super(TelemetryResult, self).__init__(logger, label)

  def _PopulateFromRun(self, out, err, retval, show_all, test, suite):
    """Fill in this result from the output of a fresh run."""
    self.out = out
    self.err = err
    self.retval = retval
    self.test_name = test
    self.suite = suite

    self._ProcessResults()

  def _ProcessResults(self, show_all=None):
    """Parse self.out into self.keyvals.

    Args:
      show_all: accepted for signature compatibility with
        Result._ProcessResults; not used.
    """
    # The output is:
    # url,average_commit_time (ms),...
    # www.google.com,33.4,21.2,...
    # We need to convert to this format:
    # {"www.google.com average_commit_time (ms)": "33.4",
    #  "www.google.com ...": "21.2"}
    # Added note:  Occasionally the output comes back
    # with "JSON.stringify(window.automation.GetResults())" on
    # the first line, and then the rest of the output as
    # described above.

    lines = self.out.splitlines()
    self.keyvals = {}

    if lines and lines[0].startswith("JSON.stringify"):
      lines = lines[1:]

    # Without a header line there is nothing to parse; note that "retval"
    # is not recorded in that case.
    if not lines:
      return
    labels = lines[0].split(",")
    for line in lines[1:]:
      fields = line.split(",")
      # Skip rows that do not line up with the header.
      if len(fields) != len(labels):
        continue
      # Column 0 is the url; every other column becomes "<url> <label>".
      for i in range(1, len(labels)):
        key = "%s %s" % (fields[0], labels[i])
        self.keyvals[key] = fields[i]
    self.keyvals["retval"] = self.retval

  def _PopulateFromCacheDir(self, cache_dir, show_all=None, test=None,
                            suite=None):
    """Fill in this result from a cache directory.

    Args:
      cache_dir: directory holding the pickled results file.
      show_all: accepted for signature compatibility; not used.
      test: test name, stored as self.test_name.
      suite: suite name, stored as self.suite.
    """
    self.test_name = test
    self.suite = suite
    # NOTE: pickle.load can execute arbitrary code from the file it reads;
    # cache directories must only come from trusted users.
    # Pickle data is binary, hence "rb".
    with open(os.path.join(cache_dir, RESULTS_FILE), "rb") as f:
      self.out = pickle.load(f)
      self.err = pickle.load(f)
      self.retval = pickle.load(f)
    self._ProcessResults()
464
465
class CacheConditions(object):
  """Names for the conditions under which a cached result may be used."""

  # The result file must exist for a cache hit.
  CACHE_FILE_EXISTS = 0

  # The checksum of cpuinfo and totalmem of the cached result must match
  # that of the new run.
  MACHINES_MATCH = 1

  # The image checksum of the cached result must match that of the new run.
  CHECKSUMS_MATCH = 2

  # The cached result must come from a successful run.
  RUN_SUCCEEDED = 3

  # Never a cache hit.
  FALSE = 4

  # The image path must match the cached image path.
  IMAGE_PATH_MATCH = 5

  # The uuid of the hard disk must match the cached one.
  SAME_MACHINE_MATCH = 6
489
490
class ResultsCache(object):
  """Manage the key of the cached runs, independent of what is stored.

  The value generation is handled by the Result class.
  """
  CACHE_VERSION = 6

  def Init(self, chromeos_image, chromeos_root, test_name, iteration,
           test_args, profiler_args, machine_manager, board, cache_conditions,
           logger_to_use, label, share_users, suite, show_all_results):
    """Store the parameters that identify one cached run.

    share_users is a comma-separated string of user names whose scratch
    directories are also searched for cache entries.
    """
    self.chromeos_image = chromeos_image
    self.chromeos_root = chromeos_root
    self.test_name = test_name
    self.iteration = iteration
    self.test_args = test_args
    self.profiler_args = profiler_args
    self.board = board
    self.cache_conditions = cache_conditions
    self.machine_manager = machine_manager
    self._logger = logger_to_use
    self._ce = command_executer.GetCommandExecuter(self._logger)
    self.label = label
    self.share_users = share_users
    self.suite = suite
    self.show_all = show_all_results

  @staticmethod
  def _Md5Hex(text):
    """Return the hex MD5 digest of text, encoding it as UTF-8 if needed."""
    # hashlib needs bytes; byte strings are passed through untouched.
    if not isinstance(text, bytes):
      text = text.encode("utf-8")
    return hashlib.md5(text).hexdigest()

  def _GetCacheDirForRead(self):
    """Return the first existing directory matching the read key, or None."""
    matching_dirs = []
    # The read key may contain "*" wildcards, hence the glob.
    for glob_path in self._FormCacheDir(self._GetCacheKeyList(True)):
      matching_dirs += glob.glob(glob_path)

    if matching_dirs:
      # Cache file found.
      return matching_dirs[0]
    return None

  def _GetCacheDirForWrite(self):
    """Return the cache directory path to write this run's result to."""
    return self._FormCacheDir(self._GetCacheKeyList(False))[0]

  def _FormCacheDir(self, list_of_strings):
    """Build the candidate cache directory paths for a key.

    Args:
      list_of_strings: components of the cache key.

    Returns:
      List of paths: first the one under the label's cache_dir (or the
      current user's scratch directory), then one per user in share_users.
    """
    cache_key = " ".join(list_of_strings)
    cache_dir = misc.GetFilenameFromString(cache_key)
    if self.label.cache_dir:
      cache_home = os.path.abspath(os.path.expanduser(self.label.cache_dir))
    else:
      cache_home = SCRATCH_DIR
    cache_path = [os.path.join(cache_home, cache_dir)]

    for user in (self.share_users or "").split(","):
      user = user.strip()
      # An empty share_users (or a stray comma) names no user.
      if not user:
        continue
      cache_path.append(os.path.join(SCRATCH_BASE % user, cache_dir))

    return cache_path

  def _GetCacheKeyList(self, read):
    """Compute the components of the cache key.

    Args:
      read: true when the key is used to look up an entry; components whose
        cache condition is not requested then become the wildcard "*".

    Returns:
      Tuple of strings: image path checksum, test name, iteration, test args
      checksum, image checksum, machine checksum, machine id checksum and
      cache version.
    """
    if read and CacheConditions.MACHINES_MATCH not in self.cache_conditions:
      machine_checksum = "*"
    else:
      machine_checksum = self.machine_manager.machine_checksum[self.label.name]
    if read and CacheConditions.CHECKSUMS_MATCH not in self.cache_conditions:
      checksum = "*"
    elif self.label.image_type == "trybot":
      checksum = self._Md5Hex(self.label.chromeos_image)
    elif self.label.image_type == "official":
      checksum = "*"
    else:
      checksum = ImageChecksummer().Checksum(self.label)

    if read and CacheConditions.IMAGE_PATH_MATCH not in self.cache_conditions:
      image_path_checksum = "*"
    else:
      image_path_checksum = self._Md5Hex(self.chromeos_image)

    machine_id_checksum = ""
    if read and CacheConditions.SAME_MACHINE_MATCH not in self.cache_conditions:
      machine_id_checksum = "*"
    else:
      # Use the id of the label's first remote machine, if it is known.
      for machine in self.machine_manager.GetMachines(self.label):
        if machine.name == self.label.remote[0]:
          machine_id_checksum = machine.machine_id_checksum
          break

    temp_test_args = "%s %s" % (self.test_args, self.profiler_args)
    test_args_checksum = self._Md5Hex(temp_test_args)
    return (image_path_checksum,
            self.test_name, str(self.iteration),
            test_args_checksum,
            checksum,
            machine_checksum,
            machine_id_checksum,
            str(self.CACHE_VERSION))

  def ReadResult(self):
    """Return the cached result for this run, or None on a cache miss.

    With CacheConditions.FALSE set, the entry that would be written is
    removed and None is returned.
    """
    if CacheConditions.FALSE in self.cache_conditions:
      cache_dir = self._GetCacheDirForWrite()
      command = "rm -rf {0}".format(cache_dir)
      self._ce.RunCommand(command)
      return None
    cache_dir = self._GetCacheDirForRead()

    if not cache_dir:
      return None

    if not os.path.isdir(cache_dir):
      return None

    self._logger.LogOutput("Trying to read from cache dir: %s" % cache_dir)
    result = Result.CreateFromCacheHit(self._logger,
                                       self.label,
                                       cache_dir,
                                       self.show_all,
                                       self.test_name,
                                       self.suite)
    if not result:
      return None

    # A failed cached run only counts when success is not required.
    if (result.retval == 0 or
        CacheConditions.RUN_SUCCEEDED not in self.cache_conditions):
      return result

    return None

  def StoreResult(self, result):
    """Store result in the cache directory for this run."""
    cache_dir = self._GetCacheDirForWrite()
    result.StoreToCacheDir(cache_dir, self.machine_manager)
619
620
class MockResultsCache(ResultsCache):
  """ResultsCache stand-in that stores nothing and never hits."""

  def Init(self, *args):
    """Accept and ignore any positional arguments."""

  def ReadResult(self):
    """Always report a cache miss."""
    return None

  def StoreResult(self, result):
    """Discard the result."""
630
631
class MockResult(Result):
  """Result stand-in that only records the raw run output."""

  def _PopulateFromRun(self, out, err, retval, show_all, test, suite):
    """Store out, err and retval; the other arguments are ignored."""
    self.out, self.err, self.retval = out, err, retval
637