results_cache.py revision 139092449a545d4d87c06af3a6d8f3d71e42e927
1#!/usr/bin/python
2
3# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Module to deal with result cache."""
8
9import getpass
10import glob
11import hashlib
12import os
13import pickle
14import re
15import tempfile
16import json
17import sys
18
19from utils import command_executer
20from utils import misc
21
22from image_checksummer import ImageChecksummer
23
# Template for a user's scratch area; the "%s" slot takes a user name.
SCRATCH_BASE = "/home/%s/cros_scratch"
# Scratch area of the user running this process (default cache location).
SCRATCH_DIR = SCRATCH_BASE % (getpass.getuser(),)

# Files stored inside one cache entry directory.
RESULTS_FILE = "results.txt"        # pickled out, err and retval of the run
MACHINE_FILE = "machine.txt"        # machine checksum string for the label
AUTOTEST_TARBALL = "autotest.tbz2"  # tarball of the results directory
PERF_RESULTS_FILE = "perf-results.txt"

# JSON file, looked up next to the running script, that lists the result
# fields to report by default for each Telemetry benchmark.
TELEMETRY_RESULT_DEFAULTS_FILE = "default-telemetry-results.json"
31
32class Result(object):
33  """ This class manages what exactly is stored inside the cache without knowing
34  what the key of the cache is. For runs with perf, it stores perf.data,
35  perf.report, etc. The key generation is handled by the ResultsCache class.
36  """
37
38  def __init__(self, logger, label, log_level):
39    self._chromeos_root = label.chromeos_root
40    self._logger = logger
41    self._ce = command_executer.GetCommandExecuter(self._logger,
42                                                   log_level=log_level)
43    self._temp_dir = None
44    self.label = label
45    self.results_dir = None
46    self.log_level = log_level
47    self.perf_data_files = []
48    self.perf_report_files = []
49
50  def _CopyFilesTo(self, dest_dir, files_to_copy):
51    file_index = 0
52    for file_to_copy in files_to_copy:
53      if not os.path.isdir(dest_dir):
54        command = "mkdir -p %s" % dest_dir
55        self._ce.RunCommand(command)
56      dest_file = os.path.join(dest_dir,
57                               ("%s.%s" % (os.path.basename(file_to_copy),
58                                           file_index)))
59      ret = self._ce.CopyFiles(file_to_copy,
60                               dest_file,
61                               recursive=False)
62      if ret:
63        raise Exception("Could not copy results file: %s" % file_to_copy)
64
65  def CopyResultsTo(self, dest_dir):
66    self._CopyFilesTo(dest_dir, self.perf_data_files)
67    self._CopyFilesTo(dest_dir, self.perf_report_files)
68
69  def _GetNewKeyvals(self, keyvals_dict):
70    # Initialize 'units' dictionary.
71    units_dict = {}
72    for k in keyvals_dict:
73      units_dict[k] = ""
74    results_files = self._GetDataMeasurementsFiles()
75    for f in results_files:
76      # Make sure we can find the results file
77      if os.path.exists(f):
78        data_filename = f
79      else:
80        # Otherwise get the base filename and create the correct
81        # path for it.
82        f_dir, f_base = misc.GetRoot(f)
83        data_filename = os.path.join(self._chromeos_root, "/tmp",
84                                     self._temp_dir, f_base)
85      if os.path.exists(data_filename):
86        with open(data_filename, "r") as data_file:
87          lines = data_file.readlines()
88          for line in lines:
89            tmp_dict = json.loads(line)
90            key = tmp_dict["graph"] + "__" + tmp_dict["description"]
91            keyvals_dict[key] = tmp_dict["value"]
92            units_dict[key] = tmp_dict["units"]
93
94    return keyvals_dict, units_dict
95
96
97  def _GetTelemetryResultsKeyvals(self, keyvals_dict, units_dict):
98    """
99    keyvals_dict is the dictionary of key-value pairs that is used for
100    generating Crosperf reports.
101
102    Telemetry tests return many values (fields) that are not of
103    interest, so we have created a json file that indicates, for each
104    Telemetry benchmark, what the default return fields of interest
105    are.
106
107    units_dict is a dictionary of the units for the return values in
108    keyvals_dict.  After looking for the keys in the keyvals_dict in
109    the json file of "interesting" default return fields, we append
110    the units to the name of the field, to make the report easier to
111    understand.  We don't append the units to the results name earlier,
112    because the units are not part of the field names in the json file.
113
114    This function reads that file into a dictionary, and finds the
115    entry for the current benchmark (if it exists).  The entry
116    contains a list of return fields to use in the report.  For each
117    field in the default list, we look for the field in the input
118    keyvals_dict, and if we find it we copy the entry into our results
119    dictionary. We then return the results dictionary, which gets used
120    for actually generating the report.
121    """
122
123
124    # Check to see if telemetry_Crosperf succeeded; if not, there's no point
125    # in going further...
126
127    succeeded = False
128    if "telemetry_Crosperf" in keyvals_dict:
129      if keyvals_dict["telemetry_Crosperf"] == "PASS":
130        succeeded = True
131
132    if not succeeded:
133      return keyvals_dict
134
135    # Find the Crosperf directory, and look there for the telemetry
136    # results defaults file, if it exists.
137    results_dict = {}
138    dirname, basename = misc.GetRoot(sys.argv[0])
139    fullname = os.path.join(dirname, TELEMETRY_RESULT_DEFAULTS_FILE)
140    if os.path.exists (fullname):
141      # Slurp the file into a dictionary.  The keys in the dictionary are
142      # the benchmark names.  The value for a key is a list containing the
143      # names of all the result fields that should be returned in a 'default'
144      # report.
145      result_defaults = json.load(open(fullname))
146      # Check to see if the current benchmark test actually has an entry in
147      # the dictionary.
148      if self.test_name and self.test_name in result_defaults:
149        result_list = result_defaults[self.test_name]
150        # We have the default results list.  Make sure it's not empty...
151        if len(result_list) > 0:
152          # ...look for each default result in the dictionary of actual
153          # result fields returned. If found, add the field and its value
154          # to our final results dictionary.
155          for r in result_list:
156            if r in keyvals_dict:
157              val = keyvals_dict[r]
158              units = units_dict[r]
159              # Add the units to the key name, for the report.
160              newkey = r + " (" + units + ")"
161              results_dict[newkey] = val
162    if len(results_dict) == 0:
163      # We did not find/create any new entries.  Therefore use the keyvals_dict
164      # that was passed in, but update the entry names to have the units.
165      for k in keyvals_dict:
166        val = keyvals_dict[k]
167        units = units_dict[k]
168        newkey = k + " (" + units + ")"
169        results_dict[newkey] = val
170    keyvals_dict = results_dict
171    return keyvals_dict
172
173  def _GetKeyvals(self, show_all):
174    results_in_chroot = os.path.join(self._chromeos_root,
175                                     "chroot", "tmp")
176    if not self._temp_dir:
177      self._temp_dir = tempfile.mkdtemp(dir=results_in_chroot)
178      command = "cp -r {0}/* {1}".format(self.results_dir, self._temp_dir)
179      self._ce.RunCommand(command)
180
181    command = ("python generate_test_report --no-color --csv %s" %
182               (os.path.join("/tmp", os.path.basename(self._temp_dir))))
183    [_, out, _] = self._ce.ChrootRunCommand(self._chromeos_root,
184                                            command,
185                                            return_output=True)
186    keyvals_dict = {}
187    tmp_dir_in_chroot = misc.GetInsideChrootPath(self._chromeos_root,
188                                                 self._temp_dir)
189    for line in out.splitlines():
190      tokens = re.split("=|,", line)
191      key = tokens[-2]
192      if key.startswith(tmp_dir_in_chroot):
193        key = key[len(tmp_dir_in_chroot) + 1:]
194      value = tokens[-1]
195      keyvals_dict[key] = value
196
197    # Check to see if there is a perf_measurements file and get the
198    # data from it if so.
199    keyvals_dict, units_dict = self._GetNewKeyvals(keyvals_dict)
200    if not show_all and self.suite == "telemetry_Crosperf":
201      # We're running telemetry tests and the user did not ask to
202      # see all the results, so get the default results, to be used
203      # for generating the report.
204      keyvals_dict = self._GetTelemetryResultsKeyvals(keyvals_dict,
205                                                      units_dict)
206    return keyvals_dict
207
208  def _GetResultsDir(self):
209    mo = re.search(r"Results placed in (\S+)", self.out)
210    if mo:
211      result = mo.group(1)
212      return result
213    raise Exception("Could not find results directory.")
214
215  def _FindFilesInResultsDir(self, find_args):
216    if not self.results_dir:
217      return None
218
219    command = "find %s %s" % (self.results_dir,
220                              find_args)
221    ret, out, _ = self._ce.RunCommand(command, return_output=True)
222    if ret:
223      raise Exception("Could not run find command!")
224    return out
225
226  def _GetPerfDataFiles(self):
227    return self._FindFilesInResultsDir("-name perf.data").splitlines()
228
229  def _GetPerfReportFiles(self):
230    return self._FindFilesInResultsDir("-name perf.data.report").splitlines()
231
232  def _GetDataMeasurementsFiles(self):
233    return self._FindFilesInResultsDir("-name perf_measurements").splitlines()
234
235  def _GeneratePerfReportFiles(self):
236    perf_report_files = []
237    for perf_data_file in self.perf_data_files:
238      # Generate a perf.report and store it side-by-side with the perf.data
239      # file.
240      chroot_perf_data_file = misc.GetInsideChrootPath(self._chromeos_root,
241                                                       perf_data_file)
242      perf_report_file = "%s.report" % perf_data_file
243      if os.path.exists(perf_report_file):
244        raise Exception("Perf report file already exists: %s" %
245                        perf_report_file)
246      chroot_perf_report_file = misc.GetInsideChrootPath(self._chromeos_root,
247                                                         perf_report_file)
248      perf_path = os.path.join (self._chromeos_root,
249                                "chroot",
250                                "usr/bin/perf")
251
252      perf_file = "/usr/sbin/perf"
253      if os.path.exists(perf_path):
254        perf_file = "/usr/bin/perf"
255
256      command = ("%s report "
257                 "-n "
258                 "--symfs /build/%s "
259                 "--vmlinux /build/%s/usr/lib/debug/boot/vmlinux "
260                 "--kallsyms /build/%s/boot/System.map-* "
261                 "-i %s --stdio "
262                 "> %s" %
263                 (perf_file,
264                  self._board,
265                  self._board,
266                  self._board,
267                  chroot_perf_data_file,
268                  chroot_perf_report_file))
269      self._ce.ChrootRunCommand(self._chromeos_root,
270                                command)
271
272      # Add a keyval to the dictionary for the events captured.
273      perf_report_files.append(
274          misc.GetOutsideChrootPath(self._chromeos_root,
275                                    chroot_perf_report_file))
276    return perf_report_files
277
278  def _GatherPerfResults(self):
279    report_id = 0
280    for perf_report_file in self.perf_report_files:
281      with open(perf_report_file, "r") as f:
282        report_contents = f.read()
283        for group in re.findall(r"Events: (\S+) (\S+)", report_contents):
284          num_events = group[0]
285          event_name = group[1]
286          key = "perf_%s_%s" % (report_id, event_name)
287          value = str(misc.UnitToNumber(num_events))
288          self.keyvals[key] = value
289
290  def _PopulateFromRun(self, out, err, retval, show_all, test, suite):
291    self._board = self.label.board
292    self.out = out
293    self.err = err
294    self.retval = retval
295    self.test_name = test
296    self.suite = suite
297    self.chroot_results_dir = self._GetResultsDir()
298    self.results_dir = misc.GetOutsideChrootPath(self._chromeos_root,
299                                                 self.chroot_results_dir)
300    self.perf_data_files = self._GetPerfDataFiles()
301    # Include all perf.report data in table.
302    self.perf_report_files = self._GeneratePerfReportFiles()
303    # TODO(asharif): Do something similar with perf stat.
304
305    # Grab keyvals from the directory.
306    self._ProcessResults(show_all)
307
308  def _ProcessResults(self, show_all):
309    # Note that this function doesn't know anything about whether there is a
310    # cache hit or miss. It should process results agnostic of the cache hit
311    # state.
312    self.keyvals = self._GetKeyvals(show_all)
313    self.keyvals["retval"] = self.retval
314    # Generate report from all perf.data files.
315    # Now parse all perf report files and include them in keyvals.
316    self._GatherPerfResults()
317
318  def _PopulateFromCacheDir(self, cache_dir, show_all, test, suite):
319    self.test_name = test
320    self.suite = suite
321    # Read in everything from the cache directory.
322    with open(os.path.join(cache_dir, RESULTS_FILE), "r") as f:
323      self.out = pickle.load(f)
324      self.err = pickle.load(f)
325      self.retval = pickle.load(f)
326
327    # Untar the tarball to a temporary directory
328    self._temp_dir = tempfile.mkdtemp(dir=os.path.join(self._chromeos_root,
329                                                       "chroot", "tmp"))
330
331    command = ("cd %s && tar xf %s" %
332               (self._temp_dir,
333                os.path.join(cache_dir, AUTOTEST_TARBALL)))
334    ret = self._ce.RunCommand(command)
335    if ret:
336      raise Exception("Could not untar cached tarball")
337    self.results_dir = self._temp_dir
338    self.perf_data_files = self._GetPerfDataFiles()
339    self.perf_report_files = self._GetPerfReportFiles()
340    self._ProcessResults(show_all)
341
342  def CleanUp(self, rm_chroot_tmp):
343    if rm_chroot_tmp and self.results_dir:
344      command = "rm -rf %s" % self.results_dir
345      self._ce.RunCommand(command)
346    if self._temp_dir:
347      command = "rm -rf %s" % self._temp_dir
348      self._ce.RunCommand(command)
349
350  def StoreToCacheDir(self, cache_dir, machine_manager):
351    # Create the dir if it doesn't exist.
352    temp_dir = tempfile.mkdtemp()
353
354    # Store to the temp directory.
355    with open(os.path.join(temp_dir, RESULTS_FILE), "w") as f:
356      pickle.dump(self.out, f)
357      pickle.dump(self.err, f)
358      pickle.dump(self.retval, f)
359
360    if self.results_dir:
361      tarball = os.path.join(temp_dir, AUTOTEST_TARBALL)
362      command = ("cd %s && "
363                 "tar "
364                 "--exclude=var/spool "
365                 "--exclude=var/log "
366                 "-cjf %s ." % (self.results_dir, tarball))
367      ret = self._ce.RunCommand(command)
368      if ret:
369        raise Exception("Couldn't store autotest output directory.")
370    # Store machine info.
371    # TODO(asharif): Make machine_manager a singleton, and don't pass it into
372    # this function.
373    with open(os.path.join(temp_dir, MACHINE_FILE), "w") as f:
374      f.write(machine_manager.machine_checksum_string[self.label.name])
375
376    if os.path.exists(cache_dir):
377      command = "rm -rf {0}".format(cache_dir)
378      self._ce.RunCommand(command)
379
380    command = "mkdir -p {0} && ".format(os.path.dirname(cache_dir))
381    command += "chmod g+x {0} && ".format(temp_dir)
382    command += "mv {0} {1}".format(temp_dir, cache_dir)
383    ret = self._ce.RunCommand(command)
384    if ret:
385      command = "rm -rf {0}".format(temp_dir)
386      self._ce.RunCommand(command)
387      raise Exception("Could not move dir %s to dir %s" %
388                      (temp_dir, cache_dir))
389
390  @classmethod
391  def CreateFromRun(cls, logger, log_level, label, out, err, retval, show_all,
392                    test, suite="pyauto"):
393    if suite == "telemetry":
394      result = TelemetryResult(logger, label)
395    else:
396      result = cls(logger, label, log_level)
397    result._PopulateFromRun(out, err, retval, show_all, test, suite)
398    return result
399
400  @classmethod
401  def CreateFromCacheHit(cls, logger, log_level, label, cache_dir,
402                         show_all, test, suite="pyauto"):
403    if suite == "telemetry":
404      result = TelemetryResult(logger, label)
405    else:
406      result = cls(logger, label, log_level)
407    try:
408      result._PopulateFromCacheDir(cache_dir, show_all, test, suite)
409
410    except Exception as e:
411      logger.LogError("Exception while using cache: %s" % e)
412      return None
413    return result
414
415
class TelemetryResult(Result):
  """Result of a "telemetry" suite run, parsed from the CSV text in out.

  The factory methods on Result construct this class with (logger, label)
  only and call _PopulateFromCacheDir with the same arguments as for a
  plain Result, so the signatures here accept both calling conventions.
  """

  def __init__(self, logger, label, log_level="verbose"):
    """Create an empty telemetry result.

    Args:
      logger: Logger passed on to Result.__init__.
      label: Label passed on to Result.__init__.
      log_level: Log level passed on to Result.__init__, which requires it.
    """
    # NOTE(review): "verbose" is assumed to be a log level that
    # command_executer accepts -- confirm.
    super(TelemetryResult, self).__init__(logger, label, log_level)

  def _PopulateFromRun(self, out, err, retval, show_all, test, suite):
    """Fill in this result from the output of a finished run.

    Args:
      out: Captured standard output (the CSV text to parse).
      err: Captured standard error.
      retval: Return value of the run.
      show_all: Unused; all parsed fields are always kept.
      test: Name of the test.
      suite: Name of the suite.
    """
    self.out = out
    self.err = err
    self.retval = retval
    self.test_name = test
    self.suite = suite

    self._ProcessResults()

  def _ProcessResults(self, show_all=False):
    """Parse self.out into self.keyvals.

    Args:
      show_all: Unused; accepted so the signature is compatible with
        Result._ProcessResults.
    """
    # The output is:
    # url,average_commit_time (ms),...
    # www.google.com,33.4,21.2,...
    # We need to convert to this format:
    # {"www.google.com average_commit_time (ms)": "33.4",
    #  "www.google.com ...": "21.2"}
    # Added note:  Occasionally the output comes back
    # with "JSON.stringify(window.automation.GetResults())" on
    # the first line, and then the rest of the output as
    # described above.

    lines = self.out.splitlines()
    self.keyvals = {}

    if lines and lines[0].startswith("JSON.stringify"):
      lines = lines[1:]

    if lines:
      labels = lines[0].split(",")
      for line in lines[1:]:
        fields = line.split(",")
        if len(fields) != len(labels):
          # Rows that do not match the header cannot be mapped to labels.
          continue
        for column, value in zip(labels[1:], fields[1:]):
          self.keyvals["%s %s" % (fields[0], column)] = value
    # The return value is recorded even when there was no output to parse.
    self.keyvals["retval"] = self.retval

  def _PopulateFromCacheDir(self, cache_dir, show_all=False, test=None,
                            suite=None):
    """Fill in this result from the pickled output in a cache directory.

    Args:
      cache_dir: Cache entry directory to read from.
      show_all: Unused; all parsed fields are always kept.
      test: Name of the test.
      suite: Name of the suite.
    """
    self.test_name = test
    self.suite = suite
    # Pickle data is binary, so the file is opened in binary mode.
    # NOTE(review): unpickling can execute arbitrary code; the cache
    # directory has to be trusted.
    with open(os.path.join(cache_dir, RESULTS_FILE), "rb") as f:
      self.out = pickle.load(f)
      self.err = pickle.load(f)
      self.retval = pickle.load(f)
    self._ProcessResults()
466
467
class CacheConditions(object):
  """Integer flags that select when a cached result counts as a hit."""

  (
      # 0: Cache hit only if the result file exists.
      CACHE_FILE_EXISTS,
      # 1: Cache hit if the checksum of cpuinfo and totalmem of
      # the cached result and the new run match.
      MACHINES_MATCH,
      # 2: Cache hit if the image checksum of the cached result and the new
      # run match.
      CHECKSUMS_MATCH,
      # 3: Cache hit only if the cached result was successful.
      RUN_SUCCEEDED,
      # 4: Never a cache hit.
      FALSE,
      # 5: Cache hit if the image path matches the cached image path.
      IMAGE_PATH_MATCH,
      # 6: Cache hit if the uuid of the hard disk matches the cached one.
      SAME_MACHINE_MATCH,
  ) = range(7)
491
492
493class ResultsCache(object):
494
495  """ This class manages the key of the cached runs without worrying about what
496  is exactly stored (value). The value generation is handled by the Results
497  class.
498  """
499  CACHE_VERSION = 6
500
501  def Init(self, chromeos_image, chromeos_root, test_name, iteration,
502           test_args, profiler_args, machine_manager, board, cache_conditions,
503           logger_to_use, log_level, label, share_users, suite,
504           show_all_results):
505    self.chromeos_image = chromeos_image
506    self.chromeos_root = chromeos_root
507    self.test_name = test_name
508    self.iteration = iteration
509    self.test_args = test_args
510    self.profiler_args = profiler_args
511    self.board = board
512    self.cache_conditions = cache_conditions
513    self.machine_manager = machine_manager
514    self._logger = logger_to_use
515    self._ce = command_executer.GetCommandExecuter(self._logger,
516                                                   log_level=log_level)
517    self.label = label
518    self.share_users = share_users
519    self.suite = suite
520    self.log_level = log_level
521    self.show_all = show_all_results
522
523  def _GetCacheDirForRead(self):
524    matching_dirs = []
525    for glob_path in self._FormCacheDir(self._GetCacheKeyList(True)):
526      matching_dirs += glob.glob(glob_path)
527
528    if matching_dirs:
529      # Cache file found.
530      return matching_dirs[0]
531    else:
532      return None
533
534  def _GetCacheDirForWrite(self):
535    return self._FormCacheDir(self._GetCacheKeyList(False))[0]
536
537  def _FormCacheDir(self, list_of_strings):
538    cache_key = " ".join(list_of_strings)
539    cache_dir = misc.GetFilenameFromString(cache_key)
540    if self.label.cache_dir:
541      cache_home = os.path.abspath(os.path.expanduser(self.label.cache_dir))
542      cache_path = [os.path.join(cache_home, cache_dir)]
543    else:
544      cache_path = [os.path.join(SCRATCH_DIR, cache_dir)]
545
546    for i in [x.strip() for x in self.share_users.split(",")]:
547      path = SCRATCH_BASE % i
548      cache_path.append(os.path.join(path, cache_dir))
549
550    return cache_path
551
552  def _GetCacheKeyList(self, read):
553    if read and CacheConditions.MACHINES_MATCH not in self.cache_conditions:
554      machine_checksum = "*"
555    else:
556      machine_checksum = self.machine_manager.machine_checksum[self.label.name]
557    if read and CacheConditions.CHECKSUMS_MATCH not in self.cache_conditions:
558      checksum = "*"
559    elif self.label.image_type == "trybot":
560      checksum = hashlib.md5(self.label.chromeos_image).hexdigest()
561    elif self.label.image_type == "official":
562      checksum = "*"
563    else:
564      checksum = ImageChecksummer().Checksum(self.label, self.log_level)
565
566    if read and CacheConditions.IMAGE_PATH_MATCH not in self.cache_conditions:
567      image_path_checksum = "*"
568    else:
569      image_path_checksum = hashlib.md5(self.chromeos_image).hexdigest()
570
571    machine_id_checksum = ""
572    if read and CacheConditions.SAME_MACHINE_MATCH not in self.cache_conditions:
573      machine_id_checksum = "*"
574    else:
575      for machine in self.machine_manager.GetMachines(self.label):
576        if machine.name == self.label.remote[0]:
577          machine_id_checksum = machine.machine_id_checksum
578          break
579
580    temp_test_args = "%s %s" % (self.test_args, self.profiler_args)
581    test_args_checksum = hashlib.md5(
582        "".join(temp_test_args)).hexdigest()
583    return (image_path_checksum,
584            self.test_name, str(self.iteration),
585            test_args_checksum,
586            checksum,
587            machine_checksum,
588            machine_id_checksum,
589            str(self.CACHE_VERSION))
590
591  def ReadResult(self):
592    if CacheConditions.FALSE in self.cache_conditions:
593      cache_dir = self._GetCacheDirForWrite()
594      command = "rm -rf {0}".format(cache_dir)
595      self._ce.RunCommand(command)
596      return None
597    cache_dir = self._GetCacheDirForRead()
598
599    if not cache_dir:
600      return None
601
602    if not os.path.isdir(cache_dir):
603      return None
604
605    self._logger.LogOutput("Trying to read from cache dir: %s" % cache_dir)
606    result = Result.CreateFromCacheHit(self._logger,
607                                       self.log_level,
608                                       self.label,
609                                       cache_dir,
610                                       self.show_all,
611                                       self.test_name,
612                                       self.suite)
613    if not result:
614      return None
615
616    if (result.retval == 0 or
617        CacheConditions.RUN_SUCCEEDED not in self.cache_conditions):
618      return result
619
620    return None
621
622  def StoreResult(self, result):
623    cache_dir = self._GetCacheDirForWrite()
624    result.StoreToCacheDir(cache_dir, self.machine_manager)
625
626
class MockResultsCache(ResultsCache):
  """ResultsCache stand-in that never hits and never stores."""

  def Init(self, *args):
    """Accept and ignore all configuration arguments."""

  def ReadResult(self):
    """Always report a cache miss."""
    return None

  def StoreResult(self, result):
    """Discard the result."""
636
637
class MockResult(Result):
  """Result stand-in that only records the raw outcome of a run."""

  def _PopulateFromRun(self, out, err, retval, show_all, test, suite):
    """Keep out, err and retval; show_all, test and suite are ignored."""
    self.out, self.err, self.retval = out, err, retval
643