results_cache.py revision af5f70ecd0f07c76eb83eeb3e15d4bb168f9e8b1
1#!/usr/bin/python
2
3# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Module to deal with result cache."""
8
9import getpass
10import glob
11import hashlib
12import os
13import pickle
14import re
15import tempfile
16import json
17import sys
18
19from utils import command_executer
20from utils import misc
21
22from image_checksummer import ImageChecksummer
23
# Template for a user's scratch directory; "%s" is filled with a user name.
SCRATCH_BASE = "/home/%s/cros_scratch"
# Scratch directory of the user running this process.  ResultsCache uses it
# as the cache location when the label does not name a cache_dir.
SCRATCH_DIR = SCRATCH_BASE % getpass.getuser()
# Inside a cache entry: the pickled stdout, stderr and return value of a run.
RESULTS_FILE = "results.txt"
# Inside a cache entry: the machine checksum string of the run's label.
MACHINE_FILE = "machine.txt"
# Inside a cache entry: tarball of the run's results directory.
AUTOTEST_TARBALL = "autotest.tbz2"
# NOTE(review): not referenced by any code visible in this file.
PERF_RESULTS_FILE = "perf-results.txt"
# JSON file, looked up next to the running script, that maps a benchmark name
# to the list of result fields shown by default.
TELEMETRY_RESULT_DEFAULTS_FILE = "default-telemetry-results.json"
31
class Result(object):
  """Manages what is stored inside a cache entry, without knowing its key.

  A Result holds the stdout, stderr and return value of one run, the
  location of the run's results directory and the keyvals (name -> value)
  extracted from it.  For runs with perf, it also keeps track of the
  perf.data and perf.data.report files.  The key generation is handled by
  the ResultsCache class.
  """

  def __init__(self, logger, label):
    """Creates an empty result.

    Args:
      logger: Logger handed to the command executer.
      label: Label of the run.  Its chromeos_root is read here; board and
        name are read later by _PopulateFromRun() and StoreToCacheDir().
    """
    self._chromeos_root = label.chromeos_root
    self._logger = logger
    self._ce = command_executer.GetCommandExecuter(self._logger)
    self._temp_dir = None
    self._board = None
    self.label = label
    self.results_dir = None
    self.perf_data_files = []
    self.perf_report_files = []
    # Filled in by _PopulateFromRun() / _PopulateFromCacheDir().  They are
    # defined here so that every Result has the same set of attributes no
    # matter how it was populated.
    self.out = None
    self.err = None
    self.retval = None
    self.test_name = None
    self.suite = None
    self.keyvals = {}

  def _CopyFilesTo(self, dest_dir, files_to_copy):
    """Copies files into dest_dir as "<basename>.<index>".

    The index is the position of the file in files_to_copy, so that files
    sharing a basename (e.g. several perf.data files) do not overwrite each
    other.  dest_dir is created if it does not exist.

    Args:
      dest_dir: Directory to copy into.
      files_to_copy: Paths of the files to copy.

    Raises:
      Exception: if a file could not be copied.
    """
    for file_index, file_to_copy in enumerate(files_to_copy):
      if not os.path.isdir(dest_dir):
        command = "mkdir -p %s" % dest_dir
        self._ce.RunCommand(command)
      dest_name = "%s.%s" % (os.path.basename(file_to_copy), file_index)
      ret = self._ce.CopyFiles(file_to_copy,
                               os.path.join(dest_dir, dest_name),
                               recursive=False)
      if ret:
        raise Exception("Could not copy results file: %s" % file_to_copy)

  def CopyResultsTo(self, dest_dir):
    """Copies all perf.data and perf report files into dest_dir."""
    self._CopyFilesTo(dest_dir, self.perf_data_files)
    self._CopyFilesTo(dest_dir, self.perf_report_files)

  def _GetNewKeyvals(self, keyvals_dict):
    """Adds the contents of all perf_measurements files to keyvals_dict.

    Every non-blank line of such a file is parsed as a JSON object; its
    "graph", "description" and "value" fields become the entry
    "<graph>__<description>" -> value.

    Args:
      keyvals_dict: Dictionary to add the entries to (updated in place).

    Returns:
      keyvals_dict.
    """
    for f in self._GetDataMeasurementsFiles():
      # Make sure we can find the results file.
      if os.path.exists(f):
        data_filename = f
      else:
        # Otherwise look for a file of the same name in the temporary copy
        # of the results.  (The original expression also joined
        # chromeos_root and "/tmp" in front; os.path.join discards
        # everything before an absolute component, so with an absolute
        # temporary directory the resulting path is the same.)
        _, f_base = misc.GetRoot(f)
        data_filename = os.path.join(self._temp_dir, f_base)
      if not os.path.exists(data_filename):
        continue
      with open(data_filename, "r") as data_file:
        for line in data_file:
          # A blank (e.g. trailing) line is not a JSON document; skip it.
          if not line.strip():
            continue
          tmp_dict = json.loads(line)
          key = tmp_dict["graph"] + "__" + tmp_dict["description"]
          keyvals_dict[key] = tmp_dict["value"]

    return keyvals_dict

  def _GetTelemetryResultsKeyvals(self, keyvals_dict):
    """Reduces keyvals_dict to the default fields of the current benchmark.

    keyvals_dict is the dictionary of key-value pairs that is used for
    generating Crosperf reports.

    Telemetry tests return many values (fields) that are not of interest,
    so there is a json file that indicates, for each Telemetry benchmark,
    what the default return fields of interest are.

    This function reads that file into a dictionary, and finds the entry
    for the current benchmark (if it exists).  The entry contains a list of
    return fields to use in the report.  Each field of that list that is
    present in keyvals_dict is copied into a new results dictionary; fields
    missing from keyvals_dict are skipped.

    Args:
      keyvals_dict: All key-value pairs of the run.

    Returns:
      The reduced dictionary if at least one default field was found;
      otherwise keyvals_dict itself (no defaults file, no entry for this
      benchmark, or none of the default fields present).
    """
    # Find the Crosperf directory, and look there for the telemetry
    # results defaults file, if it exists.
    dirname, _ = misc.GetRoot(sys.argv[0])
    fullname = os.path.join(dirname, TELEMETRY_RESULT_DEFAULTS_FILE)
    if not os.path.exists(fullname):
      return keyvals_dict

    # Slurp the file into a dictionary.  The keys in the dictionary are
    # the benchmark names.  The value for a key is a list containing the
    # names of all the result fields that should be returned in a 'default'
    # report.
    with open(fullname) as defaults_file:
      result_defaults = json.load(defaults_file)

    # Check to see if the current benchmark test actually has an entry in
    # the dictionary.
    if not self.test_name or self.test_name not in result_defaults:
      return keyvals_dict

    results_dict = {}
    for r in result_defaults[self.test_name]:
      # Only copy the default fields that the run actually produced.
      if r in keyvals_dict:
        results_dict[r] = keyvals_dict[r]

    # If we actually found some of our default values, return the
    # abbreviated results dictionary, to be used in generating the final
    # report.
    if results_dict:
      return results_dict
    return keyvals_dict

  def _GetKeyvals(self, show_all):
    """Extracts the keyvals of the run from its results directory.

    The results are copied to a temporary directory below
    <chromeos_root>/chroot/tmp (unless one is already set up) and
    generate_test_report is run on it inside the chroot.  The last two
    "=" / "," separated tokens of every output line are taken as key and
    value.  Entries from perf_measurements files are added afterwards.

    Args:
      show_all: If false and the suite is "telemetry_Crosperf", only the
        default result fields of the benchmark are returned.

    Returns:
      Dictionary of key -> value.
    """
    results_in_chroot = os.path.join(self._chromeos_root,
                                     "chroot", "tmp")
    if not self._temp_dir:
      self._temp_dir = tempfile.mkdtemp(dir=results_in_chroot)
      command = "cp -r {0}/* {1}".format(self.results_dir, self._temp_dir)
      self._ce.RunCommand(command)

    # Inside the chroot the temporary directory is found below /tmp.
    command = ("python generate_test_report --no-color --csv %s" %
               (os.path.join("/tmp", os.path.basename(self._temp_dir))))
    [_, out, _] = self._ce.ChrootRunCommand(self._chromeos_root,
                                            command,
                                            return_output=True)
    keyvals_dict = {}
    tmp_dir_in_chroot = misc.GetInsideChrootPath(self._chromeos_root,
                                                 self._temp_dir)
    for line in out.splitlines():
      tokens = re.split("=|,", line)
      if len(tokens) < 2:
        # No separator on this line (e.g. a blank line): nothing to record.
        continue
      key = tokens[-2]
      if key.startswith(tmp_dir_in_chroot):
        # Strip the temporary directory (and the following "/") from keys
        # that are paths.
        key = key[len(tmp_dir_in_chroot) + 1:]
      keyvals_dict[key] = tokens[-1]

    # Check to see if there is a perf_measurements file and get the
    # data from it if so.
    keyvals_dict = self._GetNewKeyvals(keyvals_dict)
    if not show_all and self.suite == "telemetry_Crosperf":
      # We're running telemetry tests and the user did not ask to
      # see all the results, so get the default results, to be used
      # for generating the report.
      keyvals_dict = self._GetTelemetryResultsKeyvals(keyvals_dict)
    return keyvals_dict

  def _GetResultsDir(self):
    """Returns the results directory named in the run's output.

    Raises:
      Exception: if the output has no "Results placed in <dir>" message.
    """
    mo = re.search(r"Results placed in (\S+)", self.out)
    if mo:
      return mo.group(1)
    raise Exception("Could not find results directory.")

  def _FindFilesInResultsDir(self, find_args):
    """Runs "find <results_dir> <find_args>" and returns its output.

    Args:
      find_args: Arguments appended to the find command line.

    Returns:
      The output of find, or None if there is no results directory.

    Raises:
      Exception: if the find command failed.
    """
    if not self.results_dir:
      return None

    command = "find %s %s" % (self.results_dir,
                              find_args)
    ret, out, _ = self._ce.RunCommand(command, return_output=True)
    if ret:
      raise Exception("Could not run find command!")
    return out

  def _FindFileList(self, find_args):
    """Like _FindFilesInResultsDir(), but returns a list of paths.

    The list is empty when there is no results directory.
    """
    return (self._FindFilesInResultsDir(find_args) or "").splitlines()

  def _GetPerfDataFiles(self):
    """Returns the paths of all perf.data files in the results directory."""
    return self._FindFileList("-name perf.data")

  def _GetPerfReportFiles(self):
    """Returns the paths of all perf.data.report files in the results."""
    return self._FindFileList("-name perf.data.report")

  def _GetDataMeasurementsFiles(self):
    """Returns the paths of all perf_measurements files in the results."""
    return self._FindFileList("-name perf_measurements")

  def _GeneratePerfReportFiles(self):
    """Runs "perf report" in the chroot for every perf.data file.

    Each report is stored side-by-side with its perf.data file, as
    "<perf.data path>.report".

    Returns:
      List with the paths (outside of the chroot) of the generated reports.

    Raises:
      Exception: if a report file already exists.
    """
    perf_report_files = []
    for perf_data_file in self.perf_data_files:
      chroot_perf_data_file = misc.GetInsideChrootPath(self._chromeos_root,
                                                       perf_data_file)
      perf_report_file = "%s.report" % perf_data_file
      if os.path.exists(perf_report_file):
        raise Exception("Perf report file already exists: %s" %
                        perf_report_file)
      chroot_perf_report_file = misc.GetInsideChrootPath(self._chromeos_root,
                                                         perf_report_file)
      command = ("/usr/sbin/perf report "
                 "-n "
                 "--symfs /build/%s "
                 "--vmlinux /build/%s/usr/lib/debug/boot/vmlinux "
                 "--kallsyms /build/%s/boot/System.map-* "
                 "-i %s --stdio "
                 "> %s" %
                 (self._board,
                  self._board,
                  self._board,
                  chroot_perf_data_file,
                  chroot_perf_report_file))
      self._ce.ChrootRunCommand(self._chromeos_root,
                                command)

      perf_report_files.append(
          misc.GetOutsideChrootPath(self._chromeos_root,
                                    chroot_perf_report_file))
    return perf_report_files

  def _GatherPerfResults(self):
    """Adds the event counts of all perf reports to self.keyvals.

    For every "Events: <count> <event>" line of the report with index
    <id>, the entry "perf_<id>_<event>" is set to the count, converted with
    misc.UnitToNumber().  The index is the position of the report in
    self.perf_report_files, so that several reports do not overwrite each
    other's entries.
    """
    for report_id, perf_report_file in enumerate(self.perf_report_files):
      with open(perf_report_file, "r") as f:
        report_contents = f.read()
      for num_events, event_name in re.findall(r"Events: (\S+) (\S+)",
                                               report_contents):
        key = "perf_%s_%s" % (report_id, event_name)
        self.keyvals[key] = str(misc.UnitToNumber(num_events))

  def _PopulateFromRun(self, out, err, retval, show_all, test, suite):
    """Fills in this result from the output of a finished run.

    Args:
      out: stdout of the run; must contain "Results placed in <dir>".
      err: stderr of the run.
      retval: Return value of the run.
      show_all: Passed on to _ProcessResults().
      test: Name of the test that was run.
      suite: Name of the suite the test belongs to.
    """
    self._board = self.label.board
    self.out = out
    self.err = err
    self.retval = retval
    self.test_name = test
    self.suite = suite
    self.chroot_results_dir = self._GetResultsDir()
    self.results_dir = misc.GetOutsideChrootPath(self._chromeos_root,
                                                 self.chroot_results_dir)
    self.perf_data_files = self._GetPerfDataFiles()
    # Include all perf.report data in table.
    self.perf_report_files = self._GeneratePerfReportFiles()
    # TODO(asharif): Do something similar with perf stat.

    # Grab keyvals from the directory.
    self._ProcessResults(show_all)

  def _ProcessResults(self, show_all):
    """Computes self.keyvals from the results directory and perf reports.

    Note that this function doesn't know anything about whether there is a
    cache hit or miss. It should process results agnostic of the cache hit
    state.

    Args:
      show_all: Passed on to _GetKeyvals().
    """
    self.keyvals = self._GetKeyvals(show_all)
    self.keyvals["retval"] = self.retval
    # Now parse all perf report files and include them in keyvals.
    self._GatherPerfResults()

  def _PopulateFromCacheDir(self, cache_dir):
    """Fills in this result from a cache entry.

    Args:
      cache_dir: Directory of the cache entry, as written by
        StoreToCacheDir().

    Raises:
      Exception: if the cached tarball could not be extracted.
    """
    # Read in everything from the cache directory.  Pickle data is binary,
    # hence "rb".
    with open(os.path.join(cache_dir, RESULTS_FILE), "rb") as f:
      self.out = pickle.load(f)
      self.err = pickle.load(f)
      self.retval = pickle.load(f)

    # Untar the tarball to a temporary directory
    self._temp_dir = tempfile.mkdtemp(dir=os.path.join(self._chromeos_root,
                                                       "chroot", "tmp"))

    command = ("cd %s && tar xf %s" %
               (self._temp_dir,
                os.path.join(cache_dir, AUTOTEST_TARBALL)))
    ret = self._ce.RunCommand(command)
    if ret:
      raise Exception("Could not untar cached tarball")
    self.results_dir = self._temp_dir
    self.perf_data_files = self._GetPerfDataFiles()
    self.perf_report_files = self._GetPerfReportFiles()
    # A cache entry does not record the test name that is needed to select
    # the default telemetry fields, so all keyvals are kept.
    self._ProcessResults(True)

  def CleanUp(self, rm_chroot_tmp):
    """Removes the directories used by this result.

    Args:
      rm_chroot_tmp: If true, the results directory is removed as well;
        the temporary directory (if any) is always removed.
    """
    if rm_chroot_tmp and self.results_dir:
      command = "rm -rf %s" % self.results_dir
      self._ce.RunCommand(command)
    if self._temp_dir:
      command = "rm -rf %s" % self._temp_dir
      self._ce.RunCommand(command)

  def StoreToCacheDir(self, cache_dir, machine_manager):
    """Stores this result as the cache entry cache_dir.

    The entry is assembled in a temporary directory and then moved to
    cache_dir, replacing a previous entry.  It consists of the pickled
    stdout, stderr and return value, a tarball of the results directory
    (if there is one) and the machine checksum string of the label.

    Args:
      cache_dir: Directory of the cache entry.
      machine_manager: Provides machine_checksum_string, indexed by label
        name.

    Raises:
      Exception: if the tarball could not be created or the entry could not
        be moved into place.
    """
    temp_dir = tempfile.mkdtemp()
    try:
      # Store to the temp directory.  Pickle data is binary, hence "wb".
      with open(os.path.join(temp_dir, RESULTS_FILE), "wb") as f:
        pickle.dump(self.out, f)
        pickle.dump(self.err, f)
        pickle.dump(self.retval, f)

      if self.results_dir:
        tarball = os.path.join(temp_dir, AUTOTEST_TARBALL)
        command = ("cd %s && "
                   "tar "
                   "--exclude=var/spool "
                   "--exclude=var/log "
                   "-cjf %s ." % (self.results_dir, tarball))
        ret = self._ce.RunCommand(command)
        if ret:
          raise Exception("Couldn't store autotest output directory.")
      # Store machine info.
      # TODO(asharif): Make machine_manager a singleton, and don't pass it
      # into this function.
      with open(os.path.join(temp_dir, MACHINE_FILE), "w") as f:
        f.write(machine_manager.machine_checksum_string[self.label.name])

      if os.path.exists(cache_dir):
        command = "rm -rf {0}".format(cache_dir)
        self._ce.RunCommand(command)

      command = "mkdir -p {0} && ".format(os.path.dirname(cache_dir))
      command += "chmod g+x {0} && ".format(temp_dir)
      command += "mv {0} {1}".format(temp_dir, cache_dir)
      ret = self._ce.RunCommand(command)
      if ret:
        raise Exception("Could not move dir %s to dir %s" %
                        (temp_dir, cache_dir))
    except Exception:
      # Do not leave a half-written entry behind in the temp directory.
      self._ce.RunCommand("rm -rf {0}".format(temp_dir))
      raise

  @classmethod
  def CreateFromRun(cls, logger, label, out, err, retval, show_all, test,
                    suite="pyauto"):
    """Creates a result from the output of a finished run.

    Args:
      logger: Logger for the result.
      label: Label of the run.
      out: stdout of the run.
      err: stderr of the run.
      retval: Return value of the run.
      show_all: Whether all keyvals are wanted (see _GetKeyvals()).
      test: Name of the test that was run.
      suite: Name of the suite; "telemetry" yields a TelemetryResult.

    Returns:
      The populated result.
    """
    if suite == "telemetry":
      result = TelemetryResult(logger, label)
    else:
      result = cls(logger, label)
    result._PopulateFromRun(out, err, retval, show_all, test, suite)
    return result

  @classmethod
  def CreateFromCacheHit(cls, logger, label, cache_dir,
                         suite="pyauto"):
    """Creates a result from a cache entry.

    Args:
      logger: Logger for the result; also used to report a failure.
      label: Label of the run.
      cache_dir: Directory of the cache entry.
      suite: Name of the suite; "telemetry" yields a TelemetryResult.

    Returns:
      The populated result, or None if the cache entry could not be used.
    """
    if suite == "telemetry":
      result = TelemetryResult(logger, label)
    else:
      result = cls(logger, label)
    result.suite = suite
    try:
      result._PopulateFromCacheDir(cache_dir)

    except Exception as e:
      # An unusable cache entry is not fatal: report it and let the caller
      # treat it as a cache miss.
      logger.LogError("Exception while using cache: %s" % e)
      return None
    return result
369
370
class TelemetryResult(Result):
  """Result whose keyvals are parsed from the CSV text in the run's stdout.

  Only stdout, stderr and the return value are used; no results directory
  is involved.
  """

  def __init__(self, logger, label):
    super(TelemetryResult, self).__init__(logger, label)

  def _PopulateFromRun(self, out, err, retval, show_all, test, suite):
    """Stores the output of a finished run and parses the keyvals from it.

    show_all, test and suite are accepted to match the signature of
    Result._PopulateFromRun() and are not used.
    """
    self.out = out
    self.err = err
    self.retval = retval

    self._ProcessResults()

  def _ProcessResults(self):
    """Parses self.out into self.keyvals.

    The output is:
      url,average_commit_time (ms),...
      www.google.com,33.4,21.2,...
    It is converted to this format (first field and column label, joined
    by a space):
      {"www.google.com average_commit_time (ms)": "33.4",
       "www.google.com ...": "21.2"}
    Occasionally the output comes back with
    "JSON.stringify(window.automation.GetResults())" on the first line, and
    then the rest of the output as described above; that line is dropped.
    Lines whose number of fields differs from the header line are skipped.

    The return value of the run is always stored under "retval", also when
    there is no output to parse.
    """
    self.keyvals = {}

    lines = self.out.splitlines()
    if lines and lines[0].startswith("JSON.stringify"):
      lines = lines[1:]

    if lines:
      labels = lines[0].split(",")
      for line in lines[1:]:
        fields = line.split(",")
        if len(fields) != len(labels):
          continue
        for label, value in zip(labels[1:], fields[1:]):
          self.keyvals["%s %s" % (fields[0], label)] = value
    self.keyvals["retval"] = self.retval

  def _PopulateFromCacheDir(self, cache_dir):
    """Reads the pickled output from a cache entry and parses it.

    Args:
      cache_dir: Directory of the cache entry.
    """
    # Pickle data is binary, hence "rb".
    with open(os.path.join(cache_dir, RESULTS_FILE), "rb") as f:
      self.out = pickle.load(f)
      self.err = pickle.load(f)
      self.retval = pickle.load(f)
    self._ProcessResults()
421
422
class CacheConditions(object):
  """Conditions under which a cached result counts as a cache hit.

  CACHE_FILE_EXISTS:  hit only if the result file exists.
  MACHINES_MATCH:     hit if the checksum of cpuinfo and totalmem of the
                      cached result and the new run match.
  CHECKSUMS_MATCH:    hit if the image checksum of the cached result and the
                      new run match.
  RUN_SUCCEEDED:      hit only if the cached result was successful.
  FALSE:              never a cache hit.
  IMAGE_PATH_MATCH:   hit if the image path matches the cached image path.
  SAME_MACHINE_MATCH: hit if the uuid of the hard disk matches the cached
                      one.
  """

  # The values are consecutive integers, starting at 0, in this order.
  (CACHE_FILE_EXISTS,
   MACHINES_MATCH,
   CHECKSUMS_MATCH,
   RUN_SUCCEEDED,
   FALSE,
   IMAGE_PATH_MATCH,
   SAME_MACHINE_MATCH) = range(7)
446
447
class ResultsCache(object):
  """Manages the key of the cached runs, without knowing what is stored.

  The key is built from the image, the test, its arguments and the machine;
  the value generation is handled by the Result class.
  """
  CACHE_VERSION = 6

  def Init(self, chromeos_image, chromeos_root, test_name, iteration,
           test_args, profiler_args, machine_manager, board, cache_conditions,
           logger_to_use, label, share_users, suite):
    """Stores the parameters that identify one run.

    Args:
      chromeos_image: Path of the image; its md5 is part of the key.
      chromeos_root: ChromeOS root directory.
      test_name: Name of the test; part of the key.
      iteration: Iteration of the test; part of the key.
      test_args: Test arguments; hashed into the key.
      profiler_args: Profiler arguments; hashed into the key.
      machine_manager: Provides machine checksums and machines.
      board: Board of the run.
      cache_conditions: Collection of CacheConditions values.
      logger_to_use: Logger for messages and the command executer.
      label: Label of the run.
      share_users: Comma-separated user names whose scratch directories are
        searched as well.
      suite: Name of the suite of the test.
    """
    self.chromeos_image = chromeos_image
    self.chromeos_root = chromeos_root
    self.test_name = test_name
    self.iteration = iteration
    self.test_args = test_args
    self.profiler_args = profiler_args
    self.board = board
    self.cache_conditions = cache_conditions
    self.machine_manager = machine_manager
    self._logger = logger_to_use
    self._ce = command_executer.GetCommandExecuter(self._logger)
    self.label = label
    self.share_users = share_users
    self.suite = suite

  @staticmethod
  def _Md5Hex(text):
    """Returns the hex md5 digest of text (a text or byte string).

    Text that is not already a byte string is encoded as UTF-8 first, since
    hashlib only accepts bytes on Python 3.
    """
    if not isinstance(text, bytes):
      text = text.encode("utf-8")
    return hashlib.md5(text).hexdigest()

  def _GetShareUsers(self):
    """Returns the user names listed in share_users.

    Surrounding whitespace is stripped and empty entries are dropped, so an
    empty (or missing) share_users yields an empty list.
    """
    names = [x.strip() for x in (self.share_users or "").split(",")]
    return [name for name in names if name]

  def _GetCacheDirForRead(self):
    """Returns the first existing directory matching the read key, or None."""
    matching_dirs = []
    for glob_path in self._FormCacheDir(self._GetCacheKeyList(True)):
      matching_dirs += glob.glob(glob_path)

    if matching_dirs:
      # Cache file found.
      return matching_dirs[0]
    return None

  def _GetCacheDirForWrite(self):
    """Returns the directory a result of this run is stored in."""
    return self._FormCacheDir(self._GetCacheKeyList(False))[0]

  def _FormCacheDir(self, list_of_strings):
    """Returns the candidate cache directories for a key.

    Args:
      list_of_strings: Components of the cache key.

    Returns:
      List of paths.  The first one is below the label's cache_dir if set,
      else below the scratch directory of the current user; the others are
      below the scratch directories of the share_users.
    """
    cache_key = " ".join(list_of_strings)
    cache_dir = misc.GetFilenameFromString(cache_key)
    if self.label.cache_dir:
      cache_home = os.path.abspath(os.path.expanduser(self.label.cache_dir))
    else:
      cache_home = SCRATCH_DIR
    cache_path = [os.path.join(cache_home, cache_dir)]

    for user in self._GetShareUsers():
      cache_path.append(os.path.join(SCRATCH_BASE % user, cache_dir))

    return cache_path

  def _GetCacheKeyList(self, read):
    """Returns the components of the cache key.

    Args:
      read: True if the key is used to look up a result.  Components whose
        condition is not in cache_conditions are then replaced by "*".  For
        writing, every component gets its actual value.

    Returns:
      Tuple of strings.
    """
    if read and CacheConditions.MACHINES_MATCH not in self.cache_conditions:
      machine_checksum = "*"
    else:
      machine_checksum = self.machine_manager.machine_checksum[self.label.name]
    if read and CacheConditions.CHECKSUMS_MATCH not in self.cache_conditions:
      checksum = "*"
    else:
      checksum = ImageChecksummer().Checksum(self.label)

    if read and CacheConditions.IMAGE_PATH_MATCH not in self.cache_conditions:
      image_path_checksum = "*"
    else:
      image_path_checksum = self._Md5Hex(self.chromeos_image)

    machine_id_checksum = ""
    if read and CacheConditions.SAME_MACHINE_MATCH not in self.cache_conditions:
      machine_id_checksum = "*"
    else:
      # Use the checksum of the label's first remote machine; it stays
      # empty if that machine is not among the label's machines.
      for machine in self.machine_manager.GetMachines(self.label):
        if machine.name == self.label.remote[0]:
          machine_id_checksum = machine.machine_id_checksum
          break

    temp_test_args = "%s %s" % (self.test_args, self.profiler_args)
    test_args_checksum = self._Md5Hex(temp_test_args)
    return (image_path_checksum,
            self.test_name, str(self.iteration),
            test_args_checksum,
            checksum,
            machine_checksum,
            machine_id_checksum,
            str(self.CACHE_VERSION))

  def ReadResult(self):
    """Returns the cached result of this run, or None on a cache miss.

    With CacheConditions.FALSE the cache entry of this run is deleted and
    None is returned.  With CacheConditions.RUN_SUCCEEDED a cached result
    with a non-zero return value is not used.
    """
    if CacheConditions.FALSE in self.cache_conditions:
      cache_dir = self._GetCacheDirForWrite()
      command = "rm -rf {0}".format(cache_dir)
      self._ce.RunCommand(command)
      return None
    cache_dir = self._GetCacheDirForRead()

    if not cache_dir:
      return None

    if not os.path.isdir(cache_dir):
      return None

    self._logger.LogOutput("Trying to read from cache dir: %s" % cache_dir)
    result = Result.CreateFromCacheHit(self._logger,
                                       self.label,
                                       cache_dir,
                                       self.suite)
    if not result:
      return None

    if (result.retval == 0 or
        CacheConditions.RUN_SUCCEEDED not in self.cache_conditions):
      return result

    return None

  def StoreResult(self, result):
    """Stores result as the cache entry of this run."""
    cache_dir = self._GetCacheDirForWrite()
    result.StoreToCacheDir(cache_dir, self.machine_manager)
569
570
class MockResultsCache(ResultsCache):
  """ResultsCache stand-in: ignores its setup, never hits, never stores."""

  def Init(self, *args):
    """Accepts any positional arguments and ignores them."""

  def ReadResult(self):
    """Always reports a cache miss (returns None)."""
    return None

  def StoreResult(self, result):
    """Discards result without storing anything."""
580
581
class MockResult(Result):
  """Result stand-in that only records the raw output of a run."""

  def _PopulateFromRun(self, out, err, retval, show_all, test, suite):
    """Stores out, err and retval; show_all, test and suite are ignored."""
    self.out, self.err, self.retval = out, err, retval
587