1#!/usr/bin/python
2
3"""
4Copyright 2014 Google Inc.
5
6Use of this source code is governed by a BSD-style license that can be
7found in the LICENSE file.
8
9Compare results of two render_pictures runs.
10
11TODO(epoger): Start using this module to compare ALL images (whether they
12were generated from GMs or SKPs), and rename it accordingly.
13"""
14
15# System-level imports
16import logging
17import os
18import shutil
19import subprocess
20import tempfile
21import time
22
23# Must fix up PYTHONPATH before importing from within Skia
24import rs_fixpypath  # pylint: disable=W0611
25
26# Imports from within Skia
27from py.utils import git_utils
28from py.utils import gs_utils
29from py.utils import url_utils
30import buildbot_globals
31import column
32import gm_json
33import imagediffdb
34import imagepair
35import imagepairset
36import results
37
38# URL under which all render_pictures images can be found in Google Storage.
39#
40# TODO(epoger): In order to allow live-view of GMs and other images, read this
41# from the input summary files, or allow the caller to set it within the
42# GET_live_results call.
43DEFAULT_IMAGE_BASE_GS_URL = 'gs://' + buildbot_globals.Get('skp_images_bucket')
44
45# Column descriptors, and display preferences for them.
46COLUMN__RESULT_TYPE = results.KEY__EXTRACOLUMNS__RESULT_TYPE
47COLUMN__SOURCE_SKP = 'sourceSkpFile'
48COLUMN__TILED_OR_WHOLE = 'tiledOrWhole'
49COLUMN__TILENUM = 'tilenum'
50COLUMN__BUILDER_A = 'builderA'
51COLUMN__RENDER_MODE_A = 'renderModeA'
52COLUMN__BUILDER_B = 'builderB'
53COLUMN__RENDER_MODE_B = 'renderModeB'
54# Known values for some of those columns.
55COLUMN__TILED_OR_WHOLE__TILED = 'tiled'
56COLUMN__TILED_OR_WHOLE__WHOLE = 'whole'
57
58FREEFORM_COLUMN_IDS = [
59    COLUMN__SOURCE_SKP,
60    COLUMN__TILENUM,
61]
62ORDERED_COLUMN_IDS = [
63    COLUMN__RESULT_TYPE,
64    COLUMN__SOURCE_SKP,
65    COLUMN__TILED_OR_WHOLE,
66    COLUMN__TILENUM,
67    COLUMN__BUILDER_A,
68    COLUMN__RENDER_MODE_A,
69    COLUMN__BUILDER_B,
70    COLUMN__RENDER_MODE_B,
71]
72
73# A special "repo:" URL type that we use to refer to Skia repo contents.
74# (Useful for comparing against expectations files we store in our repo.)
75REPO_URL_PREFIX = 'repo:'
76REPO_BASEPATH = os.path.abspath(os.path.join(
77    os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir))
78
79# Which sections within a JSON summary file can contain results.
80ALLOWED_SECTION_NAMES = [
81    gm_json.JSONKEY_ACTUALRESULTS,
82    gm_json.JSONKEY_EXPECTEDRESULTS,
83]
84
85
86class RenderedPicturesComparisons(results.BaseComparisons):
87  """Loads results from multiple render_pictures runs into an ImagePairSet.
88  """
89
90  def __init__(self,
91               setA_dir, setB_dir,
92               setA_section, setB_section,
93               image_diff_db,
94               image_base_gs_url=DEFAULT_IMAGE_BASE_GS_URL, diff_base_url=None,
95               setA_label=None, setB_label=None,
96               gs=None, truncate_results=False, prefetch_only=False,
97               download_all_images=False):
98    """Constructor: downloads images and generates diffs.
99
100    Once the object has been created (which may take a while), you can call its
101    get_packaged_results_of_type() method to quickly retrieve the results...
102    unless you have set prefetch_only to True, in which case we will
103    asynchronously warm up the ImageDiffDB cache but not fill in self._results.
104
105    Args:
106      setA_dir: root directory to copy all JSON summaries from, and to use as
107          setA within the comparisons. This directory may be specified as a
108          gs:// URL, special "repo:" URL, or local filepath.
109      setB_dir: root directory to copy all JSON summaries from, and to use as
110          setB within the comparisons. This directory may be specified as a
111          gs:// URL, special "repo:" URL, or local filepath.
112      setA_section: which section within setA to examine; must be one of
113          ALLOWED_SECTION_NAMES
114      setB_section: which section within setB to examine; must be one of
115          ALLOWED_SECTION_NAMES
116      image_diff_db: ImageDiffDB instance
117      image_base_gs_url: "gs://" URL pointing at the Google Storage bucket/dir
118          under which all render_pictures result images can
119          be found; this will be used to read images for comparison within
120          this code, and included in the ImagePairSet (as an HTTP URL) so its
121          consumers know where to download the images from
122      diff_base_url: base URL within which the client should look for diff
123          images; if not specified, defaults to a "file:///" URL representation
124          of image_diff_db's storage_root
125      setA_label: description to use for results in setA; if None, will be
126          set to a reasonable default
127      setB_label: description to use for results in setB; if None, will be
128          set to a reasonable default
129      gs: instance of GSUtils object we can use to download summary files
130      truncate_results: FOR MANUAL TESTING: if True, truncate the set of images
131          we process, to speed up testing.
132      prefetch_only: if True, return the new object as quickly as possible
133          with empty self._results (just queue up all the files to process,
134          don't wait around for them to be processed and recorded); otherwise,
135          block until the results have been assembled and recorded in
136          self._results.
137      download_all_images: if True, download all images, even if we don't
138          need them to generate diffs.  This will take much longer to complete,
139          but is useful for warming up the bitmap cache on local disk.
140    """
141    super(RenderedPicturesComparisons, self).__init__()
142    self._image_diff_db = image_diff_db
143    self._image_base_gs_url = image_base_gs_url
144    self._diff_base_url = (
145        diff_base_url or
146        url_utils.create_filepath_url(image_diff_db.storage_root))
147    self._gs = gs
148    self.truncate_results = truncate_results
149    self._prefetch_only = prefetch_only
150    self._download_all_images = download_all_images
151
152    # If we are comparing two different section types, we can use those
153    # as the default labels for setA and setB.
154    if setA_section != setB_section:
155      self._setA_label = setA_label or setA_section
156      self._setB_label = setB_label or setB_section
157    else:
158      self._setA_label = setA_label or 'setA'
159      self._setB_label = setB_label or 'setB'
160
161    tempdir = tempfile.mkdtemp()
162    try:
163      setA_root = os.path.join(tempdir, 'setA')
164      setB_root = os.path.join(tempdir, 'setB')
165      # TODO(stephana): There is a potential race condition here... we copy
166      # the contents out of the source_dir, and THEN we get the commithash
167      # of source_dir.  If source_dir points at a git checkout, and that
168      # checkout is updated (by a different thread/process) during this
169      # operation, then the contents and commithash will be out of sync.
170      self._copy_dir_contents(source_dir=setA_dir, dest_dir=setA_root)
171      setA_repo_revision = self._get_repo_revision(source_dir=setA_dir)
172      self._copy_dir_contents(source_dir=setB_dir, dest_dir=setB_root)
173      setB_repo_revision = self._get_repo_revision(source_dir=setB_dir)
174
175      self._setA_descriptions = {
176          results.KEY__SET_DESCRIPTIONS__DIR: setA_dir,
177          results.KEY__SET_DESCRIPTIONS__REPO_REVISION: setA_repo_revision,
178          results.KEY__SET_DESCRIPTIONS__SECTION: setA_section,
179      }
180      self._setB_descriptions = {
181          results.KEY__SET_DESCRIPTIONS__DIR: setB_dir,
182          results.KEY__SET_DESCRIPTIONS__REPO_REVISION: setB_repo_revision,
183          results.KEY__SET_DESCRIPTIONS__SECTION: setB_section,
184      }
185
186      time_start = int(time.time())
187      self._results = self._load_result_pairs(
188          setA_root=setA_root, setB_root=setB_root,
189          setA_section=setA_section, setB_section=setB_section)
190      if self._results:
191        self._timestamp = int(time.time())
192        logging.info('Number of download file collisions: %s' %
193                     imagediffdb.global_file_collisions)
194        logging.info('Results complete; took %d seconds.' %
195                     (self._timestamp - time_start))
196    finally:
197      shutil.rmtree(tempdir)
198
199  def _load_result_pairs(self, setA_root, setB_root,
200                         setA_section, setB_section):
201    """Loads all JSON image summaries from 2 directory trees and compares them.
202
203    TODO(stephana): This method is only called from within __init__(); it might
204    make more sense to just roll the content of this method into __init__().
205
206    Args:
207      setA_root: root directory containing JSON summaries of rendering results
208      setB_root: root directory containing JSON summaries of rendering results
209      setA_section: which section (gm_json.JSONKEY_ACTUALRESULTS or
210          gm_json.JSONKEY_EXPECTEDRESULTS) to load from the summaries in setA
211      setB_section: which section (gm_json.JSONKEY_ACTUALRESULTS or
212          gm_json.JSONKEY_EXPECTEDRESULTS) to load from the summaries in setB
213
214    Returns the summary of all image diff results (or None, depending on
215    self._prefetch_only).
216    """
217    logging.info('Reading JSON image summaries from dirs %s and %s...' % (
218        setA_root, setB_root))
219    setA_dicts = self.read_dicts_from_root(setA_root)
220    setB_dicts = self.read_dicts_from_root(setB_root)
221    logging.info('Comparing summary dicts...')
222
223    all_image_pairs = imagepairset.ImagePairSet(
224        descriptions=(self._setA_label, self._setB_label),
225        diff_base_url=self._diff_base_url)
226    failing_image_pairs = imagepairset.ImagePairSet(
227        descriptions=(self._setA_label, self._setB_label),
228        diff_base_url=self._diff_base_url)
229
230    # Override settings for columns that should be filtered using freeform text.
231    for column_id in FREEFORM_COLUMN_IDS:
232      factory = column.ColumnHeaderFactory(
233          header_text=column_id, use_freeform_filter=True)
234      all_image_pairs.set_column_header_factory(
235          column_id=column_id, column_header_factory=factory)
236      failing_image_pairs.set_column_header_factory(
237          column_id=column_id, column_header_factory=factory)
238
239    all_image_pairs.ensure_extra_column_values_in_summary(
240        column_id=COLUMN__RESULT_TYPE, values=[
241            results.KEY__RESULT_TYPE__FAILED,
242            results.KEY__RESULT_TYPE__NOCOMPARISON,
243            results.KEY__RESULT_TYPE__SUCCEEDED,
244        ])
245    failing_image_pairs.ensure_extra_column_values_in_summary(
246        column_id=COLUMN__RESULT_TYPE, values=[
247            results.KEY__RESULT_TYPE__FAILED,
248            results.KEY__RESULT_TYPE__NOCOMPARISON,
249        ])
250
251    logging.info('Starting to add imagepairs to queue.')
252    self._image_diff_db.log_queue_size_if_changed(limit_verbosity=False)
253
254    union_dict_paths = sorted(set(setA_dicts.keys() + setB_dicts.keys()))
255    num_union_dict_paths = len(union_dict_paths)
256    dict_num = 0
257    for dict_path in union_dict_paths:
258      dict_num += 1
259      logging.info(
260          'Asynchronously requesting pixel diffs for dict #%d of %d, "%s"...' %
261          (dict_num, num_union_dict_paths, dict_path))
262
263      dictA = self.get_default(setA_dicts, None, dict_path)
264      self._validate_dict_version(dictA)
265      dictA_results = self.get_default(dictA, {}, setA_section)
266
267      dictB = self.get_default(setB_dicts, None, dict_path)
268      self._validate_dict_version(dictB)
269      dictB_results = self.get_default(dictB, {}, setB_section)
270
271      image_A_base_url = self.get_default(
272          setA_dicts, self._image_base_gs_url, dict_path,
273          gm_json.JSONKEY_IMAGE_BASE_GS_URL)
274      image_B_base_url = self.get_default(
275          setB_dicts, self._image_base_gs_url, dict_path,
276          gm_json.JSONKEY_IMAGE_BASE_GS_URL)
277
278      # get the builders and render modes for each set
279      builder_A     = self.get_default(dictA, None,
280                        gm_json.JSONKEY_DESCRIPTIONS,
281                        gm_json.JSONKEY_DESCRIPTIONS_BUILDER)
282      render_mode_A = self.get_default(dictA, None,
283                        gm_json.JSONKEY_DESCRIPTIONS,
284                        gm_json.JSONKEY_DESCRIPTIONS_RENDER_MODE)
285      builder_B     = self.get_default(dictB, None,
286                        gm_json.JSONKEY_DESCRIPTIONS,
287                        gm_json.JSONKEY_DESCRIPTIONS_BUILDER)
288      render_mode_B = self.get_default(dictB, None,
289                        gm_json.JSONKEY_DESCRIPTIONS,
290                        gm_json.JSONKEY_DESCRIPTIONS_RENDER_MODE)
291
292      skp_names = sorted(set(dictA_results.keys() + dictB_results.keys()))
293      # Just for manual testing... truncate to an arbitrary subset.
294      if self.truncate_results:
295        skp_names = skp_names[1:3]
296      for skp_name in skp_names:
297        imagepairs_for_this_skp = []
298
299        whole_image_A = self.get_default(
300            dictA_results, None,
301            skp_name, gm_json.JSONKEY_SOURCE_WHOLEIMAGE)
302        whole_image_B = self.get_default(
303            dictB_results, None,
304            skp_name, gm_json.JSONKEY_SOURCE_WHOLEIMAGE)
305
306        imagepairs_for_this_skp.append(self._create_image_pair(
307            image_dict_A=whole_image_A, image_dict_B=whole_image_B,
308            image_A_base_url=image_A_base_url,
309            image_B_base_url=image_B_base_url,
310            builder_A=builder_A, render_mode_A=render_mode_A,
311            builder_B=builder_B, render_mode_B=render_mode_B,
312            source_json_file=dict_path,
313            source_skp_name=skp_name, tilenum=None))
314
315        tiled_images_A = self.get_default(
316            dictA_results, [],
317            skp_name, gm_json.JSONKEY_SOURCE_TILEDIMAGES)
318        tiled_images_B = self.get_default(
319            dictB_results, [],
320            skp_name, gm_json.JSONKEY_SOURCE_TILEDIMAGES)
321        if tiled_images_A or tiled_images_B:
322          num_tiles_A = len(tiled_images_A)
323          num_tiles_B = len(tiled_images_B)
324          num_tiles = max(num_tiles_A, num_tiles_B)
325          for tile_num in range(num_tiles):
326            imagepairs_for_this_skp.append(self._create_image_pair(
327                image_dict_A=(tiled_images_A[tile_num]
328                              if tile_num < num_tiles_A else None),
329                image_dict_B=(tiled_images_B[tile_num]
330                              if tile_num < num_tiles_B else None),
331                image_A_base_url=image_A_base_url,
332                image_B_base_url=image_B_base_url,
333                builder_A=builder_A, render_mode_A=render_mode_A,
334                builder_B=builder_B, render_mode_B=render_mode_B,
335                source_json_file=dict_path,
336                source_skp_name=skp_name, tilenum=tile_num))
337
338        for one_imagepair in imagepairs_for_this_skp:
339          if one_imagepair:
340            all_image_pairs.add_image_pair(one_imagepair)
341            result_type = one_imagepair.extra_columns_dict\
342                [COLUMN__RESULT_TYPE]
343            if result_type != results.KEY__RESULT_TYPE__SUCCEEDED:
344              failing_image_pairs.add_image_pair(one_imagepair)
345
346    logging.info('Finished adding imagepairs to queue.')
347    self._image_diff_db.log_queue_size_if_changed(limit_verbosity=False)
348
349    if self._prefetch_only:
350      return None
351    else:
352      return {
353          results.KEY__HEADER__RESULTS_ALL: all_image_pairs.as_dict(
354              column_ids_in_order=ORDERED_COLUMN_IDS),
355          results.KEY__HEADER__RESULTS_FAILURES: failing_image_pairs.as_dict(
356              column_ids_in_order=ORDERED_COLUMN_IDS),
357      }
358
359  def _validate_dict_version(self, result_dict):
360    """Raises Exception if the dict is not the type/version we know how to read.
361
362    Args:
363      result_dict: dictionary holding output of render_pictures; if None,
364          this method will return without raising an Exception
365    """
366    # TODO(stephana): These values should be defined as constants somewhere,
367    # to be kept in sync between this file and writable_expectations.py
368    expected_header_type = 'ChecksummedImages'
369    expected_header_revision = 1
370
371    if result_dict == None:
372      return
373    header = result_dict[gm_json.JSONKEY_HEADER]
374    header_type = header[gm_json.JSONKEY_HEADER_TYPE]
375    if header_type != expected_header_type:
376      raise Exception('expected header_type "%s", but got "%s"' % (
377          expected_header_type, header_type))
378    header_revision = header[gm_json.JSONKEY_HEADER_REVISION]
379    if header_revision != expected_header_revision:
380      raise Exception('expected header_revision %d, but got %d' % (
381          expected_header_revision, header_revision))
382
383  def _create_image_pair(self, image_dict_A, image_dict_B,
384                         image_A_base_url, image_B_base_url,
385                         builder_A, render_mode_A,
386                         builder_B, render_mode_B,
387                         source_json_file,
388                         source_skp_name, tilenum):
389    """Creates an ImagePair object for this pair of images.
390
391    Args:
392      image_dict_A: dict with JSONKEY_IMAGE_* keys, or None if no image
393      image_dict_B: dict with JSONKEY_IMAGE_* keys, or None if no image
394      image_A_base_url: base URL for image A
395      image_B_base_url: base URL for image B
396      builder_A: builder that created image set A or None if unknow
397      render_mode_A: render mode used to generate image set A or None if
398                     unknown.
399      builder_B: builder that created image set A or None if unknow
400      render_mode_B: render mode used to generate image set A or None if
401                     unknown.
402      source_json_file: string; relative path of the JSON file where this
403                        result came from, within setA and setB.
404      source_skp_name: string; name of the source SKP file
405      tilenum: which tile, or None if a wholeimage
406
407    Returns:
408      An ImagePair object, or None if both image_dict_A and image_dict_B are
409      None.
410    """
411    if (not image_dict_A) and (not image_dict_B):
412      return None
413
414    def _checksum_and_relative_url(dic):
415      if dic:
416        return ((dic[gm_json.JSONKEY_IMAGE_CHECKSUMALGORITHM],
417                 int(dic[gm_json.JSONKEY_IMAGE_CHECKSUMVALUE])),
418                dic[gm_json.JSONKEY_IMAGE_FILEPATH])
419      else:
420        return None, None
421
422    imageA_checksum, imageA_relative_url = _checksum_and_relative_url(
423        image_dict_A)
424    imageB_checksum, imageB_relative_url = _checksum_and_relative_url(
425        image_dict_B)
426
427    if not imageA_checksum:
428      result_type = results.KEY__RESULT_TYPE__NOCOMPARISON
429    elif not imageB_checksum:
430      result_type = results.KEY__RESULT_TYPE__NOCOMPARISON
431    elif imageA_checksum == imageB_checksum:
432      result_type = results.KEY__RESULT_TYPE__SUCCEEDED
433    else:
434      result_type = results.KEY__RESULT_TYPE__FAILED
435
436    extra_columns_dict = {
437        COLUMN__RESULT_TYPE: result_type,
438        COLUMN__SOURCE_SKP: source_skp_name,
439        COLUMN__BUILDER_A: builder_A,
440        COLUMN__RENDER_MODE_A: render_mode_A,
441        COLUMN__BUILDER_B: builder_B,
442        COLUMN__RENDER_MODE_B: render_mode_B,
443    }
444    if tilenum == None:
445      extra_columns_dict[COLUMN__TILED_OR_WHOLE] = COLUMN__TILED_OR_WHOLE__WHOLE
446      extra_columns_dict[COLUMN__TILENUM] = 'N/A'
447    else:
448      extra_columns_dict[COLUMN__TILED_OR_WHOLE] = COLUMN__TILED_OR_WHOLE__TILED
449      extra_columns_dict[COLUMN__TILENUM] = str(tilenum)
450
451    try:
452      return imagepair.ImagePair(
453          image_diff_db=self._image_diff_db,
454          imageA_base_url=image_A_base_url,
455          imageB_base_url=image_B_base_url,
456          imageA_relative_url=imageA_relative_url,
457          imageB_relative_url=imageB_relative_url,
458          extra_columns=extra_columns_dict,
459          source_json_file=source_json_file,
460          download_all_images=self._download_all_images)
461    except (KeyError, TypeError):
462      logging.exception(
463          'got exception while creating ImagePair for'
464          ' urlPair=("%s","%s"), source_skp_name="%s", tilenum="%s"' % (
465              imageA_relative_url, imageB_relative_url, source_skp_name,
466              tilenum))
467      return None
468
469  def _copy_dir_contents(self, source_dir, dest_dir):
470    """Copy all contents of source_dir into dest_dir, recursing into subdirs.
471
472    Args:
473      source_dir: path to source dir (GS URL, local filepath, or a special
474          "repo:" URL type that points at a file within our Skia checkout)
475      dest_dir: path to destination dir (local filepath)
476
477    The copy operates as a "merge with overwrite": any files in source_dir will
478    be "overlaid" on top of the existing content in dest_dir.  Existing files
479    with the same names will be overwritten.
480    """
481    if gs_utils.GSUtils.is_gs_url(source_dir):
482      (bucket, path) = gs_utils.GSUtils.split_gs_url(source_dir)
483      self._gs.download_dir_contents(source_bucket=bucket, source_dir=path,
484                                     dest_dir=dest_dir)
485    elif source_dir.lower().startswith(REPO_URL_PREFIX):
486      repo_dir = os.path.join(REPO_BASEPATH, source_dir[len(REPO_URL_PREFIX):])
487      shutil.copytree(repo_dir, dest_dir)
488    else:
489      shutil.copytree(source_dir, dest_dir)
490
491  def _get_repo_revision(self, source_dir):
492    """Get the commit hash of source_dir, IF it refers to a git checkout.
493
494    Args:
495      source_dir: path to source dir (GS URL, local filepath, or a special
496          "repo:" URL type that points at a file within our Skia checkout;
497          only the "repo:" URL type will have a commit hash.
498    """
499    if source_dir.lower().startswith(REPO_URL_PREFIX):
500      repo_dir = os.path.join(REPO_BASEPATH, source_dir[len(REPO_URL_PREFIX):])
501      return subprocess.check_output(
502          args=[git_utils.GIT, 'rev-parse', 'HEAD'], cwd=repo_dir).strip()
503    else:
504      return None
505