1#!/usr/bin/python 2 3""" 4Copyright 2014 Google Inc. 5 6Use of this source code is governed by a BSD-style license that can be 7found in the LICENSE file. 8 9Compare results of two render_pictures runs. 10 11TODO(epoger): Start using this module to compare ALL images (whether they 12were generated from GMs or SKPs), and rename it accordingly. 13""" 14 15# System-level imports 16import logging 17import os 18import shutil 19import subprocess 20import tempfile 21import time 22 23# Must fix up PYTHONPATH before importing from within Skia 24import rs_fixpypath # pylint: disable=W0611 25 26# Imports from within Skia 27from py.utils import git_utils 28from py.utils import gs_utils 29from py.utils import url_utils 30import buildbot_globals 31import column 32import gm_json 33import imagediffdb 34import imagepair 35import imagepairset 36import results 37 38# URL under which all render_pictures images can be found in Google Storage. 39# 40# TODO(epoger): In order to allow live-view of GMs and other images, read this 41# from the input summary files, or allow the caller to set it within the 42# GET_live_results call. 43DEFAULT_IMAGE_BASE_GS_URL = 'gs://' + buildbot_globals.Get('skp_images_bucket') 44 45# Column descriptors, and display preferences for them. 46COLUMN__RESULT_TYPE = results.KEY__EXTRACOLUMNS__RESULT_TYPE 47COLUMN__SOURCE_SKP = 'sourceSkpFile' 48COLUMN__TILED_OR_WHOLE = 'tiledOrWhole' 49COLUMN__TILENUM = 'tilenum' 50COLUMN__BUILDER_A = 'builderA' 51COLUMN__RENDER_MODE_A = 'renderModeA' 52COLUMN__BUILDER_B = 'builderB' 53COLUMN__RENDER_MODE_B = 'renderModeB' 54# Known values for some of those columns. 
COLUMN__TILED_OR_WHOLE__TILED = 'tiled'
COLUMN__TILED_OR_WHOLE__WHOLE = 'whole'

# Columns whose headers are configured to use a freeform text filter.
FREEFORM_COLUMN_IDS = [
    COLUMN__SOURCE_SKP,
    COLUMN__TILENUM,
]
# Column order passed to ImagePairSet.as_dict() when packaging results.
ORDERED_COLUMN_IDS = [
    COLUMN__RESULT_TYPE,
    COLUMN__SOURCE_SKP,
    COLUMN__TILED_OR_WHOLE,
    COLUMN__TILENUM,
    COLUMN__BUILDER_A,
    COLUMN__RENDER_MODE_A,
    COLUMN__BUILDER_B,
    COLUMN__RENDER_MODE_B,
]

# A special "repo:" URL type that we use to refer to Skia repo contents.
# (Useful for comparing against expectations files we store in our repo.)
REPO_URL_PREFIX = 'repo:'
# Two directory levels above the directory that holds this file.
REPO_BASEPATH = os.path.abspath(os.path.join(
    os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir))

# Which sections within a JSON summary file can contain results.
ALLOWED_SECTION_NAMES = [
    gm_json.JSONKEY_ACTUALRESULTS,
    gm_json.JSONKEY_EXPECTEDRESULTS,
]


class RenderedPicturesComparisons(results.BaseComparisons):
  """Loads results from multiple render_pictures runs into an ImagePairSet.
  """

  def __init__(self,
               setA_dir, setB_dir,
               setA_section, setB_section,
               image_diff_db,
               image_base_gs_url=DEFAULT_IMAGE_BASE_GS_URL, diff_base_url=None,
               setA_label=None, setB_label=None,
               gs=None, truncate_results=False, prefetch_only=False,
               download_all_images=False):
    """Constructor: downloads images and generates diffs.

    Once the object has been created (which may take a while), you can call its
    get_packaged_results_of_type() method to quickly retrieve the results...
    unless you have set prefetch_only to True, in which case we will
    asynchronously warm up the ImageDiffDB cache but not fill in self._results.

    Args:
      setA_dir: root directory to copy all JSON summaries from, and to use as
          setA within the comparisons. This directory may be specified as a
          gs:// URL, special "repo:" URL, or local filepath.
      setB_dir: root directory to copy all JSON summaries from, and to use as
          setB within the comparisons. This directory may be specified as a
          gs:// URL, special "repo:" URL, or local filepath.
      setA_section: which section within setA to examine; must be one of
          ALLOWED_SECTION_NAMES
      setB_section: which section within setB to examine; must be one of
          ALLOWED_SECTION_NAMES
      image_diff_db: ImageDiffDB instance
      image_base_gs_url: "gs://" URL pointing at the Google Storage bucket/dir
          under which all render_pictures result images can
          be found; this will be used to read images for comparison within
          this code, and included in the ImagePairSet (as an HTTP URL) so its
          consumers know where to download the images from
      diff_base_url: base URL within which the client should look for diff
          images; if not specified, defaults to a "file:///" URL representation
          of image_diff_db's storage_root
      setA_label: description to use for results in setA; if None, will be
          set to a reasonable default
      setB_label: description to use for results in setB; if None, will be
          set to a reasonable default
      gs: instance of GSUtils object we can use to download summary files;
          required if setA_dir or setB_dir is a gs:// URL
      truncate_results: FOR MANUAL TESTING: if True, truncate the set of images
          we process, to speed up testing.
      prefetch_only: if True, return the new object as quickly as possible
          with empty self._results (just queue up all the files to process,
          don't wait around for them to be processed and recorded); otherwise,
          block until the results have been assembled and recorded in
          self._results.
      download_all_images: if True, download all images, even if we don't
          need them to generate diffs.  This will take much longer to complete,
          but is useful for warming up the bitmap cache on local disk.
    """
    super(RenderedPicturesComparisons, self).__init__()
    self._image_diff_db = image_diff_db
    self._image_base_gs_url = image_base_gs_url
    self._diff_base_url = (
        diff_base_url or
        url_utils.create_filepath_url(image_diff_db.storage_root))
    self._gs = gs
    self.truncate_results = truncate_results
    self._prefetch_only = prefetch_only
    self._download_all_images = download_all_images

    # If we are comparing two different section types, we can use those
    # as the default labels for setA and setB.
    if setA_section != setB_section:
      self._setA_label = setA_label or setA_section
      self._setB_label = setB_label or setB_section
    else:
      self._setA_label = setA_label or 'setA'
      self._setB_label = setB_label or 'setB'

    # All summaries are copied into a scratch dir, which is removed again
    # (even on failure) once the results have been loaded.
    tempdir = tempfile.mkdtemp()
    try:
      setA_root = os.path.join(tempdir, 'setA')
      setB_root = os.path.join(tempdir, 'setB')
      # TODO(stephana): There is a potential race condition here... we copy
      # the contents out of the source_dir, and THEN we get the commithash
      # of source_dir.  If source_dir points at a git checkout, and that
      # checkout is updated (by a different thread/process) during this
      # operation, then the contents and commithash will be out of sync.
      self._copy_dir_contents(source_dir=setA_dir, dest_dir=setA_root)
      setA_repo_revision = self._get_repo_revision(source_dir=setA_dir)
      self._copy_dir_contents(source_dir=setB_dir, dest_dir=setB_root)
      setB_repo_revision = self._get_repo_revision(source_dir=setB_dir)

      self._setA_descriptions = {
          results.KEY__SET_DESCRIPTIONS__DIR: setA_dir,
          results.KEY__SET_DESCRIPTIONS__REPO_REVISION: setA_repo_revision,
          results.KEY__SET_DESCRIPTIONS__SECTION: setA_section,
      }
      self._setB_descriptions = {
          results.KEY__SET_DESCRIPTIONS__DIR: setB_dir,
          results.KEY__SET_DESCRIPTIONS__REPO_REVISION: setB_repo_revision,
          results.KEY__SET_DESCRIPTIONS__SECTION: setB_section,
      }

      time_start = int(time.time())
      self._results = self._load_result_pairs(
          setA_root=setA_root, setB_root=setB_root,
          setA_section=setA_section, setB_section=setB_section)
      # In prefetch-only mode _load_result_pairs() returns None, so there is
      # no completed result set to timestamp or report on.
      if self._results:
        self._timestamp = int(time.time())
        logging.info('Number of download file collisions: %s',
                     imagediffdb.global_file_collisions)
        logging.info('Results complete; took %d seconds.',
                     self._timestamp - time_start)
    finally:
      shutil.rmtree(tempdir)

  def _load_result_pairs(self, setA_root, setB_root,
                         setA_section, setB_section):
    """Loads all JSON image summaries from 2 directory trees and compares them.

    TODO(stephana): This method is only called from within __init__(); it might
    make more sense to just roll the content of this method into __init__().

    Args:
      setA_root: root directory containing JSON summaries of rendering results
      setB_root: root directory containing JSON summaries of rendering results
      setA_section: which section (gm_json.JSONKEY_ACTUALRESULTS or
          gm_json.JSONKEY_EXPECTEDRESULTS) to load from the summaries in setA
      setB_section: which section (gm_json.JSONKEY_ACTUALRESULTS or
          gm_json.JSONKEY_EXPECTEDRESULTS) to load from the summaries in setB

    Returns the summary of all image diff results (or None, depending on
    self._prefetch_only).

    Raises:
      Exception (or KeyError) from _validate_dict_version() if any summary
      dict is not of the type/version we know how to read.
    """
    logging.info('Reading JSON image summaries from dirs %s and %s...',
                 setA_root, setB_root)
    setA_dicts = self.read_dicts_from_root(setA_root)
    setB_dicts = self.read_dicts_from_root(setB_root)
    logging.info('Comparing summary dicts...')

    all_image_pairs = imagepairset.ImagePairSet(
        descriptions=(self._setA_label, self._setB_label),
        diff_base_url=self._diff_base_url)
    failing_image_pairs = imagepairset.ImagePairSet(
        descriptions=(self._setA_label, self._setB_label),
        diff_base_url=self._diff_base_url)

    # Override settings for columns that should be filtered using freeform text.
    for column_id in FREEFORM_COLUMN_IDS:
      factory = column.ColumnHeaderFactory(
          header_text=column_id, use_freeform_filter=True)
      all_image_pairs.set_column_header_factory(
          column_id=column_id, column_header_factory=factory)
      failing_image_pairs.set_column_header_factory(
          column_id=column_id, column_header_factory=factory)

    all_image_pairs.ensure_extra_column_values_in_summary(
        column_id=COLUMN__RESULT_TYPE, values=[
            results.KEY__RESULT_TYPE__FAILED,
            results.KEY__RESULT_TYPE__NOCOMPARISON,
            results.KEY__RESULT_TYPE__SUCCEEDED,
        ])
    failing_image_pairs.ensure_extra_column_values_in_summary(
        column_id=COLUMN__RESULT_TYPE, values=[
            results.KEY__RESULT_TYPE__FAILED,
            results.KEY__RESULT_TYPE__NOCOMPARISON,
        ])

    logging.info('Starting to add imagepairs to queue.')
    self._image_diff_db.log_queue_size_if_changed(limit_verbosity=False)

    # Union of the summary paths seen in either set.  A set union (rather
    # than concatenating the keys() results) works whether keys() returns a
    # list or a dict view.
    union_dict_paths = sorted(
        set(setA_dicts.keys()) | set(setB_dicts.keys()))
    num_union_dict_paths = len(union_dict_paths)
    for dict_num, dict_path in enumerate(union_dict_paths, 1):
      logging.info(
          'Asynchronously requesting pixel diffs for dict #%d of %d, "%s"...',
          dict_num, num_union_dict_paths, dict_path)

      # A summary may be present in only one of the sets, in which case the
      # other side's dict is None and its results default to empty.
      dictA = self.get_default(setA_dicts, None, dict_path)
      self._validate_dict_version(dictA)
      dictA_results = self.get_default(dictA, {}, setA_section)

      dictB = self.get_default(setB_dicts, None, dict_path)
      self._validate_dict_version(dictB)
      dictB_results = self.get_default(dictB, {}, setB_section)

      # Prefer the image base URL recorded in each summary; fall back to the
      # one passed to the constructor.
      image_A_base_url = self.get_default(
          setA_dicts, self._image_base_gs_url, dict_path,
          gm_json.JSONKEY_IMAGE_BASE_GS_URL)
      image_B_base_url = self.get_default(
          setB_dicts, self._image_base_gs_url, dict_path,
          gm_json.JSONKEY_IMAGE_BASE_GS_URL)

      # get the builders and render modes for each set
      builder_A = self.get_default(dictA, None,
                                   gm_json.JSONKEY_DESCRIPTIONS,
                                   gm_json.JSONKEY_DESCRIPTIONS_BUILDER)
      render_mode_A = self.get_default(dictA, None,
                                       gm_json.JSONKEY_DESCRIPTIONS,
                                       gm_json.JSONKEY_DESCRIPTIONS_RENDER_MODE)
      builder_B = self.get_default(dictB, None,
                                   gm_json.JSONKEY_DESCRIPTIONS,
                                   gm_json.JSONKEY_DESCRIPTIONS_BUILDER)
      render_mode_B = self.get_default(dictB, None,
                                       gm_json.JSONKEY_DESCRIPTIONS,
                                       gm_json.JSONKEY_DESCRIPTIONS_RENDER_MODE)

      skp_names = sorted(
          set(dictA_results.keys()) | set(dictB_results.keys()))
      # Just for manual testing... truncate to an arbitrary subset.
      if self.truncate_results:
        skp_names = skp_names[1:3]
      for skp_name in skp_names:
        imagepairs_for_this_skp = []

        whole_image_A = self.get_default(
            dictA_results, None,
            skp_name, gm_json.JSONKEY_SOURCE_WHOLEIMAGE)
        whole_image_B = self.get_default(
            dictB_results, None,
            skp_name, gm_json.JSONKEY_SOURCE_WHOLEIMAGE)

        imagepairs_for_this_skp.append(self._create_image_pair(
            image_dict_A=whole_image_A, image_dict_B=whole_image_B,
            image_A_base_url=image_A_base_url,
            image_B_base_url=image_B_base_url,
            builder_A=builder_A, render_mode_A=render_mode_A,
            builder_B=builder_B, render_mode_B=render_mode_B,
            source_json_file=dict_path,
            source_skp_name=skp_name, tilenum=None))

        tiled_images_A = self.get_default(
            dictA_results, [],
            skp_name, gm_json.JSONKEY_SOURCE_TILEDIMAGES)
        tiled_images_B = self.get_default(
            dictB_results, [],
            skp_name, gm_json.JSONKEY_SOURCE_TILEDIMAGES)
        if tiled_images_A or tiled_images_B:
          num_tiles_A = len(tiled_images_A)
          num_tiles_B = len(tiled_images_B)
          # Pair tiles up by index; where one set has fewer tiles, the
          # missing side is passed as None.
          for tile_num in range(max(num_tiles_A, num_tiles_B)):
            imagepairs_for_this_skp.append(self._create_image_pair(
                image_dict_A=(tiled_images_A[tile_num]
                              if tile_num < num_tiles_A else None),
                image_dict_B=(tiled_images_B[tile_num]
                              if tile_num < num_tiles_B else None),
                image_A_base_url=image_A_base_url,
                image_B_base_url=image_B_base_url,
                builder_A=builder_A, render_mode_A=render_mode_A,
                builder_B=builder_B, render_mode_B=render_mode_B,
                source_json_file=dict_path,
                source_skp_name=skp_name, tilenum=tile_num))

        for one_imagepair in imagepairs_for_this_skp:
          # _create_image_pair() returns None when there is nothing to pair.
          if not one_imagepair:
            continue
          all_image_pairs.add_image_pair(one_imagepair)
          result_type = one_imagepair.extra_columns_dict[COLUMN__RESULT_TYPE]
          if result_type != results.KEY__RESULT_TYPE__SUCCEEDED:
            failing_image_pairs.add_image_pair(one_imagepair)

    logging.info('Finished adding imagepairs to queue.')
    self._image_diff_db.log_queue_size_if_changed(limit_verbosity=False)

    if self._prefetch_only:
      return None
    return {
        results.KEY__HEADER__RESULTS_ALL: all_image_pairs.as_dict(
            column_ids_in_order=ORDERED_COLUMN_IDS),
        results.KEY__HEADER__RESULTS_FAILURES: failing_image_pairs.as_dict(
            column_ids_in_order=ORDERED_COLUMN_IDS),
    }

  def _validate_dict_version(self, result_dict):
    """Raises Exception if the dict is not the type/version we know how to read.

    Args:
      result_dict: dictionary holding output of render_pictures; if None,
          this method will return without raising an Exception

    Raises:
      Exception: if the header type or header revision does not match the
          expected value.
      KeyError: if the header, or its type/revision field, is missing.
    """
    # TODO(stephana): These values should be defined as constants somewhere,
    # to be kept in sync between this file and writable_expectations.py
    expected_header_type = 'ChecksummedImages'
    expected_header_revision = 1

    if result_dict is None:
      return
    header = result_dict[gm_json.JSONKEY_HEADER]
    header_type = header[gm_json.JSONKEY_HEADER_TYPE]
    if header_type != expected_header_type:
      raise Exception('expected header_type "%s", but got "%s"' % (
          expected_header_type, header_type))
    header_revision = header[gm_json.JSONKEY_HEADER_REVISION]
    if header_revision != expected_header_revision:
      # Formatted with %s (not %d) so that a non-numeric revision still
      # produces this error rather than a TypeError from the formatting.
      raise Exception('expected header_revision %s, but got %s' % (
          expected_header_revision, header_revision))

  def _create_image_pair(self, image_dict_A, image_dict_B,
                         image_A_base_url, image_B_base_url,
                         builder_A, render_mode_A,
                         builder_B, render_mode_B,
                         source_json_file,
                         source_skp_name, tilenum):
    """Creates an ImagePair object for this pair of images.

    Args:
      image_dict_A: dict with JSONKEY_IMAGE_* keys, or None if no image
      image_dict_B: dict with JSONKEY_IMAGE_* keys, or None if no image
      image_A_base_url: base URL for image A
      image_B_base_url: base URL for image B
      builder_A: builder that created image set A or None if unknown
      render_mode_A: render mode used to generate image set A or None if
          unknown.
      builder_B: builder that created image set B or None if unknown
      render_mode_B: render mode used to generate image set B or None if
          unknown.
      source_json_file: string; relative path of the JSON file where this
          result came from, within setA and setB.
      source_skp_name: string; name of the source SKP file
      tilenum: which tile, or None if a wholeimage

    Returns:
      An ImagePair object, or None if both image_dict_A and image_dict_B are
      None (or if constructing the ImagePair raised KeyError/TypeError, in
      which case the exception is logged).
    """
    if (not image_dict_A) and (not image_dict_B):
      return None

    def _checksum_and_relative_url(dic):
      """Returns ((algorithm, int checksum), filepath), or (None, None)."""
      if not dic:
        return None, None
      checksum = (dic[gm_json.JSONKEY_IMAGE_CHECKSUMALGORITHM],
                  int(dic[gm_json.JSONKEY_IMAGE_CHECKSUMVALUE]))
      return checksum, dic[gm_json.JSONKEY_IMAGE_FILEPATH]

    imageA_checksum, imageA_relative_url = _checksum_and_relative_url(
        image_dict_A)
    imageB_checksum, imageB_relative_url = _checksum_and_relative_url(
        image_dict_B)

    # Without an image on both sides there is nothing to compare; otherwise
    # the pair succeeds only if the checksums match.
    if (not imageA_checksum) or (not imageB_checksum):
      result_type = results.KEY__RESULT_TYPE__NOCOMPARISON
    elif imageA_checksum == imageB_checksum:
      result_type = results.KEY__RESULT_TYPE__SUCCEEDED
    else:
      result_type = results.KEY__RESULT_TYPE__FAILED

    extra_columns_dict = {
        COLUMN__RESULT_TYPE: result_type,
        COLUMN__SOURCE_SKP: source_skp_name,
        COLUMN__BUILDER_A: builder_A,
        COLUMN__RENDER_MODE_A: render_mode_A,
        COLUMN__BUILDER_B: builder_B,
        COLUMN__RENDER_MODE_B: render_mode_B,
    }
    # tilenum 0 is a valid tile, so test identity against None rather than
    # truthiness.
    if tilenum is None:
      extra_columns_dict[COLUMN__TILED_OR_WHOLE] = (
          COLUMN__TILED_OR_WHOLE__WHOLE)
      extra_columns_dict[COLUMN__TILENUM] = 'N/A'
    else:
      extra_columns_dict[COLUMN__TILED_OR_WHOLE] = (
          COLUMN__TILED_OR_WHOLE__TILED)
      extra_columns_dict[COLUMN__TILENUM] = str(tilenum)

    try:
      return imagepair.ImagePair(
          image_diff_db=self._image_diff_db,
          imageA_base_url=image_A_base_url,
          imageB_base_url=image_B_base_url,
          imageA_relative_url=imageA_relative_url,
          imageB_relative_url=imageB_relative_url,
          extra_columns=extra_columns_dict,
          source_json_file=source_json_file,
          download_all_images=self._download_all_images)
    except (KeyError, TypeError):
      # Best effort: log the failure and skip this pair rather than aborting
      # the whole comparison.
      logging.exception(
          'got exception while creating ImagePair for'
          ' urlPair=("%s","%s"), source_skp_name="%s", tilenum="%s"',
          imageA_relative_url, imageB_relative_url, source_skp_name,
          tilenum)
      return None

  def _copy_dir_contents(self, source_dir, dest_dir):
    """Copy all contents of source_dir into dest_dir, recursing into subdirs.

    Args:
      source_dir: path to source dir (GS URL, local filepath, or a special
          "repo:" URL type that points at a file within our Skia checkout)
      dest_dir: path to destination dir (local filepath)

    For local and "repo:" sources the copy is done with shutil.copytree(),
    which creates dest_dir itself and fails if dest_dir already exists.
    For GS sources the copy is delegated to the GSUtils object passed to the
    constructor.

    Raises:
      Exception: if source_dir is a GS URL but no GSUtils object was passed
          to the constructor.
    """
    if gs_utils.GSUtils.is_gs_url(source_dir):
      if self._gs is None:
        raise Exception(
            'cannot download "%s": no GSUtils object was provided' %
            source_dir)
      (bucket, path) = gs_utils.GSUtils.split_gs_url(source_dir)
      self._gs.download_dir_contents(source_bucket=bucket, source_dir=path,
                                     dest_dir=dest_dir)
    elif source_dir.lower().startswith(REPO_URL_PREFIX):
      repo_dir = os.path.join(REPO_BASEPATH, source_dir[len(REPO_URL_PREFIX):])
      shutil.copytree(repo_dir, dest_dir)
    else:
      shutil.copytree(source_dir, dest_dir)

  def _get_repo_revision(self, source_dir):
    """Get the commit hash of source_dir, IF it refers to a git checkout.

    Args:
      source_dir: path to source dir (GS URL, local filepath, or a special
          "repo:" URL type that points at a file within our Skia checkout);
          only the "repo:" URL type will have a commit hash.

    Returns:
      The stripped output of "git rev-parse HEAD", run from within the
      directory that a "repo:" URL refers to; None for any other kind of
      source_dir.
    """
    if not source_dir.lower().startswith(REPO_URL_PREFIX):
      return None
    repo_dir = os.path.join(REPO_BASEPATH, source_dir[len(REPO_URL_PREFIX):])
    return subprocess.check_output(
        args=[git_utils.GIT, 'rev-parse', 'HEAD'], cwd=repo_dir).strip()