1# Copyright (C) 2011, Google Inc. All rights reserved. 2# 3# Redistribution and use in source and binary forms, with or without 4# modification, are permitted provided that the following conditions are 5# met: 6# 7# * Redistributions of source code must retain the above copyright 8# notice, this list of conditions and the following disclaimer. 9# * Redistributions in binary form must reproduce the above 10# copyright notice, this list of conditions and the following disclaimer 11# in the documentation and/or other materials provided with the 12# distribution. 13# * Neither the name of Google Inc. nor the names of its 14# contributors may be used to endorse or promote products derived from 15# this software without specific prior written permission. 16# 17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29import copy 30import logging 31 32from webkitpy.common.memoized import memoized 33 34_log = logging.getLogger(__name__) 35 36 37# FIXME: Should this function be somewhere more general? 38def _invert_dictionary(dictionary): 39 inverted_dictionary = {} 40 for key, value in dictionary.items(): 41 if inverted_dictionary.get(value): 42 inverted_dictionary[value].append(key) 43 else: 44 inverted_dictionary[value] = [key] 45 return inverted_dictionary 46 47 48class BaselineOptimizer(object): 49 ROOT_LAYOUT_TESTS_DIRECTORY = 'LayoutTests' 50 51 def __init__(self, host, port, port_names, skip_scm_commands): 52 self._filesystem = host.filesystem 53 self._skip_scm_commands = skip_scm_commands 54 self._files_to_delete = [] 55 self._files_to_add = [] 56 self._scm = host.scm() 57 self._default_port = port 58 self._ports = {} 59 for port_name in port_names: 60 self._ports[port_name] = host.port_factory.get(port_name) 61 62 self._webkit_base = port.webkit_base() 63 self._layout_tests_dir = port.layout_tests_dir() 64 65 # Only used by unittests. 66 self.new_results_by_directory = [] 67 68 def _baseline_root(self, baseline_name): 69 virtual_suite = self._virtual_suite(baseline_name) 70 if virtual_suite: 71 return self._filesystem.join(self.ROOT_LAYOUT_TESTS_DIRECTORY, virtual_suite.name) 72 return self.ROOT_LAYOUT_TESTS_DIRECTORY 73 74 def _baseline_search_path(self, port, baseline_name): 75 virtual_suite = self._virtual_suite(baseline_name) 76 if virtual_suite: 77 return port.virtual_baseline_search_path(baseline_name) 78 return port.baseline_search_path() 79 80 def _virtual_suite(self, baseline_name): 81 return self._default_port.lookup_virtual_suite(baseline_name) 82 83 def _virtual_base(self, baseline_name): 84 return self._default_port.lookup_virtual_test_base(baseline_name) 85 86 def _relative_baseline_search_paths(self, port, baseline_name): 87 baseline_search_path = self._baseline_search_path(port, baseline_name) 88 baseline_root = self._baseline_root(baseline_name) 89 relative_paths = [self._filesystem.relpath(path, self._webkit_base) for path in baseline_search_path] 90 return relative_paths + [baseline_root] 91 92 def _join_directory(self, directory, baseline_name): 93 # This code is complicated because both the directory name and the baseline_name have the virtual 94 # test suite in the name and the virtual baseline name is not a strict superset of the non-virtual name. 95 # For example, virtual/gpu/fast/canvas/foo-expected.png corresponds to fast/canvas/foo-expected.png and 96 # the baseline directories are like platform/mac/virtual/gpu/fast/canvas. So, to get the path 97 # to the baseline in the platform directory, we need to append jsut foo-expected.png to the directory. 98 virtual_suite = self._virtual_suite(baseline_name) 99 if virtual_suite: 100 baseline_name_without_virtual = baseline_name[len(virtual_suite.name) + 1:] 101 else: 102 baseline_name_without_virtual = baseline_name 103 return self._filesystem.join(self._scm.checkout_root, directory, baseline_name_without_virtual) 104 105 def read_results_by_directory(self, baseline_name): 106 results_by_directory = {} 107 directories = reduce(set.union, map(set, [self._relative_baseline_search_paths(port, baseline_name) for port in self._ports.values()])) 108 109 for directory in directories: 110 path = self._join_directory(directory, baseline_name) 111 if self._filesystem.exists(path): 112 results_by_directory[directory] = self._filesystem.sha1(path) 113 return results_by_directory 114 115 def _results_by_port_name(self, results_by_directory, baseline_name): 116 results_by_port_name = {} 117 for port_name, port in self._ports.items(): 118 for directory in self._relative_baseline_search_paths(port, baseline_name): 119 if directory in results_by_directory: 120 results_by_port_name[port_name] = results_by_directory[directory] 121 break 122 return results_by_port_name 123 124 @memoized 125 def _directories_immediately_preceding_root(self, baseline_name): 126 directories = set() 127 for port in self._ports.values(): 128 directory = self._filesystem.relpath(self._baseline_search_path(port, baseline_name)[-1], self._webkit_base) 129 directories.add(directory) 130 return directories 131 132 def _optimize_result_for_root(self, new_results_by_directory, baseline_name): 133 # The root directory (i.e. LayoutTests) is the only one that doesn't correspond 134 # to a specific platform. As such, it's the only one where the baseline in fallback directories 135 # immediately before it can be promoted up, i.e. if win and mac 136 # have the same baseline, then it can be promoted up to be the LayoutTests baseline. 137 # All other baselines can only be removed if they're redundant with a baseline earlier 138 # in the fallback order. They can never promoted up. 139 directories_immediately_preceding_root = self._directories_immediately_preceding_root(baseline_name) 140 141 shared_result = None 142 root_baseline_unused = False 143 for directory in directories_immediately_preceding_root: 144 this_result = new_results_by_directory.get(directory) 145 146 # If any of these directories don't have a baseline, there's no optimization we can do. 147 if not this_result: 148 return 149 150 if not shared_result: 151 shared_result = this_result 152 elif shared_result != this_result: 153 root_baseline_unused = True 154 155 baseline_root = self._baseline_root(baseline_name) 156 157 # The root baseline is unused if all the directories immediately preceding the root 158 # have a baseline, but have different baselines, so the baselines can't be promoted up. 159 if root_baseline_unused: 160 if baseline_root in new_results_by_directory: 161 del new_results_by_directory[baseline_root] 162 return 163 164 new_results_by_directory[baseline_root] = shared_result 165 for directory in directories_immediately_preceding_root: 166 del new_results_by_directory[directory] 167 168 def _find_optimal_result_placement(self, baseline_name): 169 results_by_directory = self.read_results_by_directory(baseline_name) 170 results_by_port_name = self._results_by_port_name(results_by_directory, baseline_name) 171 port_names_by_result = _invert_dictionary(results_by_port_name) 172 173 new_results_by_directory = self._remove_redundant_results(results_by_directory, results_by_port_name, port_names_by_result, baseline_name) 174 self._optimize_result_for_root(new_results_by_directory, baseline_name) 175 176 return results_by_directory, new_results_by_directory 177 178 def _remove_redundant_results(self, results_by_directory, results_by_port_name, port_names_by_result, baseline_name): 179 new_results_by_directory = copy.copy(results_by_directory) 180 for port_name, port in self._ports.items(): 181 current_result = results_by_port_name.get(port_name) 182 183 # This happens if we're missing baselines for a port. 184 if not current_result: 185 continue; 186 187 fallback_path = self._relative_baseline_search_paths(port, baseline_name) 188 current_index, current_directory = self._find_in_fallbackpath(fallback_path, current_result, new_results_by_directory) 189 for index in range(current_index + 1, len(fallback_path)): 190 new_directory = fallback_path[index] 191 if not new_directory in new_results_by_directory: 192 # No result for this baseline in this directory. 193 continue 194 elif new_results_by_directory[new_directory] == current_result: 195 # Result for new_directory are redundant with the result earlier in the fallback order. 196 if current_directory in new_results_by_directory: 197 del new_results_by_directory[current_directory] 198 else: 199 # The new_directory contains a different result, so stop trying to push results up. 200 break 201 202 return new_results_by_directory 203 204 def _find_in_fallbackpath(self, fallback_path, current_result, results_by_directory): 205 for index, directory in enumerate(fallback_path): 206 if directory in results_by_directory and (results_by_directory[directory] == current_result): 207 return index, directory 208 assert False, "result %s not found in fallback_path %s, %s" % (current_result, fallback_path, results_by_directory) 209 210 def _platform(self, filename): 211 platform_dir = self.ROOT_LAYOUT_TESTS_DIRECTORY + self._filesystem.sep + 'platform' + self._filesystem.sep 212 if filename.startswith(platform_dir): 213 return filename.replace(platform_dir, '').split(self._filesystem.sep)[0] 214 platform_dir = self._filesystem.join(self._scm.checkout_root, platform_dir) 215 if filename.startswith(platform_dir): 216 return filename.replace(platform_dir, '').split(self._filesystem.sep)[0] 217 return '(generic)' 218 219 def _move_baselines(self, baseline_name, results_by_directory, new_results_by_directory): 220 data_for_result = {} 221 for directory, result in results_by_directory.items(): 222 if not result in data_for_result: 223 source = self._join_directory(directory, baseline_name) 224 data_for_result[result] = self._filesystem.read_binary_file(source) 225 226 scm_files = [] 227 fs_files = [] 228 for directory, result in results_by_directory.items(): 229 if new_results_by_directory.get(directory) != result: 230 file_name = self._join_directory(directory, baseline_name) 231 if self._scm.exists(file_name): 232 scm_files.append(file_name) 233 else: 234 fs_files.append(file_name) 235 236 if scm_files or fs_files: 237 if scm_files: 238 _log.debug(" Deleting (SCM):") 239 for platform_dir in sorted(self._platform(filename) for filename in scm_files): 240 _log.debug(" " + platform_dir) 241 if self._skip_scm_commands: 242 self._files_to_delete.extend(scm_files) 243 else: 244 self._scm.delete_list(scm_files) 245 if fs_files: 246 _log.debug(" Deleting (file system):") 247 for platform_dir in sorted(self._platform(filename) for filename in fs_files): 248 _log.debug(" " + platform_dir) 249 for filename in fs_files: 250 self._filesystem.remove(filename) 251 else: 252 _log.debug(" (Nothing to delete)") 253 254 file_names = [] 255 for directory, result in new_results_by_directory.items(): 256 if results_by_directory.get(directory) != result: 257 destination = self._join_directory(directory, baseline_name) 258 self._filesystem.maybe_make_directory(self._filesystem.split(destination)[0]) 259 self._filesystem.write_binary_file(destination, data_for_result[result]) 260 file_names.append(destination) 261 262 if file_names: 263 _log.debug(" Adding:") 264 for platform_dir in sorted(self._platform(filename) for filename in file_names): 265 _log.debug(" " + platform_dir) 266 if self._skip_scm_commands: 267 # Have adds win over deletes. 268 self._files_to_delete = list(set(self._files_to_delete) - set(file_names)) 269 self._files_to_add.extend(file_names) 270 else: 271 self._scm.add_list(file_names) 272 else: 273 _log.debug(" (Nothing to add)") 274 275 def write_by_directory(self, results_by_directory, writer, indent): 276 for path in sorted(results_by_directory): 277 writer("%s%s: %s" % (indent, self._platform(path), results_by_directory[path][0:6])) 278 279 def _optimize_subtree(self, baseline_name): 280 basename = self._filesystem.basename(baseline_name) 281 results_by_directory, new_results_by_directory = self._find_optimal_result_placement(baseline_name) 282 283 if new_results_by_directory == results_by_directory: 284 if new_results_by_directory: 285 _log.debug(" %s: (already optimal)" % basename) 286 self.write_by_directory(results_by_directory, _log.debug, " ") 287 else: 288 _log.debug(" %s: (no baselines found)" % basename) 289 # This is just used for unittests. Intentionally set it to the old data if we don't modify anything. 290 self.new_results_by_directory.append(results_by_directory) 291 return True 292 293 if self._results_by_port_name(results_by_directory, baseline_name) != self._results_by_port_name(new_results_by_directory, baseline_name): 294 # This really should never happen. Just a sanity check to make sure the script fails in the case of bugs 295 # instead of committing incorrect baselines. 296 _log.error(" %s: optimization failed" % basename) 297 self.write_by_directory(results_by_directory, _log.warning, " ") 298 return False 299 300 _log.debug(" %s:" % basename) 301 _log.debug(" Before: ") 302 self.write_by_directory(results_by_directory, _log.debug, " ") 303 _log.debug(" After: ") 304 self.write_by_directory(new_results_by_directory, _log.debug, " ") 305 306 self._move_baselines(baseline_name, results_by_directory, new_results_by_directory) 307 return True 308 309 def _optimize_virtual_root(self, baseline_name, non_virtual_baseline_name): 310 virtual_root_expected_baseline_path = self._filesystem.join(self._layout_tests_dir, baseline_name) 311 if not self._filesystem.exists(virtual_root_expected_baseline_path): 312 return 313 root_sha1 = self._filesystem.sha1(virtual_root_expected_baseline_path) 314 315 results_by_directory = self.read_results_by_directory(non_virtual_baseline_name) 316 # See if all the immediate predecessors of the virtual root have the same expected result. 317 for port in self._ports.values(): 318 directories = self._relative_baseline_search_paths(port, non_virtual_baseline_name) 319 for directory in directories: 320 if directory not in results_by_directory: 321 continue 322 if results_by_directory[directory] != root_sha1: 323 return 324 break 325 326 _log.debug("Deleting redundant virtual root expected result.") 327 if self._skip_scm_commands and virtual_root_expected_baseline_path in self._files_to_add: 328 self._files_to_add.remove(virtual_root_expected_baseline_path) 329 if self._scm.exists(virtual_root_expected_baseline_path): 330 _log.debug(" Deleting (SCM): " + virtual_root_expected_baseline_path) 331 if self._skip_scm_commands: 332 self._files_to_delete.append(virtual_root_expected_baseline_path) 333 else: 334 self._scm.delete(virtual_root_expected_baseline_path) 335 else: 336 _log.debug(" Deleting (file system): " + virtual_root_expected_baseline_path) 337 self._filesystem.remove(virtual_root_expected_baseline_path) 338 339 def optimize(self, baseline_name): 340 # The virtual fallback path is the same as the non-virtual one tacked on to the bottom of the non-virtual path. 341 # See https://docs.google.com/a/chromium.org/drawings/d/1eGdsIKzJ2dxDDBbUaIABrN4aMLD1bqJTfyxNGZsTdmg/edit for 342 # a visual representation of this. 343 # 344 # So, we can optimize the virtual path, then the virtual root and then the regular path. 345 346 self._files_to_delete = [] 347 self._files_to_add = [] 348 _log.debug("Optimizing regular fallback path.") 349 result = self._optimize_subtree(baseline_name) 350 non_virtual_baseline_name = self._virtual_base(baseline_name) 351 if not non_virtual_baseline_name: 352 return result, self._files_to_delete, self._files_to_add 353 354 self._optimize_virtual_root(baseline_name, non_virtual_baseline_name) 355 356 _log.debug("Optimizing non-virtual fallback path.") 357 result |= self._optimize_subtree(non_virtual_baseline_name) 358 return result, self._files_to_delete, self._files_to_add 359