1# Copyright (C) 2011, Google Inc. All rights reserved.
2#
3# Redistribution and use in source and binary forms, with or without
4# modification, are permitted provided that the following conditions are
5# met:
6#
7#     * Redistributions of source code must retain the above copyright
8# notice, this list of conditions and the following disclaimer.
9#     * Redistributions in binary form must reproduce the above
10# copyright notice, this list of conditions and the following disclaimer
11# in the documentation and/or other materials provided with the
12# distribution.
13#     * Neither the name of Google Inc. nor the names of its
14# contributors may be used to endorse or promote products derived from
15# this software without specific prior written permission.
16#
17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29import copy
30import logging
31
32from webkitpy.common.memoized import memoized
33
34_log = logging.getLogger(__name__)
35
36
37# FIXME: Should this function be somewhere more general?
38def _invert_dictionary(dictionary):
39    inverted_dictionary = {}
40    for key, value in dictionary.items():
41        if inverted_dictionary.get(value):
42            inverted_dictionary[value].append(key)
43        else:
44            inverted_dictionary[value] = [key]
45    return inverted_dictionary
46
47
48class BaselineOptimizer(object):
49    ROOT_LAYOUT_TESTS_DIRECTORY = 'LayoutTests'
50
51    def __init__(self, host, port, port_names, skip_scm_commands):
52        self._filesystem = host.filesystem
53        self._skip_scm_commands = skip_scm_commands
54        self._files_to_delete = []
55        self._files_to_add = []
56        self._scm = host.scm()
57        self._default_port = port
58        self._ports = {}
59        for port_name in port_names:
60            self._ports[port_name] = host.port_factory.get(port_name)
61
62        self._webkit_base = port.webkit_base()
63        self._layout_tests_dir = port.layout_tests_dir()
64
65        # Only used by unittests.
66        self.new_results_by_directory = []
67
68    def _baseline_root(self, baseline_name):
69        virtual_suite = self._virtual_suite(baseline_name)
70        if virtual_suite:
71            return self._filesystem.join(self.ROOT_LAYOUT_TESTS_DIRECTORY, virtual_suite.name)
72        return self.ROOT_LAYOUT_TESTS_DIRECTORY
73
74    def _baseline_search_path(self, port, baseline_name):
75        virtual_suite = self._virtual_suite(baseline_name)
76        if virtual_suite:
77            return port.virtual_baseline_search_path(baseline_name)
78        return port.baseline_search_path()
79
80    def _virtual_suite(self, baseline_name):
81        return self._default_port.lookup_virtual_suite(baseline_name)
82
83    def _virtual_base(self, baseline_name):
84        return self._default_port.lookup_virtual_test_base(baseline_name)
85
86    def _relative_baseline_search_paths(self, port, baseline_name):
87        baseline_search_path = self._baseline_search_path(port, baseline_name)
88        baseline_root = self._baseline_root(baseline_name)
89        relative_paths = [self._filesystem.relpath(path, self._webkit_base) for path in baseline_search_path]
90        return relative_paths + [baseline_root]
91
92    def _join_directory(self, directory, baseline_name):
93        # This code is complicated because both the directory name and the baseline_name have the virtual
94        # test suite in the name and the virtual baseline name is not a strict superset of the non-virtual name.
95        # For example, virtual/gpu/fast/canvas/foo-expected.png corresponds to fast/canvas/foo-expected.png and
96        # the baseline directories are like platform/mac/virtual/gpu/fast/canvas. So, to get the path
97        # to the baseline in the platform directory, we need to append jsut foo-expected.png to the directory.
98        virtual_suite = self._virtual_suite(baseline_name)
99        if virtual_suite:
100            baseline_name_without_virtual = baseline_name[len(virtual_suite.name) + 1:]
101        else:
102            baseline_name_without_virtual = baseline_name
103        return self._filesystem.join(self._scm.checkout_root, directory, baseline_name_without_virtual)
104
105    def read_results_by_directory(self, baseline_name):
106        results_by_directory = {}
107        directories = reduce(set.union, map(set, [self._relative_baseline_search_paths(port, baseline_name) for port in self._ports.values()]))
108
109        for directory in directories:
110            path = self._join_directory(directory, baseline_name)
111            if self._filesystem.exists(path):
112                results_by_directory[directory] = self._filesystem.sha1(path)
113        return results_by_directory
114
115    def _results_by_port_name(self, results_by_directory, baseline_name):
116        results_by_port_name = {}
117        for port_name, port in self._ports.items():
118            for directory in self._relative_baseline_search_paths(port, baseline_name):
119                if directory in results_by_directory:
120                    results_by_port_name[port_name] = results_by_directory[directory]
121                    break
122        return results_by_port_name
123
124    @memoized
125    def _directories_immediately_preceding_root(self, baseline_name):
126        directories = set()
127        for port in self._ports.values():
128            directory = self._filesystem.relpath(self._baseline_search_path(port, baseline_name)[-1], self._webkit_base)
129            directories.add(directory)
130        return directories
131
132    def _optimize_result_for_root(self, new_results_by_directory, baseline_name):
133        # The root directory (i.e. LayoutTests) is the only one that doesn't correspond
134        # to a specific platform. As such, it's the only one where the baseline in fallback directories
135        # immediately before it can be promoted up, i.e. if win and mac
136        # have the same baseline, then it can be promoted up to be the LayoutTests baseline.
137        # All other baselines can only be removed if they're redundant with a baseline earlier
138        # in the fallback order. They can never promoted up.
139        directories_immediately_preceding_root = self._directories_immediately_preceding_root(baseline_name)
140
141        shared_result = None
142        root_baseline_unused = False
143        for directory in directories_immediately_preceding_root:
144            this_result = new_results_by_directory.get(directory)
145
146            # If any of these directories don't have a baseline, there's no optimization we can do.
147            if not this_result:
148                return
149
150            if not shared_result:
151                shared_result = this_result
152            elif shared_result != this_result:
153                root_baseline_unused = True
154
155        baseline_root = self._baseline_root(baseline_name)
156
157        # The root baseline is unused if all the directories immediately preceding the root
158        # have a baseline, but have different baselines, so the baselines can't be promoted up.
159        if root_baseline_unused:
160            if baseline_root in new_results_by_directory:
161                del new_results_by_directory[baseline_root]
162            return
163
164        new_results_by_directory[baseline_root] = shared_result
165        for directory in directories_immediately_preceding_root:
166            del new_results_by_directory[directory]
167
168    def _find_optimal_result_placement(self, baseline_name):
169        results_by_directory = self.read_results_by_directory(baseline_name)
170        results_by_port_name = self._results_by_port_name(results_by_directory, baseline_name)
171        port_names_by_result = _invert_dictionary(results_by_port_name)
172
173        new_results_by_directory = self._remove_redundant_results(results_by_directory, results_by_port_name, port_names_by_result, baseline_name)
174        self._optimize_result_for_root(new_results_by_directory, baseline_name)
175
176        return results_by_directory, new_results_by_directory
177
178    def _remove_redundant_results(self, results_by_directory, results_by_port_name, port_names_by_result, baseline_name):
179        new_results_by_directory = copy.copy(results_by_directory)
180        for port_name, port in self._ports.items():
181            current_result = results_by_port_name.get(port_name)
182
183            # This happens if we're missing baselines for a port.
184            if not current_result:
185                continue;
186
187            fallback_path = self._relative_baseline_search_paths(port, baseline_name)
188            current_index, current_directory = self._find_in_fallbackpath(fallback_path, current_result, new_results_by_directory)
189            for index in range(current_index + 1, len(fallback_path)):
190                new_directory = fallback_path[index]
191                if not new_directory in new_results_by_directory:
192                    # No result for this baseline in this directory.
193                    continue
194                elif new_results_by_directory[new_directory] == current_result:
195                    # Result for new_directory are redundant with the result earlier in the fallback order.
196                    if current_directory in new_results_by_directory:
197                        del new_results_by_directory[current_directory]
198                else:
199                    # The new_directory contains a different result, so stop trying to push results up.
200                    break
201
202        return new_results_by_directory
203
204    def _find_in_fallbackpath(self, fallback_path, current_result, results_by_directory):
205        for index, directory in enumerate(fallback_path):
206            if directory in results_by_directory and (results_by_directory[directory] == current_result):
207                return index, directory
208        assert False, "result %s not found in fallback_path %s, %s" % (current_result, fallback_path, results_by_directory)
209
210    def _platform(self, filename):
211        platform_dir = self.ROOT_LAYOUT_TESTS_DIRECTORY + self._filesystem.sep + 'platform' + self._filesystem.sep
212        if filename.startswith(platform_dir):
213            return filename.replace(platform_dir, '').split(self._filesystem.sep)[0]
214        platform_dir = self._filesystem.join(self._scm.checkout_root, platform_dir)
215        if filename.startswith(platform_dir):
216            return filename.replace(platform_dir, '').split(self._filesystem.sep)[0]
217        return '(generic)'
218
219    def _move_baselines(self, baseline_name, results_by_directory, new_results_by_directory):
220        data_for_result = {}
221        for directory, result in results_by_directory.items():
222            if not result in data_for_result:
223                source = self._join_directory(directory, baseline_name)
224                data_for_result[result] = self._filesystem.read_binary_file(source)
225
226        scm_files = []
227        fs_files = []
228        for directory, result in results_by_directory.items():
229            if new_results_by_directory.get(directory) != result:
230                file_name = self._join_directory(directory, baseline_name)
231                if self._scm.exists(file_name):
232                    scm_files.append(file_name)
233                else:
234                    fs_files.append(file_name)
235
236        if scm_files or fs_files:
237            if scm_files:
238                _log.debug("    Deleting (SCM):")
239                for platform_dir in sorted(self._platform(filename) for filename in scm_files):
240                    _log.debug("      " + platform_dir)
241                if self._skip_scm_commands:
242                    self._files_to_delete.extend(scm_files)
243                else:
244                    self._scm.delete_list(scm_files)
245            if fs_files:
246                _log.debug("    Deleting (file system):")
247                for platform_dir in sorted(self._platform(filename) for filename in fs_files):
248                    _log.debug("      " + platform_dir)
249                for filename in fs_files:
250                    self._filesystem.remove(filename)
251        else:
252            _log.debug("    (Nothing to delete)")
253
254        file_names = []
255        for directory, result in new_results_by_directory.items():
256            if results_by_directory.get(directory) != result:
257                destination = self._join_directory(directory, baseline_name)
258                self._filesystem.maybe_make_directory(self._filesystem.split(destination)[0])
259                self._filesystem.write_binary_file(destination, data_for_result[result])
260                file_names.append(destination)
261
262        if file_names:
263            _log.debug("    Adding:")
264            for platform_dir in sorted(self._platform(filename) for filename in file_names):
265                _log.debug("      " + platform_dir)
266            if self._skip_scm_commands:
267                # Have adds win over deletes.
268                self._files_to_delete = list(set(self._files_to_delete) - set(file_names))
269                self._files_to_add.extend(file_names)
270            else:
271                self._scm.add_list(file_names)
272        else:
273            _log.debug("    (Nothing to add)")
274
275    def write_by_directory(self, results_by_directory, writer, indent):
276        for path in sorted(results_by_directory):
277            writer("%s%s: %s" % (indent, self._platform(path), results_by_directory[path][0:6]))
278
279    def _optimize_subtree(self, baseline_name):
280        basename = self._filesystem.basename(baseline_name)
281        results_by_directory, new_results_by_directory = self._find_optimal_result_placement(baseline_name)
282
283        if new_results_by_directory == results_by_directory:
284            if new_results_by_directory:
285                _log.debug("  %s: (already optimal)" % basename)
286                self.write_by_directory(results_by_directory, _log.debug, "    ")
287            else:
288                _log.debug("  %s: (no baselines found)" % basename)
289            # This is just used for unittests. Intentionally set it to the old data if we don't modify anything.
290            self.new_results_by_directory.append(results_by_directory)
291            return True
292
293        if self._results_by_port_name(results_by_directory, baseline_name) != self._results_by_port_name(new_results_by_directory, baseline_name):
294            # This really should never happen. Just a sanity check to make sure the script fails in the case of bugs
295            # instead of committing incorrect baselines.
296            _log.error("  %s: optimization failed" % basename)
297            self.write_by_directory(results_by_directory, _log.warning, "      ")
298            return False
299
300        _log.debug("  %s:" % basename)
301        _log.debug("    Before: ")
302        self.write_by_directory(results_by_directory, _log.debug, "      ")
303        _log.debug("    After: ")
304        self.write_by_directory(new_results_by_directory, _log.debug, "      ")
305
306        self._move_baselines(baseline_name, results_by_directory, new_results_by_directory)
307        return True
308
309    def _optimize_virtual_root(self, baseline_name, non_virtual_baseline_name):
310        virtual_root_expected_baseline_path = self._filesystem.join(self._layout_tests_dir, baseline_name)
311        if not self._filesystem.exists(virtual_root_expected_baseline_path):
312            return
313        root_sha1 = self._filesystem.sha1(virtual_root_expected_baseline_path)
314
315        results_by_directory = self.read_results_by_directory(non_virtual_baseline_name)
316        # See if all the immediate predecessors of the virtual root have the same expected result.
317        for port in self._ports.values():
318            directories = self._relative_baseline_search_paths(port, non_virtual_baseline_name)
319            for directory in directories:
320                if directory not in results_by_directory:
321                    continue
322                if results_by_directory[directory] != root_sha1:
323                    return
324                break
325
326        _log.debug("Deleting redundant virtual root expected result.")
327        if self._skip_scm_commands and virtual_root_expected_baseline_path in self._files_to_add:
328            self._files_to_add.remove(virtual_root_expected_baseline_path)
329        if self._scm.exists(virtual_root_expected_baseline_path):
330            _log.debug("    Deleting (SCM): " + virtual_root_expected_baseline_path)
331            if self._skip_scm_commands:
332                self._files_to_delete.append(virtual_root_expected_baseline_path)
333            else:
334                self._scm.delete(virtual_root_expected_baseline_path)
335        else:
336            _log.debug("    Deleting (file system): " + virtual_root_expected_baseline_path)
337            self._filesystem.remove(virtual_root_expected_baseline_path)
338
339    def optimize(self, baseline_name):
340        # The virtual fallback path is the same as the non-virtual one tacked on to the bottom of the non-virtual path.
341        # See https://docs.google.com/a/chromium.org/drawings/d/1eGdsIKzJ2dxDDBbUaIABrN4aMLD1bqJTfyxNGZsTdmg/edit for
342        # a visual representation of this.
343        #
344        # So, we can optimize the virtual path, then the virtual root and then the regular path.
345
346        self._files_to_delete = []
347        self._files_to_add = []
348        _log.debug("Optimizing regular fallback path.")
349        result = self._optimize_subtree(baseline_name)
350        non_virtual_baseline_name = self._virtual_base(baseline_name)
351        if not non_virtual_baseline_name:
352            return result, self._files_to_delete, self._files_to_add
353
354        self._optimize_virtual_root(baseline_name, non_virtual_baseline_name)
355
356        _log.debug("Optimizing non-virtual fallback path.")
357        result |= self._optimize_subtree(non_virtual_baseline_name)
358        return result, self._files_to_delete, self._files_to_add
359