1# Copyright (C) 2010 Chris Jerdonek (chris.jerdonek@gmail.com)
2#
3# Redistribution and use in source and binary forms, with or without
4# modification, are permitted provided that the following conditions
5# are met:
6# 1.  Redistributions of source code must retain the above copyright
7#     notice, this list of conditions and the following disclaimer.
8# 2.  Redistributions in binary form must reproduce the above copyright
9#     notice, this list of conditions and the following disclaimer in the
10#     documentation and/or other materials provided with the distribution.
11#
12# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
13# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
16# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
19# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22
23"""Contains filter-related code."""
24
25
def validate_filter_rules(filter_rules, all_categories):
    """Check each filter rule against the known category names.

    A rule is acceptable when it begins with a plus or minus sign and
    the text following the sign is a prefix of at least one name in
    all_categories.  Rules are checked in order, and the first
    offending rule triggers the error.

    Args:
      filter_rules: A list of boolean filter rules, for example--
                    ["-whitespace", "+whitespace/braces"]
      all_categories: A list of all available category names, for example--
                      ["whitespace/tabs", "whitespace/braces"]

    Raises:
      ValueError: A rule does not begin with "+" or "-", or the part
                  of a rule after the sign does not match the
                  beginning of any name in all_categories.

    """
    for rule in filter_rules:
        has_sign = rule.startswith(('+', '-'))
        if not has_sign:
            raise ValueError('Invalid filter rule "%s": every rule '
                             "must start with + or -." % rule)

        # Everything after the sign must be a prefix of a known category.
        prefix = rule[1:]
        if any(name.startswith(prefix) for name in all_categories):
            continue

        raise ValueError('Suspected incorrect filter rule "%s": '
                         "the rule does not match the beginning "
                         "of any category name." % rule)
54
55
class _CategoryFilter(object):

    """Filters whether to check style categories."""

    def __init__(self, filter_rules=None):
        """Create a category filter.

        The rules are stored as given and are not validated here:
        should_check() treats a matching rule that begins with "+" as
        "check" and any other matching rule as "do not check".

        Args:
          filter_rules: A list of strings that are filter rules, which
                        are strings beginning with the plus or minus
                        symbol (+/-).  The list should include any
                        default filter rules at the beginning.
                        Defaults to the empty list.

        """
        if filter_rules is None:
            filter_rules = []

        self._filter_rules = filter_rules
        # Cached dictionary of category name to True/False.
        self._should_check_category = {}

    def __str__(self):
        """Return the filter rules joined by commas."""
        return ",".join(self._filter_rules)

    # Useful for unit testing.
    def __eq__(self, other):
        """Return whether this CategoryFilter instance is equal to another.

        Two filters are equal when their rule lists are equal.  For an
        object that is not a _CategoryFilter, NotImplemented is returned
        so that Python falls back to its default comparison instead of
        failing on a missing attribute.

        """
        if not isinstance(other, _CategoryFilter):
            return NotImplemented
        return self._filter_rules == other._filter_rules

    # Useful for unit testing.
    def __ne__(self, other):
        """Return the negation of __eq__(), passing NotImplemented through."""
        # Python 2 does not automatically deduce this from __eq__().
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def should_check(self, category):
        """Return whether the category should be checked.

        The rules for determining whether a category should be checked
        are as follows.  By default all categories should be checked.
        Then apply the filter rules in order from first to last, with
        later flags taking precedence.

        A filter rule applies to a category if the string after the
        leading plus/minus (+/-) matches the beginning of the category
        name.  A plus (+) means the category should be checked, while a
        minus (-) means the category should not be checked.

        The result for each category is cached on the instance.

        Args:
          category: The category name.

        """
        cache = self._should_check_category
        if category in cache:
            return cache[category]

        should_check = True  # All categories checked by default.
        for rule in self._filter_rules:
            if category.startswith(rule[1:]):
                # A later matching rule overrides any earlier one.
                should_check = rule.startswith('+')

        cache[category] = should_check
        return should_check
118
119
class FilterConfiguration(object):

    """Supports filtering with path-specific and user-specified rules."""

    def __init__(self, base_rules=None, path_specific=None, user_rules=None):
        """Create a FilterConfiguration instance.

        Args:
          base_rules: The starting list of filter rules to use for
                      processing.  The default is the empty list, which
                      by itself would mean that all categories should be
                      checked.

          path_specific: A list of (sub_paths, path_rules) pairs
                         that stores the path-specific filter rules for
                         appending to the base rules.
                             The "sub_paths" value is a list of path
                         substrings.  If a file path contains one of the
                         substrings, then the corresponding path rules
                         are appended.  The first substring match takes
                         precedence, i.e. only the first match triggers
                         an append.
                             The "path_rules" value is a list of filter
                         rules that can be appended to the base rules.

          user_rules: A list of filter rules that is always appended
                      to the base rules and any path rules.  In other
                      words, the user rules take precedence over
                      everything.  In practice, the user rules are
                      provided by the user from the command line.

        """
        if base_rules is None:
            base_rules = []
        if path_specific is None:
            path_specific = []
        if user_rules is None:
            user_rules = []

        self._base_rules = base_rules
        self._path_specific = path_specific
        # The backing store for self._get_path_specific_lower().
        self._path_specific_lower = None

        self._user_rules = user_rules

        # Cached dictionary of path rules (a tuple) to CategoryFilter
        # instance.
        self._path_rules_to_filter = {}

        # Cached dictionary of file path to CategoryFilter instance.
        # The same CategoryFilter instance can be shared across
        # multiple keys in this dictionary.  This allows us to take
        # greater advantage of the caching done by
        # CategoryFilter.should_check().
        self._path_to_filter = {}

    # Useful for unit testing.
    def __eq__(self, other):
        """Return whether this FilterConfiguration is equal to another.

        Two configurations are equal when their base rules, path-specific
        rules and user rules are all equal.  For an object that is not a
        FilterConfiguration, NotImplemented is returned so that Python
        falls back to its default comparison instead of failing on a
        missing attribute.

        """
        if not isinstance(other, FilterConfiguration):
            return NotImplemented
        if self._base_rules != other._base_rules:
            return False
        if self._path_specific != other._path_specific:
            return False
        if self._user_rules != other._user_rules:
            return False

        return True

    # Useful for unit testing.
    def __ne__(self, other):
        """Return the negation of __eq__(), passing NotImplemented through."""
        # Python 2 does not automatically deduce this from __eq__().
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    # We use the prefix "_get" since the name "_path_specific_lower"
    # is already taken up by the data attribute backing store.
    def _get_path_specific_lower(self):
        """Return a copy of self._path_specific with the paths lower-cased.

        The copy is built on first use and cached.  The lower-cased
        sub-paths are stored in a real list (not a lazy map object) so
        they can be iterated on every lookup.

        """
        if self._path_specific_lower is None:
            self._path_specific_lower = [
                ([sub_path.lower() for sub_path in sub_paths], path_rules)
                for (sub_paths, path_rules) in self._path_specific
            ]
        return self._path_specific_lower

    def _path_rules_from_path(self, path):
        """Determine the path-specific rules to use, and return as a tuple.

        The path is lower-cased and compared against the lower-cased
        sub-paths; the rules of the first pair with a sub-path contained
        in the path are returned.

        This method returns a tuple rather than a list so the return
        value can be passed to _filter_from_path_rules() without change.

        """
        path = path.lower()
        for (sub_paths, path_rules) in self._get_path_specific_lower():
            for sub_path in sub_paths:
                if sub_path in path:
                    return tuple(path_rules)
        return ()  # Default to the empty tuple.

    def _filter_from_path_rules(self, path_rules):
        """Return the CategoryFilter associated to the given path rules.

        Args:
          path_rules: A tuple of path rules.  We require a tuple rather
                      than a list so the value can be used as a dictionary
                      key in self._path_rules_to_filter.

        """
        # We reuse the same CategoryFilter where possible to take
        # advantage of the caching they do.
        if path_rules not in self._path_rules_to_filter:
            # Order matters: base rules, then path rules, then user rules.
            rules = list(self._base_rules)  # Make a copy
            rules.extend(path_rules)
            rules.extend(self._user_rules)
            self._path_rules_to_filter[path_rules] = _CategoryFilter(rules)

        return self._path_rules_to_filter[path_rules]

    def _filter_from_path(self, path):
        """Return the CategoryFilter associated to a path."""
        if path not in self._path_to_filter:
            path_rules = self._path_rules_from_path(path)
            category_filter = self._filter_from_path_rules(path_rules)
            self._path_to_filter[path] = category_filter

        return self._path_to_filter[path]

    def should_check(self, category, path):
        """Return whether the given category should be checked.

        This method determines whether a category should be checked
        by checking the category name against the filter rules for
        the given path.

        For a given path, the filter rules are the combination of
        the base rules, the path-specific rules, and the user-provided
        rules -- in that order.  As we will describe below, later rules
        in the list take precedence.  The path-specific rules are the
        rules corresponding to the first element of the "path_specific"
        parameter that contains a string case-insensitively matching
        some substring of the path.  If there is no such element,
        there are no path-specific rules for that path.

        Given a list of filter rules, the logic for determining whether
        a category should be checked is as follows.  By default all
        categories should be checked.  Then apply the filter rules in
        order from first to last, with later flags taking precedence.

        A filter rule applies to a category if the string after the
        leading plus/minus (+/-) matches the beginning of the category
        name.  A plus (+) means the category should be checked, while a
        minus (-) means the category should not be checked.

        Args:
          category: The category name.
          path: The path of the file being checked.

        """
        return self._filter_from_path(path).should_check(category)
278
279