1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2009, 2010, 2012 Google Inc. All rights reserved.
4# Copyright (C) 2009 Torch Mobile Inc.
5# Copyright (C) 2009 Apple Inc. All rights reserved.
6# Copyright (C) 2010 Chris Jerdonek (cjerdonek@webkit.org)
7#
8# Redistribution and use in source and binary forms, with or without
9# modification, are permitted provided that the following conditions are
10# met:
11#
12#    * Redistributions of source code must retain the above copyright
13# notice, this list of conditions and the following disclaimer.
14#    * Redistributions in binary form must reproduce the above
15# copyright notice, this list of conditions and the following disclaimer
16# in the documentation and/or other materials provided with the
17# distribution.
18#    * Neither the name of Google Inc. nor the names of its
19# contributors may be used to endorse or promote products derived from
20# this software without specific prior written permission.
21#
22# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
34# This is the modified version of Google's cpplint. The original code is
35# http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py
36
37"""Support for check-webkit-style."""
38
39import math  # for log
40import os
41import os.path
42import re
43import sre_compile
44import string
45import sys
46import unicodedata
47
48from webkitpy.common.memoized import memoized
49from webkitpy.common.system.filesystem import FileSystem
50
# Names of the headers that this checker treats as STL headers.
_STL_HEADERS = frozenset((
    'algobase.h', 'algorithm', 'alloc.h', 'bitset',
    'deque', 'exception', 'function.h', 'functional',
    'hash_map', 'hash_map.h', 'hash_set', 'hash_set.h',
    'iterator', 'list', 'list.h', 'map',
    'memory', 'pair.h', 'pthread_alloc', 'queue',
    'set', 'set.h', 'sstream', 'stack',
    'stl_alloc.h', 'stl_relops.h', 'type_traits.h', 'utility',
    'vector', 'vector.h',
))
60
61
# Names of the C++ system headers that are not counted as STL headers.
_CPP_HEADERS = frozenset((
    'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
    'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale',
    'cmath', 'complex', 'complex.h', 'csetjmp', 'csignal',
    'cstdarg', 'cstddef', 'cstdio', 'cstdlib', 'cstring',
    'ctime', 'cwchar', 'cwctype', 'defalloc.h', 'deque.h',
    'editbuf.h', 'exception', 'fstream', 'fstream.h', 'hashtable.h',
    'heap.h', 'indstream.h', 'iomanip', 'iomanip.h', 'ios',
    'iosfwd', 'iostream', 'iostream.h', 'istream.h', 'iterator.h',
    'limits', 'map.h', 'multimap.h', 'multiset.h', 'numeric',
    'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h', 'procbuf.h',
    'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h', 'SFile.h',
    'slist', 'slist.h', 'stack.h', 'stdexcept', 'stdiostream.h',
    'streambuf.h', 'stream.h', 'strfile.h', 'string', 'strstream',
    'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
))
78
79
# Assertion macros, as defined in base/logging.h and testing/base/gunit.h.
# Ordering matters: each _M variant is listed ahead of its plain counterpart
# so that substring matching finds the longer name first.
_CHECK_MACROS = [
    'DCHECK',
    'CHECK',
    'EXPECT_TRUE_M',
    'EXPECT_TRUE',
    'ASSERT_TRUE_M',
    'ASSERT_TRUE',
    'EXPECT_FALSE_M',
    'EXPECT_FALSE',
    'ASSERT_FALSE_M',
    'ASSERT_FALSE',
]

# For every check macro, maps a comparison operator to the name of the
# specialised macro to use instead (e.g. CHECK with '==' -> CHECK_EQ).
_CHECK_REPLACEMENT = dict((macro, {}) for macro in _CHECK_MACROS)

# Each row is (operator, suffix for the same comparison, suffix for the
# negated comparison). The *_FALSE macros assert the opposite of the
# expression, so they are mapped through the negated suffix.
for op, replacement, inv_replacement in (
        ('==', 'EQ', 'NE'),
        ('!=', 'NE', 'EQ'),
        ('>=', 'GE', 'LT'),
        ('>', 'GT', 'LE'),
        ('<=', 'LE', 'GT'),
        ('<', 'LT', 'GE')):
    _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
    _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
    _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
    _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
    _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
    _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
    _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
    _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
    _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
    _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
111
112
# Header categories passed to _IncludeState.check_next_include_order().
(_CONFIG_HEADER,
 _PRIMARY_HEADER,
 _OTHER_HEADER,
 _MOC_HEADER) = range(4)
119
120
# Cache of compiled regexps, keyed by pattern string. Every regexp helper
# below performs its own cache lookup inline for performance reasons:
# routing the lookup through a shared function turned out to be noticeably
# expensive.
_regexp_compile_cache = dict()
125
126
def match(pattern, s):
    """Matches the string with the pattern, caching the compiled regexp.

    Args:
      pattern: The regular expression source string.
      s: The string to match; matching is anchored at its start.

    Returns:
      A match object, or None if the start of s does not match pattern.
    """
    # The cache lookup is inlined on purpose: a shared helper function was
    # found to be noticeably slower.
    if pattern not in _regexp_compile_cache:
        # re.compile is the public API; sre_compile is an internal module
        # that is deprecated in current Python releases.
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].match(s)
132
133
def search(pattern, s):
    """Searches the string for the pattern, caching the compiled regexp.

    Args:
      pattern: The regular expression source string.
      s: The string to scan.

    Returns:
      A match object for the first location where pattern matches, or None
      if it matches nowhere in s.
    """
    # The cache lookup is inlined on purpose: a shared helper function was
    # found to be noticeably slower.
    if pattern not in _regexp_compile_cache:
        # re.compile is the public API; sre_compile is an internal module
        # that is deprecated in current Python releases.
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].search(s)
139
140
def sub(pattern, replacement, s):
    """Substitutes occurrences of a pattern, caching the compiled regexp.

    Args:
      pattern: The regular expression source string.
      replacement: The replacement, as accepted by the compiled pattern's
                   sub() method.
      s: The string in which to substitute.

    Returns:
      The string with every non-overlapping match replaced.
    """
    # The cache lookup is inlined on purpose: a shared helper function was
    # found to be noticeably slower.
    if pattern not in _regexp_compile_cache:
        # re.compile is the public API; sre_compile is an internal module
        # that is deprecated in current Python releases.
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].sub(replacement, s)
146
147
def subn(pattern, replacement, s):
    """Substitutes occurrences of a pattern, caching the compiled regexp.

    Args:
      pattern: The regular expression source string.
      replacement: The replacement, as accepted by the compiled pattern's
                   subn() method.
      s: The string in which to substitute.

    Returns:
      A (new_string, number_of_substitutions_made) tuple.
    """
    # The cache lookup is inlined on purpose: a shared helper function was
    # found to be noticeably slower.
    if pattern not in _regexp_compile_cache:
        # re.compile is the public API; sre_compile is an internal module
        # that is deprecated in current Python releases.
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].subn(replacement, s)
153
154
def iteratively_replace_matches_with_char(pattern, char_replacement, s):
    """Blanks out every match of pattern, one match at a time.

    Each character of a match is overwritten with char_replacement and the
    search is then repeated on the updated string until nothing matches.
    Because of this, pattern must not be able to match text consisting of
    char_replacement, or the loop never terminates.

    Example:
      pattern = r'<[^<>]*>' # template parameters
      char_replacement = '_'
      s =     'A<B<C, D>>'
      Returns 'A_________'

    Args:
      pattern: The regex to match.
      char_replacement: The character written over every character of a
                        match.
      s: The string on which to do the replacements.

    Returns:
      The string after all replacements; it contains no match of pattern.
    """
    found = search(pattern, s)
    while found:
        begin, end = found.span(0)
        s = s[:begin] + char_replacement * (end - begin) + s[end:]
        found = search(pattern, s)
    return s
185
186
def _find_in_lines(regex, lines, start_position, not_found_position):
    """Searches forward through lines for the first match of regex.

    The search begins at start_position (row and column) and continues with
    each following line in full.

    Args:
      regex: The pattern to search for.
      lines: The list of lines to search.
      start_position: Position at which the search starts.
      not_found_position: Value to return when nothing matches.

    Returns:
      The Position where the match starts, or not_found_position.
    """
    first_row = start_position.row
    first_column = start_position.column

    # Only the part of the first row at or after the start column counts.
    hit = search(regex, lines[first_row][first_column:])
    if hit:
        return Position(first_row, first_column + hit.start())

    # Every later row is searched from its beginning.
    for row in range(first_row + 1, len(lines)):
        hit = search(regex, lines[row])
        if hit:
            return Position(row, hit.start())
    return not_found_position
209
def _rfind_in_lines(regex, lines, start_position, not_found_position):
    """Searches backward through lines for the last match of regex.

    The search covers the text before start_position on its row, then each
    preceding line in full, going upwards.

    Args:
      regex: The pattern to search for.
      lines: The list of lines to search.
      start_position: Position just past the last character to consider.
      not_found_position: Value to return when nothing matches.

    Returns:
      The Position where the match ends, or not_found_position.
    """
    # A greedy '.*' in front of the grouped regex makes group 1 the last
    # possible match within a line.
    last_in_line_regex = r'.*(%s)' % (regex,)
    first_row = start_position.row

    # Only the part of the first row before the start column counts.
    hit = match(last_in_line_regex, lines[first_row][:start_position.column])
    if hit:
        return Position(first_row, hit.end(1))

    # Walk the earlier rows from the nearest one up to row 0.
    for row in range(first_row - 1, -1, -1):
        hit = match(last_in_line_regex, lines[row])
        if hit:
            return Position(row, hit.end(1))
    return not_found_position
233
234
def _convert_to_lower_with_underscores(text):
    """Converts camelCase or PascalCase text to lower_with_underscores."""
    underscore_rules = (
        # A capital that is followed by a lower case letter and preceded by
        # a letter or digit, i.e. the start of a new word inside a word.
        # (Puts an underscore before Password, but not before the P or A of
        # WPA, in WPAPassword.)
        r'(?<=[A-Za-z0-9])([A-Z])(?=[a-z])',
        # A capital at the end of a word that is preceded by a lower case
        # letter or a digit.
        # (Puts an underscore before the A in isA but not the A in CBA.)
        r'(?<=[a-z0-9])([A-Z])(?=\b)',
        # A capital followed by another capital (or an underscore) and
        # preceded by a lower case letter or a digit.
        # (Puts an underscore before the A in 'WordADay'.)
        r'(?<=[a-z0-9])([A-Z][A-Z_])',
    )
    for rule in underscore_rules:
        text = sub(rule, r'_\1', text)
    return text.lower()
253
254
255
def _create_acronym(text):
    """Returns an upper-cased acronym built from the given text."""
    # Drop each lower case letter unless it is the first letter of a word;
    # capitals and all other characters are kept.
    initials = sub(r'(?<!\b)[a-z]', '', text)
    return initials.upper()
261
262
def up_to_unmatched_closing_paren(s):
    """Splits a string at its first unmatched ')'.

    The string is assumed to start just after an opening '(', so one
    parenthesis is already considered open.

    Args:
      s: a string which is a substring of line after '('
      (e.g., "a == (b + c))").

    Returns:
      A pair of strings (text before the first unmatched ')', text after
      it), e.g., up_to_unmatched_closing_paren("a == (b + c)) { ")
      returns "a == (b + c)", " { ".
      Returns None, None if there is no unmatched ')'.
    """
    open_parens = 1
    for index, char in enumerate(s):
        if char == ')':
            open_parens -= 1
            if not open_parens:
                return s[:index], s[index + 1:]
        elif char == '(':
            open_parens += 1
    return None, None
287
class _IncludeState(dict):
    """Tracks line numbers for includes, and the order in which they appear.

    As a dict, an _IncludeState object serves as a mapping between include
    filename and the line number on which that file was included.

    Call check_next_include_order() once for each header in the file,
    passing one of the _XXX_HEADER type constants. When a header appears in
    an illegal position the call returns a non-empty error message.
    """

    # self._section only ever advances through these values as includes are
    # processed; a header that belongs to an earlier section is reported by
    # check_next_include_order().
    _INITIAL_SECTION = 0
    _CONFIG_SECTION = 1
    _PRIMARY_SECTION = 2
    _OTHER_SECTION = 3

    # Human readable name of each header type, used in error messages.
    _TYPE_NAMES = {
        _CONFIG_HEADER: 'WebCore config.h',
        _PRIMARY_HEADER: 'header this file implements',
        _OTHER_HEADER: 'other header',
        _MOC_HEADER: 'moc file',
    }

    # Human readable name of each section, used in error messages.
    _SECTION_NAMES = {
        _INITIAL_SECTION: "... nothing.",
        _CONFIG_SECTION: "WebCore config.h.",
        _PRIMARY_SECTION: 'a header this file implements.',
        _OTHER_SECTION: 'other header.',
    }

    def __init__(self):
        super(_IncludeState, self).__init__()
        self._section = self._INITIAL_SECTION
        self._visited_primary_section = False
        # Starts out empty; nothing in this class fills it in.
        self.header_types = {}

    def visited_primary_section(self):
        """Returns True once a primary header has been passed to
        check_next_include_order()."""
        return self._visited_primary_section

    def check_next_include_order(self, header_type, file_is_header, primary_header_exists):
        """Returns a non-empty error message if the next header is out of order.

        This function also updates the internal state to be ready to check
        the next include.

        Args:
          header_type: One of the _XXX_HEADER constants defined above.
          file_is_header: Whether the file that owns this _IncludeState is
                          itself a header.
          primary_header_exists: When false, an "other" header that shows up
                                 before the primary header is not reported.

        Returns:
          The empty string if the header is in the right order, or an
          error message describing what's wrong.
        """
        if file_is_header:
            if header_type == _CONFIG_HEADER:
                return 'Header file should not contain WebCore config.h.'
            if header_type == _PRIMARY_HEADER:
                return 'Header file should not contain itself.'
        # moc files are accepted anywhere and leave the state untouched.
        if header_type == _MOC_HEADER:
            return ''

        previous_section = self._section
        type_name = self._TYPE_NAMES[header_type]

        # There is no section after the last one, so the "before" message
        # only exists while we have not reached it yet.
        found_before = None
        if previous_section != self._OTHER_SECTION:
            found_before = 'Found %s before %s' % (
                type_name, self._SECTION_NAMES[previous_section + 1])
        found_after = 'Found %s after %s' % (
            type_name, self._SECTION_NAMES[previous_section])

        if header_type == _CONFIG_HEADER:
            self._section = self._CONFIG_SECTION
            if previous_section >= self._CONFIG_SECTION:
                return found_after
            return ''

        if header_type == _PRIMARY_HEADER:
            self._section = self._PRIMARY_SECTION
            self._visited_primary_section = True
            if previous_section >= self._PRIMARY_SECTION:
                return found_after
            if previous_section < self._CONFIG_SECTION:
                return found_before
            return ''

        assert header_type == _OTHER_HEADER
        self._section = self._OTHER_SECTION
        if (not file_is_header
                and previous_section < self._PRIMARY_SECTION
                and primary_header_exists):
            return found_before
        return ''
378
379
class Position(object):
    """Holds the position of something as a row and a column.

    Positions compare by row first and column second; two positions are
    equal when both their row and their column are equal.
    """

    def __init__(self, row, column):
        self.row = row
        self.column = column

    def __str__(self):
        return '(%s, %s)' % (self.row, self.column)

    def _key(self):
        """Returns the (row, column) tuple used for comparing and hashing."""
        return (self.row, self.column)

    def __cmp__(self, other):
        """Returns -1, 0 or 1 depending on how self orders against other.

        Only Python 2 consults this method. It no longer relies on
        int.__cmp__, which does not exist on Python 3.
        """
        mine = self._key()
        theirs = (other.row, other.column)
        return (mine > theirs) - (mine < theirs)

    # Python 3 ignores __cmp__, so the rich comparison methods are spelled
    # out; without them == would compare identity and < would raise.
    def __eq__(self, other):
        if not isinstance(other, Position):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        if not isinstance(other, Position):
            return NotImplemented
        return self._key() != other._key()

    def __lt__(self, other):
        if not isinstance(other, Position):
            return NotImplemented
        return self._key() < other._key()

    def __le__(self, other):
        if not isinstance(other, Position):
            return NotImplemented
        return self._key() <= other._key()

    def __gt__(self, other):
        if not isinstance(other, Position):
            return NotImplemented
        return self._key() > other._key()

    def __ge__(self, other):
        if not isinstance(other, Position):
            return NotImplemented
        return self._key() >= other._key()

    def __hash__(self):
        # Defining __eq__ removes the default hash on Python 3; keep
        # positions hashable, consistently with equality.
        return hash(self._key())
391
392
class Parameter(object):
    """One function parameter, split into its type and its name."""

    def __init__(self, parameter, parameter_name_index, row):
        """Create a Parameter instance.

        Args:
          parameter: The text of the whole parameter.
          parameter_name_index: Index within parameter where the name starts.
          row: The row stored for this parameter.
        """
        type_text = parameter[:parameter_name_index]
        name_text = parameter[parameter_name_index:]
        self.type = type_text.strip()
        # Anything from an '=' onwards is an initializer (e.g. int i = 5)
        # and is not part of the name.
        self.name = sub(r'=.*', '', name_text).strip()
        self.row = row

    @memoized
    def lower_with_underscores_name(self):
        """Returns the parameter name converted to lower_with_underscores."""
        return _convert_to_lower_with_underscores(self.name)
405
406
class SingleLineView(object):
    """Presents a range of several lines as one line (the line breaks are
       replaced by a single space) so that it is easier to search."""

    def __init__(self, lines, start_position, end_position):
        """Create a SingleLineView instance.

        Args:
          lines: a list of multiple lines to combine into a single line.
          start_position: offset within lines of where to start the single line.
          end_position: just after where to end (like a slice operation).
        """
        first_row = start_position.row

        # The slice is a copy, so trimming it leaves the caller's lines
        # untouched.
        rows = lines[first_row:end_position.row + 1]

        # Cut the tail of the last row before the head of the first row;
        # when both are the same row, the start column then still refers to
        # the original text.
        rows[-1] = rows[-1][:end_position.column]
        rows[0] = rows[0][start_position.column:]

        self.single_line = ' '.join(rows)

        # Width that each row occupies in single_line, counting the joining
        # space, so a column can be mapped back to its original row.
        self._row_lengths = [len(row) + 1 for row in rows]
        self._starting_row = first_row

    def convert_column_to_row(self, single_line_column_number):
        """Convert the column number from the single line into the original
        line number.

        Special cases:
        * Columns in the added spaces are considered part of the previous line.
        * Columns beyond the end of the line are considered part of the last
        line in the view."""
        row_offset = 0
        columns_consumed = 0
        # The last row is the fallback, so only the rows before it need to
        # be tested.
        for row_length in self._row_lengths[:-1]:
            columns_consumed += row_length
            if single_line_column_number < columns_consumed:
                break
            row_offset += 1
        return self._starting_row + row_offset
450
451
def create_skeleton_parameters(all_parameters):
    """Reduces a parameter list to its skeleton.

    Everything that is neither a type word, a name word nor a parameter
    separating comma is overwritten with spaces, so what is left of each
    parameter keeps the columns it had in the original. A comma is appended
    after the last parameter so that every parameter is followed by one."""
    blanked_patterns = [
        # Template parameters, function declaration parameters, etc.
        r'(<[^<>]*?>)|(\([^\(\)]*?\))|(\{[^\{\}]*?\})',
        # Initializers.
        r'=[^,]*',
        # :: and everything before it.
        r'[^,]*::',
        # Modifiers like &, *.
        r'[&*]',
        # const modifiers.
        r'\bconst\s+(?=[A-Za-z])',
        # Numerical modifiers like long.
        r'\b(unsigned|long|short)\s+(?=unsigned|long|short|int|char|double|float)',
    ]

    skeleton = all_parameters
    for pattern in blanked_patterns:
        skeleton = iteratively_replace_matches_with_char(pattern, ' ', skeleton)

    # Nothing but whitespace left means there are no parameters, and then no
    # trailing comma is added.
    if not skeleton.strip():
        return skeleton
    return skeleton + ','
480
481
def find_parameter_name_index(skeleton_parameter):
    """Determines where the parameter name starts in a skeleton parameter.

    Returns the index just after the last space that precedes the final
    word, provided some content comes before that space. Otherwise the
    parameter has no name and the length of skeleton_parameter is returned.
    """
    trimmed = skeleton_parameter.rstrip()
    space_index = trimmed.rfind(' ')
    # Without a space, or with only whitespace in front of it, the single
    # word is the type and there is no name.
    if space_index == -1 or not skeleton_parameter[:space_index].strip():
        return len(skeleton_parameter)
    return space_index + 1
490
491
def parameter_list(elided_lines, start_position, end_position):
    """Generator yielding a Parameter for each of a function's parameters.

    Args:
      elided_lines: The lines that contain the parameters.
      start_position: Position of the '(' that opens the parameters.
      end_position: Position just after the ')' that closes them.
    """
    # Step inside the outer parentheses of the parameters.
    inner_start = Position(row=start_position.row, column=start_position.column + 1)
    inner_end = Position(row=end_position.row, column=end_position.column - 1)
    view = SingleLineView(elided_lines, inner_start, inner_end)
    skeleton = create_skeleton_parameters(view.single_line)

    # In the skeleton every parameter is terminated by a comma, so walk
    # from comma to comma until none is left.
    parameter_start = 0
    comma_index = skeleton.find(',', parameter_start)
    while comma_index != -1:
        row = view.convert_column_to_row(comma_index)

        # The skeleton tells where the name starts; the real text is taken
        # from the same columns of the unsimplified line.
        name_offset = find_parameter_name_index(skeleton[parameter_start:comma_index])
        parameter_text = view.single_line[parameter_start:comma_index]
        yield Parameter(parameter_text, name_offset, row)

        parameter_start = comma_index + 1
        comma_index = skeleton.find(',', parameter_start)
516
517
class _FunctionState(object):
    """Tracks current function name and the number of lines in its body.

    Attributes:
      min_confidence: The minimum confidence level to use while checking style.
      current_function: The name given to the most recent begin() call.
      in_a_function: True between a begin() call and the following end().
      lines_in_function: The number of lines counted so far by count().
      body_start_position: Position of the { or the ; for a prototype.
      end_position: Position just after the end of the function.

    The remaining attributes (function_name_start_position, is_declaration,
    is_pure, parameter_start_position, parameter_end_position) only exist
    once begin() has been called.
    """

    _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
    _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.

    def __init__(self, min_confidence):
        self.min_confidence = min_confidence
        self.current_function = ''
        self.in_a_function = False
        self.lines_in_function = 0
        # Make sure these will not be mistaken for real positions (even when a
        # small amount is added to them).
        self.body_start_position = Position(-1000, 0)
        self.end_position = Position(-1000, 0)

    def begin(self, function_name, function_name_start_position, body_start_position, end_position,
              parameter_start_position, parameter_end_position, clean_lines):
        """Start analyzing function body.

        Args:
            function_name: The name of the function being tracked.
            function_name_start_position: Position in elided where the function name starts.
            body_start_position: Position in elided of the { or the ; for a prototype.
            end_position: Position in elided just after the final } (or after the ; for a prototype).
            parameter_start_position: Position in elided of the '(' for the parameters.
            parameter_end_position: Position in elided just after the ')' for the parameters.
            clean_lines: A CleansedLines instance containing the file.
        """
        self.in_a_function = True
        self.lines_in_function = -1  # Don't count the open brace line.
        self.current_function = function_name
        self.function_name_start_position = function_name_start_position
        self.body_start_position = body_start_position
        self.end_position = end_position
        # A body that "starts" with ';' is a declaration without a body.
        self.is_declaration = clean_lines.elided[body_start_position.row][body_start_position.column] == ';'
        self.parameter_start_position = parameter_start_position
        self.parameter_end_position = parameter_end_position
        self.is_pure = False
        if self.is_declaration:
            # Pure virtual: only "= 0" sits between the ')' and the ';'.
            characters_after_parameters = SingleLineView(clean_lines.elided, parameter_end_position, body_start_position).single_line
            self.is_pure = bool(match(r'\s*=\s*0\s*', characters_after_parameters))
        self._clean_lines = clean_lines
        # Parsed lazily by parameter_list(); None means "not parsed yet".
        self._parameter_list = None

    def modifiers_and_return_type(self):
        """Returns the modifiers and the return type."""
        # Go backwards from where the parameters start until we encounter one
        # of several things:
        #   ';' or '{' or '}' or 'private:', etc. or '#' or return Position(0, 0)
        elided = self._clean_lines.elided
        start_modifiers = _rfind_in_lines(r';|\{|\}|((private|public|protected):)|(#.*)',
                                          elided, self.parameter_start_position, Position(0, 0))
        return SingleLineView(elided, start_modifiers, self.function_name_start_position).single_line.strip()

    def parameter_list(self):
        """Returns the function's parameters as a tuple of Parameter objects.

        The parameters are parsed on the first call and cached afterwards.
        """
        # Compare against None rather than testing truthiness: a function
        # without parameters yields an empty tuple, which must count as a
        # cached result instead of being parsed again on every call.
        if self._parameter_list is None:
            # Store the final result as a tuple since that is immutable.
            self._parameter_list = tuple(parameter_list(self._clean_lines.elided, self.parameter_start_position, self.parameter_end_position))

        return self._parameter_list

    def count(self, line_number):
        """Count line in current function body.

        Args:
          line_number: The number of the line; it is counted only while in
                       a function and when it is not before the row where
                       the body starts.
        """
        if self.in_a_function and line_number >= self.body_start_position.row:
            self.lines_in_function += 1

    def check(self, error, line_number):
        """Report if too many lines in function body.

        Args:
          error: The function to call with any errors found.
          line_number: The number of the line to check.
        """
        if match(r'T(EST|est)', self.current_function):
            base_trigger = self._TEST_TRIGGER
        else:
            base_trigger = self._NORMAL_TRIGGER
        # The allowed size doubles with every confidence level.
        trigger = base_trigger * 2 ** self.min_confidence

        if self.lines_in_function > trigger:
            # The level is floor(log2(lines / base_trigger)), capped at 5.
            # With the normal trigger of 250:
            # 251-499 => 0, 500-999 => 1, 1000-1999 => 2, 2000-3999 => 3, ...
            error_level = int(math.log(self.lines_in_function / base_trigger, 2))
            if error_level > 5:
                error_level = 5
            error(line_number, 'readability/fn_size', error_level,
                  'Small and focused functions are preferred:'
                  ' %s has %d non-comment lines'
                  ' (error triggered by exceeding %d lines).'  % (
                      self.current_function, self.lines_in_function, trigger))

    def end(self):
        """Stop analyzing function body."""
        self.in_a_function = False
617
class _IncludeError(Exception):
    """Exception type that stands for an include order problem in a file."""
621
622
class FileInfo(object):
    """Provides utility functions for filenames.

    FileInfo provides easy access to the components of a file's path
    relative to the project root.
    """

    def __init__(self, filename):
        self._filename = filename

    def full_name(self):
        """Returns the absolute path, with Windows separators made Unix-like."""
        return os.path.abspath(self._filename).replace('\\', '/')

    def repository_name(self):
        r"""Full name after removing the local path to the repository.

        If we have a real absolute path name here we can try to do something smart:
        detecting the root of the checkout and truncating /path/to/checkout from
        the name so that we get header guards that don't include things like
        "C:\Documents and Settings\..." or "/home/username/..." in them and thus
        people on different computers who have checked the source out to different
        locations won't see bogus errors.

        Returns:
          The path relative to the top of the SVN or git checkout when one
          is found, otherwise the full name.
        """
        fullname = self.full_name()

        if os.path.exists(fullname):
            project_dir = os.path.dirname(fullname)

            if os.path.exists(os.path.join(project_dir, ".svn")):
                # If there's a .svn file in the current directory, we
                # recursively look up the directory tree for the top
                # of the SVN checkout. The walk stops at the filesystem
                # root, where a directory is its own parent.
                root_dir = project_dir
                one_up_dir = os.path.dirname(root_dir)
                while (one_up_dir != root_dir
                       and os.path.exists(os.path.join(one_up_dir, ".svn"))):
                    root_dir = os.path.dirname(root_dir)
                    one_up_dir = os.path.dirname(one_up_dir)

                prefix = os.path.commonprefix([root_dir, project_dir])
                return fullname[len(prefix) + 1:]

            # Not SVN? Try to find a git top level directory by
            # searching up from the current path.
            root_dir = os.path.dirname(fullname)
            while (root_dir != os.path.dirname(root_dir)
                   and not os.path.exists(os.path.join(root_dir, ".git"))):
                root_dir = os.path.dirname(root_dir)

            # This check has to come after the loop: when the file sits in
            # the top directory of the checkout, the loop body never runs.
            if os.path.exists(os.path.join(root_dir, ".git")):
                prefix = os.path.commonprefix([root_dir, project_dir])
                return fullname[len(prefix) + 1:]

        # Don't know what to do; header guard warnings may be wrong...
        return fullname

    def split(self):
        """Splits the file into the directory, basename, and extension.

        For 'chrome/browser/browser.cpp', split() would
        return ('chrome/browser', 'browser', '.cpp')

        Returns:
          A tuple of (directory, basename, extension).
        """

        googlename = self.repository_name()
        project, rest = os.path.split(googlename)
        return (project,) + os.path.splitext(rest)

    def base_name(self):
        """File base name - text after the final slash, before the final period."""
        return self.split()[1]

    def extension(self):
        """File extension - text from the final period onwards (e.g. '.cpp')."""
        return self.split()[2]

    def no_extension(self):
        """Directory and base name joined by '/', i.e. the name without its extension."""
        return '/'.join(self.split()[0:2])

    def is_source(self):
        """File has a source file extension."""
        return self.extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
707
708
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style (/* ... */) comments. '.' does not match a newline here,
# so a comment is only matched when it opens and closes on the same line.
# This RE is a little bit more complicated than one might expect, because we
# have to take care of removing spaces too, so that comments inside
# statements are handled better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-word
# character on the right. The last alternative matches the bare comment.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
729
730
def is_cpp_string(line):
    """Tells whether text appended to 'line' would land in a string constant.

    Neither single-line nor multi-line comments are taken into account.

    Args:
      line: A partial line of code, i.e. the first n characters of a line.

    Returns:
      True if the next character appended to 'line' would be inside a
      string constant.
    """
    # Neutralise escaped backslashes so that \\" is not mistaken for \".
    scrubbed = line.replace(r'\\', 'XX')
    all_quotes = scrubbed.count('"')
    escaped_quotes = scrubbed.count(r'\"')
    char_literal_quotes = scrubbed.count("'\"'")
    # An odd number of remaining quotes means a string is still open.
    return (all_quotes - escaped_quotes - char_literal_quotes) % 2 == 1
746
747
def find_next_multi_line_comment_start(lines, line_index):
    """Finds the next line that opens a comment continuing past that line.

    Args:
      lines: The lines to scan.
      line_index: Index of the first line to examine.

    Returns:
      The index of the first line at or after line_index that (ignoring
      surrounding whitespace) starts with '/*' and has no '*/' after it, or
      len(lines) if there is none.
    """
    for index in range(line_index, len(lines)):
        stripped = lines[index].strip()
        # A comment closed on its own line is not a multi-line comment.
        if stripped.startswith('/*') and stripped.find('*/', 2) < 0:
            return index
    return len(lines)
757
758
def find_next_multi_line_comment_end(lines, line_index):
    """Finds the line that closes the comment we are currently inside.

    Args:
      lines: The lines to scan.
      line_index: Index of the first line to examine.

    Returns:
      The index of the first line at or after line_index that (ignoring
      surrounding whitespace) ends with '*/', or len(lines) if there is none.
    """
    for index in range(line_index, len(lines)):
        if lines[index].strip().endswith('*/'):
            return index
    return len(lines)
766
767
def remove_multi_line_comments_from_range(lines, begin, end):
    """Overwrites lines[begin:end] in place with a placeholder comment.

    Args:
      lines: The list of lines to modify.
      begin: Index of the first line to overwrite.
      end: Index one past the last line to overwrite.
    """
    # A '// dummy' comment keeps each line non-empty, so we will not get
    # unnecessary blank line warnings later in the code.
    placeholder = '// dummy'
    index = begin
    while index < end:
        lines[index] = placeholder
        index += 1
774
775
def remove_multi_line_comments(lines, error):
    """Blanks out every multiline (C-style) comment in lines, in place.

    Args:
      lines: The list of lines to modify.
      error: The function to call if a comment is opened but never closed.
    """
    cursor = 0
    while cursor < len(lines):
        comment_start = find_next_multi_line_comment_start(lines, cursor)
        if comment_start >= len(lines):
            # No further multi-line comments.
            return

        comment_end = find_next_multi_line_comment_end(lines, comment_start)
        if comment_end >= len(lines):
            error(comment_start + 1, 'readability/multiline_comment', 5,
                  'Could not find end of multi-line comment')
            return

        # Resume scanning on the line after the comment just removed.
        cursor = comment_end + 1
        remove_multi_line_comments_from_range(lines, comment_start, cursor)
790
791
def cleanse_comments(line):
    """Strips a trailing //-comment and any one-line /* */ comments.

    Args:
      line: A line of C++ source.

    Returns:
      The line with single-line comments removed.
    """
    # Only the first '//' is considered, and only if it is not inside a
    # string constant.
    before, marker, _ = line.partition('//')
    if marker and not is_cpp_string(before):
        line = before
    # Get rid of /* ... */.
    return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
806
807
class CleansedLines(object):
    """Keeps three parallel views of a file's lines, each cleaned differently.

    1) elided holds the lines with both strings and comments removed,
    2) lines holds the lines with only comments removed, and
    3) raw_lines holds the lines exactly as they were supplied.
    elided and lines are lists with one entry per supplied line.
    """

    def __init__(self, lines):
        self.raw_lines = lines
        self._num_lines = len(lines)
        self.lines = []
        self.elided = []
        for raw_line in lines:
            self.lines.append(cleanse_comments(raw_line))
            # Strings are collapsed before comments are stripped, so that
            # text such as "http://" is not taken for a comment.
            collapsed = self.collapse_strings(raw_line)
            self.elided.append(cleanse_comments(collapsed))

    def num_lines(self):
        """Returns how many lines were supplied at construction."""
        return self._num_lines

    @staticmethod
    def collapse_strings(elided):
        """Reduces string and char literals on a line to bare "" or ''.

        Lines matched by _RE_PATTERN_INCLUDE are returned untouched.

        Args:
          elided: The line being processed.

        Returns:
          The line with collapsed strings.
        """
        if _RE_PATTERN_INCLUDE.match(elided):
            return elided

        # Remove escaped characters first to make quote/single quote
        # collapsing basic.  Things that look like escaped characters
        # shouldn't occur outside of strings and chars.
        without_escapes = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
        without_chars = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub(
            "''", without_escapes)
        return _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', without_chars)
851
852
def close_expression(elided, position):
    """Locates the bracket that closes the ( or { or [ at 'position'.

    If elided[position.row][position.column] is a '(' or '{' or '[', scans
    forward, across lines if necessary, for the matching closing character.

    Args:
      elided: A CleansedLines.elided instance containing the file.
      position: The position of the opening item.

    Returns:
      The Position *past* the closing brace, or Position(len(elided), -1)
      if the character at 'position' is not an opening bracket or no close
      is ever found. Note we ignore strings and comments when matching.
    """
    bracket_patterns = {
        '(': r'[\(\)]',
        '[': r'[\[\]]',
        '{': r'[\{\}]',
    }
    opener = elided[position.row][position.column]
    pattern = bracket_patterns.get(opener)
    if pattern is None:
        return Position(len(elided), -1)

    depth = 1
    # On the first line, start just after the opening character.
    column = position.column + 1
    for row, text in enumerate(elided[position.row:], position.row):
        remainder = text[column:]

        # Walk through every opening/closing character left on this line.
        found = search(pattern, remainder)
        while found:
            consumed = found.end(0)
            column += consumed
            remainder = remainder[consumed:]
            if found.group(0) == opener:
                depth += 1
            else:
                depth -= 1
                if depth == 0:
                    return Position(row, column)
            found = search(pattern, remainder)

        # Subsequent lines are scanned from their beginning.
        column = 0

    # The given item was not closed.
    return Position(len(elided), -1)
905
def check_for_copyright(lines, error):
    """Logs an error if no Copyright message appears at the top of the file.

    Only lines[1] through lines[10] are searched (case-insensitively);
    lines[0] is skipped because it is a dummy line at the front.

    Args:
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """

    # We'll say it should occur by line 10. Don't forget there's a
    # dummy line at the front.
    # range() is used instead of the Python 2-only xrange(); at most ten
    # indices are produced, so this works on Python 2 and 3 alike.
    for line_number in range(1, min(len(lines), 11)):
        if re.search(r'Copyright', lines[line_number], re.I):
            return

    # No copyright line was found.
    error(0, 'legal/copyright', 5,
          'No copyright message found.  '
          'You should have a line: "Copyright [year] <Copyright Owner>"')
918
919
920# TODO(jww) After the transition of Blink into the Chromium repo, this function
921# should be removed. This will strictly enforce Chromium-style header guards,
922# rather than allowing traditional WebKit header guards and Chromium-style
923# simultaneously.
def get_legacy_header_guard_cpp_variable(filename):
    """Returns the acceptable legacy (WebKit-style) header guard variables.

    Args:
      filename: The name of a C++ header file.

    Returns:
      A tuple (special_name, standard_name). standard_name is the file's
      base name with '-', '.' and whitespace replaced by '_'; special_name
      is the same with a 'WTF_' prefix when the path contains '/wtf/', and
      identical to standard_name otherwise.

    """

    # Restores original filename in case that style checker is invoked from
    # Emacs's flymake.
    original_name = re.sub(r'_flymake\.h$', '.h', filename)

    standard_name = sub(r'[-.\s]', '_', os.path.basename(original_name))

    # Files under WTF typically have header guards that start with WTF_.
    prefix = "WTF_" if '/wtf/' in original_name else ''
    return (prefix + standard_name, standard_name)
948
949
def get_header_guard_cpp_variable(filename):
    """Returns the Chromium-style CPP header guard variable for a file.

    Args:
      filename: The name of a C++ header file.

    Returns:
      The path upper-cased, with '-', '.', '/' and whitespace replaced by
      '_' and a trailing '_' appended; a leading 'Source/' is first
      rewritten to 'blink/'.

    """

    # Restores original filename in case that style checker is invoked from
    # Emacs's flymake.
    original_name = re.sub(r'_flymake\.h$', '.h', filename)

    # If it's a full path and starts with Source/, replace Source with blink
    # since that will be the new style directory.
    blink_name = sub(r'^Source\/', 'blink/', original_name)

    underscored = sub(r'[-.\s\/]', '_', blink_name)
    return underscored.upper() + '_'
973
974
def check_for_header_guard(filename, lines, error):
    """Checks that the file contains a correctly named header guard.

    Logs an error if the first #ifndef and first #define are missing or do
    not name the same symbol.  Otherwise logs an error if that symbol is
    neither a legacy guard name nor the Chromium-style guard name.

    Args:
      filename: The name of the C++ header file.
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """

    legacy_cpp_var = get_legacy_header_guard_cpp_variable(filename)
    cpp_var = get_header_guard_cpp_variable(filename)

    ifndef = None
    define = None
    ifndef_line_number = 0
    for line_number, line in enumerate(lines):
        tokens = line.split()
        if len(tokens) < 2:
            continue
        directive, argument = tokens[:2]
        # Remember the argument of the first #ifndef and the first #define.
        if directive == '#ifndef' and not ifndef:
            ifndef = argument
            ifndef_line_number = line_number
        if directive == '#define' and not define:
            define = argument
        if ifndef and define:
            break

    guard_present = ifndef and define and ifndef == define
    if not guard_present:
        error(0, 'build/header_guard', 5,
              'No #ifndef header guard found, suggested CPP variable is: %s' %
              legacy_cpp_var[0])
        return

    # The guard should be File_h or, for Chromium style, BLINK_PATH_TO_FILE_H_.
    if ifndef != cpp_var and ifndef not in legacy_cpp_var:
        error(ifndef_line_number, 'build/header_guard', 5,
              '#ifndef header guard has wrong style, please use: %s' % legacy_cpp_var[0])
1016
1017
def check_for_unicode_replacement_characters(lines, error):
    """Reports every line that contains a Unicode replacement character.

    These indicate that either the file contained invalid UTF-8 (likely)
    or Unicode replacement characters (which it shouldn't).  Note that
    it's possible for this to throw off line numbering if the invalid
    UTF-8 occurred adjacent to a newline.

    Args:
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """
    replacement_character = u'\ufffd'
    for index, text in enumerate(lines):
        if replacement_character not in text:
            continue
        error(index, 'readability/utf8', 5,
              'Line contains invalid UTF-8 (or Unicode replacement character).')
1034
1035
def check_for_new_line_at_eof(lines, error):
    """Reports a file whose last line is not terminated by a newline.

    Args:
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """

    # The lines array was created by adding two newlines to the original
    # file (go figure), then splitting on \n.  To verify that the file ends
    # in \n, we just have to make sure the last-but-two element of the
    # array exists and is empty.
    ends_with_newline = len(lines) >= 3 and not lines[-2]
    if ends_with_newline:
        return
    error(len(lines) - 2, 'whitespace/ending_newline', 5,
          'Could not find a newline character at the end of the file.')
1051
1052
def check_for_multiline_comments_and_strings(clean_lines, line_number, error):
    """Reports /* ... */ comments or "..." strings that run past one line.

    /* ... */ comments are legit inside macros, for one line.
    Otherwise, we prefer // comments, so it's ok to warn about the
    other.  Likewise, it's ok for strings to extend across multiple
    lines, as long as a line continuation character (backslash)
    terminates each line. Although not currently prohibited by the C++
    style guide, it's ugly and unnecessary. We don't do well with either
    in this lint program, so we warn about both.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    # Remove all \\ (escaped backslashes) from the line. They are OK, and the
    # second (escaped) slash may trigger later \" detection erroneously.
    text = clean_lines.elided[line_number].replace('\\\\', '')

    # More comment openers than closers: a comment continues past this line.
    comment_left_open = text.count('/*') > text.count('*/')
    if comment_left_open:
        error(line_number, 'readability/multiline_comment', 5,
              'Complex multi-line /*...*/-style comment found. '
              'Lint may give bogus warnings.  '
              'Consider replacing these with //-style comments, '
              'with #if 0...#endif, '
              'or with more clearly structured multi-line comments.')

    # An odd number of unescaped double quotes: a string is left open.
    unescaped_quotes = text.count('"') - text.count('\\"')
    if unescaped_quotes % 2 != 0:
        error(line_number, 'readability/multiline_string', 5,
              'Multi-line string ("...") found.  This lint script doesn\'t '
              'do well with such strings, and may give bogus warnings.  They\'re '
              'ugly and unnecessary, and you should use concatenation instead".')
1088
1089
# Pairs of (thread-unsafe function, thread-safe replacement), each written
# with its opening parenthesis so that only calls are matched.
_THREADING_LIST = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('readdir(', 'readdir_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )


def check_posix_threading(clean_lines, line_number, error):
    """Checks for calls to thread-unsafe functions.

    Much code has been originally written without consideration of
    multi-threading. Also, engineers are relying on their old experience;
    they have learned posix before threading extensions were added. These
    tests guide the engineers to use thread-safe functions (when using
    posix directly).

    An occurrence counts as a call unless it is immediately preceded by an
    alphanumeric character or one of '_', '.', '>' (i.e. it is the tail of
    a longer identifier or a member access).  Every occurrence on the line
    is examined, and each function is reported at most once per line.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    line = clean_lines.elided[line_number]
    for single_thread_function, multithread_safe_function in _THREADING_LIST:
        index = line.find(single_thread_function)
        # Keep looking past occurrences that are part of another name, so
        # that e.g. 'my_rand() + rand()' still reports the bare rand() call.
        while index >= 0:
            preceded_by_name = index > 0 and (
                line[index - 1].isalnum()
                or line[index - 1] in ('_', '.', '>'))
            if not preceded_by_name:
                error(line_number, 'runtime/threadsafe_fn', 2,
                      'Consider using ' + multithread_safe_function +
                      '...) instead of ' + single_thread_function +
                      '...) for improved thread safety.')
                break
            index = line.find(single_thread_function, index + 1)
1131
1132
# Matches a statement of the form *count++; or *count--; which moves the
# pointer instead of changing the value it points to.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')


def check_invalid_increment(clean_lines, line_number, error):
    """Reports statements such as *count++ that step the pointer itself.

    For example following function:
    void increment_counter(int* count) {
        *count++;
    }
    is invalid, because it effectively does count++, moving pointer, and should
    be replaced with ++*count, (*count)++ or *count += 1.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    statement = clean_lines.elided[line_number]
    if not _RE_PATTERN_INVALID_INCREMENT.match(statement):
        return
    error(line_number, 'runtime/invalid_increment', 5,
          'Changing pointer instead of value (or unused value of operator*).')
1158
1159
class _ClassInfo(object):
    """Record of what has been learned so far about one class declaration."""

    def __init__(self, name, line_number):
        # The class's name and the line on which its declaration was seen.
        self.name = name
        self.line_number = line_number

        # Brace tracking for the class body.
        self.seen_open_brace = False
        self.brace_depth = 0

        # Inheritance and virtual-method bookkeeping.
        self.is_derived = False
        self.virtual_method_line_number = None
        self.has_virtual_destructor = False

        # Descriptions of the bool and unsigned bitfield members found.
        self.bool_bitfields = []
        self.unsigned_bitfields = []
1173
1174
class _ClassState(object):
    """Tracks which class declarations the parser is currently inside.

    It maintains a stack of _ClassInfos representing the parser's guess
    as to the current nesting of class declarations. The innermost class
    is at the top (back) of the stack. Typically, the stack will either
    be empty or have exactly one entry.
    """

    def __init__(self):
        self.classinfo_stack = []

    def check_finished(self, error):
        """Reports the outermost class whose declaration never completed.

        Call this when all lines in a file have been processed.
        Args:
          error: The function to call with any errors found.
        """
        if not self.classinfo_stack:
            return

        # Note: This test can result in false positives if #ifdef constructs
        # get in the way of brace matching. See the testBuildClass test in
        # cpp_style_unittest.py for an example of this.
        outermost = self.classinfo_stack[0]
        error(outermost.line_number, 'build/class', 5,
              'Failed to find complete declaration of class %s' %
              outermost.name)
1201
1202
class _FileState(object):
    """Per-file state: the file's language and whether a warning was given.

    The language is taken from the file extension where possible; for
    headers it is guessed lazily from the file's contents.
    """

    def __init__(self, clean_lines, file_extension):
        self._did_inside_namespace_indent_warning = False
        self._clean_lines = clean_lines
        if file_extension == 'h':
            # In the case of header files, it is unknown if the file
            # is c / objective c or not, so set these values to None and then
            # if one is requested, use heuristics to guess the value.
            self._is_objective_c = None
            self._is_c = None
        else:
            self._is_objective_c = file_extension in ['m', 'mm']
            self._is_c = file_extension == 'c'

    def set_did_inside_namespace_indent_warning(self):
        """Records that the inside-namespace indent warning has been given."""
        self._did_inside_namespace_indent_warning = True

    def did_inside_namespace_indent_warning(self):
        """Returns whether the inside-namespace indent warning was recorded."""
        return self._did_inside_namespace_indent_warning

    def is_objective_c(self):
        """Returns whether the file is Objective-C, guessing for headers."""
        if self._is_objective_c is None:
            # Starting with @ or #import seem like the best indications
            # that we have an Objective C file.
            self._is_objective_c = any(
                text.startswith("@") or text.startswith("#import")
                for text in self._clean_lines.elided)
        return self._is_objective_c

    def is_c(self):
        """Returns whether the file is C, guessing for headers."""
        if self._is_c is None:
            # if extern "C" is found, then it is a good indication
            # that we have a C header file.
            self._is_c = any(
                text.startswith('extern "C"')
                for text in self._clean_lines.lines)
        return self._is_c

    def is_c_or_objective_c(self):
        """Return whether the file is considered C or Objective-C."""
        return self.is_c() or self.is_objective_c()
1256
1257
class _EnumState(object):
    """Tracks whether we are inside an enum declaration, and judges whether
    the enumerators seen follow the style guide.
    """

    def __init__(self):
        self.in_enum_decl = False
        # Nothing in this class sets this to True; it is only reset here.
        self.is_webidl_enum = False

    def process_clean_line(self, line):
        """Updates the enum state with one line and checks its enumerators.

        Args:
          line: A line of the file.

        Returns:
          False if the line holds an enumerator that starts with a lowercase
          letter, or an all-uppercase enumerator while is_webidl_enum is not
          set; True otherwise.
        """
        # FIXME: The regular expressions for expr_all_uppercase and expr_enum_end only accept integers
        # and identifiers for the value of the enumerator, but do not accept any other constant
        # expressions. However, this is sufficient for now (11/27/2012).
        expr_all_uppercase = r'\s*[A-Z0-9_]+\s*(?:=\s*[a-zA-Z0-9]+\s*)?,?\s*$'
        expr_starts_lowercase = r'\s*[a-z]'
        expr_enum_end = r'}\s*(?:[a-zA-Z0-9]+\s*(?:=\s*[a-zA-Z0-9]+)?)?\s*;\s*'
        expr_enum_start = r'\s*enum(?:\s+[a-zA-Z0-9]+)?\s*\{?\s*'

        if not self.in_enum_decl:
            # A line holding only the start of an enum opens a declaration.
            if match(expr_enum_start + r'$', line):
                self.in_enum_decl = True
                return True

            # Otherwise look for an enum declared entirely on this line.
            single_line_enum = match(
                expr_enum_start + r'(?P<members>.*)' + expr_enum_end + r'$', line)
            if not single_line_enum:
                return True
            for member in single_line_enum.group('members').split(','):
                is_uppercase = match(expr_all_uppercase, member)
                is_lowercase = match(expr_starts_lowercase, member)
                if is_lowercase or (is_uppercase and not self.is_webidl_enum):
                    self.is_webidl_enum = False
                    return False
            return True

        # Inside a multi-line enum declaration.
        if match(r'\s*' + expr_enum_end + r'$', line):
            self.in_enum_decl = False
            self.is_webidl_enum = False
            return True
        if match(expr_all_uppercase, line):
            return self.is_webidl_enum
        if match(expr_starts_lowercase, line):
            return False
        return True
1302
def check_for_non_standard_constructs(clean_lines, line_number,
                                      class_state, error):
    """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

    Complain about several constructs which gcc-2 accepts, but which are
    not standard C++.  Warning about these in lint is one way to ease the
    transition to new compilers.
    - put storage class first (e.g. "static const" instead of "const static").
    - "%lld" instead of "%qd" in printf-type functions.
    - "%1$d" is non-standard in printf-type functions.
    - "\\%" is an undefined character escape sequence.
    - text after #endif is not allowed.
    - invalid inner-style forward declaration.
    - >? and <? operators, and their >?= and <?= cousins.
    - classes with virtual methods need virtual destructors (compiler warning
        available, but not turned on yet.)

    Additionally, check for constructor/destructor style violations as it
    is very convenient to do so while checking for gcc-2 compliance, and
    check the types used for bitfield members of classes.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      class_state: A _ClassState instance which maintains information about
                   the current stack of nested class declarations being parsed.
      error: A callable to which errors are reported, which takes parameters:
             line number, category, confidence level, and message
    """

    # Remove comments from the line, but leave in strings for now.
    line = clean_lines.lines[line_number]

    if search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
        error(line_number, 'runtime/printf_format', 3,
              '%q in format strings is deprecated.  Use %ll instead.')

    if search(r'printf\s*\(.*".*%\d+\$', line):
        error(line_number, 'runtime/printf_format', 2,
              '%N$ formats are unconventional.  Try rewriting to avoid them.')

    # Remove escaped backslashes before looking for undefined escapes.
    line = line.replace('\\\\', '')

    if search(r'("|\').*\\(%|\[|\(|{)', line):
        error(line_number, 'build/printf_format', 3,
              '%, [, (, and { are undefined character escapes.  Unescape them.')

    # For the rest, work with both comments and strings removed.
    line = clean_lines.elided[line_number]

    if search(r'\b(const|volatile|void|char|short|int|long'
              r'|float|double|signed|unsigned'
              r'|schar|u?int8|u?int16|u?int32|u?int64)'
              r'\s+(auto|register|static|extern|typedef)\b',
              line):
        error(line_number, 'build/storage_class', 5,
              'Storage class (static, extern, typedef, etc) should be first.')

    if match(r'\s*#\s*endif\s*[^/\s]+', line):
        error(line_number, 'build/endif_comment', 5,
              'Uncommented text after #endif is non-standard.  Use a comment.')

    if match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
        error(line_number, 'build/forward_decl', 5,
              'Inner-style forward declarations are invalid.  Remove this line.')

    if search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', line):
        error(line_number, 'build/deprecated', 3,
              '>? and <? (max and min) operators are non-standard and deprecated.')

    # Track class entry and exit, and attempt to find cases within the
    # class declaration that don't meet the C++ style
    # guidelines. Tracking is very dependent on the code matching Google
    # style guidelines, but it seems to perform well enough in testing
    # to be a worthwhile addition to the checks.
    classinfo_stack = class_state.classinfo_stack
    # Look for a class declaration
    class_decl_match = match(
        r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line)
    if class_decl_match:
        classinfo_stack.append(_ClassInfo(class_decl_match.group(3), line_number))

    # Everything else in this function uses the top of the stack if it's
    # not empty.
    if not classinfo_stack:
        return

    classinfo = classinfo_stack[-1]

    # If the opening brace hasn't been seen look for it and also
    # parent class declarations.
    if not classinfo.seen_open_brace:
        # If the line has a ';' in it, assume it's a forward declaration or
        # a single-line class declaration, which we won't process.
        if line.find(';') != -1:
            classinfo_stack.pop()
            return
        classinfo.seen_open_brace = (line.find('{') != -1)
        # Look for a bare ':'
        if search('(^|[^:]):($|[^:])', line):
            classinfo.is_derived = True
        if not classinfo.seen_open_brace:
            return  # Everything else in this function is for after open brace

    # The class may have been declared with namespace or classname qualifiers.
    # The constructor and destructor will not have those qualifiers.
    base_classname = classinfo.name.split('::')[-1]

    # Look for single-argument constructors that aren't marked explicit.
    # Technically a valid construct, but against style.
    args = match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)'
                 % re.escape(base_classname),
                 line)
    if (args
        and args.group(1) != 'void'
        and not match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
                      args.group(1).strip())):
        error(line_number, 'runtime/explicit', 5,
              'Single-argument constructors should be marked explicit.')

    # Look for methods declared virtual.
    if search(r'\bvirtual\b', line):
        classinfo.virtual_method_line_number = line_number
        # Only look for a destructor declaration on the same line. It would
        # be extremely unlikely for the destructor declaration to occupy
        # more than one line.
        # The class name is escaped here just as in the constructor checks
        # above, so all three patterns are built the same way.
        if search(r'~%s\s*\(' % re.escape(base_classname), line):
            classinfo.has_virtual_destructor = True

    # Look for class end.
    brace_depth = classinfo.brace_depth
    brace_depth = brace_depth + line.count('{') - line.count('}')
    if brace_depth <= 0:
        classinfo = classinfo_stack.pop()
        # Try to detect missing virtual destructor declarations.
        # For now, only warn if a non-derived class with virtual methods lacks
        # a virtual destructor. This is to make it less likely that people will
        # declare derived virtual destructors without declaring the base
        # destructor virtual.
        if ((classinfo.virtual_method_line_number is not None)
            and (not classinfo.has_virtual_destructor)
            and (not classinfo.is_derived)):  # Only warn for base classes
            error(classinfo.line_number, 'runtime/virtual', 4,
                  'The class %s probably needs a virtual destructor due to '
                  'having virtual method(s), one declared at line %d.'
                  % (classinfo.name, classinfo.virtual_method_line_number))
        # Look for mixed bool and unsigned bitfields.
        if (classinfo.bool_bitfields and classinfo.unsigned_bitfields):
            bool_list = ', '.join(classinfo.bool_bitfields)
            unsigned_list = ', '.join(classinfo.unsigned_bitfields)
            error(classinfo.line_number, 'runtime/bitfields', 5,
                  'The class %s contains mixed unsigned and bool bitfields, '
                  'which will pack into separate words on the MSVC compiler.\n'
                  'Bool bitfields are [%s].\nUnsigned bitfields are [%s].\n'
                  'Consider converting bool bitfields to unsigned.'
                  % (classinfo.name, bool_list, unsigned_list))
    else:
        classinfo.brace_depth = brace_depth

    # Set once this line is recognised as a bool or unsigned bitfield, so the
    # generic bitfield check below does not report it again.
    well_typed_bitfield = False
    # Look for bool <name> : 1 declarations.
    args = search(r'\bbool\s+(\S*)\s*:\s*\d+\s*;', line)
    if args:
        classinfo.bool_bitfields.append('%d: %s' % (line_number, args.group(1)))
        well_typed_bitfield = True

    # Look for unsigned <name> : n declarations.
    args = search(r'\bunsigned\s+(?:int\s+)?(\S+)\s*:\s*\d+\s*;', line)
    if args:
        classinfo.unsigned_bitfields.append('%d: %s' % (line_number, args.group(1)))
        well_typed_bitfield = True

    # Look for other bitfield declarations. We don't care about those in
    # size-matching structs.
    if not (well_typed_bitfield or classinfo.name.startswith('SameSizeAs') or
            classinfo.name.startswith('Expected')):
        args = match(r'\s*(\S+)\s+(\S+)\s*:\s*\d+\s*;', line)
        if args:
            error(line_number, 'runtime/bitfields', 4,
                  'Member %s of class %s defined as a bitfield of type %s. '
                  'Please declare all bitfields as unsigned.'
                  % (args.group(2), classinfo.name, args.group(1)))
1485
def check_spacing_for_function_call(line, line_number, error):
    """Reports stray whitespace around the parentheses of a function call.

    If the line contains an if/for/foreach/while/switch header, only the text
    between that header's parentheses is inspected; otherwise the whole line
    is inspected.

    Args:
      line: The text of the line to check.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    # Control-flow headers follow looser spacing conventions than calls do,
    # so when one is present we narrow the inspected text to its condition.
    control_flow_headers = (
        r'\bif\s*\((.*)\)\s*{',
        r'\bfor\s*\((.*)\)\s*{',
        r'\bforeach\s*\((.*)\)\s*{',
        r'\bwhile\s*\((.*)\)\s*[{;]',
        r'\bswitch\s*\((.*)\)\s*{',
    )
    candidate = line  # Without a control-flow header the whole line is checked.
    for header_pattern in control_flow_headers:
        header = search(header_pattern, line)
        if header:
            candidate = header.group(1)  # Only the text inside the parens.
            break

    # Text that still mentions a control keyword (or return/new/delete) is
    # not treated as a plain function call.
    if search(r'\b(if|for|foreach|while|switch|return|new|delete)\b', candidate):
        return

    # Pointers/references to functions are recognized very crudely as
    # " (something)(maybe-something)" or " (something)(maybe-something,".
    if search(r' \([^)]+\)\([^)]*(\)|,$)', candidate):
        return

    # Pointers/references to arrays look like " (something)[something]".
    # The contents of [] are assumed to be short enough never to wrap.
    if search(r' \([^)]+\)\[[^\]]+\]', candidate):
        return

    # No space may follow an opening paren (as in "f( 3, 4 )"). A paren
    # preceded by a word character is reported as a function call; any other
    # paren is reported generically unless another paren or a trailing
    # backslash follows.
    if search(r'\w\s*\([ \t](?!\s*\\$)', candidate):
        error(line_number, 'whitespace/parens', 4,
              'Extra space after ( in function call')
    elif search(r'\([ \t]+(?!(\s*\\)|\()', candidate):
        error(line_number, 'whitespace/parens', 2,
              'Extra space after (')

    # No space may precede the ( of a call. Preprocessor lines and typedefs
    # are exempt.
    has_space_before_paren = search(r'\w\s+\(', candidate)
    if has_space_before_paren and not match(r'\s*(#|typedef)', candidate):
        error(line_number, 'whitespace/parens', 4,
              'Extra space before ( in function call')

    # A ) followed only by end of line, or by { and end of line, is assumed
    # to belong to a control statement and is not reported.
    if search(r'[^)\s]\s+\)(?!\s*$|{\s*$)', candidate):
        error(line_number, 'whitespace/parens', 2,
              'Extra space before )')
1544
1545
def is_blank_line(line):
    """Tells whether a line has no visible content.

    A line counts as blank when it is empty (or otherwise falsy) or when it
    is made up entirely of whitespace characters.

    Args:
      line: A line of a string.

    Returns:
      True, if the given line is blank.
    """
    if not line:
        return True
    return line.isspace()
1559
1560
def detect_functions(clean_lines, line_number, function_state, error):
    """Detects a function starting on this line and records its extent.

    The algorithm is simplistic and relies on the other style guidelines
    (especially spacing) being followed.
    Trivial bodies are unchecked, so constructors with huge initializer lists
    may be missed.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      function_state: Current function name and lines in body so far.
      error: The function to call with any errors found.
    """
    # Close the previously recorded function once we are one line past it.
    if function_state.end_position.row + 1 == line_number:
        function_state.end()

    # While a function is still open, no new one is looked for.
    if function_state.in_a_function:
        return

    line = clean_lines.lines[line_number]

    # A raw line ending in a backslash indicates a macro; leave it unchecked.
    if clean_lines.raw_lines[line_number].endswith('\\'):
        return

    # decls * & space::name( ...
    declaration = match(r'\s*(\w(\w|::|\*|\&|\s|<|>|,|~|(operator\s*(/|-|=|!|\+)+))*)\(', line)
    if not declaration:
        return

    # A name made only of capitals and underscores is taken to be a macro and
    # ignored, with TEST and TEST_F as the exceptions.
    last_token = declaration.group(1).split()[-1]
    if last_token not in ('TEST', 'TEST_F') and match(r'[A-Z_]+$', last_token):
        return

    # Scan forward for the first '{' or ';', remembering the left-stripped
    # text of every line visited so the header can be joined up if needed.
    header_pieces = []
    for row in xrange(line_number, clean_lines.num_lines()):
        row_text = clean_lines.elided[row]
        header_pieces.append(' ' + row_text.lstrip())
        terminator = search(r'{|;', row_text)
        if not terminator:
            continue

        body_start_position = Position(row, terminator.start(0))

        # Replace template constructs with _ so that no spaces remain in the function name,
        # while keeping the column numbers of other characters the same as "line".
        templates_masked = iteratively_replace_matches_with_char(r'<[^<>]*>', '_', line)
        name_match = search(r'((\w|:|<|>|,|~|(operator\s*(/|-|=|!|\+)+))*)\(', templates_masked)
        if not name_match:
            return  # The '(' must have been inside of a template.

        # Columns found in the masked line are applied to the original line.
        name_start, name_end = name_match.span(1)
        function = line[name_start:name_end]
        function_name_start_position = Position(line_number, name_start)

        if match(r'TEST', function):    # Handle TEST... macros
            test_arguments = search(r'(\(.*\))', ''.join(header_pieces))
            if test_arguments:             # Ignore bad syntax
                function += test_arguments.group(1)
        else:
            function += '()'

        parameter_start_position = Position(line_number, name_end)
        parameter_end_position = close_expression(clean_lines.elided, parameter_start_position)
        if parameter_end_position.row == len(clean_lines.elided):
            # No end was found.
            return

        if row_text[body_start_position.column] == ';':
            # The statement ends right after the semicolon.
            end_position = Position(body_start_position.row, body_start_position.column + 1)
        else:
            end_position = close_expression(clean_lines.elided, body_start_position)

        # Check for nonsensical positions. (This happens in test cases which check code snippets.)
        if parameter_end_position > body_start_position:
            return

        function_state.begin(function, function_name_start_position, body_start_position, end_position,
                             parameter_start_position, parameter_end_position, clean_lines)
        return

    # No body for the function (or evidence of a non-function) was found.
    error(line_number, 'readability/fn_size', 5,
          'Lint failed to find start of function body.')
1652
1653
def check_for_function_lengths(clean_lines, line_number, function_state, error):
    """Counts function body lines and reports over-long functions.

    For an overview why this is done, see:
    http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

    Lines that are empty or whitespace-only in clean_lines.lines are not
    counted, so as to avoid encouraging the removal of vertical space and
    comments just to get through a lint check.
    NOLINT *on the last line of a function* disables this check.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      function_state: Current function name and lines in body so far.
      error: The function to call with any errors found.
    """
    line = clean_lines.lines[line_number]
    raw_line = clean_lines.raw_lines[line_number]

    if function_state.end_position.row != line_number:
        # Not the last line: count it unless it is blank.
        if not match(r'^\s*$', line):
            function_state.count(line_number)
        return

    # Last line of the function: run the check unless NOLINT appears on the
    # raw line.
    if not search(r'\bNOLINT\b', raw_line):
        function_state.check(error, line_number)
1681
def _check_parameter_name_against_text(parameter, text, error):
    """Reports a parameter whose name merely repeats the given text.

    Args:
      parameter: The parameter to check; its lower_with_underscores_name(),
                 row and name are used.
      text: The text (a type or a trimmed function name) to compare against.
      error: The function to call with any errors found.

    Returns:
      False if the check failed (i.e. an error was produced), True otherwise.
    """
    # 'lower with underscores' is the canonical form for the comparison: it
    # is case insensitive yet keeps word breaks, so that 'elate' does not
    # look like a duplicate of 'NateLate'.
    canonical_name = parameter.lower_with_underscores_name()

    # For names longer than one character, "Object" is appended to every word
    # of the text so that names which did the same are caught as well.
    # Single-character names are exempt to avoid flagging 'b', which may be
    # an ok variable when used in an rgba function.
    if len(canonical_name) > 1:
        text = sub(r'(\w)\b', r'\1Object', text)

    # The second candidate is the lowercased acronym of the text, which
    # detects cases like ec for ExceptionCode.
    candidates = (_convert_to_lower_with_underscores(text),
                  _create_acronym(text).lower())

    if not any(canonical_name in candidate for candidate in candidates):
        return True

    error(parameter.row, 'readability/parameter_name', 5,
          'The parameter name "%s" adds no information, so it should be removed.' % parameter.name)
    return False
1707
1708
def check_function_definition_and_pass_ptr(type_text, row, location_description, error):
    """Checks that a function definition uses Pass*Ptr instead of *Ptr.

    Reports an error when RefPtr or OwnPtr is found in the type text, unless
    the text ends with '&', '*' or '* = 0'.

    Args:
       type_text: A string containing the type. (For return values, it may contain more than the type.)
       row: The row number of the type.
       location_description: Used to indicate where the type is. This is either 'parameter' or 'return'.
       error: The function to call with any errors found.
    """
    # Raw strings keep the regex escapes (\W, \*, \s) away from Python's own
    # string-escape processing; the pattern text is otherwise unchanged.
    #
    # NOTE(review): the leading group is a lookahead, not a lookbehind. The
    # character following that position must be 'R' or 'O', so the \W
    # alternative can never hold and only '^' can succeed; with default regex
    # flags that means only a RefPtr/OwnPtr at the very start of type_text is
    # matched. Confirm whether that restriction is intended before changing it.
    match_ref_or_own_ptr = r'(?=\W|^)(Ref|Own)Ptr(?=\W)'
    exceptions = r'(?:&|\*|\*\s*=\s*0)$'
    bad_type_usage = search(match_ref_or_own_ptr, type_text)
    exception_usage = search(exceptions, type_text)
    if not bad_type_usage or exception_usage:
        return
    # The lookarounds are zero-width, so group(0) is just 'RefPtr' or 'OwnPtr'.
    type_name = bad_type_usage.group(0)
    error(row, 'readability/pass_ptr', 5,
          'The %s type should use Pass%s instead of %s.' % (location_description, type_name, type_name))
1727
1728
def check_function_definition(filename, file_extension, clean_lines, line_number, function_state, error):
    """Checks a function definition or declaration for style issues.

    Nothing is done unless line_number is the row where the function body
    starts. On that row the following are checked: WEBKIT_EXPORT usage (for
    files under a /chromium/ path), Pass*Ptr usage in the return type and in
    each parameter type, and, for declarations, that parameter names add
    information beyond the function name or the parameter type.

    Args:
       filename: Filename of the file that is being processed.
       file_extension: The current file extension, without the leading dot.
       clean_lines: A CleansedLines instance containing the file.
       line_number: The number of the line to check.
       function_state: Current function name and lines in body so far.
       error: The function to call with any errors found.
    """
    if function_state.body_start_position.row != line_number:
        return

    modifiers_and_return_type = function_state.modifiers_and_return_type()

    def report_webkit_export(message):
        # Every WEBKIT_EXPORT complaint is reported at the function name's row.
        error(function_state.function_name_start_position.row, 'readability/webkit_export', 5,
              message)

    if '/chromium/' in filename and search(r'\bWEBKIT_EXPORT\b', modifiers_and_return_type):
        in_allowed_directory = ('/chromium/public/' in filename
                                or '/chromium/tests/' in filename
                                or 'chromium/platform' in filename)
        if not in_allowed_directory:
            report_webkit_export('WEBKIT_EXPORT should only appear in the chromium public (or tests) directory.')
        elif file_extension != "h":
            report_webkit_export('WEBKIT_EXPORT should only be used in header files.')
        elif not function_state.is_declaration or search(r'\binline\b', modifiers_and_return_type):
            report_webkit_export('WEBKIT_EXPORT should not be used on a function with a body.')
        elif function_state.is_pure:
            report_webkit_export('WEBKIT_EXPORT should not be used with a pure virtual function.')

    check_function_definition_and_pass_ptr(modifiers_and_return_type, function_state.function_name_start_position.row, 'return', error)

    parameter_list = function_state.parameter_list()
    for parameter in parameter_list:
        check_function_definition_and_pass_ptr(parameter.type, parameter.row, 'parameter', error)

        # The name checks apply only to declarations and to named parameters.
        if not function_state.is_declaration or not parameter.name:
            continue

        # For a single-parameter setter, compare the name with the function
        # name minus its 'set' prefix first.
        if len(parameter_list) == 1 and match('set[A-Z]', function_state.current_function):
            trimmed_function_name = function_state.current_function[len('set'):]
            if not _check_parameter_name_against_text(parameter, trimmed_function_name, error):
                continue  # Since an error was noted for this name, move to the next parameter.

        # Finally compare the name with the parameter's own type.
        _check_parameter_name_against_text(parameter, parameter.type, error)
1779
1780
def check_pass_ptr_usage(clean_lines, line_number, function_state, error):
    """Reports Pass*Ptr declarations that appear inside a function body.

    Only lines after the row where the function body starts are examined,
    and only when the line begins (after optional whitespace) with a
    Pass<Something>Ptr< type.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      function_state: Current function name and lines in body so far.
      error: The function to call with any errors found.
    """
    if not function_state.in_a_function:
        return

    line = clean_lines.lines[line_number]

    # The row where the body starts and anything before it are not examined.
    if line_number <= function_state.body_start_position.row:
        return

    declaration = match(r'^\s*Pass([A-Z][A-Za-z]*)Ptr<', line)
    if not declaration:
        return

    type_name = 'Pass%sPtr' % declaration.group(1)
    error(line_number, 'readability/pass_ptr', 5,
          'Local variables should never be %s (see '
          'http://webkit.org/coding/RefPtr.html).' % type_name)
1805
1806
def check_for_leaky_patterns(clean_lines, line_number, function_state, error):
    """Reports calls that are known to be leak prone.

    Two patterns are flagged: any call to GetDC/GetDCEx, and any call to
    CreateDC/CreateCompatibleDC on a line that does not also mention
    adoptPtr.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      function_state: Current function name and lines in body so far.
      error: The function to call with any errors found.
    """
    line = clean_lines.lines[line_number]

    get_dc_call = search(r'\b(?P<function_name>GetDC(Ex)?)\s*\(', line)
    if get_dc_call:
        error(line_number, 'runtime/leaky_pattern', 5,
              'Use the class HWndDC instead of calling %s to avoid potential '
              'memory leaks.' % get_dc_call.group('function_name'))

    create_dc_call = search(r'\b(?P<function_name>Create(Compatible)?DC)\s*\(', line)
    adopt_ptr_use = search(r'\badoptPtr\b', line)
    # A CreateDC-style call is accepted when adoptPtr appears on the same line.
    if not create_dc_call or adopt_ptr_use:
        return

    error(line_number, 'runtime/leaky_pattern', 5,
          'Use adoptPtr and OwnPtr<HDC> when calling %s to avoid potential '
          'memory leaks.' % create_dc_call.group('function_name'))
1830
1831
1832def check_spacing(file_extension, clean_lines, line_number, error):
1833    """Checks for the correctness of various spacing issues in the code.
1834
1835    Things we check for: spaces around operators, spaces after
1836    if/for/while/switch, no spaces around parens in function calls, two
1837    spaces between code and comment, don't start a block with a blank
1838    line, don't end a function with a blank line, don't have too many
1839    blank lines in a row.
1840
1841    Args:
1842      file_extension: The current file extension, without the leading dot.
1843      clean_lines: A CleansedLines instance containing the file.
1844      line_number: The number of the line to check.
1845      error: The function to call with any errors found.
1846    """
1847
1848    raw = clean_lines.raw_lines
1849    line = raw[line_number]
1850
1851    # Before nixing comments, check if the line is blank for no good
1852    # reason.  This includes the first line after a block is opened, and
1853    # blank lines at the end of a function (ie, right before a line like '}').
1854    if is_blank_line(line):
1855        elided = clean_lines.elided
1856        previous_line = elided[line_number - 1]
1857        previous_brace = previous_line.rfind('{')
1858        # FIXME: Don't complain if line before blank line, and line after,
1859        #        both start with alnums and are indented the same amount.
1860        #        This ignores whitespace at the start of a namespace block
1861        #        because those are not usually indented.
1862        if (previous_brace != -1 and previous_line[previous_brace:].find('}') == -1
1863            and previous_line[:previous_brace].find('namespace') == -1):
1864            # OK, we have a blank line at the start of a code block.  Before we
1865            # complain, we check if it is an exception to the rule: The previous
1866            # non-empty line has the parameters of a function header that are indented
1867            # 4 spaces (because they did not fit in a 80 column line when placed on
1868            # the same line as the function name).  We also check for the case where
1869            # the previous line is indented 6 spaces, which may happen when the
1870            # initializers of a constructor do not fit into a 80 column line.
1871            exception = False
1872            if match(r' {6}\w', previous_line):  # Initializer list?
1873                # We are looking for the opening column of initializer list, which
1874                # should be indented 4 spaces to cause 6 space indentation afterwards.
1875                search_position = line_number - 2
1876                while (search_position >= 0
1877                       and match(r' {6}\w', elided[search_position])):
1878                    search_position -= 1
1879                exception = (search_position >= 0
1880                             and elided[search_position][:5] == '    :')
1881            else:
1882                # Search for the function arguments or an initializer list.  We use a
1883                # simple heuristic here: If the line is indented 4 spaces; and we have a
1884                # closing paren, without the opening paren, followed by an opening brace
1885                # or colon (for initializer lists) we assume that it is the last line of
1886                # a function header.  If we have a colon indented 4 spaces, it is an
1887                # initializer list.
1888                exception = (match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
1889                                   previous_line)
1890                             or match(r' {4}:', previous_line))
1891
1892            if not exception:
1893                error(line_number, 'whitespace/blank_line', 2,
1894                      'Blank line at the start of a code block.  Is this needed?')
1895        # This doesn't ignore whitespace at the end of a namespace block
1896        # because that is too hard without pairing open/close braces;
1897        # however, a special exception is made for namespace closing
1898        # brackets which have a comment containing "namespace".
1899        #
1900        # Also, ignore blank lines at the end of a block in a long if-else
1901        # chain, like this:
1902        #   if (condition1) {
1903        #     // Something followed by a blank line
1904        #
1905        #   } else if (condition2) {
1906        #     // Something else
1907        #   }
1908        if line_number + 1 < clean_lines.num_lines():
1909            next_line = raw[line_number + 1]
1910            if (next_line
1911                and match(r'\s*}', next_line)
1912                and next_line.find('namespace') == -1
1913                and next_line.find('} else ') == -1):
1914                error(line_number, 'whitespace/blank_line', 3,
1915                      'Blank line at the end of a code block.  Is this needed?')
1916
1917    # Next, we check for proper spacing with respect to comments.
1918    comment_position = line.find('//')
1919    if comment_position != -1:
1920        # Check if the // may be in quotes.  If so, ignore it
1921        # Comparisons made explicit for clarity
1922        if (line.count('"', 0, comment_position) - line.count('\\"', 0, comment_position)) % 2 == 0:   # not in quotes
1923            # Allow one space before end of line comment.
1924            if (not match(r'^\s*$', line[:comment_position])
1925                and (comment_position >= 1
1926                and ((line[comment_position - 1] not in string.whitespace)
1927                     or (comment_position >= 2
1928                         and line[comment_position - 2] in string.whitespace)))):
1929                error(line_number, 'whitespace/comments', 5,
1930                      'One space before end of line comments')
1931            # There should always be a space between the // and the comment
1932            commentend = comment_position + 2
1933            if commentend < len(line) and not line[commentend] == ' ':
1934                # but some lines are exceptions -- e.g. if they're big
1935                # comment delimiters like:
1936                # //----------------------------------------------------------
1937                # or they begin with multiple slashes followed by a space:
1938                # //////// Header comment
1939                matched = (search(r'[=/-]{4,}\s*$', line[commentend:])
1940                           or search(r'^/+ ', line[commentend:]))
1941                if not matched:
1942                    error(line_number, 'whitespace/comments', 4,
1943                          'Should have a space between // and comment')
1944
1945            # There should only be one space after punctuation in a comment.
1946            if search(r'[.!?,;:]\s\s+\w', line[comment_position:]):
1947                error(line_number, 'whitespace/comments', 5,
1948                      'Should have only a single space after a punctuation in a comment.')
1949
1950    line = clean_lines.elided[line_number]  # get rid of comments and strings
1951
1952    # Don't try to do spacing checks for operator methods
1953    line = sub(r'operator(==|!=|<|<<|<=|>=|>>|>|\+=|-=|\*=|/=|%=|&=|\|=|^=|<<=|>>=|/)\(', 'operator\(', line)
1954    # Don't try to do spacing checks for #include or #import statements at
1955    # minimum because it messes up checks for spacing around /
1956    if match(r'\s*#\s*(?:include|import)', line):
1957        return
1958    if search(r'[\w.]=[\w.]', line):
1959        error(line_number, 'whitespace/operators', 4,
1960              'Missing spaces around =')
1961
1962    # FIXME: It's not ok to have spaces around binary operators like .
1963
1964    # You should always have whitespace around binary operators.
1965    # Alas, we can't test < or > because they're legitimately used sans spaces
1966    # (a->b, vector<int> a).  The only time we can tell is a < with no >, and
1967    # only if it's not template params list spilling into the next line.
1968    matched = search(r'[^<>=!\s](==|!=|\+=|-=|\*=|/=|/|\|=|&=|<<=|>>=|<=|>=|\|\||\||&&|>>|<<)[^<>=!\s]', line)
1969    if not matched:
1970        # Note that while it seems that the '<[^<]*' term in the following
1971        # regexp could be simplified to '<.*', which would indeed match
1972        # the same class of strings, the [^<] means that searching for the
1973        # regexp takes linear rather than quadratic time.
1974        if not search(r'<[^<]*,\s*$', line):  # template params spill
1975            matched = search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
1976    if matched:
1977        error(line_number, 'whitespace/operators', 3,
1978              'Missing spaces around %s' % matched.group(1))
1979
1980    # There shouldn't be space around unary operators
1981    matched = search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
1982    if matched:
1983        error(line_number, 'whitespace/operators', 4,
1984              'Extra space for operator %s' % matched.group(1))
1985
1986    # A pet peeve of mine: no spaces after an if, while, switch, or for
1987    matched = search(r' (if\(|for\(|foreach\(|while\(|switch\()', line)
1988    if matched:
1989        error(line_number, 'whitespace/parens', 5,
1990              'Missing space before ( in %s' % matched.group(1))
1991
1992    # For if/for/foreach/while/switch, the left and right parens should be
1993    # consistent about how many spaces are inside the parens, and
1994    # there should either be zero or one spaces inside the parens.
1995    # We don't want: "if ( foo)" or "if ( foo   )".
1996    # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
1997    matched = search(r'\b(?P<statement>if|for|foreach|while|switch)\s*\((?P<remainder>.*)$', line)
1998    if matched:
1999        statement = matched.group('statement')
2000        condition, rest = up_to_unmatched_closing_paren(matched.group('remainder'))
2001        if condition is not None:
2002            condition_match = search(r'(?P<leading>[ ]*)(?P<separator>.).*[^ ]+(?P<trailing>[ ]*)', condition)
2003            if condition_match:
2004                n_leading = len(condition_match.group('leading'))
2005                n_trailing = len(condition_match.group('trailing'))
2006                if n_leading != 0:
2007                    for_exception = statement == 'for' and condition.startswith(' ;')
2008                    if not for_exception:
2009                        error(line_number, 'whitespace/parens', 5,
2010                              'Extra space after ( in %s' % statement)
2011                if n_trailing != 0:
2012                    for_exception = statement == 'for' and condition.endswith('; ')
2013                    if not for_exception:
2014                        error(line_number, 'whitespace/parens', 5,
2015                              'Extra space before ) in %s' % statement)
2016
2017            # Do not check for more than one command in macros
2018            in_preprocessor_directive = match(r'\s*#', line)
2019            if not in_preprocessor_directive and not match(r'((\s*{\s*}?)|(\s*;?))\s*\\?$', rest):
2020                error(line_number, 'whitespace/parens', 4,
2021                      'More than one command on the same line in %s' % statement)
2022
2023    # You should always have a space after a comma (either as fn arg or operator)
2024    if search(r',[^\s]', line):
2025        error(line_number, 'whitespace/comma', 3,
2026              'Missing space after ,')
2027
2028    matched = search(r'^\s*(?P<token1>[a-zA-Z0-9_\*&]+)\s\s+(?P<token2>[a-zA-Z0-9_\*&]+)', line)
2029    if matched:
2030        error(line_number, 'whitespace/declaration', 3,
2031              'Extra space between %s and %s' % (matched.group('token1'), matched.group('token2')))
2032
2033    if file_extension == 'cpp':
2034        # C++ should have the & or * beside the type not the variable name.
2035        matched = match(r'\s*\w+(?<!\breturn|\bdelete)\s+(?P<pointer_operator>\*|\&)\w+', line)
2036        if matched:
2037            error(line_number, 'whitespace/declaration', 3,
2038                  'Declaration has space between type name and %s in %s' % (matched.group('pointer_operator'), matched.group(0).strip()))
2039
2040    elif file_extension == 'c':
2041        # C Pointer declaration should have the * beside the variable not the type name.
2042        matched = search(r'^\s*\w+\*\s+\w+', line)
2043        if matched:
2044            error(line_number, 'whitespace/declaration', 3,
2045                  'Declaration has space between * and variable name in %s' % matched.group(0).strip())
2046
2047    # Next we will look for issues with function calls.
2048    check_spacing_for_function_call(line, line_number, error)
2049
2050    # Except after an opening paren, you should have spaces before your braces.
2051    # And since you should never have braces at the beginning of a line, this is
2052    # an easy test.
2053    if search(r'[^ ({]{', line):
2054        error(line_number, 'whitespace/braces', 5,
2055              'Missing space before {')
2056
2057    # Make sure '} else {' has spaces.
2058    if search(r'}else', line):
2059        error(line_number, 'whitespace/braces', 5,
2060              'Missing space before else')
2061
2062    # You shouldn't have spaces before your brackets, except maybe after
2063    # 'delete []' or 'new char * []'.
2064    if search(r'\w\s+\[', line) and not search(r'delete\s+\[', line):
2065        error(line_number, 'whitespace/braces', 5,
2066              'Extra space before [')
2067
2068    # There should always be a single space in between braces on the same line.
2069    if search(r'\{\}', line):
2070        error(line_number, 'whitespace/braces', 5, 'Missing space inside { }.')
2071    if search(r'\{\s\s+\}', line):
2072        error(line_number, 'whitespace/braces', 5, 'Too many spaces inside { }.')
2073
2074    # You shouldn't have a space before a semicolon at the end of the line.
2075    # There's a special case for "for" since the style guide allows space before
2076    # the semicolon there.
2077    if search(r':\s*;\s*$', line):
2078        error(line_number, 'whitespace/semicolon', 5,
2079              'Semicolon defining empty statement. Use { } instead.')
2080    elif search(r'^\s*;\s*$', line):
2081        error(line_number, 'whitespace/semicolon', 5,
2082              'Line contains only semicolon. If this should be an empty statement, '
2083              'use { } instead.')
2084    elif (search(r'\s+;\s*$', line) and not search(r'\bfor\b', line)):
2085        error(line_number, 'whitespace/semicolon', 5,
2086              'Extra space before last semicolon. If this should be an empty '
2087              'statement, use { } instead.')
2088    elif (search(r'\b(for|while)\s*\(.*\)\s*;\s*$', line)
2089          and line.count('(') == line.count(')')
2090          # Allow do {} while();
2091          and not search(r'}\s*while', line)):
2092        error(line_number, 'whitespace/semicolon', 5,
2093              'Semicolon defining empty statement for this loop. Use { } instead.')
2094
2095
def get_previous_non_blank_line(clean_lines, line_number):
    """Find the closest non-blank line above the given line.

    Args:
      clean_lines: A CleansedLines instance containing the file contents.
      line_number: The number of the line to start searching above.

    Returns:
      A (line, line_number) tuple describing the nearest preceding entry of
      clean_lines.elided that is_blank_line() does not consider blank.  If
      every preceding line is blank (or there is none), ('', -1) is returned.
    """

    candidate_number = line_number
    # Walk upwards one line at a time until something non-blank shows up.
    while candidate_number > 0:
        candidate_number -= 1
        candidate = clean_lines.elided[candidate_number]
        if is_blank_line(candidate):
            continue
        return (candidate, candidate_number)
    return ('', -1)
2117
2118
def check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error):
    """Reports indentation problems for a namespace and the code it contains.

    Nothing happens unless the given line opens a namespace
    ("namespace Foo {").  An indented namespace line is reported right away.
    Otherwise the following lines are scanned until the brace depth drops
    below zero, and the first line at brace depth zero that starts with
    whitespace is reported, unless it continues a preprocessor directive or a
    statement that has not been terminated yet.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_extension: The extension (dot not included) of the file.  Not used
                      by this check.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: The function to call with any errors found.
    """

    opening_line = clean_lines.elided[line_number]  # Comments and strings are already removed.

    opening_match = match(r'(?P<namespace_indentation>\s*)namespace\s+\S+\s*{\s*$', opening_line)
    if not opening_match:
        return

    brace_depth = len(opening_match.group('namespace_indentation'))
    if brace_depth > 0:
        # Stay quiet about the indented namespace itself if indented code
        # inside a namespace was already reported for this file.
        if not file_state.did_inside_namespace_indent_warning():
            error(line_number, 'whitespace/indent', 4,
                  'namespace should never be indented.')
        return

    # From here on brace_depth is 0 and tracks braces opened inside the namespace.
    awaiting_semicolon = False
    inside_directive = False
    for offset, body_line in enumerate(clean_lines.elided[line_number + 1:], 1):
        stripped = body_line.strip()
        if not stripped:
            continue
        if brace_depth:
            # If we have a brace we may not need a semicolon.
            awaiting_semicolon = False
        else:
            starts_new_statement = not (inside_directive or awaiting_semicolon)
            if (starts_new_statement
                    and not match(r'\S', body_line)
                    and not file_state.did_inside_namespace_indent_warning()):
                file_state.set_did_inside_namespace_indent_warning()
                error(line_number + offset, 'whitespace/indent', 4,
                      'Code inside a namespace should not be indented.')
            ends_with_backslash = body_line[-1] == '\\'
            if inside_directive or stripped[0] == '#':
                # A directive only continues onto the next line after a backslash.
                inside_directive = ends_with_backslash
            else:
                statement_unfinished = ';' not in body_line and stripped[-1] != '}'
                awaiting_semicolon = statement_unfinished or ends_with_backslash
        brace_depth += body_line.count('{') - body_line.count('}')
        if brace_depth < 0:
            # More closing than opening braces: the namespace has ended.
            break
2166
2167
def check_enum_casing(clean_lines, line_number, enum_state, error):
    """Reports enum members whose names do not follow the casing rule.

    The raw line is inspected first: a "// WebIDL enum" or "// WebKitIDL enum"
    comment on a line of its own switches enum_state.is_webidl_enum on.  The
    elided line is then handed to enum_state.process_clean_line(), and an
    error is reported whenever that call returns a false value.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      enum_state: A _EnumState instance which maintains enum declaration state.
      error: The function to call with any errors found.
    """

    raw_line = clean_lines.raw_lines[line_number]
    has_webidl_marker = bool(match(r'\s*// Web(?:Kit)?IDL enum\s*$', raw_line))
    enum_state.is_webidl_enum |= has_webidl_marker

    elided_line = clean_lines.elided[line_number]  # Comments and strings are already removed.
    if enum_state.process_clean_line(elided_line):
        return
    error(line_number, 'readability/enum_casing', 4,
          'enum members should use InterCaps with an initial capital letter.')
2184
def check_directive_indentation(clean_lines, line_number, file_state, error):
    """Reports preprocessor directives that are preceded by whitespace.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.  Not used by this check.
      error: The function to call with any errors found.
    """

    # The elided line has comments and strings removed already.
    if match(r'\s+#', clean_lines.elided[line_number]):
        error(line_number, 'whitespace/indent', 4, 'preprocessor directives (e.g., #ifdef, #define, #import) should never be indented.')
2203
2204
def get_initial_spaces_for_line(clean_line):
    """Return how many space characters the line starts with.

    Only ' ' is counted; a tab or any other character ends the run.

    Args:
      clean_line: The line to measure, as a string.

    Returns:
      The number of consecutive spaces at the very beginning of the line.
    """
    without_leading_spaces = clean_line.lstrip(' ')
    return len(clean_line) - len(without_leading_spaces)
2210
2211
def check_indentation_amount(clean_lines, line_number, error):
    """Reports lines whose leading-space count looks wrong.

    Two things are checked: the number of leading spaces must be a multiple
    of 4, and the line must not be indented more than 4 spaces deeper than
    the previous non-blank line.  The second check is skipped when there is
    no previous non-blank line, when that line consists of a "label:" only,
    or when it starts with '#'.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    indent = get_initial_spaces_for_line(clean_lines.elided[line_number])

    if indent % 4 != 0:
        error(line_number, 'whitespace/indent', 3,
              'Weird number of spaces at line-start.  Are you using a 4-space indent?')
        return

    reference_line = get_previous_non_blank_line(clean_lines, line_number)[0]
    if not reference_line.strip():
        # Nothing meaningful above to compare against.
        return
    if match(r'\s*\w+\s*:\s*$', reference_line) or reference_line[0] == '#':
        return

    extra_indent = indent - get_initial_spaces_for_line(reference_line)
    if extra_indent > 4:
        error(line_number, 'whitespace/indent', 3, 'When wrapping a line, only indent 4 spaces.')
2228
2229
def check_using_std(clean_lines, line_number, file_state, error):
    """Reports 'using std::foo;' statements, suggesting 'using namespace std;'.

    'using std::swap;' is accepted because it is the established idiom for
    swapping objects in generic code.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: The function to call with any errors found.
    """

    # C and Objective-C implementation files are exempt from this check.
    if file_state.is_c_or_objective_c():
        return

    # The elided line has comments and strings removed already.
    statement = match(r'\s*using\s+std::(?P<method_name>\S+)\s*;\s*$',
                      clean_lines.elided[line_number])
    if not statement:
        return

    imported_name = statement.group('method_name')
    if imported_name != 'swap':
        error(line_number, 'build/using_std', 4,
              "Use 'using namespace std;' instead of 'using std::%s;'." % imported_name)
2257
2258
def check_max_min_macros(clean_lines, line_number, file_state, error):
    """Reports uses of the MAX() and MIN() macros, suggesting std::max()/std::min().

    Only the first MAX( or MIN( found on the line is reported.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: The function to call with any errors found.
    """

    # C and Objective-C implementation files are exempt from this check.
    if file_state.is_c_or_objective_c():
        return

    # The elided line has comments and strings removed already.
    macro_use = search(r'\b(?P<max_min_macro>(MAX|MIN))\s*\(', clean_lines.elided[line_number])
    if not macro_use:
        return

    macro_name = macro_use.group('max_min_macro')
    std_name = macro_name.lower()
    message = ('Use std::%s() or std::%s<type>() instead of the %s() macro.'
               % (std_name, std_name, macro_name))
    error(line_number, 'runtime/max_min_macros', 4, message)
2285
2286
def check_ctype_functions(clean_lines, line_number, file_state, error):
    """Reports calls to the standard ctype.h functions.

    The replacement suggested to the user is the equivalent function from
    <wtf/ASCIICType.h>.  Only the first such call on the line is reported.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.  Not used by this check.
      error: The function to call with any errors found.
    """

    # The elided line has comments and strings removed already.
    call = search(r'\b(?P<ctype_function>(isalnum|isalpha|isascii|isblank|iscntrl|isdigit|isgraph|islower|isprint|ispunct|isspace|isupper|isxdigit|toascii|tolower|toupper))\s*\(',
                  clean_lines.elided[line_number])
    if not call:
        return

    function_name = call.group('ctype_function')
    message = ('Use equivelent function in <wtf/ASCIICType.h> instead of the %s() function.'
               % function_name)
    error(line_number, 'runtime/ctype_function', 4, message)
2309
def check_switch_indentation(clean_lines, line_number, error):
    """Reports badly indented labels and statements inside a switch statement.

    Nothing happens unless the given line opens a switch ("switch (...) {").
    The lines after it are then examined until the closing brace at the
    switch's own indentation is found, the first problem has been reported,
    or a nested switch statement has been processed.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    switch_line = clean_lines.elided[line_number]  # Comments and strings are already removed.

    switch_match = match(r'(?P<switch_indentation>\s*)switch\s*\(.+\)\s*{\s*$', switch_line)
    if not switch_match:
        return

    switch_indentation = switch_match.group('switch_indentation')
    body_indentation = switch_indentation + ' ' * 4
    saw_nested_switch = False

    for offset, body_line in enumerate(clean_lines.elided[line_number + 1:], 1):
        # Empty lines and preprocessor directives carry no indentation information.
        if body_line.strip() == '' or body_line.startswith('#'):
            continue

        if match(r'\s*switch\s*\(.+\)\s*{\s*$', body_line):
            # A switch nested inside the one being checked.  Tracking where
            # it ends would be needed for the label tests below to stay
            # correct, so instead this line is still checked and the scan
            # stops right after it.  With consistent (even if wrong)
            # indentation this still catches the issues in practice.
            saw_nested_switch = True

        parts = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', body_line)
        indentation = parts.group('indentation')
        code = parts.group('remaining_line')
        lines_up_with_switch = indentation == switch_indentation

        # The closing brace of the switch ends the check.
        if lines_up_with_switch and code.startswith('}'):
            return

        # Case and default labels must line up with the switch.  The regexp
        # also accepts single-line cases like "default: break;" but does not
        # trigger on stuff like "Document::Foo();".
        if match(r'(default|case\s+.*)\s*:([^:].*)?$', code):
            if not lines_up_with_switch:
                error(line_number + offset, 'whitespace/indent', 4,
                      'A case label should not be indented, but line up with its switch statement.')
                # One report is enough to figure out the problem.
                return
        elif match(r'\w+\s*:\s*$', code):
            # Goto labels are ignored.
            continue
        elif not indentation.startswith(body_indentation):
            # Ordinary code has to be indented at least one level deeper
            # than the switch statement.
            error(line_number + offset, 'whitespace/indent', 4,
                  'Non-label code inside switch statements should be indented.')
            # One report is enough to figure out the problem.
            return

        if saw_nested_switch:
            return
2378
2379
def check_braces(clean_lines, line_number, error):
    """Looks for misplaced braces (e.g. at the end of line).

    The following situations are reported:
      - a line consisting only of '{' that should have been placed at the
        end of the previous line;
      - a function definition whose '{' is not on a line of its own;
      - an 'else' on the line after a line consisting only of '}';
      - an else clause or a do/while body on the same line as its keyword;
      - a ';' directly after a '}' that does not look like it closes a
        struct, class, enum or initializer.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    line = clean_lines.elided[line_number]  # Get rid of comments and strings.

    if match(r'\s*{\s*$', line):
        # We allow an open brace to start a line in the case where someone
        # is using braces for function definition or in a block to
        # explicitly create a new scope, which is commonly used to control
        # the lifetime of stack-allocated variables.  We don't detect this
        # perfectly: we just don't complain if the last non-whitespace
        # character on the previous non-blank line is ';', ':', '{', '}',
        # ')', or ') const' and doesn't begin with 'if|for|while|switch|else'.
        # We also allow '#' for #endif and '=' for array initialization.
        previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
        if ((not search(r'[;:}{)=]\s*$|\)\s*((const|OVERRIDE)\s*)*\s*$', previous_line)
             or search(r'\b(if|for|foreach|while|switch|else)\b', previous_line))
            and previous_line.find('#') < 0):
            error(line_number, 'whitespace/braces', 4,
                  'This { should be at the end of the previous line')
    elif (search(r'\)\s*(((const|OVERRIDE)\s*)*\s*)?{\s*$', line)
          and line.count('(') == line.count(')')
          and not search(r'\b(if|for|foreach|while|switch)\b', line)
          and not match(r'\s+[A-Z_][A-Z_0-9]+\b', line)):
        error(line_number, 'whitespace/braces', 4,
              'Place brace on its own line for function definitions.')

    # An else clause should be on the same line as the preceding closing brace.
    # The word boundary keeps identifiers that merely start with "else"
    # (e.g. "elsewhere = 1;") from being mistaken for the keyword.
    if match(r'\s*else\b', line):
        previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
        if match(r'\s*}\s*$', previous_line):
            error(line_number, 'whitespace/newline', 4,
                  'An else should appear on the same line as the preceding }')

    # Likewise, an else should never have the else clause on the same line
    if search(r'\belse [^\s{]', line) and not search(r'\belse if\b', line):
        error(line_number, 'whitespace/newline', 4,
              'Else clause should never be on same line as else (use 2 lines)')

    # In the same way, a do/while should never be on one line
    if match(r'\s*do [^\s{]', line):
        error(line_number, 'whitespace/newline', 4,
              'do/while clauses should not be on a single line')

    # Braces shouldn't be followed by a ; unless they're defining a struct
    # or initializing an array.
    # We can't tell in general, but we can for some common cases.
    #
    # While the text collected so far still looks like an indented
    # "{ ... };", prepend the preceding non-blank lines so that a keyword or
    # '=' introducing the braces becomes visible to the final test below.
    previous_line_number = line_number
    while True:
        (previous_line, previous_line_number) = get_previous_non_blank_line(clean_lines, previous_line_number)
        # previous_line_number is -1 once the top of the file has been
        # reached.  Without this guard the loop would never terminate there:
        # the empty string contains no ';' and prepending it leaves the line
        # unchanged, so the condition would stay true forever.
        if (previous_line_number >= 0
                and match(r'\s+{.*}\s*;', line)
                and not previous_line.count(';')):
            line = previous_line + line
        else:
            break
    if (search(r'{.*}\s*;', line)
        and line.count('{') == line.count('}')
        and not search(r'struct|class|enum|\s*=\s*{', line)):
        error(line_number, 'readability/braces', 4,
              "You don't need a ; after a }")
2445
2446
def check_exit_statement_simplifications(clean_lines, line_number, error):
    """Reports an else or else-if that follows an "if" block ending in an
    exit statement (return, break, continue or goto).

    Such an else can be dropped, and such an else-if can be written as a
    plain if.  The error is reported on the line of the "if" that the else
    belongs to.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    else_line = clean_lines.elided[line_number]  # Comments and strings are already removed.

    else_match = match(r'(?P<else_indentation>\s*)(\}\s*)?else(\s+if\s*\(|(?P<else>\s*(\{\s*)?\Z))', else_line)
    if not else_match:
        return

    else_indentation = else_match.group('else_indentation')
    body_indentation = else_indentation + ' ' * 4
    closing_brace_line = else_indentation + '}'
    opening_brace_line = else_indentation + '{'
    # The 'else' group only takes part in the match for a plain else, not for "else if (".
    is_plain_else = else_match.start('else') != -1

    found_exit_statement = False

    # Walk upwards from the line right above the else.
    for distance, earlier_line in enumerate(reversed(clean_lines.elided[:line_number]), 1):
        # Empty lines, preprocessor directives and goto labels are skipped.
        if (earlier_line.strip() == ''
                or earlier_line.startswith('#')
                or match(r'\w+\s*:\s*$', earlier_line)):
            continue

        # A closing brace on the else's own indentation level is skipped too.
        # The style guide wants it on the same line as the "else if", but
        # code that does not comply should still be checked, so simply
        # proceed to the line before it.
        if earlier_line == closing_brace_line:
            continue

        parts = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', earlier_line)
        indentation = parts.group('indentation')
        code = parts.group('remaining_line')

        if not found_exit_statement:
            # Going up, the first real statement has to be an exit statement
            # indented exactly one level deeper than the else.  Any other
            # indentation might mean a nested if or something similar, and
            # any other statement means this check does not apply.
            if indentation != body_indentation:
                return
            if not match(r'(return(\W+.*)|(break|continue)\s*;|goto\s*\w+;)$', code):
                return
            found_exit_statement = True
            continue

        # The previous block ends with an exit statement; what is left is to
        # find out whether that block belongs to an "if".

        # Skip an opening brace on the else's indentation ("if (condition)\n{").
        if earlier_line == opening_brace_line:
            continue

        # Skip everything that is indented further than the else.
        if indentation.startswith(else_indentation) and indentation != else_indentation:
            continue

        # This line has the same (or less) indentation.  It decides the
        # outcome either way: an "if" gets reported, anything else does not.
        if match(r'if\s*\(', code):
            if is_plain_else:
                message = ('An else statement can be removed when the prior "if" '
                           'concludes with a return, break, continue or goto statement.')
            else:
                message = ('An else if statement should be written as an if statement '
                           'when the prior "if" concludes with a return, break, '
                           'continue or goto statement.')
            error(line_number - distance, 'readability/control_flow', 4, message)
        return
2534
2535
def replaceable_check(operator, macro, line):
    """Tell whether a plain CHECK could use a more specific macro instead.

    For example CHECK(a == b) could be written as CHECK_EQ, and similarly
    for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT and CHECK_NE.

    Args:
      operator: The C++ operator used in the CHECK.
      macro: The CHECK or EXPECT macro being called.
      line: The current source line.

    Returns:
      A true value if the CHECK can be replaced with a more specific one,
      a false value otherwise.
    """

    # Decimal and hex integers, strings, and chars (in that order).
    literal = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'

    # One side of the operator has to look like a literal, since
    # CHECK(x == iterator) won't compile.  Not every case where a more
    # specific CHECK is possible gets caught this way, but that is less
    # annoying than dealing with extraneous warnings.
    literal_on_left = r'\s*' + literal + r'\s*' + operator + r'[^<>].*'
    literal_on_right = r'.*[^<>]' + operator + r'\s*' + literal + r'\s*\)'
    pattern = r'\s*' + macro + r'\((' + literal_on_left + '|' + literal_on_right + ')'

    found = match(pattern, line)
    if not found:
        return found

    # CHECK(x == NULL) and the like are left alone because CHECK_EQ(x, NULL)
    # won't compile (requires a cast).  More complex boolean expressions
    # involving && or ||, such as CHECK(a == b || c == d), are left alone too.
    return not search(r'NULL|&&|\|\|', line)
2569
2570
def check_check(clean_lines, line_number, error):
    """Suggests more specific CHECK/EXPECT macros for simple comparisons.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # The first known macro name found in the raw line selects the set of
    # replacement macros to suggest.
    raw_line = clean_lines.raw_lines[line_number]
    current_macro = next((macro for macro in _CHECK_MACROS if macro in raw_line), '')
    if not current_macro:
        # Don't waste time here if the line contains none of the macros.
        return

    line = clean_lines.elided[line_number]  # Comments and strings are already removed.

    # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.  At most
    # one suggestion is made per line.
    for operator in ['==', '!=', '>=', '>', '<=', '<']:
        if not replaceable_check(operator, current_macro, line):
            continue
        replacement = _CHECK_REPLACEMENT[current_macro][operator]
        error(line_number, 'readability/check', 2,
              'Consider using %s instead of %s(a %s b)' % (replacement, current_macro, operator))
        break
2601
2602
def check_for_comparisons_to_boolean(clean_lines, line_number, error):
    """Reports == and != comparisons against NULL, nullptr, true or false.

    Lines that contain LIKELY or UNLIKELY are not reported.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    # Work on the line without comments and strings.
    line = clean_lines.elided[line_number]

    # NULL must be included here, as otherwise users will convert NULL to 0
    # and then we can't catch it, since it looks like a valid integer
    # comparison.
    literal_on_right = search(r'[=!]=\s*(NULL|nullptr|true|false)[^\w.]', line)
    if not (literal_on_right or search(r'[^\w.](NULL|nullptr|true|false)\s*[=!]=', line)):
        return

    if search('LIKELY', line) or search('UNLIKELY', line):
        return

    error(line_number, 'readability/comparison_to_boolean', 5,
          'Tests for true/false and null/non-null should be done without equality comparisons.')
2613
2614
def check_for_null(clean_lines, line_number, file_state, error):
    """Reports uses of NULL, in code as well as in comments.

    Lines that mention certain GLib, GStreamer, GdkPixbuf, GTK+ or libsoup
    functions are exempt.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: The function to call with any errors found.
    """
    # C and Objective-C implementation files are exempt from this check.
    if file_state.is_c_or_objective_c():
        return

    elided_line = clean_lines.elided[line_number]

    exempt_patterns = (
        # g_*(). See Bug 32858 and 39372.
        r'\bg(_[a-z]+)+\b',
        # gst_*(). See Bug 70498.
        r'\bgst(_[a-z]+)+\b',
        # gdk_pixbuf_save_to_*{join,concat}(). See Bug 43090.
        r'\bgdk_pixbuf_save_to\w+\b',
        # gtk_widget_style_get(), gtk_style_context_get_style(), or
        # gtk_style_context_get(). See Bug 51758
        r'\bgtk_widget_style_get\(\w+\b',
        r'\bgtk_style_context_get_style\(\w+\b',
        r'\bgtk_style_context_get\(\w+\b',
        # soup_server_new(). See Bug 77890.
        r'\bsoup_server_new\(\w+\b',
    )
    if any(search(pattern, elided_line) for pattern in exempt_patterns):
        return

    if search(r'\bNULL\b', elided_line):
        error(line_number, 'readability/null', 5, 'Use 0 instead of NULL.')
        return

    # No NULL in the code itself, so look at the raw line to see whether NULL
    # occurs in a comment.  If it matches, repeat the search with strings
    # collapsed to avoid giving errors for NULLs occurring in strings.
    raw_line = clean_lines.raw_lines[line_number]
    if not search(r'\bNULL\b', raw_line):
        return
    if search(r'\bNULL\b', CleansedLines.collapse_strings(raw_line)):
        error(line_number, 'readability/null', 4, 'Use 0 or null instead of NULL (even in *comments*).')
2652
def get_line_width(line):
    """Determines the width of the line in column positions.

    Args:
      line: A string, which may be a Unicode string.

    Returns:
      The width of the line in column positions, accounting for Unicode
      combining characters and wide characters.  For a string that is not
      Unicode text (a Python 2 byte string), this is simply its length.
    """
    # The name `unicode` only exists on Python 2.  On Python 3 every str is
    # Unicode text, and referring to `unicode` would raise a NameError.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    if isinstance(line, text_type):
        width = 0
        # Normalize first so that a base character followed by a combining
        # mark is counted as one composed character wherever possible.
        for c in unicodedata.normalize('NFC', line):
            if unicodedata.east_asian_width(c) in ('W', 'F'):
                # Wide and fullwidth characters take up two columns.
                width += 2
            elif not unicodedata.combining(c):
                width += 1
            # Remaining combining characters take up no column of their own.
        return width
    return len(line)
2672
2673
def check_conditional_and_loop_bodies_for_brace_violations(clean_lines, line_number, error):
    """Scans the bodies of conditionals and loops, and in particular
    all the arms of conditionals, for violations in the use of braces.

    Specifically:

    (1) If an arm omits braces, then the following statement must be on one
    physical line.
    (2) If any arm uses braces, all arms must use them.

    These checks are only done here if we find the start of an
    'if/for/foreach/while' statement, because this function fails fast
    if it encounters constructs it doesn't understand. Checks
    elsewhere validate other constraints, such as requiring '}' and
    'else' to be on the same line.

    At most one error is reported per call; the scan stops at the first
    violation it finds.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # We work with the elided lines. Comments have been removed, but line
    # numbers are preserved, so we can still find situations where
    # single-expression control clauses span multiple lines, or when a
    # comment preceded the expression.
    lines = clean_lines.elided
    line = lines[line_number]

    # Match control structures.
    control_match = match(r'\s*(if|foreach|for|while)\s*\(', line)
    if not control_match:
        return

    # Found the start of a conditional or loop.

    # The following loop handles all potential arms of the control clause.
    # The initial conditions are the following:
    #   - We start on the opening paren '(' of the condition, *unless* we are
    #     handling an 'else' block, in which case there is no condition.
    #   - In the latter case, we start at the position just beyond the 'else'
    #     token.
    expect_conditional_expression = True
    know_whether_using_braces = False
    using_braces = False
    # Only an 'if' can be followed by further arms ('else if' / 'else').
    search_for_else_clause = control_match.group(1) == "if"
    # The match ends just past '(', so step back one column to sit on it.
    current_pos = Position(line_number, control_match.end() - 1)

    while True:
        if expect_conditional_expression:
            # Try to find the end of the conditional expression,
            # potentially spanning multiple lines.
            open_paren_pos = current_pos
            close_paren_pos = close_expression(lines, open_paren_pos)
            if close_paren_pos.column < 0:
                # The closing parenthesis could not be located; bail out.
                return
            current_pos = close_paren_pos

        end_line_of_conditional = current_pos.row

        # Find the start of the body.
        current_pos = _find_in_lines(r'\S', lines, current_pos, None)
        if not current_pos:
            return

        current_arm_uses_brace = False
        if lines[current_pos.row][current_pos.column] == '{':
            current_arm_uses_brace = True
        if know_whether_using_braces:
            if using_braces != current_arm_uses_brace:
                error(current_pos.row, 'whitespace/braces', 4,
                      'If one part of an if-else statement uses curly braces, the other part must too.')
                return
        know_whether_using_braces = True
        using_braces = current_arm_uses_brace

        if using_braces:
            # Skip over the entire arm.
            current_pos = close_expression(lines, current_pos)
            if current_pos.column < 0:
                return
        else:
            # Skip over the current expression.
            current_pos = _find_in_lines(r';', lines, current_pos, None)
            if not current_pos:
                return
            # If the end of the expression is beyond the line just after
            # the close parenthesis or control clause, we've found a
            # single-expression arm that spans multiple lines. (We don't
            # fire this error for expressions ending on the same line; that
            # is a different error, handled elsewhere.)
            if current_pos.row > 1 + end_line_of_conditional:
                error(current_pos.row, 'whitespace/braces', 4,
                      'A conditional or loop body must use braces if the statement is more than one line long.')
                return
            current_pos = Position(current_pos.row, 1 + current_pos.column)

        # At this point current_pos points just past the end of the last
        # arm. If we just handled the last control clause, we're done.
        if not search_for_else_clause:
            return

        # Scan forward for the next non-whitespace character, and see
        # whether we are continuing a conditional (with an 'else' or
        # 'else if'), or are done.
        current_pos = _find_in_lines(r'\S', lines, current_pos, None)
        if not current_pos:
            return
        next_nonspace_string = lines[current_pos.row][current_pos.column:]
        # Word boundaries keep identifiers that merely begin with 'else'
        # (e.g. 'elsewhere();') from being mistaken for an else arm, and the
        # optional group accepts any amount of whitespace between 'else' and
        # 'if' rather than exactly one space.
        next_conditional = match(r'else\b(\s*if\b)?', next_nonspace_string)
        if not next_conditional:
            # Done processing this 'if' and all arms.
            return
        if next_conditional.group(1):
            # 'else if': move to the '(' that opens its condition.
            current_pos = _find_in_lines(r'\(', lines, current_pos, None)
            if not current_pos:
                # No parenthesis follows; this is a construct we do not
                # understand, so fail fast instead of crashing.
                return
        else:
            current_pos.column += 4  # skip 'else'
            expect_conditional_expression = False
            search_for_else_clause = False
    # End while loop
2795
def check_style(clean_lines, line_number, file_extension, class_state, file_state, enum_state, error):
    """Runs the per-line checks for the 'C++ style rules' of cppguide.html.

    Reports tabs, trailing whitespace, several commands on one line and
    boolean operators left dangling at the end of a line, then hands the
    line over to the more specialised style checkers (indentation, braces,
    spacing, and so on).

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_extension: The extension (without the dot) of the filename.
      class_state: A _ClassState instance which maintains information about
                   the current stack of nested class declarations being parsed.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      enum_state: A _EnumState instance which maintains the current enum state.
      error: The function to call with any errors found.
    """
    line = clean_lines.raw_lines[line_number]

    if '\t' in line:
        error(line_number, 'whitespace/tab', 1,
              'Tab found; better to use spaces')

    cleansed_line = clean_lines.elided[line_number]
    # Slicing instead of indexing yields '' for an empty line, and
    # ''.isspace() is False, so no separate emptiness test is needed.
    if line[-1:].isspace():
        error(line_number, 'whitespace/end_of_line', 4,
              'Line ends in whitespace.  Consider deleting these extra spaces.')

    # Start from "more than one ';' and no 'for' on the line" and then strike
    # out every tolerated situation in turn.
    too_many_commands = cleansed_line.count(';') > 1 and 'for' not in cleansed_line
    if too_many_commands:
        # for loops are allowed two ;'s (and may run over two lines).
        previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
        if 'for' in previous_line and ';' not in previous_line:
            too_many_commands = False
    if too_many_commands and 'break;' in cleansed_line:
        # It's ok to have many commands in a switch case that fits in 1 line.
        if 'case ' in cleansed_line or 'default:' in cleansed_line:
            too_many_commands = False
    if too_many_commands and match(r'.*\(.*\).*{.*.}', line):
        # Trivial single-line accessors in class definitions are fine too.
        if class_state.classinfo_stack and line.count('{') == line.count('}'):
            too_many_commands = False
    if too_many_commands and cleansed_line.startswith('#define '):
        too_many_commands = False
    if too_many_commands and "WTF_MAKE_NONCOPYABLE" in cleansed_line:
        # WTF_MAKE_NONCOPYABLE and WTF_MAKE_FAST_ALLOCATED may share a line.
        if "WTF_MAKE_FAST_ALLOCATED" in cleansed_line:
            too_many_commands = False
    if too_many_commands:
        error(line_number, 'whitespace/newline', 4,
              'More than one command on the same line')

    if cleansed_line.strip().endswith(('||', '&&')):
        error(line_number, 'whitespace/operators', 4,
              'Boolean expressions that span multiple lines should have their '
              'operators on the left side of the line instead of the right side.')

    # The remaining style checks each live in their own function.
    check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error)
    check_directive_indentation(clean_lines, line_number, file_state, error)
    check_using_std(clean_lines, line_number, file_state, error)
    check_max_min_macros(clean_lines, line_number, file_state, error)
    check_ctype_functions(clean_lines, line_number, file_state, error)
    check_switch_indentation(clean_lines, line_number, error)
    check_braces(clean_lines, line_number, error)
    check_exit_statement_simplifications(clean_lines, line_number, error)
    check_spacing(file_extension, clean_lines, line_number, error)
    check_check(clean_lines, line_number, error)
    check_for_comparisons_to_boolean(clean_lines, line_number, error)
    check_for_null(clean_lines, line_number, file_state, error)
    check_indentation_amount(clean_lines, line_number, error)
    check_enum_casing(clean_lines, line_number, enum_state, error)
2867
2868
# Pattern for a quoted #include whose name contains no '/' and ends in '.h',
# e.g. '#include "Foo.h"'.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Pattern for a complete #include line (leading whitespace and whitespace
# around '#' are allowed). Group 1 is the opening delimiter ('<' or '"') and
# group 2 is the included path.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s (the
# component also stops at the first '.'). That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cpp').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cpp').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cpp').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
2877
2878
2879def _drop_common_suffixes(filename):
2880    """Drops common suffixes like _test.cpp or -inl.h from filename.
2881
2882    For example:
2883      >>> _drop_common_suffixes('foo/foo-inl.h')
2884      'foo/foo'
2885      >>> _drop_common_suffixes('foo/bar/foo.cpp')
2886      'foo/bar/foo'
2887      >>> _drop_common_suffixes('foo/foo_internal.h')
2888      'foo/foo'
2889      >>> _drop_common_suffixes('foo/foo_unusualinternal.h')
2890      'foo/foo_unusualinternal'
2891
2892    Args:
2893      filename: The input filename.
2894
2895    Returns:
2896      The filename with the common suffix removed.
2897    """
2898    for suffix in ('test.cpp', 'regtest.cpp', 'unittest.cpp',
2899                   'inl.h', 'impl.h', 'internal.h'):
2900        if (filename.endswith(suffix) and len(filename) > len(suffix)
2901            and filename[-len(suffix) - 1] in ('-', '_')):
2902            return filename[:-len(suffix) - 1]
2903    return os.path.splitext(filename)[0]
2904
2905
def _classify_include(filename, include, is_system, include_state):
    """Figures out what kind of header 'include' is.

    The rules are applied in this order:
      - a <> include that is not under 'public/' is _OTHER_HEADER;
      - 'config.h' is _CONFIG_HEADER;
      - inside a '.h' file, anything but the file's own name is _OTHER_HEADER;
      - 'moc_*.cpp' and '*.moc' includes are _MOC_HEADER;
      - otherwise the base names of the file and of the include are compared
        to decide between _PRIMARY_HEADER and _OTHER_HEADER. The comparison
        is lenient until the primary section has been visited and strict
        afterwards.

    Args:
      filename: The current file cpp_style is running over.
      include: The path to a #included file.
      is_system: True if the #include used <> rather than "".
      include_state: An _IncludeState instance in which the headers are inserted.

    Returns:
      One of the _XXX_HEADER constants.
    """
    # If it is a system header we know it is classified as _OTHER_HEADER.
    if is_system and not include.startswith('public/'):
        return _OTHER_HEADER

    # An include named config.h is WebCore/config.h.
    if include == "config.h":
        return _CONFIG_HEADER

    # Header files cannot have primary includes of their own. Only an include
    # that exactly matches the header's filename gets past this point, so
    # that it can trigger the "don't include yourself" check.
    if filename.endswith('.h') and filename != include:
        return _OTHER_HEADER

    # Qt's moc files do not follow the naming and ordering rules, so they
    # should be skipped.
    is_moc_source = include.startswith('moc_') and include.endswith('.cpp')
    if is_moc_source or include.endswith('.moc'):
        return _MOC_HEADER

    target_base = FileInfo(filename).base_name()
    include_base = FileInfo(include).base_name()

    if include_state.visited_primary_section():
        # A primary header has already been seen, so compare strictly. If the
        # two base names are equal here, the earlier lenient match was
        # probably a false positive.
        if target_base != include_base:
            return _OTHER_HEADER
        if include == "ResourceHandleWin.h":
            # FIXME: Thus far, we've only seen one example of these, but if we
            # start to see more, please consider generalizing this check
            # somehow.
            return _OTHER_HEADER
        return _PRIMARY_HEADER

    # No primary header yet: be lenient and accept an include whose base name
    # appears anywhere inside the target's base name.
    if include_base in target_base:
        return _PRIMARY_HEADER
    # Qt private APIs use _p.h suffix.
    if include_base.endswith('_p') and target_base in include_base:
        return _PRIMARY_HEADER

    return _OTHER_HEADER
2973
2974
def _does_primary_header_exist(filename):
    """Tells whether a source file has a primary header next to it.

    The primary header is the file's path with its extension replaced by
    '.h'.

    Args:
      filename: The name of the file being checked.

    Returns:
      True if the file is a source file and its primary header exists on
      disk, False otherwise.
    """
    info = FileInfo(filename)
    if info.is_source():
        return os.path.isfile(info.no_extension() + ".h")
    return False
2984
2985
def check_include_line(filename, file_extension, clean_lines, line_number, include_state, error):
    """Check rules that are applicable to #include lines.

    Strings on #include lines are NOT removed from elided line, to make
    certain tasks easier. However, to prevent false positives, checks
    applicable to #include lines in CheckLanguage must be put here.

    The line is checked for discouraged stream headers, wrongly delimited
    wtf/ and cc/ includes, duplicate includes, and the ordering of includes
    (section order, blank line after the primary header, alphabetical
    sorting).

    Args:
      filename: The name of the current file.
      file_extension: The current file extension, without the leading dot.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      include_state: An _IncludeState instance in which the headers are inserted.
      error: The function to call with any errors found.
    """
    # FIXME: For readability or as a possible optimization, consider
    #        exiting early here by checking whether the "build/include"
    #        category should be checked for the given filename.  This
    #        may involve having the error handler classes expose a
    #        should_check() method, in addition to the usual __call__
    #        method.
    line = clean_lines.lines[line_number]

    include_match = _RE_PATTERN_INCLUDE.search(line)
    if include_match is None:
        return

    delimiter, include = include_match.group(1, 2)
    is_system = delimiter == '<'

    # Look for any of the stream classes that are part of standard C++.
    if match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
        error(line_number, 'readability/streams', 3,
              'Streams are highly discouraged.')

    # Look for specific includes to fix.
    if is_system and include.startswith('wtf/'):
        error(line_number, 'build/include', 4,
              'wtf includes should be "wtf/file.h" instead of <wtf/file.h>.')

    if '/chromium/' in filename and include.startswith('cc/CC'):
        error(line_number, 'build/include', 4,
              'cc includes should be "CCFoo.h" instead of "cc/CCFoo.h".')

    # include_state maps each include seen so far to the line it was seen on.
    duplicate_header = include in include_state
    if duplicate_header:
        first_seen_at = include_state[include]
        error(line_number, 'build/include', 4,
              '"%s" already included at %s:%s' %
              (include, filename, first_seen_at))
    else:
        include_state[include] = line_number

    header_type = _classify_include(filename, include, is_system, include_state)
    primary_header_exists = _does_primary_header_exist(filename)
    include_state.header_types[line_number] = header_type

    # Only proceed if this isn't a duplicate header.
    if duplicate_header:
        return

    # We want to ensure that headers appear in the right order:
    # 1) for implementation files: config.h, primary header, blank line, alphabetically sorted
    # 2) for header files: alphabetically sorted
    # The include_state object keeps track of the last type seen
    # and complains if the header types are out of order or missing.
    in_header_file = file_extension == "h"
    error_message = include_state.check_next_include_order(header_type,
                                                           in_header_file,
                                                           primary_header_exists)

    # An ordering problem is reported on its own; the finer-grained checks
    # below only make sense when the section order is right.
    if error_message:
        if in_header_file:
            error(line_number, 'build/include_order', 4,
                  '%s Should be: alphabetically sorted.' %
                  error_message)
        else:
            error(line_number, 'build/include_order', 4,
                  '%s Should be: config.h, primary header, blank line, and then alphabetically sorted.' %
                  error_message)
        return

    # Check to make sure we have a blank line after primary header.
    if header_type == _PRIMARY_HEADER:
        following_line = clean_lines.raw_lines[line_number + 1]
        if not is_blank_line(following_line):
            error(line_number, 'build/include_order', 4,
                  'You should add a blank line after implementation file\'s own header.')

    # Check to make sure all headers besides config.h and the primary header are
    # alphabetically sorted. Skip Qt's moc files.
    if header_type == _OTHER_HEADER:
        # Walk backwards to the closest preceding #include, giving up at the
        # top of the file or at a preprocessor conditional.
        previous_line_number = line_number
        while True:
            previous_line_number -= 1
            previous_line = clean_lines.lines[previous_line_number]
            previous_match = _RE_PATTERN_INCLUDE.search(previous_line)
            if previous_match or previous_line_number <= 0:
                break
            if search(r'\A(#if|#ifdef|#ifndef|#else|#elif|#endif)', previous_line):
                break

        if previous_match:
            previous_header_type = include_state.header_types[previous_line_number]
            out_of_order = previous_line.strip() > line.strip()
            if previous_header_type == _OTHER_HEADER and out_of_order:
                # This type of error is potentially a problem with this line or the previous one,
                # so if the error is filtered for one line, report it for the next. This is so that
                # we properly handle patches, for which only modified lines produce errors.
                reported = error(line_number - 1, 'build/include_order', 4, 'Alphabetical sorting problem.')
                if not reported:
                    error(line_number, 'build/include_order', 4, 'Alphabetical sorting problem.')
3091
3092
def check_language(filename, clean_lines, line_number, file_extension, include_state,
                   file_state, error):
    """Checks rules from the 'C++ language rules' section of cppguide.html.

    Some of these rules are hard to test (function overloading, using
    uint32 inappropriately), but we do the best we can.

    #include lines are delegated to check_include_line() and receive no
    further checks here. Every other non-empty elided line is scanned for
    deprecated casts, global strings, RTTI, risky printf-family calls,
    swapped memset arguments, variable-length arrays, unnamed namespaces in
    headers, plain bitfields, identifier naming and 'unsigned int'.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_extension: The extension (without the dot) of the filename.
      include_state: An _IncludeState instance in which the headers are inserted.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: The function to call with any errors found.
    """
    # If the line is empty or consists of entirely a comment, no need to
    # check it.
    line = clean_lines.elided[line_number]
    if not line:
        return

    matched = _RE_PATTERN_INCLUDE.search(line)
    if matched:
        check_include_line(filename, file_extension, clean_lines, line_number, include_state, error)
        return

    # FIXME: figure out if they're using default arguments in fn proto.

    # Check to see if they're using an conversion function cast.
    # I just try to capture the most common basic types, though there are more.
    # Parameterless conversion functions, such as bool(), are allowed as they are
    # probably a member operator declaration or default constructor.
    matched = search(
        r'\b(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
    if matched:
        # gMock methods are defined using some variant of MOCK_METHODx(name, type)
        # where type may be float(), int(string), etc.  Without context they are
        # virtually indistinguishable from int(x) casts.
        if not match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line):
            error(line_number, 'readability/casting', 4,
                  'Using deprecated casting style.  '
                  'Use static_cast<%s>(...) instead' %
                  matched.group(1))

    check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number],
                       'static_cast',
                       r'\((int|float|double|bool|char|u?int(16|32|64))\)',
                       error)
    # This doesn't catch all cases.  Consider (const char * const)"hello".
    check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number],
                       'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

    # In addition, we look for people taking the address of a cast.  This
    # is dangerous -- casts can assign to temporaries, so the pointer doesn't
    # point where you think.
    if search(
        r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
        error(line_number, 'runtime/casting', 4,
              ('Are you taking an address of a cast?  '
               'This is dangerous: could be a temp var.  '
               'Take the address before doing the cast, rather than after'))

    # Check for people declaring static/global STL strings at the top level.
    # This is dangerous because the C++ language does not guarantee that
    # globals with constructors are initialized before the first access.
    matched = match(
        r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
        line)
    # Make sure it's not a function.
    # Function template specialization looks like: "string foo<Type>(...".
    # Class template definitions look like: "string Foo<Type>::Method(...".
    if matched and not match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
                             matched.group(3)):
        error(line_number, 'runtime/string', 4,
              'For a static/global string constant, use a C style string instead: '
              '"%schar %s[]".' %
              (matched.group(1), matched.group(2)))

    # Check that we're not using RTTI outside of testing code.
    if search(r'\bdynamic_cast<', line):
        error(line_number, 'runtime/rtti', 5,
              'Do not use dynamic_cast<>.  If you need to cast within a class '
              "hierarchy, use static_cast<> to upcast.  Google doesn't support "
              'RTTI.')

    # A member initializer of the form "foo_(foo_)".
    if search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
        error(line_number, 'runtime/init', 4,
              'You seem to be initializing a member variable with itself.')

    # FIXME: for header files, check that 1-arg constructors are explicit.
    #        How to tell it's a constructor?
    #        (handled in check_for_non_standard_constructs for now)

    # Check if people are using the verboten C basic types.  The only exception
    # we regularly allow is "unsigned short port" for port.
    if search(r'\bshort port\b', line):
        if not search(r'\bunsigned short port\b', line):
            error(line_number, 'runtime/int', 4,
                  'Use "unsigned short" for ports, not "short"')

    # When snprintf is used, the second argument shouldn't be a literal.
    matched = search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
    if matched:
        error(line_number, 'runtime/printf', 3,
              'If you can, use sizeof(%s) instead of %s as the 2nd arg '
              'to snprintf.' % (matched.group(1), matched.group(2)))

    # Check if some verboten C functions are being used.
    if search(r'\bsprintf\b', line):
        error(line_number, 'runtime/printf', 5,
              'Never use sprintf.  Use snprintf instead.')
    matched = search(r'\b(strcpy|strcat)\b', line)
    if matched:
        error(line_number, 'runtime/printf', 4,
              'Almost always, snprintf is better than %s' % matched.group(1))

    if search(r'\bsscanf\b', line):
        error(line_number, 'runtime/printf', 1,
              'sscanf can be ok, but is slow and can overflow buffers.')

    # Check for suspicious usage of "if" like
    # } if (a == b) {
    if search(r'\}\s*if\s*\(', line):
        error(line_number, 'readability/braces', 4,
              'Did you mean "else if"? If not, start a new line for "if".')

    # Check for potential format string bugs like printf(foo).
    # We constrain the pattern not to pick things like DocidForPrintf(foo).
    # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
    matched = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
    if matched:
        error(line_number, 'runtime/printf', 4,
              'Potential format string bug. Do %s("%%s", %s) instead.'
              % (matched.group(1), matched.group(2)))

    # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
    # A second argument that is entirely a literal fill value ('' is what a
    # character literal looks like on an elided line) is left alone. The
    # alternatives are grouped and anchored at the end so that an expression
    # which merely starts with a digit, such as "4 * n", is not mistaken for
    # a literal.
    matched = search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
    if matched and not match(r"(''|-?(0[xX][0-9A-Fa-f]+|[0-9]+)[uUlL]*)\s*$",
                             matched.group(2)):
        error(line_number, 'runtime/memset', 4,
              'Did you mean "memset(%s, 0, %s)"?'
              % (matched.group(1), matched.group(2)))

    # Detect variable-length arrays.
    matched = match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
    if (matched and matched.group(2) != 'return' and matched.group(2) != 'delete' and
        matched.group(3).find(']') == -1):
        # Split the size using space and arithmetic operators as delimiters.
        # If any of the resulting tokens are not compile time constants then
        # report the error. ('>>' must be a delimiter in its own right;
        # otherwise the operator itself shows up as a non-constant token.)
        tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', matched.group(3))
        is_const = True
        skip_next = False
        for tok in tokens:
            if skip_next:
                skip_next = False
                continue

            if search(r'sizeof\(.+\)', tok):
                continue
            if search(r'arraysize\(\w+\)', tok):
                continue

            tok = tok.lstrip('(')
            tok = tok.rstrip(')')
            if not tok:
                continue
            if match(r'\d+', tok):
                continue
            if match(r'0[xX][0-9a-fA-F]+', tok):
                continue
            if match(r'k[A-Z0-9]\w*', tok):
                continue
            if match(r'(.+::)?k[A-Z0-9]\w*', tok):
                continue
            if match(r'(.+::)?[A-Z][A-Z0-9_]*', tok):
                continue
            # A catch all for tricky sizeof cases, including 'sizeof expression',
            # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
            # requires skipping the next token because we split on ' ' and '*'.
            if tok.startswith('sizeof'):
                skip_next = True
                continue
            is_const = False
            break
        if not is_const:
            error(line_number, 'runtime/arrays', 1,
                  'Do not use variable-length arrays.  Use an appropriately named '
                  "('k' followed by CamelCase) compile-time constant for the size.")

    # Check for use of unnamed namespaces in header files.  Registration
    # macros are typically OK, so we allow use of "namespace {" on lines
    # that end with backslashes.
    if (file_extension == 'h'
        and search(r'\bnamespace\s*{', line)
        and line[-1] != '\\'):
        error(line_number, 'build/namespaces', 4,
              'Do not use unnamed namespaces in header files.  See '
              'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
              ' for more information.')

    # Check for plain bitfields declared without either "signed" or "unsigned".
    # Most compilers treat such bitfields as signed, but there are still compilers like
    # RVCT 4.0 that use unsigned by default.
    matched = re.match(r'\s*((const|mutable)\s+)?(char|(short(\s+int)?)|int|long(\s+(long|int))?)\s+[a-zA-Z_][a-zA-Z0-9_]*\s*:\s*\d+\s*;', line)
    if matched:
        error(line_number, 'runtime/bitfields', 5,
              'Please declare integral type bitfields with either signed or unsigned.')

    check_identifier_name_in_declaration(filename, line_number, line, file_state, error)

    # Check for unsigned int (should be just 'unsigned')
    if search(r'\bunsigned int\b', line):
        error(line_number, 'runtime/unsigned', 1,
              'Omit int when using unsigned')

    # Check for usage of static_cast<Classname*>.
    check_for_object_static_cast(filename, line_number, line, error)
3314
3315
def check_identifier_name_in_declaration(filename, line_number, line, file_state, error):
    """Checks declared identifier names for underscores and for the name 'l'.

    As identifiers in libraries we are using have a bunch of
    underscores, we only warn about the declarations of identifiers
    and don't check use of identifiers.

    The line is first simplified (type qualifiers, "new" expressions,
    template arguments and leading control-statement keywords are removed)
    and then declarations are matched one after another from the start of
    the simplified line.

    Args:
      filename: The name of the current file.
      line_number: The number of the line to check.
      line: The line of code to check.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: The function to call with any errors found.
    """
    # We don't check return and delete statements and conversion operator declarations.
    if match(r'\s*(return|delete|operator)\b', line):
        return

    # Basically, a declaration is a type name followed by whitespaces
    # followed by an identifier. The type name can be complicated
    # due to type adjectives and templates. We remove them first to
    # simplify the process to find declarations of identifiers.

    # Convert "long long", "long double", and "long long int" to
    # simple types, but don't remove simple "long".
    line = sub(r'long (long )?(?=long|double|int)', '', line)
    # Convert unsigned/signed types to simple types, too.
    line = sub(r'(unsigned|signed) (?=char|short|int|long)', '', line)
    line = sub(r'\b(inline|using|static|const|volatile|auto|register|extern|typedef|restrict|struct|class|virtual)(?=\W)', '', line)

    # Remove "new" and "new (expr)" to simplify, too.
    line = sub(r'new\s*(\([^)]*\))?', '', line)

    # Remove all template parameters by removing matching < and >.
    # Loop until no templates are removed to remove nested templates.
    while True:
        line, number_of_replacements = subn(r'<([\w\s:]|::)+\s*[*&]*\s*>', '', line)
        if not number_of_replacements:
            break

    # Declarations of local variables can be in condition expressions
    # of control flow statements (e.g., "if (RenderObject* p = o->parent())").
    # We remove the keywords and the first parenthesis.
    #
    # Declarations in "while", "if", and "switch" are different from
    # other declarations in two aspects:
    #
    # - There can be only one declaration between the parentheses.
    #   (i.e., you cannot write "if (int i = 0, j = 1) {}")
    # - The variable must be initialized.
    #   (i.e., you cannot write "if (int i) {}")
    #
    # and we will need different treatments for them.
    line = sub(r'^\s*for\s*\(', '', line)
    # control_statement is the number of substitutions made (0 or 1); it is
    # used below as a flag meaning "a while/if/switch prefix was removed".
    line, control_statement = subn(r'^\s*(while|else if|if|switch)\s*\(', '', line)

    # Detect variable and functions.
    type_regexp = r'\w([\w]|\s*[*&]\s*|::)+'
    attribute_regexp = r'ALLOW_UNUSED'
    identifier_regexp = r'(?!' + attribute_regexp + r')(?P<identifier>[\w:]+)'
    maybe_bitfield_regexp = r'(:\s*\d+\s*)?'
    character_after_identifier_regexp = r'(?P<character_after_identifier>[[;()=,])(?!=)'
    declaration_without_type_regexp = r'\s*' + identifier_regexp + r'\s*(' + attribute_regexp + r')?\s*' + maybe_bitfield_regexp + character_after_identifier_regexp
    declaration_with_type_regexp = r'\s*' + type_regexp + r'\s' + declaration_without_type_regexp

    # Exceptions to the no-underscore rule that apply in every file.
    allowed_prefixes = ('tst_', 'webkit_dom_object_', 'webkit_soup',
                        'NPN_', 'NPP_', 'NP_', 'qt_', '_q_', 'cairo_',
                        'Ecore_', 'Eina_', 'Evas_', 'Ewk_', 'cti_')
    allowed_names = ('const_iterator', 'vm_throw', 'DFG_OPERATION')

    is_function_arguments = False
    number_of_identifiers = 0
    while True:
        # If we are seeing the first identifier or arguments of a
        # function, there should be a type name before an identifier.
        if not number_of_identifiers or is_function_arguments:
            declaration_regexp = declaration_with_type_regexp
        else:
            declaration_regexp = declaration_without_type_regexp

        matched = match(declaration_regexp, line)
        if not matched:
            return
        identifier = matched.group('identifier')
        character_after_identifier = matched.group('character_after_identifier')

        # If we removed a non-for-control statement, the character after
        # the identifier should be '='. With this rule, we can avoid
        # warning for cases like "if (val & INT_MAX) {".
        if control_statement and character_after_identifier != '=':
            return

        is_function_arguments = is_function_arguments or character_after_identifier == '('

        # Remove "m_" and "s_" to allow them.
        modified_identifier = sub(r'(^|(?<=::))[ms]_', '', identifier)
        if not file_state.is_objective_c() and modified_identifier.find('_') >= 0:
            # Various exceptions to the rule: JavaScript op codes functions, const_iterator.
            # Note: the gtk exemption applies only to identifiers that start
            # with 'webkit_'. startswith() returns a bool, so it must not be
            # compared with 0 (that comparison is always true).
            is_exempt = (
                (filename.find('JavaScriptCore') >= 0 and modified_identifier.find('op_') >= 0)
                or (filename.find('gtk') >= 0 and modified_identifier.startswith('webkit_'))
                or modified_identifier.startswith(allowed_prefixes)
                or modified_identifier.find('::qt_') >= 0
                or modified_identifier.find('::_q_') >= 0
                or modified_identifier in allowed_names)
            if not is_exempt:
                error(line_number, 'readability/naming/underscores', 4, identifier + " is incorrectly named. Don't use underscores in your identifier names.")

        # Check for variables named 'l', these are too easy to confuse with '1' in some fonts
        if modified_identifier == 'l':
            error(line_number, 'readability/naming', 4, identifier + " is incorrectly named. Don't use the single letter 'l' as an identifier name.")

        # There can be only one declaration in non-for-control statements.
        if control_statement:
            return
        # We should continue checking if this is a function
        # declaration because we need to check its arguments.
        # Also, we need to check multiple declarations.
        if character_after_identifier != '(' and character_after_identifier != ',':
            return

        number_of_identifiers += 1
        # Continue matching after the declaration that was just handled.
        line = line[matched.end():]
3447
3448
def check_for_toFoo_definition(filename, pattern, error):
    """Looks up definitions of a toFoo conversion function in a header file.

    This supports the check that warns when static_cast<Classname*> is hard
    coded instead of using an existing toFoo conversion function. For
    example, you should use toHTMLElement(Node*) to convert Node* to
    HTMLElement*, instead of static_cast<HTMLElement*>(Node*).

    Args:
      filename: The name of the header file in which to check for toFoo definition.
      pattern: The conversion function pattern to grep for.
      error: The function to call with any errors found.

    Returns:
      None if no file named filename is found under WebKit/Source.
      Otherwise a list with one three-element list per definition found:
      [matched line, function_state.body_start_position.row,
      function_state.end_position.row + 1]. For the unit-test-only header
      'Foo.h' the entries are ['toFoo', line_number, line_number + 3].

    Raises:
      IOError: if the located header file cannot be opened or read.
    """
    def get_abs_filepath(filename):
        # Derive the WebKit/Source directory from the location of the
        # FileSystem module, then search it recursively for filename.
        fileSystem = FileSystem()
        base_dir = fileSystem.path_to_module(FileSystem.__module__).split('WebKit', 1)[0]
        base_dir = ''.join((base_dir, 'WebKit/Source'))
        for root, dirs, names in os.walk(base_dir):
            if filename in names:
                return os.path.join(root, filename)
        return None

    def grep(lines, pattern, error):
        matches = []
        function_state = None
        for line_number in xrange(lines.num_lines()):
            line = (lines.elided[line_number]).rstrip()
            try:
                if pattern in line:
                    if not function_state:
                        # 1 is passed as the _FunctionState constructor argument
                        # (_process_lines passes min_confidence in that position).
                        function_state = _FunctionState(1)
                    detect_functions(lines, line_number, function_state, error)
                    # Exclude the match of dummy conversion function. Dummy function is just to
                    # catch invalid conversions and shouldn't be part of possible alternatives.
                    result = re.search(r'%s(\s+)%s' % ("void", pattern), line)
                    if not result:
                        matches.append([line, function_state.body_start_position.row, function_state.end_position.row + 1])
                        function_state = None
            except UnicodeDecodeError:
                # There would be no non-ascii characters in the codebase ever. The only exception
                # would be comments/copyright text which might have non-ascii characters. Hence,
                # it is perfectly safe to catch the UnicodeDecodeError and just pass the line.
                pass

        return matches

    def check_in_mock_header(filename, matches=None):
        # Only the unit-test header name is handled here; matches is
        # appended to in place and must be a list when filename is 'Foo.h'.
        if not filename == 'Foo.h':
            return False

        try:
            header_file = CppChecker.fs.read_text_file(filename)
        except IOError:
            return False
        line_number = 0
        for line in header_file:
            line_number += 1
            matched = re.search(r'\btoFoo\b', line)
            if matched:
                matches.append(['toFoo', line_number, line_number + 3])
        return True

    # For unit testing only, avoid header search and lookup locally.
    matches = []
    mock_def_found = check_in_mock_header(filename, matches)
    if mock_def_found:
        return matches

    # Regular style check flow. Search for actual header file & defs.
    file_path = get_abs_filepath(filename)
    if not file_path:
        return None
    # Open before entering the try block: if open() itself fails there is
    # nothing to close, and the IOError must not be masked by a NameError
    # raised from the finally clause.
    f = open(file_path)
    try:
        clean_lines = CleansedLines(f.readlines())
    finally:
        f.close()

    # Make a list of all genuine alternatives to static_cast.
    matches = grep(clean_lines, pattern, error)
    return matches
3531
3532
def check_for_object_static_cast(processing_file, line_number, line, error):
    """Checks for a Cpp-style static cast on objects by looking for the pattern.

    When static_cast<Classname*> is found, the header Classname.h is
    searched for toClassname definitions and a 'runtime/casting' error is
    reported asking to use (or to add) that conversion function. Casts to
    void*, and casts whose header cannot be located, are ignored.

    Args:
      processing_file: The name of the processing file.
      line_number: The number of the line to check.
      line: The line of code to check.
      error: The function to call with any errors found.
    """
    matched = search(r'\bstatic_cast<(\s*\w*:?:?\w+\s*\*+\s*)>', line)
    if not matched:
        return

    # Drop the pointer stars and surrounding whitespace to get the type name.
    class_name = matched.group(1).replace('*', '').strip()
    # Ignore (for now) when the casting is to void*,
    if class_name == 'void':
        return

    # Strip a namespace qualifier, keeping only what follows the last ':'.
    # This also copes with a single ':' (which the pattern above accepts)
    # without cutting off the first character of the class name.
    class_name = class_name.rsplit(':', 1)[-1]

    header_file = ''.join((class_name, '.h'))
    matches = check_for_toFoo_definition(header_file, ''.join(('to', class_name)), error)
    # Ignore (for now) if not able to find the header where toFoo might be defined.
    # TODO: Handle cases where Classname might be defined in some other header or cpp file.
    if matches is None:
        return

    # Ensure found static_cast instance is not from within toFoo definition itself.
    # Each entry of matches carries a half-open [start, end) line range in
    # its second and third elements.
    if os.path.basename(processing_file) == header_file:
        for item in matches:
            if item[1] <= line_number < item[2]:
                return

    if matches:
        # toFoo is defined - enforce using it.
        # TODO: Suggest an appropriate toFoo from the alternatives present in matches.
        error(line_number, 'runtime/casting', 4,
              'static_cast of class objects is not allowed. Use to%s defined in %s.' %
              (class_name, header_file))
    else:
        # No toFoo defined - enforce definition & usage.
        # TODO: Automate the generation of toFoo() to avoid any slippages ever.
        error(line_number, 'runtime/casting', 4,
              'static_cast of class objects is not allowed. Add to%s in %s and use it instead.' %
              (class_name, header_file))
3584
3585
def check_c_style_cast(line_number, line, raw_line, cast_type, pattern,
                       error):
    """Reports C-style casts found with the given pattern.

    Because the text looks alike, this also reports sizeof(type) and
    unnamed function parameters instead of a cast where appropriate.

    Args:
      line_number: The number of the line to check.
      line: The line of code to check.
      raw_line: The raw line of code to check, with comments.
      cast_type: The string for the C++ cast to recommend.  This is either
                 reinterpret_cast or static_cast, depending.
      pattern: The regular expression used to find C-style casts. Its first
               group is reported as the type being cast to.
      error: The function to call with any errors found.
    """
    cast = search(pattern, line)
    if not cast:
        return

    # e.g., sizeof(int)
    # The text considered stops one character before group 1 starts
    # (presumably the opening parenthesis of the pattern).
    text_before_cast = line[0:cast.start(1) - 1]
    if match(r'.*sizeof\s*$', text_before_cast):
        error(line_number, 'runtime/sizeof', 1,
              'Using sizeof(type).  Use sizeof(varname) instead if possible')
        return

    # What follows the match decides between a function declaration and a
    # real cast:
    #   ')'  function pointers as arguments, eg, void foo(void (*bar)(int));
    #   ';'  a basic function check or a function pointer typedef,
    #        eg, void foo(int); or void foo(int) const;
    #   '='  function pointer assignment, eg, void *(*foo)(int) = ...
    #
    # Right now, this will only catch cases where there's a single argument,
    # and it's unnamed.  It should probably be expanded to check for multiple
    # arguments with some unnamed.
    trailing_text = line[cast.end(0):]
    declaration = match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))', trailing_text)
    if not declaration:
        # At this point, all that should be left is actual casts.
        error(line_number, 'readability/casting', 4,
              'Using C-style cast.  Use %s<%s>(...) instead' %
              (cast_type, cast.group(1)))
        return

    # For a '{' or 'throw()' terminator the warning is skipped when the raw
    # line contains a '/*' comment.
    terminator = declaration.group(3)
    if not terminator or terminator == ';' or '/*' not in raw_line:
        error(line_number, 'readability/function', 3,
              'All parameters should be named in a function')
3638
3639
# Pairs of (header, names of templates provided by that header). Each
# template name is turned into a regular expression further below
# (_re_pattern_templates), which check_for_include_what_you_use uses to work
# out which headers a file needs.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', ('unary_function', 'binary_function',
                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
                      'negate',
                      'equal_to', 'not_equal_to', 'greater', 'less',
                      'greater_equal', 'less_equal',
                      'logical_and', 'logical_or', 'logical_not',
                      'unary_negate', 'not1', 'binary_negate', 'not2',
                      'bind1st', 'bind2nd',
                      'pointer_to_unary_function',
                      'pointer_to_binary_function',
                      'ptr_fun',
                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
                      'mem_fun_ref_t',
                      'const_mem_fun_t', 'const_mem_fun1_t',
                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
                      'mem_fun_ref',
                     )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
    )

# Maps a template key (in the 'name<>' form produced for
# _re_pattern_templates) to include names that are accepted as providing it:
# no include-what-you-use error is reported for the template when any of
# these names is already in the include state.
_HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
    # We can trust with reasonable confidence that map gives us pair<>, too.
    'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
}

# Matches the bare word "string"; a match makes <string> a required header.
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# List of (compiled pattern, function name, '<algorithm>') triples, one per
# <algorithm> function that is looked for.
_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                  'transform'):
    # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
    # type::max().
    # The pattern needs one preceding character that is neither '>' nor '.',
    # and a character other than ')' right after the opening parenthesis, so
    # a call with an empty argument list does not match.
    _re_pattern_algorithm_header.append(
        (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
         _template,
         '<algorithm>'))

# List of (compiled pattern, 'name<>', header) triples, one per template name
# in _HEADERS_CONTAINING_TEMPLATES. The pattern matches the template name
# followed by optional whitespace and '<'.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
    for _template in _templates:
        _re_pattern_templates.append(
            (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
             _template + '<>',
             _header))
3701
3702
def files_belong_to_same_module(filename_cpp, filename_h):
    """Decide whether a .cpp file and a header belong to the same module.

    The concept of a 'module' here is as follows:
    foo.h, foo-inl.h, foo.cpp, foo_test.cpp and foo_unittest.cpp belong to the
    same 'module' if they are in the same directory.
    some/path/public/xyzzy and some/path/internal/xyzzy are also considered
    to belong to the same module here.

    If filename_cpp contains a longer path than filename_h, for example
    '/absolute/path/to/base/sysinfo.cpp' including 'base/sysinfo.h', the
    leading part of the .cpp path that the header path lacks is returned as
    well. The caller uses that prefix to open the header more robustly, as
    the real include paths are not available in this context.

    Known bugs: tools/base/bar.cpp and base/bar.h belong to the same module
    according to this implementation. Because of this, this function gives
    some false positives. This should be sufficiently rare in practice.

    Args:
      filename_cpp: is the path for the .cpp file
      filename_h: is the path for the header path

    Returns:
      Tuple with a bool and a string:
      bool: True if filename_cpp and filename_h belong to the same module.
      string: the additional prefix needed to open the header file
              ('' when the files are not in the same module).
    """
    def collapse_visibility_dirs(path):
        # public/ and internal/ directories are treated as transparent.
        return path.replace('/public/', '/').replace('/internal/', '/')

    # Both names must carry the expected extension.
    if not filename_cpp.endswith('.cpp') or not filename_h.endswith('.h'):
        return (False, '')

    # Reduce the .cpp name to its module stem; at most one test suffix is
    # removed, '_unittest' taking precedence over '_test'.
    cpp_stem = filename_cpp[:-len('.cpp')]
    for test_suffix in ('_unittest', '_test'):
        if cpp_stem.endswith(test_suffix):
            cpp_stem = cpp_stem[:-len(test_suffix)]
            break
    cpp_stem = collapse_visibility_dirs(cpp_stem)

    # Reduce the header name the same way.
    header_stem = filename_h[:-len('.h')]
    if header_stem.endswith('-inl'):
        header_stem = header_stem[:-len('-inl')]
    header_stem = collapse_visibility_dirs(header_stem)

    if not cpp_stem.endswith(header_stem):
        return False, ''
    # Whatever precedes the shared tail is the prefix needed for the header.
    return True, cpp_stem[:-len(header_stem)]
3756
3757
def update_include_state(filename, include_state):
    """Adds the includes found in a header file to include_state.

    The file is read through CppChecker.fs. Includes that are already
    present in include_state are left untouched.

    Args:
      filename: the name of the header to read.
      include_state: an _IncludeState instance in which the headers are inserted.

    Returns:
      True if the header could be read, False if reading it raised IOError.
    """
    try:
        contents = CppChecker.fs.read_text_file(filename)
    except IOError:
        return False

    for index, text in enumerate(contents):
        include_match = _RE_PATTERN_INCLUDE.search(cleanse_comments(text))
        if not include_match:
            continue
        # The value formatting is cute, but not really used right now.
        # What matters here is that the key is in include_state.
        # index is zero-based, the recorded position is one-based.
        include_state.setdefault(include_match.group(2),
                                 '%s:%d' % (filename, index + 1))
    return True
3785
3786
def check_for_include_what_you_use(filename, clean_lines, include_state, error):
    """Reports missing includes for the STL entities a file uses.

    Warnings are produced to make sure the headers necessary for the stl
    containers and functions in use are included. Only one reason is given
    per header. For example, if both equal_to<> and less<> are used in a .h
    file, only one of them (the latter in the file) is reported as the
    reason to include <functional>.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      include_state: An _IncludeState instance.
      error: The function to call with any errors found.
    """
    # Header name -> (line_number, entity that needs the header).
    # Example: { '<functional>': (1219, 'less<>') }
    needed = {}

    for index in xrange(clean_lines.num_lines()):
        text = clean_lines.elided[index]
        # Skip empty lines and preprocessor directives.
        if not text or text[0] == '#':
            continue

        # String is special -- it is a non-templatized type in STL.
        if _RE_PATTERN_STRING.search(text):
            needed['<string>'] = (index, 'string')

        for regexp, entity, header in _re_pattern_algorithm_header:
            if regexp.search(text):
                needed[header] = (index, entity)

        # Template patterns all require a '<'; testing for it first is only
        # a speed up and does not change the result.
        if '<' in text:
            for regexp, entity, header in _re_pattern_templates:
                if regexp.search(text):
                    needed[header] = (index, entity)

    # The policy is that if you #include something in foo.h you don't need to
    # include it again in foo.cpp. Possible includes are therefore collected
    # in a copy, so the caller's include_state is left alone.
    known_includes = include_state.copy()

    # Use the absolute path so that matching works properly.
    #
    # For Emacs's flymake: when cpp_style is invoked from flymake, the file
    # checked is a temporary one whose name might end with '_flymake.cpp'.
    # The original name is restored so the corresponding header can be found,
    # e.g. for 'foo_flymake.cpp' we search for 'foo.h', not 'foo_flymake.h'.
    source_path = re.sub(r'_flymake\.cpp$', '.cpp', os.path.abspath(filename))

    # Did we find the header for this file (if any) and successfully load it?
    header_found = False

    # known_includes is modified during iteration, so we iterate over a copy
    # of the keys.
    for candidate in list(known_includes.keys()):  #NOLINT
        same_module, prefix = files_belong_to_same_module(source_path, candidate)
        if same_module and update_include_state(prefix + candidate, known_includes):
            header_found = True

    # If we can't find the header file for a .cpp, assume it's because we don't
    # know where to look. In that case we'll give up as we're not sure they
    # didn't include it in the .h file.
    # FIXME: Do a better job of finding .h files so we are confident that
    #        not having the .h file means there isn't one.
    if filename.endswith('.cpp') and not header_found:
        return

    # All the lines have been processed, report the errors found.
    for header, (index, entity) in needed.items():
        accepted_via = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED.get(entity)
        if accepted_via is not None and any(name in known_includes for name in accepted_via):
            continue
        if header.strip('<>"') not in known_includes:
            error(index,
                  'build/include_what_you_use', 4,
                  'Add #include ' + header + ' for ' + entity)
3873
3874
def process_line(filename, file_extension,
                 clean_lines, line, include_state, function_state,
                 class_state, file_state, enum_state, error):
    """Runs every per-line check on one line of the file.

    Function detection and function-length accounting run for every line.
    All other checks are skipped when the raw line contains NOLINT or starts
    with __asm.

    Args:
      filename: Filename of the file that is being processed.
      file_extension: The extension (dot not included) of the file.
      clean_lines: The cleansed lines of the file; its raw_lines attribute
                   holds the unprocessed lines.
      line: Number of line being processed.
      include_state: An _IncludeState instance in which the headers are inserted.
      function_state: A _FunctionState instance which counts function lines, etc.
      class_state: A _ClassState instance which maintains information about
                   the current stack of nested class declarations being parsed.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      enum_state: A _EnumState instance which maintains an enum declaration
                  state.
      error: A callable to which errors are reported; the checks in this
             file call it as error(line_number, category, confidence,
             message).

    """
    raw_lines = clean_lines.raw_lines

    # These two always run, even for lines that are ignored below.
    detect_functions(clean_lines, line, function_state, error)
    check_for_function_lengths(clean_lines, line, function_state, error)

    # Ignore nolint lines, and asm lines as they format differently.
    raw_line = raw_lines[line]
    if search(r'\bNOLINT\b', raw_line) or match(r'\s*\b__asm\b', raw_line):
        return

    check_function_definition(
        filename, file_extension, clean_lines, line, function_state, error)
    check_pass_ptr_usage(clean_lines, line, function_state, error)
    check_for_leaky_patterns(clean_lines, line, function_state, error)
    check_for_multiline_comments_and_strings(clean_lines, line, error)
    check_style(
        clean_lines, line, file_extension, class_state, file_state,
        enum_state, error)
    check_language(
        filename, clean_lines, line, file_extension, include_state,
        file_state, error)
    check_for_non_standard_constructs(clean_lines, line, class_state, error)
    check_posix_threading(clean_lines, line, error)
    check_invalid_increment(clean_lines, line, error)
    check_conditional_and_loop_bodies_for_brace_violations(
        clean_lines, line, error)
3916
def _process_lines(filename, file_extension, lines, error, min_confidence):
    """Runs all lint checks on a file and reports errors to the error function.

    Whole-file checks (copyright, header guard, include-what-you-use,
    unicode replacement characters, newline at end of file) run around a
    loop that calls process_line for every cleansed line.

    Args:
      filename: Filename of the file that is being processed.
      file_extension: The extension (dot not included) of the file.
      lines: An array of strings, each representing a line of the file, with the
             last element being empty if the file is terminated with a newline.
      error: A callable to which errors are reported; the checks in this
             file call it as error(line_number, category, confidence,
             message).
      min_confidence: Passed to the _FunctionState constructor.
    """
    # Sentinel lines are added at both ends; the caller's list is not changed.
    leading_marker = '// marker so line numbers and indices both start at 1'
    trailing_marker = '// marker so line numbers end in a known way'
    lines = [leading_marker] + lines + [trailing_marker]

    include_state = _IncludeState()
    function_state = _FunctionState(min_confidence)
    class_state = _ClassState()

    check_for_copyright(lines, error)

    if file_extension == 'h':
        check_for_header_guard(filename, lines, error)

    remove_multi_line_comments(lines, error)
    clean_lines = CleansedLines(lines)
    file_state = _FileState(clean_lines, file_extension)
    enum_state = _EnumState()

    for line_index in xrange(clean_lines.num_lines()):
        process_line(filename, file_extension, clean_lines, line_index,
                     include_state, function_state, class_state, file_state,
                     enum_state, error)
    class_state.check_finished(error)

    check_for_include_what_you_use(filename, clean_lines, include_state, error)

    # We check here rather than inside process_line so that we see raw
    # lines rather than "cleaned" lines.
    check_for_unicode_replacement_characters(lines, error)

    check_for_new_line_at_eof(lines, error)
3956
3957
class CppChecker(object):

    """Processes C++ lines for checking style.

    Instances hold the per-file parameters (path, extension, error
    handler and minimum confidence) and forward them to _process_lines()
    when check() is called.

    """

    # This list is used to--
    #
    # (1) generate an explicit list of all possible categories,
    # (2) unit test that all checked categories have valid names, and
    # (3) unit test that all categories are getting unit tested.
    #
    categories = set([
        'build/class',
        'build/deprecated',
        'build/endif_comment',
        'build/forward_decl',
        'build/header_guard',
        'build/include',
        'build/include_order',
        'build/include_what_you_use',
        'build/namespaces',
        'build/printf_format',
        'build/storage_class',
        'build/using_std',
        'legal/copyright',
        'readability/braces',
        'readability/casting',
        'readability/check',
        'readability/comparison_to_boolean',
        'readability/constructors',
        'readability/control_flow',
        'readability/enum_casing',
        'readability/fn_size',
        'readability/function',
        'readability/multiline_comment',
        'readability/multiline_string',
        'readability/parameter_name',
        'readability/naming',
        'readability/naming/underscores',
        'readability/null',
        'readability/pass_ptr',
        'readability/streams',
        'readability/todo',
        'readability/utf8',
        'readability/webkit_export',
        'runtime/arrays',
        'runtime/bitfields',
        'runtime/casting',
        'runtime/ctype_function',
        'runtime/explicit',
        'runtime/init',
        'runtime/int',
        'runtime/invalid_increment',
        'runtime/leaky_pattern',
        'runtime/max_min_macros',
        'runtime/memset',
        'runtime/printf',
        'runtime/printf_format',
        'runtime/references',
        'runtime/rtti',
        'runtime/sizeof',
        'runtime/string',
        'runtime/threadsafe_fn',
        'runtime/unsigned',
        'runtime/virtual',
        'whitespace/blank_line',
        'whitespace/braces',
        'whitespace/comma',
        'whitespace/comments',
        'whitespace/declaration',
        'whitespace/end_of_line',
        'whitespace/ending_newline',
        'whitespace/indent',
        'whitespace/line_length',
        'whitespace/newline',
        'whitespace/operators',
        'whitespace/parens',
        'whitespace/semicolon',
        'whitespace/tab',
        'whitespace/todo',
        ])

    # File system object shared by every CppChecker. It is a class
    # attribute and is (re)assigned each time an instance is constructed.
    fs = None

    def __init__(self, file_path, file_extension, handle_style_error,
                 min_confidence, fs=None):
        """Create a CppChecker instance.

        Args:
          file_path: The path of the file being checked; forwarded to
                     _process_lines() by check().
          file_extension: A string that is the file extension, without
                          the leading dot.
          handle_style_error: The style-error handler forwarded to
                              _process_lines() by check().
          min_confidence: The minimum confidence value forwarded to
                          _process_lines() by check().
          fs: An optional file system object.  When omitted (or falsy),
              a new FileSystem() is created.  Note that the value is
              stored on the class (CppChecker.fs), not on the instance,
              so the most recently constructed checker determines it.

        """
        self.file_extension = file_extension
        self.file_path = file_path
        self.handle_style_error = handle_style_error
        self.min_confidence = min_confidence
        CppChecker.fs = fs or FileSystem()

    # Useful for unit testing.
    def __eq__(self, other):
        """Return whether this CppChecker instance is equal to another.

        Two checkers are equal when their file extension, file path,
        style-error handler and minimum confidence are all equal.
        NotImplemented is returned for objects that are not CppChecker
        instances, so that comparing against None or an unrelated object
        yields an ordinary "not equal" result instead of raising
        AttributeError.

        """
        if not isinstance(other, CppChecker):
            return NotImplemented
        if self.file_extension != other.file_extension:
            return False
        if self.file_path != other.file_path:
            return False
        if self.handle_style_error != other.handle_style_error:
            return False
        if self.min_confidence != other.min_confidence:
            return False

        return True

    # Useful for unit testing.
    def __ne__(self, other):
        """Return whether this CppChecker instance differs from another."""
        # Python 2 does not automatically deduce __ne__() from __eq__().
        equal = self.__eq__(other)
        if equal is NotImplemented:
            # Propagate rather than negate: "not NotImplemented" would
            # wrongly report that the two objects are equal.
            return NotImplemented
        return not equal

    def check(self, lines):
        """Run the style checks on lines using this checker's settings."""
        _process_lines(self.file_path, self.file_extension, lines,
                       self.handle_style_error, self.min_confidence)
4078
4079
4080# FIXME: Remove this function (requires refactoring unit tests).
def process_file_data(filename, file_extension, lines, error, min_confidence, fs=None):
    """Check lines by constructing a CppChecker and calling its check().

    Args:
      filename: Passed to CppChecker as the file path.
      file_extension: Passed to CppChecker as the file extension.
      lines: The lines handed to CppChecker.check().
      error: Passed to CppChecker as the style-error handler.
      min_confidence: Passed to CppChecker as the minimum confidence.
      fs: Optional file system object passed through to CppChecker.

    """
    CppChecker(filename, file_extension, error, min_confidence, fs).check(lines)
4084