1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2009, 2010 Google Inc. All rights reserved.
5# Copyright (C) 2009 Torch Mobile Inc.
6# Copyright (C) 2009 Apple Inc. All rights reserved.
7# Copyright (C) 2010 Chris Jerdonek (cjerdonek@webkit.org)
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions are
11# met:
12#
13#    * Redistributions of source code must retain the above copyright
14# notice, this list of conditions and the following disclaimer.
15#    * Redistributions in binary form must reproduce the above
16# copyright notice, this list of conditions and the following disclaimer
17# in the documentation and/or other materials provided with the
18# distribution.
19#    * Neither the name of Google Inc. nor the names of its
20# contributors may be used to endorse or promote products derived from
21# this software without specific prior written permission.
22#
23# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
# This is a modified version of Google's cpplint. The original code is at
# http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py
37
38"""Support for check-webkit-style."""
39
40import codecs
41import math  # for log
42import os
43import os.path
44import re
45import sre_compile
46import string
47import sys
48import unicodedata
49
50from webkitpy.common.memoized import memoized
51
# The key under which a unit test can register, in _unit_test_config, a
# custom io class that fakes loading a header file.
INCLUDE_IO_INJECTION_KEY = 'include_header_io'
54
# Headers that we consider STL headers.
# NOTE(review): 'exception' is listed both here and in _CPP_HEADERS --
# confirm which classification is intended.
_STL_HEADERS = frozenset([
    'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
    'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
    'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'pair.h',
    'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
    'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
    'utility', 'vector', 'vector.h',
    ])
64
65
# Non-STL C++ system headers.
# NOTE(review): 'exception' is listed both here and in _STL_HEADERS --
# confirm which classification is intended.
_CPP_HEADERS = frozenset([
    'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
    'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
    'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
    'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
    'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
    'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
    'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h',
    'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
    'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h',
    'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h',
    'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
    'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
    'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
    ])
82
83
# Assertion macros.  These are defined in base/logging.h and
# testing/base/gunit.h.  Note that the _M versions need to come first
# for substring matching to work.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE.
# Maps each macro name to a dict from comparison operator to the name of
# the more specific macro that should be used instead.
_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])

# For the macros that assert a condition is true, each operator maps to
# the macro named after that same comparison (e.g. '==' -> *_EQ).
for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
    _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
    _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
    _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
    _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
    _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
    _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement

# For the macros that assert a condition is false, each operator maps to
# the macro named after the inverse comparison (e.g. '==' -> *_NE).
for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
    _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
    _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
    _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
    _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
115
116
# These constants define the types of headers that can be passed as the
# header_type argument of _IncludeState.check_next_include_order().
_CONFIG_HEADER = 0
_PRIMARY_HEADER = 1
_OTHER_HEADER = 2
_MOC_HEADER = 3
123
124
# A dictionary of items that customize behavior for unit tests. For example,
# INCLUDE_IO_INJECTION_KEY allows providing a custom io class, which makes
# it possible to fake a header file.
_unit_test_config = {}
129
130
# Cache of compiled regular expressions, keyed by the pattern string.
# The regexp compilation caching is inlined in all regexp functions for
# performance reasons; factoring it out into a separate function turns out
# to be noticeably expensive.
_regexp_compile_cache = {}
135
136
def match(pattern, s):
    """Matches the string with the pattern, caching the compiled regexp.

    Args:
      pattern: The regular expression source; it is also the cache key.
      s: The string to match against.

    Returns:
      The match object if the pattern matches at the start of s,
      otherwise None.
    """
    # The cache lookup is intentionally written inline (not shared with the
    # sibling helpers) because this file keeps it inlined for performance.
    if pattern not in _regexp_compile_cache:
        # Compile through the public re API; sre_compile is an undocumented
        # implementation module that newer Python versions deprecate.
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].match(s)
142
143
def search(pattern, s):
    """Searches the string for the pattern, caching the compiled regexp.

    Args:
      pattern: The regular expression source; it is also the cache key.
      s: The string to search.

    Returns:
      The match object for the first location where the pattern matches,
      or None if it matches nowhere in s.
    """
    # The cache lookup is intentionally written inline (not shared with the
    # sibling helpers) because this file keeps it inlined for performance.
    if pattern not in _regexp_compile_cache:
        # Compile through the public re API; sre_compile is an undocumented
        # implementation module that newer Python versions deprecate.
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].search(s)
149
150
def sub(pattern, replacement, s):
    """Substitutes occurrences of a pattern, caching the compiled regexp.

    Args:
      pattern: The regular expression source; it is also the cache key.
      replacement: The replacement passed through to the compiled
                   pattern's sub() method.
      s: The string on which to do the substitutions.

    Returns:
      The string with the substitutions applied.
    """
    # The cache lookup is intentionally written inline (not shared with the
    # sibling helpers) because this file keeps it inlined for performance.
    if pattern not in _regexp_compile_cache:
        # Compile through the public re API; sre_compile is an undocumented
        # implementation module that newer Python versions deprecate.
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].sub(replacement, s)
156
157
def subn(pattern, replacement, s):
    """Substitutes occurrences of a pattern and reports how many were made.

    The compiled regexp is cached.

    Args:
      pattern: The regular expression source; it is also the cache key.
      replacement: The replacement passed through to the compiled
                   pattern's subn() method.
      s: The string on which to do the substitutions.

    Returns:
      A tuple (new_string, number_of_substitutions_made).
    """
    # The cache lookup is intentionally written inline (not shared with the
    # sibling helpers) because this file keeps it inlined for performance.
    if pattern not in _regexp_compile_cache:
        # Compile through the public re API; sre_compile is an undocumented
        # implementation module that newer Python versions deprecate.
        _regexp_compile_cache[pattern] = re.compile(pattern)
    return _regexp_compile_cache[pattern].subn(replacement, s)
163
164
def iteratively_replace_matches_with_char(pattern, char_replacement, s):
    """Blanks out every match of pattern in s, repeating until none remain.

    Each match is overwritten with char_replacement repeated once per
    matched character, so the string keeps its length and everything else
    stays in the same column.  The search restarts after every replacement,
    which lets nested constructs be removed from the inside out.

    The pattern must not be able to match text that consists of
    char_replacement, otherwise this loops forever.

    Example:
      pattern = r'<[^<>]*>'  # innermost template parameters
      char_replacement = '_'
      s =     'A<B<C, D>>'
      Returns 'A_________'

    Args:
      pattern: The regex to match.
      char_replacement: The character to put in place of every
                        character of the match.
      s: The string on which to do the replacements.

    Returns:
      The string with every match overwritten by char_replacement.
    """
    found = search(pattern, s)
    while found:
        begin, end = found.span(0)
        filler = char_replacement * (end - begin)
        s = s[:begin] + filler + s[end:]
        found = search(pattern, s)
    return s
195
196
def _rfind_in_lines(regex, lines, start_position, not_found_position):
    """Searches backwards from start_position for the last match of regex.

    The row of start_position is examined first, but only the part before
    start_position.column; after that each earlier row is examined in full.

    Args:
      regex: The regular expression source to look for.
      lines: The list of lines to search.
      start_position: Position at which the backwards search begins.
      not_found_position: The value to return when no row matches.

    Returns:
      A Position holding the row of the match and the column just after
      where the match ended, or not_found_position if nothing matched.
    """
    # Wrapping the regex in a group and preceding it with a greedy '.*'
    # makes the group capture the last possible match within a line.
    last_match_pattern = r'.*(' + regex + ')'

    row = start_position.row
    # On the starting row, ignore everything at or beyond the start column.
    found = match(last_match_pattern, lines[row][:start_position.column])
    while not found:
        # Nothing on this row, so step back to the previous one.
        row -= 1
        if row < 0:
            return not_found_position
        found = match(last_match_pattern, lines[row])
    return Position(row, found.end(1))
220
221
def _convert_to_lower_with_underscores(text):
    """Converts camelCase or PascalCase text to lower_with_underscores."""
    underscore_insertion_patterns = (
        # A capital letter followed by a lower case letter, as long as it is
        # inside a word.  (This puts an underscore before Password, but not
        # before the P and A, in WPAPassword.)
        r'(?<=[A-Za-z0-9])([A-Z])(?=[a-z])',
        # A capital letter at the end of a word that is preceded by a lower
        # case letter or a number.  (This puts an underscore before the A in
        # isA but not before the A in CBA.)
        r'(?<=[a-z0-9])([A-Z])(?=\b)',
        # A capital letter that is followed by another capital letter but
        # not preceded by one.  (This puts an underscore before the A in
        # 'WordADay'.)
        r'(?<=[a-z0-9])([A-Z][A-Z_])',
    )
    # The patterns are applied one after another, each on the previous
    # result.
    for pattern in underscore_insertion_patterns:
        text = sub(pattern, r'_\1', text)

    return text.lower()
240
241
242
def _create_acronym(text):
    """Creates an upper case acronym for the given text."""
    # Drop every lower case letter that does not start a word, then upper
    # case whatever is left.
    without_inner_lower_case = sub(r'(?<!\b)[a-z]', '', text)
    return without_inner_lower_case.upper()
248
249
def up_to_unmatched_closing_paren(s):
    """Splits a string in two at its first unmatched ')'.

    Args:
      s: a string which is the part of a line following a '('
         (e.g., "a == (b + c))").

    Returns:
      A pair of strings (text before the first unmatched ')', text after
      it); the unmatched ')' itself is in neither part.  For example,
      up_to_unmatched_closing_paren("a == (b + c)) {")
      returns ("a == (b + c)", " {").
      Returns (None, None) if there is no unmatched ')'.
    """
    # Number of '(' seen inside s that are still waiting for their ')'.
    open_parens = 0
    for index, character in enumerate(s):
        if character == '(':
            open_parens += 1
        elif character == ')':
            if open_parens == 0:
                # This ')' closes the '(' that precedes s.
                return s[:index], s[index + 1:]
            open_parens -= 1
    return None, None
274
class _IncludeState(dict):
    """Tracks line numbers for includes, and the order in which they appear.

    As a dict, an _IncludeState object serves as a mapping between include
    filename and line number on which that file was included.

    Call check_next_include_order() once for each header in the file,
    passing in one of the header type constants.  A call made in an illegal
    order is reported through the error message that the method returns.

    """
    # The sections a file is expected to move through, in this order.
    _INITIAL_SECTION = 0
    _CONFIG_SECTION = 1
    _PRIMARY_SECTION = 2
    _OTHER_SECTION = 3

    # Text used in error messages to describe the header that was found.
    _TYPE_NAMES = {
        _CONFIG_HEADER: 'WebCore config.h',
        _PRIMARY_HEADER: 'header this file implements',
        _OTHER_HEADER: 'other header',
        _MOC_HEADER: 'moc file',
        }
    # Text used in error messages to describe a section.
    _SECTION_NAMES = {
        _INITIAL_SECTION: "... nothing.",
        _CONFIG_SECTION: "WebCore config.h.",
        _PRIMARY_SECTION: 'a header this file implements.',
        _OTHER_SECTION: 'other header.',
        }

    def __init__(self):
        dict.__init__(self)
        self._section = self._INITIAL_SECTION
        self._visited_primary_section = False
        self.header_types = {}

    def visited_primary_section(self):
        """Returns True once a primary header has been checked."""
        return self._visited_primary_section

    def check_next_include_order(self, header_type, file_is_header, primary_header_exists):
        """Returns a non-empty error message if the next header is out of order.

        This function also updates the internal state to be ready to check
        the next include.

        Args:
          header_type: One of the _XXX_HEADER constants defined above.
          file_is_header: Whether the file that owns this _IncludeState is itself a header
          primary_header_exists: Whether the file being checked has a primary
                                 header.  Only when it does is an "other"
                                 header that comes before the primary
                                 section reported.

        Returns:
          The empty string if the header is in the right order, or an
          error message describing what's wrong.

        """
        # These cases are decided without looking at or changing the
        # section state.
        if file_is_header:
            if header_type == _CONFIG_HEADER:
                return 'Header file should not contain WebCore config.h.'
            if header_type == _PRIMARY_HEADER:
                return 'Header file should not contain itself.'
        if header_type == _MOC_HEADER:
            return ''

        # Build the candidate messages from the section we were in before
        # this include.
        previous_section = self._section
        type_name = self._TYPE_NAMES[header_type]
        found_after = 'Found %s after %s' % (
            type_name, self._SECTION_NAMES[previous_section])
        found_before = None
        if previous_section != self._OTHER_SECTION:
            # There is no section following the last one, so the "before"
            # message only exists for the earlier sections.
            found_before = 'Found %s before %s' % (
                type_name, self._SECTION_NAMES[previous_section + 1])

        if header_type == _CONFIG_HEADER:
            self._section = self._CONFIG_SECTION
            if previous_section >= self._CONFIG_SECTION:
                return found_after
            return ''

        if header_type == _PRIMARY_HEADER:
            self._section = self._PRIMARY_SECTION
            self._visited_primary_section = True
            if previous_section >= self._PRIMARY_SECTION:
                return found_after
            if previous_section < self._CONFIG_SECTION:
                return found_before
            return ''

        assert header_type == _OTHER_HEADER
        self._section = self._OTHER_SECTION
        if (primary_header_exists and not file_is_header
                and previous_section < self._PRIMARY_SECTION):
            return found_before
        return ''
365
366
class Position(object):
    """Holds the position of something as a row and a column.

    Positions compare by row first and then by column.  The comparison is
    implemented with rich comparison methods so that it works on Python 3
    (where __cmp__ is never called and int has no __cmp__) as well as on
    Python 2.
    """
    def __init__(self, row, column):
        self.row = row
        self.column = column

    def __str__(self):
        return '(%s, %s)' % (self.row, self.column)

    def _key(self):
        """Returns the (row, column) tuple that all comparisons are based on."""
        return (self.row, self.column)

    def __cmp__(self, other):
        """Returns -1, 0 or 1; kept for callers that invoke __cmp__ directly."""
        mine, theirs = self._key(), other._key()
        return (mine > theirs) - (mine < theirs)

    def __eq__(self, other):
        if not isinstance(other, Position):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        if not isinstance(other, Position):
            return NotImplemented
        return self._key() != other._key()

    def __lt__(self, other):
        if not isinstance(other, Position):
            return NotImplemented
        return self._key() < other._key()

    def __le__(self, other):
        if not isinstance(other, Position):
            return NotImplemented
        return self._key() <= other._key()

    def __gt__(self, other):
        if not isinstance(other, Position):
            return NotImplemented
        return self._key() > other._key()

    def __ge__(self, other):
        if not isinstance(other, Position):
            return NotImplemented
        return self._key() >= other._key()

    def __hash__(self):
        # Defining __eq__ would otherwise make instances unhashable on
        # Python 3; hash on the same key that equality uses.
        return hash(self._key())
378
379
class Parameter(object):
    """Information about one function parameter.

    Attributes:
      type: The text of the parameter before the name, stripped.
      name: The parameter name with any initializer removed, stripped.
      row: The row that was passed in for this parameter.
    """
    def __init__(self, parameter, parameter_name_index, row):
        type_text = parameter[:parameter_name_index]
        name_text = parameter[parameter_name_index:]
        self.type = type_text.strip()
        # Anything from the '=' on is an initializer (e.g. int i = 5) and is
        # not part of the name.
        self.name = sub(r'=.*', '', name_text).strip()
        self.row = row

    @memoized
    def lower_with_underscores_name(self):
        """Returns the parameter name in the lower with underscores format."""
        return _convert_to_lower_with_underscores(self.name)
392
393
class SingleLineView(object):
    """Presents a span of several lines as one line, to make searching easier.

    The lines are joined with a single space in place of each line break.
    """
    def __init__(self, lines, start_position, end_position):
        """Create a SingleLineView instance.

        Args:
          lines: a list of multiple lines to combine into a single line.
          start_position: offset within lines of where to start the single line.
          end_position: just after where to end (like a slice operation).
        """
        first_row = start_position.row

        # Slicing copies the rows of interest, so the caller's list is left
        # untouched by the trimming below.
        rows = lines[first_row:end_position.row + 1]

        # Cut the last row off at the end column before cutting the first
        # row at the start column; the order matters when both are the same
        # row.
        rows[-1] = rows[-1][:end_position.column]
        rows[0] = rows[0][start_position.column:]

        self.single_line = ' '.join(rows)

        # Remember how many columns each row takes up in the single line;
        # the extra 1 accounts for the space added by the join.
        self._row_lengths = [len(row) + 1 for row in rows]
        self._starting_row = first_row

    def convert_column_to_row(self, single_line_column_number):
        """Convert the column number from the single line into the original
        line number.

        Special cases:
        * Columns in the added spaces are considered part of the previous line.
        * Columns beyond the end of the line are considered part of the last
        line in the view."""
        last_offset = len(self._row_lengths) - 1
        # Columns still to be walked over, relative to the current row.
        columns_left = single_line_column_number
        offset = 0
        while offset < last_offset and columns_left >= self._row_lengths[offset]:
            columns_left -= self._row_lengths[offset]
            offset += 1
        return self._starting_row + offset
437
438
def create_skeleton_parameters(all_parameters):
    """Converts a parameter list to a skeleton version.

    The skeleton only has one word for the parameter name, one word for the
    type, and a comma after each parameter and nowhere else.  Whatever is
    removed is overwritten with spaces, so everything in the skeleton
    remains in the same columns as the original.

    Args:
      all_parameters: The text of the parameter list.

    Returns:
      The skeleton string.  When it holds any parameter, it is one
      character longer than the input because of the comma appended after
      the last parameter.
    """
    simplification_patterns = (
        # Remove template parameters, function declaration parameters, etc.
        r'(<[^<>]*?>)|(\([^\(\)]*?\))|(\{[^\{\}]*?\})',
        # Remove all initializers.
        r'=[^,]*',
        # Remove :: and everything before it.
        r'[^,]*::',
        # Remove modifiers like &, *.
        r'[&*]',
        # Remove const modifiers.
        r'\bconst\s+(?=[A-Za-z])',
        # Remove numerical modifiers like long.
        r'\b(unsigned|long|short)\s+(?=unsigned|long|short|int|char|double|float)')

    skeleton = all_parameters
    for pattern in simplification_patterns:
        skeleton = iteratively_replace_matches_with_char(pattern, ' ', skeleton)

    if not skeleton.strip():
        # No parameters at all, so there is nothing to terminate.
        return skeleton
    # End the last parameter with a comma too, so that every parameter is
    # followed by one.
    return skeleton + ','
467
468
def find_parameter_name_index(skeleton_parameter):
    """Determines where the parameter name starts in a skeleton parameter.

    Args:
      skeleton_parameter: The skeleton text of a single parameter.

    Returns:
      The index of the first character of the name, or
      len(skeleton_parameter) if the parameter has no name.
    """
    # The name follows the last space (ignoring trailing whitespace), but
    # only if there is some content before that space; otherwise the only
    # word present is taken to be the type.
    last_space_index = skeleton_parameter.rstrip().rfind(' ')
    has_content_before_space = (
        last_space_index != -1
        and skeleton_parameter[:last_space_index].strip())
    if not has_content_before_space:
        return len(skeleton_parameter)
    return last_space_index + 1
477
478
def parameter_list(elided_lines, start_position, end_position):
    """Generator for a function's parameters.

    Args:
      elided_lines: The lines that contain the parameters.
      start_position: Position of the opening parenthesis of the parameters.
      end_position: Position just after the closing parenthesis.

    Yields:
      A Parameter for each parameter found, in order.
    """
    # Narrow the positions so that the outer parentheses are left out.
    inner_start = Position(row=start_position.row, column=start_position.column + 1)
    inner_end = Position(row=end_position.row, column=end_position.column - 1)
    view = SingleLineView(elided_lines, inner_start, inner_end)
    skeleton = create_skeleton_parameters(view.single_line)

    # In the skeleton every parameter ends with a comma, so walk from comma
    # to comma until there are no more.
    parameter_start = 0
    comma_index = skeleton.find(',', parameter_start)
    while comma_index != -1:
        row = view.convert_column_to_row(comma_index)

        # The skeleton tells where the name starts; the real text comes from
        # the same columns of the unsimplified line.
        name_offset = find_parameter_name_index(skeleton[parameter_start:comma_index])
        parameter_text = view.single_line[parameter_start:comma_index]
        yield Parameter(parameter_text, name_offset, row)

        parameter_start = comma_index + 1
        comma_index = skeleton.find(',', parameter_start)
503
504
class _FunctionState(object):
    """Tracks current function name and the number of lines in its body.

    Attributes:
      min_confidence: The minimum confidence level to use while checking style.
      current_function: The name given to the most recent begin() call.
      in_a_function: True from begin() until end() is called.
      lines_in_function: The number of lines counted so far by count().
      body_start_position: Position of the { or the ; for a prototype.
      end_position: Position just after the end of the function.

    """

    _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
    _TEST_TRIGGER = 400    # Higher limit, used for test functions.

    def __init__(self, min_confidence):
        self.min_confidence = min_confidence
        self.current_function = ''
        self.in_a_function = False
        self.lines_in_function = 0
        # Make sure these will not be mistaken for real positions (even when a
        # small amount is added to them).
        self.body_start_position = Position(-1000, 0)
        self.end_position = Position(-1000, 0)

    def begin(self, function_name, function_name_start_position, body_start_position, end_position,
              parameter_start_position, parameter_end_position, clean_lines):
        """Start analyzing function body.

        Args:
            function_name: The name of the function being tracked.
            function_name_start_position: Position in elided where the function name starts.
            body_start_position: Position in elided of the { or the ; for a prototype.
            end_position: Position in elided just after the final } (or ;).
            parameter_start_position: Position in elided of the '(' for the parameters.
            parameter_end_position: Position in elided just after the ')' for the parameters.
            clean_lines: A CleansedLines instance containing the file.
        """
        self.in_a_function = True
        self.lines_in_function = -1  # Don't count the open brace line.
        self.current_function = function_name
        self.function_name_start_position = function_name_start_position
        self.body_start_position = body_start_position
        self.end_position = end_position
        # A ';' where the body would start means this is only a declaration.
        self.is_declaration = clean_lines.elided[body_start_position.row][body_start_position.column] == ';'
        self.parameter_start_position = parameter_start_position
        self.parameter_end_position = parameter_end_position
        self.is_pure = False
        if self.is_declaration:
            # A declaration is pure when only "= 0" sits between the
            # parameters and the ';'.
            characters_after_parameters = SingleLineView(clean_lines.elided, parameter_end_position, body_start_position).single_line
            self.is_pure = bool(match(r'\s*=\s*0\s*', characters_after_parameters))
        self._clean_lines = clean_lines
        # Parsed lazily by parameter_list(); None means "not parsed yet".
        self._parameter_list = None

    def modifiers_and_return_type(self):
        """Returns the modifiers and the return type."""
        # Go backwards from where the function name is until we encounter one of several things:
        #   ';' or '{' or '}' or 'private:', etc. or '#' or return Position(0, 0)
        elided = self._clean_lines.elided
        start_modifiers = _rfind_in_lines(r';|\{|\}|((private|public|protected):)|(#.*)',
                                          elided, self.parameter_start_position, Position(0, 0))
        return SingleLineView(elided, start_modifiers, self.function_name_start_position).single_line.strip()

    def parameter_list(self):
        """Returns the function's parameters as a tuple of Parameter objects.

        The parameters are parsed on the first call and cached afterwards.
        """
        # Compare against None rather than testing truthiness: a function
        # without parameters yields an empty (falsy) tuple, which would
        # otherwise be parsed again on every call.
        if self._parameter_list is None:
            # Store the final result as a tuple since that is immutable.
            self._parameter_list = tuple(parameter_list(self._clean_lines.elided, self.parameter_start_position, self.parameter_end_position))

        return self._parameter_list

    def count(self, line_number):
        """Count line in current function body."""
        if self.in_a_function and line_number >= self.body_start_position.row:
            self.lines_in_function += 1

    def check(self, error, line_number):
        """Report if too many lines in function body.

        Args:
          error: The function to call with any errors found.
          line_number: The number of the line to check.
        """
        if match(r'T(EST|est)', self.current_function):
            base_trigger = self._TEST_TRIGGER
        else:
            base_trigger = self._NORMAL_TRIGGER
        trigger = base_trigger * 2 ** self.min_confidence

        if self.lines_in_function > trigger:
            error_level = int(math.log(self.lines_in_function / base_trigger, 2))
            # In multiples of base_trigger:
            # 1x => 0, 2x => 1, 4x => 2, 8x => 3, 16x => 4, 32x => 5, ...
            if error_level > 5:
                error_level = 5
            error(line_number, 'readability/fn_size', error_level,
                  'Small and focused functions are preferred:'
                  ' %s has %d non-comment lines'
                  ' (error triggered by exceeding %d lines).'  % (
                      self.current_function, self.lines_in_function, trigger))

    def end(self):
        """Stop analyzing function body."""
        self.in_a_function = False
603
604
class _IncludeError(Exception):
    """Signals that the includes of a file are in the wrong order."""
608
609
class FileInfo(object):
    """Provides utility functions for filenames.

    FileInfo provides easy access to the components of a file's path
    relative to the project root.
    """

    def __init__(self, filename):
        self._filename = filename

    def full_name(self):
        """Returns the absolute path, with Windows backslashes made Unix-like."""
        return os.path.abspath(self._filename).replace('\\', '/')

    def repository_name(self):
        r"""Full name after removing the local path to the repository.

        If we have a real absolute path name here we can try to do something smart:
        detecting the root of the checkout and truncating /path/to/checkout from
        the name so that we get header guards that don't include things like
        "C:\Documents and Settings\..." or "/home/username/..." in them and thus
        people on different computers who have checked the source out to different
        locations won't see bogus errors.

        Returns:
          The path relative to the top of the SVN or git checkout when the
          file exists and such a checkout is found, otherwise the full name.
        """
        fullname = self.full_name()

        if os.path.exists(fullname):
            project_dir = os.path.dirname(fullname)

            if os.path.exists(os.path.join(project_dir, ".svn")):
                # If there's a .svn file in the current directory, we
                # recursively look up the directory tree for the top
                # of the SVN checkout
                root_dir = project_dir
                one_up_dir = os.path.dirname(root_dir)
                # Stop at the filesystem root, where dirname() returns its
                # argument, so that the walk always terminates.
                while (one_up_dir != root_dir
                       and os.path.exists(os.path.join(one_up_dir, ".svn"))):
                    root_dir = os.path.dirname(root_dir)
                    one_up_dir = os.path.dirname(one_up_dir)

                prefix = os.path.commonprefix([root_dir, project_dir])
                return fullname[len(prefix) + 1:]

            # Not SVN? Try to find a git top level directory by
            # searching up from the current path.
            root_dir = project_dir
            while (root_dir != os.path.dirname(root_dir)
                   and not os.path.exists(os.path.join(root_dir, ".git"))):
                root_dir = os.path.dirname(root_dir)

            # This check has to come after the loop rather than inside it,
            # so that a file located directly in the git top level directory
            # (where the loop body never runs) is handled as well.
            if os.path.exists(os.path.join(root_dir, ".git")):
                prefix = os.path.commonprefix([root_dir, project_dir])
                return fullname[len(prefix) + 1:]

        # Don't know what to do; header guard warnings may be wrong...
        return fullname

    def split(self):
        """Splits the file into the directory, basename, and extension.

        For 'chrome/browser/browser.cpp', split() would
        return ('chrome/browser', 'browser', '.cpp')

        Returns:
          A tuple of (directory, basename, extension).
        """

        googlename = self.repository_name()
        project, rest = os.path.split(googlename)
        return (project,) + os.path.splitext(rest)

    def base_name(self):
        """File base name - text after the final slash, before the final period."""
        return self.split()[1]

    def extension(self):
        """File extension - the final period and the text following it."""
        return self.split()[2]

    def no_extension(self):
        """Directory and base name joined with '/', without the extension."""
        return '/'.join(self.split()[0:2])

    def is_source(self):
        """File has a source file extension."""
        return self.extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
694
695
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style (/* ... */) comments that begin and end on the same line.
# This RE is a little bit more complicated than one might expect, because it
# also has to decide which of the spaces around the comment to match, so that
# comments inside statements are handled better.
# The current rule is: We only match the spaces on both sides when the
# comment is at the end of the line. Otherwise, we try to match the spaces on
# the right side; if there are none, we match the spaces on the left side,
# but only if the comment is followed by a non-word character.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
716
717
def is_cpp_string(line):
    """Tells whether the line ends inside a string constant.

    This function does not consider single-line nor multi-line comments.

    Args:
      line: a partial line of code, from its start up to some column.

    Returns:
      True, if the next character appended to 'line' would be inside a
      string constant.
    """
    # Neutralize escaped backslashes first, so that the quote in \\" is not
    # mistaken for the escaped quote \".
    cleaned = line.replace(r'\\', 'XX')

    # Count the double quotes that really delimit a string: all of them,
    # minus the escaped ones (\") and the ones that are character
    # literals ('"').
    delimiting_quotes = cleaned.count('"')
    delimiting_quotes -= cleaned.count(r'\"')
    delimiting_quotes -= cleaned.count("'\"'")

    # An odd count means that a string was opened but not closed.
    return delimiting_quotes % 2 == 1
733
734
def find_next_multi_line_comment_start(lines, line_index):
    """Return the index of the next line that opens a multi-line comment.

    A line qualifies when, ignoring surrounding whitespace, it begins with
    '/*' and has no closing '*/' after that marker.  len(lines) is returned
    when no such line exists at or after line_index.
    """
    for index in range(line_index, len(lines)):
        stripped = lines[index].strip()
        # Comments that are closed on the same line are not multi-line.
        if stripped.startswith('/*') and stripped.find('*/', 2) < 0:
            return index
    return len(lines)
744
745
def find_next_multi_line_comment_end(lines, line_index):
    """Return the index of the first line, from line_index on, ending in '*/'.

    Trailing whitespace is ignored.  len(lines) is returned when the comment
    is never closed.
    """
    for index in range(line_index, len(lines)):
        if lines[index].strip().endswith('*/'):
            return index
    return len(lines)
753
754
def remove_multi_line_comments_from_range(lines, begin, end):
    """Overwrite lines[begin:end] in place with placeholder comments."""
    # A '// dummy' comment keeps each line non-empty, which avoids
    # unnecessary blank line warnings later in the code.
    index = begin
    while index < end:
        lines[index] = '// dummy'
        index += 1
761
762
def remove_multi_line_comments(lines, error):
    """Replace every multi-line (C-style) comment in lines with placeholders.

    Args:
      lines: The list of lines to rewrite in place.
      error: The function to call when a comment is opened but never closed.
    """
    resume_at = 0
    while resume_at < len(lines):
        first = find_next_multi_line_comment_start(lines, resume_at)
        if first >= len(lines):
            # No further multi-line comment.
            return
        last = find_next_multi_line_comment_end(lines, first)
        if last >= len(lines):
            error(first + 1, 'readability/multiline_comment', 5,
                  'Could not find end of multi-line comment')
            return
        resume_at = last + 1
        remove_multi_line_comments_from_range(lines, first, resume_at)
777
778
def cleanse_comments(line):
    """Strip a trailing //-comment and any single-line /* */ comments.

    Args:
      line: A line of C++ source.

    Returns:
      The line with single-line comments removed.
    """
    marker = line.find('//')
    # Only the first '//' is considered, and only if it is not inside a string.
    starts_comment = marker != -1 and not is_cpp_string(line[:marker])
    code = line[:marker] if starts_comment else line
    # Get rid of /* ... */.
    return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', code)
793
794
class CleansedLines(object):
    """Keeps three parallel views of the same lines.

    1) elided: the lines with strings collapsed and comments removed,
    2) lines: the lines with comments removed, and
    3) raw_lines: the lines exactly as they were passed in.
    All three are lists of the same length.
    """

    def __init__(self, lines):
        self.raw_lines = lines
        self._num_lines = len(lines)
        self.lines = [cleanse_comments(raw) for raw in lines]
        # Strings are collapsed before comments are stripped.
        self.elided = [cleanse_comments(self.collapse_strings(raw))
                       for raw in lines]

    def num_lines(self):
        """Returns the number of lines represented."""
        return self._num_lines

    @staticmethod
    def collapse_strings(elided):
        """Collapses strings and chars on a line to simple "" or '' blocks.

        Lines matched by _RE_PATTERN_INCLUDE are returned untouched.

        Args:
          elided: The line being processed.

        Returns:
          The line with collapsed strings.
        """
        if _RE_PATTERN_INCLUDE.match(elided):
            return elided
        # Escaped characters go first so that collapsing quotes stays basic.
        # Things that look like escaped characters shouldn't occur outside
        # of strings and chars.
        without_escapes = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
        without_chars = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub(
            "''", without_escapes)
        return _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', without_chars)
838
839
def close_expression(elided, position):
    """Finds the position just past the bracket that closes the one given.

    elided[position.row][position.column] is expected to be a '(', '[' or
    '{'.  Only brackets of that same kind are counted while scanning.

     Args:
       elided: A CleansedLines.elided instance containing the file.
       position: The position of the opening item.

     Returns:
      The Position *past* the closing bracket, or Position(len(elided), -1)
      if the character at position is not an opening bracket or no close is
      found. Strings and comments are ignored because elided lines are used.
    """
    opener = elided[position.row][position.column]
    if opener == '{':
        bracket_pattern = r'[\{\}]'
    elif opener == '[':
        bracket_pattern = r'[\[\]]'
    elif opener == '(':
        bracket_pattern = r'[\(\)]'
    else:
        return Position(len(elided), -1)

    depth = 1
    column = position.column + 1
    for offset, text in enumerate(elided[position.row:]):
        row = position.row + offset
        remainder = text[column:]

        # Consume this line one bracket at a time.
        found = search(bracket_pattern, remainder)
        while found:
            consumed = found.end(0)
            column += consumed
            remainder = remainder[consumed:]
            if found.group(0) == opener:
                depth += 1
            else:
                depth -= 1
                if depth == 0:
                    return Position(row, column)
            found = search(bracket_pattern, remainder)

        # Later lines are scanned from their first column.
        column = 0

    # The given item was not closed.
    return Position(len(elided), -1)
892
def check_for_copyright(lines, error):
    """Logs an error if no Copyright message appears at the top of the file.

    Args:
      lines: An array of strings, each representing a line of the file.
             lines[0] is a dummy line and is not inspected.
      error: The function to call with any errors found.
    """

    # We'll say it should occur by line 10. Don't forget there's a
    # dummy line at the front, hence the slice starting at 1.  Slicing also
    # avoids the Python 2-only xrange().
    for line in lines[1:11]:
        if re.search(r'Copyright', line, re.I):
            return

    # No copyright line was found.
    error(0, 'legal/copyright', 5,
          'No copyright message found.  '
          'You should have a line: "Copyright [year] <Copyright Owner>"')
905
906
def get_header_guard_cpp_variable(filename):
    """Returns the CPP variables that are acceptable as a header guard.

    Args:
      filename: The name of a C++ header file.

    Returns:
      A (special_name, standard_name) tuple.  standard_name is the file's
      base name with '-', '.' and whitespace replaced by '_'.  special_name
      is the same name prefixed with "WTF_" when the path contains '/wtf/',
      and is otherwise identical to standard_name.

    """

    # Restores original filename in case that style checker is invoked from Emacs's
    # flymake.
    filename = re.sub(r'_flymake\.h$', '.h', filename)

    standard_name = re.sub(r'[-.\s]', '_', os.path.basename(filename))

    # Files under WTF typically have header guards that start with WTF_.
    # A membership test is required here: str.find() returns -1 (truthy)
    # when the substring is absent and 0 (falsy) when it is at the start.
    if '/wtf/' in filename:
        special_name = "WTF_" + standard_name
    else:
        special_name = standard_name
    return (special_name, standard_name)
931
932
def check_for_header_guard(filename, lines, error):
    """Checks that the file contains a well-formed header guard.

    Logs an error if the first #ifndef and the first #define that carry an
    argument are missing or name different symbols.  Otherwise logs an error
    if the guard symbol is not one of the names suggested by
    get_header_guard_cpp_variable().

    Args:
      filename: The name of the C++ header file.
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """

    acceptable_guards = get_header_guard_cpp_variable(filename)

    ifndef_symbol = None
    ifndef_row = 0
    define_symbol = None
    for row, text in enumerate(lines):
        tokens = text.split()
        if len(tokens) < 2:
            continue
        directive, argument = tokens[0], tokens[1]
        # Remember only the first #ifndef and the first #define.
        if directive == '#ifndef' and not ifndef_symbol:
            ifndef_symbol = argument
            ifndef_row = row
        if directive == '#define' and not define_symbol:
            define_symbol = argument
        if ifndef_symbol and define_symbol:
            break

    guard_is_paired = (ifndef_symbol and define_symbol
                       and ifndef_symbol == define_symbol)
    if not guard_is_paired:
        error(0, 'build/header_guard', 5,
              'No #ifndef header guard found, suggested CPP variable is: %s' %
              acceptable_guards[0])
        return

    # The guard should be File_h.
    if ifndef_symbol not in acceptable_guards:
        error(ifndef_row, 'build/header_guard', 5,
              '#ifndef header guard has wrong style, please use: %s' % acceptable_guards[0])
973
974
def check_for_unicode_replacement_characters(lines, error):
    """Reports every line that contains a Unicode replacement character.

    Such a character means that the file either contained invalid UTF-8
    (likely) or literal replacement characters (which it shouldn't).  Line
    numbering may be off if the invalid UTF-8 was adjacent to a newline.

    Args:
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """
    for index, text in enumerate(lines):
        if u'\ufffd' not in text:
            continue
        error(index, 'readability/utf8', 5,
              'Line contains invalid UTF-8 (or Unicode replacement character).')
991
992
def check_for_new_line_at_eof(lines, error):
    """Reports a file that does not end with a newline character.

    Args:
      lines: An array of strings, each representing a line of the file.
      error: The function to call with any errors found.
    """

    # The array lines() was created by adding two newlines to the
    # original file (go figure), then splitting on \n.
    # So the file ends in \n exactly when the last-but-two element of
    # lines() exists and is empty.
    ends_with_newline = len(lines) >= 3 and not lines[-2]
    if ends_with_newline:
        return
    error(len(lines) - 2, 'whitespace/ending_newline', 5,
          'Could not find a newline character at the end of the file.')
1008
1009
def check_for_multiline_comments_and_strings(clean_lines, line_number, error):
    """Reports /* ... */ comments and "..." strings that run past one line.

    /* ... */ comments are legit inside macros, for one line.
    Otherwise, we prefer // comments, so it's ok to warn about the
    other.  Likewise, it's ok for strings to extend across multiple
    lines, as long as a line continuation character (backslash)
    terminates each line. Although not currently prohibited by the C++
    style guide, it's ugly and unnecessary. We don't do well with either
    in this lint program, so we warn about both.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    # Escaped backslashes (\\) are OK; drop them so that the second slash
    # cannot be mistaken for the start of an escaped quote below.
    text = clean_lines.elided[line_number].replace('\\\\', '')

    # More comment openers than closers: a comment continues past this line.
    opens_unclosed_comment = text.count('/*') > text.count('*/')
    if opens_unclosed_comment:
        error(line_number, 'readability/multiline_comment', 5,
              'Complex multi-line /*...*/-style comment found. '
              'Lint may give bogus warnings.  '
              'Consider replacing these with //-style comments, '
              'with #if 0...#endif, '
              'or with more clearly structured multi-line comments.')

    # An odd number of unescaped quotes: a string continues past this line.
    unescaped_quotes = text.count('"') - text.count('\\"')
    if unescaped_quotes % 2:
        error(line_number, 'readability/multiline_string', 5,
              'Multi-line string ("...") found.  This lint script doesn\'t '
              'do well with such strings, and may give bogus warnings.  They\'re '
              'ugly and unnecessary, and you should use concatenation instead".')
1045
1046
# Pairs of (call prefix to look for, suggested replacement) consulted by
# check_posix_threading().  The first element is searched for in each line;
# the second is only used to build the warning message.
_THREADING_LIST = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('readdir(', 'readdir_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )
1062
1063
def check_posix_threading(clean_lines, line_number, error):
    """Warns about calls to thread-unsafe posix functions.

    Much code has been originally written without consideration of
    multi-threading. Also, engineers are relying on their old experience;
    they have learned posix before threading extensions were added. These
    tests guide the engineers to use thread-safe functions (when using
    posix directly).

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    text = clean_lines.elided[line_number]
    for unsafe_call, safe_call in _THREADING_LIST:
        found_at = text.find(unsafe_call)
        if found_at < 0:
            continue
        if found_at > 0:
            # Skip names that merely end with the unsafe name, as well as
            # member accesses (foo.rand(), foo->rand()).
            preceding = text[found_at - 1]
            if preceding.isalnum() or preceding in ('_', '.', '>'):
                continue
        error(line_number, 'runtime/threadsafe_fn', 2,
              'Consider using ' + safe_call +
              '...) instead of ' + unsafe_call +
              '...) for improved thread safety.')
1088
1089
# Matches invalid increment: *count++, which moves pointer instead of
# incrementing a value.  The pattern is anchored at the start of the line
# (after optional whitespace), requires the ';' right after the operator,
# and covers '--' as well as '++'.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')
1094
1095
def check_invalid_increment(clean_lines, line_number, error):
    """Reports statements of the form *count++ (or *count--).

    For example following function:
    void increment_counter(int* count) {
        *count++;
    }
    is invalid, because it effectively does count++, moving pointer, and should
    be replaced with ++*count, (*count)++ or *count += 1.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    statement = clean_lines.elided[line_number]
    if not _RE_PATTERN_INVALID_INCREMENT.match(statement):
        return
    error(line_number, 'runtime/invalid_increment', 5,
          'Changing pointer instead of value (or unused value of operator*).')
1115
1116
1117class _ClassInfo(object):
1118    """Stores information about a class."""
1119
1120    def __init__(self, name, line_number):
1121        self.name = name
1122        self.line_number = line_number
1123        self.seen_open_brace = False
1124        self.is_derived = False
1125        self.virtual_method_line_number = None
1126        self.has_virtual_destructor = False
1127        self.brace_depth = 0
1128
1129
1130class _ClassState(object):
1131    """Holds the current state of the parse relating to class declarations.
1132
1133    It maintains a stack of _ClassInfos representing the parser's guess
1134    as to the current nesting of class declarations. The innermost class
1135    is at the top (back) of the stack. Typically, the stack will either
1136    be empty or have exactly one entry.
1137    """
1138
1139    def __init__(self):
1140        self.classinfo_stack = []
1141
1142    def check_finished(self, error):
1143        """Checks that all classes have been completely parsed.
1144
1145        Call this when all lines in a file have been processed.
1146        Args:
1147          error: The function to call with any errors found.
1148        """
1149        if self.classinfo_stack:
1150            # Note: This test can result in false positives if #ifdef constructs
1151            # get in the way of brace matching. See the testBuildClass test in
1152            # cpp_style_unittest.py for an example of this.
1153            error(self.classinfo_stack[0].line_number, 'build/class', 5,
1154                  'Failed to find complete declaration of class %s' %
1155                  self.classinfo_stack[0].name)
1156
1157
1158class _FileState(object):
1159    def __init__(self, clean_lines, file_extension):
1160        self._did_inside_namespace_indent_warning = False
1161        self._clean_lines = clean_lines
1162        if file_extension in ['m', 'mm']:
1163            self._is_objective_c = True
1164        elif file_extension == 'h':
1165            # In the case of header files, it is unknown if the file
1166            # is objective c or not, so set this value to None and then
1167            # if it is requested, use heuristics to guess the value.
1168            self._is_objective_c = None
1169        else:
1170            self._is_objective_c = False
1171        self._is_c = file_extension == 'c'
1172
1173    def set_did_inside_namespace_indent_warning(self):
1174        self._did_inside_namespace_indent_warning = True
1175
1176    def did_inside_namespace_indent_warning(self):
1177        return self._did_inside_namespace_indent_warning
1178
1179    def is_objective_c(self):
1180        if self._is_objective_c is None:
1181            for line in self._clean_lines.elided:
1182                # Starting with @ or #import seem like the best indications
1183                # that we have an Objective C file.
1184                if line.startswith("@") or line.startswith("#import"):
1185                    self._is_objective_c = True
1186                    break
1187            else:
1188                self._is_objective_c = False
1189        return self._is_objective_c
1190
1191    def is_c_or_objective_c(self):
1192        """Return whether the file extension corresponds to C or Objective-C."""
1193        return self._is_c or self.is_objective_c()
1194
1195
def check_for_non_standard_constructs(clean_lines, line_number,
                                      class_state, error):
    """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

    Complain about several constructs which gcc-2 accepts, but which are
    not standard C++.  Warning about these in lint is one way to ease the
    transition to new compilers.
    - put storage class first (e.g. "static const" instead of "const static").
    - "%lld" instead of "%qd" in printf-type functions.
    - "%1$d" is non-standard in printf-type functions.
    - "\%" is an undefined character escape sequence.
    - text after #endif is not allowed.
    - invalid inner-style forward declaration.
    - >? and <? operators, and their >?= and <?= cousins.
    - classes with virtual methods need virtual destructors (compiler warning
        available, but not turned on yet.)

    Additionally, check for constructor/destructor style violations as it
    is very convenient to do so while checking for gcc-2 compliance.

    This function is called once per line and keeps state between calls in
    class_state.classinfo_stack: entries are pushed when a class or struct
    declaration is seen and popped when the declaration turns out to be a
    one-liner or when its braces balance out.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      class_state: A _ClassState instance which maintains information about
                   the current stack of nested class declarations being parsed.
      error: A callable to which errors are reported, which takes parameters:
             line number, category string, numeric level, and message
    """

    # Remove comments from the line, but leave in strings for now.
    line = clean_lines.lines[line_number]

    if search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
        error(line_number, 'runtime/printf_format', 3,
              '%q in format strings is deprecated.  Use %ll instead.')

    if search(r'printf\s*\(.*".*%\d+\$', line):
        error(line_number, 'runtime/printf_format', 2,
              '%N$ formats are unconventional.  Try rewriting to avoid them.')

    # Remove escaped backslashes before looking for undefined escapes.
    line = line.replace('\\\\', '')

    if search(r'("|\').*\\(%|\[|\(|{)', line):
        error(line_number, 'build/printf_format', 3,
              '%, [, (, and { are undefined character escapes.  Unescape them.')

    # For the rest, work with both comments and strings removed.
    line = clean_lines.elided[line_number]

    if search(r'\b(const|volatile|void|char|short|int|long'
              r'|float|double|signed|unsigned'
              r'|schar|u?int8|u?int16|u?int32|u?int64)'
              r'\s+(auto|register|static|extern|typedef)\b',
              line):
        error(line_number, 'build/storage_class', 5,
              'Storage class (static, extern, typedef, etc) should be first.')

    if match(r'\s*#\s*endif\s*[^/\s]+', line):
        error(line_number, 'build/endif_comment', 5,
              'Uncommented text after #endif is non-standard.  Use a comment.')

    if match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
        error(line_number, 'build/forward_decl', 5,
              'Inner-style forward declarations are invalid.  Remove this line.')

    if search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', line):
        error(line_number, 'build/deprecated', 3,
              '>? and <? (max and min) operators are non-standard and deprecated.')

    # Track class entry and exit, and attempt to find cases within the
    # class declaration that don't meet the C++ style
    # guidelines. Tracking is very dependent on the code matching Google
    # style guidelines, but it seems to perform well enough in testing
    # to be a worthwhile addition to the checks.
    classinfo_stack = class_state.classinfo_stack
    # Look for a class declaration
    class_decl_match = match(
        r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line)
    if class_decl_match:
        # Group 3 is the (possibly ::-qualified) class name.
        classinfo_stack.append(_ClassInfo(class_decl_match.group(3), line_number))

    # Everything else in this function uses the top of the stack if it's
    # not empty.
    if not classinfo_stack:
        return

    classinfo = classinfo_stack[-1]

    # If the opening brace hasn't been seen look for it and also
    # parent class declarations.
    if not classinfo.seen_open_brace:
        # If the line has a ';' in it, assume it's a forward declaration or
        # a single-line class declaration, which we won't process.
        if line.find(';') != -1:
            classinfo_stack.pop()
            return
        classinfo.seen_open_brace = (line.find('{') != -1)
        # Look for a bare ':'
        if search('(^|[^:]):($|[^:])', line):
            classinfo.is_derived = True
        if not classinfo.seen_open_brace:
            return  # Everything else in this function is for after open brace

    # The class may have been declared with namespace or classname qualifiers.
    # The constructor and destructor will not have those qualifiers.
    base_classname = classinfo.name.split('::')[-1]

    # Look for single-argument constructors that aren't marked explicit.
    # Technically a valid construct, but against style.
    # A lone 'void' parameter and a (const) reference to the class itself,
    # i.e. a copy constructor, are not reported.
    args = match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)'
                 % re.escape(base_classname),
                 line)
    if (args
        and args.group(1) != 'void'
        and not match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
                      args.group(1).strip())):
        error(line_number, 'runtime/explicit', 5,
              'Single-argument constructors should be marked explicit.')

    # Look for methods declared virtual.
    if search(r'\bvirtual\b', line):
        # Only the most recent virtual line is kept; it is quoted in the
        # missing-destructor warning below.
        classinfo.virtual_method_line_number = line_number
        # Only look for a destructor declaration on the same line. It would
        # be extremely unlikely for the destructor declaration to occupy
        # more than one line.
        # NOTE(review): base_classname is interpolated unescaped here, unlike
        # in the constructor patterns above.  That is harmless as long as the
        # name consists of \w characters, which the class regex requires.
        if search(r'~%s\s*\(' % base_classname, line):
            classinfo.has_virtual_destructor = True

    # Look for class end.
    # The depth accumulates across calls in classinfo.brace_depth; the class
    # is considered closed as soon as the braces seen so far balance out.
    brace_depth = classinfo.brace_depth
    brace_depth = brace_depth + line.count('{') - line.count('}')
    if brace_depth <= 0:
        classinfo = classinfo_stack.pop()
        # Try to detect missing virtual destructor declarations.
        # For now, only warn if a non-derived class with virtual methods lacks
        # a virtual destructor. This is to make it less likely that people will
        # declare derived virtual destructors without declaring the base
        # destructor virtual.
        if ((classinfo.virtual_method_line_number is not None)
            and (not classinfo.has_virtual_destructor)
            and (not classinfo.is_derived)):  # Only warn for base classes
            error(classinfo.line_number, 'runtime/virtual', 4,
                  'The class %s probably needs a virtual destructor due to '
                  'having virtual method(s), one declared at line %d.'
                  % (classinfo.name, classinfo.virtual_method_line_number))
    else:
        classinfo.brace_depth = brace_depth
1344
1345
def check_spacing_for_function_call(line, line_number, error):
    """Reports misplaced spaces around the parentheses of function calls.

    Args:
      line: The text of the line to check.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # Function calls often occur inside if/for/foreach/while/switch
    # expressions - which have their own, more liberal conventions.  When the
    # line holds such an expression, only the text inside its parens is
    # examined, with the stricter function-call standards; otherwise the
    # whole line is examined.
    control_flow_patterns = (r'\bif\s*\((.*)\)\s*{',
                             r'\bfor\s*\((.*)\)\s*{',
                             r'\bforeach\s*\((.*)\)\s*{',
                             r'\bwhile\s*\((.*)\)\s*[{;]',
                             r'\bswitch\s*\((.*)\)\s*{')
    candidate = line
    for control_flow_pattern in control_flow_patterns:
        condition = search(control_flow_pattern, line)
        if condition:
            candidate = condition.group(1)
            break

    # Except in if/for/foreach/while/switch, there should never be space
    # immediately inside parens (eg "f( 3, 4 )").  We make an exception
    # for nested parens ( (a+b) + c ).  Likewise, there should never be
    # a space before a ( when it's a function argument.  I assume it's a
    # function argument when the char before the whitespace is legal in
    # a function name (alnum + _) and we're not starting a macro. Also ignore
    # pointers and references to arrays and functions coz they're too tricky:
    # we use a very simple way to recognize these:
    # " (something)(maybe-something)" or
    # " (something)(maybe-something," or
    # " (something)[something]"
    # Note that we assume the contents of [] to be short enough that
    # they'll never need to wrap.

    # Ignore control structures.
    if search(r'\b(if|for|foreach|while|switch|return|new|delete)\b', candidate):
        return
    # Ignore pointers/references to functions.
    if search(r' \([^)]+\)\([^)]*(\)|,$)', candidate):
        return
    # Ignore pointers/references to arrays.
    if search(r' \([^)]+\)\[[^\]]+\]', candidate):
        return

    if search(r'\w\s*\([ \t](?!\s*\\$)', candidate):
        # A ( used for a fn call.
        error(line_number, 'whitespace/parens', 4,
              'Extra space after ( in function call')
    elif search(r'\([ \t]+(?!(\s*\\)|\()', candidate):
        error(line_number, 'whitespace/parens', 2,
              'Extra space after (')

    has_space_before_paren = search(r'\w\s+\(', candidate)
    if has_space_before_paren and not match(r'\s*(#|typedef)', candidate):
        error(line_number, 'whitespace/parens', 4,
              'Extra space before ( in function call')

    # If the ) is followed only by a newline or a { + newline, assume it's
    # part of a control statement (if/while/etc), and don't complain
    if search(r'[^)\s]\s+\)(?!\s*$|{\s*$)', candidate):
        error(line_number, 'whitespace/parens', 2,
              'Extra space before )')
1404
1405
def is_blank_line(line):
    """Tells whether the given line is blank.

    A line counts as blank when it is empty or contains nothing but
    white space.

    Args:
      line: A line of a string.

    Returns:
      True, if the given line is blank.
    """
    if not line:
        return True
    return line.isspace()
1419
1420
def detect_functions(clean_lines, line_number, function_state, error):
    """Tracks where functions begin and end.

    The detection is a heuristic that relies on the other style rules
    (spacing in particular) being followed.  Trivial bodies are not
    examined, so constructors with huge initializer lists may be missed.

    When a function start is recognized, function_state.begin() is called
    with the positions of the name, the parameter list and the body.  When
    the line just past the recorded end position is reached,
    function_state.end() is called.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      function_state: Current function name and lines in body so far.
      error: The function to call with any errors found.
    """
    # Close out the previous function once we have moved past its last line.
    if line_number == function_state.end_position.row + 1:
        function_state.end()

    # While inside a function there is no new function to look for.
    if function_state.in_a_function:
        return

    current_line = clean_lines.lines[line_number]

    # A trailing backslash indicates a macro; those are not checked.
    if clean_lines.raw_lines[line_number].endswith('\\'):
        return

    # decls * & space::name( ...
    declaration_regexp = r'\s*(\w(\w|::|\*|\&|\s|<|>|,|~|(operator\s*(/|-|=|!|\+)+))*)\('
    declaration_match = match(declaration_regexp, current_line)
    if not declaration_match:
        return

    # A name made only of capitals and underscores is assumed to be a macro
    # and is skipped, with TEST and TEST_F being the exceptions.
    candidate_name = declaration_match.group(1).split()[-1]
    if candidate_name not in ('TEST', 'TEST_F') and match(r'[A-Z_]+$', candidate_name):
        return

    joined_line = ''
    for row in xrange(line_number, clean_lines.num_lines()):
        row_text = clean_lines.elided[row]
        joined_line += ' ' + row_text.lstrip()
        body_match = search(r'{|;', row_text)
        if not body_match:
            continue

        body_start_position = Position(row, body_match.start(0))

        # Collapse template constructs into underscores so the function name
        # contains no spaces, while every other character keeps the column
        # it has in the original line.
        line_with_no_templates = iteratively_replace_matches_with_char(r'<[^<>]*>', '_', current_line)
        match_function = search(r'((\w|:|<|>|,|~|(operator\s*(/|-|=|!|\+)+))*)\(', line_with_no_templates)
        if not match_function:
            return  # The '(' must have been inside of a template.

        # The columns found in the modified line locate the function name
        # in the original line.
        name_start, name_end = match_function.span(1)
        function = current_line[name_start:name_end]
        function_name_start_position = Position(line_number, name_start)

        if match(r'TEST', function):    # Handle TEST... macros
            parameter_regexp = search(r'(\(.*\))', joined_line)
            if parameter_regexp:             # Ignore bad syntax
                function += parameter_regexp.group(1)
        else:
            function += '()'

        parameter_start_position = Position(line_number, name_end)
        parameter_end_position = close_expression(clean_lines.elided, parameter_start_position)
        if parameter_end_position.row == len(clean_lines.elided):
            # No end was found.
            return

        if row_text[body_start_position.column] == ';':
            # A declaration: it ends right after the semicolon.
            end_position = Position(body_start_position.row, body_start_position.column + 1)
        else:
            end_position = close_expression(clean_lines.elided, body_start_position)

        # Check for nonsensical positions. (This happens in test cases which check code snippets.)
        if parameter_end_position > body_start_position:
            return

        function_state.begin(function, function_name_start_position, body_start_position, end_position,
                             parameter_start_position, parameter_end_position, clean_lines)
        return

    # No body for the function (or evidence of a non-function) was found.
    error(line_number, 'readability/fn_size', 5,
          'Lint failed to find start of function body.')
1512
1513
def check_for_function_lengths(clean_lines, line_number, function_state, error):
    """Reports for long function bodies.

    For an overview why this is done, see:
    http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

    Lines that are blank in clean_lines.lines are not counted, so as to
    avoid encouraging the removal of vertical space and comments just to
    get through a lint check.
    NOLINT *on the last line of a function* disables this check.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      function_state: Current function name and lines in body so far.
      error: The function to call with any errors found.
    """
    code_line = clean_lines.lines[line_number]
    original_line = clean_lines.raw_lines[line_number]

    on_last_line = function_state.end_position.row == line_number
    if not on_last_line:
        # Count non-blank/non-comment lines.
        if not match(r'^\s*$', code_line):
            function_state.count(line_number)
        return

    # Last line of the function: run the length check unless NOLINT is present.
    if not search(r'\bNOLINT\b', original_line):
        function_state.check(error, line_number)
1540
1541
def _check_parameter_name_against_text(parameter, text, error):
    """Reports a parameter whose name is already contained in the given text.

    Args:
      parameter: The parameter to examine; its name, row and
                 lower_with_underscores_name() are used.
      text: The text (e.g. a type or function name) to compare against.
      error: The function to call with any errors found.

    Returns:
      False if the check failed (i.e. an error was produced), True otherwise.
    """
    # 'lower with underscores' serves as the canonical form: it ignores case
    # but keeps word breaks, so 'elate' is not taken for a duplicate of
    # 'NateLate'.
    canonical_parameter_name = parameter.lower_with_underscores_name()

    # "Object" is appended to every word of the text to catch names that did
    # the same.  Single character names are exempt so that, for example, 'b'
    # stays acceptable in an rgba function.
    if len(canonical_parameter_name) > 1:
        text = sub(r'(\w)\b', r'\1Object', text)
    canonical_text = _convert_to_lower_with_underscores(text)

    # The acronym detects cases like ec for ExceptionCode.
    acronym = _create_acronym(text).lower()

    name_is_redundant = (canonical_parameter_name in canonical_text
                         or canonical_parameter_name in acronym)
    if not name_is_redundant:
        return True

    error(parameter.row, 'readability/parameter_name', 5,
          'The parameter name "%s" adds no information, so it should be removed.' % parameter.name)
    return False
1567
1568
def check_function_definition(filename, file_extension, clean_lines, line_number, function_state, error):
    """Checks function definitions and declarations for style issues.

    Two things are checked: that WEBKIT_API is only used where allowed, and
    that parameter names in declarations add information.  The checks run
    only on the line where the function body starts.

    Args:
       filename: Filename of the file that is being processed.
       file_extension: The current file extension, without the leading dot.
       clean_lines: A CleansedLines instance containing the file.
       line_number: The number of the line to check.
       function_state: Current function name and lines in body so far.
       error: The function to call with any errors found.
    """
    if function_state.body_start_position.row != line_number:
        return

    modifiers_and_return_type = function_state.modifiers_and_return_type()
    if '/chromium/' in filename and search(r'\bWEBKIT_API\b', modifiers_and_return_type):
        # At most one WEBKIT_API complaint is produced; the first rule that
        # is violated wins.
        webkit_api_problem = None
        if '/chromium/public/' not in filename:
            webkit_api_problem = 'WEBKIT_API should only appear in the chromium public directory.'
        elif file_extension != "h":
            webkit_api_problem = 'WEBKIT_API should only be used in header files.'
        elif not function_state.is_declaration or search(r'\binline\b', modifiers_and_return_type):
            webkit_api_problem = 'WEBKIT_API should not be used on a function with a body.'
        elif function_state.is_pure:
            webkit_api_problem = 'WEBKIT_API should not be used with a pure virtual function.'

        if webkit_api_problem:
            error(function_state.function_name_start_position.row, 'readability/webkit_api', 5,
                  webkit_api_problem)

    # The remaining checks are specific to function declarations.
    if not function_state.is_declaration:
        return

    parameter_list = function_state.parameter_list()
    for parameter in parameter_list:
        if not parameter.name:
            continue

        # For a setter taking a single parameter, compare the parameter name
        # with the function name (minus the leading 'set') first.
        is_single_parameter_setter = (len(parameter_list) == 1
                                      and match('set[A-Z]', function_state.current_function))
        if is_single_parameter_setter:
            setter_subject = function_state.current_function[len('set'):]
            if not _check_parameter_name_against_text(parameter, setter_subject, error):
                continue  # An error was already noted for this name.

        # Compare the parameter name with its type.
        _check_parameter_name_against_text(parameter, parameter.type, error)
1617
1618
def check_pass_ptr_usage(clean_lines, line_number, function_state, error):
    """Checks that Pass*Ptr types are used properly.

    For now only one misuse is detected: a line inside a function, after
    the line on which the body starts, that begins with a Pass*Ptr<
    declaration.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      function_state: Current function name and lines in body so far.
      error: The function to call with any errors found.
    """
    if not function_state.in_a_function:
        return

    line = clean_lines.lines[line_number]

    # Only lines after the one where the body starts are examined.
    if line_number <= function_state.body_start_position.row:
        return

    matched_pass_ptr = match(r'^\s*Pass([A-Z][A-Za-z]*)Ptr<', line)
    if not matched_pass_ptr:
        return

    type_name = 'Pass%sPtr' % matched_pass_ptr.group(1)
    error(line_number, 'readability/pass_ptr', 5,
          'Local variables should never be %s (see '
          'http://webkit.org/coding/RefPtr.html).' % type_name)
1643
1644
def check_spacing(file_extension, clean_lines, line_number, error):
    """Checks for the correctness of various spacing issues in the code.

    Things we check for: spaces around operators, spaces after
    if/for/while/switch, no spaces around parens in function calls, one
    space between code and an end of line comment, don't start a block
    with a blank line, don't end a block with a blank line, spacing
    around commas, braces, brackets and semicolons.

    The blank line and comment checks look at the raw line; all remaining
    checks look at the elided line (comments and strings removed).  Nothing
    is returned; every finding is reported through error().

    Args:
      file_extension: The current file extension, without the leading dot.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    raw = clean_lines.raw_lines
    line = raw[line_number]

    # Before nixing comments, check if the line is blank for no good
    # reason.  This includes the first line after a block is opened, and
    # blank lines at the end of a function (ie, right before a line like '}').
    if is_blank_line(line):
        elided = clean_lines.elided
        previous_line = elided[line_number - 1]
        previous_brace = previous_line.rfind('{')
        # FIXME: Don't complain if line before blank line, and line after,
        #        both start with alnums and are indented the same amount.
        #        This ignores whitespace at the start of a namespace block
        #        because those are not usually indented.
        if (previous_brace != -1 and previous_line[previous_brace:].find('}') == -1
            and previous_line[:previous_brace].find('namespace') == -1):
            # OK, we have a blank line at the start of a code block.  Before we
            # complain, we check if it is an exception to the rule: The previous
            # non-empty line has the parameters of a function header that are indented
            # 4 spaces (because they did not fit in a 80 column line when placed on
            # the same line as the function name).  We also check for the case where
            # the previous line is indented 6 spaces, which may happen when the
            # initializers of a constructor do not fit into a 80 column line.
            exception = False
            if match(r' {6}\w', previous_line):  # Initializer list?
                # We are looking for the opening column of initializer list, which
                # should be indented 4 spaces to cause 6 space indentation afterwards.
                search_position = line_number - 2
                while (search_position >= 0
                       and match(r' {6}\w', elided[search_position])):
                    search_position -= 1
                exception = (search_position >= 0
                             and elided[search_position][:5] == '    :')
            else:
                # Search for the function arguments or an initializer list.  We use a
                # simple heuristic here: If the line is indented 4 spaces; and we have a
                # closing paren, without the opening paren, followed by an opening brace
                # or colon (for initializer lists) we assume that it is the last line of
                # a function header.  If we have a colon indented 4 spaces, it is an
                # initializer list.
                exception = (match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                                   previous_line)
                             or match(r' {4}:', previous_line))

            if not exception:
                error(line_number, 'whitespace/blank_line', 2,
                      'Blank line at the start of a code block.  Is this needed?')
        # This doesn't ignore whitespace at the end of a namespace block
        # because that is too hard without pairing open/close braces;
        # however, a special exception is made for namespace closing
        # brackets which have a comment containing "namespace".
        #
        # Also, ignore blank lines at the end of a block in a long if-else
        # chain, like this:
        #   if (condition1) {
        #     // Something followed by a blank line
        #
        #   } else if (condition2) {
        #     // Something else
        #   }
        if line_number + 1 < clean_lines.num_lines():
            next_line = raw[line_number + 1]
            if (next_line
                and match(r'\s*}', next_line)
                and next_line.find('namespace') == -1
                and next_line.find('} else ') == -1):
                error(line_number, 'whitespace/blank_line', 3,
                      'Blank line at the end of a code block.  Is this needed?')

    # Next, we check for proper spacing with respect to comments.
    comment_position = line.find('//')
    if comment_position != -1:
        # Check if the // may be in quotes.  If so, ignore it
        # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
        if (line.count('"', 0, comment_position) - line.count('\\"', 0, comment_position)) % 2 == 0:   # not in quotes
            # Allow one space before end of line comment: complain when the
            # comment follows code and is preceded by no whitespace at all
            # or by more than one whitespace character.
            if (not match(r'^\s*$', line[:comment_position])
                and (comment_position >= 1
                and ((line[comment_position - 1] not in string.whitespace)
                     or (comment_position >= 2
                         and line[comment_position - 2] in string.whitespace)))):
                error(line_number, 'whitespace/comments', 5,
                      'One space before end of line comments')
            # There should always be a space between the // and the comment
            commentend = comment_position + 2
            if commentend < len(line) and not line[commentend] == ' ':
                # but some lines are exceptions -- e.g. if they're big
                # comment delimiters like:
                # //----------------------------------------------------------
                # or they begin with multiple slashes followed by a space:
                # //////// Header comment
                matched = (search(r'[=/-]{4,}\s*$', line[commentend:])
                           or search(r'^/+ ', line[commentend:]))
                if not matched:
                    error(line_number, 'whitespace/comments', 4,
                          'Should have a space between // and comment')

            # There should only be one space after punctuation in a comment.
            # (Raw string: same pattern text, without relying on Python
            # passing the unknown '\s' string escape through unchanged.)
            if search(r'[.!?,;:]\s\s', line[comment_position:]):
                error(line_number, 'whitespace/comments', 5,
                      'Should only a single space after a punctuation in a comment.')

    line = clean_lines.elided[line_number]  # get rid of comments and strings

    # Don't try to do spacing checks for operator methods.  The operator
    # symbol is dropped from the line so the checks below do not see it.
    # The '^' of '^=' has to be escaped: unescaped it is a start-of-string
    # anchor, which can never match after the word 'operator', so
    # 'operator^=(' would not be recognized.
    line = sub(r'operator(==|!=|<|<<|<=|>=|>>|>|\+=|-=|\*=|/=|%=|&=|\|=|\^=|<<=|>>=)\(', 'operator\(', line)
    # Don't try to do spacing checks for #include or #import statements at
    # minimum because it messes up checks for spacing around /
    if match(r'\s*#\s*(?:include|import)', line):
        return
    if search(r'[\w.]=[\w.]', line):
        error(line_number, 'whitespace/operators', 4,
              'Missing spaces around =')

    # FIXME: It's not ok to have spaces around binary operators like .

    # You should always have whitespace around binary operators.
    # Alas, we can't test < or > because they're legitimately used sans spaces
    # (a->b, vector<int> a).  The only time we can tell is a < with no >, and
    # only if it's not template params list spilling into the next line.
    matched = search(r'[^<>=!\s](==|!=|\+=|-=|\*=|/=|/|\|=|&=|<<=|>>=|<=|>=|\|\||\||&&|>>|<<)[^<>=!\s]', line)
    if not matched:
        # Note that while it seems that the '<[^<]*' term in the following
        # regexp could be simplified to '<.*', which would indeed match
        # the same class of strings, the [^<] means that searching for the
        # regexp takes linear rather than quadratic time.
        if not search(r'<[^<]*,\s*$', line):  # template params spill
            matched = search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
    if matched:
        error(line_number, 'whitespace/operators', 3,
              'Missing spaces around %s' % matched.group(1))

    # There shouldn't be space around unary operators
    matched = search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
    if matched:
        error(line_number, 'whitespace/operators', 4,
              'Extra space for operator %s' % matched.group(1))

    # A pet peeve of mine: no spaces after an if, while, switch, or for
    matched = search(r' (if\(|for\(|foreach\(|while\(|switch\()', line)
    if matched:
        error(line_number, 'whitespace/parens', 5,
              'Missing space before ( in %s' % matched.group(1))

    # For if/for/foreach/while/switch, the left and right parens should be
    # consistent about how many spaces are inside the parens, and
    # there should either be zero or one spaces inside the parens.
    # We don't want: "if ( foo)" or "if ( foo   )".
    # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
    matched = search(r'\b(?P<statement>if|for|foreach|while|switch)\s*\((?P<remainder>.*)$', line)
    if matched:
        statement = matched.group('statement')
        condition, rest = up_to_unmatched_closing_paren(matched.group('remainder'))
        # A condition of None means no condition could be extracted from
        # this line; in that case none of the checks below are run.
        if condition is not None:
            condition_match = search(r'(?P<leading>[ ]*)(?P<separator>.).*[^ ]+(?P<trailing>[ ]*)', condition)
            if condition_match:
                n_leading = len(condition_match.group('leading'))
                n_trailing = len(condition_match.group('trailing'))
                if n_leading != 0:
                    for_exception = statement == 'for' and condition.startswith(' ;')
                    if not for_exception:
                        error(line_number, 'whitespace/parens', 5,
                              'Extra space after ( in %s' % statement)
                if n_trailing != 0:
                    for_exception = statement == 'for' and condition.endswith('; ')
                    if not for_exception:
                        error(line_number, 'whitespace/parens', 5,
                              'Extra space before ) in %s' % statement)

            # Do not check for more than one command in macros
            in_preprocessor_directive = match(r'\s*#', line)
            if not in_preprocessor_directive and not match(r'((\s*{\s*}?)|(\s*;?))\s*\\?$', rest):
                error(line_number, 'whitespace/parens', 4,
                      'More than one command on the same line in %s' % statement)

    # You should always have a space after a comma (either as fn arg or operator)
    if search(r',[^\s]', line):
        error(line_number, 'whitespace/comma', 3,
              'Missing space after ,')

    # Two tokens at the start of a line should be separated by one space only.
    matched = search(r'^\s*(?P<token1>[a-zA-Z0-9_\*&]+)\s\s+(?P<token2>[a-zA-Z0-9_\*&]+)', line)
    if matched:
        error(line_number, 'whitespace/declaration', 3,
              'Extra space between %s and %s' % (matched.group('token1'), matched.group('token2')))

    if file_extension == 'cpp':
        # C++ should have the & or * beside the type not the variable name.
        matched = match(r'\s*\w+(?<!\breturn|\bdelete)\s+(?P<pointer_operator>\*|\&)\w+', line)
        if matched:
            error(line_number, 'whitespace/declaration', 3,
                  'Declaration has space between type name and %s in %s' % (matched.group('pointer_operator'), matched.group(0).strip()))

    elif file_extension == 'c':
        # C Pointer declaration should have the * beside the variable not the type name.
        matched = search(r'^\s*\w+\*\s+\w+', line)
        if matched:
            error(line_number, 'whitespace/declaration', 3,
                  'Declaration has space between * and variable name in %s' % matched.group(0).strip())

    # Next we will look for issues with function calls.
    check_spacing_for_function_call(line, line_number, error)

    # Except after an opening paren, you should have spaces before your braces.
    # And since you should never have braces at the beginning of a line, this is
    # an easy test.
    if search(r'[^ ({]{', line):
        error(line_number, 'whitespace/braces', 5,
              'Missing space before {')

    # Make sure '} else {' has spaces.
    if search(r'}else', line):
        error(line_number, 'whitespace/braces', 5,
              'Missing space before else')

    # You shouldn't have spaces before your brackets, except maybe after
    # 'delete []' or 'new char * []'.
    if search(r'\w\s+\[', line) and not search(r'delete\s+\[', line):
        error(line_number, 'whitespace/braces', 5,
              'Extra space before [')

    # You shouldn't have a space before a semicolon at the end of the line.
    # There's a special case for "for" since the style guide allows space before
    # the semicolon there.
    if search(r':\s*;\s*$', line):
        error(line_number, 'whitespace/semicolon', 5,
              'Semicolon defining empty statement. Use { } instead.')
    elif search(r'^\s*;\s*$', line):
        error(line_number, 'whitespace/semicolon', 5,
              'Line contains only semicolon. If this should be an empty statement, '
              'use { } instead.')
    elif (search(r'\s+;\s*$', line) and not search(r'\bfor\b', line)):
        error(line_number, 'whitespace/semicolon', 5,
              'Extra space before last semicolon. If this should be an empty '
              'statement, use { } instead.')
    elif (search(r'\b(for|while)\s*\(.*\)\s*;\s*$', line)
          and line.count('(') == line.count(')')
          # Allow do {} while();
          and not search(r'}\s*while', line)):
        error(line_number, 'whitespace/semicolon', 5,
              'Semicolon defining empty statement for this loop. Use { } instead.')
1901
1902
def get_previous_non_blank_line(clean_lines, line_number):
    """Finds the closest non-blank line above the given line.

    The search runs over clean_lines.elided, from the line just before
    line_number back to line 0.

    Args:
      clean_lines: A CleansedLines instance containing the file contents.
      line_number: The number of the line to check.

    Returns:
      A tuple with two elements.  The first element is the contents of the last
      non-blank line before the current line, or the empty string if this is the
      first non-blank line.  The second is the line number of that line, or -1
      if this is the first non-blank line.
    """
    candidate_number = line_number
    while candidate_number > 0:
        candidate_number -= 1
        candidate = clean_lines.elided[candidate_number]
        if is_blank_line(candidate):
            continue
        return (candidate, candidate_number)

    # Every earlier line (if any) was blank.
    return ('', -1)
1924
1925
def check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error):
    """Looks for indentation errors inside of namespaces.

    Only lines that open a namespace are acted upon.  An indented namespace
    line is reported directly; otherwise the following lines are scanned
    until the namespace's closing brace, and the first indented line found
    directly inside the namespace is reported.  file_state remembers that
    this warning was issued so it is given only once.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_extension: The extension (dot not included) of the file.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: The function to call with any errors found.
    """

    line = clean_lines.elided[line_number]  # Get rid of comments and strings.

    namespace_match = match(r'(?P<namespace_indentation>\s*)namespace\s+\S+\s*{\s*$', line)
    if not namespace_match:
        return

    if len(namespace_match.group('namespace_indentation')) > 0:
        # Don't warn about an indented namespace if we already warned about indented code.
        if not file_state.did_inside_namespace_indent_warning():
            error(line_number, 'whitespace/indent', 4,
                  'namespace should never be indented.')
        return

    # Number of braces currently open below the namespace's own brace.
    brace_depth = 0
    awaiting_semicolon = False
    inside_directive = False
    for offset, current_line in enumerate(clean_lines.elided[line_number + 1:]):
        stripped_line = current_line.strip()
        if not stripped_line:
            continue

        if brace_depth:
            # If we have a brace we may not need a semicolon.
            awaiting_semicolon = False
        else:
            # Directly inside the namespace.  Continuation lines of a
            # preprocessor directive or of an unfinished statement are
            # allowed to be indented.
            if not (inside_directive or awaiting_semicolon):
                if not match(r'\S', current_line) and not file_state.did_inside_namespace_indent_warning():
                    file_state.set_did_inside_namespace_indent_warning()
                    error(line_number + offset + 1, 'whitespace/indent', 4,
                          'Code inside a namespace should not be indented.')
            if inside_directive or stripped_line.startswith('#'):
                # This takes care of preprocessor directive syntax: the
                # directive continues while lines end with a backslash.
                inside_directive = current_line.endswith('\\')
            else:
                awaiting_semicolon = ((';' not in current_line and not stripped_line.endswith('}'))
                                      or current_line.endswith('\\'))

        brace_depth += current_line.count('{') - current_line.count('}')
        if brace_depth < 0:
            # The namespace's closing brace has been passed.
            break
1973
1974
def check_using_std(clean_lines, line_number, file_state, error):
    """Flags 'using std::foo;' statements, which should be 'using namespace std;'.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: The function to call with any errors found.
    """

    # C and Objective-C implementation files are exempt from this check.
    if file_state.is_c_or_objective_c():
        return

    # The elided line has comments and strings removed.
    using_std_match = match(r'\s*using\s+std::(?P<method_name>\S+)\s*;\s*$',
                            clean_lines.elided[line_number])
    if using_std_match:
        error(line_number, 'build/using_std', 4,
              "Use 'using namespace std;' instead of 'using std::%s;'."
              % using_std_match.group('method_name'))
1999
2000
def check_max_min_macros(clean_lines, line_number, file_state, error):
    """Looks for uses of the MAX() and MIN() macros, which should be replaced with std::max() and std::min().

    Only the first MAX( or MIN( found on the line is reported.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: The function to call with any errors found.
    """

    # C and Objective-C implementation files are exempt from this check.
    if file_state.is_c_or_objective_c():
        return

    # The elided line has comments and strings removed.
    macro_match = search(r'\b(?P<max_min_macro>(MAX|MIN))\s*\(',
                         clean_lines.elided[line_number])
    if macro_match:
        macro_name = macro_match.group('max_min_macro')
        std_name = macro_name.lower()
        error(line_number, 'runtime/max_min_macros', 4,
              'Use std::%s() or std::%s<type>() instead of the %s() macro.'
              % (std_name, std_name, macro_name))
2027
2028
def check_switch_indentation(clean_lines, line_number, error):
    """Reports badly indented labels and statements inside a switch block.

    Nothing happens unless the line at line_number opens a switch statement
    ("switch (...) {"). From there the following lines are scanned until the
    closing brace at the switch's own indentation, the first reported
    problem, or a nested switch statement, whichever comes first.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    elided_lines = clean_lines.elided  # Comments and strings already removed.

    opening = match(r'(?P<switch_indentation>\s*)switch\s*\(.+\)\s*{\s*$', elided_lines[line_number])
    if not opening:
        return

    label_indentation = opening.group('switch_indentation')
    body_indentation = label_indentation + ' ' * 4
    first_body_line = line_number + 1

    for offset, current_line in enumerate(elided_lines[first_body_line:]):
        current_line_number = first_body_line + offset

        # Blank lines and preprocessor directives carry no indentation
        # information that we care about.
        if current_line.strip() == '' or current_line.startswith('#'):
            continue

        # A nested switch would require tracking where it ends before label
        # checks could be trusted again. We don't do that: this line is still
        # checked below, and then the scan stops. As long as the file is
        # indented consistently (even if wrongly), problems are still caught.
        is_nested_switch = bool(match(r'\s*switch\s*\(.+\)\s*{\s*$', current_line))

        parts = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line)
        indentation = parts.group('indentation')
        statement = parts.group('remaining_line')

        # The closing brace of the switch statement ends the check.
        if statement.startswith('}') and indentation == label_indentation:
            return

        # "case ...:" and "default:" must line up with the switch keyword.
        # The pattern accepts one-liners such as "default: break;" but not
        # scope-qualified calls such as "Document::Foo();".
        if match(r'(default|case\s+.*)\s*:([^:].*)?$', statement):
            if indentation != label_indentation:
                error(current_line_number, 'whitespace/indent', 4,
                      'A case label should not be indented, but line up with its switch statement.')
                # One report is enough to point at the problem.
                return
        # Goto labels are allowed anywhere, including column zero.
        elif match(r'\w+\s*:\s*$', statement):
            continue
        # Everything else must be indented at least one level past the switch.
        elif not indentation.startswith(body_indentation):
            error(current_line_number, 'whitespace/indent', 4,
                  'Non-label code inside switch statements should be indented.')
            # One report is enough to point at the problem.
            return

        if is_nested_switch:
            return
2097
2098
def check_braces(clean_lines, line_number, error):
    """Looks for misplaced braces (e.g. at the end of line).

    The checks performed on the line at line_number are, in order:
      * an opening brace alone on its line that belongs on the previous line,
        or a function definition whose brace is not on its own line;
      * braces around a one-line control clause;
      * an "else" that does not share a line with the preceding "}";
      * an else clause or do/while body on the same line as its keyword;
      * a redundant ";" after a "}".

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    line = clean_lines.elided[line_number]  # Get rid of comments and strings.

    if match(r'\s*{\s*$', line):
        # We allow an open brace to start a line in the case where someone
        # is using braces for function definition or in a block to
        # explicitly create a new scope, which is commonly used to control
        # the lifetime of stack-allocated variables.  We don't detect this
        # perfectly: we just don't complain if the last non-whitespace
        # character on the previous non-blank line is ';', ':', '{', '}',
        # ')', or ') const' and doesn't begin with 'if|for|while|switch|else'.
        # We also allow '#' for #endif and '=' for array initialization.
        previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
        if ((not search(r'[;:}{)=]\s*$|\)\s*const\s*$', previous_line)
             or search(r'\b(if|for|foreach|while|switch|else)\b', previous_line))
            and previous_line.find('#') < 0):
            error(line_number, 'whitespace/braces', 4,
                  'This { should be at the end of the previous line')
    elif (search(r'\)\s*(const\s*)?{\s*$', line)
          and line.count('(') == line.count(')')
          and not search(r'\b(if|for|foreach|while|switch)\b', line)
          and not match(r'\s+[A-Z_][A-Z_0-9]+\b', line)):
        # The line ends in ") {" or ") const {", is not a control statement
        # and does not start with an indented ALL_CAPS identifier.
        error(line_number, 'whitespace/braces', 4,
              'Place brace on its own line for function definitions.')

    if (match(r'\s*}\s*(else\s*({\s*)?)?$', line) and line_number > 1):
        # A closing brace starts this line. Look two lines up (the line in
        # between would be the single statement) to see whether a control
        # statement opened a brace there that it never closed on that line.
        previous_line = clean_lines.elided[line_number - 2]
        last_open_brace = previous_line.rfind('{')
        if (last_open_brace != -1 and previous_line.find('}', last_open_brace) == -1
            and search(r'\b(if|for|foreach|while|else)\b', previous_line)):
            error(line_number, 'whitespace/braces', 4,
                  'One line control clauses should not use braces.')

    # An else clause should be on the same line as the preceding closing brace.
    # The trailing \b keeps identifiers that merely begin with "else"
    # (e.g. "elseBranch = 0;") from being mistaken for the keyword.
    if match(r'\s*else\b', line):
        previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
        if match(r'\s*}\s*$', previous_line):
            error(line_number, 'whitespace/newline', 4,
                  'An else should appear on the same line as the preceding }')

    # Likewise, an else should never have the else clause on the same line
    if search(r'\belse [^\s{]', line) and not search(r'\belse if\b', line):
        error(line_number, 'whitespace/newline', 4,
              'Else clause should never be on same line as else (use 2 lines)')

    # In the same way, a do/while should never be on one line
    if match(r'\s*do [^\s{]', line):
        error(line_number, 'whitespace/newline', 4,
              'do/while clauses should not be on a single line')

    # Braces shouldn't be followed by a ; unless they're defining a struct
    # or initializing an array.
    # We can't tell in general, but we can for some common cases.
    # While the (joined) line still starts with whitespace followed by
    # "{...};", prepend the previous non-blank line as long as that line holds
    # no ';', so that a declaration split over several lines is seen whole.
    previous_line_number = line_number
    while True:
        (previous_line, previous_line_number) = get_previous_non_blank_line(clean_lines, previous_line_number)
        # Prepending an empty string cannot change `line`, so continuing
        # would repeat the same test forever.
        # NOTE(review): get_previous_non_blank_line is not visible here; this
        # presumes it yields an empty line once no earlier non-blank line
        # exists - TODO confirm.
        if not previous_line:
            break
        if match(r'\s+{.*}\s*;', line) and not previous_line.count(';'):
            line = previous_line + line
        else:
            break
    if (search(r'{.*}\s*;', line)
        and line.count('{') == line.count('}')
        and not search(r'struct|class|enum|\s*=\s*{', line)):
        error(line_number, 'readability/braces', 4,
              "You don't need a ; after a }")
2174
2175
def check_exit_statement_simplifications(clean_lines, line_number, error):
    """Flags an else / else-if that follows an "if" block ending in an exit.

    When the block before an "else" or "else if" finishes with a return,
    break, continue or goto statement, the else keyword is redundant. The
    lines above line_number are walked bottom-up to find that exit statement
    and then the "if" that owns the block; the error is reported on the line
    of that "if".

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # Elided lines have comments and strings removed.
    else_match = match(r'(?P<else_indentation>\s*)(\}\s*)?else(\s+if\s*\(|(?P<else>\s*(\{\s*)?\Z))',
                       clean_lines.elided[line_number])
    if not else_match:
        return

    outer_indentation = else_match.group('else_indentation')
    block_indentation = outer_indentation + ' ' * 4
    exit_statement_seen = False

    for distance, current_line in enumerate(reversed(clean_lines.elided[:line_number])):
        current_line_number = line_number - 1 - distance

        # Blank lines, preprocessor directives and goto labels are ignored.
        if (current_line.strip() == '' or current_line.startswith('#')
            or match(r'\w+\s*:\s*$', current_line)):
            continue

        # A lone closing brace at the else's indentation is ignored as well.
        # The style guide wants it on the "else" line, but code that does
        # not comply should still be analysed.
        if current_line == outer_indentation + '}':
            continue

        parts = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line)
        indentation = parts.group('indentation')
        statement = parts.group('remaining_line')

        if not exit_statement_seen:
            # Walking upwards, the first real statement has to be an exit
            # statement sitting exactly one level inside the block. Any other
            # indentation hints at a nested construct, which is ignored.
            if (indentation != block_indentation
                or not match(r'(return(\W+.*)|(break|continue)\s*;|goto\s*\w+;)$', statement)):
                return
            exit_statement_seen = True
            continue

        # The previous block ends in an exit statement. What remains is to
        # confirm that the block belongs to an "if".

        # Ignore a lone opening brace at the else's indentation
        # ("if (condition)\n{").
        if current_line == outer_indentation + '{':
            continue

        # Ignore anything indented deeper than the "else" itself.
        if indentation != outer_indentation and indentation.startswith(outer_indentation):
            continue

        # This line is at the same (or a lesser) indentation. Report if it is
        # an "if"; either way the search is over.
        if match(r'if\s*\(', statement):
            if else_match.start('else') != -1:
                message = ('An else statement can be removed when the prior "if" concludes '
                           'with a return, break, continue or goto statement.')
            else:
                message = ('An else if statement should be written as an if statement when the '
                           'prior "if" concludes with a return, break, continue or goto statement.')
            error(current_line_number, 'readability/control_flow', 4, message)
        return
2263
2264
def replaceable_check(operator, macro, line):
    """Tells whether a plain CHECK could use a comparison-specific macro.

    For instance CHECK(a == 3) can be written as CHECK_EQ(a, 3); the same
    goes for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT and CHECK_NE.

    Args:
      operator: The C++ operator used in the CHECK.
      macro: The CHECK or EXPECT macro being called.
      line: The current source line.

    Returns:
      A true value if the CHECK can be replaced with a more specific one,
      a false value otherwise.
    """

    # Decimal integer, hex integer, string or char literal (in that order).
    literal = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'

    # Only comparisons with something that looks like a literal on one side
    # are suggested, because CHECK(x == iterator) has no compilable
    # replacement. This misses some opportunities but avoids noisy warnings.
    literal_on_the_left = r'\s*' + literal + r'\s*' + operator + r'[^<>].*'
    literal_on_the_right = r'.*[^<>]' + operator + r'\s*' + literal + r'\s*\)'
    pattern = r'\s*' + macro + r'\((' + literal_on_the_left + '|' + literal_on_the_right + ')'

    found = match(pattern, line)
    if not found:
        return found

    # CHECK(x == NULL) is left alone because CHECK_EQ(x, NULL) would need a
    # cast to compile. Compound conditions using && or || are left alone too.
    return not search(r'NULL|&&|\|\|', line)
2298
2299
def check_check(clean_lines, line_number, error):
    """Suggests comparison-specific variants of CHECK and EXPECT macros.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """

    # The first entry of _CHECK_MACROS found in the raw line decides which
    # replacement table applies.
    raw_line = clean_lines.raw_lines[line_number]
    macros_on_line = [macro for macro in _CHECK_MACROS if macro in raw_line]
    if not macros_on_line or not macros_on_line[0]:
        # No macro on this line, so there is nothing to suggest.
        return
    current_macro = macros_on_line[0]

    line = clean_lines.elided[line_number]  # Comments and strings removed.

    # Suggest CHECK_EQ/CHECK_NE/etc. for the first operator that qualifies.
    for operator in ('==', '!=', '>=', '>', '<=', '<'):
        if not replaceable_check(operator, current_macro, line):
            continue
        replacement = _CHECK_REPLACEMENT[current_macro][operator]
        error(line_number, 'readability/check', 2,
              'Consider using %s instead of %s(a %s b)' % (
                  replacement, current_macro, operator))
        return
2330
2331
def check_for_comparisons_to_zero(clean_lines, line_number, error):
    """Flags equality comparisons against NULL, 0, true or false.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.
    """
    # Get the line without comments and strings.
    line = clean_lines.elided[line_number]

    # Include NULL here so that users don't have to convert NULL to 0 first and then get this error.
    # The character next to the literal must be neither a word character nor
    # a '.', so that floating point literals such as "x == 0.5" or
    # "1.0 == x" are not mistaken for a comparison against zero.
    if (search(r'[=!]=\s*(NULL|0|true|false)[^\w.]', line)
        or search(r'[^\w.](NULL|0|true|false)\s*[=!]=', line)):
        error(line_number, 'readability/comparison_to_zero', 5,
              'Tests for true/false, null/non-null, and zero/non-zero should all be done without equality comparisons.')
2340
2341
def check_for_null(clean_lines, line_number, file_state, error):
    """Reports uses of NULL, both in code and in comments.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: The function to call with any errors found.
    """
    # This check doesn't apply to C or Objective-C implementation files.
    if file_state.is_c_or_objective_c():
        return

    code_line = clean_lines.elided[line_number]

    # Lines mentioning any of these names are never reported.
    exempt_patterns = (
        # g_*(). See Bug 32858 and 39372.
        r'\bg(_[a-z]+)+\b',
        # gst_*_many(). See Bug 39740
        r'\bgst_\w+_many\b',
        # g_str{join,concat}(). See Bug 34834
        r'\bg_str(join|concat)\b',
        # gdk_pixbuf_save_to*(). See Bug 43090.
        r'\bgdk_pixbuf_save_to\w+\b',
        # gtk_widget_style_get(). See Bug 51758.
        r'\bgtk_widget_style_get\(\w+\b',
    )
    for exempt_pattern in exempt_patterns:
        if search(exempt_pattern, code_line):
            return

    if search(r'\bNULL\b', code_line):
        error(line_number, 'readability/null', 5, 'Use 0 instead of NULL.')
        return

    # The elided line held no NULL, so look at the raw line, where comments
    # are still present. Strings are collapsed before the second test so
    # that a NULL inside a string literal is not reported.
    raw_line = clean_lines.raw_lines[line_number]
    if not search(r'\bNULL\b', raw_line):
        return
    if search(r'\bNULL\b', CleansedLines.collapse_strings(raw_line)):
        error(line_number, 'readability/null', 4, 'Use 0 or null instead of NULL (even in *comments*).')

2379
def get_line_width(line):
    """Determines the width of the line in column positions.

    Args:
      line: A string, which may be a Unicode string.

    Returns:
      The width of the line in column positions, accounting for Unicode
      combining characters and wide characters. Anything that is not a
      Unicode string is measured with len().
    """
    # Python 3 has no separate `unicode` type (its `str` is the Unicode
    # string type), so referencing the name unguarded raises NameError there.
    try:
        unicode_type = unicode
    except NameError:
        unicode_type = str

    if not isinstance(line, unicode_type):
        return len(line)

    width = 0
    for character in unicodedata.normalize('NFC', line):
        if unicodedata.east_asian_width(character) in ('W', 'F'):
            # Wide and fullwidth characters take up two columns.
            width += 2
        elif not unicodedata.combining(character):
            # Combining characters add no width of their own.
            width += 1
    return width
2399
2400
def check_style(clean_lines, line_number, file_extension, class_state, file_state, error):
    """Runs the per-line checks for the 'C++ style rules' of cppguide.html.

    Many of those rules (naming, comment style) are hard to verify
    mechanically. What is covered here: tabs, trailing whitespace, odd
    indentation widths, labels at column zero, several statements on one
    line and trailing boolean operators. The more specific style checkers
    are invoked afterwards.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_extension: The extension (without the dot) of the filename.
      class_state: A _ClassState instance which maintains information about
                   the current stack of nested class declarations being parsed.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: The function to call with any errors found.
    """

    line = clean_lines.raw_lines[line_number]

    if '\t' in line:
        error(line_number, 'whitespace/tab', 1,
              'Tab found; better to use spaces')

    # One to three leading spaces is weird; it's hard to reconcile that with
    # 4-space indents.
    # NOTE: here are the conditions rob pike used for his tests.  The ones
    # below aren't as sophisticated, but it may be worth becoming so:
    # RLENGTH==initial_spaces
    # if(RLENGTH > 20) complain = 0;
    # if(match($0, " +(error|private|public|protected):")) complain = 0;
    # if(match(prev, "&& *$")) complain = 0;
    # if(match(prev, "\\|\\| *$")) complain = 0;
    # if(match(prev, "[\",=><] *$")) complain = 0;
    # if(match($0, " <<")) complain = 0;
    # if(match(prev, " +for \\(")) complain = 0;
    # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
    cleansed_line = clean_lines.elided[line_number]
    initial_spaces = len(line) - len(line.lstrip(' '))

    if line and line[-1].isspace():
        error(line_number, 'whitespace/end_of_line', 4,
              'Line ends in whitespace.  Consider deleting these extra spaces.')
    # A label is the one thing allowed to sit at an odd indentation.
    elif 1 <= initial_spaces <= 3 and not match(r'\s*\w+\s*:\s*$', cleansed_line):
        error(line_number, 'whitespace/indent', 3,
              'Weird number of spaces at line-start.  '
              'Are you using a 4-space indent?')
    # Labels should always be indented at least one space.
    elif initial_spaces == 0 and line[:2] != '//':
        label_match = match(r'(?P<label>[^:]+):\s*$', line)
        if label_match:
            label = label_match.group('label')
            # Goto labels may legitimately start a line, so complain only
            # about things that definitely are not goto labels.
            if label in ('public', 'private', 'protected') or ' ' in label:
                error(line_number, 'whitespace/labels', 4,
                      'Labels should always be indented at least one space.  '
                      'If this is a member-initializer list in a constructor, '
                      'the colon should be on the line after the definition header.')

    # for loops are allowed two ;'s (and may run over two lines).
    if cleansed_line.count(';') > 1 and cleansed_line.find('for') == -1:
        previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
        continues_for_header = 'for' in previous_line and ';' not in previous_line
        # It's ok to have many commands in a switch case that fits in 1 line
        is_one_line_case = (('case ' in cleansed_line or 'default:' in cleansed_line)
                            and 'break;' in cleansed_line)
        if not continues_for_header and not is_one_line_case:
            # Also it's ok to have many commands in trivial single-line
            # accessors in class definitions.
            is_inline_accessor = (match(r'.*\(.*\).*{.*.}', line)
                                  and class_state.classinfo_stack
                                  and line.count('{') == line.count('}'))
            if not is_inline_accessor and not cleansed_line.startswith('#define '):
                error(line_number, 'whitespace/newline', 4,
                      'More than one command on the same line')

    if cleansed_line.strip().endswith(('||', '&&')):
        error(line_number, 'whitespace/operators', 4,
              'Boolean expressions that span multiple lines should have their '
              'operators on the left side of the line instead of the right side.')

    # Some more style checks
    check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error)
    check_using_std(clean_lines, line_number, file_state, error)
    check_max_min_macros(clean_lines, line_number, file_state, error)
    check_switch_indentation(clean_lines, line_number, error)
    check_braces(clean_lines, line_number, error)
    check_exit_statement_simplifications(clean_lines, line_number, error)
    check_spacing(file_extension, clean_lines, line_number, error)
    check_check(clean_lines, line_number, error)
    check_for_comparisons_to_zero(clean_lines, line_number, error)
    check_for_null(clean_lines, line_number, file_state, error)
2498
2499
# Matches a quoted #include of a ".h" file whose path contains no '/',
# e.g. '#include "foo.h"'.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line. Group 1 is the opening delimiter ('<' or
# '"') and group 2 is the included path between the delimiters.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cpp').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cpp').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cpp').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
2508
2509
2510def _drop_common_suffixes(filename):
2511    """Drops common suffixes like _test.cpp or -inl.h from filename.
2512
2513    For example:
2514      >>> _drop_common_suffixes('foo/foo-inl.h')
2515      'foo/foo'
2516      >>> _drop_common_suffixes('foo/bar/foo.cpp')
2517      'foo/bar/foo'
2518      >>> _drop_common_suffixes('foo/foo_internal.h')
2519      'foo/foo'
2520      >>> _drop_common_suffixes('foo/foo_unusualinternal.h')
2521      'foo/foo_unusualinternal'
2522
2523    Args:
2524      filename: The input filename.
2525
2526    Returns:
2527      The filename with the common suffix removed.
2528    """
2529    for suffix in ('test.cpp', 'regtest.cpp', 'unittest.cpp',
2530                   'inl.h', 'impl.h', 'internal.h'):
2531        if (filename.endswith(suffix) and len(filename) > len(suffix)
2532            and filename[-len(suffix) - 1] in ('-', '_')):
2533            return filename[:-len(suffix) - 1]
2534    return os.path.splitext(filename)[0]
2535
2536
def _classify_include(filename, include, is_system, include_state):
    """Decides which category of header 'include' belongs to.

    Roughly: system headers are _OTHER_HEADER, "config.h" is _CONFIG_HEADER,
    Qt moc files are _MOC_HEADER, the header that goes with the file being
    checked is _PRIMARY_HEADER and anything else is _OTHER_HEADER.

    Args:
      filename: The current file cpp_style is running over.
      include: The path to a #included file.
      is_system: True if the #include used <> rather than "".
      include_state: An _IncludeState instance in which the headers are inserted.

    Returns:
      One of the _XXX_HEADER constants.
    """

    # A system header is always classified as _OTHER_HEADER.
    if is_system:
        return _OTHER_HEADER

    # An include named config.h is taken to be WebCore/config.h.
    if include == "config.h":
        return _CONFIG_HEADER

    # Header files have no primary include of their own. Only a header that
    # includes itself by its exact name carries on, so that it can be flagged
    # as primary and trigger the "don't include yourself" check.
    if filename.endswith('.h') and filename != include:
        return _OTHER_HEADER

    # Qt's moc files do not follow the naming and ordering rules, so they
    # get a category of their own.
    is_moc_source = include.startswith('moc_') and include.endswith('.cpp')
    if is_moc_source or include.endswith('.moc'):
        return _MOC_HEADER

    target_base = FileInfo(filename).base_name()
    include_base = FileInfo(include).base_name()

    if include_state.visited_primary_section():
        # A primary header was seen already, so compare strictly. If the two
        # base names are identical, the earlier lenient match was probably a
        # false positive.
        if target_base == include_base:
            if include == "ResourceHandleWin.h":
                # FIXME: Thus far, we've only seen one example of these, but if we
                # start to see more, please consider generalizing this check
                # somehow.
                return _OTHER_HEADER
            return _PRIMARY_HEADER
    elif include_base in target_base:
        # No primary header seen yet, so be lenient: it is enough for the
        # include's base name to occur within the target's base name.
        return _PRIMARY_HEADER

    return _OTHER_HEADER
2599
2600
def _does_primary_header_exist(filename):
    """Tells whether a ".h" file exists alongside the given source file.

    Returns False when FileInfo does not consider filename a source file;
    otherwise returns whether the path made of the filename without its
    extension plus ".h" is an existing file.
    """
    fileinfo = FileInfo(filename)
    if fileinfo.is_source():
        return os.path.isfile(fileinfo.no_extension() + ".h")
    return False
2610
2611
def check_include_line(filename, file_extension, clean_lines, line_number, include_state, error):
    """Applies the rules that concern #include lines.

    Strings on #include lines are NOT removed from the elided line, to make
    certain tasks easier. To avoid false positives, any check from
    CheckLanguage that applies to #include lines therefore belongs here.

    Args:
      filename: The name of the current file.
      file_extension: The current file extension, without the leading dot.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      include_state: An _IncludeState instance in which the headers are inserted.
      error: The function to call with any errors found.
    """
    # FIXME: For readability or as a possible optimization, consider
    #        exiting early here by checking whether the "build/include"
    #        category should be checked for the given filename.  This
    #        may involve having the error handler classes expose a
    #        should_check() method, in addition to the usual __call__
    #        method.
    line = clean_lines.lines[line_number]

    include_match = _RE_PATTERN_INCLUDE.search(line)
    if not include_match:
        return

    delimiter, include = include_match.group(1, 2)
    is_system = (delimiter == '<')

    # Look for any of the stream classes that are part of standard C++.
    if match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
        error(line_number, 'readability/streams', 3,
              'Streams are highly discouraged.')

    # Look for specific includes to fix.
    if include.startswith('wtf/') and not is_system:
        error(line_number, 'build/include', 4,
              'wtf includes should be <wtf/file.h> instead of "wtf/file.h".')

    # Remember where each header is first included and complain about repeats.
    duplicate_header = include in include_state
    if duplicate_header:
        error(line_number, 'build/include', 4,
              '"%s" already included at %s:%s' %
              (include, filename, include_state[include]))
    else:
        include_state[include] = line_number

    header_type = _classify_include(filename, include, is_system, include_state)
    primary_header_exists = _does_primary_header_exist(filename)
    include_state.header_types[line_number] = header_type

    # The ordering rules are not applied to a duplicate header.
    if duplicate_header:
        return

    # Headers have to appear in the right order:
    # 1) for implementation files: config.h, primary header, blank line, alphabetically sorted
    # 2) for header files: alphabetically sorted
    # The include_state object keeps track of the last type seen
    # and complains if the header types are out of order or missing.
    is_header_file = (file_extension == "h")
    error_message = include_state.check_next_include_order(header_type,
                                                           is_header_file,
                                                           primary_header_exists)
    if error_message:
        if file_extension == 'h':
            expected_order = '%s Should be: alphabetically sorted.'
        else:
            expected_order = '%s Should be: config.h, primary header, blank line, and then alphabetically sorted.'
        error(line_number, 'build/include_order', 4, expected_order % error_message)
        return

    # The primary header has to be followed by a blank line.
    if header_type == _PRIMARY_HEADER:
        if not is_blank_line(clean_lines.raw_lines[line_number + 1]):
            error(line_number, 'build/include_order', 4,
                  "You should add a blank line after implementation file's own header.")

    # All headers besides config.h and the primary header have to be sorted
    # alphabetically. Qt's moc files are skipped.
    if header_type == _OTHER_HEADER:
        # Walk upwards to the closest preceding #include line, giving up at
        # the first line of the file or at a preprocessor conditional.
        previous_line_number = line_number - 1
        while True:
            previous_line = clean_lines.lines[previous_line_number]
            previous_match = _RE_PATTERN_INCLUDE.search(previous_line)
            if (previous_match or previous_line_number <= 0
                or search(r'\A(#if|#ifdef|#ifndef|#else|#elif|#endif)', previous_line)):
                break
            previous_line_number -= 1

        if previous_match:
            previous_header_type = include_state.header_types[previous_line_number]
            if previous_header_type == _OTHER_HEADER and previous_line.strip() > line.strip():
                error(line_number, 'build/include_order', 4,
                      'Alphabetical sorting problem.')
2710
2711
def check_language(filename, clean_lines, line_number, file_extension, include_state,
                   file_state, error):
    """Checks rules from the 'C++ language rules' section of cppguide.html.

    Some of these rules are hard to test (function overloading, using
    uint32 inappropriately), but we do the best we can.

    An #include line is delegated to check_include_line() and receives no
    further checks. Any other non-empty line is run through each check
    below in turn and is finally passed to
    check_identifier_name_in_declaration().

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      file_extension: The extension (without the dot) of the filename.
      include_state: An _IncludeState instance in which the headers are inserted.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: The function to call with any errors found.
    """
    # If the line is empty or consists of entirely a comment, no need to
    # check it.
    line = clean_lines.elided[line_number]
    if not line:
        return

    if _RE_PATTERN_INCLUDE.search(line):
        check_include_line(filename, file_extension, clean_lines, line_number, include_state, error)
        return

    # FIXME: figure out if they're using default arguments in fn proto.

    # Check to see if they're using a conversion function cast.
    # I just try to capture the most common basic types, though there are more.
    # Parameterless conversion functions, such as bool(), are allowed as they are
    # probably a member operator declaration or default constructor.
    matched = search(
        r'\b(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
    if matched:
        # gMock methods are defined using some variant of MOCK_METHODx(name, type)
        # where type may be float(), int(string), etc.  Without context they are
        # virtually indistinguishable from int(x) casts.
        if not match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line):
            error(line_number, 'readability/casting', 4,
                  'Using deprecated casting style.  '
                  'Use static_cast<%s>(...) instead' %
                  matched.group(1))

    # The raw line (comments included) is needed by check_c_style_cast to
    # recognize commented-out parameter names.
    raw_line = clean_lines.raw_lines[line_number]
    check_c_style_cast(line_number, line, raw_line,
                       'static_cast',
                       r'\((int|float|double|bool|char|u?int(16|32|64))\)',
                       error)
    # This doesn't catch all cases.  Consider (const char * const)"hello".
    check_c_style_cast(line_number, line, raw_line,
                       'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

    # In addition, we look for people taking the address of a cast.  This
    # is dangerous -- casts can assign to temporaries, so the pointer doesn't
    # point where you think.
    if search(
        r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
        error(line_number, 'runtime/casting', 4,
              ('Are you taking an address of a cast?  '
               'This is dangerous: could be a temp var.  '
               'Take the address before doing the cast, rather than after'))

    # Check for people declaring static/global STL strings at the top level.
    # This is dangerous because the C++ language does not guarantee that
    # globals with constructors are initialized before the first access.
    matched = match(
        r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
        line)
    # Make sure it's not a function.
    # Function template specialization looks like: "string foo<Type>(...".
    # Class template definitions look like: "string Foo<Type>::Method(...".
    if matched and not match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
                             matched.group(3)):
        error(line_number, 'runtime/string', 4,
              'For a static/global string constant, use a C style string instead: '
              '"%schar %s[]".' %
              (matched.group(1), matched.group(2)))

    # Check that we're not using RTTI outside of testing code.
    if search(r'\bdynamic_cast<', line):
        error(line_number, 'runtime/rtti', 5,
              'Do not use dynamic_cast<>.  If you need to cast within a class '
              "hierarchy, use static_cast<> to upcast.  Google doesn't support "
              'RTTI.')

    # A member initializer of the form "foo_(foo_)" initializes the member
    # with itself; the backreference requires both names to be identical.
    if search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
        error(line_number, 'runtime/init', 4,
              'You seem to be initializing a member variable with itself.')

    # FIXME: for header files, check that 1-arg constructors are explicit.
    #        How to tell it's a constructor?
    #        (handled in check_for_non_standard_constructs for now)

    # Check if people are using the verboten C basic types.  The only exception
    # we regularly allow is "unsigned short port" for port.
    if search(r'\bshort port\b', line):
        if not search(r'\bunsigned short port\b', line):
            error(line_number, 'runtime/int', 4,
                  'Use "unsigned short" for ports, not "short"')

    # When snprintf is used, the second argument shouldn't be a literal.
    matched = search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
    if matched:
        error(line_number, 'runtime/printf', 3,
              'If you can, use sizeof(%s) instead of %s as the 2nd arg '
              'to snprintf.' % (matched.group(1), matched.group(2)))

    # Check if some verboten C functions are being used.
    if search(r'\bsprintf\b', line):
        error(line_number, 'runtime/printf', 5,
              'Never use sprintf.  Use snprintf instead.')
    matched = search(r'\b(strcpy|strcat)\b', line)
    if matched:
        error(line_number, 'runtime/printf', 4,
              'Almost always, snprintf is better than %s' % matched.group(1))

    if search(r'\bsscanf\b', line):
        error(line_number, 'runtime/printf', 1,
              'sscanf can be ok, but is slow and can overflow buffers.')

    # Check for suspicious usage of "if" like
    # } if (a == b) {
    if search(r'\}\s*if\s*\(', line):
        error(line_number, 'readability/braces', 4,
              'Did you mean "else if"? If not, start a new line for "if".')

    # Check for potential format string bugs like printf(foo).
    # We constrain the pattern not to pick things like DocidForPrintf(foo).
    # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
    matched = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
    if matched:
        error(line_number, 'runtime/printf', 4,
              'Potential format string bug. Do %s("%%s", %s) instead.'
              % (matched.group(1), matched.group(2)))

    # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
    # NOTE(review): in the literal test below the alternation binds looser
    # than the anchors, so any second argument that merely starts with
    # digits is accepted as a literal.  Left unchanged on purpose.
    matched = search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
    if matched and not match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", matched.group(2)):
        error(line_number, 'runtime/memset', 4,
              'Did you mean "memset(%s, 0, %s)"?'
              % (matched.group(1), matched.group(2)))

    # Detect variable-length arrays.
    matched = match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
    if (matched and matched.group(2) != 'return' and matched.group(2) != 'delete' and
        matched.group(3).find(']') == -1):
        # Split the size using whitespace, the arithmetic operators and both
        # shift operators as delimiters.  If any of the resulting tokens are
        # not compile time constants then report the error.
        tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', matched.group(3))
        is_const = True
        skip_next = False
        for tok in tokens:
            if skip_next:
                skip_next = False
                continue

            if search(r'sizeof\(.+\)', tok):
                continue
            if search(r'arraysize\(\w+\)', tok):
                continue

            tok = tok.lstrip('(')
            tok = tok.rstrip(')')
            if not tok:
                # Adjacent delimiters (e.g. " << ") produce empty tokens.
                continue
            if match(r'\d+', tok):
                continue
            if match(r'0[xX][0-9a-fA-F]+', tok):
                continue
            if match(r'k[A-Z0-9]\w*', tok):
                continue
            if match(r'(.+::)?k[A-Z0-9]\w*', tok):
                continue
            if match(r'(.+::)?[A-Z][A-Z0-9_]*', tok):
                continue
            # A catch all for tricky sizeof cases, including 'sizeof expression',
            # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
            # requires skipping the next token because we split on ' ' and '*'.
            if tok.startswith('sizeof'):
                skip_next = True
                continue
            is_const = False
            break
        if not is_const:
            error(line_number, 'runtime/arrays', 1,
                  'Do not use variable-length arrays.  Use an appropriately named '
                  "('k' followed by CamelCase) compile-time constant for the size.")

    # Check for use of unnamed namespaces in header files.  Registration
    # macros are typically OK, so we allow use of "namespace {" on lines
    # that end with backslashes.
    if (file_extension == 'h'
        and search(r'\bnamespace\s*{', line)
        and line[-1] != '\\'):
        error(line_number, 'build/namespaces', 4,
              'Do not use unnamed namespaces in header files.  See '
              'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
              ' for more information.')

    check_identifier_name_in_declaration(filename, line_number, line, file_state, error)
2917
2918
def check_identifier_name_in_declaration(filename, line_number, line, file_state, error):
    """Reports declared identifiers that contain underscores or are named 'l'.

    Only declarations are examined, never uses: the libraries we build on
    are full of underscored names, so complaining about every use would be
    noise.

    The line is first simplified (type adjectives, "new" expressions,
    template arguments and leading control-flow keywords are stripped) and
    then scanned from the left, one declared identifier at a time.

    Args:
      filename: The name of the current file.
      line_number: The number of the line to check.
      line: The line of code to check.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: The function to call with any errors found.
    """
    # "return ..." and "delete ..." statements are never declarations.
    if match(r'\s*(return|delete)\b', line):
        return

    # A declaration is basically a type name, whitespace, and an identifier.
    # Type names can be arbitrarily decorated, so the decorations are removed
    # first.  The order of these substitutions matters.
    simplifications = (
        # "long long", "long double" and "long long int" become simple
        # types; a lone "long" is kept.
        r'long (long )?(?=long|double|int)',
        # unsigned/signed variants become simple types as well.
        r'(unsigned|signed) (?=char|short|int|long)',
        # Storage classes, qualifiers and similar keywords.
        r'\b(inline|using|static|const|volatile|auto|register|extern|typedef|restrict|struct|class|virtual)(?=\W)',
        # "new" and "new (expr)".
        r'new\s*(\([^)]*\))?',
    )
    for pattern in simplifications:
        line = sub(pattern, '', line)

    # Strip template arguments by deleting matching < and >.  Each pass
    # removes the innermost level, so repeat until a pass removes nothing.
    removed = True
    while removed:
        line, removed = subn(r'<([\w\s:]|::)+\s*[*&]*\s*>', '', line)

    # Local variables may be declared inside the condition of a control flow
    # statement (e.g., "if (RenderObject* p = o->parent())"), so the keyword
    # and its opening parenthesis are dropped.
    #
    # "while", "if" and "switch" differ from "for" in two ways and are
    # therefore tracked separately:
    #
    # - Only a single declaration fits between the parentheses.
    #   (i.e., you cannot write "if (int i = 0, j = 1) {}")
    # - The variable must be initialized.
    #   (i.e., you cannot write "if (int i) {}")
    line = sub(r'^\s*for\s*\(', '', line)
    line, inside_condition = subn(r'^\s*(while|else if|if|switch)\s*\(', '', line)

    # Patterns for one declared variable or function, with and without a
    # leading type name.
    untyped_declaration = (r'\s*'
                           + r'(?P<identifier>[\w:]+)'
                           + r'\s*'
                           + r'(:\s*\d+\s*)?'  # Optional bitfield width.
                           + r'(?P<character_after_identifier>[[;()=,])(?!=)')
    typed_declaration = (r'\s*'
                         + r'\w([\w]|\s*[*&]\s*|::)+'
                         + r'\s'
                         + untyped_declaration)

    # Exceptions to the underscore rule (besides JavaScriptCore op codes).
    exempt_prefixes = ('tst_', 'webkit_dom_object_', 'NPN_', 'NPP_', 'NP_',
                       'qt_', 'cairo_')
    exempt_names = ('const_iterator', 'vm_throw')

    first_identifier = True
    in_function_arguments = False
    while True:
        # The first identifier, and every function argument, must be
        # preceded by a type name; later declarators ("int a, b") are not.
        if first_identifier or in_function_arguments:
            matched = match(typed_declaration, line)
        else:
            matched = match(untyped_declaration, line)
        if not matched:
            return
        identifier = matched.group('identifier')
        next_character = matched.group('character_after_identifier')

        # Inside while/if/switch a declaration is always followed by '='.
        # This avoids warnings for cases like "if (val & INT_MAX) {".
        if inside_condition and next_character != '=':
            return

        if next_character == '(':
            in_function_arguments = True

        # The "m_" and "s_" prefixes are permitted, so ignore them.
        bare_identifier = sub(r'(^|(?<=::))[ms]_', '', identifier)
        if not file_state.is_objective_c() and '_' in bare_identifier:
            is_exempt = (('JavaScriptCore' in filename and 'op_' in bare_identifier)
                         or bare_identifier.startswith(exempt_prefixes)
                         or '::qt_' in bare_identifier
                         or bare_identifier in exempt_names)
            if not is_exempt:
                error(line_number, 'readability/naming', 4, identifier + " is incorrectly named. Don't use underscores in your identifier names.")

        # A variable named 'l' is too easy to confuse with '1' in some fonts.
        if bare_identifier == 'l':
            error(line_number, 'readability/naming', 4, identifier + " is incorrectly named. Don't use the single letter 'l' as an identifier name.")

        # while/if/switch conditions hold a single declaration at most.
        if inside_condition:
            return
        # Keep scanning only after '(' (function arguments follow) or ','
        # (another declarator follows).
        if next_character not in ('(', ','):
            return

        first_identifier = False
        line = line[matched.end():]
3039
def check_c_style_cast(line_number, line, raw_line, cast_type, pattern,
                       error):
    """Looks for the given C-style cast pattern and reports what it finds.

    Because the text looks so similar, a hit is classified as one of:
    sizeof(type), an unnamed function parameter, or a real C-style cast.
    At most one error is reported per call.

    Args:
      line_number: The number of the line to check.
      line: The line of code to check.
      raw_line: The raw line of code to check, with comments.
      cast_type: The string for the C++ cast to recommend.  This is either
                 reinterpret_cast or static_cast, depending.
      pattern: The regular expression used to find C-style casts.
      error: The function to call with any errors found.
    """
    cast = search(pattern, line)
    if not cast:
        return

    # e.g., sizeof(int): the text in front of the opening parenthesis ends
    # with "sizeof".
    before_paren = line[0:cast.start(1) - 1]
    if match(r'.*sizeof\s*$', before_paren):
        error(line_number, 'runtime/sizeof', 1,
              'Using sizeof(type).  Use sizeof(varname) instead if possible')
        return

    # Inspect what follows the parenthesized type to recognize declarations:
    #   ')'   a function pointer passed as an argument,
    #         eg, void foo(void (*bar)(int));
    #   ';'   a plain function declaration or function pointer typedef,
    #         eg, void foo(int); or void foo(int) const;
    #   '='   a function pointer assignment,
    #         eg, void *(*foo)(int) = ...
    #
    # Right now, this will only catch cases where there's a single argument,
    # and it's unnamed.  It should probably be expanded to check for multiple
    # arguments with some unnamed.
    after_cast = line[cast.end(0):]
    declaration_tail = match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))', after_cast)
    if not declaration_tail:
        # At this point, all that should be left is actual casts.
        error(line_number, 'readability/casting', 4,
              'Using C-style cast.  Use %s<%s>(...) instead' %
              (cast_type, cast.group(1)))
        return

    terminator = declaration_tail.group(3)
    # For a '{' or 'throw()' tail, stay silent when the raw line carries a
    # block comment.
    if not terminator or terminator == ';' or '/*' not in raw_line:
        error(line_number, 'readability/function', 3,
              'All parameters should be named in a function')
3092
3093
# Pairs of (header, names of templates that header provides).  Each name is
# turned into a "name<" regular expression below, so that a use of the
# template can be tied to the header that should be included for it.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', ('unary_function', 'binary_function',
                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
                      'negate',
                      'equal_to', 'not_equal_to', 'greater', 'less',
                      'greater_equal', 'less_equal',
                      'logical_and', 'logical_or', 'logical_not',
                      'unary_negate', 'not1', 'binary_negate', 'not2',
                      'bind1st', 'bind2nd',
                      'pointer_to_unary_function',
                      'pointer_to_binary_function',
                      'ptr_fun',
                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
                      'mem_fun_ref_t',
                      'const_mem_fun_t', 'const_mem_fun1_t',
                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
                      'mem_fun_ref',
                     )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
    )

# Maps a template entity (spelled as in _re_pattern_templates, e.g. 'pair<>')
# to include names that are accepted in place of the entity's own header:
# check_for_include_what_you_use() stays silent when any of them is present
# in the include state.
_HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
    # We can trust with reasonable confidence that map gives us pair<>, too.
    'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
}

# Matches the word "string" anywhere in a line.
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# List of (compiled pattern, function name, '<algorithm>') triples, one per
# <algorithm> function whose use is looked for.
_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                  'transform'):
    # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
    # type::max().
    _re_pattern_algorithm_header.append(
        (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
         _template,
         '<algorithm>'))

# List of (compiled pattern, 'name<>', header) triples, one per template
# listed in _HEADERS_CONTAINING_TEMPLATES.  The pattern matches the template
# name followed by optional whitespace and '<'.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
    for _template in _templates:
        _re_pattern_templates.append(
            (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
             _template + '<>',
             _header))
3155
3156
def files_belong_to_same_module(filename_cpp, filename_h):
    """Decide whether a .cpp file and a header belong to the same module.

    Two files form a 'module' when, after normalization, the .cpp path ends
    with the header path.  Normalization drops the '.cpp' / '.h' extension,
    one trailing '_unittest' or '_test' on the .cpp side, a trailing '-inl'
    on the header side, and collapses '/public/' and '/internal/' path
    components to '/'.  Hence foo.h, foo-inl.h, foo.cpp, foo_test.cpp and
    foo_unittest.cpp in one directory are one module, and so are
    some/path/public/xyzzy and some/path/internal/xyzzy.

    When filename_cpp carries a longer path than filename_h (for example
    '/absolute/path/to/base/sysinfo.cpp' including 'base/sysinfo.h'), the
    leading part that the header lacks is returned as well, so the caller
    can prepend it to open the header.  The real include paths are not
    known in this context, so this is guesswork.

    Known bugs: tools/base/bar.cpp and base/bar.h are reported as the same
    module, so this function gives some false positives.  This should be
    sufficiently rare in practice.

    Args:
      filename_cpp: is the path for the .cpp file
      filename_h: is the path for the header path

    Returns:
      Tuple with a bool and a string:
      bool: True if filename_cpp and filename_h belong to the same module.
      string: the additional prefix needed to open the header file.
    """
    cpp_extension = '.cpp'
    header_extension = '.h'
    if not (filename_cpp.endswith(cpp_extension)
            and filename_h.endswith(header_extension)):
        return (False, '')

    # Normalize the implementation file name; only one test suffix is
    # removed, and '_unittest' takes precedence over '_test'.
    cpp_stem = filename_cpp[:-len(cpp_extension)]
    for test_suffix in ('_unittest', '_test'):
        if cpp_stem.endswith(test_suffix):
            cpp_stem = cpp_stem[:-len(test_suffix)]
            break

    # Normalize the header name.
    header_stem = filename_h[:-len(header_extension)]
    inline_suffix = '-inl'
    if header_stem.endswith(inline_suffix):
        header_stem = header_stem[:-len(inline_suffix)]

    # public/ and internal/ directories count as their parent directory.
    for visibility_dir in ('/public/', '/internal/'):
        cpp_stem = cpp_stem.replace(visibility_dir, '/')
        header_stem = header_stem.replace(visibility_dir, '/')

    if not cpp_stem.endswith(header_stem):
        return (False, '')
    # Whatever precedes the shared tail is the prefix the header is missing.
    return (True, cpp_stem[:-len(header_stem)])
3210
3211
def update_include_state(filename, include_state, io=codecs):
    """Fill up the include_state with new includes found from the file.

    Every #include found in the file is recorded in include_state unless an
    entry for that include already exists.

    Args:
      filename: the name of the header to read.
      include_state: an _IncludeState instance in which the headers are inserted.
      io: The io factory to use to read the file. Provided for testability.
          An io factory registered in _unit_test_config under
          INCLUDE_IO_INJECTION_KEY takes precedence over this argument.

    Returns:
      True if the file could be opened and was scanned. False if opening it
      raised IOError.
    """
    # Fall back to the caller's io factory (not unconditionally to codecs)
    # when no factory has been injected, so that the parameter is honored.
    io = _unit_test_config.get(INCLUDE_IO_INJECTION_KEY, io)
    try:
        header_file = io.open(filename, 'r', 'utf8', 'replace')
    except IOError:
        return False

    try:
        line_number = 0
        for line in header_file:
            line_number += 1
            clean_line = cleanse_comments(line)
            matched = _RE_PATTERN_INCLUDE.search(clean_line)
            if matched:
                include = matched.group(2)
                # The value formatting is cute, but not really used right now.
                # What matters here is that the key is in include_state.
                include_state.setdefault(include, '%s:%d' % (filename, line_number))
    finally:
        # Release the file handle.  close() is looked up defensively because
        # a substituted io factory is only required to return something
        # iterable, which may not be closable.
        close = getattr(header_file, 'close', None)
        if close is not None:
            close()
    return True
3240
3241
def check_for_include_what_you_use(filename, clean_lines, include_state, error):
    """Reports STL headers that are used in the file but not included.

    The file is scanned for uses of STL containers and functions, and a
    warning is produced for each header they require that is missing.  Only
    one reason is reported per header: if both equal_to<> and less<> are
    used, only the one appearing later in the file is named as the reason
    to include <functional>.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      include_state: An _IncludeState instance.
      error: The function to call with any errors found.
    """
    # Maps a header name to (line_number, entity that needs the header).
    # Example: { '<functional>': (1219, 'less<>') }
    needed = {}

    for index in xrange(clean_lines.num_lines()):
        code = clean_lines.elided[index]
        # Skip empty lines and preprocessor directives.
        if not code or code[0] == '#':
            continue

        # String is special -- it is a non-templatized type in STL.
        if _RE_PATTERN_STRING.search(code):
            needed['<string>'] = (index, 'string')

        for regexp, entity, header_name in _re_pattern_algorithm_header:
            if regexp.search(code):
                needed[header_name] = (index, entity)

        # Every template pattern requires a '<', so lines without one are
        # not tried at all.  This is only a speed up.
        if '<' in code:
            for regexp, entity, header_name in _re_pattern_templates:
                if regexp.search(code):
                    needed[header_name] = (index, entity)

    # The policy is that something #included in foo.h does not have to be
    # included again in foo.cpp, so the includes of the file's own header
    # are added below.  Work on a copy so that the caller's include_state is
    # left untouched.
    known_includes = include_state.copy()

    # Use the absolute path so that matching works properly.
    #
    # For Emacs's flymake: when cpp_style is invoked from flymake, it runs
    # on a temporary file whose name might end with '_flymake.cpp'.  Restore
    # the original name so that the corresponding header can be found, e.g.
    # look for 'foo.h' rather than 'foo_flymake.h' for 'foo_flymake.cpp'.
    source_path = re.sub(r'_flymake\.cpp$', '.cpp', os.path.abspath(filename))

    # Whether the header for this file (if any) was found and loaded.
    own_header_loaded = False

    # known_includes is modified during iteration, so we iterate over a copy
    # of the keys.
    for included in known_includes.keys():  #NOLINT
        in_same_module, path_prefix = files_belong_to_same_module(source_path, included)
        if in_same_module and update_include_state(path_prefix + included, known_includes):
            own_header_loaded = True

    # If we can't find the header file for a .cpp, assume it's because we
    # don't know where to look, and give up: the missing includes may well
    # be in the .h file.
    # FIXME: Do a better job of finding .h files so we are confident that
    #        not having the .h file means there isn't one.
    if not own_header_loaded and filename.endswith('.cpp'):
        return

    # All the lines have been processed, report the errors found.
    for header_name in needed:
        reported_line, entity = needed[header_name]
        # Some entities are considered available through other headers.
        substitutes = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED.get(entity, ())
        if any(substitute in known_includes for substitute in substitutes):
            continue
        if header_name.strip('<>"') in known_includes:
            continue
        error(reported_line,
              'build/include_what_you_use', 4,
              'Add #include ' + header_name + ' for ' + entity)
3328
3329
def process_line(filename, file_extension,
                 clean_lines, line, include_state, function_state,
                 class_state, file_state, error):
    """Runs every per-line check on one line of the file.

    Function detection and the function-length check always run.  All
    remaining checks are skipped when the raw line contains the word NOLINT.

    Args:
      filename: Filename of the file that is being processed.
      file_extension: The extension (dot not included) of the file.
      clean_lines: The object holding the lines of the file; its raw_lines
                   attribute is read here and the object itself is handed
                   to the individual checks.
      line: Number of line being processed.
      include_state: An _IncludeState instance in which the headers are inserted.
      function_state: A _FunctionState instance which counts function lines, etc.
      class_state: A _ClassState instance which maintains information about
                   the current stack of nested class declarations being parsed.
      file_state: A _FileState instance which maintains information about
                  the state of things in the file.
      error: A callable to which errors are reported.  It is passed through
             unchanged to the individual checks.

    """
    # Function bookkeeping must see every line, including NOLINT ones.
    detect_functions(clean_lines, line, function_state, error)
    check_for_function_lengths(clean_lines, line, function_state, error)

    # Lines marked NOLINT are exempt from everything below.
    if search(r'\bNOLINT\b', clean_lines.raw_lines[line]):
        return

    check_function_definition(filename, file_extension, clean_lines, line, function_state, error)
    check_pass_ptr_usage(clean_lines, line, function_state, error)
    check_for_multiline_comments_and_strings(clean_lines, line, error)
    check_style(clean_lines, line, file_extension, class_state, file_state, error)
    check_language(filename, clean_lines, line, file_extension, include_state,
                   file_state, error)
    check_for_non_standard_constructs(clean_lines, line, class_state, error)
    check_posix_threading(clean_lines, line, error)
    check_invalid_increment(clean_lines, line, error)
3365
3366
def _process_lines(filename, file_extension, lines, error, min_confidence):
    """Lints the given lines, reporting each problem through error.

    Args:
      filename: Name of the file that is being checked.
      file_extension: The extension of the file, without the leading dot.
      lines: A list of strings, one per line of the file, with the last
             element being empty if the file is terminated with a newline.
      error: A callable to which errors are reported, which takes 4
             arguments.
      min_confidence: Value handed to the _FunctionState that tracks
                      function lengths.
    """
    # Pad both ends with marker comments; the caller's list is not modified
    # by the concatenation itself.
    leading_marker = ['// marker so line numbers and indices both start at 1']
    trailing_marker = ['// marker so line numbers end in a known way']
    lines = leading_marker + lines + trailing_marker

    include_state = _IncludeState()
    function_state = _FunctionState(min_confidence)
    class_state = _ClassState()

    # Checks that run before comment removal.
    check_for_copyright(lines, error)
    if file_extension == 'h':
        check_for_header_guard(filename, lines, error)

    remove_multi_line_comments(lines, error)
    clean_lines = CleansedLines(lines)
    file_state = _FileState(clean_lines, file_extension)

    # Per-line checks.
    for line_index in xrange(clean_lines.num_lines()):
        process_line(filename, file_extension, clean_lines, line_index,
                     include_state, function_state, class_state, file_state,
                     error)
    class_state.check_finished(error)

    check_for_include_what_you_use(filename, clean_lines, include_state, error)

    # This check is made here rather than inside process_line so that it
    # sees raw lines rather than "cleaned" lines.
    check_for_unicode_replacement_characters(lines, error)

    check_for_new_line_at_eof(lines, error)
3404
3405
class CppChecker(object):

    """Processes C++ lines for checking style."""

    # This list is used to--
    #
    # (1) generate an explicit list of all possible categories,
    # (2) unit test that all checked categories have valid names, and
    # (3) unit test that all categories are getting unit tested.
    #
    categories = set([
        'build/class',
        'build/deprecated',
        'build/endif_comment',
        'build/forward_decl',
        'build/header_guard',
        'build/include',
        'build/include_order',
        'build/include_what_you_use',
        'build/namespaces',
        'build/printf_format',
        'build/storage_class',
        'build/using_std',
        'legal/copyright',
        'readability/braces',
        'readability/casting',
        'readability/check',
        'readability/comparison_to_zero',
        'readability/constructors',
        'readability/control_flow',
        'readability/fn_size',
        'readability/function',
        'readability/multiline_comment',
        'readability/multiline_string',
        'readability/parameter_name',
        'readability/naming',
        'readability/null',
        'readability/pass_ptr',
        'readability/streams',
        'readability/todo',
        'readability/utf8',
        'readability/webkit_api',
        'runtime/arrays',
        'runtime/casting',
        'runtime/explicit',
        'runtime/init',
        'runtime/int',
        'runtime/invalid_increment',
        'runtime/max_min_macros',
        'runtime/memset',
        'runtime/printf',
        'runtime/printf_format',
        'runtime/references',
        'runtime/rtti',
        'runtime/sizeof',
        'runtime/string',
        'runtime/threadsafe_fn',
        'runtime/virtual',
        'whitespace/blank_line',
        'whitespace/braces',
        'whitespace/comma',
        'whitespace/comments',
        'whitespace/declaration',
        'whitespace/end_of_line',
        'whitespace/ending_newline',
        'whitespace/indent',
        'whitespace/labels',
        'whitespace/line_length',
        'whitespace/newline',
        'whitespace/operators',
        'whitespace/parens',
        'whitespace/semicolon',
        'whitespace/tab',
        'whitespace/todo',
        ])

    def __init__(self, file_path, file_extension, handle_style_error,
                 min_confidence):
        """Create a CppChecker instance.

        Args:
          file_path: The path of the file to check; passed to
                     _process_lines() as the filename.
          file_extension: A string that is the file extension, without
                          the leading dot.
          handle_style_error: The callable passed to _process_lines() as
                              the error-reporting function.
          min_confidence: The value passed to _process_lines() as the
                          minimum confidence.

        """
        self.file_extension = file_extension
        self.file_path = file_path
        self.handle_style_error = handle_style_error
        self.min_confidence = min_confidence

    # Useful for unit testing.
    def __eq__(self, other):
        """Return whether this CppChecker instance is equal to another.

        Two instances are equal when their file extension, file path,
        error handler and minimum confidence all compare equal.  For an
        operand that is not a CppChecker, NotImplemented is returned so
        that Python falls back to its default comparison instead of this
        method raising AttributeError on the missing attributes.

        """
        if not isinstance(other, CppChecker):
            return NotImplemented
        return not (self.file_extension != other.file_extension
                    or self.file_path != other.file_path
                    or self.handle_style_error != other.handle_style_error
                    or self.min_confidence != other.min_confidence)

    # Useful for unit testing.
    def __ne__(self, other):
        """Return whether this CppChecker instance differs from another."""
        # Python does not automatically deduce __ne__() from __eq__().
        equal = self.__eq__(other)
        if equal is NotImplemented:
            # Negating NotImplemented would yield a bogus False; hand it
            # back so that Python falls back to its default comparison.
            return NotImplemented
        return not equal

    def check(self, lines):
        """Run the C++ style checks over the given lines.

        Args:
          lines: The lines of the file, forwarded unchanged to
                 _process_lines() together with the values given to the
                 constructor.

        """
        _process_lines(self.file_path, self.file_extension, lines,
                       self.handle_style_error, self.min_confidence)
3518
3519
3520# FIXME: Remove this function (requires refactoring unit tests).
def process_file_data(filename, file_extension, lines, error, min_confidence, unit_test_config):
    """Check lines with a CppChecker while a unit-test config is installed.

    The module-level _unit_test_config is set to unit_test_config for the
    duration of the check and reset to an empty dict afterwards.

    Args:
      filename: Path of the file being checked; passed to CppChecker.
      file_extension: The file extension, without the leading dot.
      lines: The lines to check; passed to CppChecker.check().
      error: The callable passed to CppChecker as its error handler.
      min_confidence: The minimum confidence passed to CppChecker.
      unit_test_config: The value to install as _unit_test_config while
                        the check runs.

    """
    global _unit_test_config
    _unit_test_config = unit_test_config
    try:
        checker = CppChecker(filename, file_extension, error, min_confidence)
        checker.check(lines)
    finally:
        # Reset even when the check raises, so one failing call cannot
        # leak its config into later checks.
        _unit_test_config = {}
3527