1#!/usr/bin/python
2#
3# Copyright (c) 2009 Google Inc. All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#    * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#    * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15#    * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31"""Does google-lint on c++ files.
32
33The goal of this script is to identify places in the code that *may*
34be in non-compliance with google style.  It does not attempt to fix
up these problems -- the point is to educate.  It also does not
attempt to find all problems, or to ensure that everything it does
find is legitimately a problem.
38
39In particular, we can get very confused by /* and // inside strings!
40We do a small hack, which is to ignore //'s with "'s after them on the
41same line, but it is far from perfect (in either direction).
42"""
43
44import codecs
45import copy
46import getopt
47import math  # for log
48import os
49import re
50import sre_compile
51import string
52import sys
53import unicodedata
54
55
# Command-line help text: syntax summary followed by a description of
# every supported flag.
_USAGE = """
Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
                   [--counting=total|toplevel|detailed] [--root=subdir]
                   [--linelength=digits] [--extensions=ext,ext,...]
        <file> [file] ...

  The style guidelines this tries to follow are those in
    http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml

  Every problem is given a confidence score from 1-5, with 5 meaning we are
  certain of the problem, and 1 meaning it could be a legitimate construct.
  This will miss some errors, and is not a substitute for a code review.

  To suppress false-positive errors of a certain category, add a
  'NOLINT(category)' comment to the line.  NOLINT or NOLINT(*)
  suppresses errors of all categories on that line.

  The files passed in will be linted; at least one file must be provided.
  Default linted extensions are .cc, .cpp, .cu, .cuh and .h.  Change the
  extensions with the --extensions flag.

  Flags:

    output=vs7
      By default, the output is formatted to ease emacs parsing.  Visual Studio
      compatible output (vs7) may also be used.  Other formats are unsupported.

    verbose=#
      Specify a number 0-5 to restrict errors to certain verbosity levels.

    filter=-x,+y,...
      Specify a comma-separated list of category-filters to apply: only
      error messages whose category names pass the filters will be printed.
      (Category names are printed with the message and look like
      "[whitespace/indent]".)  Filters are evaluated left to right.
      "-FOO" and "FOO" means "do not print categories that start with FOO".
      "+FOO" means "do print categories that start with FOO".

      Examples: --filter=-whitespace,+whitespace/braces
                --filter=whitespace,runtime/printf,+runtime/printf_format
                --filter=-,+build/include_what_you_use

      To see a list of all the categories used in cpplint, pass no arg:
         --filter=

    counting=total|toplevel|detailed
      The total number of errors found is always printed. If
      'toplevel' is provided, then the count of errors in each of
      the top-level categories like 'build' and 'whitespace' will
      also be printed. If 'detailed' is provided, then a count
      is provided for each category like 'build/class'.

    root=subdir
      The root directory used for deriving header guard CPP variable.
      By default, the header guard CPP variable is calculated as the relative
      path to the directory that contains .git, .hg, or .svn.  When this flag
      is specified, the relative path is calculated from the specified
      directory. If the specified directory does not exist, this flag is
      ignored.

      Examples:
        Assuming that src/.git exists, the header guard CPP variables for
        src/chrome/browser/ui/browser.h are:

        No flag => CHROME_BROWSER_UI_BROWSER_H_
        --root=chrome => BROWSER_UI_BROWSER_H_
        --root=chrome/browser => UI_BROWSER_H_

    linelength=digits
      This is the allowed line length for the project. The default value is
      80 characters.

      Examples:
        --linelength=120

    extensions=extension,extension,...
      The allowed file extensions that cpplint will check

      Examples:
        --extensions=hpp,cpp
"""
137
# We categorize each error message we print.  Here are the categories.
# We want an explicit list so we can list them all in cpplint --filter=.
# If you add a new error message with a new category, add it to the list
# here!  cpplint_unittest.py should tell you if you forget to do this.
#
# ParseNolintSuppressions() also validates NOLINT(category) comments
# against this list.
_ERROR_CATEGORIES = [
  'build/class',
  'build/deprecated',
  'build/endif_comment',
  'build/explicit_make_pair',
  'build/forward_decl',
  'build/header_guard',
  'build/include',
  'build/include_alpha',
  'build/include_order',
  'build/include_what_you_use',
  'build/namespaces',
  'build/printf_format',
  'build/storage_class',
  'legal/copyright',
  'readability/alt_tokens',
  'readability/braces',
  'readability/casting',
  'readability/check',
  'readability/constructors',
  'readability/fn_size',
  'readability/function',
  'readability/multiline_comment',
  'readability/multiline_string',
  'readability/namespace',
  'readability/nolint',
  'readability/nul',
  'readability/streams',
  'readability/todo',
  'readability/utf8',
  'runtime/arrays',
  'runtime/casting',
  'runtime/explicit',
  'runtime/int',
  'runtime/init',
  'runtime/invalid_increment',
  'runtime/member_string_references',
  'runtime/memset',
  'runtime/operator',
  'runtime/printf',
  'runtime/printf_format',
  'runtime/references',
  'runtime/sizeof',
  'runtime/string',
  'runtime/threadsafe_fn',
  'runtime/vlog',
  'whitespace/blank_line',
  'whitespace/braces',
  'whitespace/comma',
  'whitespace/comments',
  'whitespace/empty_conditional_body',
  'whitespace/empty_loop_body',
  'whitespace/end_of_line',
  'whitespace/ending_newline',
  'whitespace/forcolon',
  'whitespace/indent',
  'whitespace/line_length',
  'whitespace/newline',
  'whitespace/operators',
  'whitespace/parens',
  'whitespace/semicolon',
  'whitespace/tab',
  'whitespace/todo'
  ]
206
# The default state of the category filter.  Filters given with the --filter=
# flag are appended after these, so the flag takes priority over the defaults.
# By default all errors are on, so only add here categories that should be
# off by default (i.e., categories that must be enabled by the --filter= flags).
# All entries here should start with a '-' or '+', as in the --filter= flag.
_DEFAULT_FILTERS = ['-build/include_alpha']
212
213# We used to check for high-bit characters, but after much discussion we
214# decided those were OK, as long as they were in UTF-8 and didn't represent
215# hard-coded international strings, which belong in a separate i18n file.
216
217
# Names of C++ headers, in three groups: legacy headers, C++ library
# headers, and C++ headers for C library facilities.  Stored as a frozenset
# so the table is immutable and membership tests are cheap.
_CPP_HEADERS = frozenset([
    # Legacy
    'algobase.h',
    'algo.h',
    'alloc.h',
    'builtinbuf.h',
    'bvector.h',
    'complex.h',
    'defalloc.h',
    'deque.h',
    'editbuf.h',
    'fstream.h',
    'function.h',
    'hash_map',
    'hash_map.h',
    'hash_set',
    'hash_set.h',
    'hashtable.h',
    'heap.h',
    'indstream.h',
    'iomanip.h',
    'iostream.h',
    'istream.h',
    'iterator.h',
    'list.h',
    'map.h',
    'multimap.h',
    'multiset.h',
    'ostream.h',
    'pair.h',
    'parsestream.h',
    'pfstream.h',
    'procbuf.h',
    'pthread_alloc',
    'pthread_alloc.h',
    'rope',
    'rope.h',
    'ropeimpl.h',
    'set.h',
    'slist',
    'slist.h',
    'stack.h',
    'stdiostream.h',
    'stl_alloc.h',
    'stl_relops.h',
    'streambuf.h',
    'stream.h',
    'strfile.h',
    'strstream.h',
    'tempbuf.h',
    'tree.h',
    'type_traits.h',
    'vector.h',
    # 17.6.1.2 C++ library headers
    'algorithm',
    'array',
    'atomic',
    'bitset',
    'chrono',
    'codecvt',
    'complex',
    'condition_variable',
    'deque',
    'exception',
    'forward_list',
    'fstream',
    'functional',
    'future',
    'initializer_list',
    'iomanip',
    'ios',
    'iosfwd',
    'iostream',
    'istream',
    'iterator',
    'limits',
    'list',
    'locale',
    'map',
    'memory',
    'mutex',
    'new',
    'numeric',
    'ostream',
    'queue',
    'random',
    'ratio',
    'regex',
    'set',
    'sstream',
    'stack',
    'stdexcept',
    'streambuf',
    'string',
    'strstream',
    'system_error',
    'thread',
    'tuple',
    'typeindex',
    'typeinfo',
    'type_traits',
    'unordered_map',
    'unordered_set',
    'utility',
    'valarray',
    'vector',
    # 17.6.1.2 C++ headers for C library facilities
    'cassert',
    'ccomplex',
    'cctype',
    'cerrno',
    'cfenv',
    'cfloat',
    'cinttypes',
    'ciso646',
    'climits',
    'clocale',
    'cmath',
    'csetjmp',
    'csignal',
    'cstdalign',
    'cstdarg',
    'cstdbool',
    'cstddef',
    'cstdint',
    'cstdio',
    'cstdlib',
    'cstring',
    'ctgmath',
    'ctime',
    'cuchar',
    'cwchar',
    'cwctype',
    ])
353
# Assertion macros.  These are defined in base/logging.h and
# testing/base/gunit.h.  Note that the _M versions need to come first
# for substring matching to work.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE.
# _CHECK_REPLACEMENT[macro][operator] is the name of the comparison macro
# that should be used instead of "macro(a operator b)".
_CHECK_REPLACEMENT = dict((_macro, {}) for _macro in _CHECK_MACROS)

# Positive assertions keep the sense of the comparison operator.
for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
  for _macro, _template in (('DCHECK', 'DCHECK_%s'),
                            ('CHECK', 'CHECK_%s'),
                            ('EXPECT_TRUE', 'EXPECT_%s'),
                            ('ASSERT_TRUE', 'ASSERT_%s'),
                            ('EXPECT_TRUE_M', 'EXPECT_%s_M'),
                            ('ASSERT_TRUE_M', 'ASSERT_%s_M')):
    _CHECK_REPLACEMENT[_macro][op] = _template % replacement

# Negative assertions map each operator to its logical inverse.
for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
  for _macro, _template in (('EXPECT_FALSE', 'EXPECT_%s'),
                            ('ASSERT_FALSE', 'ASSERT_%s'),
                            ('EXPECT_FALSE_M', 'EXPECT_%s_M'),
                            ('ASSERT_FALSE_M', 'ASSERT_%s_M')):
    _CHECK_REPLACEMENT[_macro][op] = _template % inv_replacement
385
# Alternative tokens and their replacements.  For full list, see section 2.5
# Alternative tokens [lex.digraph] in the C++ standard.
#
# Digraphs (such as '%:') are not included here since it's a mess to
# match those on a word boundary.
_ALT_TOKEN_REPLACEMENT = {
    'and': '&&',
    'bitor': '|',
    'or': '||',
    'xor': '^',
    'compl': '~',
    'bitand': '&',
    'and_eq': '&=',
    'or_eq': '|=',
    'xor_eq': '^=',
    'not': '!',
    'not_eq': '!='
    }

# Regular expression matching any of the keywords above.  The keyword must be
# preceded by one of " =()" and followed by a space, "(" or end of line; this
# is meant to avoid matching the keywords outside of boolean expressions.
#
# False positives include C-style multi-line comments and multi-line strings
# but those have always been troublesome for cpplint.
_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    r'[ =()](%s)(?=[ (]|$)' % '|'.join(_ALT_TOKEN_REPLACEMENT))
412
413
# These constants define types of headers for use with
# _IncludeState.CheckNextIncludeOrder().  They are also the keys of
# _IncludeState._TYPE_NAMES.
_C_SYS_HEADER = 1
_CPP_SYS_HEADER = 2
_LIKELY_MY_HEADER = 3
_POSSIBLE_MY_HEADER = 4
_OTHER_HEADER = 5

# These constants define the current inline assembly state
_NO_ASM = 0       # Outside of inline assembly block
_INSIDE_ASM = 1   # Inside inline assembly block
_END_ASM = 2      # Last line of inline assembly block
_BLOCK_ASM = 3    # The whole block is an inline assembly block

# Match start of assembly blocks: one of asm/_asm/__asm/__asm__, optionally
# followed by volatile/__volatile__, then an opening '{' or '('.
_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
                        r'(?:\s+(volatile|__volatile__))?'
                        r'\s*[{(]')
432
433
# Cache of compiled regular expressions, keyed by pattern.  It is filled
# lazily and shared by Match(), ReplaceAll() and Search().
_regexp_compile_cache = {}

# Finds occurrences of NOLINT or NOLINT(...).
_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')

# {str, set(int)}: a map from error categories to sets of linenumbers
# on which those errors are expected and should be suppressed.
# The key None holds the lines on which all categories are suppressed.
_error_suppressions = {}

# The root directory used for deriving header guard CPP variable.
# This is set by --root flag.
_root = None

# The allowed line length of files.
# This is set by --linelength flag.
_line_length = 80

# The allowed extensions for file names
# This is set by --extensions flag.
_valid_extensions = set(['cc', 'h', 'cpp', 'cu', 'cuh'])
454
def ParseNolintSuppressions(filename, raw_line, linenum, error):
  """Records the NOLINT suppression found on one line, if any.

  Looks for the first NOLINT or NOLINT(category) marker in raw_line and
  registers linenum in the global _error_suppressions map.  A bare NOLINT
  or NOLINT(*) is stored under the key None, meaning "all categories".
  A category that is not listed in _ERROR_CATEGORIES is reported through
  the error callback instead of being recorded.

  Args:
    filename: str, the name of the input file.
    raw_line: str, the line of input text, with comments.
    linenum: int, the number of the current line.
    error: function, an error handler.
  """
  # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
  found = _RE_SUPPRESSION.search(raw_line)
  if not found:
    return

  spec = found.group(1)
  if spec is None or spec == '(*)':
    # No category, or the wildcard: suppress everything on this line.
    _error_suppressions.setdefault(None, set()).add(linenum)
    return

  if not (spec.startswith('(') and spec.endswith(')')):
    return

  name = spec[1:-1]  # Drop the surrounding parentheses.
  if name in _ERROR_CATEGORIES:
    _error_suppressions.setdefault(name, set()).add(linenum)
  else:
    error(filename, linenum, 'readability/nolint', 5,
          'Unknown NOLINT error category: %s' % name)
482
483
def ResetNolintSuppressions():
  """Resets the set of NOLINT suppressions to empty.

  The global _error_suppressions dict is cleared in place, so it stays the
  same object for every function that reads it.
  """
  _error_suppressions.clear()
487
488
def IsErrorSuppressedByNolint(category, linenum):
  """Tells whether a NOLINT comment silences this category on this line.

  Reads the global _error_suppressions map, which is maintained by
  ParseNolintSuppressions/ResetNolintSuppressions.  A line counts as
  suppressed when it is registered either for the given category or under
  the None key, which stands for "all categories".

  Args:
    category: str, the category of the error.
    linenum: int, the current line number.
  Returns:
    bool, True iff the error should be suppressed due to a NOLINT comment.
  """
  nothing = set()
  for key in (category, None):
    if linenum in _error_suppressions.get(key, nothing):
      return True
  return False
503
def Match(pattern, s):
  """Matches the string with the pattern, caching the compiled regexp.

  Args:
    pattern: regex pattern; it is applied at the start of s only.
    s: string to match against.

  Returns:
    The match object, or None if s does not start with a match.
  """
  # The regexp compilation caching is inlined in both Match and Search for
  # performance reasons; factoring it out into a separate function turns out
  # to be noticeably expensive.
  #
  # re.compile is used rather than sre_compile.compile: sre_compile is an
  # internal module that is deprecated since Python 3.11.
  if pattern not in _regexp_compile_cache:
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].match(s)
512
513
def ReplaceAll(pattern, rep, s):
  """Replaces instances of pattern in a string with a replacement.

  The compiled regex is kept in a cache shared by Match and Search.

  Args:
    pattern: regex pattern
    rep: replacement text
    s: search string

  Returns:
    string with replacements made (or original string if no replacements)
  """
  # re.compile is used rather than sre_compile.compile: sre_compile is an
  # internal module that is deprecated since Python 3.11.
  if pattern not in _regexp_compile_cache:
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].sub(rep, s)
530
531
def Search(pattern, s):
  """Searches the string for the pattern, caching the compiled regexp.

  Args:
    pattern: regex pattern; it may match anywhere in s.
    s: string to search.

  Returns:
    The match object for the first match, or None if there is none.
  """
  # re.compile is used rather than sre_compile.compile: sre_compile is an
  # internal module that is deprecated since Python 3.11.
  if pattern not in _regexp_compile_cache:
    _regexp_compile_cache[pattern] = re.compile(pattern)
  return _regexp_compile_cache[pattern].search(s)
537
538
class _IncludeState(dict):
  """Remembers included headers and validates the order they appear in.

  As a dict, an _IncludeState object serves as a mapping between include
  filename and line number on which that file was included.

  CheckNextIncludeOrder() is meant to be called once for each header in the
  file, passing one of the header type constants defined above.  A header
  that appears in an illegal position is reported through the non-empty
  error message that the method returns.

  """
  # self._section only ever moves forward through these values.  A system
  # header that would require moving backwards is reported as an error by
  # CheckNextIncludeOrder.
  _INITIAL_SECTION = 0
  _MY_H_SECTION = 1
  _C_SECTION = 2
  _CPP_SECTION = 3
  _OTHER_H_SECTION = 4

  # Human-readable names used to build the error message.
  _TYPE_NAMES = {
      _C_SYS_HEADER: 'C system header',
      _CPP_SYS_HEADER: 'C++ system header',
      _LIKELY_MY_HEADER: 'header this file implements',
      _POSSIBLE_MY_HEADER: 'header this file may implement',
      _OTHER_HEADER: 'other header',
      }
  _SECTION_NAMES = {
      _INITIAL_SECTION: "... nothing. (This can't be an error.)",
      _MY_H_SECTION: 'a header this file implements',
      _C_SECTION: 'C system header',
      _CPP_SECTION: 'C++ system header',
      _OTHER_H_SECTION: 'other header',
      }

  def __init__(self):
    super(_IncludeState, self).__init__()
    self.ResetSection()

  def ResetSection(self):
    """Goes back to the initial section and forgets the last header."""
    # The path of last found header.
    self._last_header = ''
    # The name of the current section.
    self._section = self._INITIAL_SECTION

  def SetLastHeader(self, header_path):
    """Remembers header_path as the most recently seen header."""
    self._last_header = header_path

  def CanonicalizeAlphabeticalOrder(self, header_path):
    """Returns a path canonicalized for alphabetical comparison.

    - removes '-inl' since we don't require them to be after the main header.
    - replaces "-" with "_" so they both cmp the same.
    - lowercase everything, just in case.

    Args:
      header_path: Path to be canonicalized.

    Returns:
      Canonicalized path.
    """
    without_inl = header_path.replace('-inl.h', '.h')
    underscored = without_inl.replace('-', '_')
    return underscored.lower()

  def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
    """Check if a header is in alphabetical order with the previous header.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      header_path: Canonicalized header to be checked.

    Returns:
      Returns true if the header is in alphabetical order.
    """
    # When the section changes, _last_header is reset to the empty string,
    # which never sorts after the current header.
    if self._last_header <= header_path:
      return True
    # Out of order: accepted only when the previous line is blank, in which
    # case the headers are assumed to be intentionally sorted that way.
    return Match(r'^\s*$', clean_lines.elided[linenum - 1]) is not None

  def CheckNextIncludeOrder(self, header_type):
    """Returns a non-empty error message if the next header is out of order.

    This function also updates the internal state to be ready to check
    the next include.

    Args:
      header_type: One of the _XXX_HEADER constants defined above.

    Returns:
      The empty string if the header is in the right order, or an
      error message describing what's wrong.

    """
    # Built up front, before the state is touched, because it describes the
    # section we are in right now.
    error_message = ('Found %s after %s' %
                     (self._TYPE_NAMES[header_type],
                      self._SECTION_NAMES[self._section]))

    previous_section = self._section
    system_sections = {
        _C_SYS_HEADER: self._C_SECTION,
        _CPP_SYS_HEADER: self._CPP_SECTION,
        }

    if header_type in system_sections:
      wanted = system_sections[header_type]
      if self._section > wanted:
        # A system header after a later section: that is the error case.
        self._last_header = ''
        return error_message
      self._section = wanted
    elif header_type in (_LIKELY_MY_HEADER, _POSSIBLE_MY_HEADER):
      # Outside of the leading position such a header is treated like any
      # other header; for _POSSIBLE_MY_HEADER this is because we are not
      # sure enough that the header is associated with this file.
      if self._section > self._MY_H_SECTION:
        self._section = self._OTHER_H_SECTION
      else:
        self._section = self._MY_H_SECTION
    else:
      assert header_type == _OTHER_HEADER
      self._section = self._OTHER_H_SECTION

    # Alphabetical order is only tracked within a section.
    if self._section != previous_section:
      self._last_header = ''

    return ''
674
675
class _CppLintState(object):
  """Maintains module-wide state.

  Holds the verbosity level, the output format, the active category filters,
  the counting style and the error counters.
  """

  def __init__(self):
    self.verbose_level = 1  # global setting.
    self.error_count = 0    # global count of reported errors
    # filters to apply when emitting error messages
    self.filters = _DEFAULT_FILTERS[:]
    self.counting = 'total'  # In what way are we counting errors?
    self.errors_by_category = {}  # string to int dict storing error counts

    # output format:
    # "emacs" - format that emacs can parse (default)
    # "vs7" - format that Microsoft Visual Studio 7 can parse
    self.output_format = 'emacs'

  def SetOutputFormat(self, output_format):
    """Sets the output format for errors."""
    self.output_format = output_format

  def SetVerboseLevel(self, level):
    """Sets the module's verbosity, and returns the previous setting."""
    last_verbose_level = self.verbose_level
    self.verbose_level = level
    return last_verbose_level

  def SetCountingStyle(self, counting_style):
    """Sets the module's counting options."""
    self.counting = counting_style

  def SetFilters(self, filters):
    """Sets the error-message filters.

    These filters are applied when deciding whether to emit a given
    error message.

    Args:
      filters: A string of comma-separated filters (eg "+whitespace/indent").
               Each filter should start with + or -; else we die.

    Raises:
      ValueError: The comma-separated filters did not all start with '+' or '-'.
                  E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
    """
    # Default filters always have less priority than the flag ones.
    self.filters = _DEFAULT_FILTERS[:]
    for filt in filters.split(','):
      clean_filt = filt.strip()
      if clean_filt:  # Skip empty entries such as the one in "a,,b".
        self.filters.append(clean_filt)
    for filt in self.filters:
      if not filt.startswith(('+', '-')):
        raise ValueError('Every filter in --filters must start with + or -'
                         ' (%s does not)' % filt)

  def ResetErrorCounts(self):
    """Sets the module's error statistic back to zero."""
    self.error_count = 0
    self.errors_by_category = {}

  def IncrementErrorCount(self, category):
    """Bumps the module's error statistic.

    Args:
      category: The error category, e.g. 'build/class'.  With the 'toplevel'
                counting style only the part before the first '/' is counted.
    """
    self.error_count += 1
    if self.counting in ('toplevel', 'detailed'):
      if self.counting != 'detailed':
        category = category.split('/')[0]
      self.errors_by_category[category] = (
          self.errors_by_category.get(category, 0) + 1)

  def PrintErrorCounts(self):
    """Print a summary of errors by category, and the total."""
    # dict.items() exists on both Python 2 and Python 3; iteritems() is
    # Python 2 only.
    for category, count in self.errors_by_category.items():
      sys.stderr.write('Category \'%s\' errors found: %d\n' %
                       (category, count))
    sys.stderr.write('Total errors found: %d\n' % self.error_count)
752
# The single module-wide _CppLintState instance; the module-level accessor
# functions read and update it.
_cpplint_state = _CppLintState()
754
755
def _OutputFormat():
  """Gets the module's output format.

  Returns:
    The output_format attribute of the module-wide _cpplint_state.
  """
  return _cpplint_state.output_format
759
760
def _SetOutputFormat(output_format):
  """Sets the module's output format.

  Args:
    output_format: The new format, forwarded unchanged to
                   _cpplint_state.SetOutputFormat().
  """
  _cpplint_state.SetOutputFormat(output_format)
764
765
def _VerboseLevel():
  """Returns the module's verbosity setting.

  Returns:
    The verbose_level attribute of the module-wide _cpplint_state.
  """
  return _cpplint_state.verbose_level
769
770
def _SetVerboseLevel(level):
  """Sets the module's verbosity, and returns the previous setting.

  Args:
    level: The new verbosity level.

  Returns:
    The verbosity level that was in effect before this call.
  """
  return _cpplint_state.SetVerboseLevel(level)
774
775
def _SetCountingStyle(level):
  """Sets the module's counting options.

  Args:
    level: The counting style, forwarded unchanged to
           _cpplint_state.SetCountingStyle().
  """
  _cpplint_state.SetCountingStyle(level)
779
780
def _Filters():
  """Returns the module's list of output filters, as a list.

  The list object held by _cpplint_state is returned itself, not a copy.
  """
  return _cpplint_state.filters
784
785
def _SetFilters(filters):
  """Sets the module's error-message filters.

  These filters are applied when deciding whether to emit a given
  error message.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.

  Raises:
    ValueError: Propagated from _CppLintState.SetFilters() when a filter
                does not start with '+' or '-'.
  """
  _cpplint_state.SetFilters(filters)
797
798
class _FunctionState(object):
  """Keeps the name of the current function and the size of its body."""

  _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
  _TEST_TRIGGER = 400    # base trigger for names starting with TEST or Test.

  def __init__(self):
    self.current_function = ''
    self.lines_in_function = 0
    self.in_a_function = False

  def Begin(self, function_name):
    """Start analyzing function body.

    Args:
      function_name: The name of the function being tracked.
    """
    self.current_function = function_name
    self.lines_in_function = 0
    self.in_a_function = True

  def Count(self):
    """Count line in current function body."""
    if not self.in_a_function:
      return
    self.lines_in_function += 1

  def Check(self, error, filename, linenum):
    """Report if too many lines in function body.

    The limit is the base trigger doubled once per verbosity level.

    Args:
      error: The function to call with any errors found.
      filename: The name of the current file.
      linenum: The number of the line to check.
    """
    is_test = Match(r'T(EST|est)', self.current_function)
    base_trigger = self._TEST_TRIGGER if is_test else self._NORMAL_TRIGGER
    trigger = base_trigger * 2**_VerboseLevel()

    if self.lines_in_function <= trigger:
      return

    # Confidence is log2 of how many times the base trigger was exceeded,
    # truncated to an int and capped at 5.
    error_level = min(
        5, int(math.log(self.lines_in_function / base_trigger, 2)))
    error(filename, linenum, 'readability/fn_size', error_level,
          'Small and focused functions are preferred:'
          ' %s has %d non-comment lines'
          ' (error triggered by exceeding %d lines).'  % (
              self.current_function, self.lines_in_function, trigger))

  def End(self):
    """Stop analyzing function body."""
    self.in_a_function = False
854
class _IncludeError(Exception):
  """Exception type for a problem with the include order in a file."""
858
859
class FileInfo(object):
  """Provides utility functions for filenames.

  FileInfo provides easy access to the components of a file's path
  relative to the project root.
  """

  def __init__(self, filename):
    self._filename = filename

  def FullName(self):
    """Make Windows paths like Unix.

    Returns:
      The absolute path of the file, with backslashes replaced by '/'.
    """
    return os.path.abspath(self._filename).replace('\\', '/')

  def RepositoryName(self):
    r"""FullName after removing the local path to the repository.

    If we have a real absolute path name here we can try to do something smart:
    detecting the root of the checkout and truncating /path/to/checkout from
    the name so that we get header guards that don't include things like
    "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    people on different computers who have checked the source out to different
    locations won't see bogus errors.

    Returns:
      The path relative to the detected checkout root, or the full name when
      the file does not exist or no checkout root is found.
    """
    # The docstring above is a raw string because of the backslashes in the
    # Windows path example.
    fullname = self.FullName()

    if os.path.exists(fullname):
      project_dir = os.path.dirname(fullname)

      if os.path.exists(os.path.join(project_dir, ".svn")):
        # If there's a .svn file in the current directory, we recursively look
        # up the directory tree for the top of the SVN checkout
        root_dir = project_dir
        one_up_dir = os.path.dirname(root_dir)
        # Stop at the filesystem root, where dirname() returns its argument;
        # otherwise a ".svn" entry there would make this loop forever.
        while (one_up_dir != root_dir and
               os.path.exists(os.path.join(one_up_dir, ".svn"))):
          root_dir = os.path.dirname(root_dir)
          one_up_dir = os.path.dirname(one_up_dir)

        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

      def _HasVcsDir(path):
        """True if path directly contains .git, .hg or .svn."""
        return any(os.path.exists(os.path.join(path, marker))
                   for marker in (".git", ".hg", ".svn"))

      # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
      # searching up from the current path.
      root_dir = project_dir
      while (root_dir != os.path.dirname(root_dir) and
             not _HasVcsDir(root_dir)):
        root_dir = os.path.dirname(root_dir)

      if _HasVcsDir(root_dir):
        prefix = os.path.commonprefix([root_dir, project_dir])
        return fullname[len(prefix) + 1:]

    # Don't know what to do; header guard warnings may be wrong...
    return fullname

  def Split(self):
    """Splits the file into the directory, basename, and extension.

    For 'chrome/browser/browser.cc', Split() would
    return ('chrome/browser', 'browser', '.cc')

    Returns:
      A tuple of (directory, basename, extension).
    """

    googlename = self.RepositoryName()
    project, rest = os.path.split(googlename)
    return (project,) + os.path.splitext(rest)

  def BaseName(self):
    """File base name - text after the final slash, before the final period."""
    return self.Split()[1]

  def Extension(self):
    """File extension - the final period and the text following it."""
    return self.Split()[2]

  def NoExtension(self):
    """Repository-relative path without the extension: directory/basename."""
    return '/'.join(self.Split()[0:2])

  def IsSource(self):
    """File has a source file extension."""
    return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
948
949
def _ShouldPrintError(category, confidence, linenum):
  """Decides whether an error of the given category should be reported.

  An error is dropped when any of the following holds:
    * IsErrorSuppressedByNolint() reports it as suppressed for this line,
    * its confidence is below the configured verbosity level, or
    * the last filter whose prefix matches the category is a '-' filter.

  Args:
    category: The error category string, e.g. "whitespace/indent".
    confidence: The confidence score of the error.
    linenum: The number of the line the error was found on.

  Returns:
    True if the error should be printed, False otherwise.
  """
  if IsErrorSuppressedByNolint(category, linenum):
    return False
  if confidence < _cpplint_state.verbose_level:
    return False

  # Filters are applied in order, so a later matching filter overrides an
  # earlier one.
  suppressed = False
  for rule in _Filters():
    sign, prefix = rule[:1], rule[1:]
    if sign == '-' or sign == '+':
      if category.startswith(prefix):
        suppressed = (sign == '-')
    else:
      assert False  # should have been checked for in SetFilter.
  return not suppressed
975
976
def Error(filename, linenum, category, confidence, message):
  """Reports a lint error on stderr and counts it.

  Nothing is written (and nothing is counted) when _ShouldPrintError()
  rejects the error, e.g. because of a "NOLINT(category)" comment, the
  verbosity level or the active filters.  The confidence expresses how
  certain we are that this is a legitimate style regression and not a
  misidentification or a use that's sometimes justified.

  Args:
    filename: The name of the file containing the error.
    linenum: The number of the line containing the error.
    category: A string used to describe the "category" this bug
      falls under: "whitespace", say, or "runtime".  Categories
      may have a hierarchy separated by slashes: "whitespace/indent".
    confidence: A number from 1-5 representing a confidence score for
      the error, with 5 meaning that we are certain of the problem,
      and 1 meaning that it could be a legitimate construct.
    message: The error message.
  """
  if not _ShouldPrintError(category, confidence, linenum):
    return

  _cpplint_state.IncrementErrorCount(category)

  # Pick the line layout matching the configured output format; anything
  # other than 'vs7' or 'eclipse' gets the default layout.
  output_format = _cpplint_state.output_format
  if output_format == 'vs7':
    template = '%s(%s):  %s  [%s] [%d]\n'
  elif output_format == 'eclipse':
    template = '%s:%s: warning: %s  [%s] [%d]\n'
  else:
    template = '%s:%s:  %s  [%s] [%d]\n'
  sys.stderr.write(
      template % (filename, linenum, message, category, confidence))
1010
1011
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard:
# a backslash followed by a simple escape character, a run of digits, or
# 'x' and a run of hex digits.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches string literals.  Escape sequences should already have been removed
# with _RE_PATTERN_CLEANSE_LINE_ESCAPES, so no quote can appear inside.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches single-character literals.  Escape sequences should already have
# been removed with _RE_PATTERN_CLEANSE_LINE_ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that open and close on the same line.
# This RE is a little bit more complicated than one might expect, because we
# also have to decide which surrounding whitespace to remove so that comments
# inside statements are handled well.  The alternatives are tried in order:
#   1. a comment at the end of the line: whitespace on both sides is removed;
#   2. a comment followed by whitespace: the whitespace on the right is
#      removed;
#   3. a comment preceded by whitespace and followed by a non-word character:
#      the whitespace on the left is removed;
#   4. otherwise only the comment itself is matched.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
1032
1033
def IsCppString(line):
  """Tells whether text appended to 'line' would be inside a string constant.

  This function does not consider single-line nor multi-line comments.

  Args:
    line: is a partial line of code starting from the 0..n.

  Returns:
    True, if next character appended to 'line' is inside a
    string constant.
  """
  # Neutralize escaped backslashes first so that \\" is not mistaken for \".
  neutralized = line.replace('\\\\', 'XX')
  all_quotes = neutralized.count('"')
  escaped_quotes = neutralized.count('\\"')
  quote_char_literals = neutralized.count('\'"\'')
  # An odd number of remaining quotes means a string is still open.
  open_quotes = all_quotes - escaped_quotes - quote_char_literals
  return open_quotes % 2 == 1
1049
1050
def CleanseRawStrings(raw_lines):
  """Replaces C++11 raw string literals with empty strings.

    Before:
      static const char kData[] = R"(
          multi-line string
          )";

    After:
      static const char kData[] = ""
          (replaced by blank line)
          "";

  Args:
    raw_lines: list of raw lines.

  Returns:
    A new list with one entry per input line, where raw strings have been
    replaced by "" and lines fully inside a raw string are blank.
  """
  cleansed = []
  # While inside a multi-line raw string this holds its terminator, i.e.
  # ')' + delimiter + '"'; otherwise it is None.
  terminator = None
  for text in raw_lines:
    if terminator:
      close_at = text.find(terminator)
      if close_at < 0:
        # Still inside the raw string: blank the line out.
        text = ''
      else:
        # The raw string ends here.  Keep the line's leading whitespace,
        # insert a "" and resume copying what follows the terminator.
        indent = Match(r'^(\s*)\S', text).group(1)
        text = indent + '""' + text[close_at + len(terminator):]
        terminator = None
    else:
      # Look for beginning of a raw string.
      # See 2.14.15 [lex.string] for syntax.
      opening = Match(r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', text)
      if opening:
        before, tag, after = opening.group(1), opening.group(2), opening.group(3)
        terminator = ')' + tag + '"'
        close_at = after.find(terminator)
        if close_at < 0:
          # Start of a multi-line raw string.
          text = before + '""'
        else:
          # Raw string ended on the same line.
          text = before + '""' + after[close_at + len(terminator):]
          terminator = None

    cleansed.append(text)

  # TODO(unknown): if terminator is not None here, we might want to
  # emit a warning for unterminated string.
  return cleansed
1110
1111
def FindNextMultiLineCommentStart(lines, lineix):
  """Returns the index of the next line that opens a multi-line comment.

  A line qualifies when, ignoring surrounding whitespace, it starts with
  '/*' and the comment is not closed again on the same line.  Returns
  len(lines) when there is no such line at or after lineix.
  """
  for index in range(lineix, len(lines)):
    stripped = lines[index].strip()
    # Search for the end marker from offset 2 so that '/*/' does not count
    # as a closed comment.
    if stripped.startswith('/*') and stripped.find('*/', 2) < 0:
      return index
  return len(lines)
1121
1122
def FindNextMultiLineCommentEnd(lines, lineix):
  """Returns the index of the next line ending with '*/', or len(lines).

  The search starts at lineix (we are assumed to be inside a comment) and
  ignores whitespace around each line.
  """
  closing_lines = (index for index in range(lineix, len(lines))
                   if lines[index].strip().endswith('*/'))
  return next(closing_lines, len(lines))
1130
1131
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Overwrites lines[begin:end] in place with '// dummy' placeholders."""
  # The placeholder keeps the lines non-empty, so later checks do not emit
  # unnecessary blank line warnings for the removed comment.
  index = begin
  while index < end:
    lines[index] = '// dummy'
    index += 1
1138
1139
def RemoveMultiLineComments(filename, lines, error):
  """Blanks out every multi-line (C-style) comment in lines, in place.

  Args:
    filename: The name of the current file.
    lines: The list of lines to modify.
    error: The function to call when a comment is never closed.
  """
  cursor = 0
  while cursor < len(lines):
    first = FindNextMultiLineCommentStart(lines, cursor)
    if first >= len(lines):
      # No further multi-line comments.
      break
    last = FindNextMultiLineCommentEnd(lines, first)
    if last >= len(lines):
      error(filename, first + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      break
    RemoveMultiLineCommentsFromRange(lines, first, last + 1)
    # Continue searching after the comment that was just removed.
    cursor = last + 1
1154
1155
def CleanseComments(line):
  """Strips a trailing //-comment and single-line /* */ comments from a line.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  slashes_at = line.find('//')
  # Only the first '//' is considered, and only if it is not inside a string.
  is_comment = slashes_at != -1 and not IsCppString(line[:slashes_at])
  if is_comment:
    line = line[:slashes_at].rstrip()
  # Finally get rid of /* ... */ comments.
  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
1170
1171
class CleansedLines(object):
  """Holds several copies of all lines, each preprocessed differently.

  1) elided contains the lines without strings and comments,
  2) lines contains the lines without comments,
  3) lines_without_raw_strings contains the lines with C++11 raw strings
     replaced by empty strings, and
  4) raw_lines contains all the lines without processing.
  All of these are built with one entry per line of the input.
  """

  def __init__(self, lines):
    self.elided = []
    self.lines = []
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines_without_raw_strings = CleanseRawStrings(lines)
    for text in self.lines_without_raw_strings:
      self.lines.append(CleanseComments(text))
      # Collapse strings before removing comments so that comment markers
      # inside string literals are not mistaken for comments.
      self.elided.append(CleanseComments(self._CollapseStrings(text)))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Reduces string and character literals on a line to "" and ''.

    Lines matching _RE_PATTERN_INCLUDE are returned unchanged.

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided

    # Escaped characters go first, which makes collapsing the quotes basic;
    # things that look like escapes shouldn't occur outside of strings and
    # chars.  Character literals are collapsed before string literals.
    collapse_steps = (
        (_RE_PATTERN_CLEANSE_LINE_ESCAPES, ''),
        (_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES, "''"),
        (_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES, '""'),
    )
    for pattern, replacement in collapse_steps:
      elided = pattern.sub(replacement, elided)
    return elided
1217
1218
def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar):
  """Find the position just after the matching endchar.

  Args:
    line: a CleansedLines line.
    startpos: start searching at this position.
    depth: nesting level at startpos.
    startchar: expression opening character.
    endchar: expression closing character.

  Returns:
    On finding matching endchar: (index just after matching endchar, 0)
    Otherwise: (-1, new depth at end of this line)
  """
  # range() rather than xrange() so this also runs on Python 3.
  for i in range(startpos, len(line)):
    char = line[i]
    if char == startchar:
      depth += 1
    elif char == endchar:
      depth -= 1
      if depth == 0:
        return (i + 1, 0)
  return (-1, depth)
1241
1242
def CloseExpression(clean_lines, linenum, pos):
  """Finds where the bracket at the given position is closed.

  If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
  linenum/pos that correspond to the closing of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close.  Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum.
  """
  closing_for = {'(': ')', '[': ']', '{': '}', '<': '>'}

  line = clean_lines.elided[linenum]
  startchar = line[pos]
  if startchar not in closing_for:
    return (line, clean_lines.NumLines(), -1)
  endchar = closing_for[startchar]

  # Scan the first line from pos, then keep scanning whole lines, carrying
  # the nesting depth along, until the expression closes or the file ends.
  end_pos, num_open = FindEndOfExpressionInLine(
      line, pos, 0, startchar, endchar)
  while end_pos <= -1 and linenum < clean_lines.NumLines() - 1:
    linenum += 1
    line = clean_lines.elided[linenum]
    end_pos, num_open = FindEndOfExpressionInLine(
        line, 0, num_open, startchar, endchar)

  if end_pos > -1:
    return (line, linenum, end_pos)

  # Did not find endchar before end of file, give up
  return (line, clean_lines.NumLines(), -1)
1287
1288
def FindStartOfExpressionInLine(line, endpos, depth, startchar, endchar):
  """Find position at the matching startchar.

  This is almost the reverse of FindEndOfExpressionInLine, but note
  that the input position and returned position differs by 1.

  Args:
    line: a CleansedLines line.
    endpos: start searching at this position.
    depth: nesting level at endpos.
    startchar: expression opening character.
    endchar: expression closing character.

  Returns:
    On finding matching startchar: (index at matching startchar, 0)
    Otherwise: (-1, new depth at beginning of this line)
  """
  # Walk backwards from endpos down to index 0.  range() rather than
  # xrange() so this also runs on Python 3.
  for i in range(endpos, -1, -1):
    char = line[i]
    if char == endchar:
      depth += 1
    elif char == startchar:
      depth -= 1
      if depth == 0:
        return (i, 0)
  return (-1, depth)
1314
1315
def ReverseCloseExpression(clean_lines, linenum, pos):
  """Finds where the bracket closed at the given position was opened.

  If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
  linenum/pos that correspond to the opening of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *at* the opening brace, or
    (line, 0, -1) if we never find the matching opening brace.  Note
    we ignore strings and comments when matching; and the line we
    return is the 'cleansed' line at linenum.
  """
  opening_for = {')': '(', ']': '[', '}': '{', '>': '<'}

  line = clean_lines.elided[linenum]
  endchar = line[pos]
  if endchar not in opening_for:
    return (line, 0, -1)
  startchar = opening_for[endchar]

  # Scan the last line backwards from pos, then keep scanning whole lines
  # backwards, carrying the nesting depth along, until the expression opens
  # or the beginning of the file is reached.
  start_pos, num_open = FindStartOfExpressionInLine(
      line, pos, 0, startchar, endchar)
  while start_pos <= -1 and linenum > 0:
    linenum -= 1
    line = clean_lines.elided[linenum]
    start_pos, num_open = FindStartOfExpressionInLine(
        line, len(line) - 1, num_open, startchar, endchar)

  if start_pos > -1:
    return (line, linenum, start_pos)

  # Did not find startchar before beginning of file, give up
  return (line, 0, -1)
1359
1360
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # We'll say it should occur by line 10. Don't forget there's a
  # dummy line at the front, so the candidates are lines[1] .. lines[10].
  # The match is case-insensitive.
  has_copyright = any(re.search(r'Copyright', text, re.I)
                      for text in lines[1:11])
  if not has_copyright:
    error(filename, 0, 'legal/copyright', 5,
          'No copyright message found.  '
          'You should have a line: "Copyright [year] <Copyright Owner>"')
1372
1373
def GetHeaderGuardCPPVariable(filename):
  """Returns the CPP variable that should be used as a header guard.

  The variable is derived from the repository-relative path of the file:
  an optional leading "<_root>/" is removed, every '-', '.', '/' and
  whitespace character becomes '_', the result is upper-cased and a
  trailing '_' is appended.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file.

  """

  # Restores original filename in case that cpplint is invoked from Emacs's
  # flymake.
  filename = re.sub(r'_flymake\.h$', '.h', filename)
  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)

  fileinfo = FileInfo(filename)
  file_path_from_root = fileinfo.RepositoryName()
  if _root:
    # _root is a directory name, not a regular expression, so it is compared
    # literally (characters such as '+' or '.' must not be interpreted).
    # RepositoryName() always uses '/' as separator, hence the prefix is
    # normalized to '/' as well instead of relying on os.sep.
    root_prefix = _root.replace(os.sep, '/') + '/'
    if file_path_from_root.startswith(root_prefix):
      file_path_from_root = file_path_from_root[len(root_prefix):]
  return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
1396
1397
def CheckForHeaderGuard(filename, lines, error):
  """Verifies the #ifndef / #define / #endif header guard of a header file.

  Logs an error if no #ifndef header guard is present.  For other
  headers, checks that the full pathname is used.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  expected_guard = GetHeaderGuardCPPVariable(filename)

  guard_name = None       # argument of the first #ifndef
  guard_linenum = 0
  defined_name = None     # argument of the first #define
  last_endif = None       # full text of the last #endif line
  last_endif_linenum = 0
  for index, text in enumerate(lines):
    tokens = text.split()
    if len(tokens) >= 2:
      directive, argument = tokens[0], tokens[1]
      # Only the first occurrence of each directive is remembered.
      if directive == '#ifndef' and not guard_name:
        guard_name = argument
        guard_linenum = index
      if directive == '#define' and not defined_name:
        defined_name = argument
    # The last #endif wins, so keep overwriting.
    if text.startswith('#endif'):
      last_endif = text
      last_endif_linenum = index

  if not guard_name:
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          expected_guard)
    return

  if not defined_name:
    error(filename, 0, 'build/header_guard', 5,
          'No #define header guard found, suggested CPP variable is: %s' %
          expected_guard)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility; the latter is reported with confidence 0.
  if guard_name != expected_guard:
    if guard_name == expected_guard + '_':
      error_level = 0
    else:
      error_level = 5

    ParseNolintSuppressions(filename, lines[guard_linenum], guard_linenum,
                            error)
    error(filename, guard_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' %
          expected_guard)

  if defined_name != guard_name:
    error(filename, 0, 'build/header_guard', 5,
          '#ifndef and #define don\'t match, suggested CPP variable is: %s' %
          expected_guard)
    return

  if last_endif != ('#endif  // %s' % expected_guard):
    if last_endif == ('#endif  // %s' % (expected_guard + '_')):
      error_level = 0
    else:
      error_level = 5

    ParseNolintSuppressions(filename, lines[last_endif_linenum],
                            last_endif_linenum, error)
    error(filename, last_endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif  // %s"' % expected_guard)
1471
1472
def CheckForBadCharacters(filename, lines, error):
  """Reports every line that contains a bad character.

  Two kinds of bad characters:

  1. Unicode replacement characters: These indicate that either the file
  contained invalid UTF-8 (likely) or Unicode replacement characters (which
  it shouldn't).  Note that it's possible for this to throw off line
  numbering if the invalid UTF-8 occurred adjacent to a newline.

  2. NUL bytes.  These are problematic for some tools.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # (character to look for, error category, error message); for each line
  # the checks are applied in this order.
  bad_characters = (
      (u'\ufffd', 'readability/utf8',
       'Line contains invalid UTF-8 (or Unicode replacement character).'),
      ('\0', 'readability/nul', 'Line contains NUL byte.'),
  )
  for linenum, line in enumerate(lines):
    for needle, category, message in bad_characters:
      if needle in line:
        error(filename, linenum, category, 5, message)
1496
1497
def CheckForNewlineAtEOF(filename, lines, error):
  """Reports a file whose last line is not terminated by a newline.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # The array lines() was created by adding two newlines to the
  # original file (go figure), then splitting on \n.
  # So the file ends in \n exactly when the last-but-two element of
  # lines() exists and is empty.
  ends_with_newline = len(lines) >= 3 and not lines[-2]
  if ends_with_newline:
    return
  error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
        'Could not find a newline character at the end of the file.')
1514
1515
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Reports /* ... */ comments and "..." strings that extend past one line.

  /* ... */ comments are legit inside macros, for one line.
  Otherwise, we prefer // comments, so it's ok to warn about the
  other.  Likewise, it's ok for strings to extend across multiple
  lines, as long as a line continuation character (backslash)
  terminates each line. Although not currently prohibited by the C++
  style guide, it's ugly and unnecessary. We don't do well with either
  in this lint program, so we warn about both.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Drop all \\ (escaped backslashes) up front.  They are OK, and the
  # second (escaped) slash may trigger later \" detection erroneously.
  text = clean_lines.elided[linenum].replace('\\\\', '')

  # More comment openers than closers: a comment continues past this line.
  comment_opens = text.count('/*')
  comment_closes = text.count('*/')
  if comment_opens > comment_closes:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings.  '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped quotes: a string continues past this line.
  unescaped_quotes = text.count('"') - text.count('\\"')
  if unescaped_quotes % 2:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found.  This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings.  '
          'Use C++11 raw strings or concatenation instead.')
1552
1553
# Pairs of (thread-unsafe function call prefix, thread-safe replacement)
# consulted by CheckPosixThreading.
threading_list = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )


def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Checks for calls to thread-unsafe functions.

  Much code has been originally written without consideration of
  multi-threading. Also, engineers are relying on their old experience;
  they have learned posix before threading extensions were added. These
  tests guide the engineers to use thread-safe functions (when using
  posix directly).

  A call is reported when the function name is at the start of the line or
  is preceded by a character that is neither alphanumeric nor one of '_',
  '.', '>' (which would make it part of a longer identifier or a member
  call).  Every occurrence on the line is examined, but at most one error
  is reported per function per line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  for single_thread_function, multithread_safe_function in threading_list:
    ix = line.find(single_thread_function)
    # Keep looking past occurrences such as "my_rand(" so that a real call
    # later on the same line is not missed.
    while ix >= 0:
      if ix == 0 or (not line[ix - 1].isalnum() and
                     line[ix - 1] not in ('_', '.', '>')):
        error(filename, linenum, 'runtime/threadsafe_fn', 2,
              'Consider using ' + multithread_safe_function +
              '...) instead of ' + single_thread_function +
              '...) for improved thread safety.')
        break
      ix = line.find(single_thread_function, ix + 1)
1595
1596
def CheckVlogArguments(filename, clean_lines, linenum, error):
  """Reports VLOG() calls that are given a symbolic severity level.

  For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and
  VLOG(FATAL) are not.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  text = clean_lines.elided[linenum]
  symbolic_level = Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', text)
  if not symbolic_level:
    return
  error(filename, linenum, 'runtime/vlog', 5,
        'VLOG() should be used with numeric verbosity level.  '
        'Use LOG() if you want symbolic severity levels.')
1614
1615
# Matches a statement of the form "*count++;" or "*count--;" at the start of
# a line (after optional whitespace).  Such a statement moves the pointer
# instead of changing the value it points to.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')


def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Reports statements like *count++ that change the pointer, not the value.

  For example following function:
  void increment_counter(int* count) {
    *count++;
  }
  is invalid, because it effectively does count++, moving pointer, and should
  be replaced with ++*count, (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  text = clean_lines.elided[linenum]
  if not _RE_PATTERN_INVALID_INCREMENT.match(text):
    return
  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
1642
1643
class _BlockInfo(object):
  """Base record describing a generic block of code.

  Subclasses override CheckBegin and CheckEnd to add block-specific checks;
  the implementations here do nothing.
  """

  def __init__(self, seen_open_brace):
    self.seen_open_brace = seen_open_brace
    self.open_parentheses = 0
    self.inline_asm = _NO_ASM

  def CheckBegin(self, filename, clean_lines, linenum, error):
    """Hook for checks on the text up to the opening brace.

    This is mostly for checking the text after the class identifier
    and the "{", usually where the base class is specified.  For other
    blocks, there isn't much to check, so we always pass.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Hook for checks on the text after the closing brace.

    This is mostly used for checking end of namespace comments.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
1679
1680
class _ClassInfo(_BlockInfo):
  """Record describing a class or struct block."""

  def __init__(self, name, class_or_struct, clean_lines, linenum):
    _BlockInfo.__init__(self, False)
    self.name = name
    self.starting_linenum = linenum
    self.is_derived = False
    # Members are public by default in a struct and private in a class.
    self.is_struct = (class_or_struct == 'struct')
    self.access = 'public' if self.is_struct else 'private'

    # Remember initial indentation level for this class.  Using raw_lines here
    # instead of elided to account for leading comments.
    indent_match = Match(r'^( *)\S', clean_lines.raw_lines[linenum])
    self.class_indent = len(indent_match.group(1)) if indent_match else 0

    # Try to find the end of the class by balancing braces, starting at the
    # declaration line.  This will be confused by things like:
    #   class A {
    #   } *x = { ...
    #
    # But it's still good enough for CheckSectionSpacing.
    self.last_line = 0
    nesting = 0
    for index in range(linenum, clean_lines.NumLines()):
      text = clean_lines.elided[index]
      nesting += text.count('{') - text.count('}')
      if nesting == 0:
        self.last_line = index
        break

  def CheckBegin(self, filename, clean_lines, linenum, error):
    # A bare ':' (one that is not part of '::') marks the class as derived.
    if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
      self.is_derived = True

  def CheckEnd(self, filename, clean_lines, linenum, error):
    # Check that closing brace is aligned with beginning of the class.
    # Only do this if the closing brace is indented by only whitespaces.
    # This means we will not check single-line class definitions.
    brace_indent = Match(r'^( *)\}', clean_lines.elided[linenum])
    if not brace_indent:
      return
    if len(brace_indent.group(1)) == self.class_indent:
      return
    keyword = 'struct ' if self.is_struct else 'class '
    parent = keyword + self.name
    error(filename, linenum, 'whitespace/indent', 3,
          'Closing brace should be aligned with beginning of %s' % parent)
1735
1736
class _NamespaceInfo(_BlockInfo):
  """Stores information about a namespace."""

  def __init__(self, name, linenum):
    """Records the namespace name and the line where it starts.

    Args:
      name: Name of the namespace; any false value (None, '') is stored as ''
            and treated as an anonymous namespace by CheckEnd.
      linenum: Line number of the namespace declaration.
    """
    # NOTE(review): the False argument is presumably the "seen open brace"
    # flag of _BlockInfo -- confirm against _BlockInfo.__init__.
    _BlockInfo.__init__(self, False)
    self.name = name or ''
    self.starting_linenum = linenum

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Check end of namespace comments.

    Args:
      filename: The name of the current file.
      clean_lines: Object whose `raw_lines` sequence is indexed by linenum.
      linenum: The number of the line holding the closing brace.
      error: Callable invoked as error(filename, linenum, category, level,
             message) when the terminating comment is missing or wrong.
    """
    line = clean_lines.raw_lines[linenum]

    # Check how many lines is enclosed in this namespace.  Don't issue
    # warning for missing namespace comments if there aren't enough
    # lines.  However, do apply checks if there is already an end of
    # namespace comment and it's incorrect.
    #
    # TODO(unknown): We always want to check end of namespace comments
    # if a namespace is large, but sometimes we also want to apply the
    # check if a short namespace contained nontrivial things (something
    # other than forward declarations).  There is currently no logic on
    # deciding what these nontrivial things are, so this check is
    # triggered by namespace size only, which works most of the time.
    #
    # The patterns below start with "^\s*" because they are applied from the
    # beginning of the raw line: without it, a closing brace preceded by
    # whitespace would never be recognized.
    if (linenum - self.starting_linenum < 10
        and not Match(r'^\s*};*\s*(//|/\*).*\bnamespace\b', line)):
      return

    # Look for matching comment at end of namespace.
    #
    # Note that we accept C style "/* */" comments for terminating
    # namespaces, so that code that terminate namespaces inside
    # preprocessor macros can be cpplint clean.
    #
    # We also accept stuff like "// end of namespace <name>." with the
    # period at the end.
    #
    # Besides these, we don't accept anything else, otherwise we might
    # get false negatives when existing comment is a substring of the
    # expected namespace.
    if self.name:
      # Named namespace
      if not Match((r'^\s*};*\s*(//|/\*).*\bnamespace\s+' +
                    re.escape(self.name) + r'[\*/\.\\\s]*$'),
                   line):
        error(filename, linenum, 'readability/namespace', 5,
              'Namespace should be terminated with "// namespace %s"' %
              self.name)
    else:
      # Anonymous namespace
      if not Match(r'^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
        error(filename, linenum, 'readability/namespace', 5,
              'Namespace should be terminated with "// namespace"')
1789
1790
1791class _PreprocessorInfo(object):
1792  """Stores checkpoints of nesting stacks when #if/#else is seen."""
1793
1794  def __init__(self, stack_before_if):
1795    # The entire nesting stack before #if
1796    self.stack_before_if = stack_before_if
1797
1798    # The entire nesting stack up to #else
1799    self.stack_before_else = []
1800
1801    # Whether we have already seen #else or #elif
1802    self.seen_else = False
1803
1804
1805class _NestingState(object):
1806  """Holds states related to parsing braces."""
1807
1808  def __init__(self):
1809    # Stack for tracking all braces.  An object is pushed whenever we
1810    # see a "{", and popped when we see a "}".  Only 3 types of
1811    # objects are possible:
1812    # - _ClassInfo: a class or struct.
1813    # - _NamespaceInfo: a namespace.
1814    # - _BlockInfo: some other type of block.
1815    self.stack = []
1816
1817    # Stack of _PreprocessorInfo objects.
1818    self.pp_stack = []
1819
1820  def SeenOpenBrace(self):
1821    """Check if we have seen the opening brace for the innermost block.
1822
1823    Returns:
1824      True if we have seen the opening brace, False if the innermost
1825      block is still expecting an opening brace.
1826    """
1827    return (not self.stack) or self.stack[-1].seen_open_brace
1828
1829  def InNamespaceBody(self):
1830    """Check if we are currently one level inside a namespace body.
1831
1832    Returns:
1833      True if top of the stack is a namespace block, False otherwise.
1834    """
1835    return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
1836
1837  def UpdatePreprocessor(self, line):
1838    """Update preprocessor stack.
1839
1840    We need to handle preprocessors due to classes like this:
1841      #ifdef SWIG
1842      struct ResultDetailsPageElementExtensionPoint {
1843      #else
1844      struct ResultDetailsPageElementExtensionPoint : public Extension {
1845      #endif
1846
1847    We make the following assumptions (good enough for most files):
1848    - Preprocessor condition evaluates to true from #if up to first
1849      #else/#elif/#endif.
1850
1851    - Preprocessor condition evaluates to false from #else/#elif up
1852      to #endif.  We still perform lint checks on these lines, but
1853      these do not affect nesting stack.
1854
1855    Args:
1856      line: current line to check.
1857    """
1858    if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
1859      # Beginning of #if block, save the nesting stack here.  The saved
1860      # stack will allow us to restore the parsing state in the #else case.
1861      self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
1862    elif Match(r'^\s*#\s*(else|elif)\b', line):
1863      # Beginning of #else block
1864      if self.pp_stack:
1865        if not self.pp_stack[-1].seen_else:
1866          # This is the first #else or #elif block.  Remember the
1867          # whole nesting stack up to this point.  This is what we
1868          # keep after the #endif.
1869          self.pp_stack[-1].seen_else = True
1870          self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
1871
1872        # Restore the stack to how it was before the #if
1873        self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
1874      else:
1875        # TODO(unknown): unexpected #else, issue warning?
1876        pass
1877    elif Match(r'^\s*#\s*endif\b', line):
1878      # End of #if or #else blocks.
1879      if self.pp_stack:
1880        # If we saw an #else, we will need to restore the nesting
1881        # stack to its former state before the #else, otherwise we
1882        # will just continue from where we left off.
1883        if self.pp_stack[-1].seen_else:
1884          # Here we can just use a shallow copy since we are the last
1885          # reference to it.
1886          self.stack = self.pp_stack[-1].stack_before_else
1887        # Drop the corresponding #if
1888        self.pp_stack.pop()
1889      else:
1890        # TODO(unknown): unexpected #endif, issue warning?
1891        pass
1892
1893  def Update(self, filename, clean_lines, linenum, error):
1894    """Update nesting state with current line.
1895
1896    Args:
1897      filename: The name of the current file.
1898      clean_lines: A CleansedLines instance containing the file.
1899      linenum: The number of the line to check.
1900      error: The function to call with any errors found.
1901    """
1902    line = clean_lines.elided[linenum]
1903
1904    # Update pp_stack first
1905    self.UpdatePreprocessor(line)
1906
1907    # Count parentheses.  This is to avoid adding struct arguments to
1908    # the nesting stack.
1909    if self.stack:
1910      inner_block = self.stack[-1]
1911      depth_change = line.count('(') - line.count(')')
1912      inner_block.open_parentheses += depth_change
1913
1914      # Also check if we are starting or ending an inline assembly block.
1915      if inner_block.inline_asm in (_NO_ASM, _END_ASM):
1916        if (depth_change != 0 and
1917            inner_block.open_parentheses == 1 and
1918            _MATCH_ASM.match(line)):
1919          # Enter assembly block
1920          inner_block.inline_asm = _INSIDE_ASM
1921        else:
1922          # Not entering assembly block.  If previous line was _END_ASM,
1923          # we will now shift to _NO_ASM state.
1924          inner_block.inline_asm = _NO_ASM
1925      elif (inner_block.inline_asm == _INSIDE_ASM and
1926            inner_block.open_parentheses == 0):
1927        # Exit assembly block
1928        inner_block.inline_asm = _END_ASM
1929
1930    # Consume namespace declaration at the beginning of the line.  Do
1931    # this in a loop so that we catch same line declarations like this:
1932    #   namespace proto2 { namespace bridge { class MessageSet; } }
1933    while True:
1934      # Match start of namespace.  The "\b\s*" below catches namespace
1935      # declarations even if it weren't followed by a whitespace, this
1936      # is so that we don't confuse our namespace checker.  The
1937      # missing spaces will be flagged by CheckSpacing.
1938      namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
1939      if not namespace_decl_match:
1940        break
1941
1942      new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
1943      self.stack.append(new_namespace)
1944
1945      line = namespace_decl_match.group(2)
1946      if line.find('{') != -1:
1947        new_namespace.seen_open_brace = True
1948        line = line[line.find('{') + 1:]
1949
1950    # Look for a class declaration in whatever is left of the line
1951    # after parsing namespaces.  The regexp accounts for decorated classes
1952    # such as in:
1953    #   class LOCKABLE API Object {
1954    #   };
1955    #
1956    # Templates with class arguments may confuse the parser, for example:
1957    #   template <class T
1958    #             class Comparator = less<T>,
1959    #             class Vector = vector<T> >
1960    #   class HeapQueue {
1961    #
1962    # Because this parser has no nesting state about templates, by the
1963    # time it saw "class Comparator", it may think that it's a new class.
1964    # Nested templates have a similar problem:
1965    #   template <
1966    #       typename ExportedType,
1967    #       typename TupleType,
1968    #       template <typename, typename> class ImplTemplate>
1969    #
1970    # To avoid these cases, we ignore classes that are followed by '=' or '>'
1971    class_decl_match = Match(
1972        r'\s*(template\s*<[\w\s<>,:]*>\s*)?'
1973        r'(class|struct)\s+([A-Z_]+\s+)*(\w+(?:::\w+)*)'
1974        r'(([^=>]|<[^<>]*>|<[^<>]*<[^<>]*>\s*>)*)$', line)
1975    if (class_decl_match and
1976        (not self.stack or self.stack[-1].open_parentheses == 0)):
1977      self.stack.append(_ClassInfo(
1978          class_decl_match.group(4), class_decl_match.group(2),
1979          clean_lines, linenum))
1980      line = class_decl_match.group(5)
1981
1982    # If we have not yet seen the opening brace for the innermost block,
1983    # run checks here.
1984    if not self.SeenOpenBrace():
1985      self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
1986
1987    # Update access control if we are inside a class/struct
1988    if self.stack and isinstance(self.stack[-1], _ClassInfo):
1989      classinfo = self.stack[-1]
1990      access_match = Match(
1991          r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
1992          r':(?:[^:]|$)',
1993          line)
1994      if access_match:
1995        classinfo.access = access_match.group(2)
1996
1997        # Check that access keywords are indented +1 space.  Skip this
1998        # check if the keywords are not preceded by whitespaces.
1999        indent = access_match.group(1)
2000        if (len(indent) != classinfo.class_indent + 1 and
2001            Match(r'^\s*$', indent)):
2002          if classinfo.is_struct:
2003            parent = 'struct ' + classinfo.name
2004          else:
2005            parent = 'class ' + classinfo.name
2006          slots = ''
2007          if access_match.group(3):
2008            slots = access_match.group(3)
2009          error(filename, linenum, 'whitespace/indent', 3,
2010                '%s%s: should be indented +1 space inside %s' % (
2011                    access_match.group(2), slots, parent))
2012
2013    # Consume braces or semicolons from what's left of the line
2014    while True:
2015      # Match first brace, semicolon, or closed parenthesis.
2016      matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
2017      if not matched:
2018        break
2019
2020      token = matched.group(1)
2021      if token == '{':
2022        # If namespace or class hasn't seen a opening brace yet, mark
2023        # namespace/class head as complete.  Push a new block onto the
2024        # stack otherwise.
2025        if not self.SeenOpenBrace():
2026          self.stack[-1].seen_open_brace = True
2027        else:
2028          self.stack.append(_BlockInfo(True))
2029          if _MATCH_ASM.match(line):
2030            self.stack[-1].inline_asm = _BLOCK_ASM
2031      elif token == ';' or token == ')':
2032        # If we haven't seen an opening brace yet, but we already saw
2033        # a semicolon, this is probably a forward declaration.  Pop
2034        # the stack for these.
2035        #
2036        # Similarly, if we haven't seen an opening brace yet, but we
2037        # already saw a closing parenthesis, then these are probably
2038        # function arguments with extra "class" or "struct" keywords.
2039        # Also pop these stack for these.
2040        if not self.SeenOpenBrace():
2041          self.stack.pop()
2042      else:  # token == '}'
2043        # Perform end of block checks and pop the stack.
2044        if self.stack:
2045          self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
2046          self.stack.pop()
2047      line = matched.group(2)
2048
2049  def InnermostClass(self):
2050    """Get class info on the top of the stack.
2051
2052    Returns:
2053      A _ClassInfo object if we are inside a class, or None otherwise.
2054    """
2055    for i in range(len(self.stack), 0, -1):
2056      classinfo = self.stack[i - 1]
2057      if isinstance(classinfo, _ClassInfo):
2058        return classinfo
2059    return None
2060
2061  def CheckCompletedBlocks(self, filename, error):
2062    """Checks that all classes and namespaces have been completely parsed.
2063
2064    Call this when all lines in a file have been processed.
2065    Args:
2066      filename: The name of the current file.
2067      error: The function to call with any errors found.
2068    """
2069    # Note: This test can result in false positives if #ifdef constructs
2070    # get in the way of brace matching. See the testBuildClass test in
2071    # cpplint_unittest.py for an example of this.
2072    for obj in self.stack:
2073      if isinstance(obj, _ClassInfo):
2074        error(filename, obj.starting_linenum, 'build/class', 5,
2075              'Failed to find complete declaration of class %s' %
2076              obj.name)
2077      elif isinstance(obj, _NamespaceInfo):
2078        error(filename, obj.starting_linenum, 'build/namespaces', 5,
2079              'Failed to find complete declaration of namespace %s' %
2080              obj.name)
2081
2082
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  nesting_state, error):
  r"""Reports non-standard constructs found on a single line.

  The following are flagged:
  - "%q" conversions in printf-style format strings.
  - "%N$" positional conversions in printf-style format strings.
  - the undefined escapes "\%", "\[", "\(" and "\{" after a quote.
  - a storage class (register/static/extern/typedef) placed after a type
    or qualifier keyword.
  - uncommented text after #endif.
  - inner-style forward declarations ("class Outer::Inner;").
  - the >? and <? operators, and their >?= and <?= forms.
  - "const string&" member declarations.

  When the line lies inside a class whose opening brace has been seen,
  single-argument constructors not marked explicit are reported too.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable invoked as error(filename, linenum, category, level,
           message) for every problem found.
  """
  # Stage 1: comments are gone, string literals are still present.
  with_strings = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', with_strings):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated.  Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', with_strings):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional.  Try rewriting to avoid them.')

  # An escaped backslash must not be mistaken for the start of an escape
  # sequence, so take those out before searching for undefined escapes.
  without_double_backslash = with_strings.replace('\\\\', '')
  if Search(r'("|\').*\\(%|\[|\(|{)', without_double_backslash):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes.  Unescape them.')

  # Stage 2: both comments and strings are gone.
  code = clean_lines.elided[linenum]

  misplaced_storage_class = (
      r'\b(const|volatile|void|char|short|int|long'
      r'|float|double|signed|unsigned'
      r'|schar|u?int8|u?int16|u?int32|u?int64)'
      r'\s+(register|static|extern|typedef)\b')
  if Search(misplaced_storage_class, code):
    error(filename, linenum, 'build/storage_class', 5,
          'Storage class (static, extern, typedef, etc) should be first.')

  if Match(r'\s*#\s*endif\s*[^/\s]+', code):
    error(filename, linenum, 'build/endif_comment', 5,
          'Uncommented text after #endif is non-standard.  Use a comment.')

  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', code):
    error(filename, linenum, 'build/forward_decl', 5,
          'Inner-style forward declarations are invalid.  Remove this line.')

  min_max_operator = (
      r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?')
  if Search(min_max_operator, code):
    error(filename, linenum, 'build/deprecated', 3,
          '>? and <? (max and min) operators are non-standard and deprecated.')

  # TODO(unknown): Could this be widened safely to arbitrary references
  # without triggering too many false positives?  The first attempt
  # produced 5 warnings for mostly benign code in the regtest, hence the
  # restriction to "const string&".  The pattern tried back then was:
  # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
  # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
  if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', code):
    error(filename, linenum, 'runtime/member_string_references', 2,
          'const string& members are dangerous. It is much better to use '
          'alternatives, such as pointers or simple constants.')

  # The remaining check only applies inside a class body: stop unless an
  # enclosing class exists and its head has been completed.
  innermost = nesting_state.InnermostClass()
  if not (innermost and innermost.seen_open_brace):
    return

  # The class may be declared with namespace or class qualifiers, but its
  # constructor is spelled with the last component only.
  escaped_name = re.escape(innermost.name.split('::')[-1])

  # A constructor taking exactly one argument is valid C++ but against
  # style unless it is marked explicit.
  ctor = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)' % escaped_name, code)
  if not ctor:
    return
  sole_arg = ctor.group(1)
  if sole_arg == 'void':
    return
  # An argument that is a reference to the class itself is exempt.
  same_class_reference = (
      r'(const\s+)?%s(\s+const)?\s*(?:<\w+>\s*)?&' % escaped_name)
  if not Match(same_class_reference, sole_arg.strip()):
    error(filename, linenum, 'runtime/explicit', 5,
          'Single-argument constructors should be marked explicit.')
2188
2189
def CheckSpacingForFunctionCall(filename, line, linenum, error):
  """Reports stray whitespace around the parentheses of function calls.

  Args:
    filename: The name of the current file.
    line: The text of the line to check.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # if/for/while/switch headers follow their own, looser conventions, but
  # calls made inside them are still held to the strict rules.  When the
  # line is such a header, only the text inside its parentheses is
  # examined; otherwise the whole line is.
  control_headers = (r'\bif\s*\((.*)\)\s*{',
                     r'\bfor\s*\((.*)\)\s*{',
                     r'\bwhile\s*\((.*)\)\s*[{;]',
                     r'\bswitch\s*\((.*)\)\s*{')
  fncall = line
  for header in control_headers:
    header_match = Search(header, line)
    if header_match:
      fncall = header_match.group(1)
      break

  # Nothing is reported for control structures and similar keywords.
  if Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b',
            fncall):
    return
  # Pointers/references to functions are too tricky; they are recognized
  # simply as " (something)(maybe-something)" or
  # " (something)(maybe-something," and left alone.
  if Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall):
    return
  # Likewise for pointers/references to arrays, " (something)[something]".
  # The contents of [] are assumed short enough never to wrap.
  if Search(r' \([^)]+\)\[[^\]]+\]', fncall):
    return

  # Whitespace right after an opening parenthesis.  Nested parentheses,
  # as in "( (a+b) + c )", are tolerated by the generic form.
  if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # the ( of a call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  # Whitespace between a name character (alnum or _) and the opening
  # parenthesis, unless the line is a macro definition, a typedef, or
  # looks like a pointer to function.
  space_before_open = Search(r'\w\s+\(', fncall)
  if (space_before_open and
      not Search(r'#\s*define|typedef', fncall) and
      not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall)):
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space before ( in function call')

  # Whitespace before a closing parenthesis.  A ) followed only by a
  # newline, or by a { and a newline, is taken to belong to a control
  # statement and is not reported.
  if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
    if Search(r'^\s+\)', fncall):
      # Only whitespace precedes the ), so give the more specific advice.
      message = 'Closing ) should be moved to the previous line'
    else:
      message = 'Extra space before )'
    error(filename, linenum, 'whitespace/parens', 2, message)
2256
2257
def IsBlankLine(line):
  """Tells whether a line has no visible content.

  A line counts as blank when it is empty or made up solely of
  whitespace characters.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()
2271
2272
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  Uses a simplistic algorithm assuming other style guidelines
  (especially spacing) are followed.
  Only checks unindented functions, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.
  NOLINT *on the last line of a function* disables this check.

  Each call handles one line: a line that starts a function calls
  function_state.Begin(), a line consisting of an unindented '}' calls
  function_state.Check() and function_state.End(), and any other non-blank
  line calls function_state.Count().

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]
  joined_line = ''

  starting_func = False
  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
  match_result = Match(regexp, line)
  if match_result:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = match_result.group(1).split()[-1]
    if function_name == 'TEST' or function_name == 'TEST_F' or (
        not Match(r'[A-Z_]+$', function_name)):
      starting_func = True

  if starting_func:
    body_found = False
    # Scan forward from the current line until something tells us whether
    # this is a declaration/trivial function or the start of a real body.
    # range() rather than the Python-2-only xrange(); the loop usually
    # breaks within a few lines.
    for start_linenum in range(linenum, clean_lines.NumLines()):
      start_line = lines[start_linenum]
      joined_line += ' ' + start_line.lstrip()
      if Search(r'(;|})', start_line):  # Declarations and trivial functions
        body_found = True
        break                              # ... ignore
      elif Search(r'{', start_line):
        body_found = True
        function = Search(r'((\w|:)*)\(', line).group(1)
        if Match(r'TEST', function):    # Handle TEST... macros
          # Append the macro arguments, collected from all lines joined so
          # far, to the reported name.
          parameter_regexp = Search(r'(\(.*\))', joined_line)
          if parameter_regexp:             # Ignore bad syntax
            function += parameter_regexp.group(1)
        else:
          function += '()'
        function_state.Begin(function)
        break
    if not body_found:
      # No body for the function (or evidence of a non-function) was found.
      error(filename, linenum, 'readability/fn_size', 5,
            'Lint failed to find start of function body.')
  elif Match(r'^\}\s*$', line):  # function end
    function_state.Check(error, filename, linenum)
    function_state.End()
  elif not Match(r'^\s*$', line):
    function_state.Count()  # Count non-blank/non-comment lines.
2341
2342
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')


def CheckComment(comment, filename, linenum, error):
  """Reports formatting mistakes in a TODO comment.

  Comments that do not start with "//", optional whitespace and "TODO"
  are ignored.

  Args:
    comment: The text of the comment from the line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  todo = _RE_PATTERN_TODO.match(comment)
  if not todo:
    return

  # Groups: whitespace before TODO, the "(username)" part, and the single
  # character (or end of comment) that follows the optional colon.
  before_todo, owner, after_todo = todo.groups()

  # A single space is right; no space at all is reported elsewhere.
  if len(before_todo) > 1:
    error(filename, linenum, 'whitespace/todo', 2,
          'Too many spaces before TODO')

  if not owner:
    error(filename, linenum, 'readability/todo', 2,
          'Missing username in TODO; it should look like '
          '"// TODO(my_username): Stuff."')

  # Only a space or the end of the comment is acceptable here.  When
  # something else follows, the group did not take part in the match and
  # is None, which is deliberately told apart from ''.
  if after_todo not in (' ', ''):
    error(filename, linenum, 'whitespace/todo', 2,
          'TODO(my_username) should be followed by a space')
2374
def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
  """Reports DISALLOW* macros placed outside a private: section.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # The elided line has comments and strings removed.
  code = clean_lines.elided[linenum]

  macro = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
                 r'DISALLOW_EVIL_CONSTRUCTORS|'
                 r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), code)
  if not macro:
    return

  # A DISALLOW* macro outside a class declaration (or inside a function
  # when it should have been part of the class) is not reported: a warning
  # would be possible, but it probably caused a compiler error already.
  if not nesting_state.stack:
    return
  if not isinstance(nesting_state.stack[-1], _ClassInfo):
    return

  if nesting_state.stack[-1].access != 'private':
    error(filename, linenum, 'readability/constructors', 3,
          '%s must be in the private: section' % macro.group(1))
2404
2405
def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix):
  """Looks forward for the > that closes a template's opening <.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: Current line number.
    init_suffix: Remainder of the current line after the initial <.

  Returns:
    True if a matching bracket exists.
  """
  openers = ('<', '(', '[')
  remainder = init_suffix
  pending = ['<']   # brackets opened so far and not yet closed
  while True:
    # Locate the next token that helps decide whether the < opened a
    # bracket or was a less-than operator; only the latter deserves a
    # warning.
    #
    # Stopping early on other operators would be possible, e.g. in
    # "a<b+c" the "<" is most likely a less-than, but that would give
    # false positives for default arguments and other template
    # expressions.
    found = Search(r'^[^<>(),;\[\]]*([<>(),;\[\]])(.*)$', remainder)
    if not found:
      # Nothing of interest left on this line, move on to the next one.
      linenum += 1
      if linenum >= len(clean_lines.elided):
        # Ran out of lines without finding the closing bracket.  Most
        # likely the input was incomplete, otherwise a semicolon would
        # have ended the search earlier.
        return True
      remainder = clean_lines.elided[linenum]
      continue

    token, remainder = found.groups()

    # An opening token nests one level deeper, whatever is innermost.
    if token in openers:
      pending.append(token)
      continue

    if pending[-1] != '<':
      # Inside ( or [: only a closing ) or ] matters.  Whether it pairs
      # up correctly is not verified; "(]" or "[)" would have been a
      # syntax error anyway.
      if token in (')', ']'):
        pending.pop()
      continue

    # The innermost open token is an angle bracket.
    if token == '>':
      pending.pop()
      if not pending:
        # This > closes the initial <.
        return True
    elif token == ',':
      # A comma right inside angle brackets is most likely separating
      # template arguments.  The closing > is probably a few lines
      # further down, so stop searching here.
      return True
    else:
      # Any other token here means this was not a template bracket.
      return False
2473
2474
def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix):
  """Scan backward to decide whether a '>' closes a template argument list.

  The scan walks bracket-like tokens (< > ( ) [ ] , ;) from right to left,
  starting just before the initial '>', continuing onto earlier lines when
  the current one runs out of tokens.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: Current line number.
    init_prefix: Part of the current line before the initial >.

  Returns:
    True if the matching '<' was found, or if a comma was seen directly
    inside the angle bracket (treated as a template argument separator).
    False if some other token (opening paren/bracket or semicolon) showed
    up directly inside the angle bracket, or if the start of the file was
    reached without finding the opening bracket.
  """
  remaining = init_prefix
  stack = ['>']
  while True:
    # Peel off the rightmost token of the text still to be examined.
    found = Search(r'^(.*)([<>(),;\[\]])[^<>(),;\[\]]*$', remaining)
    if not found:
      # No more tokens on this line, move on to the previous one.
      linenum -= 1
      if linenum < 0:
        # Ran out of lines and still no matching angle bracket.
        return False
      remaining = clean_lines.elided[linenum]
      continue

    remaining = found.group(1)
    token = found.group(2)

    if token in ('>', ')', ']'):
      # Going backwards, any closer nests one level deeper.
      stack.append(token)
    elif stack[-1] != '>':
      # Inside () or []: only the corresponding kind of opener matters.
      if token in ('(', '['):
        stack.pop()
    elif token == '<':
      stack.pop()
      if not stack:
        # This '<' opens the initial '>'.
        return True
    else:
      # Directly inside an angle bracket.  A comma is most likely a
      # template argument separator, so assume the opening bracket is
      # there; anything else means this is not a template.
      return token == ','
2530
2531
def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
  """Checks for the correctness of various spacing issues in the code.

  Things we check for: spaces around operators, spaces after
  if/for/while/switch, no spaces around parens in function calls, two
  spaces between code and comment, don't start a block with a blank
  line, don't end a function with a blank line, don't add a blank line
  after public/protected/private, don't have too many blank lines in a row.

  Nothing is returned; every finding is reported through the error callback.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # Don't use "elided" lines here, otherwise we can't check commented lines.
  # Don't want to use "raw" either, because we don't want to check inside C++11
  # raw strings,
  raw = clean_lines.lines_without_raw_strings
  line = raw[linenum]

  # Before nixing comments, check if the line is blank for no good
  # reason.  This includes the first line after a block is opened, and
  # blank lines at the end of a function (ie, right before a line like '}'
  #
  # Skip all the blank line checks if we are immediately inside a
  # namespace body.  In other words, don't issue blank line warnings
  # for this block:
  #   namespace {
  #
  #   }
  #
  # A warning about missing end of namespace comments will be issued instead.
  if IsBlankLine(line) and not nesting_state.InNamespaceBody():
    elided = clean_lines.elided
    prev_line = elided[linenum - 1]
    prevbrace = prev_line.rfind('{')
    # TODO(unknown): Don't complain if line before blank line, and line after,
    #                both start with alnums and are indented the same amount.
    #                This ignores whitespace at the start of a namespace block
    #                because those are not usually indented.
    if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
      # OK, we have a blank line at the start of a code block.  Before we
      # complain, we check if it is an exception to the rule: The previous
      # non-empty line has the parameters of a function header that are indented
      # 4 spaces (because they did not fit in a 80 column line when placed on
      # the same line as the function name).  We also check for the case where
      # the previous line is indented 6 spaces, which may happen when the
      # initializers of a constructor do not fit into a 80 column line.
      exception = False
      if Match(r' {6}\w', prev_line):  # Initializer list?
        # We are looking for the opening column of initializer list, which
        # should be indented 4 spaces to cause 6 space indentation afterwards.
        search_position = linenum-2
        while (search_position >= 0
               and Match(r' {6}\w', elided[search_position])):
          search_position -= 1
        exception = (search_position >= 0
                     and elided[search_position][:5] == '    :')
      else:
        # Search for the function arguments or an initializer list.  We use a
        # simple heuristic here: If the line is indented 4 spaces; and we have a
        # closing paren, without the opening paren, followed by an opening brace
        # or colon (for initializer lists) we assume that it is the last line of
        # a function header.  If we have a colon indented 4 spaces, it is an
        # initializer list.
        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                           prev_line)
                     or Match(r' {4}:', prev_line))

      if not exception:
        error(filename, linenum, 'whitespace/blank_line', 2,
              'Redundant blank line at the start of a code block '
              'should be deleted.')
    # Ignore blank lines at the end of a block in a long if-else
    # chain, like this:
    #   if (condition1) {
    #     // Something followed by a blank line
    #
    #   } else if (condition2) {
    #     // Something else
    #   }
    if linenum + 1 < clean_lines.NumLines():
      next_line = raw[linenum + 1]
      if (next_line
          and Match(r'\s*}', next_line)
          and next_line.find('} else ') == -1):
        error(filename, linenum, 'whitespace/blank_line', 3,
              'Redundant blank line at the end of a code block '
              'should be deleted.')

    matched = Match(r'\s*(public|protected|private):', prev_line)
    if matched:
      error(filename, linenum, 'whitespace/blank_line', 3,
            'Do not leave a blank line after "%s:"' % matched.group(1))

  # Next, we complain if there's a comment too near the text
  commentpos = line.find('//')
  if commentpos != -1:
    # Check if the // may be in quotes.  If so, ignore it
    # Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
    if (line.count('"', 0, commentpos) -
        line.count('\\"', 0, commentpos)) % 2 == 0:   # not in quotes
      # Allow one space for new scopes, two spaces otherwise:
      if (not Match(r'^\s*{ //', line) and
          ((commentpos >= 1 and
            line[commentpos-1] not in string.whitespace) or
           (commentpos >= 2 and
            line[commentpos-2] not in string.whitespace))):
        error(filename, linenum, 'whitespace/comments', 2,
              'At least two spaces is best between code and comments')
      # There should always be a space between the // and the comment
      commentend = commentpos + 2
      if commentend < len(line) and not line[commentend] == ' ':
        # but some lines are exceptions -- e.g. if they're big
        # comment delimiters like:
        # //----------------------------------------------------------
        # or are an empty C++ style Doxygen comment, like:
        # ///
        # or C++ style Doxygen comments placed after the variable:
        # ///<  Header comment
        # //!<  Header comment
        # or they begin with multiple slashes followed by a space:
        # //////// Header comment
        match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
                 Search(r'^/$', line[commentend:]) or
                 Search(r'^!< ', line[commentend:]) or
                 Search(r'^/< ', line[commentend:]) or
                 Search(r'^/+ ', line[commentend:]))
        if not match:
          error(filename, linenum, 'whitespace/comments', 4,
                'Should have a space between // and comment')
      CheckComment(line[commentpos:], filename, linenum, error)

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  # Don't try to do spacing checks for operator methods.  Both strings are
  # raw so that "\(" is not treated as an (invalid) string escape; the
  # replacement text is unchanged, i.e. a backslash followed by "(".
  line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', r'operator\(', line)

  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
  # Otherwise not.  Note we only check for non-spaces on *both* sides;
  # sometimes people put non-spaces on one side when aligning ='s among
  # many lines (not that this is behavior that I approve of...)
  if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
    error(filename, linenum, 'whitespace/operators', 4,
          'Missing spaces around =')

  # It's ok not to have spaces around binary operators like + - * /, but if
  # there's too little whitespace, we get concerned.  It's hard to tell,
  # though, so we punt on this one for now.  TODO.

  # You should always have whitespace around binary operators.
  #
  # Check <= and >= first to avoid false positives with < and >, then
  # check non-include lines for spacing around < and >.
  match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))
  # We allow no-spaces around << when used like this: 10<<20, but
  # not otherwise (particularly, not when used as streams)
  # Also ignore using ns::operator<<;
  match = Search(r'(operator|\S)(?:L|UL|ULL|l|ul|ull)?<<(\S)', line)
  if (match and
      not (match.group(1).isdigit() and match.group(2).isdigit()) and
      not (match.group(1) == 'operator' and match.group(2) == ';')):
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around <<')
  elif not Match(r'#.*include', line):
    # Avoid false positives on ->
    reduced_line = line.replace('->', '')

    # Look for < that is not surrounded by spaces.  This is only
    # triggered if both sides are missing spaces, even though
    # technically we should flag if at least one side is missing a
    # space.  This is done to avoid some false positives with shifts.
    match = Search(r'[^\s<]<([^\s=<].*)', reduced_line)
    if (match and
        not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))):
      error(filename, linenum, 'whitespace/operators', 3,
            'Missing spaces around <')

    # Look for > that is not surrounded by spaces.  Similar to the
    # above, we only trigger if both sides are missing spaces to avoid
    # false positives with shifts.
    match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line)
    if (match and
        not FindPreviousMatchingAngleBracket(clean_lines, linenum,
                                             match.group(1))):
      error(filename, linenum, 'whitespace/operators', 3,
            'Missing spaces around >')

  # We allow no-spaces around >> for almost anything.  This is because
  # C++11 allows ">>" to close nested templates, which accounts for
  # most cases when ">>" is not followed by a space.
  #
  # We still warn on ">>" followed by alpha character, because that is
  # likely due to ">>" being used for right shifts, e.g.:
  #   value >> alpha
  #
  # When ">>" is used to close templates, the alphanumeric letter that
  # follows would be part of an identifier, and there should still be
  # a space separating the template type and the identifier.
  #   type<type<type>> alpha
  match = Search(r'>>[a-zA-Z_]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around >>')

  # There shouldn't be space around unary operators
  match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 4,
          'Extra space for operator %s' % match.group(1))

  # A pet peeve of mine: no spaces after an if, while, switch, or for
  match = Search(r' (if\(|for\(|while\(|switch\()', line)
  if match:
    error(filename, linenum, 'whitespace/parens', 5,
          'Missing space before ( in %s' % match.group(1))

  # For if/for/while/switch, the left and right parens should be
  # consistent about how many spaces are inside the parens, and
  # there should either be zero or one spaces inside the parens.
  # We don't want: "if ( foo)" or "if ( foo   )".
  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
  match = Search(r'\b(if|for|while|switch)\s*'
                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                 line)
  if match:
    if len(match.group(2)) != len(match.group(4)):
      # Note the precedence: (A and B) or (C and D).
      if not (match.group(3) == ';' and
              len(match.group(2)) == 1 + len(match.group(4)) or
              not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
        error(filename, linenum, 'whitespace/parens', 5,
              'Mismatching spaces inside () in %s' % match.group(1))
    if len(match.group(2)) not in [0, 1]:
      error(filename, linenum, 'whitespace/parens', 5,
            'Should have zero or one spaces inside ( and ) in %s' %
            match.group(1))

  # You should always have a space after a comma (either as fn arg or operator)
  #
  # This does not apply when the non-space character following the
  # comma is another comma, since the only time when that happens is
  # for empty macro arguments.
  #
  # We run this check in two passes: first pass on elided lines to
  # verify that lines contain missing whitespaces, second pass on raw
  # lines to confirm that those missing whitespaces are not due to
  # elided comments.
  if Search(r',[^,\s]', line) and Search(r',[^,\s]', raw[linenum]):
    error(filename, linenum, 'whitespace/comma', 3,
          'Missing space after ,')

  # You should always have a space after a semicolon
  # except for few corner cases
  # TODO(unknown): clarify if 'if (1) { return 1;}' requires one more
  # space after ;
  if Search(r';[^\s};\\)/]', line):
    error(filename, linenum, 'whitespace/semicolon', 3,
          'Missing space after ;')

  # Next we will look for issues with function calls.
  CheckSpacingForFunctionCall(filename, line, linenum, error)

  # Except after an opening paren, or after another opening brace (in case of
  # an initializer list, for instance), you should have spaces before your
  # braces. And since you should never have braces at the beginning of a line,
  # this is an easy test.
  match = Match(r'^(.*[^ ({]){', line)
  if match:
    # Try a bit harder to check for brace initialization.  This
    # happens in one of the following forms:
    #   Constructor() : initializer_list_{} { ... }
    #   Constructor{}.MemberFunction()
    #   Type variable{};
    #   FunctionCall(type{}, ...);
    #   LastArgument(..., type{});
    #   LOG(INFO) << type{} << " ...";
    #   map_of_type[{...}] = ...;
    #
    # We check for the character following the closing brace, and
    # silence the warning if it's one of those listed above, i.e.
    # "{.;,)<]".
    #
    # To account for nested initializer list, we allow any number of
    # closing braces up to "{;,)<".  We can't simply silence the
    # warning on first sight of closing brace, because that would
    # cause false negatives for things that are not initializer lists.
    #   Silence this:         But not this:
    #     Outer{                if (...) {
    #       Inner{...}            if (...){  // Missing space before {
    #     };                    }
    #
    # There is a false negative with this approach if people inserted
    # spurious semicolons, e.g. "if (cond){};", but we will catch the
    # spurious semicolon with a separate check.
    (endline, endlinenum, endpos) = CloseExpression(
        clean_lines, linenum, len(match.group(1)))
    trailing_text = ''
    if endpos > -1:
      trailing_text = endline[endpos:]
    # Append up to two following lines.  range (rather than xrange) is used
    # since at most two values are produced and it exists on every Python
    # version.
    for offset in range(endlinenum + 1,
                        min(endlinenum + 3, clean_lines.NumLines() - 1)):
      trailing_text += clean_lines.elided[offset]
    if not Match(r'^[\s}]*[{.;,)<\]]', trailing_text):
      error(filename, linenum, 'whitespace/braces', 5,
            'Missing space before {')

  # Make sure '} else {' has spaces.
  if Search(r'}else', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before else')

  # You shouldn't have spaces before your brackets, except maybe after
  # 'delete []' or 'new char * []'.
  if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Extra space before [')

  # You shouldn't have a space before a semicolon at the end of the line.
  # There's a special case for "for" since the style guide allows space before
  # the semicolon there.
  if Search(r':\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Semicolon defining empty statement. Use {} instead.')
  elif Search(r'^\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Line contains only semicolon. If this should be an empty statement, '
          'use {} instead.')
  elif (Search(r'\s+;\s*$', line) and
        not Search(r'\bfor\b', line)):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Extra space before last semicolon. If this should be an empty '
          'statement, use {} instead.')

  # In range-based for, we wanted spaces before and after the colon, but
  # not around "::" tokens that might appear.
  if (Search(r'for *\(.*[^:]:[^: ]', line) or
      Search(r'for *\(.*[^: ]:[^:]', line)):
    error(filename, linenum, 'whitespace/forcolon', 2,
          'Missing space around colon in range-based for loop')
2878
2879
def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks for additional blank line issues related to sections.

  The only rule enforced here is that a public/protected/private label
  should be preceded by a blank line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo objects.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Small classes (25 lines or less) are exempt.  25 lines is roughly the
  # height of a terminal, so anything that does not fit in one screen is
  # not "small".  If the end of the class was never found, last_line is
  # zero and this test exempts the class as well.
  if class_info.last_line - class_info.starting_linenum <= 24:
    return

  # Nothing to check on the first line of the class either.  This covers
  # one-liners such as
  #   class Foo { public: ... };
  class_start = class_info.starting_linenum
  if linenum <= class_start:
    return

  source = clean_lines.lines
  label = Match(r'\s*(public|protected|private):', source[linenum])
  if not label:
    return

  # A label is fine when the line above it is blank, mentions "class" or
  # "struct", or ends with a backslash.  The class/struct case happens
  # when:
  #  - We are at the beginning of the class.
  #  - We are forward-declaring an inner class that is semantically
  #    private, but needed to be public for implementation reasons.
  # The trailing backslash case is common for classes defined in C macros.
  above = source[linenum - 1]
  if (IsBlankLine(above) or
      Search(r'\b(class|struct)\b', above) or
      Search(r'\\$', above)):
    return

  # Locate the line that ends the class head (first line ending in "{"),
  # falling back to the first line of the class.  This accounts for
  # multi-line base-specifier lists, e.g.:
  #   class Derived
  #       : public Base {
  head_end = next(
      (index for index in range(class_start, linenum)
       if Search(r'\{\s*$', source[index])),
      class_start)

  # A label directly below the class head needs no blank line.
  if head_end < linenum - 1:
    error(filename, linenum, 'whitespace/blank_line', 3,
          '"%s:" should be preceded by a blank line' % label.group(1))
2933
2934
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Find the closest non-blank line above the given line.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A (contents, line number) tuple describing the nearest non-blank
    elided line before linenum.  When every earlier line is blank, or there
    is no earlier line at all, the tuple is ('', -1).
  """
  # Walk upwards from the line just above linenum to the top of the file.
  for index in range(linenum - 1, -1, -1):
    candidate = clean_lines.elided[index]
    if not IsBlankLine(candidate):
      return (candidate, index)
  return ('', -1)
2956
2957
def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces (e.g. at the end of line).

  Besides brace placement this also checks else/do-while layout and
  redundant semicolons after a closing brace.  Nothing is returned; every
  finding is reported through the error callback.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  line = clean_lines.elided[linenum]        # get rid of comments and strings

  if Match(r'\s*{\s*$', line):
    # We allow an open brace to start a line in the case where someone is using
    # braces in a block to explicitly create a new scope, which is commonly used
    # to control the lifetime of stack-allocated variables.  Braces are also
    # used for brace initializers inside function calls.  We don't detect this
    # perfectly: we just don't complain if the last non-whitespace character on
    # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
    # previous line starts a preprocessor block.
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if (not Search(r'[,;:}{(]\s*$', prevline) and
        not Match(r'\s*#', prevline)):
      error(filename, linenum, 'whitespace/braces', 4,
            '{ should almost always be at the end of the previous line')

  # An else clause should be on the same line as the preceding closing brace.
  if Match(r'\s*else\s*', line):
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if Match(r'\s*}\s*$', prevline):
      error(filename, linenum, 'whitespace/newline', 4,
            'An else should appear on the same line as the preceding }')

  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
    if Search(r'}\s*else if([^{]*)$', line):       # could be multi-line if
      # find the ( after the if
      pos = line.find('else if')
      pos = line.find('(', pos)
      if pos > 0:
        (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
        if endline[endpos:].find('{') == -1:    # must be brace after if
          error(filename, linenum, 'readability/braces', 5,
                'If an else has a brace on one side, it should have it on both')
    else:            # common case: else not followed by a multi-line if
      error(filename, linenum, 'readability/braces', 5,
            'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Block bodies should not be followed by a semicolon.  Due to C++11
  # brace initialization, there are more places where semicolons are
  # required than not, so we use a whitelist approach to check these
  # rather than a blacklist.  These are the places where "};" should
  # be replaced by just "}":
  # 1. Some flavor of block following closing parenthesis:
  #    for (;;) {};
  #    while (...) {};
  #    switch (...) {};
  #    Function(...) {};
  #    if (...) {};
  #    if (...) else if (...) {};
  #
  # 2. else block:
  #    if (...) else {};
  #
  # 3. const member function:
  #    Function(...) const {};
  #
  # 4. Block following some statement:
  #    x = 42;
  #    {};
  #
  # 5. Block at the beginning of a function:
  #    Function(...) {
  #      {};
  #    }
  #
  #    Note that naively checking for the preceding "{" will also match
  #    braces inside multi-dimensional arrays, but this is fine since
  #    that expression will not contain semicolons.
  #
  # 6. Block following another block:
  #    while (true) {}
  #    {};
  #
  # 7. End of namespaces:
  #    namespace {};
  #
  #    These semicolons seems far more common than other kinds of
  #    redundant semicolons, possibly due to people converting classes
  #    to namespaces.  For now we do not warn for this case.
  #
  # Try matching case 1 first.
  match = Match(r'^(.*\)\s*)\{', line)
  if match:
    # Matched closing parenthesis (case 1).  Check the token before the
    # matching opening parenthesis, and don't warn if it looks like a
    # macro.  This avoids these false positives:
    #  - macro that defines a base class
    #  - multi-line macro that defines a base class
    #  - macro that defines the whole class-head
    #
    # But we still issue warnings for macros that we know are safe to
    # warn, specifically:
    #  - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
    #  - TYPED_TEST
    #  - INTERFACE_DEF
    #  - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
    #
    # We implement a whitelist of safe macros instead of a blacklist of
    # unsafe macros, even though the latter appears less frequently in
    # google code and would have been easier to implement.  This is because
    # the downside for getting the whitelist wrong means some extra
    # semicolons, while the downside for getting the blacklist wrong
    # would result in compile errors.
    #
    # In addition to macros, we also don't want to warn on compound
    # literals.
    closing_brace_pos = match.group(1).rfind(')')
    opening_parenthesis = ReverseCloseExpression(
        clean_lines, linenum, closing_brace_pos)
    if opening_parenthesis[2] > -1:
      # Text before the opening parenthesis; an all-caps identifier at its
      # end is taken to be a macro name.
      line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
      macro = Search(r'\b([A-Z_]+)\s*$', line_prefix)
      # The tuple below must stay in sync with the list of safe macros
      # given above (TEST_P is part of that list).
      if ((macro and
           macro.group(1) not in (
               'TEST', 'TEST_F', 'TEST_P', 'MATCHER', 'MATCHER_P',
               'TYPED_TEST',
               'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
               'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or
          Search(r'\s+=\s*$', line_prefix)):
        # Unknown macro, or "= (...) {" which looks like a compound
        # literal: not eligible for the semicolon check.
        match = None

  else:
    # Try matching cases 2-3.
    match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line)
    if not match:
      # Try matching cases 4-6.  These are always matched on separate lines.
      #
      # Note that we can't simply concatenate the previous line to the
      # current line and do a single match, otherwise we may output
      # duplicate warnings for the blank line case:
      #   if (cond) {
      #     // blank line
      #   }
      prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
      if prevline and Search(r'[;{}]\s*$', prevline):
        match = Match(r'^(\s*)\{', line)

  # Check matching closing brace
  if match:
    # len(match.group(1)) is the column of the opening brace in all of the
    # patterns above.
    (endline, endlinenum, endpos) = CloseExpression(
        clean_lines, linenum, len(match.group(1)))
    if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
      # Current {} pair is eligible for semicolon check, and we have found
      # the redundant semicolon, output warning here.
      #
      # Note: because we are scanning forward for opening braces, and
      # outputting warnings for the matching closing brace, if there are
      # nested blocks with trailing semicolons, we will get the error
      # messages in reversed order.
      error(filename, endlinenum, 'readability/braces', 4,
            "You don't need a ; after a }")
3130
3131
def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
  """Flag 'for', 'while' and 'if' statements whose body is a lone semicolon.

  Only lines that begin (after optional whitespace) with one of the three
  keywords followed by an opening parenthesis are examined.  Because nothing
  but whitespace may precede the keyword, the "while" that ends a
  do-while-loop is mostly ignored: such lines normally start with a closing
  brace.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  keyword_match = Match(r'\s*(for|while|if)\s*\(', line)
  if not keyword_match:
    return

  # Locate the parenthesis that closes the condition; it may be on a
  # later line than the keyword.
  (end_line, end_linenum, end_pos) = CloseExpression(
      clean_lines, linenum, line.find('('))

  # Only a semicolon placed immediately after the closing parenthesis is
  # reported.  Whitespace or a newline in between is left alone, since a
  # separate check covers semicolons preceded by whitespace.
  if end_pos < 0 or not Match(r';', end_line[end_pos:]):
    return

  if keyword_match.group(1) == 'if':
    # An empty conditional block is likely an error.
    category = 'whitespace/empty_conditional_body'
    message = 'Empty conditional bodies should use {}'
  else:
    category = 'whitespace/empty_loop_body'
    message = 'Empty loop bodies should use {} or continue'
  error(filename, end_linenum, category, 5, message)
3165
3166
def CheckCheck(filename, clean_lines, linenum, error):
  """Suggests comparison-specific variants of CHECK and EXPECT macros.

  When a macro listed in _CHECK_MACROS wraps a single relational comparison
  and at least one side of the comparison is a literal constant, the
  replacement recorded in _CHECK_REPLACEMENT is suggested.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  lines = clean_lines.elided
  current = lines[linenum]

  # Work out which macro is in use and where its opening parenthesis is.
  check_macro = None
  start_pos = -1
  for candidate in _CHECK_MACROS:
    if current.find(candidate) < 0:
      continue
    check_macro = candidate

    # The substring test above is only a cheap filter.  The regular
    # expression makes sure that we are looking at the expected macro, as
    # opposed to some other macro that happens to contain its name.
    opener = Match(r'^(.*\b' + check_macro + r'\s*)\(', current)
    if opener:
      start_pos = len(opener.group(1))
      break
  if not check_macro or start_pos < 0:
    # Nothing to do if the line has no usable CHECK or EXPECT macro.
    return

  # Match parentheses to find where the macro argument ends.
  (last_line, end_line, end_pos) = CloseExpression(
      clean_lines, linenum, start_pos)
  if end_pos < 0:
    return

  # Collect the text between the macro's parentheses, joining the lines
  # together when the call spans more than one.
  if linenum == end_line:
    expression = current[start_pos + 1:end_pos - 1]
  else:
    pieces = [current[start_pos + 1:]]
    pieces.extend(lines[linenum + 1:end_line])
    pieces.append(last_line[0:end_pos - 1])
    expression = ''.join(pieces)

  # Walk the expression token by token so that parentheses are taken into
  # account.  This avoids false positives for inputs like
  # "CHECK((a < 4) == b)", which is not replaceable by CHECK_LE.
  lhs = ''
  rhs = ''
  operator = None
  while expression:
    tokenized = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||'
                      r'==|!=|>=|>|<=|<|\()(.*)$', expression)
    if not tokenized:
      # Plain operand text.  Consume a run of non-operator characters in
      # one go where possible, otherwise fall back to a single
      # non-whitespace character (with any whitespace in front of it).
      operand = (Match(r'^([^-=!<>()&|]+)(.*)$', expression) or
                 Match(r'^(\s*\S)(.*)$', expression))
      if not operand:
        break
      lhs += operand.group(1)
      expression = operand.group(2)
      continue

    token = tokenized.group(1)
    remainder = tokenized.group(2)
    if token == '(':
      # Parenthesized operand: copy it to lhs up to the matching ')'.
      (end, _) = FindEndOfExpressionInLine(remainder, 0, 1, '(', ')')
      if end < 0:
        return  # Unmatched parenthesis
      lhs += '(' + remainder[0:end]
      expression = remainder[end:]
    elif token in ('&&', '||'):
      # Logical and/or means the expression has more than one term, e.g.
      #   CHECK(42 < a && a < b);
      # which cannot be rewritten as a single comparison macro.
      return
    elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
      # Not a comparison, although it contains '<' or '>'.  It stays part
      # of the left-hand operand.
      lhs += token
      expression = remainder
    else:
      # Relational operator: everything after it is the right-hand side.
      operator = token
      rhs = remainder
      break

  # All three parts of the comparison are required.
  if not (lhs and operator and rhs):
    return

  # The loop above already rejects && and || on the left-hand side; the
  # right-hand side has to be checked separately.
  if '&&' in rhs or '||' in rhs:
    return

  # At least one of the operands must be a constant literal.  This is
  # to avoid suggesting replacements for unprintable things like
  # CHECK(variable != iterator)
  #
  # The following pattern matches decimal, hex integers, strings, and
  # characters (in that order).
  lhs = lhs.strip()
  rhs = rhs.strip()
  match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
  if not (Match(match_constant, lhs) or Match(match_constant, rhs)):
    return

  # Both operands are known here, so a message such as
  #   Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
  # would be possible.  The generic "(a == b)" form is kept because the
  # message might become unreadable when lhs or rhs gets long.
  error(filename, linenum, 'readability/check', 2,
        'Consider using %s instead of %s(a %s b)' % (
            _CHECK_REPLACEMENT[check_macro][operator],
            check_macro, operator))
3292
3293
def CheckAltTokens(filename, clean_lines, linenum, error):
  """Reports alternative operator keywords found on the line.

  Every match of _ALT_TOKEN_REPLACEMENT_PATTERN produces one error naming
  the operator from _ALT_TOKEN_REPLACEMENT that should be used instead.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Two kinds of lines are skipped entirely:
  #  - preprocessor lines;
  #  - lines that open or close a /* */ comment.  This is a last ditch
  #    effort to avoid multi-line comments.  It does not help if the
  #    comment started before the current line or ended after it, but it
  #    catches most of the false positives, and it gives people who use
  #    multi-line comments in preprocessor macros a way to work around
  #    this warning.
  #
  # TODO(unknown): remove this once cpplint has better support for
  # multi-line comments.
  if Match(r'^\s*#', line) or '/*' in line or '*/' in line:
    return

  for alt_match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
    alt_token = alt_match.group(1)
    error(filename, linenum, 'readability/alt_tokens', 2,
          'Use operator %s instead of %s' % (
              _ALT_TOKEN_REPLACEMENT[alt_token], alt_token))
3324
3325
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    For Unicode text, the width of the line in column positions, accounting
    for Unicode combining characters and wide characters.  For any other
    value (e.g. a byte string), len(line).
  """
  # type(u'') is `unicode` on Python 2 and `str` on Python 3.  Referring to
  # the `unicode` builtin directly raises NameError on Python 3, where that
  # name no longer exists.
  if not isinstance(line, type(u'')):
    return len(line)

  width = 0
  # Normalize to NFC first so that a base character followed by a combining
  # mark is folded into one precomposed character where one exists.
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      # Wide and fullwidth characters occupy two columns.
      width += 2
    elif not unicodedata.combining(uc):
      # Combining characters that remain after normalization add no width.
      width += 1
  return width
3346
3347
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Many of these rules (naming, comment style) are hard to test, so only the
  mechanical ones are covered: tabs, trailing whitespace, odd indentation,
  line length and several commands on one line.  The more specialised
  brace, spacing, access, CHECK-macro and alternative-token checks are then
  run on the same line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # The "elided" lines cannot be used for the whitespace and length checks
  # because commented lines must be checked too.  The "raw" lines are not
  # used either, because the inside of C++11 raw strings should not be
  # checked.
  line = clean_lines.lines_without_raw_strings[linenum]

  if '\t' in line:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  The ones
  # used below aren't as sophisticated, but it may be worth becoming so:
  #   RLENGTH==initial_spaces
  #   if(RLENGTH > 20) complain = 0;
  #   if(match($0, " +(error|private|public|protected):")) complain = 0;
  #   if(match(prev, "&& *$")) complain = 0;
  #   if(match(prev, "\\|\\| *$")) complain = 0;
  #   if(match(prev, "[\",=><] *$")) complain = 0;
  #   if(match($0, " <<")) complain = 0;
  #   if(match(prev, " +for \\(")) complain = 0;
  #   if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  cleansed_line = clean_lines.elided[linenum]
  initial_spaces = len(line) - len(line.lstrip(' '))
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace.  Consider deleting these extra spaces.')
  elif (initial_spaces in (1, 3) and
        # A line holding only a label (e.g. a section label) is allowed an
        # odd indent.
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start.  '
          'Are you using a 2-space indent?')

  # Header guard lines are exempt from the length limit below.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    is_header_guard = line.startswith(('#ifndef %s' % cppvar,
                                       '#define %s' % cppvar,
                                       '#endif  // %s' % cppvar))

  # Lines that may exceed the length limit:
  #  - #include lines and header guards, since there's no clean way to
  #    split them;
  #  - comment lines ending in a URL, which could be split but would then
  #    be harder to cut&paste;
  #  - the "$Id:...$" comment, which may get very long without it being
  #    the developer's fault.
  length_exempt = (line.startswith('#include') or is_header_guard or
                   Match(r'^\s*//.*http(s?)://\S*$', line) or
                   Match(r'^// \$Id:.*#[0-9]+ \$$', line))
  if not length_exempt:
    line_width = GetLineWidth(line)
    extended_length = int((_line_length * 1.25))
    if line_width > extended_length:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than %i characters' %
            extended_length)
    elif line_width > _line_length:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= %i characters long' % _line_length)

  # More than one ';' usually means several commands share the line.
  if cleansed_line.count(';') > 1 and 'for' not in cleansed_line:
    prev_line = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    # for loops are allowed two ;'s (and may run over two lines): the
    # previous line mentions "for" and has no ';' of its own.
    continues_for_header = 'for' in prev_line and ';' not in prev_line
    # It's ok to have many commands in a switch case that fits in 1 line.
    single_line_case = (('case ' in cleansed_line or
                         'default:' in cleansed_line) and
                        'break;' in cleansed_line)
    if not continues_for_header and not single_line_case:
      error(filename, linenum, 'whitespace/newline', 0,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
  CheckAccess(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  classinfo = nesting_state.InnermostClass()
  if classinfo:
    CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
3453
3454
# Matches a quoted #include of a .h file whose path contains no '/', i.e. a
# header named without its directory.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line.  Group 1 is the opening delimiter (< or ")
# and group 2 is the text between the delimiters.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename, i.e. everything up to the first
# '-', '_' or '.'. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
3463
3464
def _DropCommonSuffixes(filename):
  """Strips a well-known trailing marker such as _test.cc or -inl.h.

  A marker is only removed when it is joined to the rest of the name by
  '-' or '_'; that separator is removed as well.  When no marker applies,
  just the file extension is dropped.

  For example:
    >>> _DropCommonSuffixes('foo/foo-inl.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/bar/foo.cc')
    'foo/bar/foo'
    >>> _DropCommonSuffixes('foo/foo_internal.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
    'foo/foo_unusualinternal'

  Args:
    filename: The input filename.

  Returns:
    The filename with the common suffix removed.
  """
  known_suffixes = ('test.cc', 'regtest.cc', 'unittest.cc',
                    'inl.h', 'impl.h', 'internal.h')
  for tail in known_suffixes:
    # Number of characters to cut: the suffix plus its separator.
    cut = len(tail) + 1
    if len(filename) < cut or not filename.endswith(tail):
      continue
    if filename[-cut] in ('-', '_'):
      return filename[:-cut]
  return os.path.splitext(filename)[0]
3490
3491
def _IsTestFilename(filename):
  """Tells whether a filename carries one of the recognised test suffixes.

  Args:
    filename: The input filename.

  Returns:
    True if 'filename' ends with _test.cc, _unittest.cc or _regtest.cc,
    False otherwise.
  """
  test_suffixes = ('_test.cc', '_unittest.cc', '_regtest.cc')
  return filename.endswith(test_suffixes)
3507
3508
def _ClassifyInclude(fileinfo, include, is_system):
  """Decides which of the _XXX_HEADER categories an #include belongs to.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # An angle-bracket include is a system header: a C++ one when its name is
  # listed in _CPP_HEADERS, a C one otherwise.
  if is_system:
    if include in _CPP_HEADERS:
      return _CPP_SYS_HEADER
    return _C_SYS_HEADER

  # With the common suffixes dropped, an include that has the same base name
  # as the target file and lives either in the target's directory or in the
  # sibling "public" directory is likely to be owned by the target file.
  target_path = _DropCommonSuffixes(fileinfo.RepositoryName())
  target_dir, target_base = os.path.split(target_path)
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  if target_base == include_base:
    public_dir = os.path.normpath(target_dir + '/../public')
    if include_dir in (target_dir, public_dir):
      return _LIKELY_MY_HEADER

  # If only the first component of the two base names agrees, the target is
  # possibly implementing the include.  Such a header is allowed to come
  # first, but there is never a complaint if it does not.
  target_head = _RE_FIRST_COMPONENT.match(target_base)
  include_head = _RE_FIRST_COMPONENT.match(include_base)
  if (target_head and include_head and
      target_head.group(0) == include_head.group(0)):
    return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
3566
3567
3568
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Applies the checks that are specific to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # Headers should be named with their directory ("foo/bar.h"), not by
  # file name alone ("bar.h").
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # All remaining checks need the parsed include path.
  include_match = _RE_PATTERN_INCLUDE.search(line)
  if not include_match:
    return
  include = include_match.group(2)
  is_system = include_match.group(1) == '<'

  if include in include_state:
    # Including a file twice is okay in a handful of instances, but in
    # general it's not.
    error(filename, linenum, 'build/include', 4,
          '"%s" already included at %s:%s' %
          (include, filename, include_state[include]))
  else:
    include_state[include] = linenum

    # Headers are expected in this order:
    # 1) for foo.cc, foo.h  (preferred location)
    # 2) c system files
    # 3) cpp system files
    # 4) for foo.cc, foo.h  (deprecated location)
    # 5) other google headers
    #
    # Each include statement is classified as one of those 5 types.  The
    # include_state object keeps track of the highest type seen, and
    # complains if a lower type shows up after that.
    include_kind = _ClassifyInclude(fileinfo, include, is_system)
    error_message = include_state.CheckNextIncludeOrder(include_kind)
    if error_message:
      error(filename, linenum, 'build/include_order', 4,
            '%s. Should be: %s.h, c system, c++ system, other.' %
            (error_message, fileinfo.BaseName()))
    canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
    if not include_state.IsInAlphabeticalOrder(
        clean_lines, linenum, canonical_include):
      error(filename, linenum, 'build/include_alpha', 4,
            'Include "%s" not in alphabetical order' % include)
    include_state.SetLastHeader(canonical_include)

  # Look for any of the stream classes that are part of standard C++.
  # Many unit tests use cout, so test files are exempt.
  if (Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include) and
      not _IsTestFilename(filename)):
    error(filename, linenum, 'readability/streams', 3,
          'Streams are highly discouraged.')
3639
3640
def _GetTextInside(text, start_pattern):
  r"""Retrieves all the text between matching open and close parentheses.

  Given a string of lines and a regular expression string, retrieve all the text
  following the expression and between opening punctuation symbols like
  (, [, or {, and the matching close-punctuation symbol. This properly handles
  nested occurrences of the punctuations, so for the text like
    printf(a(), b(c()));
  a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
  start_pattern must match string having an open punctuation symbol at the end.

  Args:
    text: The lines to extract text. Its comments and strings must be elided.
           It can be single line and can span multiple lines.
    start_pattern: The regexp string indicating where to start extracting
                   the text.
  Returns:
    The extracted text.
    None if either the opening string or ending punctuation could not be found.
  """
  # TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
  # rewritten to use _GetTextInside (and use inferior regexp matching today).

  # Give opening punctuations to get the matching close-punctuations.
  matching_punctuation = {'(': ')', '{': '}', '[': ']'}
  # values() rather than itervalues(): the latter does not exist on Python 3,
  # and both give the same set on Python 2.
  closing_punctuation = set(matching_punctuation.values())

  # Find the position to start extracting text.
  match = re.search(start_pattern, text, re.M)
  if not match:  # start_pattern not found in text.
    return None
  start_position = match.end(0)

  assert start_position > 0, (
      'start_pattern must ends with an opening punctuation.')
  assert text[start_position - 1] in matching_punctuation, (
      'start_pattern must ends with an opening punctuation.')
  # Stack of closing punctuations we expect to have in text after position.
  punctuation_stack = [matching_punctuation[text[start_position - 1]]]
  position = start_position
  while punctuation_stack and position < len(text):
    char = text[position]
    if char == punctuation_stack[-1]:
      punctuation_stack.pop()
    elif char in closing_punctuation:
      # A closing punctuation without matching opening punctuations.
      return None
    elif char in matching_punctuation:
      punctuation_stack.append(matching_punctuation[char])
    position += 1
  if punctuation_stack:
    # Opening punctuations left without matching close-punctuations.
    return None
  # Punctuations match.  position is one past the final closing symbol, so
  # that symbol is left out of the returned text.
  return text[start_position:position - 1]
3695
3696
# Patterns for matching call-by-reference parameters.
#
# Supports nested templates up to 2 levels deep using this messy pattern:
#   < (?: < (?: < [^<>]*
#               >
#           |   [^<>] )*
#         >
#     |   [^<>] )*
#   >
# An identifier: a letter or underscore followed by any number of word
# characters.
_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # =~ [_[:alpha:]][_[:alnum:]]*
# A type name: an optional leading 'const', an optional elaborated-type
# keyword (typename/class/struct/union/enum), then one or more of word
# characters, '::' separators and template argument lists as described above.
_RE_PATTERN_TYPE = (
    r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?'
    r'(?:\w|'
    r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|'
    r'::)+')
# A call-by-reference parameter ends with '& identifier'.
# Group 1 captures the type together with '& identifier'.  An optional
# '= default' part may follow, and the parameter must be terminated by ','
# or ')'.
_RE_PATTERN_REF_PARAM = re.compile(
    r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*'
    r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]')
# A call-by-const-reference parameter either ends with 'const& identifier'
# or looks like 'const type& identifier' when 'type' is atomic.
# Unlike _RE_PATTERN_REF_PARAM this is a plain pattern string, not a
# compiled regular expression.
_RE_PATTERN_CONST_REF_PARAM = (
    r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT +
    r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')')
3721
3722
3723def CheckLanguage(filename, clean_lines, linenum, file_extension,
3724                  include_state, nesting_state, error):
3725  """Checks rules from the 'C++ language rules' section of cppguide.html.
3726
3727  Some of these rules are hard to test (function overloading, using
3728  uint32 inappropriately), but we do the best we can.
3729
3730  Args:
3731    filename: The name of the current file.
3732    clean_lines: A CleansedLines instance containing the file.
3733    linenum: The number of the line to check.
3734    file_extension: The extension (without the dot) of the filename.
3735    include_state: An _IncludeState instance in which the headers are inserted.
3736    nesting_state: A _NestingState instance which maintains information about
3737                   the current stack of nested blocks being parsed.
3738    error: The function to call with any errors found.
3739  """
3740  # If the line is empty or consists of entirely a comment, no need to
3741  # check it.
3742  line = clean_lines.elided[linenum]
3743  if not line:
3744    return
3745
3746  match = _RE_PATTERN_INCLUDE.search(line)
3747  if match:
3748    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
3749    return
3750
3751  # Reset include state across preprocessor directives.  This is meant
3752  # to silence warnings for conditional includes.
3753  if Match(r'^\s*#\s*(?:ifdef|elif|else|endif)\b', line):
3754    include_state.ResetSection()
3755
3756  # Make Windows paths like Unix.
3757  fullname = os.path.abspath(filename).replace('\\', '/')
3758
3759  # TODO(unknown): figure out if they're using default arguments in fn proto.
3760
3761  # Check to see if they're using an conversion function cast.
3762  # I just try to capture the most common basic types, though there are more.
3763  # Parameterless conversion functions, such as bool(), are allowed as they are
3764  # probably a member operator declaration or default constructor.
3765  match = Search(
3766      r'(\bnew\s+)?\b'  # Grab 'new' operator, if it's there
3767      r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
3768      r'(\([^)].*)', line)
3769  if match:
3770    matched_new = match.group(1)
3771    matched_type = match.group(2)
3772    matched_funcptr = match.group(3)
3773
3774    # gMock methods are defined using some variant of MOCK_METHODx(name, type)
3775    # where type may be float(), int(string), etc.  Without context they are
3776    # virtually indistinguishable from int(x) casts. Likewise, gMock's
3777    # MockCallback takes a template parameter of the form return_type(arg_type),
3778    # which looks much like the cast we're trying to detect.
3779    #
3780    # std::function<> wrapper has a similar problem.
3781    #
3782    # Return types for function pointers also look like casts if they
3783    # don't have an extra space.
3784    if (matched_new is None and  # If new operator, then this isn't a cast
3785        not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
3786             Search(r'\bMockCallback<.*>', line) or
3787             Search(r'\bstd::function<.*>', line)) and
3788        not (matched_funcptr and
3789             Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(',
3790                   matched_funcptr))):
3791      # Try a bit harder to catch gmock lines: the only place where
3792      # something looks like an old-style cast is where we declare the
3793      # return type of the mocked method, and the only time when we
3794      # are missing context is if MOCK_METHOD was split across
3795      # multiple lines.  The missing MOCK_METHOD is usually one or two
3796      # lines back, so scan back one or two lines.
3797      #
3798      # It's not possible for gmock macros to appear in the first 2
3799      # lines, since the class head + section name takes up 2 lines.
3800      if (linenum < 2 or
3801          not (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
3802                     clean_lines.elided[linenum - 1]) or
3803               Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
3804                     clean_lines.elided[linenum - 2]))):
3805        error(filename, linenum, 'readability/casting', 4,
3806              'Using deprecated casting style.  '
3807              'Use static_cast<%s>(...) instead' %
3808              matched_type)
3809
3810  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
3811                  'static_cast',
3812                  r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)
3813
3814  # This doesn't catch all cases. Consider (const char * const)"hello".
3815  #
3816  # (char *) "foo" should always be a const_cast (reinterpret_cast won't
3817  # compile).
3818  if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
3819                     'const_cast', r'\((char\s?\*+\s?)\)\s*"', error):
3820    pass
3821  else:
3822    # Check pointer casts for other than string constants
3823    CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
3824                    'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)
3825
3826  # In addition, we look for people taking the address of a cast.  This
3827  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
3828  # point where you think.
3829  match = Search(
3830      r'(?:&\(([^)]+)\)[\w(])|'
3831      r'(?:&(static|dynamic|down|reinterpret)_cast\b)', line)
3832  if match and match.group(1) != '*':
3833    error(filename, linenum, 'runtime/casting', 4,
3834          ('Are you taking an address of a cast?  '
3835           'This is dangerous: could be a temp var.  '
3836           'Take the address before doing the cast, rather than after'))
3837
3838  # Create an extended_line, which is the concatenation of the current and
3839  # next lines, for more effective checking of code that may span more than one
3840  # line.
3841  if linenum + 1 < clean_lines.NumLines():
3842    extended_line = line + clean_lines.elided[linenum + 1]
3843  else:
3844    extended_line = line
3845
3846  # Check for people declaring static/global STL strings at the top level.
3847  # This is dangerous because the C++ language does not guarantee that
3848  # globals with constructors are initialized before the first access.
3849  match = Match(
3850      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
3851      line)
3852  # Make sure it's not a function.
3853  # Function template specialization looks like: "string foo<Type>(...".
3854  # Class template definitions look like: "string Foo<Type>::Method(...".
3855  #
3856  # Also ignore things that look like operators.  These are matched separately
3857  # because operator names cross non-word boundaries.  If we change the pattern
3858  # above, we would decrease the accuracy of matching identifiers.
3859  if (match and
3860      not Search(r'\boperator\W', line) and
3861      not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)', match.group(3))):
3862    error(filename, linenum, 'runtime/string', 4,
3863          'For a static/global string constant, use a C style string instead: '
3864          '"%schar %s[]".' %
3865          (match.group(1), match.group(2)))
3866
3867  if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
3868    error(filename, linenum, 'runtime/init', 4,
3869          'You seem to be initializing a member variable with itself.')
3870
3871  if file_extension == 'h':
3872    # TODO(unknown): check that 1-arg constructors are explicit.
3873    #                How to tell it's a constructor?
3874    #                (handled in CheckForNonStandardConstructs for now)
3875    # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
3876    #                (level 1 error)
3877    pass
3878
3879  # Check if people are using the verboten C basic types.  The only exception
3880  # we regularly allow is "unsigned short port" for port.
3881  if Search(r'\bshort port\b', line):
3882    if not Search(r'\bunsigned short port\b', line):
3883      error(filename, linenum, 'runtime/int', 4,
3884            'Use "unsigned short" for ports, not "short"')
3885  else:
3886    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
3887    if match:
3888      error(filename, linenum, 'runtime/int', 4,
3889            'Use int16/int64/etc, rather than the C type %s' % match.group(1))
3890
3891  # When snprintf is used, the second argument shouldn't be a literal.
3892  match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
3893  if match and match.group(2) != '0':
3894    # If 2nd arg is zero, snprintf is used to calculate size.
3895    error(filename, linenum, 'runtime/printf', 3,
3896          'If you can, use sizeof(%s) instead of %s as the 2nd arg '
3897          'to snprintf.' % (match.group(1), match.group(2)))
3898
3899  # Check if some verboten C functions are being used.
3900  if Search(r'\bsprintf\b', line):
3901    error(filename, linenum, 'runtime/printf', 5,
3902          'Never use sprintf.  Use snprintf instead.')
3903  match = Search(r'\b(strcpy|strcat)\b', line)
3904  if match:
3905    error(filename, linenum, 'runtime/printf', 4,
3906          'Almost always, snprintf is better than %s' % match.group(1))
3907
3908  # Check if some verboten operator overloading is going on
3909  # TODO(unknown): catch out-of-line unary operator&:
3910  #   class X {};
3911  #   int operator&(const X& x) { return 42; }  // unary operator&
3912  # The trick is it's hard to tell apart from binary operator&:
3913  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
3914  if Search(r'\boperator\s*&\s*\(\s*\)', line):
3915    error(filename, linenum, 'runtime/operator', 4,
3916          'Unary operator& is dangerous.  Do not use it.')
3917
3918  # Check for suspicious usage of "if" like
3919  # } if (a == b) {
3920  if Search(r'\}\s*if\s*\(', line):
3921    error(filename, linenum, 'readability/braces', 4,
3922          'Did you mean "else if"? If not, start a new line for "if".')
3923
3924  # Check for potential format string bugs like printf(foo).
3925  # We constrain the pattern not to pick things like DocidForPrintf(foo).
3926  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
3927  # TODO(sugawarayu): Catch the following case. Need to change the calling
3928  # convention of the whole function to process multiple line to handle it.
3929  #   printf(
3930  #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
3931  printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
3932  if printf_args:
3933    match = Match(r'([\w.\->()]+)$', printf_args)
3934    if match and match.group(1) != '__VA_ARGS__':
3935      function_name = re.search(r'\b((?:string)?printf)\s*\(',
3936                                line, re.I).group(1)
3937      error(filename, linenum, 'runtime/printf', 4,
3938            'Potential format string bug. Do %s("%%s", %s) instead.'
3939            % (function_name, match.group(1)))
3940
3941  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
3942  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
3943  if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
3944    error(filename, linenum, 'runtime/memset', 4,
3945          'Did you mean "memset(%s, 0, %s)"?'
3946          % (match.group(1), match.group(2)))
3947
3948  if Search(r'\busing namespace\b', line):
3949    error(filename, linenum, 'build/namespaces', 5,
3950          'Do not use namespace using-directives.  '
3951          'Use using-declarations instead.')
3952
3953  # Detect variable-length arrays.
3954  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
3955  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
3956      match.group(3).find(']') == -1):
3957    # Split the size using space and arithmetic operators as delimiters.
3958    # If any of the resulting tokens are not compile time constants then
3959    # report the error.
3960    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3))
3961    is_const = True
3962    skip_next = False
3963    for tok in tokens:
3964      if skip_next:
3965        skip_next = False
3966        continue
3967
3968      if Search(r'sizeof\(.+\)', tok): continue
3969      if Search(r'arraysize\(\w+\)', tok): continue
3970
3971      tok = tok.lstrip('(')
3972      tok = tok.rstrip(')')
3973      if not tok: continue
3974      if Match(r'\d+', tok): continue
3975      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
3976      if Match(r'k[A-Z0-9]\w*', tok): continue
3977      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
3978      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
3979      # A catch all for tricky sizeof cases, including 'sizeof expression',
3980      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
3981      # requires skipping the next token because we split on ' ' and '*'.
3982      if tok.startswith('sizeof'):
3983        skip_next = True
3984        continue
3985      is_const = False
3986      break
3987    if not is_const:
3988      error(filename, linenum, 'runtime/arrays', 1,
3989            'Do not use variable-length arrays.  Use an appropriately named '
3990            "('k' followed by CamelCase) compile-time constant for the size.")
3991
3992  # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
3993  # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
3994  # in the class declaration.
3995  match = Match(
3996      (r'\s*'
3997       r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
3998       r'\(.*\);$'),
3999      line)
4000  if match and linenum + 1 < clean_lines.NumLines():
4001    next_line = clean_lines.elided[linenum + 1]
4002    # We allow some, but not all, declarations of variables to be present
4003    # in the statement that defines the class.  The [\w\*,\s]* fragment of
4004    # the regular expression below allows users to declare instances of
4005    # the class or pointers to instances, but not less common types such
4006    # as function pointers or arrays.  It's a tradeoff between allowing
4007    # reasonable code and avoiding trying to parse more C++ using regexps.
4008    if not Search(r'^\s*}[\w\*,\s]*;', next_line):
4009      error(filename, linenum, 'readability/constructors', 3,
4010            match.group(1) + ' should be the last thing in the class')
4011
4012  # Check for use of unnamed namespaces in header files.  Registration
4013  # macros are typically OK, so we allow use of "namespace {" on lines
4014  # that end with backslashes.
4015  if (file_extension == 'h'
4016      and Search(r'\bnamespace\s*{', line)
4017      and line[-1] != '\\'):
4018    error(filename, linenum, 'build/namespaces', 4,
4019          'Do not use unnamed namespaces in header files.  See '
4020          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
4021          ' for more information.')
4022
def CheckForNonConstReference(filename, clean_lines, linenum,
                              nesting_state, error):
  """Check for non-const references.

  Separate from CheckLanguage since it scans backwards from current
  line, instead of scanning forward.

  Parameters are only inspected where a declaration is plausible: at top
  level, directly inside a class or namespace, or on a line ending in '{'
  that just opened a block at one of those levels.  Functions matching the
  whitelist (swap, stream operators << and >>, static_assert and
  COMPILE_ASSERT) are exempt, also when the whitelisted name appears on one
  of the two preceding lines and the current line carries no unclosed
  function call of its own.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.  Findings are reported
           under category 'runtime/references' with confidence 2.
  """
  # Do nothing if there is no '&' on current line.
  line = clean_lines.elided[linenum]
  if '&' not in line:
    return

  # Long type names may be broken across multiple lines, usually in one
  # of these forms:
  #   LongType
  #       ::LongTypeContinued &identifier
  #   LongType::
  #       LongTypeContinued &identifier
  #   LongType<
  #       ...>::LongTypeContinued &identifier
  #
  # If we detected a type split across two lines, join the previous
  # line to current line so that we can match const references
  # accordingly.
  #
  # Note that this only scans back one line, since scanning back
  # arbitrary number of lines would be expensive.  If you have a type
  # that spans more than 2 lines, please use a typedef.
  if linenum > 1:
    previous = None
    if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
      # previous_line\n + ::current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
                        clean_lines.elided[linenum - 1])
    elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
      # previous_line::\n + current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
                        clean_lines.elided[linenum - 1])
    if previous:
      line = previous.group(1) + line.lstrip()
    else:
      # Check for templated parameter that is split across multiple lines
      endpos = line.rfind('>')
      if endpos > -1:
        (_, startline, startpos) = ReverseCloseExpression(
            clean_lines, linenum, endpos)
        if startpos > -1 and startline < linenum:
          # Found the matching < on an earlier line, collect all
          # pieces up to current line.
          line = ''.join(clean_lines.elided[i].strip()
                         for i in xrange(startline, linenum + 1))

  # Check for non-const references in function parameters.  A single '&' may
  # be found in the following places:
  #   inside expression: binary & for bitwise AND
  #   inside expression: unary & for taking the address of something
  #   inside declarators: reference parameter
  # We will exclude the first two cases by checking that we are not inside a
  # function body, including one that was just introduced by a trailing '{'.
  # TODO(unknown): Doesn't account for preprocessor directives.
  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
  check_params = False
  if not nesting_state.stack:
    check_params = True  # top level
  elif (isinstance(nesting_state.stack[-1], _ClassInfo) or
        isinstance(nesting_state.stack[-1], _NamespaceInfo)):
    check_params = True  # within class or namespace
  elif Match(r'.*{\s*$', line):
    if (len(nesting_state.stack) == 1 or
        isinstance(nesting_state.stack[-2], _ClassInfo) or
        isinstance(nesting_state.stack[-2], _NamespaceInfo)):
      check_params = True  # just opened global/class/namespace block
  # We allow non-const references in a few standard places, like functions
  # called "swap()" or iostream operators like "<<" or ">>".  Do not check
  # those function parameters.
  #
  # We also accept & in static_assert, which looks like a function but
  # it's actually a declaration expression.
  #
  # The optional template argument list after swap is "<" + word characters
  # and colons + ">", so that qualified names such as swap<std::string>( are
  # whitelisted as well.
  whitelisted_functions = (r'(?:[sS]wap(?:<[\w:]+>)?|'
                           r'operator\s*[<>][<>]|'
                           r'static_assert|COMPILE_ASSERT'
                           r')\s*\(')
  if Search(whitelisted_functions, line):
    check_params = False
  elif not Search(r'\S+\([^)]*$', line):
    # Don't see a whitelisted function on this line.  Actually we
    # didn't see any function name on this line, so this is likely a
    # multi-line parameter list.  Try a bit harder to catch this case.
    for i in xrange(2):
      if (linenum > i and
          Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
        check_params = False
        break

  if check_params:
    decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
    for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
      if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
        error(filename, linenum, 'runtime/references', 2,
              'Is this a non-const reference? '
              'If so, make const or use a pointer: ' +
              ReplaceAll(' *<', '<', parameter))
4134
4135
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Checks for a C-style cast by looking for the pattern.

  Depending on what surrounds the match, one of three findings may be
  reported: 'runtime/sizeof' (confidence 1) for sizeof(type),
  'readability/function' (confidence 3) when the match looks like an unnamed
  function parameter, or 'readability/casting' (confidence 4) for what is
  taken to be a real cast.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The string for the C++ cast to recommend.  This is either
      reinterpret_cast, static_cast, or const_cast, depending.
    pattern: The regular expression used to find C-style casts.  Group 1 is
      reported as the cast's target type.
    error: The function to call with any errors found.

  Returns:
    True if an error was emitted.
    False otherwise.
  """
  match = Search(pattern, line)
  if not match:
    return False

  # Text preceding the "(type)" match.  The "- 1" drops the character right
  # before group 1 -- assumes the pattern puts the opening parenthesis
  # immediately before group 1, as the callers visible in this file do.
  prefix = line[0:match.start(1) - 1]

  # e.g., sizeof(int)
  if Match(r'.*sizeof\s*$', prefix):
    error(filename, linenum, 'runtime/sizeof', 1,
          'Using sizeof(type).  Use sizeof(varname) instead if possible')
    return True

  # operator++(int) and operator--(int)
  if prefix.endswith(' operator++') or prefix.endswith(' operator--'):
    return False

  # A single unnamed argument for a function tends to look like old
  # style cast.  If we see those, don't issue warnings for deprecated
  # casts, instead issue warnings for unnamed arguments where
  # appropriate.
  #
  # These are things that we want warnings for, since the style guide
  # explicitly require all parameters to be named:
  #   Function(int);
  #   Function(int) {
  #   ConstMember(int) const;
  #   ConstMember(int) const {
  #   ExceptionMember(int) throw (...);
  #   ExceptionMember(int) throw (...) {
  #   PureVirtual(int) = 0;
  #
  # These are functions of some sort, where the compiler would be fine
  # if they had named parameters, but people often omit those
  # identifiers to reduce clutter:
  #   (FunctionPointer)(int);
  #   (FunctionPointer)(int) = value;
  #   Function((function_pointer_arg)(int))
  #   <TemplateArgument(int)>;
  #   <(FunctionPointerTemplateArgument)(int)>;
  remainder = line[match.end(0):]
  if Match(r'^\s*(?:;|const\b|throw\b|=|>|\{|\))', remainder):
    # Looks like an unnamed parameter.

    # Don't warn on any kind of template arguments.
    if Match(r'^\s*>', remainder):
      return False

    # Don't warn on assignments to function pointers, but keep warnings for
    # unnamed parameters to pure virtual functions.  Note that this pattern
    # will also pass on assignments of "0" to function pointers, but the
    # preferred values for those would be "nullptr" or "NULL".
    #
    # Any amount of whitespace (including none) is accepted before the '=',
    # so "Fn(int)=value;" is treated the same as "Fn(int) = value;".
    matched_zero = Match(r'^\s*=\s*(\S+)\s*;', remainder)
    if matched_zero and matched_zero.group(1) != '0':
      return False

    # Don't warn on function pointer declarations.  For this we need
    # to check what came before the "(type)" string.
    if Match(r'.*\)\s*$', line[0:match.start(0)]):
      return False

    # Don't warn if the parameter is named with block comments, e.g.:
    #  Function(int /*unused_param*/);
    if '/*' in raw_line:
      return False

    # Passed all filters, issue warning here.
    error(filename, linenum, 'readability/function', 3,
          'All parameters should be named in a function')
    return True

  # At this point, all that should be left is actual casts.
  error(filename, linenum, 'readability/casting', 4,
        'Using C-style cast.  Use %s<%s>(...) instead' %
        (cast_type, match.group(1)))

  return True
4230
4231
# Pairs of (header, names of the templates that header is the reason to
# include).  The order of entries is preserved in _re_pattern_templates below.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', (
        'unary_function', 'binary_function',
        'plus', 'minus', 'multiplies', 'divides', 'modulus',
        'negate',
        'equal_to', 'not_equal_to', 'greater', 'less',
        'greater_equal', 'less_equal',
        'logical_and', 'logical_or', 'logical_not',
        'unary_negate', 'not1', 'binary_negate', 'not2',
        'bind1st', 'bind2nd',
        'pointer_to_unary_function',
        'pointer_to_binary_function',
        'ptr_fun',
        'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
        'mem_fun_ref_t',
        'const_mem_fun_t', 'const_mem_fun1_t',
        'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
        'mem_fun_ref',
    )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
)

# Finds the bare word "string".
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# One (compiled pattern, template name, '<algorithm>') triple per algorithm.
# The pattern accepts name(...) and name<type>(...) with at least one
# character that is not ')' after the opening parenthesis, and rejects a name
# directly preceded by '>' or '.' (as in foo->max or foo.max).
_re_pattern_algorithm_header = [
    (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
     _template,
     '<algorithm>')
    for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                      'transform')]

# One (compiled pattern, 'name<>', header) triple per template listed in
# _HEADERS_CONTAINING_TEMPLATES, in listing order.  The pattern looks for the
# template name followed by optional whitespace and '<'.
_re_pattern_templates = [
    (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
     _template + '<>',
     _header)
    for _header, _templates in _HEADERS_CONTAINING_TEMPLATES
    for _template in _templates]
4288
4289
def FilesBelongToSameModule(filename_cc, filename_h):
  """Check if these two filenames belong to the same module.

  The concept of a 'module' here is a as follows:
  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
  same 'module' if they are in the same directory.
  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
  to belong to the same module here.

  If the filename_cc contains a longer path than the filename_h, for example,
  '/absolute/path/to/base/sysinfo.cc', and this file would include
  'base/sysinfo.h', this function also produces the prefix needed to open the
  header. This is used by the caller of this function to more robustly open the
  header file. We don't have access to the real include paths in this context,
  so we need this guesswork here.

  The header path must match the end of the .cc path starting at a path
  component boundary: 'base/sysinfo.cc' and 'info.h' are NOT the same module,
  even though 'sysinfo' ends with 'info'.  A header whose name is empty once
  the '.h' / '-inl.h' suffix is removed never matches.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.  Always ''
            when the bool is False.
  """

  if not filename_cc.endswith('.cc'):
    return (False, '')
  filename_cc = filename_cc[:-len('.cc')]
  if filename_cc.endswith('_unittest'):
    filename_cc = filename_cc[:-len('_unittest')]
  elif filename_cc.endswith('_test'):
    filename_cc = filename_cc[:-len('_test')]
  filename_cc = filename_cc.replace('/public/', '/')
  filename_cc = filename_cc.replace('/internal/', '/')

  if not filename_h.endswith('.h'):
    return (False, '')
  filename_h = filename_h[:-len('.h')]
  if filename_h.endswith('-inl'):
    filename_h = filename_h[:-len('-inl')]
  filename_h = filename_h.replace('/public/', '/')
  filename_h = filename_h.replace('/internal/', '/')

  # An empty header stem would be a suffix of everything; reject it up front.
  if not filename_h or not filename_cc.endswith(filename_h):
    return (False, '')

  # Slice with an explicit end index: filename_cc[:-len(filename_h)] would
  # yield '' instead of the whole string if the length were ever zero.
  common_path = filename_cc[:len(filename_cc) - len(filename_h)]

  # The suffix match must begin at a directory boundary, otherwise unrelated
  # files such as 'sysinfo' and 'info' would be paired up.
  if common_path and not common_path.endswith('/'):
    return (False, '')
  return (True, common_path)
4343
4344
def UpdateIncludeState(filename, include_state, io=codecs):
  """Fill up the include_state with new includes found from the file.

  Every line of the header is stripped of comments and searched for an
  #include; each included name not yet present in include_state is added.
  The header file is closed before returning.

  Args:
    filename: the name of the header to read.
    include_state: an _IncludeState instance in which the headers are inserted.
    io: The io factory to use to read the file. Provided for testability.
        Its open() is called as open(filename, 'r', 'utf8', 'replace') and
        must return an iterable of lines; a close() method on the result is
        optional.

  Returns:
    True if a header was successfully added. False otherwise.
  """
  try:
    headerfile = io.open(filename, 'r', 'utf8', 'replace')
  except IOError:
    return False
  try:
    for index, line in enumerate(headerfile):
      clean_line = CleanseComments(line)
      match = _RE_PATTERN_INCLUDE.search(clean_line)
      if match:
        include = match.group(2)
        # The value formatting is cute, but not really used right now.
        # What matters here is that the key is in include_state.
        include_state.setdefault(include, '%s:%d' % (filename, index + 1))
  finally:
    # Release the file handle.  Injected io factories may hand back a plain
    # iterable without close(), so only call it when it exists.
    close = getattr(headerfile, 'close', None)
    if close is not None:
      close()
  return True
4372
4373
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
  """Reports for missing stl includes.

  This function will output warnings to make sure you are including the headers
  necessary for the stl containers and functions that you use. We only give one
  reason to include a header. For example, if you use both equal_to<> and
  less<> in a .h file, only one (the latter in the file) of these will be
  reported as a reason to include the <functional>.

  Headers that appear to belong to the same module as this file are read as
  well, and whatever they include counts as included here.  For a .cc file
  where no such header could be loaded, nothing is reported at all.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance.  It is copied, not modified.
    error: The function to call with any errors found.  Findings are reported
           under category 'build/include_what_you_use' with confidence 4.
    io: The IO factory to use to read the header file. Provided for unittest
        injection.
  """
  required = {}  # A map of header name to linenumber and the template entity.
                 # Example of required: { '<functional>': (1219, 'less<>') }

  for linenum in xrange(clean_lines.NumLines()):
    line = clean_lines.elided[linenum]
    if not line or line[0] == '#':
      continue

    # String is special -- it is a non-templatized type in STL.
    matched = _RE_PATTERN_STRING.search(line)
    if matched:
      # Don't warn about strings in non-STL namespaces:
      # (We check only the first match per line; good enough.)
      prefix = line[:matched.start()]
      if prefix.endswith('std::') or not prefix.endswith('::'):
        required['<string>'] = (linenum, 'string')

    for pattern, template, header in _re_pattern_algorithm_header:
      if pattern.search(line):
        required[header] = (linenum, template)

    # The following function is just a speed up, no semantics are changed.
    if '<' not in line:  # Reduces the cpu time usage by skipping lines.
      continue

    for pattern, template, header in _re_pattern_templates:
      if pattern.search(line):
        required[header] = (linenum, template)

  # The policy is that if you #include something in foo.h you don't need to
  # include it again in foo.cc. Here, we will look at possible includes.
  # Let's copy the include_state so it is only messed up within this function.
  include_state = include_state.copy()

  # Did we find the header for this file (if any) and successfully load it?
  header_found = False

  # Use the absolute path so that matching works properly.
  abs_filename = FileInfo(filename).FullName()

  # For Emacs's flymake.
  # If cpplint is invoked from Emacs's flymake, a temporary file is generated
  # by flymake and that file name might end with '_flymake.cc'. In that case,
  # restore original file name here so that the corresponding header file can be
  # found.
  # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
  # instead of 'foo_flymake.h'
  abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)

  # include_state is modified during iteration (UpdateIncludeState inserts
  # into it), so we iterate over an explicit snapshot of the keys rather than
  # relying on keys() itself returning a fresh list.
  header_keys = list(include_state.keys())
  for header in header_keys:
    (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
    fullpath = common_path + header
    if same_module and UpdateIncludeState(fullpath, include_state, io):
      header_found = True

  # If we can't find the header file for a .cc, assume it's because we don't
  # know where to look. In that case we'll give up as we're not sure they
  # didn't include it in the .h file.
  # TODO(unknown): Do a better job of finding .h files so we are confident that
  # not having the .h file means there isn't one.
  if filename.endswith('.cc') and not header_found:
    return

  # All the lines have been processed, report the errors found.
  for required_header_unstripped in required:
    template = required[required_header_unstripped][1]
    if required_header_unstripped.strip('<>"') not in include_state:
      error(filename, required[required_header_unstripped][0],
            'build/include_what_you_use', 4,
            'Add #include ' + required_header_unstripped + ' for ' + template)
4465
4466
# Finds make_pair followed by an explicit template argument list.
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')


def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
  """Flag calls to make_pair that spell out its template arguments.

  G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
  specified explicitly, and such use isn't intended in any case.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  if not _RE_PATTERN_EXPLICIT_MAKEPAIR.search(clean_lines.elided[linenum]):
    return
  confidence = 4  # 4 = high confidence
  message = (
      'For C++11-compatibility, omit template arguments from make_pair OR '
      'use pair directly OR if appropriate, construct a pair directly')
  error(filename, linenum, 'build/explicit_make_pair', confidence, message)
4489
4490
def ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions=None):
  """Processes a single line in the file.

  NOLINT suppressions are parsed and the nesting state is updated first.  If
  the innermost block is then inside inline assembly, no checks are run for
  this line.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: A CleansedLines instance containing the file; its raw_lines
                 attribute is used for NOLINT parsing.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence level, and message
    extra_check_functions: An optional iterable of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error.  None (the default) means no extra checks.
  """
  raw_lines = clean_lines.raw_lines
  ParseNolintSuppressions(filename, raw_lines[line], line, error)
  nesting_state.Update(filename, clean_lines, line, error)
  if nesting_state.stack and nesting_state.stack[-1].inline_asm != _NO_ASM:
    return
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                nesting_state, error)
  CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                nesting_state, error)
  CheckVlogArguments(filename, clean_lines, line, error)
  CheckPosixThreading(filename, clean_lines, line, error)
  CheckInvalidIncrement(filename, clean_lines, line, error)
  CheckMakePairUsesDeduction(filename, clean_lines, line, error)
  # A None default replaces the former shared mutable [] default.
  for check_fn in extra_check_functions or ():
    check_fn(filename, clean_lines, line, error)
4531
def ProcessFileData(filename, file_extension, lines, error,
                    extra_check_functions=None):
  """Performs lint checks and reports any errors to the given error function.

  Whole-file checks (copyright, header guard for 'h' files, include-what-you-
  use, bad characters, newline at EOF) are run around a per-line pass that
  calls ProcessLine for every line.  The caller's lines list is not modified;
  a new list with a marker line added at each end is built and used instead.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence level, and message
    extra_check_functions: An optional array of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error.  None (the default) means no extra checks.
  """
  # A None default replaces the former shared mutable [] default; normalise
  # it here so ProcessLine always receives an iterable.
  if extra_check_functions is None:
    extra_check_functions = []

  lines = (['// marker so line numbers and indices both start at 1'] + lines +
           ['// marker so line numbers end in a known way'])

  include_state = _IncludeState()
  function_state = _FunctionState()
  nesting_state = _NestingState()

  ResetNolintSuppressions()

  CheckForCopyright(filename, lines, error)

  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for line in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions)
  nesting_state.CheckCompletedBlocks(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForBadCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
4576
def ProcessFile(filename, vlevel, extra_check_functions=[]):
  """Does google-lint on a single file.

  Reads the file (or stdin when filename is '-'), strips trailing carriage
  returns from every line and, if the extension is accepted, passes the lines
  to ProcessFileData. A "Done processing" note is written to stderr unless
  the input could not be read.

  Args:
    filename: The name of the file to parse, or '-' to read from stdin.

    vlevel: The level of errors to report.  Every error of confidence
    >= verbose_level will be reported.  0 is a good default.

    extra_check_functions: An array of additional check functions that will be
                           run on each source line. Each function takes 4
                           arguments: filename, clean_lines, line, error
  """

  _SetVerboseLevel(vlevel)

  try:
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.  Those are removed below.
    if filename == '-':
      contents = codecs.StreamReaderWriter(sys.stdin,
                                           codecs.getreader('utf8'),
                                           codecs.getwriter('utf8'),
                                           'replace').read()
    else:
      source = codecs.open(filename, 'r', 'utf8', 'replace')
      try:
        contents = source.read()
      finally:
        # Always release the file handle, even if the read fails.
        source.close()
  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  lines = contents.split('\n')

  # Remove trailing '\r' and remember whether any was seen, so that a single
  # warning can be issued below on platforms where '\r\n' is not the native
  # line separator.
  carriage_return_found = False
  for linenum, text in enumerate(lines):
    if text.endswith('\r'):
      lines[linenum] = text.rstrip('\r')
      carriage_return_found = True

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if filename != '-' and file_extension not in _valid_extensions:
    sys.stderr.write('Ignoring %s; not a valid file name '
                     '(%s)\n' % (filename, ', '.join(_valid_extensions)))
  else:
    ProcessFileData(filename, file_extension, lines, Error,
                    extra_check_functions)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputting only one error for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found; '
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)

4643
4644
def PrintUsage(message):
  """Writes the usage text to stderr and terminates the program.

  This function never returns.

  Args:
    message: Optional error text. When it is empty or None the process exits
             with status 1; otherwise it exits with a FATAL ERROR message
             built from this text.
  """
  sys.stderr.write(_USAGE)
  if not message:
    sys.exit(1)
  sys.exit('\nFATAL ERROR: ' + message)

4656
4657
def PrintCategories():
  """Lists every error category on stderr, then exits with status 0.

  The categories are the names accepted by the --filter option. Each one is
  written on its own line, indented by two spaces.
  """
  listing = ''.join(['  %s\n' % category for category in _ERROR_CATEGORIES])
  sys.stderr.write(listing)
  sys.exit(0)

4665
4666
def ParseArguments(args):
  """Parses the command line arguments.

  As side effects this sets the output format, verbosity level, filters and
  counting style, and may overwrite the module-level _root, _line_length and
  _valid_extensions settings.

  Any invalid option or option value ends the program through PrintUsage;
  --help does the same, and an empty --filter value ends it through
  PrintCategories.

  Args:
    args: The command line arguments, without the program name.

  Returns:
    The list of filenames to lint.
  """
  # Declared once up front rather than inside the individual branches below.
  global _root, _line_length, _valid_extensions

  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'counting=',
                                                 'filter=',
                                                 'root=',
                                                 'linelength=',
                                                 'extensions='])
  except getopt.GetoptError:
    # PrintUsage exits, so 'opts' is never read while unbound.
    PrintUsage('Invalid arguments.')

  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''
  counting_style = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7', 'eclipse'):
        PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
      output_format = val
    elif opt == '--verbose':
      # Report a bad value the same way --linelength does instead of letting
      # the ValueError escape as a traceback.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('Verbosity must be an integer.')
    elif opt == '--filter':
      filters = val
      if not filters:
        PrintCategories()
    elif opt == '--counting':
      if val not in ('total', 'toplevel', 'detailed'):
        PrintUsage('Valid counting options are total, toplevel, and detailed')
      counting_style = val
    elif opt == '--root':
      _root = val
    elif opt == '--linelength':
      try:
        _line_length = int(val)
      except ValueError:
        PrintUsage('Line length must be digits.')
    elif opt == '--extensions':
      # Splitting a string on ',' cannot fail, so no error handling is needed.
      _valid_extensions = set(val.split(','))

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)
  _SetCountingStyle(counting_style)

  return filenames

4735
4736
def main():
  """Command-line entry point: lints every file named on the command line.

  Exits with status 1 when at least one error was counted and with status 0
  otherwise.
  """
  targets = ParseArguments(sys.argv[1:])

  # Re-wrap stderr so that printing something with non-ASCII characters
  # produces replacement characters instead of killing the run.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader, utf8_writer,
                                         'replace')

  _cpplint_state.ResetErrorCounts()
  for path in targets:
    ProcessFile(path, _cpplint_state.verbose_level)
  _cpplint_state.PrintErrorCounts()

  found_errors = _cpplint_state.error_count > 0
  sys.exit(found_errors)


# Run the linter only when this file is executed as a script.
if __name__ == '__main__':
  main()
4757