1#!/usr/bin/env python
2# pep8.py - Check Python source code formatting, according to PEP 8
3# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
4# Copyright (C) 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
5#
6# Permission is hereby granted, free of charge, to any person
7# obtaining a copy of this software and associated documentation files
8# (the "Software"), to deal in the Software without restriction,
9# including without limitation the rights to use, copy, modify, merge,
10# publish, distribute, sublicense, and/or sell copies of the Software,
11# and to permit persons to whom the Software is furnished to do so,
12# subject to the following conditions:
13#
14# The above copyright notice and this permission notice shall be
15# included in all copies or substantial portions of the Software.
16#
17# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24# SOFTWARE.
25
26r"""
27Check Python source code formatting, according to PEP 8.
28
29For usage and a list of options, try this:
30$ python pep8.py -h
31
32This program and its regression test suite live here:
33http://github.com/jcrocholl/pep8
34
35Groups of errors and warnings:
36E errors
37W warnings
38100 indentation
39200 whitespace
40300 blank lines
41400 imports
42500 line length
43600 deprecation
44700 statements
45900 syntax error
46"""
47from __future__ import with_statement
48
49__version__ = '1.5.7'
50
51import os
52import sys
53import re
54import time
55import inspect
56import keyword
57import tokenize
58from optparse import OptionParser
59from fnmatch import fnmatch
60try:
61    from configparser import RawConfigParser
62    from io import TextIOWrapper
63except ImportError:
64    from ConfigParser import RawConfigParser
65
# Directory names skipped during recursive file discovery.
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__'
# Checks disabled unless explicitly selected.
DEFAULT_IGNORE = 'E123,E226,E24'
if sys.platform == 'win32':
    DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
else:
    DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
                                  os.path.expanduser('~/.config'), 'pep8')
# Per-project configuration files, searched in this order.
PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8')
TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
MAX_LINE_LENGTH = 79
REPORT_FORMAT = {
    'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
    'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
}

# Value of the compile() flag that requests an AST without executing the
# code (matches ast.PyCF_ONLY_AST; presumably used with compile() later
# in this module).
PyCF_ONLY_AST = 1024
SINGLETONS = frozenset(['False', 'None', 'True'])
# Keywords recognized by the whitespace checks; the singletons above are
# excluded because they are handled by the comparison checks instead.
KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-'])
WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
WS_NEEDED_OPERATORS = frozenset([
    '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
    '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])
WHITESPACE = frozenset(' \t')
NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])
SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])
# ERRORTOKEN is triggered by backticks in Python 3
SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN])
BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']

INDENT_REGEX = re.compile(r'([ \t]*)')
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,')
RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$')
ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?:  |\t)')
COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)')
COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+[^[({ ]+\s+(in|is)\s')
# NOTE(review): the '.' in 'types.' below is an unescaped regex dot, so it
# matches any character; presumably a literal '.' was intended -- confirm
# against upstream before tightening.
COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'
                                r'|\s*\(\s*([^)]*[^ )])\s*\))')
KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
LAMBDA_REGEX = re.compile(r'\blambda\b')
HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')

# Work around Python < 2.6 behaviour, which does not generate NL after
# a comment which is on a line by itself.
COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'
116
117
118##############################################################################
119# Plugins (check functions) for physical lines
120##############################################################################
121
122
def tabs_or_spaces(physical_line, indent_char):
    r"""Never mix tabs and spaces.

    Indenting with spaces only is the most popular convention; tabs only is
    the second-most popular.  A mixture of the two should be converted to
    spaces exclusively.  Python's -t command line option warns about such a
    mixture, and -tt turns the warnings into errors.

    Okay: if a == 0:\n        a = 1\n        b = 1
    E101: if a == 0:\n        a = 1\n\tb = 1
    """
    leading = re.match(r'([ \t]*)', physical_line).group(1)
    offset = 0
    for char in leading:
        # Any indentation character other than the file's dominant one
        # signals a tab/space mixture.
        if char != indent_char:
            return offset, "E101 indentation contains mixed spaces and tabs"
        offset += 1
140
141
def tabs_obsolete(physical_line):
    r"""For new projects, spaces-only are strongly recommended over tabs.

    Okay: if True:\n    return
    W191: if True:\n\treturn
    """
    leading = re.match(r'([ \t]*)', physical_line).group(1)
    # Report the column of the first tab found in the indentation, if any.
    tab_pos = leading.find('\t')
    if tab_pos >= 0:
        return tab_pos, "W191 indentation contains tabs"
151
152
def trailing_whitespace(physical_line):
    r"""Trailing whitespace is superfluous.

    The warning returned varies on whether the line itself is blank, for easier
    filtering for those who want to indent their blank lines.

    Okay: spam(1)\n#
    W291: spam(1) \n#
    W293: class Foo(object):\n    \n    bang = 12
    """
    # Strip the line terminator: newline, carriage return, then form feed.
    line = physical_line.rstrip('\n').rstrip('\r').rstrip('\x0c')
    stripped = line.rstrip(' \t\v')
    if stripped == line:
        return None
    if stripped:
        return len(stripped), "W291 trailing whitespace"
    return 0, "W293 blank line contains whitespace"
172
173
def trailing_blank_lines(physical_line, lines, line_number, total_lines):
    r"""Trailing blank lines are superfluous.

    Okay: spam(1)
    W391: spam(1)\n

    However the last line should end with a new line (warning W292).
    """
    # Only the very last physical line of the file is of interest here.
    if line_number != total_lines:
        return None
    stripped = physical_line.rstrip()
    if not stripped:
        return 0, "W391 blank line at end of file"
    if stripped == physical_line:
        # Nothing was stripped, so the line carries no final newline.
        return len(physical_line), "W292 no newline at end of file"
188
189
def maximum_line_length(physical_line, max_line_length, multiline):
    r"""Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to have
    several windows side-by-side.  The default wrapping on such devices looks
    ugly.  Therefore, please limit all lines to a maximum of 79 characters.
    For flowing long blocks of text (docstrings or comments), limiting the
    length to 72 characters is recommended.

    Reports error E501.
    """
    line = physical_line.rstrip()
    length = len(line)
    # Short lines and "# noqa" lines are exempt (noqa is only consulted
    # once the length limit is actually exceeded).
    if length <= max_line_length or noqa(line):
        return
    # Special case for long URLs in multi-line docstrings or comments,
    # but still report the error when the 72 first chars are whitespaces.
    chunks = line.split()
    if ((len(chunks) == 1 and multiline) or
        (len(chunks) == 2 and chunks[0] == '#')) and \
            len(line) - len(chunks[-1]) < max_line_length - 7:
        return
    if hasattr(line, 'decode'):   # Python 2
        # The line could contain multi-byte characters; count the decoded
        # characters rather than the raw bytes.
        try:
            length = len(line.decode('utf-8'))
        except UnicodeError:
            pass
    if length > max_line_length:
        return (max_line_length, "E501 line too long "
                "(%d > %d characters)" % (length, max_line_length))
221
222
223##############################################################################
224# Plugins (check functions) for logical lines
225##############################################################################
226
227
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                blank_before, previous_logical, previous_indent_level):
    r"""Separate top-level function and class definitions with two blank lines.

    Method definitions inside a class are separated by a single blank line.

    Extra blank lines may be used (sparingly) to separate groups of related
    functions.  Blank lines may be omitted between a bunch of related
    one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical sections.

    Okay: def a():\n    pass\n\n\ndef b():\n    pass
    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass

    E301: class Foo:\n    b = 0\n    def bar():\n        pass
    E302: def a():\n    pass\n\ndef b(n):\n    pass
    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    E303: def a():\n\n\n\n    pass
    E304: @decorator\n\ndef a():\n    pass
    """
    if line_number < 3 and not previous_logical:
        return  # nothing is expected before the first logical line
    if previous_logical.startswith('@'):
        # A decorator must be immediately followed by what it decorates.
        if blank_lines:
            yield 0, "E304 blank lines found after function decorator"
    elif blank_lines > 2 or (indent_level and blank_lines == 2):
        yield 0, "E303 too many blank lines (%d)" % blank_lines
    elif logical_line.startswith(('def ', 'class ', '@')):
        if indent_level:
            # Nested def/class: one blank line is expected, unless this is
            # the first statement of an enclosing block or it directly
            # follows a docstring.
            if not blank_before and previous_indent_level >= indent_level \
                    and not re.match(r'u?r?["\']', previous_logical):
                yield 0, "E301 expected 1 blank line, found 0"
        elif blank_before != 2:
            yield 0, "E302 expected 2 blank lines, found %d" % blank_before
263
264
def extraneous_whitespace(logical_line):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in these situations:
    - Immediately inside parentheses, brackets or braces.
    - Immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x
    """
    line = logical_line
    # One pattern covers both cases: an opener followed by a space, or a
    # space followed by a closer / comma / semicolon / colon.
    for m in re.finditer(r'[[({] | []}),;:]', line):
        text = m.group()
        char = text.strip()
        found = m.start()
        if text == char + ' ':
            # space right after an opening bracket
            yield found + 1, "E201 whitespace after '%s'" % char
        elif line[found - 1] != ',':
            code = ('E202' if char in '}])' else 'E203')
            yield found, "%s whitespace before '%s'" % (code, char)
295
296
def whitespace_around_keywords(logical_line):
    r"""Avoid extraneous whitespace around keywords.

    Okay: True and False
    E271: True and  False
    E272: True  and False
    E273: True and\tFalse
    E274: True\tand False
    """
    for m in KEYWORD_REGEX.finditer(logical_line):
        # group 1 is the whitespace before the keyword, group 2 the
        # whitespace after it; tabs take precedence over runs of spaces.
        leading = m.group(1)
        trailing = m.group(2)
        if '\t' in leading:
            yield m.start(1), "E274 tab before keyword"
        elif len(leading) > 1:
            yield m.start(1), "E272 multiple spaces before keyword"
        if '\t' in trailing:
            yield m.start(2), "E273 tab after keyword"
        elif len(trailing) > 1:
            yield m.start(2), "E271 multiple spaces after keyword"
318
319
def missing_whitespace(logical_line):
    r"""Each comma, semicolon or colon should be followed by whitespace.

    Okay: [a, b]
    Okay: (3,)
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]
    E231: ['a','b']
    E231: foo(bar,baz)
    E231: [{'a':'b'}]
    """
    line = logical_line
    for index, char in enumerate(line[:-1]):
        next_char = line[index + 1]
        if char not in ',;:' or next_char in ' \t':
            continue
        before = line[:index]
        if char == ':' and before.count('[') > before.count(']') and \
                before.rfind('{') < before.rfind('['):
            continue  # colon inside brackets is slice syntax: no space needed
        if char == ',' and next_char == ')':
            continue  # allow tuple with only one element: (3,)
        yield index, "E231 missing whitespace after '%s'" % char
344
345
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    r"""Use 4 spaces per indentation level.

    For really old code that you don't want to mess up, you can continue to
    use 8-space tabs.

    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2
    """
    if indent_char == ' ' and indent_level % 4:
        yield 0, "E111 indentation is not a multiple of four"
    # A logical line ending in ':' opens a block, so the next logical line
    # is expected to be indented deeper.
    expects_indent = previous_logical.endswith(':')
    indented = indent_level > previous_indent_level
    if expects_indent and not indented:
        yield 0, "E112 expected an indented block"
    elif indented and not expects_indent:
        yield 0, "E113 unexpected indentation"
370
371
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
                          indent_char, noqa, verbose):
    r"""Continuation lines indentation.

    Continuation lines should align wrapped elements either vertically
    using Python's implicit line joining inside parentheses, brackets
    and braces, or using a hanging indent.

    When using a hanging indent these considerations should be applied:
    - there should be no arguments on the first line, and
    - further indentation should be used to clearly distinguish itself as a
      continuation line.

    Okay: a = (\n)
    E123: a = (\n    )

    Okay: a = (\n    42)
    E121: a = (\n   42)
    E122: a = (\n42)
    E123: a = (\n    42\n    )
    E124: a = (24,\n     42\n)
    E125: if (\n    b):\n    pass
    E126: a = (\n        42)
    E127: a = (24,\n      42)
    E128: a = (24,\n    42)
    E129: if (a or\n    b):\n    pass
    E131: a = (\n    42\n 24)
    """
    # Physical extent of this logical line; single-row lines have no
    # continuation to check.
    first_row = tokens[0][2][0]
    nrows = 1 + tokens[-1][2][0] - first_row
    if noqa or nrows == 1:
        return

    # indent_next tells us whether the next block is indented; assuming
    # that it is indented by 4 spaces, then we should not allow 4-space
    # indents on the final continuation line; in turn, some other
    # indents are allowed to have an extra 4 spaces.
    indent_next = logical_line.endswith(':')

    row = depth = 0
    # A hanging indent of 4 is expected; with tab indentation 8 is also
    # accepted.
    valid_hangs = (4,) if indent_char != '\t' else (4, 8)
    # remember how many brackets were opened on each line
    parens = [0] * nrows
    # relative indents of physical lines
    rel_indent = [0] * nrows
    # for each depth, collect a list of opening rows
    open_rows = [[0]]
    # for each depth, memorize the hanging indentation
    hangs = [None]
    # visual indents
    indent_chances = {}
    last_indent = tokens[0][2]
    visual_indent = None
    # for each depth, memorize the visual indent column
    indent = [last_indent[1]]
    if verbose >= 3:
        print(">>> " + tokens[0][4].rstrip())

    # NOTE: last_token_multiline is assigned at the bottom of the loop; it
    # is never read on the first iteration because `newline` is False there.
    for token_type, text, start, end, line in tokens:

        newline = row < start[0] - first_row
        if newline:
            row = start[0] - first_row
            newline = not last_token_multiline and token_type not in NEWLINE

        if newline:
            # this is the beginning of a continuation line.
            last_indent = start
            if verbose >= 3:
                print("... " + line.rstrip())

            # record the initial indent.
            rel_indent[row] = expand_indent(line) - indent_level

            # identify closing bracket
            close_bracket = (token_type == tokenize.OP and text in ']})')

            # is the indent relative to an opening bracket line?
            for open_row in reversed(open_rows[depth]):
                hang = rel_indent[row] - rel_indent[open_row]
                hanging_indent = hang in valid_hangs
                if hanging_indent:
                    break
            if hangs[depth]:
                # once a hanging indent is established at this depth, all
                # later lines must match it exactly
                hanging_indent = (hang == hangs[depth])
            # is there any chance of visual indent?
            visual_indent = (not close_bracket and hang > 0 and
                             indent_chances.get(start[1]))

            if close_bracket and indent[depth]:
                # closing bracket for visual indent
                if start[1] != indent[depth]:
                    yield (start, "E124 closing bracket does not match "
                           "visual indentation")
            elif close_bracket and not hang:
                # closing bracket matches indentation of opening bracket's line
                if hang_closing:
                    yield start, "E133 closing bracket is missing indentation"
            elif indent[depth] and start[1] < indent[depth]:
                if visual_indent is not True:
                    # visual indent is broken
                    yield (start, "E128 continuation line "
                           "under-indented for visual indent")
            elif hanging_indent or (indent_next and rel_indent[row] == 8):
                # hanging indent is verified
                if close_bracket and not hang_closing:
                    yield (start, "E123 closing bracket does not match "
                           "indentation of opening bracket's line")
                hangs[depth] = hang
            elif visual_indent is True:
                # visual indent is verified
                indent[depth] = start[1]
            elif visual_indent in (text, str):
                # ignore token lined up with matching one from a previous line
                pass
            else:
                # indent is broken
                if hang <= 0:
                    error = "E122", "missing indentation or outdented"
                elif indent[depth]:
                    error = "E127", "over-indented for visual indent"
                elif not close_bracket and hangs[depth]:
                    error = "E131", "unaligned for hanging indent"
                else:
                    hangs[depth] = hang
                    if hang > 4:
                        error = "E126", "over-indented for hanging indent"
                    else:
                        error = "E121", "under-indented for hanging indent"
                yield start, "%s continuation line %s" % error

        # look for visual indenting
        if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
                and not indent[depth]):
            indent[depth] = start[1]
            indent_chances[start[1]] = True
            if verbose >= 4:
                print("bracket depth %s indent to %s" % (depth, start[1]))
        # deal with implicit string concatenation
        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
              text in ('u', 'ur', 'b', 'br')):
            indent_chances[start[1]] = str
        # special case for the "if" statement because len("if (") == 4
        elif not indent_chances and not row and not depth and text == 'if':
            indent_chances[end[1] + 1] = True
        elif text == ':' and line[end[1]:].isspace():
            open_rows[depth].append(row)

        # keep track of bracket depth
        if token_type == tokenize.OP:
            if text in '([{':
                depth += 1
                indent.append(0)
                hangs.append(None)
                if len(open_rows) == depth:
                    open_rows.append([])
                open_rows[depth].append(row)
                parens[row] += 1
                if verbose >= 4:
                    print("bracket depth %s seen, col %s, visual min = %s" %
                          (depth, start[1], indent[depth]))
            elif text in ')]}' and depth > 0:
                # parent indents should not be more than this one
                prev_indent = indent.pop() or last_indent[1]
                hangs.pop()
                for d in range(depth):
                    if indent[d] > prev_indent:
                        indent[d] = 0
                # discard indent chances to the right of the closed bracket
                for ind in list(indent_chances):
                    if ind >= prev_indent:
                        del indent_chances[ind]
                del open_rows[depth + 1:]
                depth -= 1
                if depth:
                    indent_chances[indent[depth]] = True
                # decrement the open-bracket count of the row that opened it
                for idx in range(row, -1, -1):
                    if parens[idx]:
                        parens[idx] -= 1
                        break
            assert len(indent) == depth + 1
            if start[1] not in indent_chances:
                # allow to line up tokens
                indent_chances[start[1]] = text

        last_token_multiline = (start[0] != end[0])
        if last_token_multiline:
            rel_indent[end[0] - first_row] = rel_indent[row]

    # Finally: a line ending in ':' must not leave its last continuation
    # line at the same indent as the upcoming block.
    if indent_next and expand_indent(line) == indent_level + 4:
        pos = (start[0], indent[0] + 4)
        if visual_indent:
            code = "E129 visually indented line"
        else:
            code = "E125 continuation line"
        yield pos, "%s with same indent as next logical line" % code
567
568
def whitespace_before_parameters(logical_line, tokens):
    r"""Avoid extraneous whitespace.

    Avoid extraneous whitespace in the following situations:
    - before the open parenthesis that starts the argument list of a
      function call.
    - before the open parenthesis that starts an indexing or slicing.

    Okay: spam(1)
    E211: spam (1)

    Okay: dict['key'] = list[index]
    E211: dict ['key'] = list[index]
    E211: dict['key'] = list [index]
    """
    prev_type, prev_text, __, prev_end, __ = tokens[0]
    for index, token in enumerate(tokens[1:], 1):
        token_type, text, start, end, __ = token
        if (token_type == tokenize.OP and
                text in '([' and
                # a gap between the name and the bracket is the offence
                start != prev_end and
                (prev_type == tokenize.NAME or prev_text in '}])') and
                # Syntax "class A (B):" is allowed, but avoid it
                (index < 2 or tokens[index - 2][1] != 'class') and
                # Allow "return (a.foo for a in range(5))"
                not keyword.iskeyword(prev_text)):
            yield prev_end, "E211 whitespace before '%s'" % text
        prev_type, prev_text, prev_end = token_type, text, end
599
600
def whitespace_around_operator(logical_line):
    r"""Avoid extraneous whitespace around an operator.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
    """
    # group 1 captures whitespace before the operator, group 2 after it.
    operator_re = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
    for m in operator_re.finditer(logical_line):
        leading = m.group(1)
        trailing = m.group(2)
        if '\t' in leading:
            yield m.start(1), "E223 tab before operator"
        elif len(leading) > 1:
            yield m.start(1), "E221 multiple spaces before operator"
        if '\t' in trailing:
            yield m.start(2), "E224 tab after operator"
        elif len(trailing) > 1:
            yield m.start(2), "E222 multiple spaces after operator"
622
623
def missing_whitespace_around_operator(logical_line, tokens):
    r"""Surround operators with a single space on either side.

    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - If operators with different priorities are used, consider adding
      whitespace around the operators with the lowest priorities.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: alpha[:-i]

    E225: i=i+1
    E225: submitted +=1
    E225: x = x /2 - 1
    E225: z = x **y
    E226: c = (a+b) * (a-b)
    E226: hypot2 = x*x + y*y
    E227: c = a|b
    E228: msg = fmt%(errno, errmsg)
    """
    # need_space acts as a small state machine:
    #   False        -- no operator is pending
    #   True         -- a mandatory-space operator was seen; space is
    #                   required on both sides
    #   None         -- transient marker for an optional-space operator,
    #                   converted below into the tuple form
    #   (pos, bool)  -- optional-space operator at `pos`; the bool records
    #                   whether a space preceded it
    parens = 0
    need_space = False
    prev_type = tokenize.OP
    prev_text = prev_end = None
    for token_type, text, start, end, line in tokens:
        if token_type in SKIP_COMMENTS:
            continue
        # parens counts call/lambda nesting, so that '=' can be recognized
        # as a keyword-argument or default-value marker below.
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            if start != prev_end:
                # Found a (probably) needed space
                if need_space is not True and not need_space[1]:
                    # space after but not before: asymmetric -> E225
                    yield (need_space[0],
                           "E225 missing whitespace around operator")
                need_space = False
            elif text == '>' and prev_text in ('<', '-'):
                # Tolerate the "<>" operator, even if running Python 3
                # Deal with Python 3's annotated return value "->"
                pass
            else:
                if need_space is True or need_space[1]:
                    # A needed trailing space was not found
                    yield prev_end, "E225 missing whitespace around operator"
                else:
                    # optional-space operator with no space on either side:
                    # report by operator category
                    code, optype = 'E226', 'arithmetic'
                    if prev_text == '%':
                        code, optype = 'E228', 'modulo'
                    elif prev_text not in ARITHMETIC_OP:
                        code, optype = 'E227', 'bitwise or shift'
                    yield (need_space[0], "%s missing whitespace "
                           "around %s operator" % (code, optype))
                need_space = False
        elif token_type == tokenize.OP and prev_end is not None:
            if text == '=' and parens:
                # Allow keyword args or defaults: foo(bar=None).
                pass
            elif text in WS_NEEDED_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # Check if the operator is being used as a binary operator
                # Allow unary operators: -123, -x, +1.
                # Allow argument unpacking: foo(*args, **kwargs).
                if (prev_text in '}])' if prev_type == tokenize.OP
                        else prev_text not in KEYWORDS):
                    need_space = None
            elif text in WS_OPTIONAL_OPERATORS:
                need_space = None

            if need_space is None:
                # Surrounding space is optional, but ensure that
                # trailing space matches opening space
                need_space = (prev_end, start != prev_end)
            elif need_space and start == prev_end:
                # A needed opening space was not found
                yield prev_end, "E225 missing whitespace around operator"
                need_space = False
        prev_type = token_type
        prev_text = text
        prev_end = end
714
715
def whitespace_around_comma(logical_line):
    r"""Avoid extraneous whitespace after a comma or a colon.

    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
    """
    # Match a separator followed by at least two spaces or a tab.
    for m in re.finditer(r'[,;:]\s*(?:  |\t)', logical_line):
        punctuation = m.group()[0]
        found = m.start() + 1
        if '\t' in m.group():
            yield found, "E242 tab after '%s'" % punctuation
        else:
            yield found, "E241 multiple spaces after '%s'" % punctuation
732
733
def whitespace_around_named_parameter_equals(logical_line, tokens):
    r"""Don't use spaces around the '=' sign in function arguments.

    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)
    """
    message = "E251 unexpected spaces around keyword / parameter equals"
    paren_depth = 0
    expect_no_space = False
    prev_end = None
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.NL:
            continue
        if expect_no_space:
            # The token right after a keyword '=' must touch it.
            expect_no_space = False
            if start != prev_end:
                yield prev_end, message
        elif token_type == tokenize.OP:
            if text == '(':
                paren_depth += 1
            elif text == ')':
                paren_depth -= 1
            elif paren_depth and text == '=':
                # '=' inside parentheses: keyword argument or default value.
                expect_no_space = True
                if start != prev_end:
                    yield prev_end, message
        prev_end = end
771
772
def whitespace_before_comment(logical_line, tokens):
    r"""Separate inline comments by at least two spaces.

    An inline comment is a comment on the same line as a statement.  Inline
    comments should be separated by at least two spaces from the statement.
    They should start with a # and a single space.

    Each line of a block comment starts with a # and a single space
    (unless it is indented text inside the comment).

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    Okay: # Block comment
    E261: x = x + 1 # Increment x
    E262: x = x + 1  #Increment x
    E262: x = x + 1  #  Increment x
    E265: #Block comment
    """
    prev_end = (0, 0)
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.COMMENT:
            # Any code before the '#' on this line makes it an inline comment.
            is_inline = bool(line[:start[1]].strip())
            if is_inline and prev_end[0] == start[0] and \
                    start[1] < prev_end[1] + 2:
                yield (prev_end,
                       "E261 at least two spaces before inline comment")
            symbol, sp, comment = text.partition(' ')
            bad_prefix = symbol not in ('#', '#:')
            if is_inline:
                if bad_prefix or comment[:1].isspace():
                    yield start, "E262 inline comment should start with '# '"
            elif bad_prefix and text.rstrip('#') and \
                    (start[0] > 1 or symbol[1] != '!'):
                # shebang lines ("#!...") on line 1 are exempt
                yield start, "E265 block comment should start with '# '"
        elif token_type != tokenize.NL:
            prev_end = end
809
810
def imports_on_separate_lines(logical_line):
    r"""Imports should usually be on separate lines.

    Okay: import os\nimport sys
    E401: import sys, os

    Okay: from subprocess import Popen, PIPE
    Okay: from myclas import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass
    """
    if logical_line.startswith('import '):
        comma = logical_line.find(',')
        # A semicolon before the comma means the comma belongs to a
        # different statement on the same logical line.
        if comma > -1 and ';' not in logical_line[:comma]:
            yield comma, "E401 multiple imports on one line"
828
829
def compound_statements(logical_line):
    r"""Compound statements (on the same line) are generally discouraged.

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements.
    Also avoid folding such long lines!

    Okay: if foo == 'blah':\n    do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()

    E702: do_one(); do_two(); do_three()
    E703: do_four();  # useless semicolon
    """
    last_index = len(logical_line) - 1
    colon = logical_line.find(':')
    while -1 < colon < last_index:
        prefix = logical_line[:colon]
        # Ignore colons inside dict literals, slices, annotations or lambdas.
        in_braces = prefix.count('{') > prefix.count('}')
        in_brackets = prefix.count('[') > prefix.count(']')
        in_parens = prefix.count('(') > prefix.count(')')
        if not (in_braces or in_brackets or in_parens or
                LAMBDA_REGEX.search(prefix)):
            yield colon, "E701 multiple statements on one line (colon)"
        colon = logical_line.find(':', colon + 1)
    semicolon = logical_line.find(';')
    while semicolon > -1:
        if semicolon < last_index:
            yield semicolon, "E702 multiple statements on one line (semicolon)"
        else:
            yield semicolon, "E703 statement ends with a semicolon"
        semicolon = logical_line.find(';', semicolon + 1)
872
873
def explicit_line_join(logical_line, tokens):
    r"""Avoid explicit line join between brackets.

    The preferred way of wrapping long lines is by using Python's implied line
    continuation inside parentheses, brackets and braces.  Long lines can be
    broken over multiple lines by wrapping expressions in parentheses.  These
    should be used in preference to using a backslash for line continuation.

    E502: aaa = [123, \\n       123]
    E502: aaa = ("bbb " \\n       "ccc")

    Okay: aaa = [123,\n       123]
    Okay: aaa = ("bbb "\n       "ccc")
    Okay: aaa = "bbb " \\n    "ccc"
    """
    prev_start = prev_end = parens = 0
    for token_type, text, start, end, line in tokens:
        # A backslash recorded on an earlier line is reported only once a
        # token starts on a new row while brackets are still open.
        if start[0] != prev_start and parens and backslash:
            yield backslash, "E502 the backslash is redundant between brackets"
        if end[0] != prev_end:
            # First token ending on a new physical line: remember the
            # position of a trailing backslash on that line, if any.
            if line.rstrip('\r\n').endswith('\\'):
                backslash = (end[0], len(line.splitlines()[-1]) - 1)
            else:
                backslash = None
            prev_start = prev_end = end[0]
        else:
            prev_start = start[0]
        # Track bracket depth; backslashes outside brackets are legitimate.
        if token_type == tokenize.OP:
            if text in '([{':
                parens += 1
            elif text in ')]}':
                parens -= 1
906
907
def comparison_to_singleton(logical_line, noqa):
    r"""Comparison to singletons should use "is" or "is not".

    Comparisons to singletons like None should always be done
    with "is" or "is not", never the equality operators.

    Okay: if arg is not None:
    E711: if arg != None:
    E712: if arg == True:

    Also, beware of writing if x when you really mean if x is not None --
    e.g. when testing whether a variable or argument that defaults to None was
    set to some other value.  The other value might have a type (such as a
    container) that could be false in a boolean context!
    """
    match = not noqa and COMPARE_SINGLETON_REGEX.search(logical_line)
    if not match:
        return
    is_equality = (match.group(1) == '==')
    singleton = match.group(2)
    hint = "'if cond is %s:'" % (('' if is_equality else 'not ') + singleton)
    if singleton == 'None':
        code = 'E711'
    else:
        code = 'E712'
        # Suggest the plain truthiness test when it is equivalent.
        nonzero = ((singleton == 'True' and is_equality) or
                   (singleton == 'False' and not is_equality))
        hint += " or 'if %scond:'" % ('' if nonzero else 'not ')
    yield match.start(1), ("%s comparison to %s should be %s" %
                           (code, singleton, hint))
937
938
def comparison_negative(logical_line):
    r"""Negative comparison should be done using "not in" and "is not".

    Okay: if x not in y:\n    pass
    Okay: assert (X in Y or X is Z)
    Okay: if not (X in Y):\n    pass
    Okay: zz = x is not y
    E713: Z = not X in Y
    E713: if not X.B in Y:\n    pass
    E714: if not X is Y:\n    pass
    E714: Z = not X.B is Y
    """
    match = COMPARE_NEGATIVE_REGEX.search(logical_line)
    if not match:
        return
    pos = match.start(1)
    if match.group(2) == 'in':
        yield pos, "E713 test for membership should be 'not in'"
    else:
        yield pos, ("E714 test for object identity "
                    "should be 'is not'")
958
959
def comparison_type(logical_line):
    r"""Object type comparisons should always use isinstance().

    Do not compare types directly.

    Okay: if isinstance(obj, int):
    E721: if type(obj) is type(1):

    When checking if an object is a string, keep in mind that it might be a
    unicode string too! In Python 2.3, str and unicode have a common base
    class, basestring, so you can do:

    Okay: if isinstance(obj, basestring):
    Okay: if type(a1) is type(b1):
    """
    match = COMPARE_TYPE_REGEX.search(logical_line)
    if not match:
        return
    inst = match.group(1)
    if inst and isidentifier(inst) and inst not in SINGLETONS:
        # Allow comparison for types which are not obvious
        return
    yield match.start(), "E721 do not compare types, use 'isinstance()'"
981
982
def python_3000_has_key(logical_line, noqa):
    r"""The {}.has_key() method is removed in Python 3: use the 'in' operator.

    Okay: if "alph" in d:\n    print d["alph"]
    W601: assert d.has_key('alph')
    """
    if noqa:
        return
    pos = logical_line.find('.has_key(')
    if pos != -1:
        yield pos, "W601 .has_key() is deprecated, use 'in'"
992
993
def python_3000_raise_comma(logical_line):
    r"""When raising an exception, use "raise ValueError('message')".

    The older form is removed in Python 3.

    Okay: raise DummyError("Message")
    W602: raise DummyError, "Message"
    """
    match = RAISE_COMMA_REGEX.match(logical_line)
    if not match:
        return
    if RERAISE_COMMA_REGEX.match(logical_line):
        # Lines matching the re-raise pattern are not reported.
        return
    yield match.end() - 1, "W602 deprecated form of raising exception"
1005
1006
def python_3000_not_equal(logical_line):
    r"""New code should always use != instead of <>.

    The older syntax is removed in Python 3.

    Okay: if a != 'no':
    W603: if a <> 'no':
    """
    offset = logical_line.find('<>')
    if offset != -1:
        yield offset, "W603 '<>' is deprecated, use '!='"
1018
1019
def python_3000_backticks(logical_line):
    r"""Backticks are removed in Python 3: use repr() instead.

    Okay: val = repr(1 + 2)
    W604: val = `1 + 2`
    """
    offset = logical_line.find('`')
    if offset != -1:
        yield offset, "W604 backticks are deprecated, use 'repr()'"
1029
1030
1031##############################################################################
1032# Helper functions
1033##############################################################################
1034
1035
# Select interpreter-specific implementations: on Python 2 an empty str
# equals its encoded form, on Python 3 str and bytes never compare equal.
if '' == ''.encode():
    # Python 2: implicit encoding.
    def readlines(filename):
        """Read the source code."""
        with open(filename, 'rU') as f:
            return f.readlines()
    isidentifier = re.compile(r'[a-zA-Z_]\w*').match
    stdin_get_value = sys.stdin.read
else:
    # Python 3
    def readlines(filename):
        """Read the source code."""
        try:
            with open(filename, 'rb') as f:
                # Honour a PEP 263 coding declaration or BOM when decoding.
                (coding, lines) = tokenize.detect_encoding(f.readline)
                f = TextIOWrapper(f, coding, line_buffering=True)
                return [l.decode(coding) for l in lines] + f.readlines()
        except (LookupError, SyntaxError, UnicodeError):
            # Fall back if file encoding is improperly declared
            with open(filename, encoding='latin-1') as f:
                return f.readlines()
    isidentifier = str.isidentifier

    def stdin_get_value():
        """Read the whole of stdin, ignoring decoding errors."""
        return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()
# Detect a '# noqa' (or legacy '# nopep8') marker, case-insensitively.
noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search
1062
1063
def expand_indent(line):
    r"""Return the amount of indentation.

    Tabs are expanded to the next multiple of 8.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\t')
    8
    >>> expand_indent('       \t')
    8
    >>> expand_indent('        \t')
    16
    """
    if '\t' not in line:
        # Fast path: indentation is pure spaces (or empty).
        return len(line) - len(line.lstrip())
    width = 0
    for char in line:
        if char == ' ':
            width += 1
        elif char == '\t':
            # Advance to the next tab stop (multiples of 8).
            width = (width // 8 + 1) * 8
        else:
            break
    return width
1089
1090
def mute_string(text):
    """Replace contents with 'xxx' to prevent syntax matching.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    # The closing character gives the quote style; any prefix (r, u, b...)
    # appears before its first occurrence.
    quote = text[-1]
    body_start = text.index(quote) + 1
    body_end = len(text) - 1
    if text.endswith(('"""', "'''")):
        # Triple-quoted string: widen the quote span on both sides.
        body_start += 2
        body_end -= 2
    return text[:body_start] + 'x' * (body_end - body_start) + text[body_end:]
1109
1110
def parse_udiff(diff, patterns=None, parent='.'):
    """Return a dictionary of matching lines."""
    # For each file of the diff, the entry key is the filename,
    # and the value is a set of row numbers to consider.
    per_file = {}
    current = remaining = None
    for line in diff.splitlines():
        if remaining:
            # Inside a hunk: each kept or added line consumes one row.
            if not line.startswith('-'):
                remaining -= 1
            continue
        if line.startswith('@@ '):
            groups = HUNK_REGEX.match(line).groups()
            (row, remaining) = [int(g or '1') for g in groups]
            per_file[current].update(range(row, row + remaining))
        elif line.startswith('+++'):
            current = line[4:].split('\t', 1)[0]
            if current.startswith('b/'):
                current = current[2:]
            per_file[current] = set()
    return dict([(os.path.join(parent, name), rows)
                 for (name, rows) in per_file.items()
                 if rows and filename_match(name, patterns)])
1134
1135
def normalize_paths(value, parent=os.curdir):
    """Parse a comma-separated list of paths.

    Return a list of absolute paths.
    """
    # Lists (already parsed) and falsy values pass through unchanged.
    if not value or isinstance(value, list):
        return value
    return [
        (os.path.abspath(os.path.join(parent, item)) if '/' in item
         else item).rstrip('/')
        for item in value.split(',')
    ]
1149
1150
def filename_match(filename, patterns, default=True):
    """Check if patterns contains a pattern that matches filename.

    If patterns is unspecified, this always returns True.
    """
    if not patterns:
        return default
    for pattern in patterns:
        if fnmatch(filename, pattern):
            return True
    return False
1159
1160
# COMMENT_WITH_NL is set earlier in the file; when true, the tokenizer can
# emit a comment token whose text spans the whole physical line, which must
# then count as an end-of-line token.
if COMMENT_WITH_NL:
    def _is_eol_token(token):
        return (token[0] in NEWLINE or
                (token[0] == tokenize.COMMENT and token[1] == token[4]))
else:
    def _is_eol_token(token):
        return token[0] in NEWLINE
1168
1169
1170##############################################################################
1171# Framework to run all checks
1172##############################################################################
1173
1174
# Registry of checks, keyed by the kind of their first argument.  Each entry
# maps a registered check to a (codes, arguments) tuple (see register_check).
_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
1176
1177
def register_check(check, codes=None):
    """Register a new check object.

    A check is either a function whose first argument is named
    'physical_line' or 'logical_line', or a class whose constructor takes
    a 'tree' (AST) argument.  The error codes it reports default to the
    codes found in its docstring.
    """
    def _add_check(check, kind, codes, args):
        # Registering a check twice merges its error codes.
        if check in _checks[kind]:
            _checks[kind][check][0].extend(codes or [])
        else:
            _checks[kind][check] = (codes or [''], args)
    # inspect.getargspec() was deprecated in Python 3.0 and removed in
    # Python 3.11; use getfullargspec() when available (its first element
    # is likewise the list of positional argument names).
    getargspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    if inspect.isfunction(check):
        args = getargspec(check)[0]
        if args and args[0] in ('physical_line', 'logical_line'):
            if codes is None:
                codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
            _add_check(check, args[0], codes, args)
    elif inspect.isclass(check):
        if getargspec(check.__init__)[0][:2] == ['self', 'tree']:
            _add_check(check, 'tree', codes, None)
1194
1195
def init_checks_registry():
    """Register all globally visible functions.

    The first argument name is either 'physical_line' or 'logical_line'.
    """
    # Scan this module for check functions; register_check filters out
    # anything whose first argument is not one of the two names above.
    mod = inspect.getmodule(register_check)
    for (name, function) in inspect.getmembers(mod, inspect.isfunction):
        register_check(function)
# Populate the registry as soon as the module is imported.
init_checks_registry()
1205
1206
class Checker(object):
    """Load a Python source file, tokenize it, check coding style."""

    def __init__(self, filename=None, lines=None,
                 options=None, report=None, **kwargs):
        # Without an explicit options object, build one from the kwargs.
        if options is None:
            options = StyleGuide(kwargs).options
        else:
            assert not kwargs
        self._io_error = None
        self._physical_checks = options.physical_checks
        self._logical_checks = options.logical_checks
        self._ast_checks = options.ast_checks
        self.max_line_length = options.max_line_length
        self.multiline = False  # in a multiline string?
        self.hang_closing = options.hang_closing
        self.verbose = options.verbose
        self.filename = filename
        # Resolve the source lines: passed in directly, read from stdin,
        # or read from the named file.
        if filename is None:
            self.filename = 'stdin'
            self.lines = lines or []
        elif filename == '-':
            self.filename = 'stdin'
            self.lines = stdin_get_value().splitlines(True)
        elif lines is None:
            try:
                self.lines = readlines(filename)
            except IOError:
                # Remember the error; it is reported in generate_tokens().
                (exc_type, exc) = sys.exc_info()[:2]
                self._io_error = '%s: %s' % (exc_type.__name__, exc)
                self.lines = []
        else:
            self.lines = lines
        if self.lines:
            ord0 = ord(self.lines[0][0])
            if ord0 in (0xef, 0xfeff):  # Strip the UTF-8 BOM
                if ord0 == 0xfeff:
                    self.lines[0] = self.lines[0][1:]
                elif self.lines[0][:3] == '\xef\xbb\xbf':
                    self.lines[0] = self.lines[0][3:]
        self.report = report or options.report
        self.report_error = self.report.error

    def report_invalid_syntax(self):
        """Check if the syntax is valid."""
        (exc_type, exc) = sys.exc_info()[:2]
        # The second exception argument, when present, carries the
        # position; fall back to the start of the file otherwise.
        if len(exc.args) > 1:
            offset = exc.args[1]
            if len(offset) > 2:
                offset = offset[1:3]
        else:
            offset = (1, 0)
        self.report_error(offset[0], offset[1] or 0,
                          'E901 %s: %s' % (exc_type.__name__, exc.args[0]),
                          self.report_invalid_syntax)

    def readline(self):
        """Get the next line from the input buffer."""
        if self.line_number >= self.total_lines:
            return ''
        line = self.lines[self.line_number]
        self.line_number += 1
        # The first indented line determines the file's indent character.
        if self.indent_char is None and line[:1] in WHITESPACE:
            self.indent_char = line[0]
        return line

    def run_check(self, check, argument_names):
        """Run a check plugin."""
        # Each named argument is fetched from the checker's own state.
        arguments = []
        for name in argument_names:
            arguments.append(getattr(self, name))
        return check(*arguments)

    def check_physical(self, line):
        """Run all physical checks on a raw input line."""
        self.physical_line = line
        for name, check, argument_names in self._physical_checks:
            result = self.run_check(check, argument_names)
            if result is not None:
                (offset, text) = result
                self.report_error(self.line_number, offset, text, check)
                # E101 reveals the dominant indent character; remember it.
                if text[:4] == 'E101':
                    self.indent_char = line[0]

    def build_tokens_line(self):
        """Build a logical line from tokens."""
        logical = []
        comments = []
        length = 0
        prev_row = prev_col = mapping = None
        for token_type, text, start, end, line in self.tokens:
            if token_type in SKIP_TOKENS:
                continue
            # 'mapping' links offsets in the logical line to (row, col)
            # positions in the physical source.
            if not mapping:
                mapping = [(0, start)]
            if token_type == tokenize.COMMENT:
                comments.append(text)
                continue
            if token_type == tokenize.STRING:
                # Hide string contents so checks don't match inside them.
                text = mute_string(text)
            if prev_row:
                (start_row, start_col) = start
                if prev_row != start_row:    # different row
                    prev_text = self.lines[prev_row - 1][prev_col - 1]
                    if prev_text == ',' or (prev_text not in '{[('
                                            and text not in '}])'):
                        text = ' ' + text
                elif prev_col != start_col:  # different column
                    # Preserve the original whitespace between tokens.
                    text = line[prev_col:start_col] + text
            logical.append(text)
            length += len(text)
            mapping.append((length, end))
            (prev_row, prev_col) = end
        self.logical_line = ''.join(logical)
        self.noqa = comments and noqa(''.join(comments))
        return mapping

    def check_logical(self):
        """Build a line from tokens and run all logical checks on it."""
        self.report.increment_logical_line()
        mapping = self.build_tokens_line()
        (start_row, start_col) = mapping[0][1]
        start_line = self.lines[start_row - 1]
        self.indent_level = expand_indent(start_line[:start_col])
        if self.blank_before < self.blank_lines:
            self.blank_before = self.blank_lines
        if self.verbose >= 2:
            print(self.logical_line[:80].rstrip())
        for name, check, argument_names in self._logical_checks:
            if self.verbose >= 4:
                print('   ' + name)
            for offset, text in self.run_check(check, argument_names) or ():
                # Integer offsets are relative to the logical line; map
                # them back to a (row, col) position in the source.
                if not isinstance(offset, tuple):
                    for token_offset, pos in mapping:
                        if offset <= token_offset:
                            break
                    offset = (pos[0], pos[1] + offset - token_offset)
                self.report_error(offset[0], offset[1], text, check)
        if self.logical_line:
            self.previous_indent_level = self.indent_level
            self.previous_logical = self.logical_line
        self.blank_lines = 0
        self.tokens = []

    def check_ast(self):
        """Build the file's AST and run all AST checks."""
        try:
            tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
        except (SyntaxError, TypeError):
            return self.report_invalid_syntax()
        for name, cls, __ in self._ast_checks:
            checker = cls(tree, self.filename)
            for lineno, offset, text, check in checker.run():
                # Honour a '# noqa' marker on the offending line.
                if not self.lines or not noqa(self.lines[lineno - 1]):
                    self.report_error(lineno, offset, text, check)

    def generate_tokens(self):
        """Tokenize the file, run physical line checks and yield tokens."""
        if self._io_error:
            self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
        tokengen = tokenize.generate_tokens(self.readline)
        try:
            for token in tokengen:
                if token[2][0] > self.total_lines:
                    return
                self.maybe_check_physical(token)
                yield token
        except (SyntaxError, tokenize.TokenError):
            self.report_invalid_syntax()

    def maybe_check_physical(self, token):
        """If appropriate (based on token), check current physical line(s)."""
        # Called after every token, but act only on end of line.
        if _is_eol_token(token):
            # Obviously, a newline token ends a single physical line.
            self.check_physical(token[4])
        elif token[0] == tokenize.STRING and '\n' in token[1]:
            # Less obviously, a string that contains newlines is a
            # multiline string, either triple-quoted or with internal
            # newlines backslash-escaped. Check every physical line in the
            # string *except* for the last one: its newline is outside of
            # the multiline string, so we consider it a regular physical
            # line, and will check it like any other physical line.
            #
            # Subtleties:
            # - we don't *completely* ignore the last line; if it contains
            #   the magical "# noqa" comment, we disable all physical
            #   checks for the entire multiline string
            # - have to wind self.line_number back because initially it
            #   points to the last line of the string, and we want
            #   check_physical() to give accurate feedback
            if noqa(token[4]):
                return
            self.multiline = True
            self.line_number = token[2][0]
            for line in token[1].split('\n')[:-1]:
                self.check_physical(line + '\n')
                self.line_number += 1
            self.multiline = False

    def check_all(self, expected=None, line_offset=0):
        """Run all checks on the input file."""
        self.report.init_file(self.filename, self.lines, expected, line_offset)
        self.total_lines = len(self.lines)
        if self._ast_checks:
            self.check_ast()
        self.line_number = 0
        self.indent_char = None
        self.indent_level = self.previous_indent_level = 0
        self.previous_logical = ''
        self.tokens = []
        self.blank_lines = self.blank_before = 0
        parens = 0
        for token in self.generate_tokens():
            self.tokens.append(token)
            token_type, text = token[0:2]
            if self.verbose >= 3:
                if token[2][0] == token[3][0]:
                    pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
                else:
                    pos = 'l.%s' % token[3][0]
                print('l.%s\t%s\t%s\t%r' %
                      (token[2][0], pos, tokenize.tok_name[token[0]], text))
            # A logical line only ends when no brackets are open.
            if token_type == tokenize.OP:
                if text in '([{':
                    parens += 1
                elif text in '}])':
                    parens -= 1
            elif not parens:
                if token_type in NEWLINE:
                    if token_type == tokenize.NEWLINE:
                        self.check_logical()
                        self.blank_before = 0
                    elif len(self.tokens) == 1:
                        # The physical line contains only this token.
                        self.blank_lines += 1
                        del self.tokens[0]
                    else:
                        self.check_logical()
                elif COMMENT_WITH_NL and token_type == tokenize.COMMENT:
                    if len(self.tokens) == 1:
                        # The comment also ends a physical line
                        token = list(token)
                        token[1] = text.rstrip('\r\n')
                        token[3] = (token[2][0], token[2][1] + len(token[1]))
                        self.tokens = [tuple(token)]
                        self.check_logical()
        if self.tokens:
            # Flush any trailing tokens (e.g. a file without final newline).
            self.check_physical(self.lines[-1])
            self.check_logical()
        return self.report.get_file_results()
1458
1459
class BaseReport(object):
    """Collect the results of the checks."""

    print_filename = False

    def __init__(self, options):
        self._benchmark_keys = options.benchmark_keys
        self._ignore_code = options.ignore_code
        # Aggregated results.
        self.elapsed = 0
        self.total_errors = 0
        self.counters = dict.fromkeys(self._benchmark_keys, 0)
        self.messages = {}

    def start(self):
        """Start the timer."""
        self._start_time = time.time()

    def stop(self):
        """Stop the timer."""
        self.elapsed = time.time() - self._start_time

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        self.filename = filename
        self.lines = lines
        self.expected = expected or ()
        self.line_offset = line_offset
        self.file_errors = 0
        self.counters['files'] += 1
        self.counters['physical lines'] += len(lines)

    def increment_logical_line(self):
        """Signal a new logical line."""
        self.counters['logical lines'] += 1

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        code = text[:4]
        if self._ignore_code(code):
            return
        try:
            self.counters[code] += 1
        except KeyError:
            # First occurrence of this code: remember its message text.
            self.counters[code] = 1
            self.messages[code] = text[5:]
        # Don't care about expected errors or warnings
        if code in self.expected:
            return
        if self.print_filename and not self.file_errors:
            print(self.filename)
        self.file_errors += 1
        self.total_errors += 1
        return code

    def get_file_results(self):
        """Return the count of errors and warnings for this file."""
        return self.file_errors

    def get_count(self, prefix=''):
        """Return the total count of errors and warnings."""
        return sum(self.counters[code]
                   for code in self.messages if code.startswith(prefix))

    def get_statistics(self, prefix=''):
        """Get statistics for message codes that start with the prefix.

        prefix='' matches all errors and warnings
        prefix='E' matches all errors
        prefix='W' matches all warnings
        prefix='E4' matches all errors that have to do with imports
        """
        return ['%-7s %s %s' % (self.counters[code], code, self.messages[code])
                for code in sorted(self.messages) if code.startswith(prefix)]

    def print_statistics(self, prefix=''):
        """Print overall statistics (number of errors and warnings)."""
        for stat in self.get_statistics(prefix):
            print(stat)

    def print_benchmark(self):
        """Print benchmark numbers."""
        print('%-7.2f %s' % (self.elapsed, 'seconds elapsed'))
        if not self.elapsed:
            return
        for key in self._benchmark_keys:
            print('%-7d %s per second (%d total)' %
                  (self.counters[key] / self.elapsed, key,
                   self.counters[key]))
1548
1549
class FileReport(BaseReport):
    """Collect the results of the checks and print only the filenames."""
    # Makes BaseReport.error() print each filename once, before its
    # first reported error.
    print_filename = True
1553
1554
class StandardReport(BaseReport):
    """Collect and print the results of the checks."""

    def __init__(self, options):
        super(StandardReport, self).__init__(options)
        # The format option is either a preset name known to REPORT_FORMAT
        # or a custom template used as-is.
        self._fmt = REPORT_FORMAT.get(options.format.lower(),
                                      options.format)
        self._repeat = options.repeat
        self._show_source = options.show_source
        self._show_pep8 = options.show_pep8

    def init_file(self, filename, lines, expected, line_offset):
        """Signal a new file."""
        # Errors are buffered here and printed, sorted by position, in
        # get_file_results().
        self._deferred_print = []
        return super(StandardReport, self).init_file(
            filename, lines, expected, line_offset)

    def error(self, line_number, offset, text, check):
        """Report an error, according to options."""
        code = super(StandardReport, self).error(line_number, offset,
                                                 text, check)
        # Without --repeat, only the first occurrence of a code is printed.
        if code and (self.counters[code] == 1 or self._repeat):
            self._deferred_print.append(
                (line_number, offset, code, text[5:], check.__doc__))
        return code

    def get_file_results(self):
        """Print the result and return the overall count for this file."""
        self._deferred_print.sort()
        for line_number, offset, code, text, doc in self._deferred_print:
            print(self._fmt % {
                'path': self.filename,
                'row': self.line_offset + line_number, 'col': offset + 1,
                'code': code, 'text': text,
            })
            if self._show_source:
                if line_number > len(self.lines):
                    line = ''
                else:
                    line = self.lines[line_number - 1]
                print(line.rstrip())
                # Print a caret under the offending column.
                print(re.sub(r'\S', ' ', line[:offset]) + '^')
            if self._show_pep8 and doc:
                print('    ' + doc.strip())
        return self.file_errors
1600
1601
class DiffReport(StandardReport):
    """Report only the results that fall on lines changed in a diff."""

    def __init__(self, options):
        super(DiffReport, self).__init__(options)
        # Populated by parse_udiff(); indexed by filename.
        self._selected = options.selected_lines

    def error(self, line_number, offset, text, check):
        """Report the error only when the line is part of the diff."""
        selected = self._selected[self.filename]
        if line_number in selected:
            return super(DiffReport, self).error(
                line_number, offset, text, check)
        return None
1613
1614
class StyleGuide(object):
    """Initialize a PEP-8 instance with few options."""

    def __init__(self, *args, **kwargs):
        # build options from the command line
        self.checker_class = kwargs.pop('checker_class', Checker)
        parse_argv = kwargs.pop('parse_argv', False)
        config_file = kwargs.pop('config_file', None)
        parser = kwargs.pop('parser', None)
        # build options from dict
        options_dict = dict(*args, **kwargs)
        arglist = None if parse_argv else options_dict.get('paths', None)
        options, self.paths = process_options(
            arglist, parse_argv, config_file, parser)
        if options_dict:
            # Keyword arguments override anything parsed from argv/config.
            options.__dict__.update(options_dict)
            if 'paths' in options_dict:
                self.paths = options_dict['paths']

        self.runner = self.input_file
        self.options = options

        if not options.reporter:
            # Silent report when fully quiet, otherwise print each error.
            options.reporter = BaseReport if options.quiet else StandardReport

        options.select = tuple(options.select or ())
        if not (options.select or options.ignore or
                options.testsuite or options.doctest) and DEFAULT_IGNORE:
            # The default choice: ignore controversial checks
            options.ignore = tuple(DEFAULT_IGNORE.split(','))
        else:
            # Ignore all checks which are not explicitly selected
            # (every code starts with '', so ('',) means "ignore all").
            options.ignore = ('',) if options.select else tuple(options.ignore)
        options.benchmark_keys = BENCHMARK_KEYS[:]
        options.ignore_code = self.ignore_code
        # Partition the registered checks by the kind of argument they take.
        options.physical_checks = self.get_checks('physical_line')
        options.logical_checks = self.get_checks('logical_line')
        options.ast_checks = self.get_checks('tree')
        self.init_report()

    def init_report(self, reporter=None):
        """Initialize the report instance."""
        self.options.report = (reporter or self.options.reporter)(self.options)
        return self.options.report

    def check_files(self, paths=None):
        """Run all checks on the paths."""
        if paths is None:
            paths = self.paths
        report = self.options.report
        runner = self.runner
        report.start()
        try:
            for path in paths:
                if os.path.isdir(path):
                    self.input_dir(path)
                elif not self.excluded(path):
                    runner(path)
        except KeyboardInterrupt:
            # Ctrl-C stops the run but still reports what was collected.
            print('... stopped')
        report.stop()
        return report

    def input_file(self, filename, lines=None, expected=None, line_offset=0):
        """Run all checks on a Python source file."""
        if self.options.verbose:
            print('checking %s' % filename)
        fchecker = self.checker_class(
            filename, lines=lines, options=self.options)
        return fchecker.check_all(expected=expected, line_offset=line_offset)

    def input_dir(self, dirname):
        """Check all files in this directory and all subdirectories."""
        dirname = dirname.rstrip('/')
        if self.excluded(dirname):
            return 0
        counters = self.options.report.counters
        verbose = self.options.verbose
        filepatterns = self.options.filename
        runner = self.runner
        for root, dirs, files in os.walk(dirname):
            if verbose:
                print('directory ' + root)
            counters['directories'] += 1
            # Prune excluded subdirectories in-place so os.walk skips them;
            # sorted() iterates over a copy, making dirs.remove() safe here.
            for subdir in sorted(dirs):
                if self.excluded(subdir, root):
                    dirs.remove(subdir)
            for filename in sorted(files):
                # contain a pattern that matches?
                if ((filename_match(filename, filepatterns) and
                     not self.excluded(filename, root))):
                    runner(os.path.join(root, filename))

    def excluded(self, filename, parent=None):
        """Check if the file should be excluded.

        Check if 'options.exclude' contains a pattern that matches filename.
        """
        if not self.options.exclude:
            return False
        basename = os.path.basename(filename)
        if filename_match(basename, self.options.exclude):
            return True
        if parent:
            filename = os.path.join(parent, filename)
        # Also match patterns against the absolute path.
        filename = os.path.abspath(filename)
        return filename_match(filename, self.options.exclude)

    def ignore_code(self, code):
        """Check if the error code should be ignored.

        If 'options.select' contains a prefix of the error code,
        return False.  Else, if 'options.ignore' contains a prefix of
        the error code, return True.
        """
        # A short code (e.g. 'E5') that is a prefix of a selected code is
        # never ignored, so the matching checks still run.
        if len(code) < 4 and any(s.startswith(code)
                                 for s in self.options.select):
            return False
        return (code.startswith(self.options.ignore) and
                not code.startswith(self.options.select))

    def get_checks(self, argument_name):
        """Get all the checks for this category.

        Find all globally visible functions where the first argument name
        starts with argument_name and which contain selected tests.
        """
        checks = []
        for check, attrs in _checks[argument_name].items():
            (codes, args) = attrs
            # Keep the check if at least one of its codes is not ignored.
            if any(not (code and self.ignore_code(code)) for code in codes):
                checks.append((check.__name__, check, args))
        return sorted(checks)
1748
1749
def get_parser(prog='pep8', version=__version__):
    """Build and return the OptionParser with all pep8 options registered."""
    parser = OptionParser(prog=prog, version=version,
                          usage="%prog [options] input ...")
    # Options that may also be set from a configuration file section.
    parser.config_options = [
        'exclude', 'filename', 'select', 'ignore', 'max-line-length',
        'hang-closing', 'count', 'format', 'quiet', 'show-pep8',
        'show-source', 'statistics', 'verbose']
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help="print status messages, or debug with -vv")
    parser.add_option('-q', '--quiet', default=0, action='count',
                      help="report only file names, or nothing with -qq")
    parser.add_option('-r', '--repeat', default=True, action='store_true',
                      help="(obsolete) show all occurrences of the same error")
    parser.add_option('--first', action='store_false', dest='repeat',
                      help="show first occurrence of each error")
    parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
                      help="exclude files or directories which match these "
                           "comma separated patterns (default: %default)")
    parser.add_option('--filename', metavar='patterns', default='*.py',
                      help="when parsing directories, only check filenames "
                           "matching these comma separated patterns "
                           "(default: %default)")
    parser.add_option('--select', metavar='errors', default='',
                      help="select errors and warnings (e.g. E,W6)")
    parser.add_option('--ignore', metavar='errors', default='',
                      help="skip errors and warnings (e.g. E4,W)")
    parser.add_option('--show-source', action='store_true',
                      help="show source code for each error")
    parser.add_option('--show-pep8', action='store_true',
                      help="show text of PEP 8 for each error "
                           "(implies --first)")
    parser.add_option('--statistics', action='store_true',
                      help="count errors and warnings")
    parser.add_option('--count', action='store_true',
                      help="print total number of errors and warnings "
                           "to standard error and set exit code to 1 if "
                           "total is not null")
    parser.add_option('--max-line-length', type='int', metavar='n',
                      default=MAX_LINE_LENGTH,
                      help="set maximum allowed line length "
                           "(default: %default)")
    parser.add_option('--hang-closing', action='store_true',
                      help="hang closing bracket instead of matching "
                           "indentation of opening bracket's line")
    parser.add_option('--format', metavar='format', default='default',
                      help="set the error format [default|pylint|<custom>]")
    parser.add_option('--diff', action='store_true',
                      help="report only lines changed according to the "
                           "unified diff received on STDIN")
    # Testsuite/doctest options are only exposed when the regression test
    # directory exists (i.e. when running from a source checkout).
    group = parser.add_option_group("Testing Options")
    if os.path.exists(TESTSUITE_PATH):
        group.add_option('--testsuite', metavar='dir',
                         help="run regression tests from dir")
        group.add_option('--doctest', action='store_true',
                         help="run doctest on myself")
    group.add_option('--benchmark', action='store_true',
                     help="measure processing speed")
    return parser
1808
1809
def read_config(options, args, arglist, parser):
    """Read both user configuration and local configuration.

    The user configuration file (``options.config``), when present, is
    read first.  Then the nearest project configuration file (one of
    PROJECT_CONFIG, e.g. tox.ini / setup.cfg) found in a parent directory
    of the checked paths is read on top of it.  Finally the command line
    is re-parsed with these values as defaults, so explicit command-line
    flags win over configuration files.

    Return the (possibly replaced) options object.
    """
    config = RawConfigParser()

    # 1) User-level configuration file, if it exists.
    user_conf = options.config
    if user_conf and os.path.isfile(user_conf):
        if options.verbose:
            print('user configuration: %s' % user_conf)
        config.read(user_conf)

    # 2) Walk up from the common prefix of the arguments, stopping at the
    #    first directory that holds a project configuration file.
    local_dir = os.curdir
    parent = tail = args and os.path.abspath(os.path.commonprefix(args))
    while tail:
        if config.read([os.path.join(parent, fn) for fn in PROJECT_CONFIG]):
            local_dir = parent
            if options.verbose:
                print('local configuration: in %s' % parent)
            break
        (parent, tail) = os.path.split(parent)

    pep8_section = parser.prog
    if config.has_section(pep8_section):
        # Map each option destination to its type (or action, for flags);
        # a generator expression avoids building a throwaway list.
        option_list = dict((o.dest, o.type or o.action)
                           for o in parser.option_list)

        # First, read the default values
        (new_options, __) = parser.parse_args([])

        # Second, parse the configuration
        for opt in config.options(pep8_section):
            if opt.replace('_', '-') not in parser.config_options:
                print("  unknown option '%s' ignored" % opt)
                continue
            if options.verbose > 1:
                print("  %s = %s" % (opt, config.get(pep8_section, opt)))
            normalized_opt = opt.replace('-', '_')
            opt_type = option_list[normalized_opt]
            if opt_type in ('int', 'count'):
                value = config.getint(pep8_section, opt)
            elif opt_type == 'string':
                value = config.get(pep8_section, opt)
                if normalized_opt == 'exclude':
                    # Exclusion patterns are relative to the config file.
                    value = normalize_paths(value, local_dir)
            else:
                assert opt_type in ('store_true', 'store_false')
                value = config.getboolean(pep8_section, opt)
            setattr(new_options, normalized_opt, value)

        # Third, overwrite with the command-line options
        (options, __) = parser.parse_args(arglist, values=new_options)
    # The test-suite options are never taken from configuration files.
    options.doctest = options.testsuite = False
    return options
1862
1863
def process_options(arglist=None, parse_argv=False, config_file=None,
                    parser=None):
    """Process options passed either via arglist or via command line args.

    Return the tuple (options, args) produced by optparse after the
    configuration files (if any) have been merged in.
    """
    if not parser:
        parser = get_parser()
    if not parser.has_option('--config'):
        if config_file is True:
            # config_file=True means "use the default user configuration".
            config_file = DEFAULT_CONFIG
        group = parser.add_option_group("Configuration", description=(
            "The project options are read from the [%s] section of the "
            "tox.ini file or the setup.cfg file located in any parent folder "
            "of the path(s) being processed.  Allowed options are: %s." %
            (parser.prog, ', '.join(parser.config_options))))
        group.add_option('--config', metavar='path', default=config_file,
                         help="user config file location (default: %default)")
    # Don't read the command line if the module is used as a library.
    if not arglist and not parse_argv:
        arglist = []
    # If parse_argv is True and arglist is None, arguments are
    # parsed from the command line (sys.argv)
    (options, args) = parser.parse_args(arglist)
    options.reporter = None

    if options.ensure_value('testsuite', False):
        args.append(options.testsuite)
    elif not options.ensure_value('doctest', False):
        if parse_argv and not args:
            if options.diff or any(os.path.exists(name)
                                   for name in PROJECT_CONFIG):
                # Default to the current directory when a diff is given or
                # a project configuration file is present.
                args = ['.']
            else:
                parser.error('input not specified')
        options = read_config(options, args, arglist, parser)
        # With exactly one -q on the command line, print only filenames.
        options.reporter = parse_argv and options.quiet == 1 and FileReport

    # Split the comma-separated option values into lists.
    options.filename = options.filename and options.filename.split(',')
    options.exclude = normalize_paths(options.exclude)
    options.select = options.select and options.select.split(',')
    options.ignore = options.ignore and options.ignore.split(',')

    if options.diff:
        # --diff overrides any previously chosen reporter.
        options.reporter = DiffReport
        stdin = stdin_get_value()
        options.selected_lines = parse_udiff(stdin, options.filename, args[0])
        args = sorted(options.selected_lines)

    return options, args
1911
1912
def _main():
    """Command-line entry point: parse options and run the checks."""
    import signal

    # Exit quietly instead of raising when the output pipe is closed.
    try:
        signal.signal(signal.SIGPIPE, lambda signum, frame: sys.exit(1))
    except AttributeError:
        pass    # SIGPIPE is not available on Windows

    style_guide = StyleGuide(parse_argv=True, config_file=True)
    options = style_guide.options

    if options.doctest or options.testsuite:
        from testsuite.support import run_tests
        report = run_tests(style_guide)
    else:
        report = style_guide.check_files()

    if options.statistics:
        report.print_statistics()
    if options.benchmark:
        report.print_benchmark()
    if options.testsuite and not options.quiet:
        report.print_results()

    if not report.total_errors:
        return
    if options.count:
        sys.stderr.write(str(report.total_errors) + '\n')
    sys.exit(1)
1940
# Allow running this module directly as a script.
if __name__ == '__main__':
    _main()
1943