1# Copyright (C) 2010-2011 Hideo Hattori
2# Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
3# Copyright (C) 2013-2014 Hideo Hattori, Steven Myint, Bill Wendling
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23# SOFTWARE.
24
25"""Automatically formats Python code to conform to the PEP 8 style guide.
26
27Fixes that only need be done once can be added by adding a function of the form
28"fix_<code>(source)" to this module. They should return the fixed source code.
29These fixes are picked up by apply_global_fixes().
30
31Fixes that depend on pep8 should be added as methods to FixPEP8. See the class
32documentation for more information.
33
34"""
35
36from __future__ import absolute_import
37from __future__ import division
38from __future__ import print_function
39from __future__ import unicode_literals
40
41import bisect
42import codecs
43import collections
44import copy
45import difflib
46import fnmatch
47import inspect
48import io
49import itertools
50import keyword
51import locale
52import os
53import re
54import signal
55import sys
56import token
57import tokenize
58
59import pep8
60
61
62try:
63    unicode
64except NameError:
65    unicode = str
66
67
68__version__ = '1.0.3'
69
70
71CR = '\r'
72LF = '\n'
73CRLF = '\r\n'
74
75
76PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')
77
78
79# For generating line shortening candidates.
80SHORTEN_OPERATOR_GROUPS = frozenset([
81    frozenset([',']),
82    frozenset(['%']),
83    frozenset([',', '(', '[', '{']),
84    frozenset(['%', '(', '[', '{']),
85    frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
86    frozenset(['%', '+', '-', '*', '/', '//']),
87])
88
89
90DEFAULT_IGNORE = 'E24'
91DEFAULT_INDENT_SIZE = 4
92
93
94# W602 is handled separately due to the need to avoid "with_traceback".
95CODE_TO_2TO3 = {
96    'E721': ['idioms'],
97    'W601': ['has_key'],
98    'W603': ['ne'],
99    'W604': ['repr'],
100    'W690': ['apply',
101             'except',
102             'exitfunc',
103             'import',
104             'numliterals',
105             'operator',
106             'paren',
107             'reduce',
108             'renames',
109             'standarderror',
110             'sys_exc',
111             'throw',
112             'tuple_params',
113             'xreadlines']}
114
115
116def open_with_encoding(filename, encoding=None, mode='r'):
117    """Return opened file with a specific encoding."""
118    if not encoding:
119        encoding = detect_encoding(filename)
120
121    return io.open(filename, mode=mode, encoding=encoding,
122                   newline='')  # Preserve line endings
123
124
125def detect_encoding(filename):
126    """Return file encoding."""
127    try:
128        with open(filename, 'rb') as input_file:
129            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
130            encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]
131
132        # Check for correctness of encoding
133        with open_with_encoding(filename, encoding) as test_file:
134            test_file.read()
135
136        return encoding
137    except (LookupError, SyntaxError, UnicodeDecodeError):
138        return 'latin-1'
139
140
141def readlines_from_file(filename):
142    """Return contents of file."""
143    with open_with_encoding(filename) as input_file:
144        return input_file.readlines()
145
146
147def extended_blank_lines(logical_line,
148                         blank_lines,
149                         indent_level,
150                         previous_logical):
151    """Check for missing blank lines after class declaration."""
152    if previous_logical.startswith('class '):
153        if (
154            logical_line.startswith(('def ', 'class ', '@')) or
155            pep8.DOCSTRING_REGEX.match(logical_line)
156        ):
157            if indent_level and not blank_lines:
158                yield (0, 'E309 expected 1 blank line after class declaration')
159    elif previous_logical.startswith('def '):
160        if blank_lines and pep8.DOCSTRING_REGEX.match(logical_line):
161            yield (0, 'E303 too many blank lines ({0})'.format(blank_lines))
162    elif pep8.DOCSTRING_REGEX.match(previous_logical):
163        # Missing blank line between class docstring and method declaration.
164        if (
165            indent_level and
166            not blank_lines and
167            logical_line.startswith(('def ')) and
168            '(self' in logical_line
169        ):
170            yield (0, 'E301 expected 1 blank line, found 0')
171pep8.register_check(extended_blank_lines)
172
173
174def continued_indentation(logical_line, tokens, indent_level, indent_char,
175                          noqa):
176    """Override pep8's function to provide indentation information."""
177    first_row = tokens[0][2][0]
178    nrows = 1 + tokens[-1][2][0] - first_row
179    if noqa or nrows == 1:
180        return
181
182    # indent_next tells us whether the next block is indented. Assuming
183    # that it is indented by 4 spaces, then we should not allow 4-space
184    # indents on the final continuation line. In turn, some other
185    # indents are allowed to have an extra 4 spaces.
186    indent_next = logical_line.endswith(':')
187
188    row = depth = 0
189    valid_hangs = (
190        (DEFAULT_INDENT_SIZE,)
191        if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
192                                     2 * DEFAULT_INDENT_SIZE)
193    )
194
195    # Remember how many brackets were opened on each line.
196    parens = [0] * nrows
197
198    # Relative indents of physical lines.
199    rel_indent = [0] * nrows
200
201    # For each depth, collect a list of opening rows.
202    open_rows = [[0]]
203    # For each depth, memorize the hanging indentation.
204    hangs = [None]
205
206    # Visual indents.
207    indent_chances = {}
208    last_indent = tokens[0][2]
209    indent = [last_indent[1]]
210
211    last_token_multiline = None
212    line = None
213    last_line = ''
214    last_line_begins_with_multiline = False
215    for token_type, text, start, end, line in tokens:
216
217        newline = row < start[0] - first_row
218        if newline:
219            row = start[0] - first_row
220            newline = (not last_token_multiline and
221                       token_type not in (tokenize.NL, tokenize.NEWLINE))
222            last_line_begins_with_multiline = last_token_multiline
223
224        if newline:
225            # This is the beginning of a continuation line.
226            last_indent = start
227
228            # Record the initial indent.
229            rel_indent[row] = pep8.expand_indent(line) - indent_level
230
231            # Identify closing bracket.
232            close_bracket = (token_type == tokenize.OP and text in ']})')
233
234            # Is the indent relative to an opening bracket line?
235            for open_row in reversed(open_rows[depth]):
236                hang = rel_indent[row] - rel_indent[open_row]
237                hanging_indent = hang in valid_hangs
238                if hanging_indent:
239                    break
240            if hangs[depth]:
241                hanging_indent = (hang == hangs[depth])
242
243            visual_indent = (not close_bracket and hang > 0 and
244                             indent_chances.get(start[1]))
245
246            if close_bracket and indent[depth]:
247                # Closing bracket for visual indent.
248                if start[1] != indent[depth]:
249                    yield (start, 'E124 {0}'.format(indent[depth]))
250            elif close_bracket and not hang:
251                pass
252            elif indent[depth] and start[1] < indent[depth]:
253                # Visual indent is broken.
254                yield (start, 'E128 {0}'.format(indent[depth]))
255            elif (hanging_indent or
256                  (indent_next and
257                   rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
258                # Hanging indent is verified.
259                if close_bracket:
260                    yield (start, 'E123 {0}'.format(indent_level +
261                                                    rel_indent[open_row]))
262                hangs[depth] = hang
263            elif visual_indent is True:
264                # Visual indent is verified.
265                indent[depth] = start[1]
266            elif visual_indent in (text, unicode):
267                # Ignore token lined up with matching one from a previous line.
268                pass
269            else:
270                one_indented = (indent_level + rel_indent[open_row] +
271                                DEFAULT_INDENT_SIZE)
272                # Indent is broken.
273                if hang <= 0:
274                    error = ('E122', one_indented)
275                elif indent[depth]:
276                    error = ('E127', indent[depth])
277                elif hang > DEFAULT_INDENT_SIZE:
278                    error = ('E126', one_indented)
279                else:
280                    hangs[depth] = hang
281                    error = ('E121', one_indented)
282
283                yield (start, '{0} {1}'.format(*error))
284
285        # Look for visual indenting.
286        if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
287                and not indent[depth]):
288            indent[depth] = start[1]
289            indent_chances[start[1]] = True
290        # Deal with implicit string concatenation.
291        elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
292              text in ('u', 'ur', 'b', 'br')):
293            indent_chances[start[1]] = unicode
294        # Special case for the "if" statement because len("if (") is equal to
295        # 4.
296        elif not indent_chances and not row and not depth and text == 'if':
297            indent_chances[end[1] + 1] = True
298        elif text == ':' and line[end[1]:].isspace():
299            open_rows[depth].append(row)
300
301        # Keep track of bracket depth.
302        if token_type == tokenize.OP:
303            if text in '([{':
304                depth += 1
305                indent.append(0)
306                hangs.append(None)
307                if len(open_rows) == depth:
308                    open_rows.append([])
309                open_rows[depth].append(row)
310                parens[row] += 1
311            elif text in ')]}' and depth > 0:
312                # Parent indents should not be more than this one.
313                prev_indent = indent.pop() or last_indent[1]
314                hangs.pop()
315                for d in range(depth):
316                    if indent[d] > prev_indent:
317                        indent[d] = 0
318                for ind in list(indent_chances):
319                    if ind >= prev_indent:
320                        del indent_chances[ind]
321                del open_rows[depth + 1:]
322                depth -= 1
323                if depth:
324                    indent_chances[indent[depth]] = True
325                for idx in range(row, -1, -1):
326                    if parens[idx]:
327                        parens[idx] -= 1
328                        break
329            assert len(indent) == depth + 1
330            if (
331                start[1] not in indent_chances and
332                # This is for purposes of speeding up E121 (GitHub #90).
333                not last_line.rstrip().endswith(',')
334            ):
335                # Allow to line up tokens.
336                indent_chances[start[1]] = text
337
338        last_token_multiline = (start[0] != end[0])
339        if last_token_multiline:
340            rel_indent[end[0] - first_row] = rel_indent[row]
341
342        last_line = line
343
344    if (
345        indent_next and
346        not last_line_begins_with_multiline and
347        pep8.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
348    ):
349        pos = (start[0], indent[0] + 4)
350        yield (pos, 'E125 {0}'.format(indent_level +
351                                      2 * DEFAULT_INDENT_SIZE))
352del pep8._checks['logical_line'][pep8.continued_indentation]
353pep8.register_check(continued_indentation)
354
355
356class FixPEP8(object):
357
358    """Fix invalid code.
359
360    Fixer methods are prefixed "fix_". The _fix_source() method looks for these
361    automatically.
362
363    The fixer method can take either one or two arguments (in addition to
364    self). The first argument is "result", which is the error information from
365    pep8. The second argument, "logical", is required only for logical-line
366    fixes.
367
368    The fixer method can return the list of modified lines or None. An empty
369    list would mean that no changes were made. None would mean that only the
370    line reported in the pep8 error was modified. Note that the modified line
371    numbers that are returned are indexed at 1. This typically would correspond
372    with the line number reported in the pep8 error information.
373
374    [fixed method list]
375        - e121,e122,e123,e124,e125,e126,e127,e128,e129
376        - e201,e202,e203
377        - e211
378        - e221,e222,e223,e224,e225
379        - e231
380        - e251
381        - e261,e262
382        - e271,e272,e273,e274
383        - e301,e302,e303
384        - e401
385        - e502
386        - e701,e702
387        - e711
388        - w291
389
390    """
391
392    def __init__(self, filename,
393                 options,
394                 contents=None,
395                 long_line_ignore_cache=None):
396        self.filename = filename
397        if contents is None:
398            self.source = readlines_from_file(filename)
399        else:
400            sio = io.StringIO(contents)
401            self.source = sio.readlines()
402        self.options = options
403        self.indent_word = _get_indentword(''.join(self.source))
404
405        self.long_line_ignore_cache = (
406            set() if long_line_ignore_cache is None
407            else long_line_ignore_cache)
408
409        # Many fixers are the same even though pep8 categorizes them
410        # differently.
411        self.fix_e115 = self.fix_e112
412        self.fix_e116 = self.fix_e113
413        self.fix_e121 = self._fix_reindent
414        self.fix_e122 = self._fix_reindent
415        self.fix_e123 = self._fix_reindent
416        self.fix_e124 = self._fix_reindent
417        self.fix_e126 = self._fix_reindent
418        self.fix_e127 = self._fix_reindent
419        self.fix_e128 = self._fix_reindent
420        self.fix_e129 = self._fix_reindent
421        self.fix_e202 = self.fix_e201
422        self.fix_e203 = self.fix_e201
423        self.fix_e211 = self.fix_e201
424        self.fix_e221 = self.fix_e271
425        self.fix_e222 = self.fix_e271
426        self.fix_e223 = self.fix_e271
427        self.fix_e226 = self.fix_e225
428        self.fix_e227 = self.fix_e225
429        self.fix_e228 = self.fix_e225
430        self.fix_e241 = self.fix_e271
431        self.fix_e242 = self.fix_e224
432        self.fix_e261 = self.fix_e262
433        self.fix_e272 = self.fix_e271
434        self.fix_e273 = self.fix_e271
435        self.fix_e274 = self.fix_e271
436        self.fix_e309 = self.fix_e301
437        self.fix_e501 = (
438            self.fix_long_line_logically if
439            options and (options.aggressive >= 2 or options.experimental) else
440            self.fix_long_line_physically)
441        self.fix_e703 = self.fix_e702
442
443        self._ws_comma_done = False
444
445    def _fix_source(self, results):
446        try:
447            (logical_start, logical_end) = _find_logical(self.source)
448            logical_support = True
449        except (SyntaxError, tokenize.TokenError):  # pragma: no cover
450            logical_support = False
451
452        completed_lines = set()
453        for result in sorted(results, key=_priority_key):
454            if result['line'] in completed_lines:
455                continue
456
457            fixed_methodname = 'fix_' + result['id'].lower()
458            if hasattr(self, fixed_methodname):
459                fix = getattr(self, fixed_methodname)
460
461                line_index = result['line'] - 1
462                original_line = self.source[line_index]
463
464                is_logical_fix = len(inspect.getargspec(fix).args) > 2
465                if is_logical_fix:
466                    logical = None
467                    if logical_support:
468                        logical = _get_logical(self.source,
469                                               result,
470                                               logical_start,
471                                               logical_end)
472                        if logical and set(range(
473                            logical[0][0] + 1,
474                            logical[1][0] + 1)).intersection(
475                                completed_lines):
476                            continue
477
478                    modified_lines = fix(result, logical)
479                else:
480                    modified_lines = fix(result)
481
482                if modified_lines is None:
483                    # Force logical fixes to report what they modified.
484                    assert not is_logical_fix
485
486                    if self.source[line_index] == original_line:
487                        modified_lines = []
488
489                if modified_lines:
490                    completed_lines.update(modified_lines)
491                elif modified_lines == []:  # Empty list means no fix
492                    if self.options.verbose >= 2:
493                        print(
494                            '--->  Not fixing {f} on line {l}'.format(
495                                f=result['id'], l=result['line']),
496                            file=sys.stderr)
497                else:  # We assume one-line fix when None.
498                    completed_lines.add(result['line'])
499            else:
500                if self.options.verbose >= 3:
501                    print(
502                        "--->  '{0}' is not defined.".format(fixed_methodname),
503                        file=sys.stderr)
504
505                    info = result['info'].strip()
506                    print('--->  {0}:{1}:{2}:{3}'.format(self.filename,
507                                                         result['line'],
508                                                         result['column'],
509                                                         info),
510                          file=sys.stderr)
511
512    def fix(self):
513        """Return a version of the source code with PEP 8 violations fixed."""
514        pep8_options = {
515            'ignore': self.options.ignore,
516            'select': self.options.select,
517            'max_line_length': self.options.max_line_length,
518        }
519        results = _execute_pep8(pep8_options, self.source)
520
521        if self.options.verbose:
522            progress = {}
523            for r in results:
524                if r['id'] not in progress:
525                    progress[r['id']] = set()
526                progress[r['id']].add(r['line'])
527            print('--->  {n} issue(s) to fix {progress}'.format(
528                n=len(results), progress=progress), file=sys.stderr)
529
530        if self.options.line_range:
531            start, end = self.options.line_range
532            results = [r for r in results
533                       if start <= r['line'] <= end]
534
535        self._fix_source(filter_results(source=''.join(self.source),
536                                        results=results,
537                                        aggressive=self.options.aggressive))
538
539        if self.options.line_range:
540            # If number of lines has changed then change line_range.
541            count = sum(sline.count('\n')
542                        for sline in self.source[start - 1:end])
543            self.options.line_range[1] = start + count - 1
544
545        return ''.join(self.source)
546
547    def _fix_reindent(self, result):
548        """Fix a badly indented line.
549
550        This is done by adding or removing from its initial indent only.
551
552        """
553        num_indent_spaces = int(result['info'].split()[1])
554        line_index = result['line'] - 1
555        target = self.source[line_index]
556
557        self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()
558
559    def fix_e112(self, result):
560        """Fix under-indented comments."""
561        line_index = result['line'] - 1
562        target = self.source[line_index]
563
564        if not target.lstrip().startswith('#'):
565            # Don't screw with invalid syntax.
566            return []
567
568        self.source[line_index] = self.indent_word + target
569
570    def fix_e113(self, result):
571        """Fix over-indented comments."""
572        line_index = result['line'] - 1
573        target = self.source[line_index]
574
575        indent = _get_indentation(target)
576        stripped = target.lstrip()
577
578        if not stripped.startswith('#'):
579            # Don't screw with invalid syntax.
580            return []
581
582        self.source[line_index] = indent[1:] + stripped
583
584    def fix_e125(self, result):
585        """Fix indentation undistinguish from the next logical line."""
586        num_indent_spaces = int(result['info'].split()[1])
587        line_index = result['line'] - 1
588        target = self.source[line_index]
589
590        spaces_to_add = num_indent_spaces - len(_get_indentation(target))
591        indent = len(_get_indentation(target))
592        modified_lines = []
593
594        while len(_get_indentation(self.source[line_index])) >= indent:
595            self.source[line_index] = (' ' * spaces_to_add +
596                                       self.source[line_index])
597            modified_lines.append(1 + line_index)  # Line indexed at 1.
598            line_index -= 1
599
600        return modified_lines
601
602    def fix_e201(self, result):
603        """Remove extraneous whitespace."""
604        line_index = result['line'] - 1
605        target = self.source[line_index]
606        offset = result['column'] - 1
607
608        if is_probably_part_of_multiline(target):
609            return []
610
611        fixed = fix_whitespace(target,
612                               offset=offset,
613                               replacement='')
614
615        self.source[line_index] = fixed
616
617    def fix_e224(self, result):
618        """Remove extraneous whitespace around operator."""
619        target = self.source[result['line'] - 1]
620        offset = result['column'] - 1
621        fixed = target[:offset] + target[offset:].replace('\t', ' ')
622        self.source[result['line'] - 1] = fixed
623
624    def fix_e225(self, result):
625        """Fix missing whitespace around operator."""
626        target = self.source[result['line'] - 1]
627        offset = result['column'] - 1
628        fixed = target[:offset] + ' ' + target[offset:]
629
630        # Only proceed if non-whitespace characters match.
631        # And make sure we don't break the indentation.
632        if (
633            fixed.replace(' ', '') == target.replace(' ', '') and
634            _get_indentation(fixed) == _get_indentation(target)
635        ):
636            self.source[result['line'] - 1] = fixed
637        else:
638            return []
639
640    def fix_e231(self, result):
641        """Add missing whitespace."""
642        # Optimize for comma case. This will fix all commas in the full source
643        # code in one pass. Don't do this more than once. If it fails the first
644        # time, there is no point in trying again.
645        if ',' in result['info'] and not self._ws_comma_done:
646            self._ws_comma_done = True
647            original = ''.join(self.source)
648            new = refactor(original, ['ws_comma'])
649            if original.strip() != new.strip():
650                self.source = [new]
651                return range(1, 1 + len(original))
652
653        line_index = result['line'] - 1
654        target = self.source[line_index]
655        offset = result['column']
656        fixed = target[:offset] + ' ' + target[offset:]
657        self.source[line_index] = fixed
658
659    def fix_e251(self, result):
660        """Remove whitespace around parameter '=' sign."""
661        line_index = result['line'] - 1
662        target = self.source[line_index]
663
664        # This is necessary since pep8 sometimes reports columns that goes
665        # past the end of the physical line. This happens in cases like,
666        # foo(bar\n=None)
667        c = min(result['column'] - 1,
668                len(target) - 1)
669
670        if target[c].strip():
671            fixed = target
672        else:
673            fixed = target[:c].rstrip() + target[c:].lstrip()
674
675        # There could be an escaped newline
676        #
677        #     def foo(a=\
678        #             1)
679        if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')):
680            self.source[line_index] = fixed.rstrip('\n\r \t\\')
681            self.source[line_index + 1] = self.source[line_index + 1].lstrip()
682            return [line_index + 1, line_index + 2]  # Line indexed at 1
683
684        self.source[result['line'] - 1] = fixed
685
686    def fix_e262(self, result):
687        """Fix spacing after comment hash."""
688        target = self.source[result['line'] - 1]
689        offset = result['column']
690
691        code = target[:offset].rstrip(' \t#')
692        comment = target[offset:].lstrip(' \t#')
693
694        fixed = code + ('  # ' + comment if comment.strip() else '\n')
695
696        self.source[result['line'] - 1] = fixed
697
698    def fix_e271(self, result):
699        """Fix extraneous whitespace around keywords."""
700        line_index = result['line'] - 1
701        target = self.source[line_index]
702        offset = result['column'] - 1
703
704        if is_probably_part_of_multiline(target):
705            return []
706
707        fixed = fix_whitespace(target,
708                               offset=offset,
709                               replacement=' ')
710
711        if fixed == target:
712            return []
713        else:
714            self.source[line_index] = fixed
715
716    def fix_e301(self, result):
717        """Add missing blank line."""
718        cr = '\n'
719        self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]
720
721    def fix_e302(self, result):
722        """Add missing 2 blank lines."""
723        add_linenum = 2 - int(result['info'].split()[-1])
724        cr = '\n' * add_linenum
725        self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]
726
727    def fix_e303(self, result):
728        """Remove extra blank lines."""
729        delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2
730        delete_linenum = max(1, delete_linenum)
731
732        # We need to count because pep8 reports an offset line number if there
733        # are comments.
734        cnt = 0
735        line = result['line'] - 2
736        modified_lines = []
737        while cnt < delete_linenum and line >= 0:
738            if not self.source[line].strip():
739                self.source[line] = ''
740                modified_lines.append(1 + line)  # Line indexed at 1
741                cnt += 1
742            line -= 1
743
744        return modified_lines
745
746    def fix_e304(self, result):
747        """Remove blank line following function decorator."""
748        line = result['line'] - 2
749        if not self.source[line].strip():
750            self.source[line] = ''
751
752    def fix_e401(self, result):
753        """Put imports on separate lines."""
754        line_index = result['line'] - 1
755        target = self.source[line_index]
756        offset = result['column'] - 1
757
758        if not target.lstrip().startswith('import'):
759            return []
760
761        indentation = re.split(pattern=r'\bimport\b',
762                               string=target, maxsplit=1)[0]
763        fixed = (target[:offset].rstrip('\t ,') + '\n' +
764                 indentation + 'import ' + target[offset:].lstrip('\t ,'))
765        self.source[line_index] = fixed
766
767    def fix_long_line_logically(self, result, logical):
768        """Try to make lines fit within --max-line-length characters."""
769        if (
770            not logical or
771            len(logical[2]) == 1 or
772            self.source[result['line'] - 1].lstrip().startswith('#')
773        ):
774            return self.fix_long_line_physically(result)
775
776        start_line_index = logical[0][0]
777        end_line_index = logical[1][0]
778        logical_lines = logical[2]
779
780        previous_line = get_item(self.source, start_line_index - 1, default='')
781        next_line = get_item(self.source, end_line_index + 1, default='')
782
783        single_line = join_logical_line(''.join(logical_lines))
784
785        try:
786            fixed = self.fix_long_line(
787                target=single_line,
788                previous_line=previous_line,
789                next_line=next_line,
790                original=''.join(logical_lines))
791        except (SyntaxError, tokenize.TokenError):
792            return self.fix_long_line_physically(result)
793
794        if fixed:
795            for line_index in range(start_line_index, end_line_index + 1):
796                self.source[line_index] = ''
797            self.source[start_line_index] = fixed
798            return range(start_line_index + 1, end_line_index + 1)
799        else:
800            return []
801
802    def fix_long_line_physically(self, result):
803        """Try to make lines fit within --max-line-length characters."""
804        line_index = result['line'] - 1
805        target = self.source[line_index]
806
807        previous_line = get_item(self.source, line_index - 1, default='')
808        next_line = get_item(self.source, line_index + 1, default='')
809
810        try:
811            fixed = self.fix_long_line(
812                target=target,
813                previous_line=previous_line,
814                next_line=next_line,
815                original=target)
816        except (SyntaxError, tokenize.TokenError):
817            return []
818
819        if fixed:
820            self.source[line_index] = fixed
821            return [line_index + 1]
822        else:
823            return []
824
825    def fix_long_line(self, target, previous_line,
826                      next_line, original):
827        cache_entry = (target, previous_line, next_line)
828        if cache_entry in self.long_line_ignore_cache:
829            return []
830
831        if target.lstrip().startswith('#'):
832            # Wrap commented lines.
833            return shorten_comment(
834                line=target,
835                max_line_length=self.options.max_line_length,
836                last_comment=not next_line.lstrip().startswith('#'))
837
838        fixed = get_fixed_long_line(
839            target=target,
840            previous_line=previous_line,
841            original=original,
842            indent_word=self.indent_word,
843            max_line_length=self.options.max_line_length,
844            aggressive=self.options.aggressive,
845            experimental=self.options.experimental,
846            verbose=self.options.verbose)
847        if fixed and not code_almost_equal(original, fixed):
848            return fixed
849        else:
850            self.long_line_ignore_cache.add(cache_entry)
851            return None
852
853    def fix_e502(self, result):
854        """Remove extraneous escape of newline."""
855        line_index = result['line'] - 1
856        target = self.source[line_index]
857        self.source[line_index] = target.rstrip('\n\r \t\\') + '\n'
858
859    def fix_e701(self, result):
860        """Put colon-separated compound statement on separate lines."""
861        line_index = result['line'] - 1
862        target = self.source[line_index]
863        c = result['column']
864
865        fixed_source = (target[:c] + '\n' +
866                        _get_indentation(target) + self.indent_word +
867                        target[c:].lstrip('\n\r \t\\'))
868        self.source[result['line'] - 1] = fixed_source
869        return [result['line'], result['line'] + 1]
870
871    def fix_e702(self, result, logical):
872        """Put semicolon-separated compound statement on separate lines."""
873        if not logical:
874            return []  # pragma: no cover
875        logical_lines = logical[2]
876
877        line_index = result['line'] - 1
878        target = self.source[line_index]
879
880        if target.rstrip().endswith('\\'):
881            # Normalize '1; \\\n2' into '1; 2'.
882            self.source[line_index] = target.rstrip('\n \r\t\\')
883            self.source[line_index + 1] = self.source[line_index + 1].lstrip()
884            return [line_index + 1, line_index + 2]
885
886        if target.rstrip().endswith(';'):
887            self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
888            return [line_index + 1]
889
890        offset = result['column'] - 1
891        first = target[:offset].rstrip(';').rstrip()
892        second = (_get_indentation(logical_lines[0]) +
893                  target[offset:].lstrip(';').lstrip())
894
895        self.source[line_index] = first + '\n' + second
896        return [line_index + 1]
897
898    def fix_e711(self, result):
899        """Fix comparison with None."""
900        line_index = result['line'] - 1
901        target = self.source[line_index]
902        offset = result['column'] - 1
903
904        right_offset = offset + 2
905        if right_offset >= len(target):
906            return []
907
908        left = target[:offset].rstrip()
909        center = target[offset:right_offset]
910        right = target[right_offset:].lstrip()
911
912        if not right.startswith('None'):
913            return []
914
915        if center.strip() == '==':
916            new_center = 'is'
917        elif center.strip() == '!=':
918            new_center = 'is not'
919        else:
920            return []
921
922        self.source[line_index] = ' '.join([left, new_center, right])
923
924    def fix_e712(self, result):
925        """Fix comparison with boolean."""
926        line_index = result['line'] - 1
927        target = self.source[line_index]
928        offset = result['column'] - 1
929
930        # Handle very easy "not" special cases.
931        if re.match(r'^\s*if \w+ == False:$', target):
932            self.source[line_index] = re.sub(r'if (\w+) == False:',
933                                             r'if not \1:', target, count=1)
934        elif re.match(r'^\s*if \w+ != True:$', target):
935            self.source[line_index] = re.sub(r'if (\w+) != True:',
936                                             r'if not \1:', target, count=1)
937        else:
938            right_offset = offset + 2
939            if right_offset >= len(target):
940                return []
941
942            left = target[:offset].rstrip()
943            center = target[offset:right_offset]
944            right = target[right_offset:].lstrip()
945
946            # Handle simple cases only.
947            new_right = None
948            if center.strip() == '==':
949                if re.match(r'\bTrue\b', right):
950                    new_right = re.sub(r'\bTrue\b *', '', right, count=1)
951            elif center.strip() == '!=':
952                if re.match(r'\bFalse\b', right):
953                    new_right = re.sub(r'\bFalse\b *', '', right, count=1)
954
955            if new_right is None:
956                return []
957
958            if new_right[0].isalnum():
959                new_right = ' ' + new_right
960
961            self.source[line_index] = left + new_right
962
963    def fix_e713(self, result):
964        """Fix non-membership check."""
965        line_index = result['line'] - 1
966        target = self.source[line_index]
967
968        # Handle very easy case only.
969        if re.match(r'^\s*if not \w+ in \w+:$', target):
970            self.source[line_index] = re.sub(r'if not (\w+) in (\w+):',
971                                             r'if \1 not in \2:',
972                                             target,
973                                             count=1)
974
975    def fix_w291(self, result):
976        """Remove trailing whitespace."""
977        fixed_line = self.source[result['line'] - 1].rstrip()
978        self.source[result['line'] - 1] = fixed_line + '\n'
979
980
981def get_fixed_long_line(target, previous_line, original,
982                        indent_word='    ', max_line_length=79,
983                        aggressive=False, experimental=False, verbose=False):
984    """Break up long line and return result.
985
986    Do this by generating multiple reformatted candidates and then
987    ranking the candidates to heuristically select the best option.
988
989    """
990    indent = _get_indentation(target)
991    source = target[len(indent):]
992    assert source.lstrip() == source
993
994    # Check for partial multiline.
995    tokens = list(generate_tokens(source))
996
997    candidates = shorten_line(
998        tokens, source, indent,
999        indent_word,
1000        max_line_length,
1001        aggressive=aggressive,
1002        experimental=experimental,
1003        previous_line=previous_line)
1004
1005    # Also sort alphabetically as a tie breaker (for determinism).
1006    candidates = sorted(
1007        sorted(set(candidates).union([target, original])),
1008        key=lambda x: line_shortening_rank(x,
1009                                           indent_word,
1010                                           max_line_length,
1011                                           experimental))
1012
1013    if verbose >= 4:
1014        print(('-' * 79 + '\n').join([''] + candidates + ['']),
1015              file=codecs.getwriter('utf-8')(sys.stderr.buffer
1016                                             if hasattr(sys.stderr,
1017                                                        'buffer')
1018                                             else sys.stderr))
1019
1020    if candidates:
1021        return candidates[0]
1022
1023
1024def join_logical_line(logical_line):
1025    """Return single line based on logical line input."""
1026    indentation = _get_indentation(logical_line)
1027
1028    return indentation + untokenize_without_newlines(
1029        generate_tokens(logical_line.lstrip())) + '\n'
1030
1031
1032def untokenize_without_newlines(tokens):
1033    """Return source code based on tokens."""
1034    text = ''
1035    last_row = 0
1036    last_column = -1
1037
1038    for t in tokens:
1039        token_string = t[1]
1040        (start_row, start_column) = t[2]
1041        (end_row, end_column) = t[3]
1042
1043        if start_row > last_row:
1044            last_column = 0
1045        if (
1046            (start_column > last_column or token_string == '\n') and
1047            not text.endswith(' ')
1048        ):
1049            text += ' '
1050
1051        if token_string != '\n':
1052            text += token_string
1053
1054        last_row = end_row
1055        last_column = end_column
1056
1057    return text
1058
1059
1060def _find_logical(source_lines):
1061    # Make a variable which is the index of all the starts of lines.
1062    logical_start = []
1063    logical_end = []
1064    last_newline = True
1065    parens = 0
1066    for t in generate_tokens(''.join(source_lines)):
1067        if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
1068                    tokenize.INDENT, tokenize.NL,
1069                    tokenize.ENDMARKER]:
1070            continue
1071        if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
1072            last_newline = True
1073            logical_end.append((t[3][0] - 1, t[2][1]))
1074            continue
1075        if last_newline and not parens:
1076            logical_start.append((t[2][0] - 1, t[2][1]))
1077            last_newline = False
1078        if t[0] == tokenize.OP:
1079            if t[1] in '([{':
1080                parens += 1
1081            elif t[1] in '}])':
1082                parens -= 1
1083    return (logical_start, logical_end)
1084
1085
1086def _get_logical(source_lines, result, logical_start, logical_end):
1087    """Return the logical line corresponding to the result.
1088
1089    Assumes input is already E702-clean.
1090
1091    """
1092    row = result['line'] - 1
1093    col = result['column'] - 1
1094    ls = None
1095    le = None
1096    for i in range(0, len(logical_start), 1):
1097        assert logical_end
1098        x = logical_end[i]
1099        if x[0] > row or (x[0] == row and x[1] > col):
1100            le = x
1101            ls = logical_start[i]
1102            break
1103    if ls is None:
1104        return None
1105    original = source_lines[ls[0]:le[0] + 1]
1106    return ls, le, original
1107
1108
1109def get_item(items, index, default=None):
1110    if 0 <= index < len(items):
1111        return items[index]
1112    else:
1113        return default
1114
1115
1116def reindent(source, indent_size):
1117    """Reindent all lines."""
1118    reindenter = Reindenter(source)
1119    return reindenter.run(indent_size)
1120
1121
1122def code_almost_equal(a, b):
1123    """Return True if code is similar.
1124
1125    Ignore whitespace when comparing specific line.
1126
1127    """
1128    split_a = split_and_strip_non_empty_lines(a)
1129    split_b = split_and_strip_non_empty_lines(b)
1130
1131    if len(split_a) != len(split_b):
1132        return False
1133
1134    for index in range(len(split_a)):
1135        if ''.join(split_a[index].split()) != ''.join(split_b[index].split()):
1136            return False
1137
1138    return True
1139
1140
1141def split_and_strip_non_empty_lines(text):
1142    """Return lines split by newline.
1143
1144    Ignore empty lines.
1145
1146    """
1147    return [line.strip() for line in text.splitlines() if line.strip()]
1148
1149
1150def fix_e265(source, aggressive=False):  # pylint: disable=unused-argument
1151    """Format block comments."""
1152    if '#' not in source:
1153        # Optimization.
1154        return source
1155
1156    ignored_line_numbers = multiline_string_lines(
1157        source,
1158        include_docstrings=True) | set(commented_out_code_lines(source))
1159
1160    fixed_lines = []
1161    sio = io.StringIO(source)
1162    for (line_number, line) in enumerate(sio.readlines(), start=1):
1163        if (
1164            line.lstrip().startswith('#') and
1165            line_number not in ignored_line_numbers
1166        ):
1167            indentation = _get_indentation(line)
1168            line = line.lstrip()
1169
1170            # Normalize beginning if not a shebang.
1171            if len(line) > 1:
1172                if (
1173                    # Leave multiple spaces like '#    ' alone.
1174                    (line.count('#') > 1 or line[1].isalnum())
1175                    # Leave stylistic outlined blocks alone.
1176                    and not line.rstrip().endswith('#')
1177                ):
1178                    line = '# ' + line.lstrip('# \t')
1179
1180            fixed_lines.append(indentation + line)
1181        else:
1182            fixed_lines.append(line)
1183
1184    return ''.join(fixed_lines)
1185
1186
1187def refactor(source, fixer_names, ignore=None):
1188    """Return refactored code using lib2to3.
1189
1190    Skip if ignore string is produced in the refactored code.
1191
1192    """
1193    from lib2to3 import pgen2
1194    try:
1195        new_text = refactor_with_2to3(source,
1196                                      fixer_names=fixer_names)
1197    except (pgen2.parse.ParseError,
1198            SyntaxError,
1199            UnicodeDecodeError,
1200            UnicodeEncodeError):
1201        return source
1202
1203    if ignore:
1204        if ignore in new_text and ignore not in source:
1205            return source
1206
1207    return new_text
1208
1209
1210def code_to_2to3(select, ignore):
1211    fixes = set()
1212    for code, fix in CODE_TO_2TO3.items():
1213        if code_match(code, select=select, ignore=ignore):
1214            fixes |= set(fix)
1215    return fixes
1216
1217
1218def fix_2to3(source, aggressive=True, select=None, ignore=None):
1219    """Fix various deprecated code (via lib2to3)."""
1220    if not aggressive:
1221        return source
1222
1223    select = select or []
1224    ignore = ignore or []
1225
1226    return refactor(source,
1227                    code_to_2to3(select=select,
1228                                 ignore=ignore))
1229
1230
1231def fix_w602(source, aggressive=True):
1232    """Fix deprecated form of raising exception."""
1233    if not aggressive:
1234        return source
1235
1236    return refactor(source, ['raise'],
1237                    ignore='with_traceback')
1238
1239
1240def find_newline(source):
1241    """Return type of newline used in source.
1242
1243    Input is a list of lines.
1244
1245    """
1246    assert not isinstance(source, unicode)
1247
1248    counter = collections.defaultdict(int)
1249    for line in source:
1250        if line.endswith(CRLF):
1251            counter[CRLF] += 1
1252        elif line.endswith(CR):
1253            counter[CR] += 1
1254        elif line.endswith(LF):
1255            counter[LF] += 1
1256
1257    return (sorted(counter, key=counter.get, reverse=True) or [LF])[0]
1258
1259
1260def _get_indentword(source):
1261    """Return indentation type."""
1262    indent_word = '    '  # Default in case source has no indentation
1263    try:
1264        for t in generate_tokens(source):
1265            if t[0] == token.INDENT:
1266                indent_word = t[1]
1267                break
1268    except (SyntaxError, tokenize.TokenError):
1269        pass
1270    return indent_word
1271
1272
1273def _get_indentation(line):
1274    """Return leading whitespace."""
1275    if line.strip():
1276        non_whitespace_index = len(line) - len(line.lstrip())
1277        return line[:non_whitespace_index]
1278    else:
1279        return ''
1280
1281
1282def get_diff_text(old, new, filename):
1283    """Return text of unified diff between old and new."""
1284    newline = '\n'
1285    diff = difflib.unified_diff(
1286        old, new,
1287        'original/' + filename,
1288        'fixed/' + filename,
1289        lineterm=newline)
1290
1291    text = ''
1292    for line in diff:
1293        text += line
1294
1295        # Work around missing newline (http://bugs.python.org/issue2142).
1296        if text and not line.endswith(newline):
1297            text += newline + r'\ No newline at end of file' + newline
1298
1299    return text
1300
1301
1302def _priority_key(pep8_result):
1303    """Key for sorting PEP8 results.
1304
1305    Global fixes should be done first. This is important for things like
1306    indentation.
1307
1308    """
1309    priority = [
1310        # Fix multiline colon-based before semicolon based.
1311        'e701',
1312        # Break multiline statements early.
1313        'e702',
1314        # Things that make lines longer.
1315        'e225', 'e231',
1316        # Remove extraneous whitespace before breaking lines.
1317        'e201',
1318        # Shorten whitespace in comment before resorting to wrapping.
1319        'e262'
1320    ]
1321    middle_index = 10000
1322    lowest_priority = [
1323        # We need to shorten lines last since the logical fixer can get in a
1324        # loop, which causes us to exit early.
1325        'e501'
1326    ]
1327    key = pep8_result['id'].lower()
1328    try:
1329        return priority.index(key)
1330    except ValueError:
1331        try:
1332            return middle_index + lowest_priority.index(key) + 1
1333        except ValueError:
1334            return middle_index
1335
1336
1337def shorten_line(tokens, source, indentation, indent_word, max_line_length,
1338                 aggressive=False, experimental=False, previous_line=''):
1339    """Separate line at OPERATOR.
1340
1341    Multiple candidates will be yielded.
1342
1343    """
1344    for candidate in _shorten_line(tokens=tokens,
1345                                   source=source,
1346                                   indentation=indentation,
1347                                   indent_word=indent_word,
1348                                   aggressive=aggressive,
1349                                   previous_line=previous_line):
1350        yield candidate
1351
1352    if aggressive:
1353        for key_token_strings in SHORTEN_OPERATOR_GROUPS:
1354            shortened = _shorten_line_at_tokens(
1355                tokens=tokens,
1356                source=source,
1357                indentation=indentation,
1358                indent_word=indent_word,
1359                key_token_strings=key_token_strings,
1360                aggressive=aggressive)
1361
1362            if shortened is not None and shortened != source:
1363                yield shortened
1364
1365    if experimental:
1366        for shortened in _shorten_line_at_tokens_new(
1367                tokens=tokens,
1368                source=source,
1369                indentation=indentation,
1370                max_line_length=max_line_length):
1371
1372            yield shortened
1373
1374
1375def _shorten_line(tokens, source, indentation, indent_word,
1376                  aggressive=False, previous_line=''):
1377    """Separate line at OPERATOR.
1378
1379    The input is expected to be free of newlines except for inside multiline
1380    strings and at the end.
1381
1382    Multiple candidates will be yielded.
1383
1384    """
1385    for (token_type,
1386         token_string,
1387         start_offset,
1388         end_offset) in token_offsets(tokens):
1389
1390        if (
1391            token_type == tokenize.COMMENT and
1392            not is_probably_part_of_multiline(previous_line) and
1393            not is_probably_part_of_multiline(source) and
1394            not source[start_offset + 1:].strip().lower().startswith(
1395                ('noqa', 'pragma:', 'pylint:'))
1396        ):
1397            # Move inline comments to previous line.
1398            first = source[:start_offset]
1399            second = source[start_offset:]
1400            yield (indentation + second.strip() + '\n' +
1401                   indentation + first.strip() + '\n')
1402        elif token_type == token.OP and token_string != '=':
1403            # Don't break on '=' after keyword as this violates PEP 8.
1404
1405            assert token_type != token.INDENT
1406
1407            first = source[:end_offset]
1408
1409            second_indent = indentation
1410            if first.rstrip().endswith('('):
1411                second_indent += indent_word
1412            elif '(' in first:
1413                second_indent += ' ' * (1 + first.find('('))
1414            else:
1415                second_indent += indent_word
1416
1417            second = (second_indent + source[end_offset:].lstrip())
1418            if (
1419                not second.strip() or
1420                second.lstrip().startswith('#')
1421            ):
1422                continue
1423
1424            # Do not begin a line with a comma
1425            if second.lstrip().startswith(','):
1426                continue
1427            # Do end a line with a dot
1428            if first.rstrip().endswith('.'):
1429                continue
1430            if token_string in '+-*/':
1431                fixed = first + ' \\' + '\n' + second
1432            else:
1433                fixed = first + '\n' + second
1434
1435            # Only fix if syntax is okay.
1436            if check_syntax(normalize_multiline(fixed)
1437                            if aggressive else fixed):
1438                yield indentation + fixed
1439
1440
1441# A convenient way to handle tokens.
1442Token = collections.namedtuple('Token', ['token_type', 'token_string',
1443                                         'spos', 'epos', 'line'])
1444
1445
1446class ReformattedLines(object):
1447
1448    """The reflowed lines of atoms.
1449
1450    Each part of the line is represented as an "atom." They can be moved
1451    around when need be to get the optimal formatting.
1452
1453    """
1454
1455    ###########################################################################
1456    # Private Classes
1457
1458    class _Indent(object):
1459
1460        """Represent an indentation in the atom stream."""
1461
1462        def __init__(self, indent_amt):
1463            self._indent_amt = indent_amt
1464
1465        def emit(self):
1466            return ' ' * self._indent_amt
1467
1468        @property
1469        def size(self):
1470            return self._indent_amt
1471
1472    class _Space(object):
1473
1474        """Represent a space in the atom stream."""
1475
1476        def emit(self):
1477            return ' '
1478
1479        @property
1480        def size(self):
1481            return 1
1482
1483    class _LineBreak(object):
1484
1485        """Represent a line break in the atom stream."""
1486
1487        def emit(self):
1488            return '\n'
1489
1490        @property
1491        def size(self):
1492            return 0
1493
1494    def __init__(self, max_line_length):
1495        self._max_line_length = max_line_length
1496        self._lines = []
1497        self._bracket_depth = 0
1498        self._prev_item = None
1499        self._prev_prev_item = None
1500
1501    def __repr__(self):
1502        return self.emit()
1503
1504    ###########################################################################
1505    # Public Methods
1506
1507    def add(self, obj, indent_amt, break_after_open_bracket):
1508        if isinstance(obj, Atom):
1509            self._add_item(obj, indent_amt)
1510            return
1511
1512        self._add_container(obj, indent_amt, break_after_open_bracket)
1513
1514    def add_comment(self, item):
1515        num_spaces = 2
1516        if len(self._lines) > 1:
1517            if isinstance(self._lines[-1], self._Space):
1518                num_spaces -= 1
1519            if len(self._lines) > 2:
1520                if isinstance(self._lines[-2], self._Space):
1521                    num_spaces -= 1
1522
1523        while num_spaces > 0:
1524            self._lines.append(self._Space())
1525            num_spaces -= 1
1526        self._lines.append(item)
1527
1528    def add_indent(self, indent_amt):
1529        self._lines.append(self._Indent(indent_amt))
1530
1531    def add_line_break(self, indent):
1532        self._lines.append(self._LineBreak())
1533        self.add_indent(len(indent))
1534
1535    def add_line_break_at(self, index, indent_amt):
1536        self._lines.insert(index, self._LineBreak())
1537        self._lines.insert(index + 1, self._Indent(indent_amt))
1538
1539    def add_space_if_needed(self, curr_text, equal=False):
1540        if (
1541            not self._lines or isinstance(
1542                self._lines[-1], (self._LineBreak, self._Indent, self._Space))
1543        ):
1544            return
1545
1546        prev_text = unicode(self._prev_item)
1547        prev_prev_text = (
1548            unicode(self._prev_prev_item) if self._prev_prev_item else '')
1549
1550        if (
1551            # The previous item was a keyword or identifier and the current
1552            # item isn't an operator that doesn't require a space.
1553            ((self._prev_item.is_keyword or self._prev_item.is_string or
1554              self._prev_item.is_name or self._prev_item.is_number) and
1555             (curr_text[0] not in '([{.,:}])' or
1556              (curr_text[0] == '=' and equal))) or
1557
1558            # Don't place spaces around a '.', unless it's in an 'import'
1559            # statement.
1560            ((prev_prev_text != 'from' and prev_text[-1] != '.' and
1561              curr_text != 'import') and
1562
1563             # Don't place a space before a colon.
1564             curr_text[0] != ':' and
1565
1566             # Don't split up ending brackets by spaces.
1567             ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or
1568
1569              # Put a space after a colon or comma.
1570              prev_text[-1] in ':,' or
1571
1572              # Put space around '=' if asked to.
1573              (equal and prev_text == '=') or
1574
1575              # Put spaces around non-unary arithmetic operators.
1576              ((self._prev_prev_item and
1577                (prev_text not in '+-' and
1578                 (self._prev_prev_item.is_name or
1579                  self._prev_prev_item.is_number or
1580                  self._prev_prev_item.is_string)) and
1581                prev_text in ('+', '-', '%', '*', '/', '//', '**')))))
1582        ):
1583            self._lines.append(self._Space())
1584
1585    def previous_item(self):
1586        """Return the previous non-whitespace item."""
1587        return self._prev_item
1588
1589    def fits_on_current_line(self, item_extent):
1590        return self.current_size() + item_extent <= self._max_line_length
1591
1592    def current_size(self):
1593        """The size of the current line minus the indentation."""
1594        size = 0
1595        for item in reversed(self._lines):
1596            size += item.size
1597            if isinstance(item, self._LineBreak):
1598                break
1599
1600        return size
1601
1602    def line_empty(self):
1603        return (self._lines and
1604                isinstance(self._lines[-1],
1605                           (self._LineBreak, self._Indent)))
1606
1607    def emit(self):
1608        string = ''
1609        for item in self._lines:
1610            if isinstance(item, self._LineBreak):
1611                string = string.rstrip()
1612            string += item.emit()
1613
1614        return string.rstrip() + '\n'
1615
1616    ###########################################################################
1617    # Private Methods
1618
1619    def _add_item(self, item, indent_amt):
1620        """Add an item to the line.
1621
1622        Reflow the line to get the best formatting after the item is
1623        inserted. The bracket depth indicates if the item is being
1624        inserted inside of a container or not.
1625
1626        """
1627        if self._prev_item and self._prev_item.is_string and item.is_string:
1628            # Place consecutive string literals on separate lines.
1629            self._lines.append(self._LineBreak())
1630            self._lines.append(self._Indent(indent_amt))
1631
1632        item_text = unicode(item)
1633        if self._lines and self._bracket_depth:
1634            # Adding the item into a container.
1635            self._prevent_default_initializer_splitting(item, indent_amt)
1636
1637            if item_text in '.,)]}':
1638                self._split_after_delimiter(item, indent_amt)
1639
1640        elif self._lines and not self.line_empty():
1641            # Adding the item outside of a container.
1642            if self.fits_on_current_line(len(item_text)):
1643                self._enforce_space(item)
1644
1645            else:
1646                # Line break for the new item.
1647                self._lines.append(self._LineBreak())
1648                self._lines.append(self._Indent(indent_amt))
1649
1650        self._lines.append(item)
1651        self._prev_item, self._prev_prev_item = item, self._prev_item
1652
1653        if item_text in '([{':
1654            self._bracket_depth += 1
1655
1656        elif item_text in '}])':
1657            self._bracket_depth -= 1
1658            assert self._bracket_depth >= 0
1659
1660    def _add_container(self, container, indent_amt, break_after_open_bracket):
1661        actual_indent = indent_amt + 1
1662
1663        if (
1664            unicode(self._prev_item) != '=' and
1665            not self.line_empty() and
1666            not self.fits_on_current_line(
1667                container.size + self._bracket_depth + 2)
1668        ):
1669
1670            if unicode(container)[0] == '(' and self._prev_item.is_name:
1671                # Don't split before the opening bracket of a call.
1672                break_after_open_bracket = True
1673                actual_indent = indent_amt + 4
1674            elif (
1675                break_after_open_bracket or
1676                unicode(self._prev_item) not in '([{'
1677            ):
1678                # If the container doesn't fit on the current line and the
1679                # current line isn't empty, place the container on the next
1680                # line.
1681                self._lines.append(self._LineBreak())
1682                self._lines.append(self._Indent(indent_amt))
1683                break_after_open_bracket = False
1684        else:
1685            actual_indent = self.current_size() + 1
1686            break_after_open_bracket = False
1687
1688        if isinstance(container, (ListComprehension, IfExpression)):
1689            actual_indent = indent_amt
1690
1691        # Increase the continued indentation only if recursing on a
1692        # container.
1693        container.reflow(self, ' ' * actual_indent,
1694                         break_after_open_bracket=break_after_open_bracket)
1695
1696    def _prevent_default_initializer_splitting(self, item, indent_amt):
1697        """Prevent splitting between a default initializer.
1698
1699        When there is a default initializer, it's best to keep it all on
1700        the same line. It's nicer and more readable, even if it goes
1701        over the maximum allowable line length. This goes back along the
1702        current line to determine if we have a default initializer, and,
1703        if so, to remove extraneous whitespaces and add a line
1704        break/indent before it if needed.
1705
1706        """
1707        if unicode(item) == '=':
1708            # This is the assignment in the initializer. Just remove spaces for
1709            # now.
1710            self._delete_whitespace()
1711            return
1712
1713        if (not self._prev_item or not self._prev_prev_item or
1714                unicode(self._prev_item) != '='):
1715            return
1716
1717        self._delete_whitespace()
1718        prev_prev_index = self._lines.index(self._prev_prev_item)
1719
1720        if (
1721            isinstance(self._lines[prev_prev_index - 1], self._Indent) or
1722            self.fits_on_current_line(item.size + 1)
1723        ):
1724            # The default initializer is already the only item on this line.
1725            # Don't insert a newline here.
1726            return
1727
1728        # Replace the space with a newline/indent combo.
1729        if isinstance(self._lines[prev_prev_index - 1], self._Space):
1730            del self._lines[prev_prev_index - 1]
1731
1732        self.add_line_break_at(self._lines.index(self._prev_prev_item),
1733                               indent_amt)
1734
1735    def _split_after_delimiter(self, item, indent_amt):
1736        """Split the line only after a delimiter."""
1737        self._delete_whitespace()
1738
1739        if self.fits_on_current_line(item.size):
1740            return
1741
1742        last_space = None
1743        for item in reversed(self._lines):
1744            if (
1745                last_space and
1746                (not isinstance(item, Atom) or not item.is_colon)
1747            ):
1748                break
1749            else:
1750                last_space = None
1751            if isinstance(item, self._Space):
1752                last_space = item
1753            if isinstance(item, (self._LineBreak, self._Indent)):
1754                return
1755
1756        if not last_space:
1757            return
1758
1759        self.add_line_break_at(self._lines.index(last_space), indent_amt)
1760
1761    def _enforce_space(self, item):
1762        """Enforce a space in certain situations.
1763
1764        There are cases where we will want a space where normally we
1765        wouldn't put one. This just enforces the addition of a space.
1766
1767        """
1768        if isinstance(self._lines[-1],
1769                      (self._Space, self._LineBreak, self._Indent)):
1770            return
1771
1772        if not self._prev_item:
1773            return
1774
1775        item_text = unicode(item)
1776        prev_text = unicode(self._prev_item)
1777
1778        # Prefer a space around a '.' in an import statement, and between the
1779        # 'import' and '('.
1780        if (
1781            (item_text == '.' and prev_text == 'from') or
1782            (item_text == 'import' and prev_text == '.') or
1783            (item_text == '(' and prev_text == 'import')
1784        ):
1785            self._lines.append(self._Space())
1786
1787    def _delete_whitespace(self):
1788        """Delete all whitespace from the end of the line."""
1789        while isinstance(self._lines[-1], (self._Space, self._LineBreak,
1790                                           self._Indent)):
1791            del self._lines[-1]
1792
1793
1794class Atom(object):
1795
1796    """The smallest unbreakable unit that can be reflowed."""
1797
1798    def __init__(self, atom):
1799        self._atom = atom
1800
1801    def __repr__(self):
1802        return self._atom.token_string
1803
1804    def __len__(self):
1805        return self.size
1806
1807    def reflow(
1808        self, reflowed_lines, continued_indent, extent,
1809        break_after_open_bracket=False,
1810        is_list_comp_or_if_expr=False,
1811        next_is_dot=False
1812    ):
1813        if self._atom.token_type == tokenize.COMMENT:
1814            reflowed_lines.add_comment(self)
1815            return
1816
1817        total_size = extent if extent else self.size
1818
1819        if self._atom.token_string not in ',:([{}])':
1820            # Some atoms will need an extra 1-sized space token after them.
1821            total_size += 1
1822
1823        prev_item = reflowed_lines.previous_item()
1824        if (
1825            not is_list_comp_or_if_expr and
1826            not reflowed_lines.fits_on_current_line(total_size) and
1827            not (next_is_dot and
1828                 reflowed_lines.fits_on_current_line(self.size + 1)) and
1829            not reflowed_lines.line_empty() and
1830            not self.is_colon and
1831            not (prev_item and prev_item.is_name and
1832                 unicode(self) == '(')
1833        ):
1834            # Start a new line if there is already something on the line and
1835            # adding this atom would make it go over the max line length.
1836            reflowed_lines.add_line_break(continued_indent)
1837        else:
1838            reflowed_lines.add_space_if_needed(unicode(self))
1839
1840        reflowed_lines.add(self, len(continued_indent),
1841                           break_after_open_bracket)
1842
1843    def emit(self):
1844        return self.__repr__()
1845
1846    @property
1847    def is_keyword(self):
1848        return keyword.iskeyword(self._atom.token_string)
1849
1850    @property
1851    def is_string(self):
1852        return self._atom.token_type == tokenize.STRING
1853
1854    @property
1855    def is_name(self):
1856        return self._atom.token_type == tokenize.NAME
1857
1858    @property
1859    def is_number(self):
1860        return self._atom.token_type == tokenize.NUMBER
1861
1862    @property
1863    def is_comma(self):
1864        return self._atom.token_string == ','
1865
1866    @property
1867    def is_colon(self):
1868        return self._atom.token_string == ':'
1869
1870    @property
1871    def size(self):
1872        return len(self._atom.token_string)
1873
1874
1875class Container(object):
1876
1877    """Base class for all container types."""
1878
1879    def __init__(self, items):
1880        self._items = items
1881
1882    def __repr__(self):
1883        string = ''
1884        last_was_keyword = False
1885
1886        for item in self._items:
1887            if item.is_comma:
1888                string += ', '
1889            elif item.is_colon:
1890                string += ': '
1891            else:
1892                item_string = unicode(item)
1893                if (
1894                    string and
1895                    (last_was_keyword or
1896                     (not string.endswith(tuple('([{,.:}]) ')) and
1897                      not item_string.startswith(tuple('([{,.:}])'))))
1898                ):
1899                    string += ' '
1900                string += item_string
1901
1902            last_was_keyword = item.is_keyword
1903        return string
1904
1905    def __iter__(self):
1906        for element in self._items:
1907            yield element
1908
1909    def __getitem__(self, idx):
1910        return self._items[idx]
1911
1912    def reflow(self, reflowed_lines, continued_indent,
1913               break_after_open_bracket=False):
1914        last_was_container = False
1915        for (index, item) in enumerate(self._items):
1916            next_item = get_item(self._items, index + 1)
1917
1918            if isinstance(item, Atom):
1919                is_list_comp_or_if_expr = (
1920                    isinstance(self, (ListComprehension, IfExpression)))
1921                item.reflow(reflowed_lines, continued_indent,
1922                            self._get_extent(index),
1923                            is_list_comp_or_if_expr=is_list_comp_or_if_expr,
1924                            next_is_dot=(next_item and
1925                                         unicode(next_item) == '.'))
1926                if last_was_container and item.is_comma:
1927                    reflowed_lines.add_line_break(continued_indent)
1928                last_was_container = False
1929            else:  # isinstance(item, Container)
1930                reflowed_lines.add(item, len(continued_indent),
1931                                   break_after_open_bracket)
1932                last_was_container = not isinstance(item, (ListComprehension,
1933                                                           IfExpression))
1934
1935            if (
1936                break_after_open_bracket and index == 0 and
1937                # Prefer to keep empty containers together instead of
1938                # separating them.
1939                unicode(item) == self.open_bracket and
1940                (not next_item or unicode(next_item) != self.close_bracket) and
1941                (len(self._items) != 3 or not isinstance(next_item, Atom))
1942            ):
1943                reflowed_lines.add_line_break(continued_indent)
1944                break_after_open_bracket = False
1945            else:
1946                next_next_item = get_item(self._items, index + 2)
1947                if (
1948                    unicode(item) not in ['.', '%', 'in'] and
1949                    next_item and not isinstance(next_item, Container) and
1950                    unicode(next_item) != ':' and
1951                    next_next_item and (not isinstance(next_next_item, Atom) or
1952                                        unicode(next_item) == 'not') and
1953                    not reflowed_lines.line_empty() and
1954                    not reflowed_lines.fits_on_current_line(
1955                        self._get_extent(index + 1) + 2)
1956                ):
1957                    reflowed_lines.add_line_break(continued_indent)
1958
1959    def _get_extent(self, index):
1960        """The extent of the full element.
1961
1962        E.g., the length of a function call or keyword.
1963
1964        """
1965        extent = 0
1966        prev_item = get_item(self._items, index - 1)
1967        seen_dot = prev_item and unicode(prev_item) == '.'
1968        while index < len(self._items):
1969            item = get_item(self._items, index)
1970            index += 1
1971
1972            if isinstance(item, (ListComprehension, IfExpression)):
1973                break
1974
1975            if isinstance(item, Container):
1976                if prev_item and prev_item.is_name:
1977                    if seen_dot:
1978                        extent += 1
1979                    else:
1980                        extent += item.size
1981
1982                    prev_item = item
1983                    continue
1984            elif (unicode(item) not in ['.', '=', ':', 'not'] and
1985                  not item.is_name and not item.is_string):
1986                break
1987
1988            if unicode(item) == '.':
1989                seen_dot = True
1990
1991            extent += item.size
1992            prev_item = item
1993
1994        return extent
1995
1996    @property
1997    def is_string(self):
1998        return False
1999
2000    @property
2001    def size(self):
2002        return len(self.__repr__())
2003
2004    @property
2005    def is_keyword(self):
2006        return False
2007
2008    @property
2009    def is_name(self):
2010        return False
2011
2012    @property
2013    def is_comma(self):
2014        return False
2015
2016    @property
2017    def is_colon(self):
2018        return False
2019
2020    @property
2021    def open_bracket(self):
2022        return None
2023
2024    @property
2025    def close_bracket(self):
2026        return None
2027
2028
2029class Tuple(Container):
2030
2031    """A high-level representation of a tuple."""
2032
2033    @property
2034    def open_bracket(self):
2035        return '('
2036
2037    @property
2038    def close_bracket(self):
2039        return ')'
2040
2041
2042class List(Container):
2043
2044    """A high-level representation of a list."""
2045
2046    @property
2047    def open_bracket(self):
2048        return '['
2049
2050    @property
2051    def close_bracket(self):
2052        return ']'
2053
2054
2055class DictOrSet(Container):
2056
2057    """A high-level representation of a dictionary or set."""
2058
2059    @property
2060    def open_bracket(self):
2061        return '{'
2062
2063    @property
2064    def close_bracket(self):
2065        return '}'
2066
2067
2068class ListComprehension(Container):
2069
2070    """A high-level representation of a list comprehension."""
2071
2072    @property
2073    def size(self):
2074        length = 0
2075        for item in self._items:
2076            if isinstance(item, IfExpression):
2077                break
2078            length += item.size
2079        return length
2080
2081
2082class IfExpression(Container):
2083
2084    """A high-level representation of an if-expression."""
2085
2086
2087def _parse_container(tokens, index, for_or_if=None):
2088    """Parse a high-level container, such as a list, tuple, etc."""
2089
2090    # Store the opening bracket.
2091    items = [Atom(Token(*tokens[index]))]
2092    index += 1
2093
2094    num_tokens = len(tokens)
2095    while index < num_tokens:
2096        tok = Token(*tokens[index])
2097
2098        if tok.token_string in ',)]}':
2099            # First check if we're at the end of a list comprehension or
2100            # if-expression. Don't add the ending token as part of the list
2101            # comprehension or if-expression, because they aren't part of those
2102            # constructs.
2103            if for_or_if == 'for':
2104                return (ListComprehension(items), index - 1)
2105
2106            elif for_or_if == 'if':
2107                return (IfExpression(items), index - 1)
2108
2109            # We've reached the end of a container.
2110            items.append(Atom(tok))
2111
2112            # If not, then we are at the end of a container.
2113            if tok.token_string == ')':
2114                # The end of a tuple.
2115                return (Tuple(items), index)
2116
2117            elif tok.token_string == ']':
2118                # The end of a list.
2119                return (List(items), index)
2120
2121            elif tok.token_string == '}':
2122                # The end of a dictionary or set.
2123                return (DictOrSet(items), index)
2124
2125        elif tok.token_string in '([{':
2126            # A sub-container is being defined.
2127            (container, index) = _parse_container(tokens, index)
2128            items.append(container)
2129
2130        elif tok.token_string == 'for':
2131            (container, index) = _parse_container(tokens, index, 'for')
2132            items.append(container)
2133
2134        elif tok.token_string == 'if':
2135            (container, index) = _parse_container(tokens, index, 'if')
2136            items.append(container)
2137
2138        else:
2139            items.append(Atom(tok))
2140
2141        index += 1
2142
2143    return (None, None)
2144
2145
2146def _parse_tokens(tokens):
2147    """Parse the tokens.
2148
2149    This converts the tokens into a form where we can manipulate them
2150    more easily.
2151
2152    """
2153
2154    index = 0
2155    parsed_tokens = []
2156
2157    num_tokens = len(tokens)
2158    while index < num_tokens:
2159        tok = Token(*tokens[index])
2160
2161        assert tok.token_type != token.INDENT
2162        if tok.token_type == tokenize.NEWLINE:
2163            # There's only one newline and it's at the end.
2164            break
2165
2166        if tok.token_string in '([{':
2167            (container, index) = _parse_container(tokens, index)
2168            if not container:
2169                return None
2170            parsed_tokens.append(container)
2171        else:
2172            parsed_tokens.append(Atom(tok))
2173
2174        index += 1
2175
2176    return parsed_tokens
2177
2178
2179def _reflow_lines(parsed_tokens, indentation, max_line_length,
2180                  start_on_prefix_line):
2181    """Reflow the lines so that it looks nice."""
2182
2183    if unicode(parsed_tokens[0]) == 'def':
2184        # A function definition gets indented a bit more.
2185        continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE
2186    else:
2187        continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE
2188
2189    break_after_open_bracket = not start_on_prefix_line
2190
2191    lines = ReformattedLines(max_line_length)
2192    lines.add_indent(len(indentation.lstrip('\r\n')))
2193
2194    if not start_on_prefix_line:
2195        # If splitting after the opening bracket will cause the first element
2196        # to be aligned weirdly, don't try it.
2197        first_token = get_item(parsed_tokens, 0)
2198        second_token = get_item(parsed_tokens, 1)
2199
2200        if (
2201            first_token and second_token and
2202            unicode(second_token)[0] == '(' and
2203            len(indentation) + len(first_token) + 1 == len(continued_indent)
2204        ):
2205            return None
2206
2207    for item in parsed_tokens:
2208        lines.add_space_if_needed(unicode(item), equal=True)
2209
2210        save_continued_indent = continued_indent
2211        if start_on_prefix_line and isinstance(item, Container):
2212            start_on_prefix_line = False
2213            continued_indent = ' ' * (lines.current_size() + 1)
2214
2215        item.reflow(lines, continued_indent, break_after_open_bracket)
2216        continued_indent = save_continued_indent
2217
2218    return lines.emit()
2219
2220
2221def _shorten_line_at_tokens_new(tokens, source, indentation,
2222                                max_line_length):
2223    """Shorten the line taking its length into account.
2224
2225    The input is expected to be free of newlines except for inside
2226    multiline strings and at the end.
2227
2228    """
2229    # Yield the original source so to see if it's a better choice than the
2230    # shortened candidate lines we generate here.
2231    yield indentation + source
2232
2233    parsed_tokens = _parse_tokens(tokens)
2234
2235    if parsed_tokens:
2236        # Perform two reflows. The first one starts on the same line as the
2237        # prefix. The second starts on the line after the prefix.
2238        fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
2239                              start_on_prefix_line=True)
2240        if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
2241            yield fixed
2242
2243        fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
2244                              start_on_prefix_line=False)
2245        if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
2246            yield fixed
2247
2248
2249def _shorten_line_at_tokens(tokens, source, indentation, indent_word,
2250                            key_token_strings, aggressive):
2251    """Separate line by breaking at tokens in key_token_strings.
2252
2253    The input is expected to be free of newlines except for inside
2254    multiline strings and at the end.
2255
2256    """
2257    offsets = []
2258    for (index, _t) in enumerate(token_offsets(tokens)):
2259        (token_type,
2260         token_string,
2261         start_offset,
2262         end_offset) = _t
2263
2264        assert token_type != token.INDENT
2265
2266        if token_string in key_token_strings:
2267            # Do not break in containers with zero or one items.
2268            unwanted_next_token = {
2269                '(': ')',
2270                '[': ']',
2271                '{': '}'}.get(token_string)
2272            if unwanted_next_token:
2273                if (
2274                    get_item(tokens,
2275                             index + 1,
2276                             default=[None, None])[1] == unwanted_next_token or
2277                    get_item(tokens,
2278                             index + 2,
2279                             default=[None, None])[1] == unwanted_next_token
2280                ):
2281                    continue
2282
2283            if (
2284                index > 2 and token_string == '(' and
2285                tokens[index - 1][1] in ',(%['
2286            ):
2287                # Don't split after a tuple start, or before a tuple start if
2288                # the tuple is in a list.
2289                continue
2290
2291            if end_offset < len(source) - 1:
2292                # Don't split right before newline.
2293                offsets.append(end_offset)
2294        else:
2295            # Break at adjacent strings. These were probably meant to be on
2296            # separate lines in the first place.
2297            previous_token = get_item(tokens, index - 1)
2298            if (
2299                token_type == tokenize.STRING and
2300                previous_token and previous_token[0] == tokenize.STRING
2301            ):
2302                offsets.append(start_offset)
2303
2304    current_indent = None
2305    fixed = None
2306    for line in split_at_offsets(source, offsets):
2307        if fixed:
2308            fixed += '\n' + current_indent + line
2309
2310            for symbol in '([{':
2311                if line.endswith(symbol):
2312                    current_indent += indent_word
2313        else:
2314            # First line.
2315            fixed = line
2316            assert not current_indent
2317            current_indent = indent_word
2318
2319    assert fixed is not None
2320
2321    if check_syntax(normalize_multiline(fixed)
2322                    if aggressive > 1 else fixed):
2323        return indentation + fixed
2324    else:
2325        return None
2326
2327
2328def token_offsets(tokens):
2329    """Yield tokens and offsets."""
2330    end_offset = 0
2331    previous_end_row = 0
2332    previous_end_column = 0
2333    for t in tokens:
2334        token_type = t[0]
2335        token_string = t[1]
2336        (start_row, start_column) = t[2]
2337        (end_row, end_column) = t[3]
2338
2339        # Account for the whitespace between tokens.
2340        end_offset += start_column
2341        if previous_end_row == start_row:
2342            end_offset -= previous_end_column
2343
2344        # Record the start offset of the token.
2345        start_offset = end_offset
2346
2347        # Account for the length of the token itself.
2348        end_offset += len(token_string)
2349
2350        yield (token_type,
2351               token_string,
2352               start_offset,
2353               end_offset)
2354
2355        previous_end_row = end_row
2356        previous_end_column = end_column
2357
2358
2359def normalize_multiline(line):
2360    """Normalize multiline-related code that will cause syntax error.
2361
2362    This is for purposes of checking syntax.
2363
2364    """
2365    if line.startswith('def ') and line.rstrip().endswith(':'):
2366        return line + ' pass'
2367    elif line.startswith('return '):
2368        return 'def _(): ' + line
2369    elif line.startswith('@'):
2370        return line + 'def _(): pass'
2371    elif line.startswith('class '):
2372        return line + ' pass'
2373    elif line.startswith('if '):
2374        return line + ' pass'
2375    else:
2376        return line
2377
2378
2379def fix_whitespace(line, offset, replacement):
2380    """Replace whitespace at offset and return fixed line."""
2381    # Replace escaped newlines too
2382    left = line[:offset].rstrip('\n\r \t\\')
2383    right = line[offset:].lstrip('\n\r \t\\')
2384    if right.startswith('#'):
2385        return line
2386    else:
2387        return left + replacement + right
2388
2389
2390def _execute_pep8(pep8_options, source):
2391    """Execute pep8 via python method calls."""
2392    class QuietReport(pep8.BaseReport):
2393
2394        """Version of checker that does not print."""
2395
2396        def __init__(self, options):
2397            super(QuietReport, self).__init__(options)
2398            self.__full_error_results = []
2399
2400        def error(self, line_number, offset, text, _):
2401            """Collect errors."""
2402            code = super(QuietReport, self).error(line_number, offset, text, _)
2403            if code:
2404                self.__full_error_results.append(
2405                    {'id': code,
2406                     'line': line_number,
2407                     'column': offset + 1,
2408                     'info': text})
2409
2410        def full_error_results(self):
2411            """Return error results in detail.
2412
2413            Results are in the form of a list of dictionaries. Each
2414            dictionary contains 'id', 'line', 'column', and 'info'.
2415
2416            """
2417            return self.__full_error_results
2418
2419    checker = pep8.Checker('', lines=source,
2420                           reporter=QuietReport, **pep8_options)
2421    checker.check_all()
2422    return checker.report.full_error_results()
2423
2424
2425def _remove_leading_and_normalize(line):
2426    return line.lstrip().rstrip(CR + LF) + '\n'
2427
2428
2429class Reindenter(object):
2430
2431    """Reindents badly-indented code to uniformly use four-space indentation.
2432
2433    Released to the public domain, by Tim Peters, 03 October 2000.
2434
2435    """
2436
2437    def __init__(self, input_text):
2438        sio = io.StringIO(input_text)
2439        source_lines = sio.readlines()
2440
2441        self.string_content_line_numbers = multiline_string_lines(input_text)
2442
2443        # File lines, rstripped & tab-expanded. Dummy at start is so
2444        # that we can use tokenize's 1-based line numbering easily.
2445        # Note that a line is all-blank iff it is a newline.
2446        self.lines = []
2447        for line_number, line in enumerate(source_lines, start=1):
2448            # Do not modify if inside a multiline string.
2449            if line_number in self.string_content_line_numbers:
2450                self.lines.append(line)
2451            else:
2452                # Only expand leading tabs.
2453                self.lines.append(_get_indentation(line).expandtabs() +
2454                                  _remove_leading_and_normalize(line))
2455
2456        self.lines.insert(0, None)
2457        self.index = 1  # index into self.lines of next line
2458        self.input_text = input_text
2459
2460    def run(self, indent_size=DEFAULT_INDENT_SIZE):
2461        """Fix indentation and return modified line numbers.
2462
2463        Line numbers are indexed at 1.
2464
2465        """
2466        if indent_size < 1:
2467            return self.input_text
2468
2469        try:
2470            stats = _reindent_stats(tokenize.generate_tokens(self.getline))
2471        except (SyntaxError, tokenize.TokenError):
2472            return self.input_text
2473        # Remove trailing empty lines.
2474        lines = self.lines
2475        while lines and lines[-1] == '\n':
2476            lines.pop()
2477        # Sentinel.
2478        stats.append((len(lines), 0))
2479        # Map count of leading spaces to # we want.
2480        have2want = {}
2481        # Program after transformation.
2482        after = []
2483        # Copy over initial empty lines -- there's nothing to do until
2484        # we see a line with *something* on it.
2485        i = stats[0][0]
2486        after.extend(lines[1:i])
2487        for i in range(len(stats) - 1):
2488            thisstmt, thislevel = stats[i]
2489            nextstmt = stats[i + 1][0]
2490            have = _leading_space_count(lines[thisstmt])
2491            want = thislevel * indent_size
2492            if want < 0:
2493                # A comment line.
2494                if have:
2495                    # An indented comment line. If we saw the same
2496                    # indentation before, reuse what it most recently
2497                    # mapped to.
2498                    want = have2want.get(have, -1)
2499                    if want < 0:
2500                        # Then it probably belongs to the next real stmt.
2501                        for j in range(i + 1, len(stats) - 1):
2502                            jline, jlevel = stats[j]
2503                            if jlevel >= 0:
2504                                if have == _leading_space_count(lines[jline]):
2505                                    want = jlevel * indent_size
2506                                break
2507                    if want < 0:            # Maybe it's a hanging
2508                                            # comment like this one,
2509                        # in which case we should shift it like its base
2510                        # line got shifted.
2511                        for j in range(i - 1, -1, -1):
2512                            jline, jlevel = stats[j]
2513                            if jlevel >= 0:
2514                                want = (have + _leading_space_count(
2515                                        after[jline - 1]) -
2516                                        _leading_space_count(lines[jline]))
2517                                break
2518                    if want < 0:
2519                        # Still no luck -- leave it alone.
2520                        want = have
2521                else:
2522                    want = 0
2523            assert want >= 0
2524            have2want[have] = want
2525            diff = want - have
2526            if diff == 0 or have == 0:
2527                after.extend(lines[thisstmt:nextstmt])
2528            else:
2529                for line_number, line in enumerate(lines[thisstmt:nextstmt],
2530                                                   start=thisstmt):
2531                    if line_number in self.string_content_line_numbers:
2532                        after.append(line)
2533                    elif diff > 0:
2534                        if line == '\n':
2535                            after.append(line)
2536                        else:
2537                            after.append(' ' * diff + line)
2538                    else:
2539                        remove = min(_leading_space_count(line), -diff)
2540                        after.append(line[remove:])
2541
2542        return ''.join(after)
2543
2544    def getline(self):
2545        """Line-getter for tokenize."""
2546        if self.index >= len(self.lines):
2547            line = ''
2548        else:
2549            line = self.lines[self.index]
2550            self.index += 1
2551        return line
2552
2553
2554def _reindent_stats(tokens):
2555    """Return list of (lineno, indentlevel) pairs.
2556
2557    One for each stmt and comment line. indentlevel is -1 for comment lines, as
2558    a signal that tokenize doesn't know what to do about them; indeed, they're
2559    our headache!
2560
2561    """
2562    find_stmt = 1  # Next token begins a fresh stmt?
2563    level = 0  # Current indent level.
2564    stats = []
2565
2566    for t in tokens:
2567        token_type = t[0]
2568        sline = t[2][0]
2569        line = t[4]
2570
2571        if token_type == tokenize.NEWLINE:
2572            # A program statement, or ENDMARKER, will eventually follow,
2573            # after some (possibly empty) run of tokens of the form
2574            #     (NL | COMMENT)* (INDENT | DEDENT+)?
2575            find_stmt = 1
2576
2577        elif token_type == tokenize.INDENT:
2578            find_stmt = 1
2579            level += 1
2580
2581        elif token_type == tokenize.DEDENT:
2582            find_stmt = 1
2583            level -= 1
2584
2585        elif token_type == tokenize.COMMENT:
2586            if find_stmt:
2587                stats.append((sline, -1))
2588                # But we're still looking for a new stmt, so leave
2589                # find_stmt alone.
2590
2591        elif token_type == tokenize.NL:
2592            pass
2593
2594        elif find_stmt:
2595            # This is the first "real token" following a NEWLINE, so it
2596            # must be the first token of the next program statement, or an
2597            # ENDMARKER.
2598            find_stmt = 0
2599            if line:   # Not endmarker.
2600                stats.append((sline, level))
2601
2602    return stats
2603
2604
2605def _leading_space_count(line):
2606    """Return number of leading spaces in line."""
2607    i = 0
2608    while i < len(line) and line[i] == ' ':
2609        i += 1
2610    return i
2611
2612
2613def refactor_with_2to3(source_text, fixer_names):
2614    """Use lib2to3 to refactor the source.
2615
2616    Return the refactored source code.
2617
2618    """
2619    from lib2to3.refactor import RefactoringTool
2620    fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names]
2621    tool = RefactoringTool(fixer_names=fixers, explicit=fixers)
2622
2623    from lib2to3.pgen2 import tokenize as lib2to3_tokenize
2624    try:
2625        return unicode(tool.refactor_string(source_text, name=''))
2626    except lib2to3_tokenize.TokenError:
2627        return source_text
2628
2629
2630def check_syntax(code):
2631    """Return True if syntax is okay."""
2632    try:
2633        return compile(code, '<string>', 'exec')
2634    except (SyntaxError, TypeError, UnicodeDecodeError):
2635        return False
2636
2637
2638def filter_results(source, results, aggressive):
2639    """Filter out spurious reports from pep8.
2640
2641    If aggressive is True, we allow possibly unsafe fixes (E711, E712).
2642
2643    """
2644    non_docstring_string_line_numbers = multiline_string_lines(
2645        source, include_docstrings=False)
2646    all_string_line_numbers = multiline_string_lines(
2647        source, include_docstrings=True)
2648
2649    commented_out_code_line_numbers = commented_out_code_lines(source)
2650
2651    for r in results:
2652        issue_id = r['id'].lower()
2653
2654        if r['line'] in non_docstring_string_line_numbers:
2655            if issue_id.startswith(('e1', 'e501', 'w191')):
2656                continue
2657
2658        if r['line'] in all_string_line_numbers:
2659            if issue_id in ['e501']:
2660                continue
2661
2662        # We must offset by 1 for lines that contain the trailing contents of
2663        # multiline strings.
2664        if not aggressive and (r['line'] + 1) in all_string_line_numbers:
2665            # Do not modify multiline strings in non-aggressive mode. Remove
2666            # trailing whitespace could break doctests.
2667            if issue_id.startswith(('w29', 'w39')):
2668                continue
2669
2670        if aggressive <= 0:
2671            if issue_id.startswith(('e711', 'w6')):
2672                continue
2673
2674        if aggressive <= 1:
2675            if issue_id.startswith(('e712', 'e713')):
2676                continue
2677
2678        if r['line'] in commented_out_code_line_numbers:
2679            if issue_id.startswith(('e26', 'e501')):
2680                continue
2681
2682        yield r
2683
2684
2685def multiline_string_lines(source, include_docstrings=False):
2686    """Return line numbers that are within multiline strings.
2687
2688    The line numbers are indexed at 1.
2689
2690    Docstrings are ignored.
2691
2692    """
2693    line_numbers = set()
2694    previous_token_type = ''
2695    try:
2696        for t in generate_tokens(source):
2697            token_type = t[0]
2698            start_row = t[2][0]
2699            end_row = t[3][0]
2700
2701            if token_type == tokenize.STRING and start_row != end_row:
2702                if (
2703                    include_docstrings or
2704                    previous_token_type != tokenize.INDENT
2705                ):
2706                    # We increment by one since we want the contents of the
2707                    # string.
2708                    line_numbers |= set(range(1 + start_row, 1 + end_row))
2709
2710            previous_token_type = token_type
2711    except (SyntaxError, tokenize.TokenError):
2712        pass
2713
2714    return line_numbers
2715
2716
2717def commented_out_code_lines(source):
2718    """Return line numbers of comments that are likely code.
2719
2720    Commented-out code is bad practice, but modifying it just adds even more
2721    clutter.
2722
2723    """
2724    line_numbers = []
2725    try:
2726        for t in generate_tokens(source):
2727            token_type = t[0]
2728            token_string = t[1]
2729            start_row = t[2][0]
2730            line = t[4]
2731
2732            # Ignore inline comments.
2733            if not line.lstrip().startswith('#'):
2734                continue
2735
2736            if token_type == tokenize.COMMENT:
2737                stripped_line = token_string.lstrip('#').strip()
2738                if (
2739                    ' ' in stripped_line and
2740                    '#' not in stripped_line and
2741                    check_syntax(stripped_line)
2742                ):
2743                    line_numbers.append(start_row)
2744    except (SyntaxError, tokenize.TokenError):
2745        pass
2746
2747    return line_numbers
2748
2749
2750def shorten_comment(line, max_line_length, last_comment=False):
2751    """Return trimmed or split long comment line.
2752
2753    If there are no comments immediately following it, do a text wrap.
2754    Doing this wrapping on all comments in general would lead to jagged
2755    comment text.
2756
2757    """
2758    assert len(line) > max_line_length
2759    line = line.rstrip()
2760
2761    # PEP 8 recommends 72 characters for comment text.
2762    indentation = _get_indentation(line) + '# '
2763    max_line_length = min(max_line_length,
2764                          len(indentation) + 72)
2765
2766    MIN_CHARACTER_REPEAT = 5
2767    if (
2768        len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and
2769        not line[-1].isalnum()
2770    ):
2771        # Trim comments that end with things like ---------
2772        return line[:max_line_length] + '\n'
2773    elif last_comment and re.match(r'\s*#+\s*\w+', line):
2774        import textwrap
2775        split_lines = textwrap.wrap(line.lstrip(' \t#'),
2776                                    initial_indent=indentation,
2777                                    subsequent_indent=indentation,
2778                                    width=max_line_length,
2779                                    break_long_words=False,
2780                                    break_on_hyphens=False)
2781        return '\n'.join(split_lines) + '\n'
2782    else:
2783        return line + '\n'
2784
2785
2786def normalize_line_endings(lines, newline):
2787    """Return fixed line endings.
2788
2789    All lines will be modified to use the most common line ending.
2790
2791    """
2792    return [line.rstrip('\n\r') + newline for line in lines]
2793
2794
2795def mutual_startswith(a, b):
2796    return b.startswith(a) or a.startswith(b)
2797
2798
2799def code_match(code, select, ignore):
2800    if ignore:
2801        assert not isinstance(ignore, unicode)
2802        for ignored_code in [c.strip() for c in ignore]:
2803            if mutual_startswith(code.lower(), ignored_code.lower()):
2804                return False
2805
2806    if select:
2807        assert not isinstance(select, unicode)
2808        for selected_code in [c.strip() for c in select]:
2809            if mutual_startswith(code.lower(), selected_code.lower()):
2810                return True
2811        return False
2812
2813    return True
2814
2815
2816def fix_code(source, options=None):
2817    """Return fixed source code."""
2818    if not options:
2819        options = parse_args([''])
2820
2821    if not isinstance(source, unicode):
2822        source = source.decode(locale.getpreferredencoding())
2823
2824    sio = io.StringIO(source)
2825    return fix_lines(sio.readlines(), options=options)
2826
2827
2828def fix_lines(source_lines, options, filename=''):
2829    """Return fixed source code."""
2830    # Transform everything to line feed. Then change them back to original
2831    # before returning fixed source code.
2832    original_newline = find_newline(source_lines)
2833    tmp_source = ''.join(normalize_line_endings(source_lines, '\n'))
2834
2835    # Keep a history to break out of cycles.
2836    previous_hashes = set()
2837
2838    if options.line_range:
2839        fixed_source = apply_local_fixes(tmp_source, options)
2840    else:
2841        # Apply global fixes only once (for efficiency).
2842        fixed_source = apply_global_fixes(tmp_source, options)
2843
2844    passes = 0
2845    long_line_ignore_cache = set()
2846    while hash(fixed_source) not in previous_hashes:
2847        if options.pep8_passes >= 0 and passes > options.pep8_passes:
2848            break
2849        passes += 1
2850
2851        previous_hashes.add(hash(fixed_source))
2852
2853        tmp_source = copy.copy(fixed_source)
2854
2855        fix = FixPEP8(
2856            filename,
2857            options,
2858            contents=tmp_source,
2859            long_line_ignore_cache=long_line_ignore_cache)
2860
2861        fixed_source = fix.fix()
2862
2863    sio = io.StringIO(fixed_source)
2864    return ''.join(normalize_line_endings(sio.readlines(), original_newline))
2865
2866
2867def fix_file(filename, options=None, output=None):
2868    if not options:
2869        options = parse_args([filename])
2870
2871    original_source = readlines_from_file(filename)
2872
2873    fixed_source = original_source
2874
2875    if options.in_place or output:
2876        encoding = detect_encoding(filename)
2877
2878    if output:
2879        output = codecs.getwriter(encoding)(output.buffer
2880                                            if hasattr(output, 'buffer')
2881                                            else output)
2882
2883        output = LineEndingWrapper(output)
2884
2885    fixed_source = fix_lines(fixed_source, options, filename=filename)
2886
2887    if options.diff:
2888        new = io.StringIO(fixed_source)
2889        new = new.readlines()
2890        diff = get_diff_text(original_source, new, filename)
2891        if output:
2892            output.write(diff)
2893            output.flush()
2894        else:
2895            return diff
2896    elif options.in_place:
2897        fp = open_with_encoding(filename, encoding=encoding,
2898                                mode='w')
2899        fp.write(fixed_source)
2900        fp.close()
2901    else:
2902        if output:
2903            output.write(fixed_source)
2904            output.flush()
2905        else:
2906            return fixed_source
2907
2908
2909def global_fixes():
2910    """Yield multiple (code, function) tuples."""
2911    for function in globals().values():
2912        if inspect.isfunction(function):
2913            arguments = inspect.getargspec(function)[0]
2914            if arguments[:1] != ['source']:
2915                continue
2916
2917            code = extract_code_from_function(function)
2918            if code:
2919                yield (code, function)
2920
2921
2922def apply_global_fixes(source, options, where='global'):
2923    """Run global fixes on source code.
2924
2925    These are fixes that only need be done once (unlike those in
2926    FixPEP8, which are dependent on pep8).
2927
2928    """
2929    if code_match('E101', select=options.select, ignore=options.ignore):
2930        source = reindent(source,
2931                          indent_size=options.indent_size)
2932
2933    for (code, function) in global_fixes():
2934        if code_match(code, select=options.select, ignore=options.ignore):
2935            if options.verbose:
2936                print('--->  Applying {0} fix for {1}'.format(where,
2937                                                              code.upper()),
2938                      file=sys.stderr)
2939            source = function(source,
2940                              aggressive=options.aggressive)
2941
2942    source = fix_2to3(source,
2943                      aggressive=options.aggressive,
2944                      select=options.select,
2945                      ignore=options.ignore)
2946
2947    return source
2948
2949
2950def apply_local_fixes(source, options):
2951    """Ananologus to apply_global_fixes, but runs only those which makes sense
2952    for the given line_range.
2953
2954    Do as much as we can without breaking code.
2955
2956    """
2957    def find_ge(a, x):
2958        """Find leftmost item greater than or equal to x."""
2959        i = bisect.bisect_left(a, x)
2960        if i != len(a):
2961            return i, a[i]
2962        return len(a) - 1, a[-1]
2963
2964    def find_le(a, x):
2965        """Find rightmost value less than or equal to x."""
2966        i = bisect.bisect_right(a, x)
2967        if i:
2968            return i - 1, a[i - 1]
2969        return 0, a[0]
2970
2971    def local_fix(source, start_log, end_log,
2972                  start_lines, end_lines, indents, last_line):
2973        """apply_global_fixes to the source between start_log and end_log.
2974
2975        The subsource must be the correct syntax of a complete python program
2976        (but all lines may share an indentation). The subsource's shared indent
2977        is removed, fixes are applied and the indent prepended back. Taking
2978        care to not reindent strings.
2979
2980        last_line is the strict cut off (options.line_range[1]), so that
2981        lines after last_line are not modified.
2982
2983        """
2984        if end_log < start_log:
2985            return source
2986
2987        ind = indents[start_log]
2988        indent = _get_indentation(source[start_lines[start_log]])
2989
2990        sl = slice(start_lines[start_log], end_lines[end_log] + 1)
2991
2992        subsource = source[sl]
2993        # Remove indent from subsource.
2994        if ind:
2995            for line_no in start_lines[start_log:end_log + 1]:
2996                pos = line_no - start_lines[start_log]
2997                subsource[pos] = subsource[pos][ind:]
2998
2999        # Fix indentation of subsource.
3000        fixed_subsource = apply_global_fixes(''.join(subsource),
3001                                             options,
3002                                             where='local')
3003        fixed_subsource = fixed_subsource.splitlines(True)
3004
3005        # Add back indent for non multi-line strings lines.
3006        msl = multiline_string_lines(''.join(fixed_subsource),
3007                                     include_docstrings=False)
3008        for i, line in enumerate(fixed_subsource):
3009            if not i + 1 in msl:
3010                fixed_subsource[i] = indent + line if line != '\n' else line
3011
3012        # We make a special case to look at the final line, if it's a multiline
3013        # *and* the cut off is somewhere inside it, we take the fixed
3014        # subset up until last_line, this assumes that the number of lines
3015        # does not change in this multiline line.
3016        changed_lines = len(fixed_subsource)
3017        if (start_lines[end_log] != end_lines[end_log]
3018                and end_lines[end_log] > last_line):
3019            after_end = end_lines[end_log] - last_line
3020            fixed_subsource = (fixed_subsource[:-after_end] +
3021                               source[sl][-after_end:])
3022            changed_lines -= after_end
3023
3024            options.line_range[1] = (options.line_range[0] +
3025                                     changed_lines - 1)
3026
3027        return (source[:start_lines[start_log]] +
3028                fixed_subsource +
3029                source[end_lines[end_log] + 1:])
3030
3031    def is_continued_stmt(line,
3032                          continued_stmts=frozenset(['else', 'elif',
3033                                                     'finally', 'except'])):
3034        return re.split('[ :]', line.strip(), 1)[0] in continued_stmts
3035
3036    assert options.line_range
3037    start, end = options.line_range
3038    start -= 1
3039    end -= 1
3040    last_line = end  # We shouldn't modify lines after this cut-off.
3041
3042    try:
3043        logical = _find_logical(source)
3044    except (SyntaxError, tokenize.TokenError):
3045        return ''.join(source)
3046
3047    if not logical[0]:
3048        # Just blank lines, this should imply that it will become '\n' ?
3049        return apply_global_fixes(source, options)
3050
3051    start_lines, indents = zip(*logical[0])
3052    end_lines, _ = zip(*logical[1])
3053
3054    source = source.splitlines(True)
3055
3056    start_log, start = find_ge(start_lines, start)
3057    end_log, end = find_le(start_lines, end)
3058
3059    # Look behind one line, if it's indented less than current indent
3060    # then we can move to this previous line knowing that its
3061    # indentation level will not be changed.
3062    if (start_log > 0
3063            and indents[start_log - 1] < indents[start_log]
3064            and not is_continued_stmt(source[start_log - 1])):
3065        start_log -= 1
3066        start = start_lines[start_log]
3067
3068    while start < end:
3069
3070        if is_continued_stmt(source[start]):
3071            start_log += 1
3072            start = start_lines[start_log]
3073            continue
3074
3075        ind = indents[start_log]
3076        for t in itertools.takewhile(lambda t: t[1][1] >= ind,
3077                                     enumerate(logical[0][start_log:])):
3078            n_log, n = start_log + t[0], t[1][0]
3079        # start shares indent up to n.
3080
3081        if n <= end:
3082            source = local_fix(source, start_log, n_log,
3083                               start_lines, end_lines,
3084                               indents, last_line)
3085            start_log = n_log if n == end else n_log + 1
3086            start = start_lines[start_log]
3087            continue
3088
3089        else:
3090            # Look at the line after end and see if allows us to reindent.
3091            after_end_log, after_end = find_ge(start_lines, end + 1)
3092
3093            if indents[after_end_log] > indents[start_log]:
3094                start_log, start = find_ge(start_lines, start + 1)
3095                continue
3096
3097            if (indents[after_end_log] == indents[start_log]
3098                    and is_continued_stmt(source[after_end])):
3099                # find n, the beginning of the last continued statement
3100                # Apply fix to previous block if there is one.
3101                only_block = True
3102                for n, n_ind in logical[0][start_log:end_log + 1][::-1]:
3103                    if n_ind == ind and not is_continued_stmt(source[n]):
3104                        n_log = start_lines.index(n)
3105                        source = local_fix(source, start_log, n_log - 1,
3106                                           start_lines, end_lines,
3107                                           indents, last_line)
3108                        start_log = n_log + 1
3109                        start = start_lines[start_log]
3110                        only_block = False
3111                        break
3112                if only_block:
3113                    end_log, end = find_le(start_lines, end - 1)
3114                continue
3115
3116            source = local_fix(source, start_log, end_log,
3117                               start_lines, end_lines,
3118                               indents, last_line)
3119            break
3120
3121    return ''.join(source)
3122
3123
3124def extract_code_from_function(function):
3125    """Return code handled by function."""
3126    if not function.__name__.startswith('fix_'):
3127        return None
3128
3129    code = re.sub('^fix_', '', function.__name__)
3130    if not code:
3131        return None
3132
3133    try:
3134        int(code[1:])
3135    except ValueError:
3136        return None
3137
3138    return code
3139
3140
3141def create_parser():
3142    """Return command-line parser."""
3143    # Do import locally to be friendly to those who use autopep8 as a library
3144    # and are supporting Python 2.6.
3145    import argparse
3146
3147    parser = argparse.ArgumentParser(description=docstring_summary(__doc__),
3148                                     prog='autopep8')
3149    parser.add_argument('--version', action='version',
3150                        version='%(prog)s ' + __version__)
3151    parser.add_argument('-v', '--verbose', action='count', dest='verbose',
3152                        default=0,
3153                        help='print verbose messages; '
3154                        'multiple -v result in more verbose messages')
3155    parser.add_argument('-d', '--diff', action='store_true', dest='diff',
3156                        help='print the diff for the fixed source')
3157    parser.add_argument('-i', '--in-place', action='store_true',
3158                        help='make changes to files in place')
3159    parser.add_argument('-r', '--recursive', action='store_true',
3160                        help='run recursively over directories; '
3161                        'must be used with --in-place or --diff')
3162    parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1,
3163                        help='number of parallel jobs; '
3164                        'match CPU count if value is less than 1')
3165    parser.add_argument('-p', '--pep8-passes', metavar='n',
3166                        default=-1, type=int,
3167                        help='maximum number of additional pep8 passes '
3168                        '(default: infinite)')
3169    parser.add_argument('-a', '--aggressive', action='count', default=0,
3170                        help='enable non-whitespace changes; '
3171                        'multiple -a result in more aggressive changes')
3172    parser.add_argument('--experimental', action='store_true',
3173                        help='enable experimental fixes')
3174    parser.add_argument('--exclude', metavar='globs',
3175                        help='exclude file/directory names that match these '
3176                        'comma-separated globs')
3177    parser.add_argument('--list-fixes', action='store_true',
3178                        help='list codes for fixes; '
3179                        'used by --ignore and --select')
3180    parser.add_argument('--ignore', metavar='errors', default='',
3181                        help='do not fix these errors/warnings '
3182                        '(default: {0})'.format(DEFAULT_IGNORE))
3183    parser.add_argument('--select', metavar='errors', default='',
3184                        help='fix only these errors/warnings (e.g. E4,W)')
3185    parser.add_argument('--max-line-length', metavar='n', default=79, type=int,
3186                        help='set maximum allowed line length '
3187                        '(default: %(default)s)')
3188    parser.add_argument('--range', metavar='line', dest='line_range',
3189                        default=None, type=int, nargs=2,
3190                        help='only fix errors found within this inclusive '
3191                        'range of line numbers (e.g. 1 99); '
3192                        'line numbers are indexed at 1')
3193    parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE,
3194                        type=int, metavar='n',
3195                        help='number of spaces per indent level '
3196                             '(default %(default)s)')
3197    parser.add_argument('files', nargs='*',
3198                        help="files to format or '-' for standard in")
3199
3200    return parser
3201
3202
3203def parse_args(arguments):
3204    """Parse command-line options."""
3205    parser = create_parser()
3206    args = parser.parse_args(arguments)
3207
3208    if not args.files and not args.list_fixes:
3209        parser.error('incorrect number of arguments')
3210
3211    args.files = [decode_filename(name) for name in args.files]
3212
3213    if '-' in args.files:
3214        if len(args.files) > 1:
3215            parser.error('cannot mix stdin and regular files')
3216
3217        if args.diff:
3218            parser.error('--diff cannot be used with standard input')
3219
3220        if args.in_place:
3221            parser.error('--in-place cannot be used with standard input')
3222
3223        if args.recursive:
3224            parser.error('--recursive cannot be used with standard input')
3225
3226    if len(args.files) > 1 and not (args.in_place or args.diff):
3227        parser.error('autopep8 only takes one filename as argument '
3228                     'unless the "--in-place" or "--diff" args are '
3229                     'used')
3230
3231    if args.recursive and not (args.in_place or args.diff):
3232        parser.error('--recursive must be used with --in-place or --diff')
3233
3234    if args.exclude and not args.recursive:
3235        parser.error('--exclude is only relevant when used with --recursive')
3236
3237    if args.in_place and args.diff:
3238        parser.error('--in-place and --diff are mutually exclusive')
3239
3240    if args.max_line_length <= 0:
3241        parser.error('--max-line-length must be greater than 0')
3242
3243    if args.select:
3244        args.select = args.select.split(',')
3245
3246    if args.ignore:
3247        args.ignore = args.ignore.split(',')
3248    elif not args.select:
3249        if args.aggressive:
3250            # Enable everything by default if aggressive.
3251            args.select = ['E', 'W']
3252        else:
3253            args.ignore = DEFAULT_IGNORE.split(',')
3254
3255    if args.exclude:
3256        args.exclude = args.exclude.split(',')
3257    else:
3258        args.exclude = []
3259
3260    if args.jobs < 1:
3261        # Do not import multiprocessing globally in case it is not supported
3262        # on the platform.
3263        import multiprocessing
3264        args.jobs = multiprocessing.cpu_count()
3265
3266    if args.jobs > 1 and not args.in_place:
3267        parser.error('parallel jobs requires --in-place')
3268
3269    if args.line_range:
3270        if args.line_range[0] <= 0:
3271            parser.error('--range must be positive numbers')
3272        if args.line_range[0] > args.line_range[1]:
3273            parser.error('First value of --range should be less than or equal '
3274                         'to the second')
3275
3276    return args
3277
3278
3279def decode_filename(filename):
3280    """Return Unicode filename."""
3281    if isinstance(filename, unicode):
3282        return filename
3283    else:
3284        return filename.decode(sys.getfilesystemencoding())
3285
3286
3287def supported_fixes():
3288    """Yield pep8 error codes that autopep8 fixes.
3289
3290    Each item we yield is a tuple of the code followed by its
3291    description.
3292
3293    """
3294    yield ('E101', docstring_summary(reindent.__doc__))
3295
3296    instance = FixPEP8(filename=None, options=None, contents='')
3297    for attribute in dir(instance):
3298        code = re.match('fix_([ew][0-9][0-9][0-9])', attribute)
3299        if code:
3300            yield (
3301                code.group(1).upper(),
3302                re.sub(r'\s+', ' ',
3303                       docstring_summary(getattr(instance, attribute).__doc__))
3304            )
3305
3306    for (code, function) in sorted(global_fixes()):
3307        yield (code.upper() + (4 - len(code)) * ' ',
3308               re.sub(r'\s+', ' ', docstring_summary(function.__doc__)))
3309
3310    for code in sorted(CODE_TO_2TO3):
3311        yield (code.upper() + (4 - len(code)) * ' ',
3312               re.sub(r'\s+', ' ', docstring_summary(fix_2to3.__doc__)))
3313
3314
3315def docstring_summary(docstring):
3316    """Return summary of docstring."""
3317    return docstring.split('\n')[0]
3318
3319
3320def line_shortening_rank(candidate, indent_word, max_line_length,
3321                         experimental=False):
3322    """Return rank of candidate.
3323
3324    This is for sorting candidates.
3325
3326    """
3327    if not candidate.strip():
3328        return 0
3329
3330    rank = 0
3331    lines = candidate.split('\n')
3332
3333    offset = 0
3334    if (
3335        not lines[0].lstrip().startswith('#') and
3336        lines[0].rstrip()[-1] not in '([{'
3337    ):
3338        for (opening, closing) in ('()', '[]', '{}'):
3339            # Don't penalize empty containers that aren't split up. Things like
3340            # this "foo(\n    )" aren't particularly good.
3341            opening_loc = lines[0].find(opening)
3342            closing_loc = lines[0].find(closing)
3343            if opening_loc >= 0:
3344                if closing_loc < 0 or closing_loc != opening_loc + 1:
3345                    offset = max(offset, 1 + opening_loc)
3346
3347    current_longest = max(offset + len(x.strip()) for x in lines)
3348
3349    rank += 4 * max(0, current_longest - max_line_length)
3350
3351    rank += len(lines)
3352
3353    # Too much variation in line length is ugly.
3354    rank += 2 * standard_deviation(len(line) for line in lines)
3355
3356    bad_staring_symbol = {
3357        '(': ')',
3358        '[': ']',
3359        '{': '}'}.get(lines[0][-1])
3360
3361    if len(lines) > 1:
3362        if (
3363            bad_staring_symbol and
3364            lines[1].lstrip().startswith(bad_staring_symbol)
3365        ):
3366            rank += 20
3367
3368    for lineno, current_line in enumerate(lines):
3369        current_line = current_line.strip()
3370
3371        if current_line.startswith('#'):
3372            continue
3373
3374        for bad_start in ['.', '%', '+', '-', '/']:
3375            if current_line.startswith(bad_start):
3376                rank += 100
3377
3378            # Do not tolerate operators on their own line.
3379            if current_line == bad_start:
3380                rank += 1000
3381
3382        if current_line.endswith(('(', '[', '{', '.')):
3383            # Avoid lonely opening. They result in longer lines.
3384            if len(current_line) <= len(indent_word):
3385                rank += 100
3386
3387            # Avoid the ugliness of ", (\n".
3388            if (
3389                current_line.endswith('(') and
3390                current_line[:-1].rstrip().endswith(',')
3391            ):
3392                rank += 100
3393
3394            # Also avoid the ugliness of "foo.\nbar"
3395            if current_line.endswith('.'):
3396                rank += 100
3397
3398            if has_arithmetic_operator(current_line):
3399                rank += 100
3400
3401        if current_line.endswith(('%', '(', '[', '{')):
3402            rank -= 20
3403
3404        # Try to break list comprehensions at the "for".
3405        if current_line.startswith('for '):
3406            rank -= 50
3407
3408        if current_line.endswith('\\'):
3409            # If a line ends in \-newline, it may be part of a
3410            # multiline string. In that case, we would like to know
3411            # how long that line is without the \-newline. If it's
3412            # longer than the maximum, or has comments, then we assume
3413            # that the \-newline is an okay candidate and only
3414            # penalize it a bit.
3415            total_len = len(current_line)
3416            lineno += 1
3417            while lineno < len(lines):
3418                total_len += len(lines[lineno])
3419
3420                if lines[lineno].lstrip().startswith('#'):
3421                    total_len = max_line_length
3422                    break
3423
3424                if not lines[lineno].endswith('\\'):
3425                    break
3426
3427                lineno += 1
3428
3429            if total_len < max_line_length:
3430                rank += 10
3431            else:
3432                rank += 100 if experimental else 1
3433
3434        # Prefer breaking at commas rather than colon.
3435        if ',' in current_line and current_line.endswith(':'):
3436            rank += 10
3437
3438        rank += 10 * count_unbalanced_brackets(current_line)
3439
3440    return max(0, rank)
3441
3442
3443def standard_deviation(numbers):
3444    """Return standard devation."""
3445    numbers = list(numbers)
3446    if not numbers:
3447        return 0
3448    mean = sum(numbers) / len(numbers)
3449    return (sum((n - mean) ** 2 for n in numbers) /
3450            len(numbers)) ** .5
3451
3452
3453def has_arithmetic_operator(line):
3454    """Return True if line contains any arithmetic operators."""
3455    for operator in pep8.ARITHMETIC_OP:
3456        if operator in line:
3457            return True
3458
3459    return False
3460
3461
3462def count_unbalanced_brackets(line):
3463    """Return number of unmatched open/close brackets."""
3464    count = 0
3465    for opening, closing in ['()', '[]', '{}']:
3466        count += abs(line.count(opening) - line.count(closing))
3467
3468    return count
3469
3470
3471def split_at_offsets(line, offsets):
3472    """Split line at offsets.
3473
3474    Return list of strings.
3475
3476    """
3477    result = []
3478
3479    previous_offset = 0
3480    current_offset = 0
3481    for current_offset in sorted(offsets):
3482        if current_offset < len(line) and previous_offset != current_offset:
3483            result.append(line[previous_offset:current_offset].strip())
3484        previous_offset = current_offset
3485
3486    result.append(line[current_offset:])
3487
3488    return result
3489
3490
3491class LineEndingWrapper(object):
3492
3493    r"""Replace line endings to work with sys.stdout.
3494
3495    It seems that sys.stdout expects only '\n' as the line ending, no matter
3496    the platform. Otherwise, we get repeated line endings.
3497
3498    """
3499
3500    def __init__(self, output):
3501        self.__output = output
3502
3503    def write(self, s):
3504        self.__output.write(s.replace('\r\n', '\n').replace('\r', '\n'))
3505
3506    def flush(self):
3507        self.__output.flush()
3508
3509
3510def match_file(filename, exclude):
3511    """Return True if file is okay for modifying/recursing."""
3512    base_name = os.path.basename(filename)
3513
3514    if base_name.startswith('.'):
3515        return False
3516
3517    for pattern in exclude:
3518        if fnmatch.fnmatch(base_name, pattern):
3519            return False
3520
3521    if not os.path.isdir(filename) and not is_python_file(filename):
3522        return False
3523
3524    return True
3525
3526
3527def find_files(filenames, recursive, exclude):
3528    """Yield filenames."""
3529    while filenames:
3530        name = filenames.pop(0)
3531        if recursive and os.path.isdir(name):
3532            for root, directories, children in os.walk(name):
3533                filenames += [os.path.join(root, f) for f in children
3534                              if match_file(os.path.join(root, f),
3535                                            exclude)]
3536                directories[:] = [d for d in directories
3537                                  if match_file(os.path.join(root, d),
3538                                                exclude)]
3539        else:
3540            yield name
3541
3542
3543def _fix_file(parameters):
3544    """Helper function for optionally running fix_file() in parallel."""
3545    if parameters[1].verbose:
3546        print('[file:{0}]'.format(parameters[0]), file=sys.stderr)
3547    try:
3548        fix_file(*parameters)
3549    except IOError as error:
3550        print(unicode(error), file=sys.stderr)
3551
3552
3553def fix_multiple_files(filenames, options, output=None):
3554    """Fix list of files.
3555
3556    Optionally fix files recursively.
3557
3558    """
3559    filenames = find_files(filenames, options.recursive, options.exclude)
3560    if options.jobs > 1:
3561        import multiprocessing
3562        pool = multiprocessing.Pool(options.jobs)
3563        pool.map(_fix_file,
3564                 [(name, options) for name in filenames])
3565    else:
3566        for name in filenames:
3567            _fix_file((name, options, output))
3568
3569
3570def is_python_file(filename):
3571    """Return True if filename is Python file."""
3572    if filename.endswith('.py'):
3573        return True
3574
3575    try:
3576        with open_with_encoding(filename) as f:
3577            first_line = f.readlines(1)[0]
3578    except (IOError, IndexError):
3579        return False
3580
3581    if not PYTHON_SHEBANG_REGEX.match(first_line):
3582        return False
3583
3584    return True
3585
3586
3587def is_probably_part_of_multiline(line):
3588    """Return True if line is likely part of a multiline string.
3589
3590    When multiline strings are involved, pep8 reports the error as being
3591    at the start of the multiline string, which doesn't work for us.
3592
3593    """
3594    return (
3595        '"""' in line or
3596        "'''" in line or
3597        line.rstrip().endswith('\\')
3598    )
3599
3600
3601def main():
3602    """Tool main."""
3603    try:
3604        # Exit on broken pipe.
3605        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
3606    except AttributeError:  # pragma: no cover
3607        # SIGPIPE is not available on Windows.
3608        pass
3609
3610    try:
3611        args = parse_args(sys.argv[1:])
3612
3613        if args.list_fixes:
3614            for code, description in sorted(supported_fixes()):
3615                print('{code} - {description}'.format(
3616                    code=code, description=description))
3617            return 0
3618
3619        if args.files == ['-']:
3620            assert not args.in_place
3621
3622            # LineEndingWrapper is unnecessary here due to the symmetry between
3623            # standard in and standard out.
3624            sys.stdout.write(fix_code(sys.stdin.read(), args))
3625        else:
3626            if args.in_place or args.diff:
3627                args.files = list(set(args.files))
3628            else:
3629                assert len(args.files) == 1
3630                assert not args.recursive
3631
3632            fix_multiple_files(args.files, args, sys.stdout)
3633    except KeyboardInterrupt:
3634        return 1  # pragma: no cover
3635
3636
3637class CachedTokenizer(object):
3638
3639    """A one-element cache around tokenize.generate_tokens().
3640
3641    Original code written by Ned Batchelder, in coverage.py.
3642
3643    """
3644
3645    def __init__(self):
3646        self.last_text = None
3647        self.last_tokens = None
3648
3649    def generate_tokens(self, text):
3650        """A stand-in for tokenize.generate_tokens()."""
3651        if text != self.last_text:
3652            string_io = io.StringIO(text)
3653            self.last_tokens = list(
3654                tokenize.generate_tokens(string_io.readline)
3655            )
3656            self.last_text = text
3657        return self.last_tokens
3658
3659_cached_tokenizer = CachedTokenizer()
3660generate_tokens = _cached_tokenizer.generate_tokens
3661
3662
3663if __name__ == '__main__':
3664    sys.exit(main())
3665