1#! /usr/bin/env python
2
3# Released to the public domain, by Tim Peters, 03 October 2000.
4
5"""reindent [-d][-r][-v] [ path ... ]
6
7-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
8-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
9-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10-v (--verbose)  Verbose.   Print informative msgs; else no output.
11-h (--help)     Help.      Print this usage information and exit.
12
13Change Python (.py) files to use 4-space indents and no hard tab characters.
14Also trim excess spaces and tabs from ends of lines, and remove empty lines
15at the end of files.  Also ensure the last line ends with a newline.
16
17If no paths are given on the command line, reindent operates as a filter,
18reading a single source file from standard input and writing the transformed
19source to standard output.  In this case, the -d, -r and -v flags are
20ignored.
21
22You can pass one or more file and/or directory paths.  When a directory
23path, all .py files within the directory will be examined, and, if the -r
24option is given, likewise recursively for subdirectories.
25
If output is not to standard output, reindent overwrites files in place,
keeping a copy of each original with a .bak extension (unless the -n
option is given).  If it finds nothing to change, the file is left alone.
If reindent does change a file, the changed file is a fixed-point for future
runs (i.e., running reindent on the resulting .py file won't change it again).
31
32The hard part of reindenting is figuring out what to do with comment
33lines.  So long as the input files get a clean bill of health from
34tabnanny.py, reindent should do a good job.
35
The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copyfile(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
40"""
41
# Version string for this script.
__version__ = "1"
43
44import tokenize
45import os, shutil
46import sys
47
# Module-level option flags.  main() declares these global and updates them
# from the command line; check() reads them.
verbose    = 0     # incremented once per -v/--verbose; nonzero prints progress
recurse    = 0     # incremented once per -r/--recurse; nonzero descends into subdirs
dryrun     = 0     # incremented once per -d/--dryrun; nonzero leaves files untouched
makebackup = True  # set to False by -n/--nobackup; true copies file to file.bak
52
def usage(msg=None):
    """Write the usage text to standard error.

    If msg is not None, its string form is written on a line of its own
    first; the module docstring follows, terminated by a newline.

    sys.stderr.write() is used rather than the "print >> stream" statement
    because the latter only exists in Python 2; on Python 3 it parses as a
    shift expression and fails at run time.
    """
    if msg is not None:
        sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write("%s\n" % (__doc__,))
57
def errprint(*args):
    """Write the arguments to standard error on a single line.

    Each argument is converted with str(); the pieces are separated by one
    space and the line is terminated with a newline.  With no arguments
    only the newline is written.
    """
    pieces = [str(item) for item in args]
    sys.stderr.write(" ".join(pieces) + "\n")
64
def main():
    """Command-line entry point.

    Parses sys.argv[1:] with getopt and records the options in the
    module-level flags verbose, recurse, dryrun and makebackup.  On an
    option error the message and the usage text are written via usage()
    and the function returns; -h/--help prints the usage text and returns.

    With no path arguments the script acts as a filter: source is read
    from standard input and the transformed program is written to standard
    output.  Otherwise each path argument is handed to check().
    """
    import getopt
    global verbose, recurse, dryrun, makebackup
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drnvh",
                        ["dryrun", "recurse", "nobackup", "verbose", "help"])
    except getopt.error:
        # Fetch the exception through sys.exc_info() so this parses on every
        # Python version ("except X, msg" is a syntax error on Python 3).
        msg = sys.exc_info()[1]
        usage(msg)
        return
    for o, a in opts:
        if o in ('-d', '--dryrun'):
            dryrun += 1
        elif o in ('-r', '--recurse'):
            recurse += 1
        elif o in ('-n', '--nobackup'):
            makebackup = False
        elif o in ('-v', '--verbose'):
            verbose += 1
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin -> stdout, regardless of whether anything changed.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
93
def check(file):
    """Reindent one file, or every .py file found in a directory.

    If file is a directory (and not a symbolic link), each entry whose name
    ends in ".py" (case-insensitively) is checked; when the recurse flag is
    set, subdirectories that are not symbolic links are checked too.

    Otherwise the file is read and run through Reindenter.  If the result
    differs from the input and this is not a dry run, the file is rewritten
    in place, after copying the original to file + ".bak" when makebackup
    is true.  Progress messages go to standard output when verbose is set.

    Returns True if the file needed changes, False if it did not, and None
    for directories or when the file could not be opened.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            sys.stdout.write("listing directory %s\n" % (file,))
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname))
                or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline: the verdict ("changed." / "unchanged.") completes
        # this line.
        sys.stdout.write("checking %s ... " % (file,))
    try:
        f = open(file)
    except IOError:
        # sys.exc_info() keeps this valid on both Python 2 and Python 3.
        msg = sys.exc_info()[1]
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Reindenter reads the whole file in its constructor; make sure the
    # handle is released even if that read fails.
    try:
        r = Reindenter(f)
    finally:
        f.close()

    if r.run():
        if verbose:
            sys.stdout.write("changed.\n")
            if dryrun:
                sys.stdout.write(
                    "But this is a dry run, so leaving it alone.\n")
        if not dryrun:
            if makebackup:
                bak = file + ".bak"
                shutil.copyfile(file, bak)
                if verbose:
                    sys.stdout.write("backed up %s to %s\n" % (file, bak))
            f = open(file, "w")
            try:
                r.write(f)
            finally:
                # Close (and flush) the output even when the write fails.
                f.close()
            if verbose:
                sys.stdout.write("wrote new %s\n" % (file,))
        return True
    else:
        if verbose:
            sys.stdout.write("unchanged.\n")
        return False
138
139def _rstrip(line, JUNK='\n \t'):
140    """Return line stripped of trailing spaces, tabs, newlines.
141
142    Note that line.rstrip() instead also strips sundry control characters,
143    but at least one known Emacs user expects to keep junk like that, not
144    mentioning Barry by name or anything <wink>.
145    """
146
147    i = len(line)
148    while i > 0 and line[i-1] in JUNK:
149        i -= 1
150    return line[:i]
151
class Reindenter:
    """Compute a 4-space-indented version of Python source.

    Construct with an object providing readlines(), call run() to build the
    transformed program (stored in self.after), then write() to emit it.

    The code is written to work unchanged on Python 2 and Python 3: it
    drives tokenize.generate_tokens() itself instead of using the
    callback-style tokenize.tokenize() of Python 2, avoids tuple parameters
    in signatures, and uses range() instead of xrange().
    """

    def __init__(self, f):
        """Read all lines from f and prepare the normalized line list."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Build self.after, the reindented program.

        Returns true iff the result differs from the raw input lines.
        Errors raised by tokenize on malformed source propagate to the
        caller.
        """
        # Feed every token to tokeneater().  generate_tokens() yields
        # 5-item tuples (type, string, start, end, line) on both Python 2
        # and Python 3.
        for token_info in tokenize.generate_tokens(self.getline):
            self.tokeneater(*token_info)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats)-1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i+1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i+1, len(stats)-1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i-1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline-1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                # Shift every physical line of this statement (including
                # continuation lines) by the same amount.
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Never remove more than the line's own leading
                        # spaces.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed program (built by run()) to f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next normalized line, or "" once they are exhausted."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, start, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Record statement and comment start lines as tokens arrive.

        start is the token's (row, column) pair; only the row is used.
        It is taken as a single parameter and indexed here because tuple
        parameters in a def are not valid syntax on Python 3.
        """
        sline = start[0]

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
294
# Count number of leading blanks.
def getlspace(line):
    """Return the number of space characters at the start of line.

    Only ' ' counts; a leading tab or any other character ends the run.
    """
    return len(line) - len(line.lstrip(" "))
301
# Run the command-line interface only when executed as a script, not when
# imported as a module.
if __name__ == '__main__':
    main()
304