1#! /usr/bin/env python
2
3# This file contains a class and a main program that perform three
# related (though complementary) formatting operations on Python
5# programs.  When called as "pindent -c", it takes a valid Python
6# program as input and outputs a version augmented with block-closing
7# comments.  When called as "pindent -d", it assumes its input is a
8# Python program with block-closing comments and outputs a commentless
9# version.   When called as "pindent -r" it assumes its input is a
10# Python program with block-closing comments but with its indentation
11# messed up, and outputs a properly indented version.
12
13# A "block-closing comment" is a comment of the form '# end <keyword>'
14# where <keyword> is the keyword that opened the block.  If the
15# opening keyword is 'def' or 'class', the function or class name may
16# be repeated in the block-closing comment as well.  Here is an
17# example of a program fully augmented with block-closing comments:
18
19# def foobar(a, b):
20#    if a == b:
21#        a = a+1
22#    elif a < b:
23#        b = b-1
24#        if b > a: a = a-1
25#        # end if
26#    else:
27#        print 'oops!'
28#    # end if
29# # end def foobar
30
31# Note that only the last part of an if...elif...else... block needs a
32# block-closing comment; the same is true for other compound
33# statements (e.g. try...except).  Also note that "short-form" blocks
34# like the second 'if' in the example must be closed as well;
35# otherwise the 'else' in the example would be ambiguous (remember
36# that indentation is not significant when interpreting block-closing
37# comments).
38
39# The operations are idempotent (i.e. applied to their own output
40# they yield an identical result).  Running first "pindent -c" and
41# then "pindent -r" on a valid Python program produces a program that
42# is semantically identical to the input (though its indentation may
# be different). Running "pindent -d" on that output produces a
44# program that only differs from the original in indentation.
45
46# Other options:
47# -s stepsize: set the indentation step size (default 8)
48# -t tabsize : set the number of spaces a tab character is worth (default 8)
49# -e         : expand TABs into spaces
50# file ...   : input file(s) (default standard input)
51# The results always go to standard output
52
53# Caveats:
54# - comments ending in a backslash will be mistaken for continued lines
55# - continuations using backslash are always left unchanged
56# - continuations inside parentheses are not extra indented by -r
57#   but must be indented for -c to work correctly (this breaks
58#   idempotency!)
59# - continued lines inside triple-quoted strings are totally garbled
60
61# Secret feature:
62# - On input, a block may also be closed with an "end statement" --
63#   this is a block-closing comment without the '#' sign.
64
65# Possible improvements:
66# - check syntax based on transitions in 'next' table
67# - better error reporting
68# - better error recovery
69# - check identifier after class/def
70
71# The following wishes need a more complete tokenization of the source:
72# - Don't get fooled by comments ending in backslash
73# - reindent continuation lines indicated by backslash
74# - handle continuation lines inside parentheses/braces/brackets
75# - handle triple quoted strings spanning lines
76# - realign comments
77# - optionally do much more thorough reformatting, a la C indent
78
# Defaults used by the class, the helper functions and the command line
STEPSIZE, TABSIZE = 8, 8    # indentation step; spaces a tab character is worth
EXPANDTABS = 0              # nonzero means: expand TABs into spaces on output
83
84import re
85import sys
86
# 'next' maps each keyword that may open or continue a compound statement
# to the keywords allowed to follow it at the same level ('end' stands for
# the block-closing comment).  Membership in this table is what makes
# complete() treat a word as a block keyword at all.
next = {}
next['if'] = next['elif'] = 'elif', 'else', 'end'
next['while'] = next['for'] = 'else', 'end'
next['try'] = 'except', 'finally'
next['except'] = 'except', 'else', 'finally', 'end'
# 'with' must be listed here as well as in 'start': without an entry
# complete() ignores the keyword and never emits '# end with', while
# reformat() (which consults 'start') still expects the block to be closed.
next['else'] = next['finally'] = next['with'] = \
    next['def'] = next['class'] = 'end'
next['end'] = ()
# Keywords that open a new block (every one of them is a key of 'next').
start = 'if', 'while', 'for', 'try', 'with', 'def', 'class'
95
class PythonIndenter:
    """Add, delete, or apply block-closing comments ('# end <keyword>').

    Input lines are read with fpi.readline() and output is produced with
    fpo.write().  The three operations are complete(), delete() and
    reformat(); each one consumes the input up to end of file.
    """

    def __init__(self, fpi = sys.stdin, fpo = sys.stdout,
                 indentsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
        # fpi        -- input object; only its readline() method is used
        # fpo        -- output object; only its write() method is used
        # indentsize -- columns per indentation level (used by putline)
        # tabsize    -- columns a tab character is worth
        # expandtabs -- if true, write() expands tabs into spaces
        self.fpi = fpi
        self.fpo = fpo
        self.indentsize = indentsize
        self.tabsize = tabsize
        self.lineno = 0
        self.expandtabs = expandtabs
        self._write = fpo.write
        # A lowercase word at the start of the line (group 'kw'), optionally
        # followed by an identifier (group 'id'), then a non-word character.
        self.kwprog = re.compile(
                r'^\s*(?P<kw>[a-z]+)'
                r'(\s+(?P<id>[a-zA-Z_]\w*))?'
                r'[^\w]')
        # Same shape, but for 'end <kw> [<id>]' with an optional leading '#'
        # (so both the comment form and the "end statement" form match).
        self.endprog = re.compile(
                r'^\s*#?\s*end\s+(?P<kw>[a-z]+)'
                r'(\s+(?P<id>[a-zA-Z_]\w*))?'
                r'[^\w]')
        # Leading blanks and tabs; always matches (possibly the empty string).
        self.wsprog = re.compile(r'^[ \t]*')
    # end def __init__

    def write(self, line):
        """Write line to the output, expanding tabs if self.expandtabs is set."""
        if self.expandtabs:
            self._write(line.expandtabs(self.tabsize))
        else:
            self._write(line)
        # end if
    # end def write

    def readline(self):
        """Return the next physical input line ('' at EOF), counting lines."""
        line = self.fpi.readline()
        if line: self.lineno = self.lineno + 1
        # end if
        return line
    # end def readline

    def error(self, fmt, *args):
        """Report a problem on stderr and as a '### ... ###' line in the output.

        If args are given, fmt is first formatted with them (fmt % args).
        """
        if args: fmt = fmt % args
        # end if
        sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt))
        self.write('### %s ###\n' % fmt)
    # end def error

    def getline(self):
        """Return the next logical line ('' at EOF).

        Physical lines ending in backslash-newline are joined with the
        lines that follow them; the embedded newlines are kept.
        """
        line = self.readline()
        while line[-2:] == '\\\n':
            line2 = self.readline()
            if not line2: break
            # end if
            line = line + line2
        # end while
        return line
    # end def getline

    def putline(self, line, indent = None):
        """Write line, optionally replacing its leading whitespace.

        With indent None the line is written unchanged.  Otherwise the
        leading blanks/tabs are replaced by indent*self.indentsize columns,
        expressed as tabs (worth self.tabsize columns each) plus spaces.
        """
        if indent is None:
            self.write(line)
            return
        # end if
        tabs, spaces = divmod(indent*self.indentsize, self.tabsize)
        i = 0
        m = self.wsprog.match(line)
        if m: i = m.end()
        # end if
        self.write('\t'*tabs + ' '*spaces + line[i:])
    # end def putline

    def reformat(self):
        """Re-indent the input according to its block-closing comments.

        The nesting depth is tracked with a stack of
        (opening keyword, most recent keyword) pairs; problems are reported
        through error().
        """
        stack = []
        while 1:
            line = self.getline()
            if not line: break      # EOF
            # end if
            m = self.endprog.match(line)
            if m:
                kw = 'end'
                kw2 = m.group('kw')
                if not stack:
                    self.error('unexpected end')
                elif stack[-1][0] != kw2:
                    self.error('unmatched end')
                # end if
                # Slice deletion so that an empty stack is not an error here.
                del stack[-1:]
                self.putline(line, len(stack))
                continue
            # end if
            m = self.kwprog.match(line)
            if m:
                kw = m.group('kw')
                if kw in start:
                    # Opening keyword: written at the current depth, body
                    # goes one level deeper.
                    self.putline(line, len(stack))
                    stack.append((kw, kw))
                    continue
                # end if
                # 'in' rather than dict.has_key(): has_key() no longer
                # exists in Python 3.
                if kw in next and stack:
                    # Continuation keyword (elif, else, except, ...): lines
                    # up with the keyword that opened the block.
                    self.putline(line, len(stack)-1)
                    kwa, kwb = stack[-1]
                    stack[-1] = kwa, kw
                    continue
                # end if
            # end if
            self.putline(line, len(stack))
        # end while
        if stack:
            self.error('unterminated keywords')
            for kwa, kwb in stack:
                self.write('\t%s\n' % kwa)
            # end for
        # end if
    # end def reformat

    def delete(self):
        """Copy the input to the output, dropping block-closing comments.

        Lines matching self.endprog are omitted.  A warning is written to
        stderr if the number of dropped lines differs from the number of
        lines starting with a block-opening keyword.
        """
        begin_counter = 0
        end_counter = 0
        while 1:
            line = self.getline()
            if not line: break      # EOF
            # end if
            m = self.endprog.match(line)
            if m:
                end_counter = end_counter + 1
                continue
            # end if
            m = self.kwprog.match(line)
            if m:
                kw = m.group('kw')
                if kw in start:
                    begin_counter = begin_counter + 1
                # end if
            # end if
            self.putline(line)
        # end while
        if begin_counter - end_counter < 0:
            sys.stderr.write('Warning: input contained more end tags than expected\n')
        elif begin_counter - end_counter > 0:
            sys.stderr.write('Warning: input contained less end tags than expected\n')
        # end if
    # end def delete

    def _put_end_comment(self, kw, ident, column):
        # Write '# end <kw> [<ident>]' indented to the given column, using
        # tabs (worth self.tabsize columns each) followed by spaces.
        if ident:
            s = '# end %s %s\n' % (kw, ident)
        else:
            s = '# end %s\n' % kw
        # end if
        tabs, spaces = divmod(column, self.tabsize)
        self.write('\t'*tabs + ' '*spaces + s)
    # end def _put_end_comment

    def complete(self):
        """Copy the input to the output, adding missing block-closing comments.

        Block structure is derived from the indentation of the input.
        Unlike earlier behaviour, self.indentsize is left untouched: the
        end comments are indented by column directly instead of by
        temporarily redefining the indentation step as one column.
        """
        # stack holds the saved (current, firstkw, lastkw, topid) state of
        # the enclosing indentation levels.
        stack = []
        # Blank and comment-only lines are held back in todo until the next
        # other line has been examined, so that any end comments are
        # written before them.
        todo = []
        thisid = ''
        # current -- indentation column of the level being processed
        # firstkw -- keyword that opened the still-unclosed block at this
        #            level ('' if there is none)
        # lastkw  -- most recent keyword seen at this level
        # topid   -- identifier of the open def/class, if any
        current, firstkw, lastkw, topid = 0, '', '', ''
        while 1:
            line = self.getline()
            i = 0
            m = self.wsprog.match(line)
            if m: i = m.end()
            # end if
            m = self.endprog.match(line)
            if m:
                thiskw = 'end'
                endkw = m.group('kw')
                thisid = m.group('id')
            else:
                m = self.kwprog.match(line)
                if m:
                    thiskw = m.group('kw')
                    # 'in' rather than dict.has_key(): has_key() no longer
                    # exists in Python 3.
                    if thiskw not in next:
                        thiskw = ''
                    # end if
                    if thiskw in ('def', 'class'):
                        thisid = m.group('id')
                    else:
                        thisid = ''
                    # end if
                elif line[i:i+1] in ('\n', '#'):
                    todo.append(line)
                    continue
                else:
                    thiskw = ''
                # end if
            # end if
            # At EOF line is '', so indent is 0 and every open block is
            # closed below before the loop is left.
            indent = len(line[:i].expandtabs(self.tabsize))
            while indent < current:
                # Dedent: close the block still open at the deeper level,
                # then return to the enclosing level.
                if firstkw:
                    self._put_end_comment(firstkw, topid, current)
                    firstkw = lastkw = ''
                # end if
                current, firstkw, lastkw, topid = stack[-1]
                del stack[-1]
            # end while
            if indent == current and firstkw:
                if thiskw == 'end':
                    # An explicit end is already present; just check it.
                    if endkw != firstkw:
                        self.error('mismatched end')
                    # end if
                    firstkw = lastkw = ''
                elif not thiskw or thiskw in start:
                    # A plain statement or a new opening keyword at the same
                    # level ends the previous block; continuation keywords
                    # (elif, else, ...) leave it open.
                    self._put_end_comment(firstkw, topid, current)
                    firstkw = lastkw = topid = ''
                # end if
            # end if
            if indent > current:
                stack.append((current, firstkw, lastkw, topid))
                if thiskw and thiskw not in start:
                    # error
                    thiskw = ''
                # end if
                current, firstkw, lastkw, topid = \
                         indent, thiskw, thiskw, thisid
            # end if
            if thiskw:
                if thiskw in start:
                    firstkw = lastkw = thiskw
                    topid = thisid
                else:
                    lastkw = thiskw
                # end if
            # end if
            for l in todo: self.write(l)
            # end for
            todo = []
            if not line: break
            # end if
            self.write(line)
        # end while
    # end def complete

# end class PythonIndenter
331
332# Simplified user interface
333# - xxx_filter(input, output): read and write file objects
334# - xxx_string(s): take and return string object
335# - xxx_file(filename): process file in place, return true iff changed
336
def complete_filter(input = sys.stdin, output = sys.stdout,
                    stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from input and write it to output with end comments added."""
    PythonIndenter(fpi = input, fpo = output, indentsize = stepsize,
                   tabsize = tabsize, expandtabs = expandtabs).complete()
# end def complete_filter
342
def delete_filter(input= sys.stdin, output = sys.stdout,
                        stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from input and write it to output without end comments."""
    PythonIndenter(fpi = input, fpo = output, indentsize = stepsize,
                   tabsize = tabsize, expandtabs = expandtabs).delete()
# end def delete_filter
348
def reformat_filter(input = sys.stdin, output = sys.stdout,
                    stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from input and write it to output re-indented by its end comments."""
    PythonIndenter(fpi = input, fpo = output, indentsize = stepsize,
                   tabsize = tabsize, expandtabs = expandtabs).reformat()
# end def reformat_filter
354
class StringReader:
    """Minimal read-only, file-like view of a string (read/readline/readlines)."""
    def __init__(self, buf):
        # buf: the text to read; pos: index of the next unread character;
        # len: total length of buf.
        self.buf = buf
        self.pos = 0
        self.len = len(self.buf)
    # end def __init__
    def read(self, n = 0):
        """Return up to n characters; n <= 0 means everything that is left."""
        available = self.len - self.pos
        if n <= 0 or n > available:
            n = available
        # end if
        first = self.pos
        self.pos = first + n
        return self.buf[first : first + n]
    # end def read
    def readline(self):
        """Return the text up to and including the next newline ('' at the end)."""
        newline_at = self.buf.find('\n', self.pos)
        if newline_at < 0:
            # No newline left: hand out whatever remains.
            return self.read()
        # end if
        return self.read(newline_at - self.pos + 1)
    # end def readline
    def readlines(self):
        """Return all remaining lines as a list."""
        # Keep calling readline() until it returns the empty string.
        return list(iter(self.readline, ''))
    # end def readlines
    # seek/tell etc. are left as an exercise for the reader
# end class StringReader
386
class StringWriter:
    """Minimal write-only, file-like object that collects its output in a string."""
    def __init__(self):
        # Everything written so far.
        self.buf = ''
    # end def __init__
    def write(self, s):
        """Append s to the collected output."""
        self.buf += s
    # end def write
    def getvalue(self):
        """Return everything written so far as one string."""
        return self.buf
    # end def getvalue
# end class StringWriter
398
def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return the program text in source with end comments added."""
    sink = StringWriter()
    indenter = PythonIndenter(StringReader(source), sink,
                              stepsize, tabsize, expandtabs)
    indenter.complete()
    return sink.getvalue()
# end def complete_string
406
def delete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return the program text in source with its end comments removed."""
    sink = StringWriter()
    indenter = PythonIndenter(StringReader(source), sink,
                              stepsize, tabsize, expandtabs)
    indenter.delete()
    return sink.getvalue()
# end def delete_string
414
def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return the program text in source re-indented according to its end comments."""
    sink = StringWriter()
    indenter = PythonIndenter(StringReader(source), sink,
                              stepsize, tabsize, expandtabs)
    indenter.reformat()
    return sink.getvalue()
# end def reformat_string
422
def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Add end comments to the file in place; return 1 if it changed, else 0.

    When the contents change, the original is first renamed to
    filename + '~' (best effort) and the result is then written to
    filename.  Both file objects are closed even if reading or writing
    raises.
    """
    f = open(filename, 'r')
    try:
        source = f.read()
    finally:
        f.close()
    # end try
    result = complete_string(source, stepsize, tabsize, expandtabs)
    if source == result: return 0
    # end if
    import os
    # The backup is deliberately best effort: a failed rename is ignored
    # and the file is rewritten anyway.
    try: os.rename(filename, filename + '~')
    except os.error: pass
    # end try
    f = open(filename, 'w')
    try:
        f.write(result)
    finally:
        f.close()
    # end try
    return 1
# end def complete_file
437
def delete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Remove end comments from the file in place; return 1 if it changed, else 0.

    When the contents change, the original is first renamed to
    filename + '~' (best effort) and the result is then written to
    filename.  Both file objects are closed even if reading or writing
    raises.
    """
    f = open(filename, 'r')
    try:
        source = f.read()
    finally:
        f.close()
    # end try
    result = delete_string(source, stepsize, tabsize, expandtabs)
    if source == result: return 0
    # end if
    import os
    # The backup is deliberately best effort: a failed rename is ignored
    # and the file is rewritten anyway.
    try: os.rename(filename, filename + '~')
    except os.error: pass
    # end try
    f = open(filename, 'w')
    try:
        f.write(result)
    finally:
        f.close()
    # end try
    return 1
# end def delete_file
452
def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Re-indent the file in place by its end comments; return 1 if it changed, else 0.

    When the contents change, the original is first renamed to
    filename + '~' (best effort) and the result is then written to
    filename.  Both file objects are closed even if reading or writing
    raises.
    """
    f = open(filename, 'r')
    try:
        source = f.read()
    finally:
        f.close()
    # end try
    result = reformat_string(source, stepsize, tabsize, expandtabs)
    if source == result: return 0
    # end if
    import os
    # The backup is deliberately best effort: a failed rename is ignored
    # and the file is rewritten anyway.
    try: os.rename(filename, filename + '~')
    except os.error: pass
    # end try
    f = open(filename, 'w')
    try:
        f.write(result)
    finally:
        f.close()
    # end try
    return 1
# end def reformat_file
467
468# Test program when called as a script
469
# Help text written to stderr on command-line errors.  The defaults are
# filled in from the module-level STEPSIZE and TABSIZE through vars().
usage = """
usage: pindent (-c|-d|-r) [-s stepsize] [-t tabsize] [-e] [file] ...
-c         : complete a correctly indented program (add #end directives)
-d         : delete #end directives
-r         : reformat a completed program (use #end directives)
-s stepsize: indentation step (default %(STEPSIZE)d)
-t tabsize : the worth in spaces of a tab (default %(TABSIZE)d)
-e         : expand TABs into spaces (default OFF)
[file] ... : files are changed in place, with backups in file~
If no files are specified or a single - is given,
the program acts as a filter (reads stdin, writes stdout).
""" % vars()
482
def error_both(op1, op2):
    """Complain on stderr about two conflicting actions, print usage, exit with status 2.

    op1 is the option as typed (e.g. '-d'); op2 is the name of the action
    chosen earlier, of which only the first letter is shown.
    """
    complaint = ''.join(('Error: You can not specify both ', op1,
                         ' and -', op2[0], ' at the same time\n'))
    sys.stderr.write(complaint)
    sys.stderr.write(usage)
    sys.exit(2)
# end def error_both
488
def test():
    """Command-line driver: parse sys.argv and run the selected action.

    Exactly one of -c, -d or -r must be given.  With no file arguments, or
    with the single argument '-', the program filters stdin to stdout;
    otherwise each named file is processed in place.  Any usage error is
    reported on stderr together with the usage text, and the program exits
    with status 2.
    """
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'cdrs:t:e')
    except getopt.error:
        # Fetch the exception through sys.exc_info() so that this handler
        # parses under both Python 2 and Python 3 ("except E, msg" is
        # Python 2 only).
        msg = sys.exc_info()[1]
        sys.stderr.write('Error: %s\n' % msg)
        sys.stderr.write(usage)
        sys.exit(2)
    # end try
    action = None
    stepsize = STEPSIZE
    tabsize = TABSIZE
    expandtabs = EXPANDTABS
    for o, a in opts:
        if o == '-c':
            if action: error_both(o, action)
            # end if
            action = 'complete'
        elif o == '-d':
            if action: error_both(o, action)
            # end if
            action = 'delete'
        elif o == '-r':
            if action: error_both(o, action)
            # end if
            action = 'reformat'
        elif o in ('-s', '-t'):
            # Report a malformed number as a usage error instead of
            # letting the ValueError escape as a traceback.
            try:
                value = int(a)
            except ValueError:
                sys.stderr.write(
                        'Error: option %s requires an integer argument, not %r\n'
                        % (o, a))
                sys.stderr.write(usage)
                sys.exit(2)
            # end try
            if o == '-s':
                stepsize = value
            else:
                tabsize = value
            # end if
        elif o == '-e':
            expandtabs = 1
        # end if
    # end for
    if not action:
        sys.stderr.write(
                'You must specify -c(omplete), -d(elete) or -r(eformat)\n')
        sys.stderr.write(usage)
        sys.exit(2)
    # end if
    # Explicit lookup tables instead of eval(action + '_filter') and
    # eval(action + '_file').
    filters = {
        'complete': complete_filter,
        'delete': delete_filter,
        'reformat': reformat_filter,
    }
    file_actions = {
        'complete': complete_file,
        'delete': delete_file,
        'reformat': reformat_file,
    }
    if not args or args == ['-']:
        run = filters[action]
        run(sys.stdin, sys.stdout, stepsize, tabsize, expandtabs)
    else:
        run = file_actions[action]
        for filename in args:
            run(filename, stepsize, tabsize, expandtabs)
        # end for
    # end if
# end def test
539
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    test()
# end if
543