14adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao#! /usr/bin/env python
24adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
34adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao"""The Tab Nanny despises ambiguous indentation.  She knows no mercy.
44adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
54adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaotabnanny -- Detection of ambiguous indentation
64adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
74adfde8bc82dd39f59e0445588c3e599ada477dJosh GaoFor the time being this module is intended to be called as a script.
84adfde8bc82dd39f59e0445588c3e599ada477dJosh GaoHowever it is possible to import it into an IDE and use the function
94adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaocheck() described below.
104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
114adfde8bc82dd39f59e0445588c3e599ada477dJosh GaoWarning: The API provided by this module is likely to change in future
124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoreleases; such changes may not be backward compatible.
134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao"""
144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# Released to the public domain, by Tim Peters, 15 April 1998.
164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# XXX Note: this is now a standard library module.
184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# XXX The API needs to undergo changes however; the current code is too
194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao# XXX script-like.  This will be addressed later.
204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao__version__ = "6"
224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoimport os
244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoimport sys
254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoimport getopt
264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoimport tokenize
274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoif not hasattr(tokenize, 'NL'):
284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao__all__ = ["check", "NannyNag", "process_tokens"]
314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoverbose = 0
334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaofilename_only = 0
344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef errprint(*args):
364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    sep = ""
374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    for arg in args:
384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        sys.stderr.write(sep + str(arg))
394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        sep = " "
404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    sys.stderr.write("\n")
414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef main():
434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    global verbose, filename_only
444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    try:
454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        opts, args = getopt.getopt(sys.argv[1:], "qv")
464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    except getopt.error, msg:
474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        errprint(msg)
484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return
494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    for o, a in opts:
504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if o == '-q':
514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            filename_only = filename_only + 1
524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if o == '-v':
534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            verbose = verbose + 1
544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if not args:
554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return
574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    for arg in args:
584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        check(arg)
594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass NannyNag(Exception):
614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """
624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    Raised by tokeneater() if detecting an ambiguous indent.
634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    Captured and handled in check().
644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """
654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __init__(self, lineno, msg, line):
664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.lineno, self.msg, self.line = lineno, msg, line
674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def get_lineno(self):
684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return self.lineno
694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def get_msg(self):
704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return self.msg
714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def get_line(self):
724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return self.line
734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef check(file):
754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """check(file_or_dir)
764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    If file_or_dir is a directory and not a symbolic link, then recursively
784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    descend the directory tree named by file_or_dir, checking all .py files
794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    along the way. If file_or_dir is an ordinary Python source file, it is
804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    checked for whitespace related problems. The diagnostic messages are
814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    written to standard output using the print statement.
824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    """
834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if os.path.isdir(file) and not os.path.islink(file):
854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if verbose:
864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            print "%r: listing directory" % (file,)
874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        names = os.listdir(file)
884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for name in names:
894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            fullname = os.path.join(file, name)
904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if (os.path.isdir(fullname) and
914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                not os.path.islink(fullname) or
924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                os.path.normcase(name[-3:]) == ".py"):
934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                check(fullname)
944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return
954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    try:
974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        f = open(file)
984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    except IOError, msg:
994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        errprint("%r: I/O Error: %s" % (file, msg))
1004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return
1014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if verbose > 1:
1034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        print "checking %r ..." % file
1044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    try:
1064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        process_tokens(tokenize.generate_tokens(f.readline))
1074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    except tokenize.TokenError, msg:
1094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        errprint("%r: Token Error: %s" % (file, msg))
1104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return
1114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    except IndentationError, msg:
1134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        errprint("%r: Indentation Error: %s" % (file, msg))
1144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return
1154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    except NannyNag, nag:
1174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        badline = nag.get_lineno()
1184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        line = nag.get_line()
1194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if verbose:
1204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            print "%r: *** Line %d: trouble in tab city! ***" % (file, badline)
1214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            print "offending line: %r" % (line,)
1224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            print nag.get_msg()
1234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        else:
1244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if ' ' in file: file = '"' + file + '"'
1254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if filename_only: print file
1264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else: print file, badline, repr(line)
1274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return
1284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if verbose:
1304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        print "%r: Clean bill of health." % (file,)
1314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoclass Whitespace:
1334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # the characters used for space and tab
1344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    S, T = ' \t'
1354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # members:
1374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #   raw
1384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #       the original string
1394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #   n
1404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #       the number of leading whitespace characters in raw
1414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #   nt
1424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #       the number of tabs in raw[:n]
1434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #   norm
1444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #       the normal form as a pair (count, trailing), where:
1454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #       count
1464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #           a tuple such that raw[:n] contains count[i]
1474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #           instances of S * i + T
1484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #       trailing
1494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #           the number of trailing spaces in raw[:n]
1504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #       It's A Theorem that m.indent_level(t) ==
1514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
1524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #   is_simple
1534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    #       true iff raw[:n] is of the form (T*)(S*)
1544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def __init__(self, ws):
1564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.raw  = ws
1574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        S, T = Whitespace.S, Whitespace.T
1584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        count = []
1594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        b = n = nt = 0
1604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for ch in self.raw:
1614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if ch == S:
1624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                n = n + 1
1634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                b = b + 1
1644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            elif ch == T:
1654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                n = n + 1
1664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                nt = nt + 1
1674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                if b >= len(count):
1684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                    count = count + [0] * (b - len(count) + 1)
1694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                count[b] = count[b] + 1
1704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                b = 0
1714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            else:
1724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                break
1734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.n    = n
1744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.nt   = nt
1754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.norm = tuple(count), b
1764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        self.is_simple = len(count) <= 1
1774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # return length of longest contiguous run of spaces (whether or not
1794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # preceding a tab)
1804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def longest_run_of_spaces(self):
1814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        count, trailing = self.norm
1824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return max(len(count)-1, trailing)
1834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def indent_level(self, tabsize):
1854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # count, il = self.norm
1864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # for i in range(len(count)):
1874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        #    if count[i]:
1884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        #        il = il + (i/tabsize + 1)*tabsize * count[i]
1894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # return il
1904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # quicker:
1924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # il = trailing + sum (i/ts + 1)*ts*count[i] =
1934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # trailing + ts * sum (i/ts + 1)*count[i] =
1944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # trailing + ts * sum i/ts*count[i] + count[i] =
1954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] =
1964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # trailing + ts * [(sum i/ts*count[i]) + num_tabs]
1974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # and note that i/ts*count[i] is 0 when i < ts
1984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
1994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        count, trailing = self.norm
2004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        il = 0
2014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for i in range(tabsize, len(count)):
2024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            il = il + i/tabsize * count[i]
2034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return trailing + tabsize * (il + self.nt)
2044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
2054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # return true iff self.indent_level(t) == other.indent_level(t)
2064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # for all t >= 1
2074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def equal(self, other):
2084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return self.norm == other.norm
2094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
2104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # return a list of tuples (ts, i1, i2) such that
2114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
2124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # Intended to be used after not self.equal(other) is known, in which
2134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # case it will return at least one witnessing tab size.
2144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def not_equal_witness(self, other):
2154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        n = max(self.longest_run_of_spaces(),
2164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                other.longest_run_of_spaces()) + 1
2174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        a = []
2184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for ts in range(1, n+1):
2194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if self.indent_level(ts) != other.indent_level(ts):
2204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                a.append( (ts,
2214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                           self.indent_level(ts),
2224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                           other.indent_level(ts)) )
2234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return a
2244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
2254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # Return True iff self.indent_level(t) < other.indent_level(t)
2264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # for all t >= 1.
2274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # The algorithm is due to Vincent Broman.
2284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # Easy to prove it's correct.
2294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # XXXpost that.
2304adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # Trivial to prove n is sharp (consider T vs ST).
2314adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # Unknown whether there's a faster general way.  I suspected so at
2324adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # first, but no longer.
2334adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # For the special (but common!) case where M and N are both of the
2344adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
2354adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
2364adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # XXXwrite that up.
2374adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
2384adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def less(self, other):
2394adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if self.n >= other.n:
2404adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return False
2414adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if self.is_simple and other.is_simple:
2424adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            return self.nt <= other.nt
2434adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        n = max(self.longest_run_of_spaces(),
2444adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                other.longest_run_of_spaces()) + 1
2454adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        # the self.n >= other.n test already did it for ts=1
2464adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for ts in range(2, n+1):
2474adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if self.indent_level(ts) >= other.indent_level(ts):
2484adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                return False
2494adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return True
2504adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
2514adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # return a list of tuples (ts, i1, i2) such that
2524adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
2534adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # Intended to be used after not self.less(other) is known, in which
2544adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    # case it will return at least one witnessing tab size.
2554adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    def not_less_witness(self, other):
2564adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        n = max(self.longest_run_of_spaces(),
2574adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                other.longest_run_of_spaces()) + 1
2584adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        a = []
2594adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        for ts in range(1, n+1):
2604adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if self.indent_level(ts) >= other.indent_level(ts):
2614adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                a.append( (ts,
2624adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                           self.indent_level(ts),
2634adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                           other.indent_level(ts)) )
2644adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        return a
2654adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
2664adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef format_witnesses(w):
2674adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    firsts = map(lambda tup: str(tup[0]), w)
2684adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    prefix = "at tab size"
2694adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    if len(w) > 1:
2704adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        prefix = prefix + "s"
2714adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    return prefix + " " + ', '.join(firsts)
2724adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
2734adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaodef process_tokens(tokens):
2744adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    INDENT = tokenize.INDENT
2754adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    DEDENT = tokenize.DEDENT
2764adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    NEWLINE = tokenize.NEWLINE
2774adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    JUNK = tokenize.COMMENT, tokenize.NL
2784adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    indents = [Whitespace("")]
2794adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    check_equal = 0
2804adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
2814adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    for (type, token, start, end, line) in tokens:
2824adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        if type == NEWLINE:
2834adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # a program statement, or ENDMARKER, will eventually follow,
2844adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # after some (possibly empty) run of tokens of the form
2854adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            #     (NL | COMMENT)* (INDENT | DEDENT+)?
2864adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # If an INDENT appears, setting check_equal is wrong, and will
2874adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # be undone when we see the INDENT.
2884adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            check_equal = 1
2894adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
2904adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        elif type == INDENT:
2914adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            check_equal = 0
2924adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            thisguy = Whitespace(token)
2934adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if not indents[-1].less(thisguy):
2944adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                witness = indents[-1].not_less_witness(thisguy)
2954adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                msg = "indent not greater e.g. " + format_witnesses(witness)
2964adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                raise NannyNag(start[0], msg, line)
2974adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            indents.append(thisguy)
2984adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
2994adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        elif type == DEDENT:
3004adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # there's nothing we need to check here!  what's important is
3014adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # that when the run of DEDENTs ends, the indentation of the
3024adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # program statement (or ENDMARKER) that triggered the run is
3034adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # equal to what's left at the top of the indents stack
3044adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3054adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # Ouch!  This assert triggers if the last line of the source
3064adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # is indented *and* lacks a newline -- then DEDENTs pop out
3074adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # of thin air.
3084adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
3094adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            check_equal = 1
3104adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3114adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            del indents[-1]
3124adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3134adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao        elif check_equal and type not in JUNK:
3144adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # this is the first "real token" following a NEWLINE, so it
3154adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # must be the first token of the next program statement, or an
3164adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # ENDMARKER; the "line" argument exposes the leading whitespace
3174adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # for this statement; in the case of ENDMARKER, line is an empty
3184adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # string, so will properly match the empty string with which the
3194adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            # "indents" stack was seeded
3204adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            check_equal = 0
3214adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            thisguy = Whitespace(line)
3224adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao            if not indents[-1].equal(thisguy):
3234adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                witness = indents[-1].not_equal_witness(thisguy)
3244adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                msg = "indent not equal e.g. " + format_witnesses(witness)
3254adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao                raise NannyNag(start[0], msg, line)
3264adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3274adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao
3284adfde8bc82dd39f59e0445588c3e599ada477dJosh Gaoif __name__ == '__main__':
3294adfde8bc82dd39f59e0445588c3e599ada477dJosh Gao    main()
330