14710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#! /usr/bin/env python
24710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
34710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm"""The Tab Nanny despises ambiguous indentation.  She knows no mercy.
44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtabnanny -- Detection of ambiguous indentation
64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmFor the time being this module is intended to be called as a script.
84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmHowever it is possible to import it into an IDE and use the function
94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmcheck() described below.
104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmWarning: The API provided by this module is likely to change in future
124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmreleases; such changes may not be backward compatible.
134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm"""
144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Released to the public domain, by Tim Peters, 15 April 1998.
164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# XXX Note: this is now a standard library module.
184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# XXX The API needs to undergo changes however; the current code is too
194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# XXX script-like.  This will be addressed later.
204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm__version__ = "6"
224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport os
244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport sys
254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport getopt
264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport tokenize
274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmif not hasattr(tokenize, 'NL'):
284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm__all__ = ["check", "NannyNag", "process_tokens"]
314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmverbose = 0
334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfilename_only = 0
344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef errprint(*args):
364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    sep = ""
374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for arg in args:
384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        sys.stderr.write(sep + str(arg))
394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        sep = " "
404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    sys.stderr.write("\n")
414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef main():
434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    global verbose, filename_only
444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    try:
454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        opts, args = getopt.getopt(sys.argv[1:], "qv")
464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    except getopt.error, msg:
474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        errprint(msg)
484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return
494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for o, a in opts:
504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if o == '-q':
514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            filename_only = filename_only + 1
524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if o == '-v':
534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            verbose = verbose + 1
544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if not args:
554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return
574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for arg in args:
584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        check(arg)
594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass NannyNag(Exception):
614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Raised by tokeneater() if detecting an ambiguous indent.
634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Captured and handled in check().
644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self, lineno, msg, line):
664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.lineno, self.msg, self.line = lineno, msg, line
674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def get_lineno(self):
684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.lineno
694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def get_msg(self):
704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.msg
714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def get_line(self):
724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.line
734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef check(file):
754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """check(file_or_dir)
764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    If file_or_dir is a directory and not a symbolic link, then recursively
784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    descend the directory tree named by file_or_dir, checking all .py files
794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    along the way. If file_or_dir is an ordinary Python source file, it is
804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    checked for whitespace related problems. The diagnostic messages are
814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    written to standard output using the print statement.
824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    """
834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if os.path.isdir(file) and not os.path.islink(file):
854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if verbose:
864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            print "%r: listing directory" % (file,)
874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        names = os.listdir(file)
884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for name in names:
894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            fullname = os.path.join(file, name)
904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (os.path.isdir(fullname) and
914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                not os.path.islink(fullname) or
924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                os.path.normcase(name[-3:]) == ".py"):
934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                check(fullname)
944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return
954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    try:
974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        f = open(file)
984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    except IOError, msg:
994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        errprint("%r: I/O Error: %s" % (file, msg))
1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return
1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if verbose > 1:
1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        print "checking %r ..." % file
1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    try:
1064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        process_tokens(tokenize.generate_tokens(f.readline))
1074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    except tokenize.TokenError, msg:
1094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        errprint("%r: Token Error: %s" % (file, msg))
1104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return
1114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    except IndentationError, msg:
1134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        errprint("%r: Indentation Error: %s" % (file, msg))
1144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return
1154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    except NannyNag, nag:
1174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        badline = nag.get_lineno()
1184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        line = nag.get_line()
1194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if verbose:
1204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            print "%r: *** Line %d: trouble in tab city! ***" % (file, badline)
1214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            print "offending line: %r" % (line,)
1224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            print nag.get_msg()
1234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
1244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if ' ' in file: file = '"' + file + '"'
1254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if filename_only: print file
1264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else: print file, badline, repr(line)
1274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return
1284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if verbose:
1304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        print "%r: Clean bill of health." % (file,)
1314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass Whitespace:
1334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # the characters used for space and tab
1344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    S, T = ' \t'
1354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # members:
1374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #   raw
1384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #       the original string
1394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #   n
1404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #       the number of leading whitespace characters in raw
1414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #   nt
1424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #       the number of tabs in raw[:n]
1434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #   norm
1444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #       the normal form as a pair (count, trailing), where:
1454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #       count
1464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #           a tuple such that raw[:n] contains count[i]
1474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #           instances of S * i + T
1484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #       trailing
1494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #           the number of trailing spaces in raw[:n]
1504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #       It's A Theorem that m.indent_level(t) ==
1514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
1524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #   is_simple
1534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    #       true iff raw[:n] is of the form (T*)(S*)
1544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self, ws):
1564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.raw  = ws
1574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        S, T = Whitespace.S, Whitespace.T
1584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        count = []
1594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        b = n = nt = 0
1604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for ch in self.raw:
1614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if ch == S:
1624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                n = n + 1
1634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                b = b + 1
1644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            elif ch == T:
1654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                n = n + 1
1664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                nt = nt + 1
1674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if b >= len(count):
1684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    count = count + [0] * (b - len(count) + 1)
1694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                count[b] = count[b] + 1
1704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                b = 0
1714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else:
1724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                break
1734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.n    = n
1744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.nt   = nt
1754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.norm = tuple(count), b
1764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.is_simple = len(count) <= 1
1774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # return length of longest contiguous run of spaces (whether or not
1794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # preceding a tab)
1804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def longest_run_of_spaces(self):
1814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        count, trailing = self.norm
1824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return max(len(count)-1, trailing)
1834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def indent_level(self, tabsize):
1854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # count, il = self.norm
1864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # for i in range(len(count)):
1874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        #    if count[i]:
1884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        #        il = il + (i/tabsize + 1)*tabsize * count[i]
1894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # return il
1904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # quicker:
1924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # il = trailing + sum (i/ts + 1)*ts*count[i] =
1934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # trailing + ts * sum (i/ts + 1)*count[i] =
1944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # trailing + ts * sum i/ts*count[i] + count[i] =
1954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] =
1964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # trailing + ts * [(sum i/ts*count[i]) + num_tabs]
1974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # and note that i/ts*count[i] is 0 when i < ts
1984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        count, trailing = self.norm
2004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        il = 0
2014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for i in range(tabsize, len(count)):
2024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            il = il + i/tabsize * count[i]
2034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return trailing + tabsize * (il + self.nt)
2044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # return true iff self.indent_level(t) == other.indent_level(t)
2064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # for all t >= 1
2074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def equal(self, other):
2084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.norm == other.norm
2094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # return a list of tuples (ts, i1, i2) such that
2114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
2124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Intended to be used after not self.equal(other) is known, in which
2134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # case it will return at least one witnessing tab size.
2144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def not_equal_witness(self, other):
2154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        n = max(self.longest_run_of_spaces(),
2164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                other.longest_run_of_spaces()) + 1
2174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        a = []
2184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for ts in range(1, n+1):
2194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if self.indent_level(ts) != other.indent_level(ts):
2204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                a.append( (ts,
2214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                           self.indent_level(ts),
2224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                           other.indent_level(ts)) )
2234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return a
2244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Return True iff self.indent_level(t) < other.indent_level(t)
2264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # for all t >= 1.
2274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # The algorithm is due to Vincent Broman.
2284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Easy to prove it's correct.
2294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # XXXpost that.
2304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Trivial to prove n is sharp (consider T vs ST).
2314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Unknown whether there's a faster general way.  I suspected so at
2324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # first, but no longer.
2334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # For the special (but common!) case where M and N are both of the
2344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
2354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
2364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # XXXwrite that up.
2374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
2384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def less(self, other):
2394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.n >= other.n:
2404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return False
2414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.is_simple and other.is_simple:
2424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return self.nt <= other.nt
2434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        n = max(self.longest_run_of_spaces(),
2444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                other.longest_run_of_spaces()) + 1
2454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        # the self.n >= other.n test already did it for ts=1
2464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for ts in range(2, n+1):
2474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if self.indent_level(ts) >= other.indent_level(ts):
2484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return False
2494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return True
2504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # return a list of tuples (ts, i1, i2) such that
2524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
2534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # Intended to be used after not self.less(other) is known, in which
2544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    # case it will return at least one witnessing tab size.
2554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def not_less_witness(self, other):
2564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        n = max(self.longest_run_of_spaces(),
2574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                other.longest_run_of_spaces()) + 1
2584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        a = []
2594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for ts in range(1, n+1):
2604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if self.indent_level(ts) >= other.indent_level(ts):
2614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                a.append( (ts,
2624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                           self.indent_level(ts),
2634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                           other.indent_level(ts)) )
2644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return a
2654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef format_witnesses(w):
2674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    firsts = map(lambda tup: str(tup[0]), w)
2684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    prefix = "at tab size"
2694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if len(w) > 1:
2704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        prefix = prefix + "s"
2714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return prefix + " " + ', '.join(firsts)
2724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef process_tokens(tokens):
2744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    INDENT = tokenize.INDENT
2754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    DEDENT = tokenize.DEDENT
2764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    NEWLINE = tokenize.NEWLINE
2774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    JUNK = tokenize.COMMENT, tokenize.NL
2784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    indents = [Whitespace("")]
2794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    check_equal = 0
2804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (type, token, start, end, line) in tokens:
2824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if type == NEWLINE:
2834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # a program statement, or ENDMARKER, will eventually follow,
2844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # after some (possibly empty) run of tokens of the form
2854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            #     (NL | COMMENT)* (INDENT | DEDENT+)?
2864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # If an INDENT appears, setting check_equal is wrong, and will
2874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # be undone when we see the INDENT.
2884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            check_equal = 1
2894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif type == INDENT:
2914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            check_equal = 0
2924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            thisguy = Whitespace(token)
2934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if not indents[-1].less(thisguy):
2944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                witness = indents[-1].not_less_witness(thisguy)
2954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                msg = "indent not greater e.g. " + format_witnesses(witness)
2964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                raise NannyNag(start[0], msg, line)
2974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            indents.append(thisguy)
2984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif type == DEDENT:
3004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # there's nothing we need to check here!  what's important is
3014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # that when the run of DEDENTs ends, the indentation of the
3024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # program statement (or ENDMARKER) that triggered the run is
3034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # equal to what's left at the top of the indents stack
3044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # Ouch!  This assert triggers if the last line of the source
3064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # is indented *and* lacks a newline -- then DEDENTs pop out
3074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # of thin air.
3084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
3094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            check_equal = 1
3104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            del indents[-1]
3124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        elif check_equal and type not in JUNK:
3144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # this is the first "real token" following a NEWLINE, so it
3154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # must be the first token of the next program statement, or an
3164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # ENDMARKER; the "line" argument exposes the leading whitespace
3174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # for this statement; in the case of ENDMARKER, line is an empty
3184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # string, so will properly match the empty string with which the
3194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            # "indents" stack was seeded
3204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            check_equal = 0
3214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            thisguy = Whitespace(line)
3224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if not indents[-1].equal(thisguy):
3234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                witness = indents[-1].not_equal_witness(thisguy)
3244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                msg = "indent not equal e.g. " + format_witnesses(witness)
3254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                raise NannyNag(start[0], msg, line)
3264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmif __name__ == '__main__':
3294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    main()
330