14710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#! /usr/bin/env python 24710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 34710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm"""The Tab Nanny despises ambiguous indentation. She knows no mercy. 44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtabnanny -- Detection of ambiguous indentation 64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmFor the time being this module is intended to be called as a script. 84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmHowever it is possible to import it into an IDE and use the function 94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmcheck() described below. 104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmWarning: The API provided by this module is likely to change in future 124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmreleases; such changes may not be backward compatible. 134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm""" 144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# Released to the public domain, by Tim Peters, 15 April 1998. 164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# XXX Note: this is now a standard library module. 184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# XXX The API needs to undergo changes however; the current code is too 194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm# XXX script-like. This will be addressed later. 204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm__version__ = "6" 224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport os 244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport sys 254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport getopt 264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmimport tokenize 274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmif not hasattr(tokenize, 'NL'): 284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm raise ValueError("tokenize.NL doesn't exist -- tokenize module too old") 294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm__all__ = ["check", "NannyNag", "process_tokens"] 314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmverbose = 0 334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfilename_only = 0 344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef errprint(*args): 364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm sep = "" 374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for arg in args: 384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm sys.stderr.write(sep + str(arg)) 394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm sep = " " 404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm sys.stderr.write("\n") 414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef main(): 434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm global verbose, filename_only 444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm opts, args = getopt.getopt(sys.argv[1:], "qv") 464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except getopt.error, msg: 474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm errprint(msg) 484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for o, a in opts: 504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if o == '-q': 514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm filename_only = filename_only + 1 524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if o == '-v': 534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm verbose = verbose + 1 544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not args: 554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...") 564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for arg in args: 584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm check(arg) 594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass NannyNag(Exception): 614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Raised by tokeneater() if detecting an ambiguous indent. 634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Captured and handled in check(). 644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, lineno, msg, line): 664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.lineno, self.msg, self.line = lineno, msg, line 674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_lineno(self): 684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.lineno 694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_msg(self): 704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.msg 714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def get_line(self): 724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.line 734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef check(file): 754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """check(file_or_dir) 764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm If file_or_dir is a directory and not a symbolic link, then recursively 784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm descend the directory tree named by file_or_dir, checking all .py files 794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm along the way. If file_or_dir is an ordinary Python source file, it is 804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm checked for whitespace related problems. The diagnostic messages are 814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm written to standard output using the print statement. 824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm """ 834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if os.path.isdir(file) and not os.path.islink(file): 854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if verbose: 864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm print "%r: listing directory" % (file,) 874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm names = os.listdir(file) 884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for name in names: 894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm fullname = os.path.join(file, name) 904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (os.path.isdir(fullname) and 914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm not os.path.islink(fullname) or 924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm os.path.normcase(name[-3:]) == ".py"): 934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm check(fullname) 944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm f = open(file) 984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except IOError, msg: 994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm errprint("%r: I/O Error: %s" % (file, msg)) 1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if verbose > 1: 1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm print "checking %r ..." % file 1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm try: 1064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm process_tokens(tokenize.generate_tokens(f.readline)) 1074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except tokenize.TokenError, msg: 1094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm errprint("%r: Token Error: %s" % (file, msg)) 1104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 1114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except IndentationError, msg: 1134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm errprint("%r: Indentation Error: %s" % (file, msg)) 1144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 1154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm except NannyNag, nag: 1174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm badline = nag.get_lineno() 1184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm line = nag.get_line() 1194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if verbose: 1204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm print "%r: *** Line %d: trouble in tab city! ***" % (file, badline) 1214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm print "offending line: %r" % (line,) 1224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm print nag.get_msg() 1234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 1244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if ' ' in file: file = '"' + file + '"' 1254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if filename_only: print file 1264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: print file, badline, repr(line) 1274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 1284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if verbose: 1304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm print "%r: Clean bill of health." % (file,) 1314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass Whitespace: 1334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # the characters used for space and tab 1344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm S, T = ' \t' 1354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # members: 1374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # raw 1384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # the original string 1394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # n 1404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # the number of leading whitespace characters in raw 1414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # nt 1424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # the number of tabs in raw[:n] 1434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # norm 1444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # the normal form as a pair (count, trailing), where: 1454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # count 1464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # a tuple such that raw[:n] contains count[i] 1474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # instances of S * i + T 1484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # trailing 1494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # the number of trailing spaces in raw[:n] 1504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # It's A Theorem that m.indent_level(t) == 1514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # n.indent_level(t) for all t >= 1 iff m.norm == n.norm. 1524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # is_simple 1534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # true iff raw[:n] is of the form (T*)(S*) 1544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def __init__(self, ws): 1564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.raw = ws 1574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm S, T = Whitespace.S, Whitespace.T 1584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm count = [] 1594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm b = n = nt = 0 1604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for ch in self.raw: 1614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if ch == S: 1624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n = n + 1 1634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm b = b + 1 1644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elif ch == T: 1654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n = n + 1 1664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nt = nt + 1 1674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if b >= len(count): 1684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm count = count + [0] * (b - len(count) + 1) 1694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm count[b] = count[b] + 1 1704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm b = 0 1714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else: 1724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm break 1734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.n = n 1744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.nt = nt 1754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.norm = tuple(count), b 1764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.is_simple = len(count) <= 1 1774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # return length of longest contiguous run of spaces (whether or not 1794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # preceding a tab) 1804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def longest_run_of_spaces(self): 1814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm count, trailing = self.norm 1824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return max(len(count)-1, trailing) 1834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def indent_level(self, tabsize): 1854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # count, il = self.norm 1864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # for i in range(len(count)): 1874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # if count[i]: 1884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # il = il + (i/tabsize + 1)*tabsize * count[i] 1894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # return il 1904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # quicker: 1924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # il = trailing + sum (i/ts + 1)*ts*count[i] = 1934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # trailing + ts * sum (i/ts + 1)*count[i] = 1944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # trailing + ts * sum i/ts*count[i] + count[i] = 1954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] = 1964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # trailing + ts * [(sum i/ts*count[i]) + num_tabs] 1974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # and note that i/ts*count[i] is 0 when i < ts 1984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm count, trailing = self.norm 2004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm il = 0 2014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for i in range(tabsize, len(count)): 2024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm il = il + i/tabsize * count[i] 2034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return trailing + tabsize * (il + self.nt) 2044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # return true iff self.indent_level(t) == other.indent_level(t) 2064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # for all t >= 1 2074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def equal(self, other): 2084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.norm == other.norm 2094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # return a list of tuples (ts, i1, i2) such that 2114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # i1 == self.indent_level(ts) != other.indent_level(ts) == i2. 2124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Intended to be used after not self.equal(other) is known, in which 2134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # case it will return at least one witnessing tab size. 2144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def not_equal_witness(self, other): 2154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n = max(self.longest_run_of_spaces(), 2164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm other.longest_run_of_spaces()) + 1 2174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm a = [] 2184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for ts in range(1, n+1): 2194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.indent_level(ts) != other.indent_level(ts): 2204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm a.append( (ts, 2214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.indent_level(ts), 2224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm other.indent_level(ts)) ) 2234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return a 2244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Return True iff self.indent_level(t) < other.indent_level(t) 2264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # for all t >= 1. 2274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # The algorithm is due to Vincent Broman. 2284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Easy to prove it's correct. 2294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # XXXpost that. 2304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Trivial to prove n is sharp (consider T vs ST). 2314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Unknown whether there's a faster general way. I suspected so at 2324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # first, but no longer. 2334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # For the special (but common!) case where M and N are both of the 2344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # form (T*)(S*), M.less(N) iff M.len() < N.len() and 2354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded. 2364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # XXXwrite that up. 2374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1. 2384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def less(self, other): 2394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.n >= other.n: 2404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 2414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.is_simple and other.is_simple: 2424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return self.nt <= other.nt 2434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n = max(self.longest_run_of_spaces(), 2444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm other.longest_run_of_spaces()) + 1 2454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # the self.n >= other.n test already did it for ts=1 2464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for ts in range(2, n+1): 2474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.indent_level(ts) >= other.indent_level(ts): 2484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return False 2494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return True 2504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # return a list of tuples (ts, i1, i2) such that 2524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2. 2534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Intended to be used after not self.less(other) is known, in which 2544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # case it will return at least one witnessing tab size. 2554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm def not_less_witness(self, other): 2564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n = max(self.longest_run_of_spaces(), 2574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm other.longest_run_of_spaces()) + 1 2584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm a = [] 2594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for ts in range(1, n+1): 2604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if self.indent_level(ts) >= other.indent_level(ts): 2614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm a.append( (ts, 2624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm self.indent_level(ts), 2634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm other.indent_level(ts)) ) 2644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return a 2654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef format_witnesses(w): 2674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm firsts = map(lambda tup: str(tup[0]), w) 2684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm prefix = "at tab size" 2694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if len(w) > 1: 2704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm prefix = prefix + "s" 2714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return prefix + " " + ', '.join(firsts) 2724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdef process_tokens(tokens): 2744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm INDENT = tokenize.INDENT 2754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm DEDENT = tokenize.DEDENT 2764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm NEWLINE = tokenize.NEWLINE 2774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm JUNK = tokenize.COMMENT, tokenize.NL 2784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm indents = [Whitespace("")] 2794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm check_equal = 0 2804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (type, token, start, end, line) in tokens: 2824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if type == NEWLINE: 2834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # a program statement, or ENDMARKER, will eventually follow, 2844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # after some (possibly empty) run of tokens of the form 2854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # (NL | COMMENT)* (INDENT | DEDENT+)? 2864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # If an INDENT appears, setting check_equal is wrong, and will 2874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # be undone when we see the INDENT. 2884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm check_equal = 1 2894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elif type == INDENT: 2914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm check_equal = 0 2924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm thisguy = Whitespace(token) 2934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not indents[-1].less(thisguy): 2944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm witness = indents[-1].not_less_witness(thisguy) 2954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm msg = "indent not greater e.g. " + format_witnesses(witness) 2964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm raise NannyNag(start[0], msg, line) 2974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm indents.append(thisguy) 2984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elif type == DEDENT: 3004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # there's nothing we need to check here! what's important is 3014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # that when the run of DEDENTs ends, the indentation of the 3024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # program statement (or ENDMARKER) that triggered the run is 3034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # equal to what's left at the top of the indents stack 3044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # Ouch! This assert triggers if the last line of the source 3064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # is indented *and* lacks a newline -- then DEDENTs pop out 3074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # of thin air. 3084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # assert check_equal # else no earlier NEWLINE, or an earlier INDENT 3094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm check_equal = 1 3104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm del indents[-1] 3124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm elif check_equal and type not in JUNK: 3144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # this is the first "real token" following a NEWLINE, so it 3154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # must be the first token of the next program statement, or an 3164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # ENDMARKER; the "line" argument exposes the leading whitespace 3174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # for this statement; in the case of ENDMARKER, line is an empty 3184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # string, so will properly match the empty string with which the 3194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm # "indents" stack was seeded 3204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm check_equal = 0 3214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm thisguy = Whitespace(line) 3224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if not indents[-1].equal(thisguy): 3234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm witness = indents[-1].not_equal_witness(thisguy) 3244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm msg = "indent not equal e.g. " + format_witnesses(witness) 3254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm raise NannyNag(start[0], msg, line) 3264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmif __name__ == '__main__': 3294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm main() 330