# Module 'parser'
#
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme so it can't help writing S-expressions).
#
# See notes at end of file.

# Py3k deprecation shim: emits the removal warning, then deletes the
# helper name so it does not leak into this module's namespace.
# NOTE(review): warnpy3k exists only in Python 2.6/2.7 warnings — this
# module targets Python 2 and was removed in Python 3.0.
from warnings import warnpy3k
warnpy3k("the panelparser module has been removed in Python 3.0", stacklevel=2)
del warnpy3k


# Character classes used by the tokenizer below:
# - whitespace separates tokens and is otherwise discarded;
# - each operator character ( ) ' is a one-character token by itself;
# - separators end an unquoted word token (operators, whitespace,
#   comment start ';' and string quote '"').
whitespace = ' \t\n'
operators = '()\''
separators = operators + whitespace + ';' + '"'


# Tokenize a string.
# Return a list of tokens (strings).
#
def tokenize_string(s):
    """Tokenize a string; return a list of tokens (strings).

    Whitespace is skipped; ';' starts a comment running to the end of
    the string; a '"'-quoted string (with backslash escapes) forms a
    single token that keeps its quotes; each operator character is a
    token by itself; any other run of non-separator characters is a
    word token.
    """
    tokens = []
    while s:
        c = s[:1]
        if c in whitespace:
            s = s[1:]
        elif c == ';':
            # Comment: discard the rest of the string (tokenize_file
            # feeds one line at a time, so this ends the comment line).
            s = ''
        elif c == '"':
            # Quoted string: scan to the closing quote, honoring
            # backslash escapes; the quotes stay in the token.
            n = len(s)
            i = 1
            while i < n:
                c = s[i]
                i = i+1
                if c == '"': break
                if c == '\\': i = i+1  # skip the escaped character
            tokens.append(s[:i])
            s = s[i:]
        elif c in operators:
            tokens.append(c)
            s = s[1:]
        else:
            # Word token: runs until the next separator character.
            n = len(s)
            i = 1
            while i < n:
                if s[i] in separators: break
                i = i+1
            tokens.append(s[:i])
            s = s[i:]
    return tokens


# Tokenize a whole file (given as file object, not as file name).
# Return a list of tokens (strings).
#
def tokenize_file(fp):
    """Tokenize a whole file (given as file object, not as file name).

    Return one list containing the tokens of every line.
    """
    tokens = []
    # Iterate the file directly and extend in place.  The original
    # rebuilt the list with 'tokens = tokens + tokenize_string(line)'
    # for every line, which is quadratic in the total token count.
    for line in fp:
        tokens.extend(tokenize_string(line))
    return tokens


# Exception raised by parse_expr.
# NOTE(review): this is a Python 2 "string exception"; raising/catching
# strings was removed in Python 2.6.  The name is kept unchanged for
# backward compatibility of the module-level API.
#
syntax_error = 'syntax error'


# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the s-expression,
# and tokens contains the remaining tokens.
# May raise SyntaxError.
#
def parse_expr(tokens):
    """Parse one parenthesized S-expression from a token list.

    Input is a list of tokens as returned by tokenize_*().
    Return a pair (expr, tokens) where expr is a (possibly nested)
    list of token strings and tokens holds the remaining, unconsumed
    tokens.

    Raises SyntaxError when the input does not start with '(' or the
    matching ')' is missing.  (The original raised the module-level
    string 'syntax_error'; string exceptions were removed from Python
    in 2.6, so a real exception class is raised instead.)
    """
    if (not tokens) or tokens[0] != '(':
        raise SyntaxError('expected "("')
    tokens = tokens[1:]
    expr = []
    while 1:
        if not tokens:
            raise SyntaxError('missing ")"')
        if tokens[0] == ')':
            return expr, tokens[1:]
        elif tokens[0] == '(':
            # Nested expression: recurse and append the sub-list.
            subexpr, tokens = parse_expr(tokens)
            expr.append(subexpr)
        else:
            # Atom: any non-parenthesis token is kept verbatim.
            expr.append(tokens[0])
            tokens = tokens[1:]


# Parse a file (given as file object, not as file name).
# Return a list of parsed S-expressions found at the top level.
#
def parse_file(fp):
    """Parse a file (given as file object, not as file name).

    Return the list of parsed S-expressions found at the top level.
    """
    remaining = tokenize_file(fp)
    expressions = []
    while remaining:
        parsed, remaining = parse_expr(remaining)
        expressions.append(parsed)
    return expressions


# EXAMPLE:
#
# The input
#   '(hip (hop hur-ray))'
#
# passed to tokenize_string() returns the token list
#   ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#
# When this is passed to parse_expr() it returns the expression
#   ['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
1171ce7c6fde8a8879ec64242dc0d01b660b934633aGuido van Rossum# 1181ce7c6fde8a8879ec64242dc0d01b660b934633aGuido van Rossum# When a file containing the example is passed to parse_file() it returns 1191ce7c6fde8a8879ec64242dc0d01b660b934633aGuido van Rossum# a list whose only element is the output of parse_expr() above: 120182b5aca27d376b08a2904bed42b751496f932f3Tim Peters# [['hip', ['hop', 'hur-ray']]] 1211ce7c6fde8a8879ec64242dc0d01b660b934633aGuido van Rossum 1221ce7c6fde8a8879ec64242dc0d01b660b934633aGuido van Rossum 1231ce7c6fde8a8879ec64242dc0d01b660b934633aGuido van Rossum# TOKENIZING: 1241ce7c6fde8a8879ec64242dc0d01b660b934633aGuido van Rossum# 1251ce7c6fde8a8879ec64242dc0d01b660b934633aGuido van Rossum# Comments start with semicolon (;) and continue till the end of the line. 1261ce7c6fde8a8879ec64242dc0d01b660b934633aGuido van Rossum# 1271ce7c6fde8a8879ec64242dc0d01b660b934633aGuido van Rossum# Tokens are separated by whitespace, except the following characters 1281ce7c6fde8a8879ec64242dc0d01b660b934633aGuido van Rossum# always form a separate token (outside strings): 129182b5aca27d376b08a2904bed42b751496f932f3Tim Peters# ( ) ' 1301ce7c6fde8a8879ec64242dc0d01b660b934633aGuido van Rossum# Strings are enclosed in double quotes (") and backslash (\) is used 1311ce7c6fde8a8879ec64242dc0d01b660b934633aGuido van Rossum# as escape character in strings. 132