# panelparser.py revision 1ce7c6fde8a8879ec64242dc0d01b660b934633a
# Module 'parser'
#
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme so it can't help writing S-expressions).
#
# See notes at end of file.


# Characters skipped between tokens.
whitespace = ' \t\n'
# Characters that always form a one-character token of their own.
operators = '()\''
# Characters that terminate a symbol token (the ';' starts a comment and
# the '"' starts a string).
separators = operators + whitespace + ';' + '"'


# Tokenize a string.
# Return a list of tokens (strings).
#
# The string is scanned left to right:
# - whitespace is skipped;
# - a ';' discards the rest of the string (comment);
# - a '"' starts a string token, which includes both quotes; a backslash
#   inside it causes the next character to be taken literally.  An
#   unterminated string extends to the end of the input;
# - each character in 'operators' is a token by itself;
# - anything else is a symbol that runs up to the next separator.
#
def tokenize_string(s):
    tokens = []
    n = len(s)
    # Scan with an index instead of re-slicing 's' after every token,
    # which copied the remainder of the string each time.
    i = 0
    while i < n:
        c = s[i]
        if c in whitespace:
            i += 1
        elif c == ';':
            # Comment: ignore everything up to the end of the string.
            break
        elif c == '"':
            j = i + 1
            while j < n:
                c = s[j]
                j += 1
                if c == '"':
                    break
                if c == '\\':
                    # Skip the escaped character.  'j' may step past the
                    # end here; the slice below clamps it.
                    j += 1
            tokens.append(s[i:j])
            i = j
        elif c in operators:
            tokens.append(c)
            i += 1
        else:
            j = i + 1
            while j < n and s[j] not in separators:
                j += 1
            tokens.append(s[i:j])
            i = j
    return tokens


# Tokenize a whole file (given as file object, not as file name).
# Return a list of tokens (strings).
#
# Each line is tokenized on its own, so a comment ends at the end of its
# line and a string token cannot span lines.
#
def tokenize_file(fp):
    tokens = []
    while True:
        line = fp.readline()
        if not line:
            break
        # extend() grows the list in place; 'tokens = tokens + ...' copied
        # the whole list for every line read.
        tokens.extend(tokenize_string(line))
    return tokens


# Exception raised by parse_expr.
#
# This is a real exception class (string exceptions are no longer
# supported by Python); the lower-case name is kept so that existing
# 'except syntax_error' clauses keep working.  str() of the instance is
# the error message.
#
class syntax_error(Exception):
    pass


# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the s-expression,
# and tokens contains the remaining tokens.
# May raise syntax_error: 'expected "("' when the input is empty or does
# not start with '(', and 'missing ")"' when the tokens run out before
# the expression is closed.
#
def parse_expr(tokens):
    if not tokens or tokens[0] != '(':
        raise syntax_error('expected "("')
    # Use an explicit stack of enclosing lists instead of recursion, and an
    # index instead of re-slicing 'tokens' per token: nesting depth is then
    # not limited by the interpreter's recursion limit and the token list
    # is copied only once, for the return value.
    enclosing = []
    expr = []
    pos = 1
    n = len(tokens)
    while pos < n:
        token = tokens[pos]
        pos += 1
        if token == '(':
            enclosing.append(expr)
            expr = []
        elif token == ')':
            if not enclosing:
                # Closed the outermost expression.
                return expr, tokens[pos:]
            parent = enclosing.pop()
            parent.append(expr)
            expr = parent
        else:
            expr.append(token)
    raise syntax_error('missing ")"')


# Parse a file (given as file object, not as file name).
# Return a list of parsed S-expressions found at the top level.
#
# The whole file is tokenized first; parse_expr() is then applied
# repeatedly until no tokens remain.  Any exception raised by
# parse_expr() is passed on to the caller.
#
def parse_file(fp):
    parsed = []
    remaining = tokenize_file(fp)
    while True:
        if not remaining:
            return parsed
        # Each call consumes one complete top-level expression.
        expr, remaining = parse_expr(remaining)
        parsed.append(expr)


# EXAMPLE:
#
# The input
#	'(hip (hop hur-ray))'
#
# passed to tokenize_string() returns the token list
#	['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#
# When this is passed to parse_expr() it returns the expression
#	['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
#
# When a file containing the example is passed to parse_file() it returns
# a list whose only element is the output of parse_expr() above:
#	[['hip', ['hop', 'hur-ray']]]


# TOKENIZING:
#
# Comments start with semicolon (;) and continue till the end of the line.
#
# Tokens are separated by whitespace, except the following characters
# always form a separate token (outside strings):
#	( ) '
# Strings are enclosed in double quotes (") and backslash (\) is used
# as escape character in strings.