panelparser.py revision 1ce7c6fde8a8879ec64242dc0d01b660b934633a
# Module 'panelparser'
#
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme, so it can't help writing S-expressions).
#
# See notes at end of file.


whitespace = ' \t\n'
operators = '()\''
separators = operators + whitespace + ';' + '"'


# Tokenize a string.
# Return a list of tokens (strings).
#
def tokenize_string(s):
    tokens = []
    while s:
        c = s[:1]
        if c in whitespace:
            # Skip whitespace between tokens.
            s = s[1:]
        elif c == ';':
            # A comment runs to the end of the line; discard the rest.
            s = ''
        elif c == '"':
            # A string literal: scan to the closing quote, honoring
            # backslash escapes.  The quotes are kept in the token.
            n = len(s)
            i = 1
            while i < n:
                c = s[i]
                i = i+1
                if c == '"': break
                if c == '\\': i = i+1
            tokens.append(s[:i])
            s = s[i:]
        elif c in operators:
            # Operators are always single-character tokens.
            tokens.append(c)
            s = s[1:]
        else:
            # An ordinary atom: scan to the next separator.
            n = len(s)
            i = 1
            while i < n:
                if s[i] in separators: break
                i = i+1
            tokens.append(s[:i])
            s = s[i:]
    return tokens


# Tokenize a whole file (given as file object, not as file name).
# Return a list of tokens (strings).
#
def tokenize_file(fp):
    tokens = []
    for line in fp:
        tokens.extend(tokenize_string(line))
    return tokens


# Exception raised by parse_expr().
#
class syntax_error(Exception):
    pass


# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the S-expression,
# and tokens contains the remaining tokens.
# May raise syntax_error.
#
def parse_expr(tokens):
    if (not tokens) or tokens[0] != '(':
        raise syntax_error('expected "("')
    tokens = tokens[1:]
    expr = []
    while True:
        if not tokens:
            raise syntax_error('missing ")"')
        if tokens[0] == ')':
            return expr, tokens[1:]
        elif tokens[0] == '(':
            subexpr, tokens = parse_expr(tokens)
            expr.append(subexpr)
        else:
            expr.append(tokens[0])
            tokens = tokens[1:]


# Parse a file (given as file object, not as file name).
# Return a list of parsed S-expressions found at the top level.
#
def parse_file(fp):
    tokens = tokenize_file(fp)
    exprlist = []
    while tokens:
        expr, tokens = parse_expr(tokens)
        exprlist.append(expr)
    return exprlist


# EXAMPLE:
#
# The input
#     '(hip (hop hur-ray))'
#
# passed to tokenize_string() returns the token list
#     ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#
# When this is passed to parse_expr() it returns the expression
#     ['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
#
# When a file containing the example is passed to parse_file() it returns
# a list whose only element is the output of parse_expr() above:
#     [['hip', ['hop', 'hur-ray']]]


# TOKENIZING:
#
# Comments start with a semicolon (;) and continue to the end of the line.
#
# Tokens are separated by whitespace, except that the following characters
# always form a separate token (outside strings):
#     ( ) '
# Strings are enclosed in double quotes (") and backslash (\) is used
# as the escape character inside strings.
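

# A minimal self-test sketch (illustrative, not part of the module's API):
# it feeds the EXAMPLE input above through tokenize_string(), parse_expr()
# and parse_file() and checks the documented results.  io.StringIO stands
# in for the file object that parse_file() expects.
if __name__ == '__main__':
    import io

    tokens = tokenize_string("(hip (hop hur-ray))")
    assert tokens == ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']

    expr, rest = parse_expr(tokens)
    assert expr == ['hip', ['hop', 'hur-ray']]
    assert rest == []

    # A trailing comment is stripped by the tokenizer, so it does not
    # affect the parsed result.
    exprlist = parse_file(io.StringIO("(hip (hop hur-ray)) ; a comment\n"))
    assert exprlist == [['hip', ['hop', 'hur-ray']]]

    print('self-test passed:', exprlist)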