# panelparser.py revision 1ce7c6fde8a8879ec64242dc0d01b660b934633a
# Module 'parser'
#
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme so it can't help writing S-expressions).
#
# See notes at end of file.


# Character classes used by the tokenizer.
whitespace = ' \t\n'
operators = '()\''
separators = operators + whitespace + ';' + '"'


# Tokenize a string.
# Return a list of tokens (strings).
#
def tokenize_string(s):
    """Split s into a list of S-expression tokens.

    Tokens are: the single-character operators '(', ')' and "'";
    double-quoted strings (the quotes and any backslash escapes are
    kept verbatim in the token); and maximal runs of non-separator
    characters.  A semicolon starts a comment that discards the rest
    of the input string.  Whitespace separates tokens.
    """
    tokens = []
    # Scan by index instead of repeatedly re-slicing s, so a long
    # input is tokenized in linear rather than quadratic time.
    i, n = 0, len(s)
    while i < n:
        c = s[i]
        if c in whitespace:
            i = i + 1
        elif c == ';':
            # Comment: runs to the end of the input.
            break
        elif c == '"':
            # Quoted string; backslash escapes the following character.
            # If the closing quote is missing, the token runs to the end.
            j = i + 1
            while j < n:
                ch = s[j]
                j = j + 1
                if ch == '"':
                    break
                if ch == '\\':
                    j = j + 1
            tokens.append(s[i:j])
            i = j
        elif c in operators:
            tokens.append(c)
            i = i + 1
        else:
            # Atom: everything up to the next separator character.
            j = i + 1
            while j < n and s[j] not in separators:
                j = j + 1
            tokens.append(s[i:j])
            i = j
    return tokens


# Tokenize a whole file (given as file object, not as file name).
# Return a list of tokens (strings).
#
def tokenize_file(fp):
    """Tokenize an open file object line by line.

    Returns the concatenated token list for the whole file, in the
    same form as tokenize_string().
    """
    collected = []
    while True:
        line = fp.readline()
        if not line:
            return collected
        collected.extend(tokenize_string(line))


# Exception raised by parse_expr.
# Historically this was a string exception; it is now a proper class so
# the module runs on modern Python (string exceptions were removed).
# The lowercase name is kept so existing "except syntax_error:" callers
# continue to work.
#
class syntax_error(Exception):
    """Raised by parse_expr() for malformed S-expression input."""


# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the s-expression,
# and tokens contains the remaining tokens.
# May raise syntax_error.
#
def parse_expr(tokens):
    """Parse one parenthesized S-expression from the front of tokens.

    Returns (expr, remaining_tokens), where expr is a (possibly
    nested) list of token strings.  Raises syntax_error if the input
    does not start with '(' or if the closing ')' is missing.
    """
    if not tokens or tokens[0] != '(':
        raise syntax_error('expected "("')
    tokens = tokens[1:]
    expr = []
    while True:
        if not tokens:
            raise syntax_error('missing ")"')
        head = tokens[0]
        if head == ')':
            return expr, tokens[1:]
        if head == '(':
            # Nested expression: recurse, then splice in the sublist.
            subexpr, tokens = parse_expr(tokens)
            expr.append(subexpr)
        else:
            expr.append(head)
            tokens = tokens[1:]


# Parse a file (given as file object, not as file name).
# Return a list of parsed S-expressions found at the top level.
#
def parse_file(fp):
    """Tokenize and parse an entire open file object.

    Returns a list containing one parsed expression (as produced by
    parse_expr) for each top-level S-expression found.
    """
    remaining = tokenize_file(fp)
    parsed = []
    while remaining:
        expr, remaining = parse_expr(remaining)
        parsed.append(expr)
    return parsed


# EXAMPLE:
#
# The input
#	'(hip (hop hur-ray))'
#
# passed to tokenize_string() returns the token list
#	['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#
# When this is passed to parse_expr() it returns the expression
#	['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
#
# When a file containing the example is passed to parse_file() it returns
# a list whose only element is the output of parse_expr() above:
#	[['hip', ['hop', 'hur-ray']]]


# TOKENIZING:
#
# Comments start with semicolon (;) and continue till the end of the line.
#
# Tokens are separated by whitespace, except the following characters
# always form a separate token (outside strings):
#	( ) '
# Strings are enclosed in double quotes (") and backslash (\) is used
# as escape character in strings.
129