# Module 'panelparser'
#
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme so it can't help writing S-expressions).
#
# See notes at end of file.


# Lexical tables used by the tokenizer.
#
# whitespace: characters that separate tokens and are otherwise ignored.
# operators:  characters that always form a one-character token.
# separators: every character that terminates a bare (unquoted) token.
#
whitespace = ' \t\n'
operators = '()\''
separators = operators + whitespace + ';' + '"'


def tokenize_string(s):
    """Tokenize a string and return a list of tokens (strings).

    Tokens are produced as follows:

    - whitespace is skipped;
    - a semicolon starts a comment that runs to the end of the line
      (or to the end of the string when there is no newline);
    - a double quote starts a string token; the token keeps both quotes
      and any backslash escapes verbatim.  An unterminated string runs
      to the end of the input;
    - each character in 'operators' is a token of its own;
    - anything else is a bare token that runs up to the next character
      in 'separators'.
    """
    tokens = []
    n = len(s)
    pos = 0
    # Scan with an index instead of repeatedly re-slicing the input, so
    # the work stays linear in the length of the string.
    while pos < n:
        c = s[pos]
        if c in whitespace:
            pos += 1
        elif c == ';':
            # A comment ends at the newline, not at the end of the input,
            # so tokens on following lines are still seen.
            eol = s.find('\n', pos)
            if eol < 0:
                break
            pos = eol + 1
        elif c == '"':
            end = pos + 1
            while end < n:
                c = s[end]
                end += 1
                if c == '"':
                    break
                if c == '\\':
                    # Skip the escaped character, whatever it is.
                    end += 1
            # 'end' may overshoot by one after a trailing backslash;
            # slicing clamps it to the end of the string.
            tokens.append(s[pos:end])
            pos = end
        elif c in operators:
            tokens.append(c)
            pos += 1
        else:
            end = pos + 1
            while end < n and s[end] not in separators:
                end += 1
            tokens.append(s[pos:end])
            pos = end
    return tokens


def tokenize_file(fp):
    """Tokenize a whole file and return a list of tokens (strings).

    'fp' is an open file object (not a file name); it is read with
    readline() until an empty result signals end of file.

    Each line is tokenized on its own, so a token never spans two lines.
    """
    tokens = []
    while True:
        line = fp.readline()
        if not line:
            break
        # extend() grows the list in place; rebuilding it with '+' for
        # every line would copy all earlier tokens again each time.
        tokens.extend(tokenize_string(line))
    return tokens


class syntax_error(Exception):
    """Exception raised by parse_expr() for malformed input.

    The lower-case name is kept so that existing 'except syntax_error'
    clauses continue to work; the message is the exception's argument.
    """


def parse_expr(tokens):
    """Parse one S-expression from a list of tokens.

    'tokens' is a token list as returned by tokenize_string() or
    tokenize_file(); its first token must be '('.

    Return a pair (expr, tokens) where expr is a list representing the
    S-expression (nested lists for nested parentheses, strings for all
    other tokens) and tokens holds the tokens remaining after the
    matching ')'.

    Raise syntax_error if the input does not start with '(' or if a
    closing ')' is missing.
    """
    if not tokens or tokens[0] != '(':
        raise syntax_error('expected "("')
    expr = []      # the list currently being filled
    pending = []   # enclosing lists, innermost last
    n = len(tokens)
    pos = 1
    # Walk the tokens by index with an explicit stack: this avoids both
    # re-slicing the token list for every token and hitting the recursion
    # limit on deeply nested input.
    while pos < n:
        token = tokens[pos]
        pos += 1
        if token == '(':
            inner = []
            expr.append(inner)
            pending.append(expr)
            expr = inner
        elif token == ')':
            if not pending:
                # Closed the outermost list: hand back what is left.
                return expr, tokens[pos:]
            expr = pending.pop()
        else:
            expr.append(token)
    raise syntax_error('missing ")"')


def parse_file(fp):
    """Parse a file and return the list of its top-level S-expressions.

    'fp' is an open file object (not a file name).  The whole file is
    tokenized first; parse_expr() is then applied repeatedly until no
    tokens are left, collecting one parsed expression per call.

    Any syntax_error raised by parse_expr() propagates to the caller.
    """
    tokens = tokenize_file(fp)
    exprlist = []
    while tokens:
        # parse_expr() returns the parsed expression and the unused tokens.
        expr, tokens = parse_expr(tokens)
        exprlist.append(expr)
    return exprlist


# EXAMPLE:
#
# The input
#	'(hip (hop hur-ray))'
#
# passed to tokenize_string() returns the token list
#	['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#
# When this is passed to parse_expr() it returns the expression
#	['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
#
# When a file containing the example is passed to parse_file() it returns
# a list whose only element is the expression returned by parse_expr() above:
#	[['hip', ['hop', 'hur-ray']]]


# TOKENIZING:
#
# Comments start with semicolon (;) and continue till the end of the line.
#
# Tokens are separated by whitespace, except the following characters
# always form a separate token (outside strings):
#	( ) '
# Strings are enclosed in double quotes (") and backslash (\) is used
# as an escape character in strings.