# Module 'panelparser'
#
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme so it can't help writing S-expressions).
#
# See notes at end of file.
from warnings import warnpy3k
warnpy3k("the panelparser module has been removed in Python 3.0", stacklevel=2)
del warnpy3k


whitespace = ' \t\n'
operators = '()\''
separators = operators + whitespace + ';' + '"'


# Tokenize a string.
# Return a list of tokens (strings).
#
def tokenize_string(s):
    tokens = []
    while s:
        c = s[:1]
        if c in whitespace:
            s = s[1:]
        elif c == ';':
            s = ''
        elif c == '"':
            n = len(s)
            i = 1
            while i < n:
                c = s[i]
                i = i+1
                if c == '"': break
                if c == '\\': i = i+1
            tokens.append(s[:i])
            s = s[i:]
        elif c in operators:
            tokens.append(c)
            s = s[1:]
        else:
            n = len(s)
            i = 1
            while i < n:
                if s[i] in separators: break
                i = i+1
            tokens.append(s[:i])
            s = s[i:]
    return tokens
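
# For example (illustrative only):
#       tokenize_string('(hip (hop hur-ray)) ; cheers')
# returns
#       ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
# (the comment after ';' is dropped).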


# Tokenize a whole file (given as file object, not as file name).
# Return a list of tokens (strings).
#
def tokenize_file(fp):
    tokens = []
    while 1:
        line = fp.readline()
        if not line: break
        tokens = tokens + tokenize_string(line)
    return tokens
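
# A minimal sketch (illustrative only), assuming fp is any object with a
# readline() method:
#
#       from StringIO import StringIO
#       tokenize_file(StringIO('(hip\n  (hop hur-ray))\n'))
# returns
#       ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']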


# Exception raised by parse_expr.
#
class syntax_error(Exception): pass


# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the s-expression,
# and tokens contains the remaining tokens.
# May raise syntax_error.
#
def parse_expr(tokens):
    if (not tokens) or tokens[0] != '(':
        raise syntax_error('expected "("')
    tokens = tokens[1:]
    expr = []
    while 1:
        if not tokens:
            raise syntax_error('missing ")"')
        if tokens[0] == ')':
            return expr, tokens[1:]
        elif tokens[0] == '(':
            subexpr, tokens = parse_expr(tokens)
            expr.append(subexpr)
        else:
            expr.append(tokens[0])
            tokens = tokens[1:]
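
# For example (illustrative only):
#       parse_expr(['(', 'hip', '(', 'hop', 'hur-ray', ')', ')', 'rest'])
# returns
#       (['hip', ['hop', 'hur-ray']], ['rest'])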


# Parse a file (given as file object, not as file name).
# Return a list of parsed S-expressions found at the top level.
#
def parse_file(fp):
    tokens = tokenize_file(fp)
    exprlist = []
    while tokens:
        expr, tokens = parse_expr(tokens)
        exprlist.append(expr)
    return exprlist
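
# A minimal usage sketch (illustrative only; StringIO stands in for any
# file-like object):
#
#       from StringIO import StringIO
#       parse_file(StringIO('(hip (hop hur-ray))\n'))
# returns
#       [['hip', ['hop', 'hur-ray']]]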


# EXAMPLE:
#
# The input
#       '(hip (hop hur-ray))'
#
# passed to tokenize_string() returns the token list
#       ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#
# When this is passed to parse_expr() it returns the expression
#       ['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
#
# When a file containing the example is passed to parse_file() it returns
# a list whose only element is the output of parse_expr() above:
#       [['hip', ['hop', 'hur-ray']]]


# TOKENIZING:
#
# Comments start with semicolon (;) and continue till the end of the line.
#
# Tokens are separated by whitespace, except the following characters
# always form a separate token (outside strings):
#       ( ) '
# Strings are enclosed in double quotes (") and backslash (\) is used
# as escape character in strings.
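#
# For example (illustrative only), a quoted string and a trailing comment
# tokenize as:
#       tokenize_string('(label "hi there") ; a comment')
# returns
#       ['(', 'label', '"hi there"', ')']


# Illustrative self-check; runs only when this file is executed directly.
if __name__ == '__main__':
    tokens = tokenize_string('(hip (hop hur-ray)) ; cheers')
    print tokens
    expr, rest = parse_expr(tokens)
    print expr, rest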