t054main.py revision 324c4644fee44b9898524c09511bd33c3f12e2df
1# -*- coding: utf-8 -*-
2
3import unittest
4import textwrap
5import antlr3
6import antlr3.tree
7import testbase
8import sys
9from StringIO import StringIO
10
class T(testbase.ANTLRTest):
    """Tests for the ``main()`` entry point of modules generated from grammars.

    Each test compiles an inline ANTLR grammar (Python target) with
    ``compileInlineGrammar(..., returnModule=True)``, calls ``main()`` on the
    resulting module with an argv-style list and checks what was written to
    the ``stdout`` stream handed to it.
    """

    def setUp(self):
        """Put ``self.baseDir`` at the front of ``sys.path``.

        A copy of the previous path is kept so tearDown() can restore it.
        ``baseDir`` is not assigned in this class; presumably it is provided
        by ``testbase.ANTLRTest`` -- TODO confirm.
        """
        self.oldPath = sys.path[:]
        sys.path.insert(0, self.baseDir)

    def tearDown(self):
        """Restore the ``sys.path`` that setUp() saved."""
        sys.path = self.oldPath

    def _runMain(self, module, argv, stdin=None):
        """Call ``module.main(argv, ...)`` and return the captured stdout text.

        ``stdin`` is the input text for the run. When it is given, it is
        wrapped in a StringIO and passed to main() as ``stdin``; when it is
        None, no ``stdin`` argument is passed at all.
        """
        stdout = StringIO()
        kwargs = {'stdout': stdout}
        if stdin is not None:
            kwargs['stdin'] = StringIO(stdin)
        module.main(argv, **kwargs)
        return stdout.getvalue()

    def testOverrideMain(self):
        """The main() written in the grammar's ``@main`` section is used.

        The ``@main`` action below defines a main() that raises
        RuntimeError, so calling the module's main() must raise it.
        """
        grammar = textwrap.dedent(
            r"""lexer grammar T3;
            options {
              language = Python;
              }

            @main {
            def main(argv):
                raise RuntimeError("no")
            }

            ID: ('a'..'z' | '\u00c0'..'\u00ff')+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        self.assertRaises(RuntimeError, lexerMod.main, ['lexer.py'])

    def testLexerFromFile(self):
        """Lexer main() reads its input from the file named in argv.

        Three lines of output are expected for "foo bar" (presumably one per
        token: ID, WS, ID -- not verifiable from this file).
        """
        inputPath = self.writeFile("input.txt", "foo bar")

        grammar = textwrap.dedent(
            r"""lexer grammar T1;
            options {
              language = Python;
              }

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        output = self._runMain(lexerMod, ['lexer.py', inputPath])

        self.assertEqual(len(output.splitlines()), 3)

    def testLexerFromStdIO(self):
        """Lexer main() reads from the ``stdin`` stream when argv has no file."""
        grammar = textwrap.dedent(
            r"""lexer grammar T2;
            options {
              language = Python;
              }

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        output = self._runMain(lexerMod, ['lexer.py'], stdin="foo bar")

        self.assertEqual(len(output.splitlines()), 3)

    def testLexerEncoding(self):
        """Lexer main() accepts ``--encoding`` for UTF-8 encoded input bytes."""
        text = u"föö bär".encode('utf-8')

        grammar = textwrap.dedent(
            r"""lexer grammar T3;
            options {
              language = Python;
              }

            ID: ('a'..'z' | '\u00c0'..'\u00ff')+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        output = self._runMain(
            lexerMod,
            ['lexer.py', '--encoding', 'utf-8'],
            stdin=text
            )

        self.assertEqual(len(output.splitlines()), 3)

    def testCombined(self):
        """Parser main() of a combined grammar runs the rule given by --rule.

        Exactly one line of output is expected; the captured output is used
        as the failure message.
        """
        grammar = textwrap.dedent(
            r"""grammar T4;
            options {
              language = Python;
              }

            r returns [res]: (ID)+ EOF { $res = $text; };

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        # Only the parser module (second item) is exercised here.
        _lexerMod, parserMod = self.compileInlineGrammar(
            grammar, returnModule=True)
        output = self._runMain(
            parserMod,
            ['combined.py', '--rule', 'r'],
            stdin="foo bar"
            )

        self.assertEqual(len(output.splitlines()), 1, output)

    def testCombinedOutputAST(self):
        """With ``output = AST`` the parser main() prints the resulting tree."""
        grammar = textwrap.dedent(
            r"""grammar T5;
            options {
              language = Python;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN; };
            """)

        # Only the parser module (second item) is exercised here.
        _lexerMod, parserMod = self.compileInlineGrammar(
            grammar, returnModule=True)
        output = self._runMain(
            parserMod,
            ['combined.py', '--rule', 'r'],
            stdin="foo + bar"
            )

        self.assertEqual(output.strip(), "(+ foo bar)")

    def testTreeParser(self):
        """Tree walker main() prints the value returned by the walker rule."""
        grammar = textwrap.dedent(
            r'''grammar T6;
            options {
              language = Python;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN; };
            ''')

        treeGrammar = textwrap.dedent(
            r'''tree grammar T6Walker;
            options {
            language=Python;
            ASTLabelType=CommonTree;
            tokenVocab=T6;
            }
            r returns [res]: ^(OP a=ID b=ID)
              { $res = "\%s \%s \%s" \% ($a.text, $OP.text, $b.text) }
              ;
            ''')

        # The returned lexer/parser modules are not used directly: the
        # walker's main() is given T6Parser and T6Lexer by name in argv, so
        # the combined grammar still has to be compiled first.
        self.compileInlineGrammar(grammar, returnModule=True)
        walkerMod = self.compileInlineGrammar(treeGrammar, returnModule=True)

        output = self._runMain(
            walkerMod,
            ['walker.py', '--rule', 'r', '--parser', 'T6Parser',
             '--parser-rule', 'r', '--lexer', 'T6Lexer'],
            stdin="a+b"
            )

        self.assertEqual(output.strip(), "u'a + b'")

    def testTreeParserRewrite(self):
        """Tree walker main() prints the rewritten tree for ``output=AST``."""
        grammar = textwrap.dedent(
            r'''grammar T7;
            options {
              language = Python;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN; };
            ''')

        treeGrammar = textwrap.dedent(
            r'''tree grammar T7Walker;
            options {
              language=Python;
              ASTLabelType=CommonTree;
              tokenVocab=T7;
              output=AST;
            }
            tokens {
              ARG;
            }
            r: ^(OP a=ID b=ID) -> ^(OP ^(ARG ID) ^(ARG ID));
            ''')

        # The returned lexer/parser modules are not used directly: the
        # walker's main() is given T7Parser and T7Lexer by name in argv, so
        # the combined grammar still has to be compiled first.
        self.compileInlineGrammar(grammar, returnModule=True)
        walkerMod = self.compileInlineGrammar(treeGrammar, returnModule=True)

        output = self._runMain(
            walkerMod,
            ['walker.py', '--rule', 'r', '--parser', 'T7Parser',
             '--parser-rule', 'r', '--lexer', 'T7Lexer'],
            stdin="a+b"
            )

        self.assertEqual(output.strip(), "(+ (ARG a) (ARG b))")

    def testGrammarImport(self):
        """Parser main() works for a master grammar that imports a slave."""
        slave = textwrap.dedent(
            r'''
            parser grammar T8S;
            options {
              language=Python;
            }

            a : B;
            ''')

        parserName = self.writeInlineGrammar(slave)[0]
        # slave parsers are imported as normal python modules
        # to force reloading current version, purge module from sys.modules
        sys.modules.pop(parserName + 'Parser', None)

        master = textwrap.dedent(
            r'''
            grammar T8M;
            options {
              language=Python;
            }
            import T8S;
            s returns [res]: a { $res = $a.text };
            B : 'b' ; // defines B from inherited token space
            WS : (' '|'\n') {self.skip()} ;
            ''')

        # Only the parser module (second item) is exercised here.
        _lexerMod, parserMod = self.compileInlineGrammar(
            master, returnModule=True)
        output = self._runMain(
            parserMod,
            ['import.py', '--rule', 's'],
            stdin="b"
            )

        self.assertEqual(output.strip(), "u'b'")
316
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
319