# -*- coding: utf-8 -*-

import unittest
import textwrap
import antlr3
import antlr3.tree
import testbase
import sys
from StringIO import StringIO


class T(testbase.ANTLRTest):
    """Exercise the ``main()`` entry point of modules built from inline grammars.

    Each test compiles a grammar through ``compileInlineGrammar`` with
    ``returnModule=True`` and then calls the returned module's ``main()``
    with an explicit argv list and in-memory stdin/stdout streams.
    """

    def setUp(self):
        """Remember ``sys.path`` and put ``self.baseDir`` at its front."""
        self.oldPath = sys.path[:]
        sys.path.insert(0, self.baseDir)

    def tearDown(self):
        """Restore the ``sys.path`` saved by ``setUp``."""
        sys.path = self.oldPath

    def testOverrideMain(self):
        """A ``@main`` action in the grammar replaces the module's ``main()``."""
        grammar = textwrap.dedent(
            r"""lexer grammar T3;
            options {
              language = Python;
            }

            @main {
            def main(argv):
                raise RuntimeError("no")
            }

            ID: ('a'..'z' | '\u00c0'..'\u00ff')+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)

        # The overriding main() from the grammar always raises, so reaching
        # it is proof that the override took effect.
        self.assertRaises(RuntimeError, lexerMod.main, ['lexer.py'])

    def testLexerFromFile(self):
        """The lexer ``main()`` reads its input from a file named in argv."""
        text = "foo bar"
        inputPath = self.writeFile("input.txt", text)

        grammar = textwrap.dedent(
            r"""lexer grammar T1;
            options {
              language = Python;
            }

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        stdout = StringIO()

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        lexerMod.main(
            ['lexer.py', inputPath],
            stdout=stdout
            )

        self.assertEqual(len(stdout.getvalue().splitlines()), 3)

    def testLexerFromStdIO(self):
        """The lexer ``main()`` reads from ``stdin`` when argv names no file."""
        text = "foo bar"

        grammar = textwrap.dedent(
            r"""lexer grammar T2;
            options {
              language = Python;
            }

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        stdout = StringIO()

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        lexerMod.main(
            ['lexer.py'],
            stdin=StringIO(text),
            stdout=stdout
            )

        self.assertEqual(len(stdout.getvalue().splitlines()), 3)

    def testLexerEncoding(self):
        """The lexer ``main()`` accepts UTF-8 bytes when given ``--encoding``."""
        # Encoded to a byte string on purpose: main() is told the encoding
        # through the '--encoding' command line option below.
        encoded = u"föö bär".encode('utf-8')

        grammar = textwrap.dedent(
            r"""lexer grammar T3;
            options {
              language = Python;
            }

            ID: ('a'..'z' | '\u00c0'..'\u00ff')+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        stdout = StringIO()

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        lexerMod.main(
            ['lexer.py', '--encoding', 'utf-8'],
            stdin=StringIO(encoded),
            stdout=stdout
            )

        self.assertEqual(len(stdout.getvalue().splitlines()), 3)

    def testCombined(self):
        """The parser ``main()`` of a combined grammar runs the ``--rule`` rule."""
        text = "foo bar"

        grammar = textwrap.dedent(
            r"""grammar T4;
            options {
              language = Python;
            }

            r returns [res]: (ID)+ EOF { $res = $text; };

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        stdout = StringIO()

        lexerMod, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        parserMod.main(
            ['combined.py', '--rule', 'r'],
            stdin=StringIO(text),
            stdout=stdout
            )

        output = stdout.getvalue()
        # The captured output doubles as the failure message.
        self.assertEqual(len(output.splitlines()), 1, output)

    def testCombinedOutputAST(self):
        """With ``output = AST`` the parser ``main()`` prints the built tree."""
        text = "foo + bar"

        grammar = textwrap.dedent(
            r"""grammar T5;
            options {
              language = Python;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN; };
            """)

        stdout = StringIO()

        lexerMod, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        parserMod.main(
            ['combined.py', '--rule', 'r'],
            stdin=StringIO(text),
            stdout=stdout
            )

        output = stdout.getvalue().strip()
        self.assertEqual(output, "(+ foo bar)")

    def testTreeParser(self):
        """The tree walker ``main()`` chains the named lexer, parser and rule."""
        grammar = textwrap.dedent(
            r'''grammar T6;
            options {
              language = Python;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN; };
            ''')

        treeGrammar = textwrap.dedent(
            r'''tree grammar T6Walker;
            options {
            language=Python;
            ASTLabelType=CommonTree;
            tokenVocab=T6;
            }
            r returns [res]: ^(OP a=ID b=ID)
              { $res = "\%s \%s \%s" \% ($a.text, $OP.text, $b.text) }
              ;
            ''')

        lexerMod, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        walkerMod = self.compileInlineGrammar(treeGrammar, returnModule=True)

        stdout = StringIO()
        walkerMod.main(
            ['walker.py', '--rule', 'r', '--parser', 'T6Parser', '--parser-rule', 'r', '--lexer', 'T6Lexer'],
            stdin=StringIO("a+b"),
            stdout=stdout
            )

        output = stdout.getvalue().strip()
        self.assertEqual(output, "u'a + b'")

    def testTreeParserRewrite(self):
        """A tree walker with ``output=AST`` prints the rewritten tree."""
        grammar = textwrap.dedent(
            r'''grammar T7;
            options {
              language = Python;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN; };
            ''')

        treeGrammar = textwrap.dedent(
            r'''tree grammar T7Walker;
            options {
              language=Python;
              ASTLabelType=CommonTree;
              tokenVocab=T7;
              output=AST;
            }
            tokens {
              ARG;
            }
            r: ^(OP a=ID b=ID) -> ^(OP ^(ARG ID) ^(ARG ID));
            ''')

        lexerMod, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        walkerMod = self.compileInlineGrammar(treeGrammar, returnModule=True)

        stdout = StringIO()
        walkerMod.main(
            ['walker.py', '--rule', 'r', '--parser', 'T7Parser', '--parser-rule', 'r', '--lexer', 'T7Lexer'],
            stdin=StringIO("a+b"),
            stdout=stdout
            )

        output = stdout.getvalue().strip()
        self.assertEqual(output, "(+ (ARG a) (ARG b))")

    def testGrammarImport(self):
        """The parser ``main()`` works for a grammar that imports another one."""
        slave = textwrap.dedent(
            r'''
            parser grammar T8S;
            options {
              language=Python;
            }

            a : B;
            ''')

        parserName = self.writeInlineGrammar(slave)[0]
        # slave parsers are imported as normal python modules
        # to force reloading current version, purge module from sys.modules
        sys.modules.pop(parserName + 'Parser', None)

        master = textwrap.dedent(
            r'''
            grammar T8M;
            options {
              language=Python;
            }
            import T8S;
            s returns [res]: a { $res = $a.text };
            B : 'b' ; // defines B from inherited token space
            WS : (' '|'\n') {self.skip()} ;
            ''')

        stdout = StringIO()

        lexerMod, parserMod = self.compileInlineGrammar(master, returnModule=True)
        parserMod.main(
            ['import.py', '--rule', 's'],
            stdin=StringIO("b"),
            stdout=stdout
            )

        output = stdout.getvalue().strip()
        self.assertEqual(output, "u'b'")


if __name__ == '__main__':
    unittest.main()