// ANTLRv3.g revision 324c4644fee44b9898524c09511bd33c3f12e2df
/*
 [The "BSD license"]
 Copyright (c) 2010 Terence Parr
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** ANTLR v3 grammar written in ANTLR v3 with AST construction */
grammar ANTLRv3;

options {
    output=AST;
    ASTLabelType=CommonTree;
}

tokens {
    DOC_COMMENT;
    PARSER;
    LEXER;
    RULE;
    BLOCK;
    OPTIONAL;
    CLOSURE;
    POSITIVE_CLOSURE;
    SYNPRED;
    RANGE='..';     // declared once; previously listed both bare and with its literal
    CHAR_RANGE;
    EPSILON;
    ALT;
    EOR;
    EOB;
    EOA; // end of alt
    ID;
    ARG;
    ARGLIST;
    RET='returns';
    LEXER_GRAMMAR;
    PARSER_GRAMMAR;
    TREE_GRAMMAR;
    COMBINED_GRAMMAR;
    LABEL; // $x used in rewrite rules
    TEMPLATE;
    SCOPE='scope';
    SEMPRED;
    GATED_SEMPRED; // {p}? =>
    SYN_SEMPRED; // (...) => it's a manually-specified synpred converted to sempred
    BACKTRACK_SEMPRED; // auto backtracking mode syn pred converted to sempred
    FRAGMENT='fragment';
    TREE_BEGIN='^(';
    ROOT='^';
    BANG='!';
    REWRITE='->';
    AT='@';
    LABEL_ASSIGN='=';
    LIST_LABEL_ASSIGN='+=';
}

@parser::header
{
    package org.antlr.grammar.v3;
}
@lexer::header
{
    package org.antlr.grammar.v3;
}

@members {
    // Grammar kind (one of the *_GRAMMAR token types); set by grammarDef
    // and read by the syntactic-predicate rewrite in ebnf.
    int gtype;
}

/** Entry rule: a whole grammar file. The root node's type is the
 *  grammar kind recorded in gtype, created from the 'grammar' token.
 */
grammarDef
    :   DOC_COMMENT?
        (   'lexer'  {gtype=LEXER_GRAMMAR;}    // pure lexer
        |   'parser' {gtype=PARSER_GRAMMAR;}   // pure parser
        |   'tree'   {gtype=TREE_GRAMMAR;}     // a tree parser
        |            {gtype=COMBINED_GRAMMAR;} // merged parser/lexer
        )
        g='grammar' id ';' optionsSpec? tokensSpec? attrScope* action*
        rule+
        EOF
        -> ^( {adaptor.create(gtype,$g)}
              id DOC_COMMENT? optionsSpec? tokensSpec? attrScope* action* rule+
            )
    ;

/** The tokens {...} section; the TOKENS lexer token already consumed the '{'. */
tokensSpec
    :   TOKENS tokenSpec+ '}' -> ^(TOKENS tokenSpec+)
    ;

/** One entry of the tokens section, optionally bound to a literal. */
tokenSpec
    :   TOKEN_REF
        (   '=' (lit=STRING_LITERAL|lit=CHAR_LITERAL)   -> ^('=' TOKEN_REF $lit)
        |                                               -> TOKEN_REF
        )
        ';'
    ;

/** A named, grammar-level attribute scope: scope Name {...} */
attrScope
    :   'scope' id ACTION -> ^('scope' id ACTION)
    ;

/** Match stuff like @parser::members {int i;} */
action
    :   '@' (actionScopeName '::')? id ACTION -> ^('@' actionScopeName? id ACTION)
    ;

/** Sometimes the scope names will collide with keywords; allow them as
 *  ids for action scopes.
 */
actionScopeName
    :   id
    |   l='lexer'   -> ID[$l]
    |   p='parser'  -> ID[$p]
    ;

/** An options {...} section; the OPTIONS lexer token already consumed the '{'. */
optionsSpec
    :   OPTIONS (option ';')+ '}' -> ^(OPTIONS option+)
    ;

/** A single name=value option. */
option
    :   id '=' optionValue -> ^('=' id optionValue)
    ;

optionValue
    :   qid
    |   STRING_LITERAL
    |   CHAR_LITERAL
    |   INT
    |   s='*' -> STRING_LITERAL[$s]  // used for k=*
    ;

/** A parser or lexer rule definition. The rule's name is kept in the
 *  dynamic scope so nested rules (see ebnf) can inspect it.
 */
rule
scope {
    String name;
}
    :   DOC_COMMENT?
        ( modifier=('protected'|'public'|'private'|'fragment') )?
        id {$rule::name = $id.text;}
        '!'?
        ( arg=ARG_ACTION )?
        ( 'returns' rt=ARG_ACTION  )?
        throwsSpec? optionsSpec? ruleScopeSpec? ruleAction*
        ':' altList ';'
        exceptionGroup?
        -> ^( RULE id {modifier!=null?adaptor.create(modifier):null} ^(ARG[$arg] $arg)? ^('returns' $rt)?
              throwsSpec? optionsSpec? ruleScopeSpec? ruleAction*
              altList
              exceptionGroup?
              EOR["EOR"]
            )
    ;

/** Match stuff like @init {int i;} */
ruleAction
    :   '@' id ACTION -> ^('@' id ACTION)
    ;

throwsSpec
    :   'throws' id ( ',' id )* -> ^('throws' id+)
    ;

/** Rule-level scope spec: an inline scope action, a list of named
 *  scopes, or the inline action followed by the named list.
 */
ruleScopeSpec
    :   'scope' ACTION -> ^('scope' ACTION)
    |   'scope' id (',' id)* ';' -> ^('scope' id+)
    |   'scope' ACTION
        'scope' id (',' id)* ';'
        -> ^('scope' ACTION id+ )
    ;

/** A parenthesized subrule. BLOCK is positioned at '(' and EOB at ')'. */
block
    :   lp='('
        ( (opts=optionsSpec)? ':' )?
        altpair ( '|' altpair )*
        rp=')'
        -> ^( BLOCK[$lp,"BLOCK"] optionsSpec? altpair+ EOB[$rp,"EOB"] )
    ;

/** An alternative together with its (possibly empty) rewrite. */
altpair : alternative rewrite ;

altList
@init {
    // must create root manually as it's used by invoked rules in real antlr tool.
    // leave here to demonstrate use of {...} in rewrite rule
    // it's really BLOCK[firstToken,"BLOCK"]; set line/col to previous ( or : token.
    CommonTree blkRoot = (CommonTree)adaptor.create(BLOCK,input.LT(-1),"BLOCK");
}
    :   altpair ( '|' altpair )* -> ^( {blkRoot} altpair+ EOB["EOB"] )
    ;

/** One alternative. An empty alternative becomes ALT/EPSILON nodes
 *  positioned at the token preceding it.
 */
alternative
@init {
    Token firstToken = input.LT(1);
    Token prevToken = input.LT(-1); // either : or | I think
}
    :   element+ -> ^(ALT[firstToken,"ALT"] element+ EOA["EOA"])
    |   -> ^(ALT[prevToken,"ALT"] EPSILON[prevToken,"EPSILON"] EOA["EOA"])
    ;

exceptionGroup
    :   ( exceptionHandler )+ ( finallyClause )?
    |   finallyClause
    ;

exceptionHandler
    :   'catch' ARG_ACTION ACTION -> ^('catch' ARG_ACTION ACTION)
    ;

finallyClause
    :   'finally' ACTION -> ^('finally' ACTION)
    ;

/** A single element of an alternative. An element followed by an EBNF
 *  suffix is wrapped in a synthetic BLOCK/ALT under the suffix node.
 */
element
    :   id (labelOp='='|labelOp='+=') atom
        (   ebnfSuffix  -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id atom) EOA["EOA"]) EOB["EOB"]))
        |               -> ^($labelOp id atom)
        )
    |   id (labelOp='='|labelOp='+=') block
        (   ebnfSuffix  -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id block) EOA["EOA"]) EOB["EOB"]))
        |               -> ^($labelOp id block)
        )
    |   atom
        (   ebnfSuffix  -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] atom EOA["EOA"]) EOB["EOB"]) )
        |               -> atom
        )
    |   ebnf
    |   ACTION
    |   SEMPRED ( g='=>' -> GATED_SEMPRED[$g] | -> SEMPRED )
    |   treeSpec
        (   ebnfSuffix  -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] treeSpec EOA["EOA"]) EOB["EOB"]) )
        |               -> treeSpec
        )
    ;

/** A terminal, range, not-set or rule reference, optionally followed
 *  by a '^' or '!' AST operator which becomes the subtree root.
 */
atom:   terminal
    |   range
        (   (op='^'|op='!') -> ^($op range)
        |                   -> range
        )
    |   notSet
        (   (op='^'|op='!') -> ^($op notSet)
        |                   -> notSet
        )
    |   RULE_REF ARG_ACTION?
        (   (op='^'|op='!') -> ^($op RULE_REF ARG_ACTION?)
        |                   -> ^(RULE_REF ARG_ACTION?)
        )
    ;

notSet
    :   '~'
        (   notTerminal elementOptions? -> ^('~' notTerminal elementOptions?)
        |   block elementOptions?       -> ^('~' block elementOptions?)
        )
    ;

notTerminal
    :   CHAR_LITERAL
    |   TOKEN_REF
    |   STRING_LITERAL
    ;

/** Element options in angle brackets: either a single qualified id or
 *  a ';'-separated list of name=value pairs.
 */
elementOptions
    :   '<' qid '>'                                 -> ^(OPTIONS qid)
    |   '<' elementOption (';' elementOption)* '>'  -> ^(OPTIONS elementOption+)
    ;

/** One name=value pair inside element options. */
elementOption
    :   id '=' optionValue -> ^('=' id optionValue)
    ;

/** A tree pattern ^(root child ...); at least two elements are required. */
treeSpec
    :   '^(' element ( element )+ ')' -> ^(TREE_BEGIN element+)
    ;

range!
    :   c1=CHAR_LITERAL RANGE c2=CHAR_LITERAL elementOptions?
        -> ^(CHAR_RANGE[$c1,".."] $c1 $c2 elementOptions?)
    ;

terminal
    :   (   CHAR_LITERAL elementOptions?                -> ^(CHAR_LITERAL elementOptions?)
            // Args are only valid for lexer rules
        |   TOKEN_REF ARG_ACTION? elementOptions?       -> ^(TOKEN_REF ARG_ACTION? elementOptions?)
        |   STRING_LITERAL elementOptions?              -> ^(STRING_LITERAL elementOptions?)
        |   '.' elementOptions?                         -> ^('.' elementOptions?)
        )
        (   '^' -> ^('^' $terminal)
        |   '!' -> ^('!' $terminal)
        )?
    ;

/** Matches EBNF blocks (and token sets via block rule) */
ebnf
@init {
    Token firstToken = input.LT(1);
}
@after {
    // Position the resulting root node at the first token of the block.
    $ebnf.tree.getToken().setLine(firstToken.getLine());
    $ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
}
    :   block
        (   op='?'  -> ^(OPTIONAL[op] block)
        |   op='*'  -> ^(CLOSURE[op] block)
        |   op='+'  -> ^(POSITIVE_CLOSURE[op] block)
        |   '=>'    // syntactic predicate
                    -> {gtype==COMBINED_GRAMMAR &&
                        Character.isUpperCase($rule::name.charAt(0))}?
                       // if lexer rule in combined, leave as pred for lexer
                       ^(SYNPRED["=>"] block)
                    // in real antlr tool, text for SYN_SEMPRED is predname
                    -> SYN_SEMPRED
        |           -> block
        )
    ;

/** An EBNF suffix operator turned into the matching imaginary node. */
ebnfSuffix
@init {
    Token op = input.LT(1);
}
    :   '?' -> OPTIONAL[op]
    |   '*' -> CLOSURE[op]
    |   '+' -> POSITIVE_CLOSURE[op]
    ;



// R E W R I T E  S Y N T A X

/** Zero or more predicated rewrites followed by one final rewrite;
 *  the whole construct may also be absent.
 */
rewrite
    :   (rew+='->' preds+=SEMPRED predicated+=rewrite_alternative)*
        rew2='->' last=rewrite_alternative
        -> ^($rew $preds $predicated)* ^($rew2 $last)
    |
    ;

rewrite_alternative
options {backtrack=true;}
    :   rewrite_template
    |   rewrite_tree_alternative
    |   /* empty rewrite */ -> ^(ALT["ALT"] EPSILON["EPSILON"] EOA["EOA"])
    ;

/** A parenthesized rewrite subrule. BLOCK is positioned at '(' and
 *  EOB at ')', matching the block rule.
 */
rewrite_tree_block
    :   lp='(' rewrite_tree_alternative rp=')'
        -> ^(BLOCK[$lp,"BLOCK"] rewrite_tree_alternative EOB[$rp,"EOB"])
    ;

rewrite_tree_alternative
    :   rewrite_tree_element+ -> ^(ALT["ALT"] rewrite_tree_element+ EOA["EOA"])
    ;

rewrite_tree_element
    :   rewrite_tree_atom
    |   rewrite_tree_atom ebnfSuffix
        -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree_atom EOA["EOA"]) EOB["EOB"]))
    |   rewrite_tree
        (   ebnfSuffix
            -> ^(ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree EOA["EOA"]) EOB["EOB"]))
        |   -> rewrite_tree
        )
    |   rewrite_tree_ebnf
    ;

rewrite_tree_atom
    :   CHAR_LITERAL
    |   TOKEN_REF ARG_ACTION? -> ^(TOKEN_REF ARG_ACTION?) // for imaginary nodes
    |   RULE_REF
    |   STRING_LITERAL
    |   d='$' id -> LABEL[$d,$id.text] // reference to a label in a rewrite rule
    |   ACTION
    ;

rewrite_tree_ebnf
@init {
    Token firstToken = input.LT(1);
}
@after {
    // Position the resulting root node at the first token of the block.
    $rewrite_tree_ebnf.tree.getToken().setLine(firstToken.getLine());
    $rewrite_tree_ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
}
    :   rewrite_tree_block ebnfSuffix -> ^(ebnfSuffix rewrite_tree_block)
    ;

rewrite_tree
    :   '^(' rewrite_tree_atom rewrite_tree_element* ')'
        -> ^(TREE_BEGIN rewrite_tree_atom rewrite_tree_element* )
    ;

/** Build a tree for a template rewrite:
      ^(TEMPLATE (ID|ACTION) ^(ARGLIST ^(ARG ID ACTION) ...) )
    where ARGLIST is always there even if no args exist.
    ID can be "template" keyword.  If first child is ACTION then it's
    an indirect template ref

    -> foo(a={...}, b={...})
    -> ({string-e})(a={...}, b={...})  // e evaluates to template name
    -> {%{$ID.text}} // create literal template from string (done in ActionTranslator)
    -> {st-expr} // st-expr evaluates to ST
 */
rewrite_template
    :   // -> template(a={...},...) "..."    inline template
        id lp='(' rewrite_template_args ')'
        ( str=DOUBLE_QUOTE_STRING_LITERAL | str=DOUBLE_ANGLE_STRING_LITERAL )
        -> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args $str)

    |   // -> foo(a={...}, ...)
        rewrite_template_ref

    |   // -> ({expr})(a={...}, ...)
        rewrite_indirect_template_head

    |   // -> {...}
        ACTION
    ;

/** -> foo(a={...}, ...) */
rewrite_template_ref
    :   id lp='(' rewrite_template_args ')'
        -> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args)
    ;

/** -> ({expr})(a={...}, ...) */
rewrite_indirect_template_head
    :   lp='(' ACTION ')' '(' rewrite_template_args ')'
        -> ^(TEMPLATE[$lp,"TEMPLATE"] ACTION rewrite_template_args)
    ;

rewrite_template_args
    :   rewrite_template_arg (',' rewrite_template_arg)*
        -> ^(ARGLIST rewrite_template_arg+)
    |   -> ARGLIST
    ;

rewrite_template_arg
    :   id '=' ACTION -> ^(ARG[$id.start] id ACTION)
    ;

/** A dot-separated qualified identifier. */
qid :   id ('.' id)* ;

/** Either kind of reference, retyped as an ID node. */
id  :   TOKEN_REF -> ID[$TOKEN_REF]
    |   RULE_REF  -> ID[$RULE_REF]
    ;

// L E X I C A L   R U L E S

SL_COMMENT
    :   '//'
        (   ' $ANTLR ' SRC // src directive
        |   ~('\r'|'\n')*
        )
        '\r'? '\n'
        {$channel=HIDDEN;}
    ;

/** A block comment. One opening with a second '*' is emitted as
 *  DOC_COMMENT; every other block comment goes to the hidden channel.
 */
ML_COMMENT
    :   '/*'
        {
            // The empty comment "/**/" also has '*' as its next character,
            // so require that the '*' is not the start of the closing "*/".
            if (input.LA(1)=='*' && input.LA(2)!='/') $type=DOC_COMMENT;
            else $channel=HIDDEN;
        }
        .* '*/'
    ;

CHAR_LITERAL
    :   '\'' LITERAL_CHAR '\''
    ;

STRING_LITERAL
    :   '\'' LITERAL_CHAR LITERAL_CHAR* '\''
    ;

fragment
LITERAL_CHAR
    :   ESC
    |   ~('\''|'\\')
    ;

DOUBLE_QUOTE_STRING_LITERAL
    :   '"' (ESC | ~('\\'|'"'))* '"'
    ;

DOUBLE_ANGLE_STRING_LITERAL
    :   '<<' .* '>>'
    ;

fragment
ESC :   '\\'
        (   'n'
        |   'r'
        |   't'
        |   'b'
        |   'f'
        |   '"'
        |   '\''
        |   '\\'
        |   '>'
        |   'u' XDIGIT XDIGIT XDIGIT XDIGIT
        |   . // unknown, leave as it is
        )
    ;

fragment
XDIGIT :
        '0' .. '9'
    |   'a' .. 'f'
    |   'A' .. 'F'
    ;

INT :   '0'..'9'+
    ;

ARG_ACTION
    :   NESTED_ARG_ACTION
    ;

fragment
NESTED_ARG_ACTION :
    '['
    (   options {greedy=false; k=1;}
    :   NESTED_ARG_ACTION
    |   ACTION_STRING_LITERAL
    |   ACTION_CHAR_LITERAL
    |   .
    )*
    ']'
    //{setText(getText().substring(1, getText().length()-1));}
    ;

/** A {...} action; a trailing '?' retypes the token as SEMPRED. */
ACTION
    :   NESTED_ACTION ( '?' {$type = SEMPRED;} )?
    ;

fragment
NESTED_ACTION :
    '{'
    (   options {greedy=false; k=2;}
    :   NESTED_ACTION
    |   SL_COMMENT
    |   ML_COMMENT
    |   ACTION_STRING_LITERAL
    |   ACTION_CHAR_LITERAL
    |   .
    )*
    '}'
    ;

fragment
ACTION_CHAR_LITERAL
    :   '\'' (ACTION_ESC|~('\\'|'\'')) '\''
    ;

fragment
ACTION_STRING_LITERAL
    :   '"' (ACTION_ESC|~('\\'|'"'))* '"'
    ;

fragment
ACTION_ESC
    :   '\\\''
    |   '\\' '"' // ANTLR doesn't like: '\\"'
    |   '\\' ~('\''|'"')
    ;

TOKEN_REF
    :   'A'..'Z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
    ;

RULE_REF
    :   'a'..'z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
    ;

/** Match the start of an options section.  Don't allow normal
 *  action processing on the {...} as it's not an action.
 */
OPTIONS
    :   'options' WS_LOOP '{'
    ;

TOKENS
    :   'tokens' WS_LOOP '{'
    ;

/** Reset the file and line information; useful when the grammar
 *  has been generated so that errors are shown relative to the
 *  original file like the old C preprocessor used to do.
 */
fragment
SRC :   'src' ' ' file=ACTION_STRING_LITERAL ' ' line=INT
    ;

WS  :   (   ' '
        |   '\t'
        |   '\r'? '\n'
        )+
        {$channel=HIDDEN;}
    ;

fragment
WS_LOOP
    :   (   WS
        |   SL_COMMENT
        |   ML_COMMENT
        )*
    ;