ANTLR.g revision 324c4644fee44b9898524c09511bd33c3f12e2df
/*
 [The "BSD license"]
 Copyright (c) 2005-2011 Terence Parr
 All rights reserved.

 Grammar conversion to ANTLR v3:
 Copyright (c) 2011 Sam Harwell
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/** Read in an ANTLR grammar and build an AST.  Try not to do
 *  any actions, just build the tree.
*
 *  The phases are:
 *
 *      antlr.g (this file)
 *      assign.types.g
 *      define.g
 *      buildnfa.g
 *      antlr.print.g (optional)
 *      codegen.g
 *
 *  Terence Parr
 *  University of San Francisco
 *  2005
 */

grammar ANTLR;

options
{
    output=AST;
    ASTLabelType=GrammarAST;
}

tokens
{
    //OPTIONS='options';
    //TOKENS='tokens';
    LEXER='lexer';
    PARSER='parser';
    CATCH='catch';
    FINALLY='finally';
    GRAMMAR='grammar';
    PRIVATE='private';
    PROTECTED='protected';
    PUBLIC='public';
    RETURNS='returns';
    THROWS='throws';
    TREE='tree';

    // imaginary tokens: used only as AST node types
    RULE;
    PREC_RULE;
    RECURSIVE_RULE_REF; // flip recursive RULE_REF to RECURSIVE_RULE_REF in prec rules
    BLOCK;
    OPTIONAL;
    CLOSURE;
    POSITIVE_CLOSURE;
    SYNPRED;
    RANGE;
    CHAR_RANGE;
    EPSILON;
    ALT;
    EOR;
    EOB;
    EOA; // end of alt
    ID;
    ARG;
    ARGLIST;
    RET;
    LEXER_GRAMMAR;
    PARSER_GRAMMAR;
    TREE_GRAMMAR;
    COMBINED_GRAMMAR;
    INITACTION;
    FORCED_ACTION; // {{...}} always exec even during syn preds
    LABEL; // $x used in rewrite rules
    TEMPLATE;
    SCOPE='scope';
    IMPORT='import';
    GATED_SEMPRED; // {p}? =>
    SYN_SEMPRED; // (...) => it's a manually-specified synpred converted to sempred
    BACKTRACK_SEMPRED; // auto backtracking mode syn pred converted to sempred
    FRAGMENT='fragment';
    DOT;
    REWRITES;
}

@lexer::header {
package org.antlr.grammar.v3;
import org.antlr.tool.ErrorManager;
import org.antlr.tool.Grammar;
}

@parser::header {
package org.antlr.grammar.v3;
import org.antlr.tool.ErrorManager;
import org.antlr.tool.Grammar;
import org.antlr.tool.GrammarAST;
import org.antlr.misc.IntSet;
import org.antlr.tool.Rule;
}

@lexer::members {
/** Set by the ROOT ('^') and BANG ('!') lexer rules when either operator is seen. */
public boolean hasASTOperator = false;
private String fileName;

public String getFileName() {
    return fileName;
}

public void setFileName(String value) {
    fileName = value;
}
}

@parser::members {
protected String currentRuleName = null;
protected GrammarAST currentBlockAST = null;
protected boolean atTreeRoot; // are we matching a tree root in tree grammar?

/** Create a parser over {@code input} whose tree adaptor builds GrammarAST nodes. */
public static ANTLRParser createParser(TokenStream input) {
    ANTLRParser parser = new ANTLRParser(input);
    parser.adaptor = new grammar_Adaptor(parser);
    return parser;
}

/** AST node standing in for a span of tokens consumed during error recovery. */
private static class GrammarASTErrorNode extends GrammarAST {
    public IntStream input;
    public Token start;
    public Token stop;
    public RecognitionException trappedException;

    public GrammarASTErrorNode(TokenStream input, Token start, Token stop, RecognitionException e) {
        super(stop);
        //Console.Out.WriteLine( "start: " + start + ", stop: " + stop );
        if ( stop == null ||
             ( stop.getTokenIndex() < start.getTokenIndex() &&
             stop.getType() != Token.EOF) ) {
            // sometimes resync does not consume a token (when LT(1) is
            // in follow set.  So, stop will be 1 to left to start. adjust.
            // Also handle case where start is the first token and no token
            // is consumed during recovery; LT(-1) will return null.
stop = start;
        }
        this.input = input;
        this.start = start;
        this.stop = stop;
        this.trappedException = e;
    }

    @Override
    public boolean isNil() { return false; }

    /** Return the input text between start and stop (through end of stream if stop is EOF). */
    @Override
    public String getText()
    {
        String badText = null;
        if (start instanceof Token) {
            int i = ((Token)start).getTokenIndex();
            int j = ((Token)stop).getTokenIndex();
            if (((Token)stop).getType() == Token.EOF) {
                j = ((TokenStream)input).size();
            }
            badText = ((TokenStream)input).toString(i, j);
        } else if (start instanceof Tree) {
            badText = ((TreeNodeStream)input).toString(start, stop);
        } else {
            // people should subclass if they alter the tree type so this
            // next one is for sure correct.
            badText = "<unknown>";
        }
        return badText;
    }

    @Override
    public void setText(String value) { }

    @Override
    public int getType() { return Token.INVALID_TOKEN_TYPE; }

    @Override
    public void setType(int value) { }

    /** Describe the trapped exception, choosing the wording by exception type. */
    @Override
    public String toString()
    {
        if (trappedException instanceof MissingTokenException)
        {
            return "<missing type: " +
                   ( (MissingTokenException)trappedException ).getMissingType() +
                   ">";
        } else if (trappedException instanceof UnwantedTokenException) {
            return "<extraneous: " +
                   ( (UnwantedTokenException)trappedException ).getUnexpectedToken() +
                   ", resync=" + getText() + ">";
        } else if (trappedException instanceof MismatchedTokenException) {
            return "<mismatched token: " + trappedException.token + ", resync=" + getText() + ">";
        } else if (trappedException instanceof NoViableAltException) {
            return "<unexpected: " + trappedException.token +
                   ", resync=" + getText() + ">";
        }
        return "<error: " + getText() + ">";
    }
}

/** Tree adaptor that creates GrammarAST nodes. */
static class grammar_Adaptor extends CommonTreeAdaptor {
    ANTLRParser _outer;

    public grammar_Adaptor(ANTLRParser outer) {
        _outer = outer;
    }

    @Override
public Object create(Token payload) {
        GrammarAST t = new GrammarAST( payload );
        // _outer is null when the adaptor is used without a parser
        if (_outer != null)
            t.enclosingRuleName = _outer.currentRuleName;
        return t;
    }

    @Override
    public Object errorNode(TokenStream input, Token start, Token stop, RecognitionException e) {
        GrammarAST t = new GrammarASTErrorNode(input, start, stop, e);
        if (_outer != null)
            t.enclosingRuleName = _outer.currentRuleName;
        return t;
    }
}

private Grammar grammar;
private int grammarType;
private String fileName;

public Grammar getGrammar() {
    return grammar;
}

public void setGrammar(Grammar value) {
    grammar = value;
}

public int getGrammarType() {
    return grammarType;
}

public void setGrammarType(int value) {
    grammarType = value;
}

public String getFileName() {
    return fileName;
}

public void setFileName(String value) {
    fileName = value;
}

private final int LA(int i) { return input.LA( i ); }

private final Token LT(int k) { return input.LT( k ); }

/*partial void createTreeAdaptor(ref ITreeAdaptor adaptor)
{
    adaptor = new grammar_Adaptor(this);
}*/

/** Wrap the set node {@code b} in a single-alternative block:
 *  {@code ^(BLOCK ^(ALT b EOA) EOB)}.  The alternative is passed to
 *  prefixWithSynPred before it is attached to the block.
 */
protected GrammarAST setToBlockWithSet(GrammarAST b) {
    /*
     * alt = ^(ALT["ALT"] {b} EOA["EOA"])
     * prefixWithSynpred( alt )
     * return ^(BLOCK["BLOCK"] {alt} EOB["<end-of-block>"])
     */
    GrammarAST alt = (GrammarAST)adaptor.create(ALT, "ALT");
    adaptor.addChild(alt, b);
    adaptor.addChild(alt, adaptor.create(EOA, "<end-of-alt>"));

    prefixWithSynPred(alt);

    GrammarAST block = (GrammarAST)adaptor.create(BLOCK, b.getToken(), "BLOCK");
    adaptor.addChild(block, alt);
    // EOB terminates the BLOCK, so it is a child of block (after alt), not of alt
    adaptor.addChild(block, adaptor.create(EOB, "<end-of-block>"));

    return block;
}

/** Create a copy of the alt and make it into a BLOCK; all actions,
 *  labels, tree operators, rewrites are removed.
*/
protected GrammarAST createBlockFromDupAlt(GrammarAST alt) {
    /*
     * ^(BLOCK["BLOCK"] {GrammarAST.dupTreeNoActions(alt)} EOB["<end-of-block>"])
     */
    GrammarAST nalt = GrammarAST.dupTreeNoActions(alt, null);

    GrammarAST block = (GrammarAST)adaptor.create(BLOCK, alt.getToken(), "BLOCK");
    adaptor.addChild( block, nalt );
    adaptor.addChild( block, adaptor.create( EOB, "<end-of-block>" ) );

    return block;
}

/** Rewrite alt to have a synpred as first element;
 *  (xxx)=>xxx
 *  but only if they didn't specify one manually.
 */
protected void prefixWithSynPred( GrammarAST alt ) {
    // if they want backtracking and it's not a lexer rule in combined grammar
    String autoBacktrack = (String)grammar.getBlockOption( currentBlockAST, "backtrack" );
    if ( autoBacktrack == null )
    {
        // no block-level setting: fall back to the grammar-level option
        autoBacktrack = (String)grammar.getOption( "backtrack" );
    }
    if ( autoBacktrack != null && autoBacktrack.equals( "true" ) &&
         !( grammarType == Grammar.COMBINED &&
         Rule.getRuleType(currentRuleName) == Grammar.LEXER) &&
         alt.getChild( 0 ).getType() != SYN_SEMPRED )
    {
        // duplicate alt and make a synpred block around that dup'd alt
        GrammarAST synpredBlockAST = createBlockFromDupAlt( alt );

        // Create a BACKTRACK_SEMPRED node as if user had typed this in
        // Effectively we replace (xxx)=>xxx with {synpredxxx}? xxx
        GrammarAST synpredAST = createSynSemPredFromBlock( synpredBlockAST,
                                                          BACKTRACK_SEMPRED );

        // insert BACKTRACK_SEMPRED as first element of alt
        //synpredAST.getLastSibling().setNextSibling( alt.getFirstChild() );
        //synpredAST.addChild( alt.getFirstChild() );
        //alt.setFirstChild( synpredAST );
        // Shift every existing child one slot right: overwrite slots 1..n-1
        // in place, then append the former last child to grow the list by one.
        GrammarAST[] children = alt.getChildrenAsArray();
        adaptor.setChild( alt, 0, synpredAST );
        for ( int i = 0; i < children.length; i++ )
        {
            if ( i < children.length - 1 )
                adaptor.setChild( alt, i + 1, children[i] );
            else
                adaptor.addChild( alt, children[i] );
        }
    }
}

/** Register {@code synpredBlockAST} as a syntactic predicate of the current
 *  rule and return a node of type {@code synpredTokenType} whose text is the
 *  predicate name returned by the grammar.
 */
protected GrammarAST createSynSemPredFromBlock( GrammarAST synpredBlockAST, int synpredTokenType ) {
    // add grammar fragment to a list so we can make fake rules for them later.
    String predName = grammar.defineSyntacticPredicate( synpredBlockAST, currentRuleName );
    // convert (alpha)=> into {synpredN}? where N is some pred count
    // during code gen we convert to function call with templates
    GrammarAST p = (GrammarAST)adaptor.create( synpredTokenType, predName );
    // track how many decisions have synpreds
    grammar.blocksWithSynPreds.add( currentBlockAST );
    return p;
}

/** Build a RULE subtree named {@code name} around an existing block, with a
 *  'fragment' modifier node when {@code fragment} is true.
 */
public static GrammarAST createSimpleRuleAST( String name, GrammarAST block, boolean fragment ) {
    TreeAdaptor adaptor = new grammar_Adaptor(null);

    GrammarAST modifier = null;
    if ( fragment )
    {
        modifier = (GrammarAST)adaptor.create( FRAGMENT, "fragment" );
    }

    /*
     * EOBAST = block.getLastChild()
     * ^(RULE[block,"rule"] ID["name"] {modifier} ARG["ARG"] RET["RET"] SCOPE["scope"] {block} EOR[EOBAST,"<end-of-rule>"])
     */
    GrammarAST rule = (GrammarAST)adaptor.create( RULE, block.getToken(), "rule" );

    adaptor.addChild( rule, adaptor.create( ID, name ) );
    if ( modifier != null )
        adaptor.addChild( rule, modifier );
    adaptor.addChild( rule,
adaptor.create( ARG, "ARG" ) );
    adaptor.addChild( rule, adaptor.create( RET, "RET" ) );
    adaptor.addChild( rule, adaptor.create( SCOPE, "scope" ) );
    adaptor.addChild( rule, block );
    adaptor.addChild( rule, adaptor.create( EOR, block.getLastChild().getToken(), "<end-of-rule>" ) );

    return rule;
}

/** Report a recognition error through ErrorManager as MSG_SYNTAX_ERROR,
 *  attributed to the token carried by the exception.
 */
@Override
public void reportError(RecognitionException ex)
{
    //Token token = null;
    //try
    //{
    //    token = LT( 1 );
    //}
    //catch ( TokenStreamException tse )
    //{
    //    ErrorManager.internalError( "can't get token???", tse );
    //}
    Token token = ex.token;
    ErrorManager.syntaxError(
        ErrorManager.MSG_SYNTAX_ERROR,
        grammar,
        token,
        "antlr: " + ex.toString(),
        ex );
}

/** Post-parse hook, run from the @after action of rule grammar_.  For a
 *  lexer grammar it asks the Grammar to add the artificial match-tokens rule
 *  to {@code root}; other grammar types are left untouched.
 */
public void cleanup( GrammarAST root )
{
    if ( grammarType == Grammar.LEXER )
    {
        String filter = (String)grammar.getOption( "filter" );
        // the returned rule AST is not needed here; the call is made for its effect on root
        grammar.addArtificialMatchTokensRule(
            root,
            grammar.lexerRuleNamesInCombined,
            grammar.getDelegateNames(),
            "true".equals( filter ) );
    }
}
}

public
grammar_![Grammar g]
@init
{
    this.grammar = g;
    Map<String, Object> opts;
}
@after
{
    cleanup( $tree );
}
    :   //hdr:headerSpec
        ( ACTION )?
        ( cmt=DOC_COMMENT  )?
        gr=grammarType gid=id {grammar.setName($gid.text);} SEMI
        (   optionsSpec {opts = $optionsSpec.opts; grammar.setOptions(opts, $optionsSpec.start);}
        )?
        (ig=delegateGrammars)?
        (ts=tokensSpec)?
        scopes=attrScopes
        (a=actions)?
        r=rules
        EOF
        -> ^($gr $gid $cmt? optionsSpec? $ig? $ts? $scopes? $a?
$r)
    ;

// Records the grammar kind both on the parser (grammarType) and on the Grammar object.
grammarType
    :   (   'lexer'  gr='grammar' {grammarType=Grammar.LEXER; grammar.type = Grammar.LEXER;}             // pure lexer
            -> LEXER_GRAMMAR[$gr]
        |   'parser' gr='grammar' {grammarType=Grammar.PARSER; grammar.type = Grammar.PARSER;}           // pure parser
            -> PARSER_GRAMMAR[$gr]
        |   'tree'   gr='grammar' {grammarType=Grammar.TREE_PARSER; grammar.type = Grammar.TREE_PARSER;} // a tree parser
            -> TREE_GRAMMAR[$gr]
        |            gr='grammar' {grammarType=Grammar.COMBINED; grammar.type = Grammar.COMBINED;}       // merged parser/lexer
            -> COMBINED_GRAMMAR[$gr]
        )
    ;

actions
    :   (action)+
    ;

/** Match stuff like @parser::members {int i;} */
action
    :   AMPERSAND^ (actionScopeName COLON! COLON!)? id ACTION
    ;

/** Sometimes the scope names will collide with keywords; allow them as
 *  ids for action scopes.
 */
actionScopeName
    :   id
    |   l='lexer'
        -> ID[$l]
    |   p='parser'
        -> ID[$p]
    ;

optionsSpec returns [Map<String, Object> opts=new HashMap<String, Object>()]
    :   OPTIONS^ (option[$opts] SEMI!)+ RCURLY!
    ;

option[Map<String, Object> opts]
    :   id ASSIGN^ optionValue
        {
            $opts.put($id.text, $optionValue.value);
        }
    ;

optionValue returns [Object value = null]
    :   x=id             {$value = $x.text;}
    |   s=STRING_LITERAL {String vs = $s.text;
                          // remove the quotes:
                          $value=vs.substring(1,vs.length()-1);}
    |   c=CHAR_LITERAL   {String vs = $c.text;
                          // remove the quotes:
                          $value=vs.substring(1,vs.length()-1);}
    |   i=INT            {$value = Integer.parseInt($i.text);}
    |   ss=STAR          {$value = "*";} // used for k=*
        -> STRING_LITERAL[$ss]
//  |   cs:charSet       {value = #cs;} // return set AST in this case
    ;

delegateGrammars
    :   'import'^ delegateGrammar (COMMA! delegateGrammar)* SEMI!
;

delegateGrammar
    :   lab=id ASSIGN^ g=id {grammar.importGrammar($g.tree, $lab.text);}
    |   g2=id               {grammar.importGrammar($g2.tree,null);}
    ;

tokensSpec
    :   TOKENS^
            tokenSpec*
        RCURLY!
    ;

tokenSpec
    :   TOKEN_REF ( ASSIGN^ (STRING_LITERAL|CHAR_LITERAL) )? SEMI!
    ;

attrScopes
    :   (attrScope)*
    ;

attrScope
    :   'scope'^ id ruleActions? ACTION
    ;

rules
    :   (   rule
        )+
    ;

public
rule
@init
{
    GrammarAST eob=null;
    CommonToken start = (CommonToken)LT(1);
    int startLine = LT(1).getLine();
}
    :
    (   (   d=DOC_COMMENT
        )?
        (   p1='protected'  //{modifier=$p1.tree;}
        |   p2='public'     //{modifier=$p2.tree;}
        |   p3='private'    //{modifier=$p3.tree;}
        |   p4='fragment'   //{modifier=$p4.tree;}
        )?
        ruleName=id
        {
            currentRuleName=$ruleName.text;
            // non-fragment rules of a lexer grammar are recorded by name
            if ( grammarType==Grammar.LEXER && $p4==null )
                grammar.lexerRuleNamesInCombined.add(currentRuleName);
        }
        ( BANG )?
        ( aa=ARG_ACTION )?
        ( 'returns' rt=ARG_ACTION  )?
        ( throwsSpec )?
        ( optionsSpec )?
        scopes=ruleScopeSpec
        (ruleActions)?
        COLON
        ruleAltList[$optionsSpec.opts]
        SEMI
        ( ex=exceptionGroup )?
        ->  ^(  RULE[$ruleName.start, "rule"]
                $ruleName
                // the modifier will be 0 or one of the modifiers:
                $p1? $p2? $p3? $p4?
                ^(ARG["ARG"] $aa?)
                ^(RET["RET"] $rt?)
                throwsSpec?
                optionsSpec?
                $scopes
                ruleActions?
                ruleAltList
                $ex?
                EOR[$SEMI,"<end-of-rule>"])
    )
    {
        $tree.setTreeEnclosingRuleNameDeeply(currentRuleName);
        ((GrammarAST)$tree.getChild(0)).setBlockOptions($optionsSpec.opts);
    }
    ;

ruleActions
    :   (ruleAction)+
    ;

/** Match stuff like @init {int i;} */
ruleAction
    :   AMPERSAND^ id ACTION
    ;

throwsSpec
    :   'throws'^ id ( COMMA! id )*
    ;

ruleScopeSpec
    :   ( 'scope' ruleActions? ACTION )?
( 'scope' idList SEMI )*
        -> ^(SCOPE[$start,"scope"] ruleActions? ACTION? idList*)
    ;

ruleAltList[Map<String, Object> opts]
@init
{
    GrammarAST blkRoot = null;
    // saved so the finally clause can restore the enclosing block
    GrammarAST save = currentBlockAST;
}
    :   ( -> BLOCK[input.LT(-1),"BLOCK"] )
        {
            blkRoot = (GrammarAST)$tree.getChild(0);
            blkRoot.setBlockOptions($opts);
            currentBlockAST = blkRoot;
        }
        (   a1=alternative r1=rewrite
            {if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) prefixWithSynPred($a1.tree);}
            -> $a1 $r1?
        )
        (   (   OR a2=alternative r2=rewrite
                {if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) prefixWithSynPred($a2.tree);}
                -> $ruleAltList $a2 $r2?
            )+
        |
        )
        -> ^({blkRoot} $ruleAltList EOB["<end-of-block>"])
    ;
finally { currentBlockAST = save; }

/** Build #(BLOCK ( #(ALT ...) EOB )+ ) */
block
@init
{
    GrammarAST save = currentBlockAST;
}
    :   (   lp=LPAREN
            -> BLOCK[$lp,"BLOCK"]
        )
        {currentBlockAST = (GrammarAST)$tree.getChild(0);}
        (
            // 2nd alt and optional branch ambig due to
            // linear approx LL(2) issue.  COLON ACTION
            // matched correctly in 2nd alt.
            (optionsSpec {((GrammarAST)$tree.getChild(0)).setOptions(grammar,$optionsSpec.opts);})?
            ( ruleActions )?
            COLON
        |   ACTION COLON
        )?

        a=alternative r=rewrite
        {
            stream_alternative.add( $r.tree );
            if ( LA(1)==OR || (LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR) )
                prefixWithSynPred($a.tree);
        }
        (   OR a=alternative r=rewrite
            {
                stream_alternative.add( $r.tree );
                if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR))
                    prefixWithSynPred($a.tree);
            }
        )*

        rp=RPAREN
        -> ^($block optionsSpec? ruleActions? ACTION?
alternative+ EOB[$rp,"<end-of-block>"])
    ;
finally { currentBlockAST = save; }

// ALT and EOA have indexes tracking start/stop of entire alt
alternative
    :   element+
        -> ^(ALT[$start,"ALT"] element+ EOA[input.LT(-1),"<end-of-alt>"])
    |   // epsilon alt
        -> ^(ALT[$start,"ALT"] EPSILON[input.LT(-1),"epsilon"] EOA[input.LT(-1),"<end-of-alt>"])
    ;

exceptionGroup
    :   exceptionHandler+ finallyClause?
    |   finallyClause
    ;

exceptionHandler
    :   'catch'^ ARG_ACTION ACTION
    ;

finallyClause
    :   'finally'^ ACTION
    ;

element
    :   elementNoOptionSpec
    ;

elementNoOptionSpec
@init
{
    IntSet elements=null;
}
    :   (   (   id (ASSIGN^|PLUS_ASSIGN^) (atom|block)
            )
            (   sub=ebnfSuffix[root_0,false]! {root_0 = $sub.tree;}
            )?
        |   a=atom
            (   sub2=ebnfSuffix[$a.tree,false]! {root_0=$sub2.tree;}
            )?
        |   ebnf
        |   FORCED_ACTION
        |   ACTION
        |   p=SEMPRED ( IMPLIES! {$p.setType(GATED_SEMPRED);} )?
            {
                grammar.blocksWithSemPreds.add(currentBlockAST);
            }
        |   t3=tree_
        )
    ;

atom
    :   range (ROOT^|BANG^)?
    |   (
            // grammar.rule but ensure no spaces. "A . B" is not a qualified ref
            // We do here rather than lexer so we can build a tree
            ({LT(1).getCharPositionInLine()+LT(1).getText().length()==LT(2).getCharPositionInLine()&&
             LT(2).getCharPositionInLine()+1==LT(3).getCharPositionInLine()}? id WILDCARD (terminal|ruleref)) =>
            id w=WILDCARD^ (terminal|ruleref) {$w.setType(DOT);}
        |   terminal
        |   ruleref
        )
    |   notSet (ROOT^|BANG^)?
    ;

ruleref
    :   RULE_REF^ ARG_ACTION? (ROOT^|BANG^)?
    ;

notSet
    :   NOT^
        (   notTerminal
        |   block
        )
    ;

// atTreeRoot is set only while this rule is active; rule terminal reads it
treeRoot
@init{atTreeRoot=true;}
@after{atTreeRoot=false;}
    :   id (ASSIGN^|PLUS_ASSIGN^) (atom|block)
    |   atom
    |   block
    ;

tree_
    :   TREE_BEGIN^
        treeRoot element+
        RPAREN!
;

/** matches ENBF blocks (and sets via block rule) */
ebnf
    :   block
        (   QUESTION
            -> ^(OPTIONAL[$start,"?"] block)
        |   STAR
            -> ^(CLOSURE[$start,"*"] block)
        |   PLUS
            -> ^(POSITIVE_CLOSURE[$start,"+"] block)
        |   IMPLIES // syntactic predicate
            // ignore for lexer rules in combined
            -> {grammarType == Grammar.COMBINED && Rule.getRuleType(currentRuleName) == Grammar.LEXER}? ^(SYNPRED[$start,"=>"] block)
            // create manually specified (...)=> predicate; convert to sempred
            -> {createSynSemPredFromBlock($block.tree, SYN_SEMPRED)}
        |   ROOT
            -> ^(ROOT block)
        |   BANG
            -> ^(BANG block)
        |
            -> block
        )
    ;

range
    :   c1=CHAR_LITERAL RANGE c2=CHAR_LITERAL
        -> ^(CHAR_RANGE[$c1,".."] $c1 $c2)
    ;

terminal
    :   cl=CHAR_LITERAL^ ( elementOptions[$cl.tree]! )? (ROOT^|BANG^)?

    |   tr=TOKEN_REF^
        ( elementOptions[$tr.tree]! )?
        ( ARG_ACTION )? // Args are only valid for lexer rules
        (ROOT^|BANG^)?

    |   sl=STRING_LITERAL^ ( elementOptions[$sl.tree]! )? (ROOT^|BANG^)?

    |   wi=WILDCARD (ROOT^|BANG^)?
        {
            // a wildcard matched while atTreeRoot is set is reported as an error
            if ( atTreeRoot )
            {
                ErrorManager.syntaxError(
                    ErrorManager.MSG_WILDCARD_AS_ROOT,grammar,$wi,null,null);
            }
        }
    ;

elementOptions[GrammarAST terminalAST]
    :   OPEN_ELEMENT_OPTION^ defaultNodeOption[terminalAST] CLOSE_ELEMENT_OPTION!
    |   OPEN_ELEMENT_OPTION^ elementOption[terminalAST] (SEMI! elementOption[terminalAST])* CLOSE_ELEMENT_OPTION!
;

defaultNodeOption[GrammarAST terminalAST]
    :   elementOptionId
        {terminalAST.setTerminalOption(grammar,Grammar.defaultTokenOption,$elementOptionId.qid);}
    ;

elementOption[GrammarAST terminalAST]
    :   id ASSIGN^
        (   elementOptionId
            {terminalAST.setTerminalOption(grammar,$id.text,$elementOptionId.qid);}
        |   (t=STRING_LITERAL|t=DOUBLE_QUOTE_STRING_LITERAL|t=DOUBLE_ANGLE_STRING_LITERAL)
            {terminalAST.setTerminalOption(grammar,$id.text,$t.text);}
        )
    ;

// Returns the dot-separated ids joined into one qualified name.
elementOptionId returns [String qid]
@init{StringBuilder buf = new StringBuilder();}
    :   i=id {buf.append($i.text);} ('.' i=id {buf.append("." + $i.text);})*
        {$qid = buf.toString();}
    ;

// Wraps elemAST in ^(suffix ^(BLOCK ^(ALT elemAST EOA) EOB)); the suffix node is
// OPTIONAL, CLOSURE or POSITIVE_CLOSURE.  prefixWithSynPred is skipped when
// inRewrite is true.
ebnfSuffix[GrammarAST elemAST, boolean inRewrite]
@init
{
GrammarAST blkRoot=null;
GrammarAST alt=null;
GrammarAST save = currentBlockAST;
}
@after
{
currentBlockAST = save;
}
    :   (   -> BLOCK[$elemAST.getToken(), "BLOCK"]
        )
        { blkRoot = (GrammarAST)$tree.getChild(0); currentBlockAST = blkRoot; }
        (   // create alt
            -> ^(ALT[$elemAST.getToken(), "ALT"] {$elemAST} EOA["<end-of-alt>"])
        )
        {
            alt = (GrammarAST)$tree.getChild(0);
            if ( !inRewrite )
                prefixWithSynPred(alt);
        }
        (   QUESTION
            -> OPTIONAL[$elemAST.getToken(),"?"]
        |   STAR
            -> CLOSURE[$elemAST.getToken(),"*"]
        |   PLUS
            -> POSITIVE_CLOSURE[$elemAST.getToken(),"+"]
        )
        -> ^($ebnfSuffix ^({blkRoot} {alt} EOB[$elemAST.getToken(), "<end-of-block>"]))
    ;

notTerminal
    :   CHAR_LITERAL
    |   TOKEN_REF
    |   STRING_LITERAL
    ;

idList
    :   id (COMMA!
id)*
    ;

id
    :   TOKEN_REF
        -> ID[$TOKEN_REF]
    |   RULE_REF
        -> ID[$RULE_REF]
    ;

// R E W R I T E  S Y N T A X

rewrite
    :   rewrite_with_sempred*
        REWRITE rewrite_alternative
        -> ^(REWRITES rewrite_with_sempred* ^(REWRITE rewrite_alternative))
    |
    ;

rewrite_with_sempred
    :   REWRITE^ SEMPRED rewrite_alternative
    ;

rewrite_block
    :   LPAREN
        rewrite_alternative
        RPAREN
        -> ^(BLOCK[$LPAREN,"BLOCK"] rewrite_alternative EOB[$RPAREN,"<end-of-block>"])
    ;

rewrite_alternative
options{k=1;}
    :   {grammar.buildTemplate()}? => rewrite_template

    |   {grammar.buildAST()}? => ( rewrite_element )+
        -> {!stream_rewrite_element.hasNext()}? ^(ALT[LT(1),"ALT"] EPSILON["epsilon"] EOA["<end-of-alt>"])
        -> ^(ALT[LT(1),"ALT"] rewrite_element+ EOA["<end-of-alt>"])

    |
        -> ^(ALT[LT(1),"ALT"] EPSILON["epsilon"] EOA["<end-of-alt>"])
    |   {grammar.buildAST()}? ETC
    ;

rewrite_element
    :   (   t=rewrite_atom
            -> $t
        )
        (   subrule=ebnfSuffix[$t.tree,true]
            -> $subrule
        )?
    |   rewrite_ebnf
    |   (   tr=rewrite_tree
            -> $tr
        )
        (   subrule=ebnfSuffix[$tr.tree,true]
            -> $subrule
        )?
    ;

rewrite_atom
    :   tr=TOKEN_REF^ elementOptions[$tr.tree]!? ARG_ACTION? // for imaginary nodes
    |   RULE_REF
    |   cl=CHAR_LITERAL elementOptions[$cl.tree]!?
    |   sl=STRING_LITERAL elementOptions[$sl.tree]!?
    |   DOLLAR! label // reference to a label in a rewrite rule
    |   ACTION
    ;

label
    :   TOKEN_REF -> LABEL[$TOKEN_REF]
    |   RULE_REF -> LABEL[$RULE_REF]
    ;

rewrite_ebnf
    :   b=rewrite_block
        (   QUESTION
            -> ^(OPTIONAL[$b.start,"?"] $b)
        |   STAR
            -> ^(CLOSURE[$b.start,"*"] $b)
        |   PLUS
            -> ^(POSITIVE_CLOSURE[$b.start,"+"] $b)
        )
    ;

rewrite_tree
    :   TREE_BEGIN^
            rewrite_atom rewrite_element*
        RPAREN!
;

/** Build a tree for a template rewrite:
      ^(TEMPLATE (ID|ACTION) ^(ARGLIST ^(ARG ID ACTION) ...) )
    where ARGLIST is always there even if no args exist.
    ID can be "template" keyword.  If first child is ACTION then it's
    an indirect template ref

    -> foo(a={...}, b={...})
    -> ({string-e})(a={...}, b={...})  // e evaluates to template name
    -> {%{$ID.text}} // create literal template from string (done in ActionTranslator)
    -> {st-expr} // st-expr evaluates to ST
 */
public
rewrite_template
options{k=1;}
    :   // -> template(a={...},...) "..."
        {LT(1).getText().equals("template")}? => // inline
        (   rewrite_template_head
            -> rewrite_template_head
        )
        ( st=DOUBLE_QUOTE_STRING_LITERAL | st=DOUBLE_ANGLE_STRING_LITERAL )
        { adaptor.addChild( $tree.getChild(0), adaptor.create($st) ); }

    |   // -> foo(a={...}, ...)
        rewrite_template_head

    |   // -> ({expr})(a={...}, ...)
        rewrite_indirect_template_head

    |   // -> {...}
        ACTION
    ;

/** -> foo(a={...}, ...) */
rewrite_template_head
    :   id lp=LPAREN
        rewrite_template_args
        RPAREN
        -> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args)
    ;

/** -> ({expr})(a={...}, ...) */
rewrite_indirect_template_head
    :   lp=LPAREN
        ACTION
        RPAREN
        LPAREN rewrite_template_args RPAREN
        -> ^(TEMPLATE[$lp,"TEMPLATE"] ACTION rewrite_template_args)
    ;

rewrite_template_args
    :   rewrite_template_arg (COMMA rewrite_template_arg)*
        -> ^(ARGLIST["ARGLIST"] rewrite_template_arg+)
    |
        -> ARGLIST["ARGLIST"]
    ;

rewrite_template_arg
    :   id a=ASSIGN ACTION
        -> ^(ARG[$a,"ARG"] id ACTION)
    ;

//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
// L E X E R

// get rid of warnings:
fragment STRING_LITERAL : ;
fragment FORCED_ACTION : ;
fragment DOC_COMMENT : ;
fragment SEMPRED : ;

WS
    :   (   ' '
        |   '\t'
        |   ('\r')? '\n'
        )
        { $channel = HIDDEN; }
    ;

// The one-element list lets ML_COMMENT hand the comment kind back to this rule.
COMMENT
@init{List<Integer> type = new ArrayList<Integer>() {{ add(0); }};}
    :   ( SL_COMMENT | ML_COMMENT[type] {$type = type.get(0);} )
        {
            // only doc comments stay on the default channel
            if ( $type != DOC_COMMENT )
                $channel = HIDDEN;
        }
    ;

fragment
SL_COMMENT
    :   '//'
        (   (' $ANTLR') => ' $ANTLR ' SRC (('\r')? '\n')? // src directive
        |   ~('\r'|'\n')* (('\r')? '\n')?
        )
    ;

fragment
ML_COMMENT[List<Integer> type]
    :   '/*'
        {$type.set(0, (input.LA(1) == '*' && input.LA(2) != '/') ? DOC_COMMENT : ML_COMMENT);}
        .*
        '*/'
    ;

OPEN_ELEMENT_OPTION
    :   '<'
    ;

CLOSE_ELEMENT_OPTION
    :   '>'
    ;

AMPERSAND : '@';

COMMA : ',';

QUESTION : '?'
;

TREE_BEGIN : '^(' ;

LPAREN: '(' ;

RPAREN: ')' ;

COLON : ':' ;

STAR: '*' ;

PLUS: '+' ;

ASSIGN : '=' ;

PLUS_ASSIGN : '+=' ;

IMPLIES : '=>' ;

REWRITE : '->' ;

SEMI: ';' ;

ROOT : '^' {hasASTOperator=true;} ;

BANG : '!' {hasASTOperator=true;} ;

OR : '|' ;

WILDCARD : '.' ;

ETC : '...' ;

RANGE : '..' ;

NOT : '~' ;

RCURLY: '}' ;

DOLLAR : '$' ;

STRAY_BRACKET
    :   ']'
        {
            ErrorManager.syntaxError(
                ErrorManager.MSG_SYNTAX_ERROR,
                null,
                state.token,
                "antlr: dangling ']'? make sure to escape with \\]",
                null);
        }
    ;

CHAR_LITERAL
    :   '\''
        (   ESC
        |   ~('\\'|'\'')
        )*
        '\''
        {
            // a single-quoted literal longer than one unescaped char is a string literal
            StringBuffer s = Grammar.getUnescapedStringFromGrammarStringLiteral($text);
            if ( s.length() > 1 )
            {
                $type = STRING_LITERAL;
            }
        }
    ;

DOUBLE_QUOTE_STRING_LITERAL
@init
{
    StringBuilder builder = new StringBuilder();
}
    :   '"'                         {builder.append('"');}
        (   ('\\\"') => '\\' '"'    {builder.append('"');}
        |   '\\' c=~'"'             {builder.append("\\" + (char)$c);}
        |   c=~('\\'|'"')           {builder.append((char)$c);}
        )*
        '"'                         {builder.append('"');}
        {
            setText(builder.toString());
        }
    ;

DOUBLE_ANGLE_STRING_LITERAL
    :   '<<' .* '>>'
    ;

fragment
ESC
    :   '\\'
        (   // due to the way ESC is used, we don't need to handle the following character in different ways
            /*'n'
        |   'r'
        |   't'
        |   'b'
        |   'f'
        |   '"'
        |   '\''
        |   '\\'
        |   '>'
        |   'u' XDIGIT XDIGIT XDIGIT XDIGIT
        |*/ . // unknown, leave as it is
        )
    ;

fragment
DIGIT
    :   '0'..'9'
    ;

fragment
XDIGIT
    :   '0' .. '9'
    |   'a' .. 'f'
    |   'A' ..
'F'
    ;

INT
    :   ('0'..'9')+
    ;

// The one-element list carries the unescaped text back from NESTED_ARG_ACTION.
ARG_ACTION
@init {
    List<String> text = new ArrayList<String>() {{ add(null); }};
}
    :   '['
        NESTED_ARG_ACTION[text]
        ']'
        {setText(text.get(0));}
    ;

fragment
NESTED_ARG_ACTION[List<String> text]
@init {
    $text.set(0, "");
    StringBuilder builder = new StringBuilder();
}
    :   (   ('\\]') => '\\' ']'     {builder.append("]");}
        |   '\\' c=~(']')           {builder.append("\\" + (char)$c);}
        |   ACTION_STRING_LITERAL   {builder.append($ACTION_STRING_LITERAL.text);}
        |   ACTION_CHAR_LITERAL     {builder.append($ACTION_CHAR_LITERAL.text);}
        |   c=~('\\'|'"'|'\''|']')  {builder.append((char)$c);}
        )*
        {
            $text.set(0, builder.toString());
        }
    ;

ACTION
@init
{
    int actionLine = getLine();
    int actionColumn = getCharPositionInLine();
}
    :   NESTED_ACTION
        ('?' {$type = SEMPRED;})?
        {
            String action = $text;
            int n = 1; // num delimiter chars
            if ( action.startsWith("{{") && action.endsWith("}}") )
            {
                $type = FORCED_ACTION;
                n = 2;
            }
            // strip the n delimiter chars on each side, plus the trailing '?' of a sempred
            action = action.substring(n,action.length()-n - ($type==SEMPRED ? 1 : 0));
            setText(action);
        }
    ;

fragment
NESTED_ACTION
    :   '{'
        (   NESTED_ACTION
        |   ACTION_CHAR_LITERAL
        |   ('//' | '/*') => COMMENT
        |   ACTION_STRING_LITERAL
        |   ACTION_ESC
        |   ~('{'|'\''|'"'|'\\'|'}')
        )*
        '}'
    ;

fragment
ACTION_CHAR_LITERAL
    :   '\''
        (   ACTION_ESC
        |   ~('\\'|'\'')
        )*
        '\''
    ;

fragment
ACTION_STRING_LITERAL
    :   '"'
        (   ACTION_ESC
        |   ~('\\'|'"')
        )*
        '"'
    ;

fragment
ACTION_ESC
    :   '\\\''
    |   '\\\"'
    |   '\\' ~('\''|'"')
    ;

TOKEN_REF
    :   'A'..'Z'
        (   'a'..'z'|'A'..'Z'|'_'|'0'..'9'
        )*
    ;

TOKENS
    :   'tokens' WS_LOOP '{'
    ;

OPTIONS
    :   'options' WS_LOOP '{'
    ;

// we get a warning here when looking for options '{', but it works right
RULE_REF
@init
{
    int t=0;
}
    :   'a'..'z' ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
    ;

fragment
WS_LOOP
    :   (   WS
        |   COMMENT
        )*
    ;

fragment
WS_OPT
    :   (WS)?
    ;

/** Reset the file and line information; useful when the grammar
 *  has been generated so that errors are shown relative to the
 *  original file like the old C preprocessor used to do.
 */
fragment
SRC
    :   'src' ' ' file=ACTION_STRING_LITERAL ' ' line=INT
        {
            setFileName($file.text.substring(1,$file.text.length()-1));
            input.setLine(Integer.parseInt($line.text) - 1);  // -1 because SL_COMMENT will increment the line no. KR
        }
    ;