/*
 [The "BSD license"]
 Copyright (c) 2005-2011 Terence Parr
 All rights reserved.

 Grammar conversion to ANTLR v3:
 Copyright (c) 2011 Sam Harwell
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
tree grammar DefineGrammarItemsWalker;

options {
    tokenVocab = ANTLR;
    ASTLabelType = GrammarAST;
}

// Dynamic scope shared by attrScope and ruleScopeSpec: collects the
// (name ID -> ACTION) pairs matched by attrScopeAction so the enclosing
// rule can register them once the scope has been created.
scope AttributeScopeActions {
    HashMap<GrammarAST, GrammarAST> actions;
}

@header {
package org.antlr.grammar.v3;
import org.antlr.tool.*;
import java.util.HashSet;
import java.util.Set;
}

@members {
/** Grammar being populated; assigned by the grammar_ rule from its argument. */
protected Grammar grammar;
/** Root node of the walked tree; assigned by the grammar_ rule. */
protected GrammarAST root;
/** Name of the rule currently being walked; set by the rule rule. */
protected String currentRuleName;
/** Innermost rewrite BLOCK node being walked, or null outside rewrite blocks. */
protected GrammarAST currentRewriteBlock;
/** Start node of the rewrite rule currently being walked. */
protected GrammarAST currentRewriteRule;
/** Number of the outermost alternative currently being walked. */
protected int outerAltNum = 0;
/** Nesting depth of block; 1 means the outermost block of a rule. */
protected int blockLevel = 0;

/**
 * Counts the ALT children of the first BLOCK child of the given tree.
 *
 * @param t tree whose first BLOCK child is inspected
 * @return number of direct children of that BLOCK whose type is ALT
 */
public final int countAltsForRule( CommonTree t ) {
    CommonTree block = (CommonTree)t.getFirstChildWithType(BLOCK);
    int altCount = 0;
    for (int i = 0; i < block.getChildCount(); i++) {
        if (block.getChild(i).getType() == ALT)
            altCount++;
    }
    return altCount;
}

/** Post-walk hook invoked from the after-action of grammar_. */
protected final void finish() {
    trimGrammar();
}

/** Remove any lexer rules from a COMBINED; already passed to lexer */
protected final void trimGrammar() {
    if ( grammar.type != Grammar.COMBINED ) {
        return;
    }
    // form is (header ... ) ( grammar ID (scope ...) ... ( rule ... ) ( rule ... ) ... )
    GrammarAST p = root;
    // find the grammar spec
    while ( !p.getText().equals( "grammar" ) ) {
        p = (GrammarAST)p.getNextSibling();
    }
    for ( int i = 0; i < p.getChildCount(); i++ ) {
        if ( p.getChild( i ).getType() != RULE )
            continue;

        String ruleName = p.getChild(i).getChild(0).getText();
        //Console.Out.WriteLine( "rule " + ruleName + " prev=" + prev.getText() );
        if (Rule.getRuleType(ruleName) == Grammar.LEXER) {
            // remove lexer rule
            p.deleteChild( i );
            // stay on the same index: the following children shifted down by one
            i--;
        }
    }
    //Console.Out.WriteLine( "root after removal is: " + root.ToStringList() );
}

/**
 * Records an inline action on the current rule. Does nothing when the
 * grammar has no Rule registered under currentRuleName.
 */
protected final void trackInlineAction( GrammarAST actionAST ) {
    Rule r = grammar.getRule( currentRuleName );
    if ( r != null ) {
        r.trackInlineAction( actionAST );
    }
}
}

// Entry rule. Stores the target Grammar and the tree root, sets grammar.type
// from the root node type, walks the grammar spec and finally calls finish().
public
grammar_[Grammar g]
@init
{
grammar = $g;
root = $start;
}
@after
{
finish();
}
    :   ^( LEXER_GRAMMAR    {grammar.type = Grammar.LEXER;}       grammarSpec )
    |   ^( PARSER_GRAMMAR   {grammar.type = Grammar.PARSER;}      grammarSpec )
    |   ^( TREE_GRAMMAR     {grammar.type = Grammar.TREE_PARSER;} grammarSpec )
    |   ^( COMBINED_GRAMMAR {grammar.type = Grammar.COMBINED;}    grammarSpec )
    ;

// Global 'scope' definition: defines a dynamic global scope on the grammar,
// adds its ';'-separated attributes and registers the collected named actions.
attrScope
scope AttributeScopeActions;
@init
{
    $AttributeScopeActions::actions = new HashMap<GrammarAST, GrammarAST>();
}
    :   ^( 'scope' name=ID attrScopeAction* attrs=ACTION )
        {
            AttributeScope scope = grammar.defineGlobalScope($name.text,$attrs.getToken());
            scope.isDynamicGlobalScope = true;
            scope.addAttributes($attrs.text, ';');
            for (GrammarAST action : $AttributeScopeActions::actions.keySet())
                scope.defineNamedAction(action, $AttributeScopeActions::actions.get(action));
        }
    ;

// Named action inside a scope definition; stored in the enclosing
// AttributeScopeActions map keyed by its ID node.
attrScopeAction
    :   ^(AMPERSAND ID ACTION)
        {
            $AttributeScopeActions::actions.put( $ID, $ACTION );
        }
    ;

grammarSpec
    :   id=ID
        (cmt=DOC_COMMENT)?
        ( optionsSpec )?
        (delegateGrammars)?
        (tokensSpec)?
        (attrScope)*
        (actions)?
        rules
    ;

actions
    :   ( action )+
    ;

// Grammar-level named action, with or without an explicit action scope name;
// registered through grammar.defineNamedAction.
action
@init
{
    String scope=null;
    GrammarAST nameAST=null, actionAST=null;
}
    :   ^(amp=AMPERSAND id1=ID
            (   id2=ID a1=ACTION
                {scope=$id1.text; nameAST=$id2; actionAST=$a1;}
            |   a2=ACTION
                {scope=null; nameAST=$id1; actionAST=$a2;}
            )
        )
        {
            grammar.defineNamedAction($amp,scope,nameAST,actionAST);
        }
    ;

optionsSpec
    :   ^(OPTIONS .*)
    ;

delegateGrammars
    :   ^( 'import' ( ^(ASSIGN ID ID) | ID )+ )
    ;

tokensSpec
    :   ^(TOKENS tokenSpec*)
    ;

tokenSpec
    :   t=TOKEN_REF
    |   ^(  ASSIGN
            TOKEN_REF
            (   STRING_LITERAL
            |   CHAR_LITERAL
            )
        )
    ;

rules
    :   (rule | ^(PREC_RULE .*))+
    ;

// One rule definition. A lexer-named rule inside a COMBINED grammar is handed
// to grammar.defineLexerRuleFoundInParser and r stays null; any other rule is
// defined on the grammar together with its parameter/return scopes and throws
// list. The rule options are copied onto the block AST at the end.
rule
@init
{
    String name=null;
    Map<String, Object> opts=null;
    Rule r = null;
}
    :   ^(  RULE id=ID {opts = $RULE.getBlockOptions();}
            (modifier)?
            ^( ARG (args=ARG_ACTION)? )
            ^( RET (ret=ARG_ACTION)? )
            (throwsSpec)?
            (optionsSpec)?
            {
                name = $id.text;
                currentRuleName = name;
                if ( Rule.getRuleType(name) == Grammar.LEXER && grammar.type==Grammar.COMBINED )
                {
                    // a merged grammar spec, track lexer rules and send to another grammar
                    grammar.defineLexerRuleFoundInParser($id.getToken(), $start);
                }
                else
                {
                    int numAlts = countAltsForRule($start);
                    grammar.defineRule($id.getToken(), $modifier.mod, opts, $start, $args, numAlts);
                    r = grammar.getRule(name);
                    if ( $args!=null )
                    {
                        r.parameterScope = grammar.createParameterScope(name,$args.getToken());
                        r.parameterScope.addAttributes($args.text, ',');
                    }
                    if ( $ret!=null )
                    {
                        r.returnScope = grammar.createReturnScope(name,$ret.getToken());
                        r.returnScope.addAttributes($ret.text, ',');
                    }
                    if ( $throwsSpec.exceptions != null )
                    {
                        for (String exception : $throwsSpec.exceptions)
                            r.throwsSpec.add( exception );
                    }
                }
            }
            (ruleScopeSpec[r])?
            (ruleAction[r])*
            { this.blockLevel=0; }
            b=block
            (exceptionGroup)?
            EOR
            {
                // copy rule options into the block AST, which is where
                // the analysis will look for k option etc...
                $b.start.setBlockOptions(opts);
            }
        )
    ;

// Rule-level named action; r may be null (see rule), in which case it is skipped.
ruleAction[Rule r]
    :   ^(amp=AMPERSAND id=ID a=ACTION ) {if (r!=null) r.defineNamedAction($amp,$id,$a);}
    ;

// Rule modifier; returns the text of the matched modifier token.
modifier returns [String mod]
@init
{
    $mod = $start.getToken().getText();
}
    :   'protected'
    |   'public'
    |   'private'
    |   'fragment'
    ;

// Collects the exception names listed in a 'throws' clause.
throwsSpec returns [HashSet<String> exceptions]
@init
{
    $exceptions = new HashSet<String>();
}
    :   ^('throws' (ID {$exceptions.add($ID.text);})+ )
    ;

// Rule-level 'scope' spec: an optional dynamic rule scope definition followed
// by any number of references to global scopes. r is null for a lexer rule
// found in a COMBINED grammar (see rule); nothing is attached in that case,
// matching the null check in ruleAction. Unknown global scopes are still
// reported.
ruleScopeSpec[Rule r]
scope AttributeScopeActions;
@init
{
    $AttributeScopeActions::actions = new HashMap<GrammarAST, GrammarAST>();
}
    :   ^(  'scope'
            (   attrScopeAction* attrs=ACTION
                {
                    if ( r != null )
                    {
                        r.ruleScope = grammar.createRuleScope(r.name,$attrs.getToken());
                        r.ruleScope.isDynamicRuleScope = true;
                        r.ruleScope.addAttributes($attrs.text, ';');
                        for (GrammarAST action : $AttributeScopeActions::actions.keySet())
                            r.ruleScope.defineNamedAction(action, $AttributeScopeActions::actions.get(action));
                    }
                }
            )?
            (   uses=ID
                {
                    if ( grammar.getGlobalScope($uses.text)==null ) {
                        ErrorManager.grammarError(ErrorManager.MSG_UNKNOWN_DYNAMIC_SCOPE,
                                                  grammar,
                                                  $uses.getToken(),
                                                  $uses.text);
                    }
                    else if ( r != null ) {
                        if ( r.useScopes==null ) {r.useScopes=new ArrayList<String>();}
                        r.useScopes.add($uses.text);
                    }
                }
            )*
        )
    ;

// A block of alternatives. blockLevel tracks nesting depth; outerAltNum is
// reset to 1 on entering the outermost block and incremented after each of
// its alternatives. The finally clause restores blockLevel on exit.
block
@init
{
    // must run during backtracking
    this.blockLevel++;
    if ( blockLevel == 1 )
        this.outerAltNum=1;
}
    :   ^(  BLOCK
            (optionsSpec)?
            (blockAction)*
            (   alternative rewrite
                {{
                    if ( this.blockLevel == 1 )
                        this.outerAltNum++;
                }}
            )+
            EOB
        )
    ;
finally { blockLevel--; }

// TODO: this does nothing now! subrules cannot have init actions. :(
blockAction
    :   ^(amp=AMPERSAND id=ID a=ACTION ) // {r.defineAction(#amp,#id,#a);}
    ;

alternative
//@init
//{
//    if ( state.backtracking == 0 )
//    {
//        if ( grammar.type!=Grammar.LEXER && grammar.GetOption("output")!=null && blockLevel==1 )
//        {
//            GrammarAST aRewriteNode = $start.FindFirstType(REWRITE); // alt itself has rewrite?
//            GrammarAST rewriteAST = (GrammarAST)$start.Parent.getChild($start.ChildIndex + 1);
//            // we have a rewrite if alt uses it inside subrule or this alt has one
//            // but don't count -> ... rewrites, which mean "do default auto construction"
//            if ( aRewriteNode!=null||
//                 (firstRewriteAST!=null &&
//                  firstRewriteAST.getType()==REWRITE &&
//                  firstRewriteAST.getChild(0)!=null &&
//                  firstRewriteAST.getChild(0).getType()!=ETC) )
//            {
//                Rule r = grammar.getRule(currentRuleName);
//                r.TrackAltsWithRewrites($start,this.outerAltNum);
//            }
//        }
//    }
//}
    :   ^( ALT (element)+ EOA )
    ;

exceptionGroup
    :   ( exceptionHandler )+ (finallyClause)?
    |   finallyClause
    ;

exceptionHandler
    :   ^('catch' ARG_ACTION ACTION) {trackInlineAction($ACTION);}
    ;

finallyClause
    :   ^('finally' ACTION) {trackInlineAction($ACTION);}
    ;

// One element of an alternative. Labels (ASSIGN / PLUS_ASSIGN) are registered
// on the grammar as rule, wildcard-tree or token labels depending on the type
// of the labeled node (looking through a ROOT or BANG wrapper); actions and
// semantic predicates are stamped with the current outerAltNum and tracked.
element
    :   ^(ROOT element)
    |   ^(BANG element)
    |   atom[null]
    |   ^(NOT element)
    |   ^(RANGE atom[null] atom[null])
    |   ^(CHAR_RANGE atom[null] atom[null])
    |   ^(  ASSIGN id=ID el=element)
        {
            GrammarAST e = $el.start;
            if ( e.getType()==ANTLRParser.ROOT || e.getType()==ANTLRParser.BANG )
            {
                e = (GrammarAST)e.getChild(0);
            }
            if ( e.getType()==RULE_REF)
            {
                grammar.defineRuleRefLabel(currentRuleName,$id.getToken(),e);
            }
            else if ( e.getType()==WILDCARD && grammar.type==Grammar.TREE_PARSER )
            {
                grammar.defineWildcardTreeLabel(currentRuleName,$id.getToken(),e);
            }
            else
            {
                grammar.defineTokenRefLabel(currentRuleName,$id.getToken(),e);
            }
        }
    |   ^(  PLUS_ASSIGN id2=ID a2=element
            {
                GrammarAST a = $a2.start;
                if ( a.getType()==ANTLRParser.ROOT || a.getType()==ANTLRParser.BANG )
                {
                    a = (GrammarAST)a.getChild(0);
                }
                if ( a.getType()==RULE_REF )
                {
                    grammar.defineRuleListLabel(currentRuleName,$id2.getToken(),a);
                }
                else if ( a.getType() == WILDCARD && grammar.type == Grammar.TREE_PARSER )
                {
                    grammar.defineWildcardTreeListLabel( currentRuleName, $id2.getToken(), a );
                }
                else
                {
                    grammar.defineTokenListLabel(currentRuleName,$id2.getToken(),a);
                }
            }
        )
    |   ebnf
    |   tree_
    |   ^( SYNPRED block )
    |   act=ACTION
        {
            $act.outerAltNum = this.outerAltNum;
            trackInlineAction($act);
        }
    |   act2=FORCED_ACTION
        {
            $act2.outerAltNum = this.outerAltNum;
            trackInlineAction($act2);
        }
    |   SEMPRED
        {
            $SEMPRED.outerAltNum = this.outerAltNum;
            trackInlineAction($SEMPRED);
        }
    |   SYN_SEMPRED
    |   ^(BACKTRACK_SEMPRED .*)
    |   GATED_SEMPRED
        {
            $GATED_SEMPRED.outerAltNum = this.outerAltNum;
            trackInlineAction($GATED_SEMPRED);
        }
    |   EPSILON
    ;

ebnf
    :   (dotLoop) => dotLoop // .* or .+
    |   block
    |   ^( OPTIONAL block )
    |   ^( CLOSURE block )
    |   ^( POSITIVE_CLOSURE block )
    ;

/** Track the .* and .+ idioms and make them nongreedy by default.
 */
dotLoop
    :   (   ^( CLOSURE dotBlock )
        |   ^( POSITIVE_CLOSURE dotBlock )
        )
        {
            GrammarAST block = (GrammarAST)$start.getChild(0);
            Map<String, Object> opts = new HashMap<String, Object>();
            opts.put("greedy", "false");
            if ( grammar.type!=Grammar.LEXER )
            {
                // parser grammars assume k=1 for .* loops
                // otherwise they (analysis?) look til EOF!
                opts.put("k", 1);
            }
            block.setOptions(grammar,opts);
        }
    ;

dotBlock
    :   ^( BLOCK ^( ALT WILDCARD EOA ) EOB )
    ;

tree_
    :   ^(TREE_BEGIN element+)
    ;

// A single atom. Rule and token references are reported to the grammar for
// the current rule and outer alternative; argument actions are stamped with
// outerAltNum and tracked. scope_ is the ID node of a DOT scope override, or
// null when the atom is not scoped.
atom[GrammarAST scope_]
    :   ^( rr=RULE_REF (rarg=ARG_ACTION)? )
        {
            grammar.altReferencesRule( currentRuleName, $scope_, $rr, this.outerAltNum );
            if ( $rarg != null )
            {
                $rarg.outerAltNum = this.outerAltNum;
                trackInlineAction($rarg);
            }
        }
    |   ^( t=TOKEN_REF (targ=ARG_ACTION )? )
        {
            if ( $targ != null )
            {
                $targ.outerAltNum = this.outerAltNum;
                trackInlineAction($targ);
            }
            if ( grammar.type == Grammar.LEXER )
            {
                grammar.altReferencesRule( currentRuleName, $scope_, $t, this.outerAltNum );
            }
            else
            {
                grammar.altReferencesTokenID( currentRuleName, $t, this.outerAltNum );
            }
        }
    |   c=CHAR_LITERAL
        {
            if ( grammar.type != Grammar.LEXER )
            {
                Rule rule = grammar.getRule(currentRuleName);
                if ( rule != null )
                    rule.trackTokenReferenceInAlt($c, outerAltNum);
            }
        }
    |   s=STRING_LITERAL
        {
            if ( grammar.type != Grammar.LEXER )
            {
                Rule rule = grammar.getRule(currentRuleName);
                if ( rule!=null )
                    rule.trackTokenReferenceInAlt($s, outerAltNum);
            }
        }
    |   WILDCARD
    |   ^(DOT ID atom[$ID]) // scope override on rule
    ;

ast_suffix
    :   ROOT
    |   BANG
    ;

// Rewrite section following an alternative (possibly empty). Remembers the
// start node in currentRewriteRule and, when the grammar builds ASTs, gives it
// a fresh rewriteRefsDeep set. Rewrite predicates are stamped and tracked.
rewrite
@init
{
    // track top level REWRITES node, store stuff there
    currentRewriteRule = $start; // has to execute during backtracking
    if ( state.backtracking == 0 )
    {
        if ( grammar.buildAST() )
            currentRewriteRule.rewriteRefsDeep = new HashSet<GrammarAST>();
    }
}
    :   ^(  REWRITES
            (   ^( REWRITE (pred=SEMPRED)? rewrite_alternative )
                {
                    if ( $pred != null )
                    {
                        $pred.outerAltNum = this.outerAltNum;
                        trackInlineAction($pred);
                    }
                }
            )*
        )
        //{System.out.println("-> refs = "+currentRewriteRule.rewriteRefsDeep);}
    |
    ;

// Nested block inside a rewrite. Makes this BLOCK the currentRewriteBlock
// with fresh shallow/deep reference sets, then copies its shallow references
// into the deep set of the enclosing block. The finally clause restores the
// enclosing block pointer.
rewrite_block
@init
{
    GrammarAST enclosingBlock = currentRewriteBlock;
    if ( state.backtracking == 0 )
    {
        // don't do if guessing
        currentRewriteBlock=$start; // pts to BLOCK node
        currentRewriteBlock.rewriteRefsShallow = new HashSet<GrammarAST>();
        currentRewriteBlock.rewriteRefsDeep = new HashSet<GrammarAST>();
    }
}
    :   ^( BLOCK rewrite_alternative EOB )
        //{System.out.println("atoms="+currentRewriteBlock.rewriteRefs);}
        {
            // copy the element refs in this block to the surrounding block
            if ( enclosingBlock != null )
            {
                for (GrammarAST item : currentRewriteBlock.rewriteRefsShallow)
                    enclosingBlock.rewriteRefsDeep.add( item );
            }
            //currentRewriteBlock = enclosingBlock; // restore old BLOCK ptr
        }
    ;
finally { currentRewriteBlock = enclosingBlock; }

rewrite_alternative
    :   {grammar.buildAST()}? => ^( a=ALT ( ( rewrite_element )+ | EPSILON ) EOA )
    |   {grammar.buildTemplate()}? => rewrite_template
    |   ETC {this.blockLevel==1}? // only valid as outermost rewrite
    ;

rewrite_element
    :   rewrite_atom
    |   rewrite_ebnf
    |   rewrite_tree
    ;

rewrite_ebnf
    :   ^( OPTIONAL rewrite_block )
    |   ^( CLOSURE rewrite_block )
    |   ^( POSITIVE_CLOSURE rewrite_block )
    ;

rewrite_tree
    :   ^( TREE_BEGIN rewrite_atom ( rewrite_element )* )
    ;

// Atom inside a rewrite. When not backtracking and the grammar builds ASTs,
// rule refs, labels, literals and token refs are recorded in the current
// rewrite block (if any) and in the current rewrite rule; a TOKEN_REF whose
// text is not among the tokens referenced in the current outer alternative is
// treated as imaginary and not recorded.
rewrite_atom
@init
{
    if ( state.backtracking == 0 )
    {
        Rule r = grammar.getRule(currentRuleName);
        Set tokenRefsInAlt = r.getTokenRefsInAlt(outerAltNum);
        boolean imaginary =
            $start.getType()==TOKEN_REF &&
            !tokenRefsInAlt.contains($start.getText());
        if ( !imaginary && grammar.buildAST() &&
             ($start.getType()==RULE_REF ||
              $start.getType()==LABEL ||
              $start.getType()==TOKEN_REF ||
              $start.getType()==CHAR_LITERAL ||
              $start.getType()==STRING_LITERAL) )
        {
            // track per block and for entire rewrite rule
            if ( currentRewriteBlock!=null )
            {
                currentRewriteBlock.rewriteRefsShallow.add($start);
                currentRewriteBlock.rewriteRefsDeep.add($start);
            }

            //System.out.println("adding "+$start.getText()+" to "+currentRewriteRule.getText());
            currentRewriteRule.rewriteRefsDeep.add($start);
        }
    }
}
    :   RULE_REF
    |   (   ^(  TOKEN_REF
                (   ARG_ACTION
                    {
                        $ARG_ACTION.outerAltNum = this.outerAltNum;
                        trackInlineAction($ARG_ACTION);
                    }
                )?
            )
        |   CHAR_LITERAL
        |   STRING_LITERAL
        )
    |   LABEL
    |   ACTION
        {
            $ACTION.outerAltNum = this.outerAltNum;
            trackInlineAction($ACTION);
        }
    ;

// Template rewrite. Every ACTION found (indirect template name, argument
// values, or a bare action) is stamped with outerAltNum and tracked.
rewrite_template
    :   ^( ALT EPSILON EOA )
    |   ^(  TEMPLATE (id=ID|ind=ACTION)
            ^(  ARGLIST
                (   ^( ARG arg=ID a=ACTION )
                    {
                        $a.outerAltNum = this.outerAltNum;
                        trackInlineAction($a);
                    }
                )*
            )
            {
                if ( $ind!=null )
                {
                    $ind.outerAltNum = this.outerAltNum;
                    trackInlineAction($ind);
                }
            }
            (   DOUBLE_QUOTE_STRING_LITERAL
            |   DOUBLE_ANGLE_STRING_LITERAL
            )?
        )
    |   act=ACTION
        {
            $act.outerAltNum = this.outerAltNum;
            trackInlineAction($act);
        }
    ;