/*
 [The "BSD licence"]
 Copyright (c) 2005-2007 Terence Parr
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/** ANTLR v3 grammar written in ANTLR v3 with AST construction */
grammar ANTLRv3;

options {
    output=AST;
    ASTLabelType=CommonTree;
}

tokens {
    DOC_COMMENT;
    PARSER;
    LEXER;
    RULE;
    BLOCK;
    OPTIONAL;
    CLOSURE;
    POSITIVE_CLOSURE;
    SYNPRED;
    RANGE;
    CHAR_RANGE;
    EPSILON;
    ALT;
    EOR;
    EOB;
    EOA; // end of alt
    ID;
    ARG;
    ARGLIST;
    RET;
    LEXER_GRAMMAR;
    PARSER_GRAMMAR;
    TREE_GRAMMAR;
    COMBINED_GRAMMAR;
    INITACTION;
    LABEL; // $x used in rewrite rules
    TEMPLATE;
    SCOPE='scope';
    SEMPRED;
    GATED_SEMPRED; // {p}? =>
    SYN_SEMPRED; // (...) =>   it's a manually-specified synpred converted to sempred
    BACKTRACK_SEMPRED; // auto backtracking mode syn pred converted to sempred
    FRAGMENT='fragment';
    TREE_BEGIN='^(';
    ROOT='^';
    BANG='!';
    // NOTE(review): RANGE is also declared above without a literal; confirm
    // whether both declarations are still needed before removing either one.
    RANGE='..';
    REWRITE='->';
}

@members {
    // Grammar kind selected in grammarDef; holds one of the *_GRAMMAR token types.
    int gtype;

    // Names of the rules matched so far; the "rule" rule appends to this list
    // in its @after action. Initialized here so that action never dereferences
    // null when the caller has not assigned a list; callers may still replace it.
    public List<String> rules = new ArrayList<String>();
}

@header {
package org.antlr.gunit.swingui.parsers;

import java.util.ArrayList;
import java.util.List;
}

@lexer::header {
package org.antlr.gunit.swingui.parsers;
}


/** Entry rule for a whole grammar file.  The optional leading keyword
 *  selects the grammar kind stored in gtype, and the root of the resulting
 *  tree is created from the 'grammar' token using that token type.
 */
grammarDef
    :   DOC_COMMENT?
        (   'lexer'  {gtype=LEXER_GRAMMAR;}    // pure lexer
        |   'parser' {gtype=PARSER_GRAMMAR;}   // pure parser
        |   'tree'   {gtype=TREE_GRAMMAR;}     // a tree parser
        |            {gtype=COMBINED_GRAMMAR;} // merged parser/lexer
        )
        g='grammar' id ';' optionsSpec? tokensSpec? attrScope* action*
        rule+
        EOF
        -> ^( {adaptor.create(gtype,$g)}
              id DOC_COMMENT? optionsSpec? tokensSpec? attrScope* action* rule+
            )
    ;

/** Match a tokens section; the TOKENS token already includes the opening brace. */
tokensSpec
    :   TOKENS tokenSpec+ '}' -> ^(TOKENS tokenSpec+)
    ;

/** Match one entry of a tokens section: a token name, optionally bound to a literal. */
tokenSpec
    :   TOKEN_REF
        (   '=' (lit=STRING_LITERAL|lit=CHAR_LITERAL)   -> ^('=' TOKEN_REF $lit)
        |                                               -> TOKEN_REF
        )
        ';'
    ;

/** Match a grammar-level named scope: scope name followed by an action. */
attrScope
    :   'scope' id ACTION -> ^('scope' id ACTION)
    ;

/** Match stuff like @parser::members {int i;} */
action
    :   '@' (actionScopeName '::')? id ACTION -> ^('@' actionScopeName? id ACTION)
    ;

/** Sometimes the scope names will collide with keywords; allow them as
 *  ids for action scopes.
 */
actionScopeName
    :   id
    |   l='lexer'   -> ID[$l]
    |   p='parser'  -> ID[$p]
    ;

/** Match an options section; the OPTIONS token already includes the opening brace. */
optionsSpec
    :   OPTIONS (option ';')+ '}' -> ^(OPTIONS option+)
    ;

/** Match a single name=value option. */
option
    :   id '=' optionValue -> ^('=' id optionValue)
    ;

/** Match the value side of an option. */
optionValue
    :   id
    |   STRING_LITERAL
    |   CHAR_LITERAL
    |   INT
    |   s='*' -> STRING_LITERAL[$s]  // used for k=*
    ;

/** Match one rule definition.  The rule name is kept in the dynamic scope
 *  (read by ebnf through $rule::name) and is appended to the rules list
 *  once the rule has been matched.
 */
rule
scope {
    String name;
}
@after{
    this.rules.add($rule::name);
}
    :   DOC_COMMENT?
        ( modifier=('protected'|'public'|'private'|'fragment') )?
        id {$rule::name = $id.text;}
        '!'?
        ( arg=ARG_ACTION )?
        ( 'returns' rt=ARG_ACTION  )?
        throwsSpec? optionsSpec? ruleScopeSpec? ruleAction*
        ':' altList ';'
        exceptionGroup?
        -> ^( RULE id {modifier!=null?adaptor.create(modifier):null} ^(ARG $arg)? ^(RET $rt)?
              optionsSpec? ruleScopeSpec? ruleAction*
              altList
              exceptionGroup?
              EOR["EOR"]
            )
    ;

/** Match stuff like @init {int i;} */
ruleAction
    :   '@' id ACTION -> ^('@' id ACTION)
    ;

/** Match a throws clause listing one or more ids. */
throwsSpec
    :   'throws' id ( ',' id )* -> ^('throws' id+)
    ;

/** Match a rule-level scope spec: an inline scope action, a list of named
 *  scopes, or the inline action followed by the list.
 */
ruleScopeSpec
    :   'scope' ACTION -> ^('scope' ACTION)
    |   'scope' id (',' id)* ';' -> ^('scope' id+)
    |   'scope' ACTION
        'scope' id (',' id)* ';'
        -> ^('scope' ACTION id+ )
    ;

/** Match a parenthesized block of alternatives; BLOCK takes its position from
 *  the left paren and EOB from the right paren.
 */
block
    :   lp='('
        ( (opts=optionsSpec)? ':' )?
        a1=alternative rewrite ( '|' a2=alternative rewrite )*
        rp=')'
        -> ^( BLOCK[$lp,"BLOCK"] optionsSpec? (alternative rewrite?)+ EOB[$rp,"EOB"] )
    ;

/** Match the alternatives that make up a rule body. */
altList
@init {
    // must create root manually as it's used by invoked rules in real antlr tool.
    // leave here to demonstrate use of {...} in rewrite rule
    // it's really BLOCK[firstToken,"BLOCK"]; set line/col to previous ( or : token.
    CommonTree blkRoot = (CommonTree)adaptor.create(BLOCK,input.LT(-1),"BLOCK");
}
    :   a1=alternative rewrite ( '|' a2=alternative rewrite )*
        -> ^( {blkRoot} (alternative rewrite?)+ EOB["EOB"] )
    ;

/** Match one alternative; an empty alternative yields an ALT holding EPSILON. */
alternative
@init {
    Token firstToken = input.LT(1);
    Token prevToken = input.LT(-1); // either : or | I think
}
    :   element+ -> ^(ALT[firstToken,"ALT"] element+ EOA["EOA"])
    |   -> ^(ALT[prevToken,"ALT"] EPSILON[prevToken,"EPSILON"] EOA["EOA"])
    ;

/** Match the catch/finally clauses that may follow a rule. */
exceptionGroup
    :   ( exceptionHandler )+ ( finallyClause )?
    |   finallyClause
    ;

/** Match a single catch clause. */
exceptionHandler
    :   'catch' ARG_ACTION ACTION -> ^('catch' ARG_ACTION ACTION)
    ;

/** Match a finally clause. */
finallyClause
    :   'finally' ACTION -> ^('finally' ACTION)
    ;

/** Match one element of an alternative. */
element
    :   elementNoOptionSpec
    ;

/** Match a labeled or unlabeled atom/block, an EBNF subrule, an action,
 *  a semantic predicate, or a tree spec.  An element followed by an EBNF
 *  suffix is wrapped in a synthetic BLOCK/ALT rooted at the suffix node.
 */
elementNoOptionSpec
    :   id (labelOp='='|labelOp='+=') atom
        (   ebnfSuffix  -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id atom) EOA["EOA"]) EOB["EOB"]))
        |               -> ^($labelOp id atom)
        )
    |   id (labelOp='='|labelOp='+=') block
        (   ebnfSuffix  -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id block) EOA["EOA"]) EOB["EOB"]))
        |               -> ^($labelOp id block)
        )
    |   atom
        (   ebnfSuffix  -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] atom EOA["EOA"]) EOB["EOB"]) )
        |               -> atom
        )
    |   ebnf
    |   ACTION
    |   SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED )
    |   treeSpec
        (   ebnfSuffix  -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] treeSpec EOA["EOA"]) EOB["EOB"]) )
        |               -> treeSpec
        )
    ;

/** Match a range, terminal, negated set or rule reference, each optionally
 *  followed by a '^' or '!' operator that becomes the root of the result.
 */
atom:   range ( (op='^'|op='!') -> ^($op range) | -> range )
    |   terminal
    |   notSet ( (op='^'|op='!') -> ^($op notSet) | -> notSet )
    |   RULE_REF ( arg=ARG_ACTION )? ( (op='^'|op='!') )?
        -> {$arg!=null&&op!=null}? ^($op RULE_REF $arg)
        -> {$arg!=null}?           ^(RULE_REF $arg)
        -> {$op!=null}?            ^($op RULE_REF)
        -> RULE_REF
    ;

/** Match '~' applied to a single terminal or to a block. */
notSet
    :   '~'
        (   notTerminal -> ^('~' notTerminal)
        |   block       -> ^('~' block)
        )
    ;

/** Match a tree pattern: '^(' followed by at least two elements and ')'. */
treeSpec
    :   '^(' element ( element )+ ')' -> ^(TREE_BEGIN element+)
    ;

/** Matches EBNF blocks (and token sets via block rule) */
ebnf
@init {
    Token firstToken = input.LT(1);
}
@after {
    $ebnf.tree.getToken().setLine(firstToken.getLine());
    $ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
}
    :   block
        (   op='?'  -> ^(OPTIONAL[op] block)
        |   op='*'  -> ^(CLOSURE[op] block)
        |   op='+'  -> ^(POSITIVE_CLOSURE[op] block)
        |   '=>'    // syntactic predicate
                    -> {gtype==COMBINED_GRAMMAR &&
                        Character.isUpperCase($rule::name.charAt(0))}?
                       // if lexer rule in combined, leave as pred for lexer
                       ^(SYNPRED["=>"] block)
                    // in real antlr tool, text for SYN_SEMPRED is predname
                    -> SYN_SEMPRED
        |           -> block
        )
    ;

/** Match a character range such as 'a'..'z'. */
range!
    :   c1=CHAR_LITERAL RANGE c2=CHAR_LITERAL -> ^(CHAR_RANGE[$c1,".."] $c1 $c2)
    ;

/** Match a literal, token reference or wildcard, optionally followed by
 *  a '^' or '!' operator that becomes the root of the result.
 */
terminal
    :   (   CHAR_LITERAL                -> CHAR_LITERAL
            // Args are only valid for lexer rules
        |   TOKEN_REF
            (   ARG_ACTION              -> ^(TOKEN_REF ARG_ACTION)
            |                           -> TOKEN_REF
            )
        |   STRING_LITERAL              -> STRING_LITERAL
        |   '.'                         -> '.'
        )
        (   '^'                         -> ^('^' $terminal)
        |   '!'                         -> ^('!' $terminal)
        )?
    ;

/** The terminals that may directly follow '~'. */
notTerminal
    :   CHAR_LITERAL
    |   TOKEN_REF
    |   STRING_LITERAL
    ;

/** Match an EBNF suffix operator and rewrite it to the matching imaginary
 *  node built from the operator token.
 */
ebnfSuffix
@init {
    Token op = input.LT(1);
}
    :   '?' -> OPTIONAL[op]
    |   '*' -> CLOSURE[op]
    |   '+' -> POSITIVE_CLOSURE[op]
    ;



// R E W R I T E  S Y N T A X

/** Match zero or more predicated rewrites followed by a final unpredicated
 *  rewrite, or nothing at all.
 */
rewrite
    :   (rew+='->' preds+=SEMPRED predicated+=rewrite_alternative)*
        rew2='->' last=rewrite_alternative
        -> ^($rew $preds $predicated)* ^($rew2 $last)
    |
    ;

/** Match the right-hand side of one rewrite; an empty right-hand side
 *  yields an ALT holding EPSILON.
 */
rewrite_alternative
options {backtrack=true;}
    :   rewrite_template
    |   rewrite_tree_alternative
    |   /* empty rewrite */ -> ^(ALT["ALT"] EPSILON["EPSILON"] EOA["EOA"])
    ;

/** Match a parenthesized rewrite alternative. */
rewrite_tree_block
    :   lp='(' rewrite_tree_alternative ')'
        -> ^(BLOCK[$lp,"BLOCK"] rewrite_tree_alternative EOB[$lp,"EOB"])
    ;

/** Match one or more rewrite elements and wrap them in ALT ... EOA. */
rewrite_tree_alternative
    :   rewrite_tree_element+ -> ^(ALT["ALT"] rewrite_tree_element+ EOA["EOA"])
    ;

/** Match a rewrite atom, tree or EBNF block; a suffixed atom or tree is
 *  wrapped in a synthetic BLOCK/ALT rooted at the suffix node.
 */
rewrite_tree_element
    :   rewrite_tree_atom
    |   rewrite_tree_atom ebnfSuffix
        -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree_atom EOA["EOA"]) EOB["EOB"]))
    |   rewrite_tree
        (   ebnfSuffix
            -> ^(ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree EOA["EOA"]) EOB["EOB"]))
        |   -> rewrite_tree
        )
    |   rewrite_tree_ebnf
    ;

/** Match the leaf elements allowed on the right-hand side of a rewrite. */
rewrite_tree_atom
    :   CHAR_LITERAL
    |   TOKEN_REF ARG_ACTION? -> ^(TOKEN_REF ARG_ACTION?) // for imaginary nodes
    |   RULE_REF
    |   STRING_LITERAL
    |   d='$' id -> LABEL[$d,$id.text] // reference to a label in a rewrite rule
    |   ACTION
    ;

/** Match a rewrite block followed by an EBNF suffix; the position of the
 *  resulting root token is reset to that of the first token matched.
 */
rewrite_tree_ebnf
@init {
    Token firstToken = input.LT(1);
}
@after {
    $rewrite_tree_ebnf.tree.getToken().setLine(firstToken.getLine());
    $rewrite_tree_ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
}
    :   rewrite_tree_block ebnfSuffix -> ^(ebnfSuffix rewrite_tree_block)
    ;

/** Match a tree constructor on the right-hand side of a rewrite. */
rewrite_tree
    :   '^(' rewrite_tree_atom rewrite_tree_element* ')'
        -> ^(TREE_BEGIN rewrite_tree_atom rewrite_tree_element* )
    ;

/** Build a tree for a template rewrite:
      ^(TEMPLATE (ID|ACTION) ^(ARGLIST ^(ARG ID ACTION) ...) )
    where ARGLIST is always there even if no args exist.
    ID can be "template" keyword.  If first child is ACTION then it's
    an indirect template ref

    -> foo(a={...}, b={...})
    -> ({string-e})(a={...}, b={...})  // e evaluates to template name
    -> {%{$ID.text}} // create literal template from string (done in ActionTranslator)
    -> {st-expr} // st-expr evaluates to ST
 */
rewrite_template
    :   // -> template(a={...},...) "..."    inline template
        id lp='(' rewrite_template_args ')'
        ( str=DOUBLE_QUOTE_STRING_LITERAL | str=DOUBLE_ANGLE_STRING_LITERAL )
        -> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args $str)

    |   // -> foo(a={...}, ...)
        rewrite_template_ref

    |   // -> ({expr})(a={...}, ...)
        rewrite_indirect_template_head

    |   // -> {...}
        ACTION
    ;

/** -> foo(a={...}, ...) */
rewrite_template_ref
    :   id lp='(' rewrite_template_args ')'
        -> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args)
    ;

/** -> ({expr})(a={...}, ...) */
rewrite_indirect_template_head
    :   lp='(' ACTION ')' '(' rewrite_template_args ')'
        -> ^(TEMPLATE[$lp,"TEMPLATE"] ACTION rewrite_template_args)
    ;

/** Match a possibly empty comma-separated argument list; an ARGLIST node is
 *  produced in both cases.
 */
rewrite_template_args
    :   rewrite_template_arg (',' rewrite_template_arg)*
        -> ^(ARGLIST rewrite_template_arg+)
    |   -> ARGLIST
    ;

/** Match a single name={...} template argument. */
rewrite_template_arg
    :   id '=' ACTION -> ^(ARG[$id.start] id ACTION)
    ;

/** Accept either a token or a rule reference as an identifier and retype it as ID. */
id  :   TOKEN_REF -> ID[$TOKEN_REF]
    |   RULE_REF  -> ID[$RULE_REF]
    ;

// L E X I C A L   R U L E S

/** Single-line comment, sent to the hidden channel.  The terminating
 *  newline is required.
 */
SL_COMMENT
    :   '//'
        (   ' $ANTLR ' SRC // src directive
        |   ~('\r'|'\n')*
        )
        '\r'? '\n'
        {$channel=HIDDEN;}
    ;

/** Block comment.  When the character right after the opening delimiter is
 *  '*' the token type becomes DOC_COMMENT; otherwise the token is sent to
 *  the hidden channel.
 */
ML_COMMENT
    :   '/*' {if (input.LA(1)=='*') $type=DOC_COMMENT; else $channel=HIDDEN;} .* '*/'
    ;

/** Single-quoted literal holding exactly one LITERAL_CHAR. */
CHAR_LITERAL
    :   '\'' LITERAL_CHAR '\''
    ;

/** Single-quoted literal holding one or more LITERAL_CHARs. */
STRING_LITERAL
    :   '\'' LITERAL_CHAR LITERAL_CHAR* '\''
    ;

/** One character of a single-quoted literal: an escape sequence, or any
 *  character other than a single quote or a backslash.
 */
fragment
LITERAL_CHAR
    :   ESC
    |   ~('\''|'\\')
    ;

/** Double-quoted string, used for inline templates in rewrites. */
DOUBLE_QUOTE_STRING_LITERAL
    :   '"' (ESC | ~('\\'|'"'))* '"'
    ;

/** String delimited by double angle brackets, used for inline templates in rewrites. */
DOUBLE_ANGLE_STRING_LITERAL
    :   '<<' .* '>>'
    ;

/** Backslash escape sequence inside a literal. */
fragment
ESC :   '\\'
        (   'n'
        |   'r'
        |   't'
        |   'b'
        |   'f'
        |   '"'
        |   '\''
        |   '\\'
        |   '>'
        |   'u' XDIGIT XDIGIT XDIGIT XDIGIT
        |   . // unknown, leave as it is
        )
    ;

/** One hexadecimal digit. */
fragment
XDIGIT :
        '0' .. '9'
    |   'a' .. 'f'
    |   'A' .. 'F'
    ;

/** One or more decimal digits. */
INT :   '0'..'9'+
    ;

/** Bracketed argument action; the token text excludes the outer brackets
 *  (stripped by NESTED_ARG_ACTION).
 */
ARG_ACTION
    :   NESTED_ARG_ACTION
    ;

/** Match a possibly nested [...] section, skipping over string and char
 *  literals, then drop the first and last character from the text.
 */
fragment
NESTED_ARG_ACTION :
    '['
    (   options {greedy=false; k=1;}
    :   NESTED_ARG_ACTION
    |   ACTION_STRING_LITERAL
    |   ACTION_CHAR_LITERAL
    |   .
    )*
    ']'
    {setText(getText().substring(1, getText().length()-1));}
    ;

/** Braced action; when immediately followed by '?' the token type becomes SEMPRED. */
ACTION
    :   NESTED_ACTION ( '?' {$type = SEMPRED;} )?
    ;

/** Match a possibly nested {...} section, skipping over comments and
 *  string and char literals.
 */
fragment
NESTED_ACTION :
    '{'
    (   options {greedy=false; k=2;}
    :   NESTED_ACTION
    |   SL_COMMENT
    |   ML_COMMENT
    |   ACTION_STRING_LITERAL
    |   ACTION_CHAR_LITERAL
    |   .
    )*
    '}'
    ;

/** Character literal as it appears inside an action. */
fragment
ACTION_CHAR_LITERAL
    :   '\'' (ACTION_ESC|~('\\'|'\'')) '\''
    ;

/** String literal as it appears inside an action. */
fragment
ACTION_STRING_LITERAL
    :   '"' (ACTION_ESC|~('\\'|'"'))* '"'
    ;

/** Backslash escape as it appears inside an action literal. */
fragment
ACTION_ESC
    :   '\\\''
    |   '\\' '"' // ANTLR doesn't like: '\\"'
    |   '\\' ~('\''|'"')
    ;

/** Identifier starting with an upper-case letter. */
TOKEN_REF
    :   'A'..'Z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
    ;

/** Identifier starting with a lower-case letter. */
RULE_REF
    :   'a'..'z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
    ;

/** Match the start of an options section.  Don't allow normal
 *  action processing on the {...} as it's not an action.
 */
OPTIONS
    :   'options' WS_LOOP '{'
    ;

/** Match the start of a tokens section, including its opening brace. */
TOKENS
    :   'tokens' WS_LOOP '{'
    ;

/** Reset the file and line information; useful when the grammar
 *  has been generated so that errors are shown relative to the
 *  original file like the old C preprocessor used to do.
 */
fragment
SRC :   'src' ' ' file=ACTION_STRING_LITERAL ' ' line=INT
    ;

/** Whitespace, sent to the hidden channel. */
WS  :   (   ' '
        |   '\t'
        |   '\r'? '\n'
        )+
        {$channel=HIDDEN;}
    ;

/** Any run of whitespace and comments; used between a section keyword and its brace. */
fragment
WS_LOOP
    :   (   WS
        |   SL_COMMENT
        |   ML_COMMENT
        )*
    ;