/*
 [The "BSD license"]
 Copyright (c) 2005-2011 Terence Parr
 All rights reserved.

 Grammar conversion to ANTLR v3:
 Copyright (c) 2011 Sam Harwell
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/** [Warning: TJP says that this is probably out of date as of 11/19/2005,
 *   but since it's probably still useful, I'll leave in.  Don't have energy
 *   to update at the moment.]
 *
 *  Compute the token types for all literals and rules etc.  There are
 *  a few different cases to consider for grammar types and a few situations
 *  within.
 *
 *  CASE 1 : pure parser grammar
 *      a) Any reference to a token gets a token type.
 *      b) The tokens section may alias a token name to a string or char
 *
 *  CASE 2 : pure lexer grammar
 *      a) Import token vocabulary if available. Set token types for any new tokens
 *         to values above last imported token type
 *      b) token rule definitions get token types if not already defined
 *      c) literals do NOT get token types
 *
 *  CASE 3 : merged parser / lexer grammar
 *      a) Any char or string literal gets a token type in a parser rule
 *      b) Any reference to a token gets a token type if not referencing
 *         a fragment lexer rule
 *      c) The tokens section may alias a token name to a string or char
 *         which must add a rule to the lexer
 *      d) token rule definitions get token types if not already defined
 *      e) token rule definitions may also alias a token name to a literal.
 *         E.g., Rule 'FOR : "for";' will alias FOR to "for" in the sense that
 *         references to either in the parser grammar will yield the token type
 *
 *  What this pass does:
 *
 *  0. Collects basic info about the grammar like grammar name and type;
 *     Oh, I have to get the options in case they affect the token types.
 *     E.g., tokenVocab option.
 *     Imports any token vocab name/type pairs into a local hashtable.
 *  1. Finds a list of all literals and token names.
 *  2. Finds a list of all token name rule definitions;
 *     no token rules implies pure parser.
 *  3. Finds a list of all simple token rule defs of form "<NAME> : <literal>;"
 *     and aliases them.
 *  4. Walks token names table and assign types to any unassigned
 *  5. Walks aliases and assign types to referenced literals
 *  6. Walks literals, assigning types if untyped
 *  7. Informs the Grammar object of the type definitions such as:
 *         g.defineToken(<charliteral>, ttype);
 *         g.defineToken(<stringliteral>, ttype);
 *         g.defineToken(<tokenID>, ttype);
 *     where some of the ttype values will be the same for aliased tokens.
 */
tree grammar AssignTokenTypesWalker;

options
{
    tokenVocab = ANTLR;
    ASTLabelType = GrammarAST;
}

@header {
package org.antlr.grammar.v3;

import java.util.*;
import org.antlr.analysis.*;
import org.antlr.misc.*;
import org.antlr.tool.*;

import org.antlr.runtime.BitSet;
}

@members {
/** Grammar object handed to error reporting and to the tokenVocab import. */
protected Grammar grammar;

/**
 * Name of the rule most recently entered by ruleBody. It is still null while
 * the grammar-level options are walked, which is how the option rule tells
 * grammar-level options from rule-level ones.
 */
protected String currentRuleName;

/*
 * Template trees built by initASTPatterns().
 * NOTE(review): presumably compared against rule blocks by
 * AssignTokenTypesBehavior to detect simple alias rules - confirm there.
 */
protected static GrammarAST stringAlias;
protected static GrammarAST charAlias;
protected static GrammarAST stringAlias2;
protected static GrammarAST charAlias2;

/**
 * Forwards a recognition error to ErrorManager.syntaxError with an
 * assign.types: prefix on the message. The offending token is passed along
 * only for MismatchedTokenException and NoViableAltException; for any other
 * exception type the token argument is null.
 */
@Override
public void reportError(RecognitionException ex)
{
    Token token = null;
    if (ex instanceof MismatchedTokenException) {
        token = ((MismatchedTokenException)ex).token;
    } else if (ex instanceof NoViableAltException) {
        token = ((NoViableAltException)ex).token;
    }

    ErrorManager.syntaxError(
        ErrorManager.MSG_SYNTAX_ERROR,
        grammar,
        token,
        "assign.types: " + ex.toString(),
        ex);
}

/**
 * Builds the four static template trees (stringAlias, charAlias,
 * stringAlias2, charAlias2). Each is a BLOCK holding a single ALT whose
 * content is one literal, optionally followed by an ACTION, then EOA,
 * with EOB closing the block.
 */
protected void initASTPatterns()
{
    TreeAdaptor adaptor = new ANTLRParser.grammar_Adaptor(null);

    /*
     * stringAlias = ^(BLOCK[] ^(ALT[] STRING_LITERAL[] EOA[]) EOB[])
     */
    stringAlias = (GrammarAST)adaptor.create( BLOCK, "BLOCK" );
    {
        GrammarAST alt = (GrammarAST)adaptor.create( ALT, "ALT" );
        adaptor.addChild( alt, adaptor.create( STRING_LITERAL, "STRING_LITERAL" ) );
        adaptor.addChild( alt, adaptor.create( EOA, "EOA" ) );
        adaptor.addChild( stringAlias, alt );
    }
    adaptor.addChild( stringAlias, adaptor.create( EOB, "EOB" ) );

    /*
     * charAlias = ^(BLOCK[] ^(ALT[] CHAR_LITERAL[] EOA[]) EOB[])
     */
    charAlias = (GrammarAST)adaptor.create( BLOCK, "BLOCK" );
    {
        GrammarAST alt = (GrammarAST)adaptor.create( ALT, "ALT" );
        adaptor.addChild( alt, adaptor.create( CHAR_LITERAL, "CHAR_LITERAL" ) );
        adaptor.addChild( alt, adaptor.create( EOA, "EOA" ) );
        adaptor.addChild( charAlias, alt );
    }
    adaptor.addChild( charAlias, adaptor.create( EOB, "EOB" ) );

    /*
     * stringAlias2 = ^(BLOCK[] ^(ALT[] STRING_LITERAL[] ACTION[] EOA[]) EOB[])
     */
    stringAlias2 = (GrammarAST)adaptor.create( BLOCK, "BLOCK" );
    {
        GrammarAST alt = (GrammarAST)adaptor.create( ALT, "ALT" );
        adaptor.addChild( alt, adaptor.create( STRING_LITERAL, "STRING_LITERAL" ) );
        adaptor.addChild( alt, adaptor.create( ACTION, "ACTION" ) );
        adaptor.addChild( alt, adaptor.create( EOA, "EOA" ) );
        adaptor.addChild( stringAlias2, alt );
    }
    adaptor.addChild( stringAlias2, adaptor.create( EOB, "EOB" ) );

    /*
     * charAlias2 = ^(BLOCK[] ^(ALT[] CHAR_LITERAL[] ACTION[] EOA[]) EOB[])
     */
    charAlias2 = (GrammarAST)adaptor.create( BLOCK, "BLOCK" );
    {
        GrammarAST alt = (GrammarAST)adaptor.create( ALT, "ALT" );
        adaptor.addChild( alt, adaptor.create( CHAR_LITERAL, "CHAR_LITERAL" ) );
        adaptor.addChild( alt, adaptor.create( ACTION, "ACTION" ) );
        adaptor.addChild( alt, adaptor.create( EOA, "EOA" ) );
        adaptor.addChild( charAlias2, alt );
    }
    adaptor.addChild( charAlias2, adaptor.create( EOB, "EOB" ) );
}

// Behavior moved to AssignTokenTypesBehavior
// The methods below are intentionally empty hooks invoked from the rule
// actions of this walker; the real work lives in the subclass.
protected void trackString(GrammarAST t) {}
protected void trackToken( GrammarAST t ) {}
protected void trackTokenRule( GrammarAST t, GrammarAST modifier, GrammarAST block ) {}
protected void alias( GrammarAST t, GrammarAST s ) {}
public void defineTokens( Grammar root ) {}
protected void defineStringLiteralsFromDelegates() {}
protected void assignStringTypes( Grammar root ) {}
protected void aliasTokenIDsAndLiterals( Grammar root ) {}
protected void assignTokenIDTypes( Grammar root ) {}
protected void defineTokenNamesAndLiteralsInGrammar( Grammar root ) {}
protected void init( Grammar root ) {}
}

// Entry rule: calls init(g) once (when not backtracking) and then walks the
// grammar tree, whichever of the four grammar kinds it is rooted at.
public
grammar_[Grammar g]
@init
{
    if ( state.backtracking == 0 )
        init($g);
}
    :   (   ^( LEXER_GRAMMAR    grammarSpec )
        |   ^( PARSER_GRAMMAR   grammarSpec )
        |   ^( TREE_GRAMMAR     grammarSpec )
        |   ^( COMBINED_GRAMMAR grammarSpec )
        )
    ;

// Grammar header sections in order, followed by the rules.
grammarSpec
    :   id=ID
        (cmt=DOC_COMMENT)?
        (optionsSpec)?
        (delegateGrammars)?
        (tokensSpec)?
        (attrScope)*
        ( ^(AMPERSAND .*) )* // skip actions
        rules
    ;

attrScope
    :   ^( 'scope' ID ( ^(AMPERSAND .*) )* ACTION )
    ;

// Collects every option of an OPTIONS subtree into the returned map.
optionsSpec returns [Map<Object, Object> opts = new HashMap<Object, Object>()]
    :   ^( OPTIONS (option[$opts])+ )
    ;

// Records one key/value option; a grammar-level tokenVocab option also
// triggers the import of that token vocabulary.
option[Map<Object, Object> opts]
    :   ^( ASSIGN ID optionValue )
        {
            String key = $ID.text;
            Object optValue = $optionValue.value;
            $opts.put(key, optValue);
            // check for grammar-level option to import vocabulary
            if ( currentRuleName==null && key.equals("tokenVocab") )
            {
                // optionValue yields an Integer for INT values, so convert
                // instead of casting to avoid a ClassCastException
                String vocabName = optValue != null ? optValue.toString() : null;
                grammar.importTokenVocabulary($ID, vocabName);
            }
        }
    ;

// The value defaults to the text of the matched node; INT values are
// replaced by the parsed Integer.
optionValue returns [Object value=null]
@init
{
    if ( state.backtracking == 0 )
        $value = $start.getText();
}
    :   ID
    |   STRING_LITERAL
    |   CHAR_LITERAL
    |   INT
        {$value = Integer.parseInt($INT.text);}
//  |   cs=charSet       {$value = $cs;} // return set AST in this case
    ;

charSet
    :   ^( CHARSET charSetElement )
    ;

charSetElement
    :   CHAR_LITERAL
    |   ^( OR CHAR_LITERAL CHAR_LITERAL )
    |   ^( RANGE CHAR_LITERAL CHAR_LITERAL )
    ;

delegateGrammars
    :   ^(  'import'
            (   ^(ASSIGN ID ID)
            |   ID
            )+
        )
    ;

tokensSpec
    :   ^(TOKENS tokenSpec*)
    ;

// A tokens-section entry: either a bare token name, or a token name
// assigned a string/char literal, in which case the literal is tracked
// and aliased to the token name.
tokenSpec
    :   t=TOKEN_REF            {trackToken($t);}
    |   ^(  ASSIGN
            t2=TOKEN_REF       {trackToken($t2);}
            (   s=STRING_LITERAL {trackString($s); alias($t2,$s);}
            |   c=CHAR_LITERAL   {trackString($c); alias($t2,$c);}
            )
        )
    ;

rules
    :   rule+
    ;

rule
    :   ^(RULE ruleBody)
    |   ^(PREC_RULE ruleBody)
    ;

// Remembers the rule name on entry and reports the rule (name, optional
// modifier, block) to trackTokenRule once the whole rule has been walked.
ruleBody
    :   id=ID {currentRuleName=$id.text;}
        (m=modifier)?
        ^(ARG (ARG_ACTION)?)
        ^(RET (ARG_ACTION)?)
        (throwsSpec)?
        (optionsSpec)?
        (ruleScopeSpec)?
        ( ^(AMPERSAND .*) )*
        b=block
        (exceptionGroup)?
        EOR
        {trackTokenRule($id,$m.start,$b.start);}
    ;

modifier
    :   'protected'
    |   'public'
    |   'private'
    |   'fragment'
    ;

throwsSpec
    :   ^('throws' ID+)
    ;

ruleScopeSpec
    :   ^( 'scope' ( ^(AMPERSAND .*) )* (ACTION)? ( ID )* )
    ;

block
    :   ^(  BLOCK
            (optionsSpec)?
            ( alternative rewrite )+
            EOB
        )
    ;

alternative
    :   ^( ALT (element)+ EOA )
    ;

exceptionGroup
    :   ( exceptionHandler )+ (finallyClause)?
    |   finallyClause
    ;

exceptionHandler
    :   ^('catch' ARG_ACTION ACTION)
    ;

finallyClause
    :   ^('finally' ACTION)
    ;

// Rewrite sections are matched but their contents are skipped.
rewrite
    :   ^(REWRITES ( ^(REWRITE .*) )* )
    |
    ;

element
    :   ^(ROOT element)
    |   ^(BANG element)
    |   atom
    |   ^(NOT element)
    |   ^(RANGE atom atom)
    |   ^(CHAR_RANGE atom atom)
    |   ^(ASSIGN ID element)
    |   ^(PLUS_ASSIGN ID element)
    |   ebnf
    |   tree_
    |   ^( SYNPRED block )
    |   FORCED_ACTION
    |   ACTION
    |   SEMPRED
    |   SYN_SEMPRED
    |   ^(BACKTRACK_SEMPRED .*)
    |   GATED_SEMPRED
    |   EPSILON
    ;

ebnf
    :   block
    |   ^( OPTIONAL block )
    |   ^( CLOSURE block )
    |   ^( POSITIVE_CLOSURE block )
    ;

tree_
    :   ^(TREE_BEGIN element+)
    ;

// Token references and char/string literals inside rule bodies are handed
// to the tracking hooks; rule references and wildcards are only matched.
atom
    :   ^( RULE_REF (ARG_ACTION)? )
    |   ^( t=TOKEN_REF (ARG_ACTION )? ) {trackToken($t);}
    |   c=CHAR_LITERAL   {trackString($c);}
    |   s=STRING_LITERAL {trackString($s);}
    |   WILDCARD
    |   ^(DOT ID atom) // scope override on rule
    ;

ast_suffix
    :   ROOT
    |   BANG
    ;