1/*
2 [The "BSD license"]
3 Copyright (c) 2005-2011 Terence Parr
4 All rights reserved.
5
6 Grammar conversion to ANTLR v3:
7 Copyright (c) 2011 Sam Harwell
8 All rights reserved.
9
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions
12 are met:
13 1. Redistributions of source code must retain the above copyright
14	notice, this list of conditions and the following disclaimer.
15 2. Redistributions in binary form must reproduce the above copyright
16	notice, this list of conditions and the following disclaimer in the
17	documentation and/or other materials provided with the distribution.
18 3. The name of the author may not be used to endorse or promote products
19	derived from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32
33/** [Warning: TJP says that this is probably out of date as of 11/19/2005,
34 *   but since it's probably still useful, I'll leave in.  Don't have energy
35 *   to update at the moment.]
36 *
37 *  Compute the token types for all literals and rules etc..  There are
38 *  a few different cases to consider for grammar types and a few situations
39 *  within.
40 *
41 *  CASE 1 : pure parser grammar
42 *	a) Any reference to a token gets a token type.
43 *  b) The tokens section may alias a token name to a string or char
44 *
45 *  CASE 2 : pure lexer grammar
46 *  a) Import token vocabulary if available. Set token types for any new tokens
47 *     to values above last imported token type
48 *  b) token rule definitions get token types if not already defined
49 *  c) literals do NOT get token types
50 *
51 *  CASE 3 : merged parser / lexer grammar
52 *	a) Any char or string literal gets a token type in a parser rule
53 *  b) Any reference to a token gets a token type if not referencing
54 *     a fragment lexer rule
55 *  c) The tokens section may alias a token name to a string or char
56 *     which must add a rule to the lexer
57 *  d) token rule definitions get token types if not already defined
58 *  e) token rule definitions may also alias a token name to a literal.
59 *     E.g., Rule 'FOR : "for";' will alias FOR to "for" in the sense that
60 *     references to either in the parser grammar will yield the token type
61 *
62 *  What this pass does:
63 *
64 *  0. Collects basic info about the grammar like grammar name and type;
 *     The options also have to be read in case they affect the token types.
66 *     E.g., tokenVocab option.
67 *     Imports any token vocab name/type pairs into a local hashtable.
68 *  1. Finds a list of all literals and token names.
69 *  2. Finds a list of all token name rule definitions;
70 *     no token rules implies pure parser.
71 *  3. Finds a list of all simple token rule defs of form "<NAME> : <literal>;"
72 *     and aliases them.
 *  4. Walks token names table and assigns types to any unassigned
 *  5. Walks aliases and assigns types to referenced literals
 *  6. Walks literals, assigning types if untyped
 *  7. Informs the Grammar object of the type definitions such as:
77 *     g.defineToken(<charliteral>, ttype);
78 *     g.defineToken(<stringliteral>, ttype);
79 *     g.defineToken(<tokenID>, ttype);
 *     where some of the ttype values will be the same for aliased tokens.
81 */
tree grammar AssignTokenTypesWalker;

options {
	// Labels and rule start nodes referenced from actions are typed as GrammarAST.
	ASTLabelType = GrammarAST;
	// Token type constants come from the ANTLR vocabulary.
	tokenVocab = ANTLR;
}
89
90@header {
91package org.antlr.grammar.v3;
92
93import java.util.*;
94import org.antlr.analysis.*;
95import org.antlr.misc.*;
96import org.antlr.tool.*;
97
98import org.antlr.runtime.BitSet;
99}
100
101@members {
/**
 * Grammar this walker reports against; read by reportError and by the
 * option rule. Nothing visible in this file assigns it.
 */
protected Grammar grammar;

/**
 * Text of the ID of the rule most recently entered by ruleBody. Stays
 * null until the first rule is reached, as far as this file shows.
 */
protected String currentRuleName;

// Template trees filled in by initASTPatterns(). The variants ending in 2
// carry an additional ACTION child after the literal.
protected static GrammarAST charAlias;
protected static GrammarAST charAlias2;
protected static GrammarAST stringAlias;
protected static GrammarAST stringAlias2;
109
/**
 * Forwards a recognition error to ErrorManager as a syntax error tagged
 * with the assign.types prefix.
 *
 * The offending token is taken from the exception only when it is a
 * NoViableAltException or a MismatchedTokenException; any other exception
 * kind is reported with a null token.
 */
@Override
public void reportError(RecognitionException ex)
{
    final Token offending;
    if (ex instanceof NoViableAltException) {
        offending = ((NoViableAltException)ex).token;
    } else if (ex instanceof MismatchedTokenException) {
        offending = ((MismatchedTokenException)ex).token;
    } else {
        offending = null;
    }

    final String description = "assign.types: " + ex.toString();
    ErrorManager.syntaxError(ErrorManager.MSG_SYNTAX_ERROR, grammar, offending, description, ex);
}
127
/**
 * Builds the four template trees stored in the static alias fields.
 *
 *   stringAlias  = ^(BLOCK ^(ALT STRING_LITERAL EOA) EOB)
 *   charAlias    = ^(BLOCK ^(ALT CHAR_LITERAL EOA) EOB)
 *   stringAlias2 = ^(BLOCK ^(ALT STRING_LITERAL ACTION EOA) EOB)
 *   charAlias2   = ^(BLOCK ^(ALT CHAR_LITERAL ACTION EOA) EOB)
 *
 * Each call rebuilds all four trees and overwrites the static fields.
 */
protected void initASTPatterns()
{
    TreeAdaptor adaptor = new ANTLRParser.grammar_Adaptor(null);

    stringAlias  = buildAliasPattern( adaptor, STRING_LITERAL, "STRING_LITERAL", false );
    charAlias    = buildAliasPattern( adaptor, CHAR_LITERAL, "CHAR_LITERAL", false );
    stringAlias2 = buildAliasPattern( adaptor, STRING_LITERAL, "STRING_LITERAL", true );
    charAlias2   = buildAliasPattern( adaptor, CHAR_LITERAL, "CHAR_LITERAL", true );
}

/**
 * Creates one template of the form ^(BLOCK ^(ALT literal [ACTION] EOA) EOB).
 *
 * @param adaptor     adaptor used to create and link the nodes
 * @param literalType token type of the literal node
 * @param literalText text given to the literal node
 * @param withAction  whether an ACTION node follows the literal
 * @return the BLOCK root of the new template
 */
private static GrammarAST buildAliasPattern( TreeAdaptor adaptor, int literalType, String literalText, boolean withAction )
{
    GrammarAST blockNode = (GrammarAST)adaptor.create( BLOCK, "BLOCK" );

    GrammarAST altNode = (GrammarAST)adaptor.create( ALT, "ALT" );
    adaptor.addChild( altNode, adaptor.create( literalType, literalText ) );
    if ( withAction )
    {
        adaptor.addChild( altNode, adaptor.create( ACTION, "ACTION" ) );
    }
    adaptor.addChild( altNode, adaptor.create( EOA, "EOA" ) );

    // Child order matters: the alternative comes first, then the end-of-block marker.
    adaptor.addChild( blockNode, altNode );
    adaptor.addChild( blockNode, adaptor.create( EOB, "EOB" ) );
    return blockNode;
}
182
// Every hook below is an empty placeholder in this walker. Per the original
// note, the behavior was moved to AssignTokenTypesBehavior (presumably a
// subclass overriding these methods -- confirm against that class).

/** Invoked for each STRING_LITERAL or CHAR_LITERAL matched by tokenSpec or atom. */
protected void trackString( GrammarAST t )
{
}

/** Invoked for each TOKEN_REF matched by tokenSpec or atom. */
protected void trackToken( GrammarAST t )
{
}

/**
 * Invoked at the end of ruleBody with the rule ID node, the start node of
 * the modifier (null when the rule has none) and the start node of the block.
 */
protected void trackTokenRule( GrammarAST t, GrammarAST modifier, GrammarAST block )
{
}

/** Invoked by tokenSpec with a token reference and the literal assigned to it. */
protected void alias( GrammarAST t, GrammarAST s )
{
}

/** Not invoked anywhere in this grammar file; empty here. */
public void defineTokens( Grammar root )
{
}

/** Not invoked anywhere in this grammar file; empty here. */
protected void defineStringLiteralsFromDelegates()
{
}

/** Not invoked anywhere in this grammar file; empty here. */
protected void assignStringTypes( Grammar root )
{
}

/** Not invoked anywhere in this grammar file; empty here. */
protected void aliasTokenIDsAndLiterals( Grammar root )
{
}

/** Not invoked anywhere in this grammar file; empty here. */
protected void assignTokenIDTypes( Grammar root )
{
}

/** Not invoked anywhere in this grammar file; empty here. */
protected void defineTokenNamesAndLiteralsInGrammar( Grammar root )
{
}

/** Invoked from the init action of grammar_ with the grammar passed to that rule. */
protected void init( Grammar root )
{
}
195}
196
/** Entry rule: calls init(g), then matches one grammar root node of any of the four grammar types. */
public grammar_[Grammar g]
@init
{
	if ( state.backtracking == 0 )
	{
		init($g);
	}
}
	:	^( LEXER_GRAMMAR    grammarSpec )
	|	^( PARSER_GRAMMAR   grammarSpec )
	|	^( TREE_GRAMMAR     grammarSpec )
	|	^( COMBINED_GRAMMAR grammarSpec )
	;
210
/** Children of a grammar root: name, optional sections in fixed order, then the rules. */
grammarSpec
	:	ID                      // grammar name
		DOC_COMMENT?
		optionsSpec?
		delegateGrammars?
		tokensSpec?
		attrScope*
		( ^(AMPERSAND .*) )*    // named actions are skipped
		rules
	;
221
/** A grammar-level scope: name, any nested named actions (skipped), and the scope body action. */
attrScope
	:	^(	'scope'
			ID
			( ^(AMPERSAND .*) )*
			ACTION
		)
	;
225
/** An options section; every option is recorded into the returned map by the option rule. */
optionsSpec returns [Map<Object, Object> opts = new HashMap<Object, Object>()]
	:	^(	OPTIONS
			( option[$opts] )+
		)
	;
229
/**
 * A single name/value option. The pair is stored in opts; a tokenVocab
 * option seen before any rule has been entered also imports that vocabulary.
 */
option[Map<Object, Object> opts]
	:	^( ASSIGN ID optionValue )
		{
			String key = $ID.text;
			Object value = $optionValue.value;
			$opts.put(key, value);
			// currentRuleName is only set once ruleBody is reached, so null
			// here means the option sits at grammar level.
			if ( currentRuleName == null && "tokenVocab".equals(key) )
			{
				// optionValue yields an Integer for INT values, so convert
				// rather than cast to avoid a ClassCastException.
				grammar.importTokenVocabulary($ID, String.valueOf(value));
			}
		}
	;
242
/**
 * One option value. The result defaults to the text of the matched node;
 * for an INT it is replaced by the parsed Integer.
 */
optionValue returns [Object value=null]
@init
{
	if ( state.backtracking == 0 )
	{
		$value = $start.getText();
	}
}
	:	ID
	|	STRING_LITERAL
	|	CHAR_LITERAL
	|	INT { $value = Integer.parseInt($INT.text); }
	// A charSet alternative is currently not accepted here.
	;
256
/** A CHARSET node wrapping one element. Only referenced from a disabled optionValue alternative in this file. */
charSet
	:	^(	CHARSET
			charSetElement
		)
	;
260
/** A single character literal, or an OR / RANGE node over two character literals. */
charSetElement
	:	CHAR_LITERAL
	|	^( OR    CHAR_LITERAL CHAR_LITERAL )
	|	^( RANGE CHAR_LITERAL CHAR_LITERAL )
	;
266
/** An import section: one or more delegate entries, each either a bare ID or an ASSIGN node over two IDs. */
delegateGrammars
	:	^( 'import' ( ^( ASSIGN ID ID ) | ID )+ )
	;
274
/** The tokens section: zero or more token specifications. */
tokensSpec
	:	^(	TOKENS
			tokenSpec*
		)
	;
278
/**
 * One entry of the tokens section. A bare token name is tracked; an
 * assignment tracks the name and the literal, then aliases the two.
 */
tokenSpec
	:	plain=TOKEN_REF { trackToken($plain); }
	|	^(	ASSIGN
			aliased=TOKEN_REF { trackToken($aliased); }
			(	str=STRING_LITERAL { trackString($str); alias($aliased, $str); }
			|	chr=CHAR_LITERAL   { trackString($chr); alias($aliased, $chr); }
			)
		)
	;
288
/** The rule list of a grammar; at least one rule is required. */
rules
	:	( rule )+
	;
292
/** A rule node, either RULE or PREC_RULE; both share the same body layout. */
rule
	:	^( RULE      ruleBody )
	|	^( PREC_RULE ruleBody )
	;
297
/**
 * Children of a rule node. Records the rule name on entry and, once the
 * whole rule has matched, hands name, modifier and block to trackTokenRule.
 */
ruleBody
	:	ruleId=ID { currentRuleName = $ruleId.text; }
		( mod=modifier )?
		^( ARG ARG_ACTION? )
		^( RET ARG_ACTION? )
		throwsSpec?
		optionsSpec?
		ruleScopeSpec?
		( ^(AMPERSAND .*) )*    // rule-level named actions are skipped
		blk=block
		exceptionGroup?
		EOR
		{ trackTokenRule($ruleId, $mod.start, $blk.start); }
	;
312
/** The optional keyword that may precede a rule name. */
modifier
	:	'protected' | 'public' | 'private' | 'fragment'
	;
319
/** A throws clause listing one or more IDs. */
throwsSpec
	:	^(	'throws'
			ID+
		)
	;
323
/** A rule-level scope: skipped named actions, an optional body action, then any number of scope IDs. */
ruleScopeSpec
	:	^(	'scope'
			( ^(AMPERSAND .*) )*
			ACTION?
			ID*
		)
	;
327
/** A BLOCK node: optional options, one or more alternative/rewrite pairs, closed by EOB. */
block
	:	^( BLOCK optionsSpec? ( alternative rewrite )+ EOB )
	;
335
/** An ALT node: one or more elements closed by EOA. */
alternative
	:	^(	ALT
			element+
			EOA
		)
	;
339
/** Handlers attached to a rule: catch clauses with an optional finally, or a finally on its own. */
exceptionGroup
	:	exceptionHandler+ finallyClause?
	|	finallyClause
	;
344
/** A catch clause: argument action followed by the handler action. */
exceptionHandler
	:	^(	'catch'
			ARG_ACTION
			ACTION
		)
	;
348
/** A finally clause holding a single action. */
finallyClause
	:	^(	'finally'
			ACTION
		)
	;
352
/** The rewrite part following an alternative; its contents are skipped, and it may be absent. */
rewrite
	:	^(	REWRITES
			( ^(REWRITE .*) )*
		)
	|	// no rewrite
	;
357
/**
 * One element of an alternative. Operator, label, subrule and tree nodes
 * are descended into; actions and predicates are matched without any
 * processing in this rule.
 */
element
	:	^( ROOT element )
	|	^( BANG element )
	|	atom
	|	^( NOT element )
	|	^( RANGE atom atom )
	|	^( CHAR_RANGE atom atom )
	|	^( ASSIGN ID element )         // label
	|	^( PLUS_ASSIGN ID element )    // list label
	|	ebnf
	|	tree_
	|	^( SYNPRED block )
	|	FORCED_ACTION
	|	ACTION
	|	SEMPRED
	|	SYN_SEMPRED
	|	^( BACKTRACK_SEMPRED .* )
	|	GATED_SEMPRED
	|	EPSILON
	;
378
/** A subrule: a plain block, or a block under an OPTIONAL, CLOSURE or POSITIVE_CLOSURE node. */
ebnf
	:	block
	|	^( OPTIONAL         block )
	|	^( CLOSURE          block )
	|	^( POSITIVE_CLOSURE block )
	;
385
/** A tree pattern: TREE_BEGIN over one or more elements. */
tree_
	:	^(	TREE_BEGIN
			element+
		)
	;
389
/**
 * A leaf of an alternative. Token references are passed to trackToken and
 * literals to trackString; rule references and wildcards are only matched.
 */
atom
	:	^( RULE_REF ARG_ACTION? )
	|	^( tok=TOKEN_REF ARG_ACTION? ) { trackToken($tok); }
	|	chr=CHAR_LITERAL               { trackString($chr); }
	|	str=STRING_LITERAL             { trackString($str); }
	|	WILDCARD
	|	^( DOT ID atom )               // scope override on rule
	;
398
/** ROOT or BANG on its own. No rule in this file references it. */
ast_suffix
	:	ROOT | BANG
	;
403