1/*
2 [The "BSD license"]
3 Copyright (c) 2005-2011 Terence Parr
4 All rights reserved.
5
6 Grammar conversion to ANTLR v3:
7 Copyright (c) 2011 Sam Harwell
8 All rights reserved.
9
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions
12 are met:
13 1. Redistributions of source code must retain the above copyright
14	notice, this list of conditions and the following disclaimer.
15 2. Redistributions in binary form must reproduce the above copyright
16	notice, this list of conditions and the following disclaimer in the
17	documentation and/or other materials provided with the distribution.
18 3. The name of the author may not be used to endorse or promote products
19	derived from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
// Tree grammar that walks the AST of an ANTLR v3 grammar and records what it
// finds (rules, scopes, named actions, labels, references) on a Grammar object.
tree grammar DefineGrammarItemsWalker;

options {
	// token types are taken from the ANTLR vocabulary
	tokenVocab=ANTLR;
	// labels and start nodes in the actions below are typed as GrammarAST
	ASTLabelType=GrammarAST;
}
38
// Dynamic scope shared by attrScope and ruleScopeSpec: attrScopeAction files
// each named action under its ID node, and the enclosing rule then attaches
// the collected actions to the scope it defines.
scope AttributeScopeActions {
	// ID node of the named action -> ACTION node holding its body
	HashMap<GrammarAST, GrammarAST> actions;
}
42
43@header {
44package org.antlr.grammar.v3;
45import org.antlr.tool.*;
46import java.util.HashSet;
47import java.util.Set;
48}
49
@members {
/** Grammar being populated; assigned from the argument of grammar_. */
protected Grammar grammar;
/** Start node of grammar_; trimGrammar begins its search here. */
protected GrammarAST root;
/** Name of the rule whose subtree is currently being walked. */
protected String currentRuleName;
/** BLOCK node of the rewrite_block currently being walked, or null outside one. */
protected GrammarAST currentRewriteBlock;
/** Start node of the rewrite rule most recently entered. */
protected GrammarAST currentRewriteRule;
/** Alternative number inside the outermost block of the current rule. */
protected int outerAltNum = 0;
/** Nesting depth of block; 1 while directly inside the outermost block of a rule. */
protected int blockLevel = 0;

/** Count the ALT children of the first BLOCK child of a rule node.
 *
 *  @param t the rule node; it must have a BLOCK child
 *  @return the number of direct ALT children of that BLOCK
 */
public final int countAltsForRule( CommonTree t ) {
    CommonTree block = (CommonTree)t.getFirstChildWithType(BLOCK);
    int altCount = 0;
    for (int i = 0; i < block.getChildCount(); i++) {
        if (block.getChild(i).getType() == ALT) {
            altCount++;
        }
    }
    return altCount;
}

/** Post-processing hook run from the after action of grammar_. */
protected final void finish() {
    trimGrammar();
}

/** Remove any lexer rules from a COMBINED; already passed to lexer.
 *
 *  Does nothing for other grammar types, and also does nothing when no
 *  node with the text "grammar" can be found among root and its following
 *  siblings (the search used to run off the end of the sibling list and
 *  fail with a NullPointerException in that case).
 */
protected final void trimGrammar() {
    if ( grammar.type != Grammar.COMBINED ) {
        return;
    }
    // form is (header ... ) ( grammar ID (scope ...) ... ( rule ... ) ( rule ... ) ... )
    GrammarAST p = root;
    // find the grammar spec; the constant-first equals also tolerates a node without text
    while ( p != null && !"grammar".equals( p.getText() ) ) {
        p = (GrammarAST)p.getNextSibling();
    }
    if ( p == null ) {
        // no grammar spec node among the siblings, so there is nothing to trim
        return;
    }
    // walk backwards so that deleting a child never shifts an index still to be visited
    for ( int i = p.getChildCount() - 1; i >= 0; i-- ) {
        if ( p.getChild( i ).getType() != RULE ) {
            continue;
        }

        // the first child of a RULE node carries the rule name
        String ruleName = p.getChild( i ).getChild( 0 ).getText();
        if ( Rule.getRuleType( ruleName ) == Grammar.LEXER ) {
            // remove lexer rule
            p.deleteChild( i );
        }
    }
}

/** Record an action node on the rule currently being walked.
 *  Silently ignored when the grammar has no rule named currentRuleName.
 */
protected final void trackInlineAction( GrammarAST actionAST ) {
    Rule r = grammar.getRule( currentRuleName );
    if ( r != null ) {
        r.trackInlineAction( actionAST );
    }
}
}
106
/** Entry point of the walk: remembers the target grammar and the root node,
 *  stamps the grammar type matching the root token, walks the grammar spec,
 *  and finally calls finish().
 */
public
grammar_[Grammar g]
@init
{
	this.grammar = $g;
	this.root = $start;
}
@after
{
	finish();
}
	:	^( LEXER_GRAMMAR
			{ grammar.type = Grammar.LEXER; }
			grammarSpec
		)
	|	^( PARSER_GRAMMAR
			{ grammar.type = Grammar.PARSER; }
			grammarSpec
		)
	|	^( TREE_GRAMMAR
			{ grammar.type = Grammar.TREE_PARSER; }
			grammarSpec
		)
	|	^( COMBINED_GRAMMAR
			{ grammar.type = Grammar.COMBINED; }
			grammarSpec
		)
	;
123
/** A global dynamic scope: defines it on the grammar, adds its attributes
 *  (separated by ';') and attaches every named action that attrScopeAction
 *  collected while the subtree was walked.
 */
attrScope
scope AttributeScopeActions;
@init
{
	$AttributeScopeActions::actions = new HashMap<GrammarAST, GrammarAST>();
}
	:	^( 'scope' name=ID attrScopeAction* attrs=ACTION )
		{
			AttributeScope globalScope =
				grammar.defineGlobalScope( $name.text, $attrs.getToken() );
			globalScope.isDynamicGlobalScope = true;
			globalScope.addAttributes( $attrs.text, ';' );
			for ( Map.Entry<GrammarAST, GrammarAST> named : $AttributeScopeActions::actions.entrySet() )
			{
				globalScope.defineNamedAction( named.getKey(), named.getValue() );
			}
		}
	;
139
/** A named action inside a scope definition; stored in the enclosing
 *  AttributeScopeActions scope under its ID node.
 */
attrScopeAction
	:	^( AMPERSAND actionName=ID actionBody=ACTION )
		{
			$AttributeScopeActions::actions.put( $actionName, $actionBody );
		}
	;
146
/** Children of the grammar root: name, optional doc comment, options,
 *  imports, tokens, global scopes and named actions, then the rules.
 */
grammarSpec
	:	id=ID
		( cmt=DOC_COMMENT )?
		optionsSpec?
		delegateGrammars?
		tokensSpec?
		attrScope*
		actions?
		rules
	;
157
/** One or more grammar-level named actions. */
actions
	:	action+
	;
161
/** A grammar-level named action, with or without a scope name in front of
 *  the action name. With two IDs the first is the scope and the second the
 *  name; with one ID it is the name and the scope is null.
 */
action
@init
{
	String actionScope = null;
	GrammarAST nameNode = null;
	GrammarAST bodyNode = null;
}
	:	^(	amp=AMPERSAND id1=ID
			(	id2=ID a1=ACTION
				{
					actionScope = $id1.text;
					nameNode = $id2;
					bodyNode = $a1;
				}
			|	a2=ACTION
				{
					actionScope = null;
					nameNode = $id1;
					bodyNode = $a2;
				}
			)
		)
		{
			grammar.defineNamedAction( $amp, actionScope, nameNode, bodyNode );
		}
	;
179
/** An OPTIONS subtree; its children are matched but not inspected here. */
optionsSpec
	:	^( OPTIONS .* )
	;
183
/** An 'import' subtree: each entry is either a single ID or an ASSIGN node
 *  with two ID children. Nothing is recorded here.
 */
delegateGrammars
	:	^(	'import'
			(	^( ASSIGN ID ID )
			|	ID
			)+
		)
	;
187
/** A TOKENS subtree holding zero or more token specs. */
tokensSpec
	:	^( TOKENS tokenSpec* )
	;
191
/** One entry of the tokens section: a bare token name, or a token name
 *  assigned a string or character literal. Nothing is recorded here.
 */
tokenSpec
	:	t=TOKEN_REF
	|	^( ASSIGN TOKEN_REF ( STRING_LITERAL | CHAR_LITERAL ) )
	;
201
/** The rule list: ordinary rules are walked, PREC_RULE subtrees are matched
 *  without looking at their children.
 */
rules
	:	(	rule
		|	^( PREC_RULE .* )
		)+
	;
205
/** One rule definition. A lexer rule found in a combined grammar is handed
 *  to grammar.defineLexerRuleFoundInParser and no Rule object is looked up
 *  for it, so null is passed on to ruleScopeSpec and ruleAction. Every other
 *  rule is defined on the grammar together with its parameter scope, return
 *  scope and declared exceptions. After the block has been walked, the
 *  options read from the RULE node are copied onto the block node.
 */
rule
@init
{
	String ruleName = null;
	Map<String, Object> blockOptions = null;
	Rule definedRule = null;
}
	:	^(	RULE id=ID
			{ blockOptions = $RULE.getBlockOptions(); }
			modifier?
			^( ARG ( args=ARG_ACTION )? )
			^( RET ( ret=ARG_ACTION )? )
			throwsSpec?
			optionsSpec?
			{
				ruleName = $id.text;
				currentRuleName = ruleName;
				boolean lexerRuleInCombined =
					Rule.getRuleType( ruleName ) == Grammar.LEXER &&
					grammar.type == Grammar.COMBINED;
				if ( !lexerRuleInCombined )
				{
					int altCount = countAltsForRule( $start );
					grammar.defineRule( $id.getToken(), $modifier.mod, blockOptions, $start, $args, altCount );
					definedRule = grammar.getRule( ruleName );
					if ( $args != null )
					{
						// rule parameters: comma separated attribute list
						definedRule.parameterScope = grammar.createParameterScope( ruleName, $args.getToken() );
						definedRule.parameterScope.addAttributes( $args.text, ',' );
					}
					if ( $ret != null )
					{
						// return values: comma separated attribute list
						definedRule.returnScope = grammar.createReturnScope( ruleName, $ret.getToken() );
						definedRule.returnScope.addAttributes( $ret.text, ',' );
					}
					if ( $throwsSpec.exceptions != null )
					{
						for ( String thrown : $throwsSpec.exceptions )
						{
							definedRule.throwsSpec.add( thrown );
						}
					}
				}
				else
				{
					// a merged grammar spec, track lexer rules and send to another grammar
					grammar.defineLexerRuleFoundInParser( $id.getToken(), $start );
				}
			}
			( ruleScopeSpec[definedRule] )?
			( ruleAction[definedRule] )*
			{ this.blockLevel = 0; }
			b=block
			exceptionGroup?
			EOR
			{
				// copy rule options into the block AST, which is where
				// the analysis will look for k option etc...
				$b.start.setBlockOptions( blockOptions );
			}
		)
	;
262
/** A named action attached to a rule; defined on r unless r is null. */
ruleAction[Rule r]
	:	^( amp=AMPERSAND id=ID a=ACTION )
		{
			if ( r != null )
			{
				r.defineNamedAction( $amp, $id, $a );
			}
		}
	;
266
/** A rule modifier keyword; mod is the text of the matched node, read from
 *  the start node before matching.
 */
modifier returns [String mod]
@init
{
	$mod = $start.getToken().getText();
}
	:	'protected'
	|	'public'
	|	'private'
	|	'fragment'
	;
277
/** A 'throws' subtree; returns the set of exception names listed in it. */
throwsSpec returns [HashSet<String> exceptions]
@init
{
	$exceptions = new HashSet<String>();
}
	:	^(	'throws'
			(	thrown=ID
				{ $exceptions.add( $thrown.text ); }
			)+
		)
	;
285
/** The scope section of a rule: an optional dynamic rule scope definition
 *  (attributes plus the named actions collected by attrScopeAction),
 *  followed by the names of global scopes the rule uses.
 *
 *  r is the rule being defined. The rule production passes null for a lexer
 *  rule found in a combined grammar, so, like ruleAction, nothing is
 *  recorded on r in that case instead of failing with a
 *  NullPointerException. References to unknown global scopes are still
 *  reported whether or not r is null.
 */
ruleScopeSpec[Rule r]
scope AttributeScopeActions;
@init
{
	$AttributeScopeActions::actions = new HashMap<GrammarAST, GrammarAST>();
}
	:	^(	'scope'
			(	attrScopeAction* attrs=ACTION
				{
					if ( r != null )
					{
						r.ruleScope = grammar.createRuleScope(r.name,$attrs.getToken());
						r.ruleScope.isDynamicRuleScope = true;
						r.ruleScope.addAttributes($attrs.text, ';');
						for (GrammarAST action : $AttributeScopeActions::actions.keySet())
							r.ruleScope.defineNamedAction(action, $AttributeScopeActions::actions.get(action));
					}
				}
			)?
			(	uses=ID
				{
					if ( grammar.getGlobalScope($uses.text)==null )
					{
						ErrorManager.grammarError(ErrorManager.MSG_UNKNOWN_DYNAMIC_SCOPE,
							grammar,
							$uses.getToken(),
							$uses.text);
					}
					else if ( r != null )
					{
						// the list of used scopes is created on first use
						if ( r.useScopes==null ) {r.useScopes=new ArrayList<String>();}
						r.useScopes.add($uses.text);
					}
				}
			)*
		)
	;
318
/** A BLOCK subtree. blockLevel counts how deeply blocks are nested; at
 *  depth 1 outerAltNum is reset to 1 on entry and incremented after each
 *  alternative/rewrite pair. The depth is restored in the finally clause.
 */
block
@init
{
	// must run during backtracking
	if ( ++this.blockLevel == 1 )
	{
		this.outerAltNum = 1;
	}
}
	:	^(	BLOCK
			optionsSpec?
			blockAction*
			(	alternative rewrite
				{{
					if ( this.blockLevel == 1 )
					{
						this.outerAltNum++;
					}
				}}
			)+
			EOB
		)
	;
finally { this.blockLevel--; }
340
// TODO: this does nothing now! subrules cannot have init actions. :(
// The subtree is matched only; the old call is kept below for reference.
blockAction
	:	^( amp=AMPERSAND id=ID a=ACTION )
		// {r.defineAction(#amp,#id,#a);}
	;
345
/** An ALT subtree: one or more elements followed by EOA. The init action
 *  that used to track alternatives with rewrites is disabled and kept below
 *  as a comment.
 */
alternative
//@init
//{
//	if ( state.backtracking == 0 )
//	{
//		if ( grammar.type!=Grammar.LEXER && grammar.GetOption("output")!=null && blockLevel==1 )
//		{
//			GrammarAST aRewriteNode = $start.FindFirstType(REWRITE); // alt itself has rewrite?
//			GrammarAST rewriteAST = (GrammarAST)$start.Parent.getChild($start.ChildIndex + 1);
//			// we have a rewrite if alt uses it inside subrule or this alt has one
//			// but don't count -> ... rewrites, which mean "do default auto construction"
//			if ( aRewriteNode!=null||
//				 (firstRewriteAST!=null &&
//				  firstRewriteAST.getType()==REWRITE &&
//				  firstRewriteAST.getChild(0)!=null &&
//				  firstRewriteAST.getChild(0).getType()!=ETC) )
//			{
//				Rule r = grammar.getRule(currentRuleName);
//				r.TrackAltsWithRewrites($start,this.outerAltNum);
//			}
//		}
//	}
//}
	:	^( ALT element+ EOA )
	;
371
/** Exception handling attached to a rule: one or more catch handlers with
 *  an optional finally clause, or a finally clause alone.
 */
exceptionGroup
	:	exceptionHandler+ finallyClause?
	|	finallyClause
	;
376
/** A catch handler; its action body is recorded on the current rule. */
exceptionHandler
	:	^( 'catch' ARG_ACTION handlerBody=ACTION )
		{
			trackInlineAction( $handlerBody );
		}
	;
380
/** A finally clause; its action body is recorded on the current rule. */
finallyClause
	:	^( 'finally' finallyBody=ACTION )
		{
			trackInlineAction( $finallyBody );
		}
	;
384
/** One element of an alternative. Labeled elements (ASSIGN, PLUS_ASSIGN)
 *  are registered on the grammar according to the type of the labeled
 *  node. Actions and semantic predicates are stamped with the current
 *  outerAltNum and recorded on the current rule.
 */
element
	:	^( ROOT element )
	|	^( BANG element )
	|	atom[null]
	|	^( NOT element )
	|	^( RANGE atom[null] atom[null] )
	|	^( CHAR_RANGE atom[null] atom[null] )
	|	^( ASSIGN id=ID el=element )
		{
			// look through a ROOT or BANG wrapper to the labeled node itself
			GrammarAST labeled = $el.start;
			int wrapperType = labeled.getType();
			if ( wrapperType == ANTLRParser.ROOT || wrapperType == ANTLRParser.BANG )
			{
				labeled = (GrammarAST)labeled.getChild( 0 );
			}
			int labeledType = labeled.getType();
			if ( labeledType == RULE_REF )
			{
				grammar.defineRuleRefLabel( currentRuleName, $id.getToken(), labeled );
			}
			else if ( labeledType == WILDCARD && grammar.type == Grammar.TREE_PARSER )
			{
				grammar.defineWildcardTreeLabel( currentRuleName, $id.getToken(), labeled );
			}
			else
			{
				grammar.defineTokenRefLabel( currentRuleName, $id.getToken(), labeled );
			}
		}
	|	^(	PLUS_ASSIGN id2=ID a2=element
			{
				// same unwrapping as for ASSIGN, but the label is a list label
				GrammarAST listed = $a2.start;
				int listWrapperType = listed.getType();
				if ( listWrapperType == ANTLRParser.ROOT || listWrapperType == ANTLRParser.BANG )
				{
					listed = (GrammarAST)listed.getChild( 0 );
				}
				int listedType = listed.getType();
				if ( listedType == RULE_REF )
				{
					grammar.defineRuleListLabel( currentRuleName, $id2.getToken(), listed );
				}
				else if ( listedType == WILDCARD && grammar.type == Grammar.TREE_PARSER )
				{
					grammar.defineWildcardTreeListLabel( currentRuleName, $id2.getToken(), listed );
				}
				else
				{
					grammar.defineTokenListLabel( currentRuleName, $id2.getToken(), listed );
				}
			}
		)
	|	ebnf
	|	tree_
	|	^( SYNPRED block )
	|	act=ACTION
		{
			$act.outerAltNum = this.outerAltNum;
			trackInlineAction( $act );
		}
	|	act2=FORCED_ACTION
		{
			$act2.outerAltNum = this.outerAltNum;
			trackInlineAction( $act2 );
		}
	|	SEMPRED
		{
			$SEMPRED.outerAltNum = this.outerAltNum;
			trackInlineAction( $SEMPRED );
		}
	|	SYN_SEMPRED
	|	^( BACKTRACK_SEMPRED .* )
	|	GATED_SEMPRED
		{
			$GATED_SEMPRED.outerAltNum = this.outerAltNum;
			trackInlineAction( $GATED_SEMPRED );
		}
	|	EPSILON
	;
460
/** A subrule: the dot-loop idiom is tried first through a syntactic
 *  predicate, otherwise a plain, optional, closure or positive-closure block.
 */
ebnf
	:	( dotLoop ) => dotLoop // .* or .+
	|	block
	|	^( OPTIONAL block )
	|	^( CLOSURE block )
	|	^( POSITIVE_CLOSURE block )
	;
468
/** Track the .* and .+ idioms and make them nongreedy by default.
 *  The options are set on the first child of the loop node; outside lexer
 *  grammars the option k=1 is added as well.
 */
dotLoop
	:	(	^( CLOSURE dotBlock )
		|	^( POSITIVE_CLOSURE dotBlock )
		)
		{
			GrammarAST loopBlock = (GrammarAST)$start.getChild( 0 );
			Map<String, Object> loopOptions = new HashMap<String, Object>();
			loopOptions.put( "greedy", "false" );
			if ( grammar.type != Grammar.LEXER )
			{
				// parser grammars assume k=1 for .* loops
				// otherwise they (analysis?) look til EOF!
				loopOptions.put( "k", 1 );
			}
			loopBlock.setOptions( grammar, loopOptions );
		}
	;
488
/** A block whose only alternative is a single wildcard. */
dotBlock
	:	^(	BLOCK
			^( ALT WILDCARD EOA )
			EOB
		)
	;
492
/** A tree pattern inside a rule: TREE_BEGIN with one or more elements. */
tree_
	:	^( TREE_BEGIN ( element )+ )
	;
496
/** A rule reference, token reference, literal, wildcard or scoped
 *  reference. References are reported to the grammar together with the
 *  current outer alternative number, and argument actions are recorded on
 *  the current rule. scope_ is the ID node of an enclosing DOT node, or
 *  null when there is none.
 */
atom[GrammarAST scope_]
	:	^( rr=RULE_REF ( rarg=ARG_ACTION )? )
		{
			grammar.altReferencesRule( currentRuleName, $scope_, $rr, this.outerAltNum );
			if ( $rarg != null )
			{
				$rarg.outerAltNum = this.outerAltNum;
				trackInlineAction( $rarg );
			}
		}
	|	^( t=TOKEN_REF ( targ=ARG_ACTION )? )
		{
			if ( $targ != null )
			{
				$targ.outerAltNum = this.outerAltNum;
				trackInlineAction( $targ );
			}
			// in a lexer grammar a token reference counts as a rule reference
			if ( grammar.type != Grammar.LEXER )
			{
				grammar.altReferencesTokenID( currentRuleName, $t, this.outerAltNum );
			}
			else
			{
				grammar.altReferencesRule( currentRuleName, $scope_, $t, this.outerAltNum );
			}
		}
	|	c=CHAR_LITERAL
		{
			// literals are tracked as token references, except in lexer grammars
			Rule charOwner =
				grammar.type != Grammar.LEXER ? grammar.getRule( currentRuleName ) : null;
			if ( charOwner != null )
			{
				charOwner.trackTokenReferenceInAlt( $c, outerAltNum );
			}
		}
	|	s=STRING_LITERAL
		{
			Rule stringOwner =
				grammar.type != Grammar.LEXER ? grammar.getRule( currentRuleName ) : null;
			if ( stringOwner != null )
			{
				stringOwner.trackTokenReferenceInAlt( $s, outerAltNum );
			}
		}
	|	WILDCARD
	|	^( DOT ID atom[$ID] ) // scope override on rule
	;
544
/** A tree construction suffix node: ROOT or BANG. */
ast_suffix
	:	ROOT | BANG
	;
549
/** The rewrites that follow an alternative; may be empty. The start node is
 *  remembered in currentRewriteRule, and when the grammar builds ASTs it
 *  gets a fresh set for the references collected by rewrite_atom. Each
 *  predicate guarding a rewrite is recorded on the current rule.
 */
rewrite
@init
{
	// track top level REWRITES node, store stuff there
	currentRewriteRule = $start; // has to execute during backtracking
	if ( state.backtracking == 0 && grammar.buildAST() )
	{
		currentRewriteRule.rewriteRefsDeep = new HashSet<GrammarAST>();
	}
}
	:	^(	REWRITES
			(	^( REWRITE ( pred=SEMPRED )? rewrite_alternative )
				{
					if ( $pred != null )
					{
						$pred.outerAltNum = this.outerAltNum;
						trackInlineAction( $pred );
					}
				}
			)*
		)
		//{System.out.println("-> refs = "+currentRewriteRule.rewriteRefsDeep);}
	|
	;
575
/** A block inside a rewrite. While it is walked, currentRewriteBlock points
 *  at its BLOCK node, which receives fresh shallow and deep reference sets.
 *  Afterwards the shallow references are added to the deep set of the
 *  enclosing block, if any, and the enclosing block becomes current again.
 */
rewrite_block
@init
{
	GrammarAST outerBlock = currentRewriteBlock;
	if ( state.backtracking == 0 )
	{
		// don't do if guessing
		currentRewriteBlock = $start; // pts to BLOCK node
		currentRewriteBlock.rewriteRefsShallow = new HashSet<GrammarAST>();
		currentRewriteBlock.rewriteRefsDeep = new HashSet<GrammarAST>();
	}
}
	:	^( BLOCK rewrite_alternative EOB )
		//{System.out.println("atoms="+currentRewriteBlock.rewriteRefs);}
		{
			// copy the element refs in this block to the surrounding block
			if ( outerBlock != null )
			{
				outerBlock.rewriteRefsDeep.addAll( currentRewriteBlock.rewriteRefsShallow );
			}
		}
	;
finally { currentRewriteBlock = outerBlock; } // restore old BLOCK ptr
601
/** One rewrite alternative: a tree rewrite when the grammar builds ASTs, a
 *  template rewrite when it builds templates, or ETC.
 */
rewrite_alternative
	:	{grammar.buildAST()}? =>
		^(	a=ALT
			(	rewrite_element+
			|	EPSILON
			)
			EOA
		)
	|	{grammar.buildTemplate()}? =>
		rewrite_template
	|	ETC {this.blockLevel==1}? // only valid as outermost rewrite
	;
607
/** An element of a tree rewrite: an atom, a subrule or a tree. */
rewrite_element
	:	rewrite_atom | rewrite_ebnf | rewrite_tree
	;
613
/** An optional, closure or positive-closure subrule inside a rewrite. */
rewrite_ebnf
	:	^( OPTIONAL         rewrite_block )
	|	^( CLOSURE          rewrite_block )
	|	^( POSITIVE_CLOSURE rewrite_block )
	;
619
/** A tree inside a rewrite: a root atom followed by any number of elements. */
rewrite_tree
	:	^( TREE_BEGIN rewrite_atom rewrite_element* )
	;
623
/** An atom inside a tree rewrite. Before matching, and only when not
 *  backtracking, the start node is added to the reference sets of the
 *  current rewrite block (if any) and of the current rewrite rule. This
 *  happens when the grammar builds ASTs and the node is a rule reference,
 *  label, literal, or a token reference whose text is among the token
 *  references of the current outer alternative. Actions are recorded on
 *  the current rule.
 */
rewrite_atom
@init
{
	if ( state.backtracking == 0 )
	{
		Rule enclosingRule = grammar.getRule( currentRuleName );
		Set tokenRefs = enclosingRule.getTokenRefsInAlt( outerAltNum );
		int nodeType = $start.getType();
		// a token reference that the alternative never matched
		boolean imaginary =
			nodeType == TOKEN_REF &&
			!tokenRefs.contains( $start.getText() );
		boolean referenceNode =
			nodeType == RULE_REF ||
			nodeType == LABEL ||
			nodeType == TOKEN_REF ||
			nodeType == CHAR_LITERAL ||
			nodeType == STRING_LITERAL;
		if ( !imaginary && grammar.buildAST() && referenceNode )
		{
			// track per block and for entire rewrite rule
			if ( currentRewriteBlock != null )
			{
				currentRewriteBlock.rewriteRefsShallow.add( $start );
				currentRewriteBlock.rewriteRefsDeep.add( $start );
			}

			//System.out.println("adding "+$start.getText()+" to "+currentRewriteRule.getText());
			currentRewriteRule.rewriteRefsDeep.add( $start );
		}
	}
}
	:	RULE_REF
	|	(	^(	TOKEN_REF
				(	ARG_ACTION
					{
						$ARG_ACTION.outerAltNum = this.outerAltNum;
						trackInlineAction( $ARG_ACTION );
					}
				)?
			)
		|	CHAR_LITERAL
		|	STRING_LITERAL
		)
	|	LABEL
	|	ACTION
		{
			$ACTION.outerAltNum = this.outerAltNum;
			trackInlineAction( $ACTION );
		}
	;
672
/** A template rewrite: an empty alternative, a TEMPLATE node named by an ID
 *  or by an action, or a bare action. Every action involved (argument
 *  values, the naming action, the bare action) is stamped with the current
 *  outerAltNum and recorded on the current rule.
 */
rewrite_template
	:	^( ALT EPSILON EOA )
	|	^(	TEMPLATE
			(	id=ID
			|	ind=ACTION
			)
			^(	ARGLIST
				(	^( ARG arg=ID a=ACTION )
					{
						$a.outerAltNum = this.outerAltNum;
						trackInlineAction( $a );
					}
				)*
			)
			{
				// the naming action is recorded after the argument actions
				if ( $ind != null )
				{
					$ind.outerAltNum = this.outerAltNum;
					trackInlineAction( $ind );
				}
			}
			(	DOUBLE_QUOTE_STRING_LITERAL
			|	DOUBLE_ANGLE_STRING_LITERAL
			)?
		)
	|	act=ACTION
		{
			$act.outerAltNum = this.outerAltNum;
			trackInlineAction( $act );
		}
	;
701