ANTLR.g revision 324c4644fee44b9898524c09511bd33c3f12e2df
1/*
2 [The "BSD license"]
3 Copyright (c) 2005-2011 Terence Parr
4 All rights reserved.
5
6 Grammar conversion to ANTLR v3:
7 Copyright (c) 2011 Sam Harwell
8 All rights reserved.
9
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions
12 are met:
13 1. Redistributions of source code must retain the above copyright
14	notice, this list of conditions and the following disclaimer.
15 2. Redistributions in binary form must reproduce the above copyright
16	notice, this list of conditions and the following disclaimer in the
17	documentation and/or other materials provided with the distribution.
18 3. The name of the author may not be used to endorse or promote products
19	derived from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32
33/** Read in an ANTLR grammar and build an AST.  Try not to do
34 *  any actions, just build the tree.
35 *
36 *  The phases are:
37 *
38 *		antlr.g (this file)
39 *		assign.types.g
40 *		define.g
41 *		buildnfa.g
42 *		antlr.print.g (optional)
43 *		codegen.g
44 *
45 *  Terence Parr
46 *  University of San Francisco
47 *  2005
48 */
49
50grammar ANTLR;
51
52options
53{
54	output=AST;
55	ASTLabelType=GrammarAST;
56}
57
58tokens
59{
60	//OPTIONS='options';
61	//TOKENS='tokens';
62	LEXER='lexer';
63	PARSER='parser';
64	CATCH='catch';
65	FINALLY='finally';
66	GRAMMAR='grammar';
67	PRIVATE='private';
68	PROTECTED='protected';
69	PUBLIC='public';
70	RETURNS='returns';
71	THROWS='throws';
72	TREE='tree';
73
74	RULE;
75	PREC_RULE;
76	RECURSIVE_RULE_REF; // flip recursive RULE_REF to RECURSIVE_RULE_REF in prec rules
77	BLOCK;
78	OPTIONAL;
79	CLOSURE;
80	POSITIVE_CLOSURE;
81	SYNPRED;
82	RANGE;
83	CHAR_RANGE;
84	EPSILON;
85	ALT;
86	EOR;
87	EOB;
88	EOA; // end of alt
89	ID;
90	ARG;
91	ARGLIST;
92	RET;
93	LEXER_GRAMMAR;
94	PARSER_GRAMMAR;
95	TREE_GRAMMAR;
96	COMBINED_GRAMMAR;
97	INITACTION;
98	FORCED_ACTION; // {{...}} always exec even during syn preds
99	LABEL; // $x used in rewrite rules
100	TEMPLATE;
101	SCOPE='scope';
102	IMPORT='import';
103	GATED_SEMPRED; // {p}? =>
104	SYN_SEMPRED; // (...) =>   it's a manually-specified synpred converted to sempred
105	BACKTRACK_SEMPRED; // auto backtracking mode syn pred converted to sempred
106	FRAGMENT='fragment';
107	DOT;
108	REWRITES;
109}
110
111@lexer::header {
112package org.antlr.grammar.v3;
113import org.antlr.tool.ErrorManager;
114import org.antlr.tool.Grammar;
115}
116
117@parser::header {
118package org.antlr.grammar.v3;
119import org.antlr.tool.ErrorManager;
120import org.antlr.tool.Grammar;
121import org.antlr.tool.GrammarAST;
122import org.antlr.misc.IntSet;
123import org.antlr.tool.Rule;
124}
125
@lexer::members {
/** Becomes true once the lexer has matched a ROOT ('^') or BANG ('!') token. */
public boolean hasASTOperator = false;

/** File name last stored through {@link #setFileName}; null until then. */
private String fileName;

/** @return the file name currently recorded by this lexer */
public String getFileName() {
    return this.fileName;
}

/**
 * Records a file name on this lexer (the SRC rule calls this when it
 * sees a source directive).
 *
 * @param value the file name to remember
 */
public void setFileName(String value) {
    this.fileName = value;
}
}
138
139@parser::members {
140protected String currentRuleName = null;
141protected GrammarAST currentBlockAST = null;
142protected boolean atTreeRoot; // are we matching a tree root in tree grammar?
143
144public static ANTLRParser createParser(TokenStream input) {
145    ANTLRParser parser = new ANTLRParser(input);
146    parser.adaptor = new grammar_Adaptor(parser);
147    return parser;
148}
149
150private static class GrammarASTErrorNode extends GrammarAST {
151    public IntStream input;
152    public Token start;
153    public Token stop;
154    public RecognitionException trappedException;
155
156    public GrammarASTErrorNode(TokenStream input, Token start, Token stop, RecognitionException e) {
157        super(stop);
158        //Console.Out.WriteLine( "start: " + start + ", stop: " + stop );
159        if ( stop == null ||
160             ( stop.getTokenIndex() < start.getTokenIndex() &&
161              stop.getType() != Token.EOF) ) {
162            // sometimes resync does not consume a token (when LT(1) is
163            // in follow set.  So, stop will be 1 to left to start. adjust.
164            // Also handle case where start is the first token and no token
165            // is consumed during recovery; LT(-1) will return null.
166            stop = start;
167        }
168        this.input = input;
169        this.start = start;
170        this.stop = stop;
171        this.trappedException = e;
172    }
173
174    @Override
175    public boolean isNil() { return false; }
176
177    @Override
178    public String getText()
179    {
180        String badText = null;
181        if (start instanceof Token) {
182            int i = ((Token)start).getTokenIndex();
183            int j = ((Token)stop).getTokenIndex();
184            if (((Token)stop).getType() == Token.EOF) {
185                j = ((TokenStream)input).size();
186            }
187            badText = ((TokenStream)input).toString(i, j);
188        } else if (start instanceof Tree) {
189            badText = ((TreeNodeStream)input).toString(start, stop);
190        } else {
191            // people should subclass if they alter the tree type so this
192            // next one is for sure correct.
193            badText = "<unknown>";
194        }
195        return badText;
196    }
197
198    @Override
199    public void setText(String value) { }
200
201    @Override
202    public int getType() { return Token.INVALID_TOKEN_TYPE; }
203
204    @Override
205    public void setType(int value) { }
206
207    @Override
208    public String toString()
209    {
210        if (trappedException instanceof MissingTokenException)
211        {
212            return "<missing type: " +
213                   ( (MissingTokenException)trappedException ).getMissingType() +
214                   ">";
215        } else if (trappedException instanceof UnwantedTokenException) {
216            return "<extraneous: " +
217                   ( (UnwantedTokenException)trappedException ).getUnexpectedToken() +
218                   ", resync=" + getText() + ">";
219        } else if (trappedException instanceof MismatchedTokenException) {
220            return "<mismatched token: " + trappedException.token + ", resync=" + getText() + ">";
221        } else if (trappedException instanceof NoViableAltException) {
222            return "<unexpected: " + trappedException.token +
223                   ", resync=" + getText() + ">";
224        }
225        return "<error: " + getText() + ">";
226    }
227}
228
229static class grammar_Adaptor extends CommonTreeAdaptor {
230    ANTLRParser _outer;
231
232    public grammar_Adaptor(ANTLRParser outer) {
233        _outer = outer;
234    }
235
236    @Override
237    public Object create(Token payload) {
238        GrammarAST t = new GrammarAST( payload );
239        if (_outer != null)
240            t.enclosingRuleName = _outer.currentRuleName;
241        return t;
242    }
243
244    @Override
245    public Object errorNode(TokenStream input, Token start, Token stop, RecognitionException e) {
246        GrammarAST t = new GrammarASTErrorNode(input, start, stop, e);
247        if (_outer != null)
248            t.enclosingRuleName = _outer.currentRuleName;
249        return t;
250    }
251}
252
253private Grammar grammar;
254private int grammarType;
255private String fileName;
256
257public Grammar getGrammar() {
258    return grammar;
259}
260
261public void setGrammar(Grammar value) {
262    grammar = value;
263}
264
265public int getGrammarType() {
266    return grammarType;
267}
268
269public void setGrammarType(int value) {
270    grammarType = value;
271}
272
273public String getFileName() {
274    return fileName;
275}
276
277public void setFileName(String value) {
278    fileName = value;
279}
280
281private final int LA(int i) { return input.LA( i ); }
282
283private final Token LT(int k) { return input.LT( k ); }
284
285/*partial void createTreeAdaptor(ref ITreeAdaptor adaptor)
286{
287    adaptor = new grammar_Adaptor(this);
288}*/
289
290protected GrammarAST setToBlockWithSet(GrammarAST b) {
291    /*
292     * alt = ^(ALT["ALT"] {b} EOA["EOA"])
293     * prefixWithSynpred( alt )
294     * return ^(BLOCK["BLOCK"] {alt} EOB["<end-of-block>"])
295     */
296    GrammarAST alt = (GrammarAST)adaptor.create(ALT, "ALT");
297    adaptor.addChild(alt, b);
298    adaptor.addChild(alt, adaptor.create(EOA, "<end-of-alt>"));
299
300    prefixWithSynPred(alt);
301
302    GrammarAST block = (GrammarAST)adaptor.create(BLOCK, b.getToken(), "BLOCK");
303    adaptor.addChild(block, alt);
304    adaptor.addChild(alt, adaptor.create(EOB, "<end-of-block>"));
305
306    return block;
307}
308
309/** Create a copy of the alt and make it into a BLOCK; all actions,
310 *  labels, tree operators, rewrites are removed.
311 */
312protected GrammarAST createBlockFromDupAlt(GrammarAST alt) {
313    /*
314     * ^(BLOCK["BLOCK"] {GrammarAST.dupTreeNoActions(alt)} EOB["<end-of-block>"])
315     */
316    GrammarAST nalt = GrammarAST.dupTreeNoActions(alt, null);
317
318    GrammarAST block = (GrammarAST)adaptor.create(BLOCK, alt.getToken(), "BLOCK");
319    adaptor.addChild( block, nalt );
320    adaptor.addChild( block, adaptor.create( EOB, "<end-of-block>" ) );
321
322    return block;
323}
324
325/** Rewrite alt to have a synpred as first element;
326 *  (xxx)=>xxx
327 *  but only if they didn't specify one manually.
328 */
329protected void prefixWithSynPred( GrammarAST alt ) {
330    // if they want backtracking and it's not a lexer rule in combined grammar
331    String autoBacktrack = (String)grammar.getBlockOption( currentBlockAST, "backtrack" );
332    if ( autoBacktrack == null )
333    {
334        autoBacktrack = (String)grammar.getOption( "backtrack" );
335    }
336    if ( autoBacktrack != null && autoBacktrack.equals( "true" ) &&
337         !( grammarType == Grammar.COMBINED &&
338         Rule.getRuleType(currentRuleName) == Grammar.LEXER) &&
339         alt.getChild( 0 ).getType() != SYN_SEMPRED )
340    {
341        // duplicate alt and make a synpred block around that dup'd alt
342        GrammarAST synpredBlockAST = createBlockFromDupAlt( alt );
343
344        // Create a BACKTRACK_SEMPRED node as if user had typed this in
345        // Effectively we replace (xxx)=>xxx with {synpredxxx}? xxx
346        GrammarAST synpredAST = createSynSemPredFromBlock( synpredBlockAST,
347                                                          BACKTRACK_SEMPRED );
348
349        // insert BACKTRACK_SEMPRED as first element of alt
350        //synpredAST.getLastSibling().setNextSibling( alt.getFirstChild() );
351        //synpredAST.addChild( alt.getFirstChild() );
352        //alt.setFirstChild( synpredAST );
353        GrammarAST[] children = alt.getChildrenAsArray();
354        adaptor.setChild( alt, 0, synpredAST );
355        for ( int i = 0; i < children.length; i++ )
356        {
357            if ( i < children.length - 1 )
358                adaptor.setChild( alt, i + 1, children[i] );
359            else
360                adaptor.addChild( alt, children[i] );
361        }
362    }
363}
364
365protected GrammarAST createSynSemPredFromBlock( GrammarAST synpredBlockAST, int synpredTokenType ) {
366    // add grammar fragment to a list so we can make fake rules for them later.
367    String predName = grammar.defineSyntacticPredicate( synpredBlockAST, currentRuleName );
368    // convert (alpha)=> into {synpredN}? where N is some pred count
369    // during code gen we convert to function call with templates
370    String synpredinvoke = predName;
371    GrammarAST p = (GrammarAST)adaptor.create( synpredTokenType, synpredinvoke );
372    // track how many decisions have synpreds
373    grammar.blocksWithSynPreds.add( currentBlockAST );
374    return p;
375}
376
377public static GrammarAST createSimpleRuleAST( String name, GrammarAST block, boolean fragment ) {
378    TreeAdaptor adaptor = new grammar_Adaptor(null);
379
380    GrammarAST modifier = null;
381    if ( fragment )
382    {
383        modifier = (GrammarAST)adaptor.create( FRAGMENT, "fragment" );
384    }
385
386    /*
387     * EOBAST = block.getLastChild()
388     * ^(RULE[block,"rule"] ID["name"] {modifier} ARG["ARG"] RET["RET"] SCOPE["scope"] {block} EOR[EOBAST,"<end-of-rule>"])
389     */
390    GrammarAST rule = (GrammarAST)adaptor.create( RULE, block.getToken(), "rule" );
391
392    adaptor.addChild( rule, adaptor.create( ID, name ) );
393    if ( modifier != null )
394        adaptor.addChild( rule, modifier );
395    adaptor.addChild( rule, adaptor.create( ARG, "ARG" ) );
396    adaptor.addChild( rule, adaptor.create( RET, "RET" ) );
397    adaptor.addChild( rule, adaptor.create( SCOPE, "scope" ) );
398    adaptor.addChild( rule, block );
399    adaptor.addChild( rule, adaptor.create( EOR, block.getLastChild().getToken(), "<end-of-rule>" ) );
400
401    return rule;
402}
403
404@Override
405public void reportError(RecognitionException ex)
406{
407    //Token token = null;
408    //try
409    //{
410    //    token = LT( 1 );
411    //}
412    //catch ( TokenStreamException tse )
413    //{
414    //    ErrorManager.internalError( "can't get token???", tse );
415    //}
416    Token token = ex.token;
417    ErrorManager.syntaxError(
418        ErrorManager.MSG_SYNTAX_ERROR,
419        grammar,
420        token,
421        "antlr: " + ex.toString(),
422        ex );
423}
424
425public void cleanup( GrammarAST root )
426{
427    if ( grammarType == Grammar.LEXER )
428    {
429        String filter = (String)grammar.getOption( "filter" );
430        GrammarAST tokensRuleAST =
431            grammar.addArtificialMatchTokensRule(
432                root,
433                grammar.lexerRuleNamesInCombined,
434                grammar.getDelegateNames(),
435                filter != null && filter.equals( "true" ) );
436    }
437}
438}
439
440public
441grammar_![Grammar g]
442@init
443{
444	this.grammar = g;
445	Map<String, Object> opts;
446}
447@after
448{
449	cleanup( $tree );
450}
451	:	//hdr:headerSpec
452		( ACTION )?
453		( cmt=DOC_COMMENT  )?
454		gr=grammarType gid=id {grammar.setName($gid.text);} SEMI
455		(	optionsSpec {opts = $optionsSpec.opts; grammar.setOptions(opts, $optionsSpec.start);}
456		)?
457		(ig=delegateGrammars)?
458		(ts=tokensSpec)?
459		scopes=attrScopes
460		(a=actions)?
461		r=rules
462		EOF
463		-> ^($gr $gid $cmt? optionsSpec? $ig? $ts? $scopes? $a? $r)
464	;
465
466grammarType
467	:	(	'lexer'  gr='grammar' {grammarType=Grammar.LEXER; grammar.type = Grammar.LEXER;}       // pure lexer
468			-> LEXER_GRAMMAR[$gr]
469		|	'parser' gr='grammar' {grammarType=Grammar.PARSER; grammar.type = Grammar.PARSER;}     // pure parser
470			-> PARSER_GRAMMAR[$gr]
471		|	'tree'   gr='grammar' {grammarType=Grammar.TREE_PARSER; grammar.type = Grammar.TREE_PARSER;}  // a tree parser
472			-> TREE_GRAMMAR[$gr]
473		|			 gr='grammar' {grammarType=Grammar.COMBINED; grammar.type = Grammar.COMBINED;} // merged parser/lexer
474			-> COMBINED_GRAMMAR[$gr]
475		)
476	;
477
478actions
479	:	(action)+
480	;
481
482/** Match stuff like @parser::members {int i;} */
483action
484	:	AMPERSAND^ (actionScopeName COLON! COLON!)? id ACTION
485	;
486
487/** Sometimes the scope names will collide with keywords; allow them as
488 *  ids for action scopes.
489 */
490actionScopeName
491	:	id
492	|	l='lexer'
493		-> ID[$l]
494	|	p='parser'
495		-> ID[$p]
496	;
497
498optionsSpec returns [Map<String, Object> opts=new HashMap<String, Object>()]
499	:	OPTIONS^ (option[$opts] SEMI!)+ RCURLY!
500	;
501
502option[Map<String, Object> opts]
503	:	id ASSIGN^ optionValue
504		{
505			$opts.put($id.text, $optionValue.value);
506		}
507	;
508
509optionValue returns [Object value = null]
510	:	x=id			 {$value = $x.text;}
511	|	s=STRING_LITERAL {String vs = $s.text;
512						  // remove the quotes:
513						  $value=vs.substring(1,vs.length()-1);}
514	|	c=CHAR_LITERAL   {String vs = $c.text;
515						  // remove the quotes:
516						  $value=vs.substring(1,vs.length()-1);}
517	|	i=INT            {$value = Integer.parseInt($i.text);}
518	|	ss=STAR			 {$value = "*";} // used for k=*
519		-> STRING_LITERAL[$ss]
520//	|	cs:charSet       {value = #cs;} // return set AST in this case
521	;
522
523delegateGrammars
524	:	'import'^ delegateGrammar (COMMA! delegateGrammar)* SEMI!
525	;
526
527delegateGrammar
528	:	lab=id ASSIGN^ g=id {grammar.importGrammar($g.tree, $lab.text);}
529	|	g2=id               {grammar.importGrammar($g2.tree,null);}
530	;
531
532tokensSpec
533	:	TOKENS^
534			tokenSpec*
535		RCURLY!
536	;
537
538tokenSpec
539	:	TOKEN_REF ( ASSIGN^ (STRING_LITERAL|CHAR_LITERAL) )? SEMI!
540	;
541
542attrScopes
543	:	(attrScope)*
544	;
545
546attrScope
547	:	'scope'^ id ruleActions? ACTION
548	;
549
550rules
551	:	(	rule
552		)+
553	;
554
555public
556rule
557@init
558{
559	GrammarAST eob=null;
560	CommonToken start = (CommonToken)LT(1);
561	int startLine = LT(1).getLine();
562}
563	:
564	(	(	d=DOC_COMMENT
565		)?
566		(	p1='protected'	//{modifier=$p1.tree;}
567		|	p2='public'		//{modifier=$p2.tree;}
568		|	p3='private'	//{modifier=$p3.tree;}
569		|	p4='fragment'	//{modifier=$p4.tree;}
570		)?
571		ruleName=id
572		{
573			currentRuleName=$ruleName.text;
574			if ( grammarType==Grammar.LEXER && $p4==null )
575				grammar.lexerRuleNamesInCombined.add(currentRuleName);
576		}
577		( BANG )?
578		( aa=ARG_ACTION )?
579		( 'returns' rt=ARG_ACTION  )?
580		( throwsSpec )?
581		( optionsSpec )?
582		scopes=ruleScopeSpec
583		(ruleActions)?
584		COLON
585		ruleAltList[$optionsSpec.opts]
586		SEMI
587		( ex=exceptionGroup )?
588		->	^(	RULE[$ruleName.start, "rule"]
589				$ruleName
590				// the modifier will be 0 or one of the modifiers:
591				$p1? $p2? $p3? $p4?
592				^(ARG["ARG"] $aa?)
593				^(RET["RET"] $rt?)
594				throwsSpec?
595				optionsSpec?
596				$scopes
597				ruleActions?
598				ruleAltList
599				$ex?
600				EOR[$SEMI,"<end-of-rule>"])
601	)
602	{
603		$tree.setTreeEnclosingRuleNameDeeply(currentRuleName);
604		((GrammarAST)$tree.getChild(0)).setBlockOptions($optionsSpec.opts);
605	}
606	;
607
608ruleActions
609	:	(ruleAction)+
610	;
611
612/** Match stuff like @init {int i;} */
613ruleAction
614	:	AMPERSAND^ id ACTION
615	;
616
617throwsSpec
618	:	'throws'^ id ( COMMA! id )*
619	;
620
621ruleScopeSpec
622	:	( 'scope' ruleActions? ACTION )?
623		( 'scope' idList SEMI )*
624		-> ^(SCOPE[$start,"scope"] ruleActions? ACTION? idList*)
625	;
626
627ruleAltList[Map<String, Object> opts]
628@init
629{
630	GrammarAST blkRoot = null;
631	GrammarAST save = currentBlockAST;
632}
633	:	( -> BLOCK[input.LT(-1),"BLOCK"] )
634		{
635			blkRoot = (GrammarAST)$tree.getChild(0);
636			blkRoot.setBlockOptions($opts);
637			currentBlockAST = blkRoot;
638		}
639		(	a1=alternative r1=rewrite
640			{if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) prefixWithSynPred($a1.tree);}
641			-> $a1 $r1?
642		)
643		(	(	OR a2=alternative r2=rewrite
644				{if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR)) prefixWithSynPred($a2.tree);}
645				-> $ruleAltList $a2 $r2?
646			)+
647		|
648		)
649		-> ^({blkRoot} $ruleAltList EOB["<end-of-block>"])
650	;
651finally { currentBlockAST = save; }
652
653/** Build #(BLOCK ( #(ALT ...) EOB )+ ) */
654block
655@init
656{
657	GrammarAST save = currentBlockAST;
658}
659	:	(	lp=LPAREN
660			-> BLOCK[$lp,"BLOCK"]
661		)
662		{currentBlockAST = (GrammarAST)$tree.getChild(0);}
663		(
664			// 2nd alt and optional branch ambig due to
665			// linear approx LL(2) issue.  COLON ACTION
666			// matched correctly in 2nd alt.
667			(optionsSpec {((GrammarAST)$tree.getChild(0)).setOptions(grammar,$optionsSpec.opts);})?
668			( ruleActions )?
669			COLON
670		|	ACTION COLON
671		)?
672
673		a=alternative r=rewrite
674		{
675			stream_alternative.add( $r.tree );
676			if ( LA(1)==OR || (LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR) )
677				prefixWithSynPred($a.tree);
678		}
679		(	OR a=alternative r=rewrite
680			{
681				stream_alternative.add( $r.tree );
682				if (LA(1)==OR||(LA(2)==QUESTION||LA(2)==PLUS||LA(2)==STAR))
683					prefixWithSynPred($a.tree);
684			}
685		)*
686
687		rp=RPAREN
688		-> ^($block optionsSpec? ruleActions? ACTION? alternative+ EOB[$rp,"<end-of-block>"])
689	;
690finally { currentBlockAST = save; }
691
692// ALT and EOA have indexes tracking start/stop of entire alt
693alternative
694	:	element+
695		-> ^(ALT[$start,"ALT"] element+ EOA[input.LT(-1),"<end-of-alt>"])
696	|	// epsilon alt
697		-> ^(ALT[$start,"ALT"] EPSILON[input.LT(-1),"epsilon"] EOA[input.LT(-1),"<end-of-alt>"])
698	;
699
700exceptionGroup
701	:	exceptionHandler+ finallyClause?
702	|	finallyClause
703	;
704
705exceptionHandler
706	:	'catch'^ ARG_ACTION ACTION
707	;
708
709finallyClause
710	:	'finally'^ ACTION
711	;
712
713element
714	:	elementNoOptionSpec
715	;
716
717elementNoOptionSpec
718@init
719{
720	IntSet elements=null;
721}
722	:	(	(	id (ASSIGN^|PLUS_ASSIGN^) (atom|block)
723			)
724			(	sub=ebnfSuffix[root_0,false]! {root_0 = $sub.tree;}
725			)?
726		|	a=atom
727			(	sub2=ebnfSuffix[$a.tree,false]! {root_0=$sub2.tree;}
728			)?
729		|	ebnf
730		|	FORCED_ACTION
731		|	ACTION
732		|	p=SEMPRED ( IMPLIES! {$p.setType(GATED_SEMPRED);} )?
733			{
734			grammar.blocksWithSemPreds.add(currentBlockAST);
735			}
736		|	t3=tree_
737		)
738	;
739
740atom
741	:	range (ROOT^|BANG^)?
742	|	(
743			// grammar.rule but ensure no spaces. "A . B" is not a qualified ref
744			// We do here rather than lexer so we can build a tree
745			({LT(1).getCharPositionInLine()+LT(1).getText().length()==LT(2).getCharPositionInLine()&&
746			 LT(2).getCharPositionInLine()+1==LT(3).getCharPositionInLine()}? id WILDCARD (terminal|ruleref)) =>
747			id w=WILDCARD^ (terminal|ruleref) {$w.setType(DOT);}
748		|	terminal
749		|	ruleref
750		)
751	|	notSet (ROOT^|BANG^)?
752	;
753
754ruleref
755	:	RULE_REF^ ARG_ACTION? (ROOT^|BANG^)?
756	;
757
758notSet
759	:	NOT^
760		(	notTerminal
761		|	block
762		)
763	;
764
/** Match the root element of a tree pattern.  atTreeRoot is raised for the
 *  duration of the rule so that the terminal rule can report a wildcard
 *  used as a root.  The flag is lowered in a finally clause rather than
 *  @after: an @after action is skipped when the rule exits through a
 *  recognition error, which would leave the flag stuck at true.
 */
treeRoot
@init{atTreeRoot=true;}
	:	id (ASSIGN^|PLUS_ASSIGN^) (atom|block)
	|	atom
	|	block
	;
finally { atTreeRoot=false; }
772
773tree_
774	:	TREE_BEGIN^
775		treeRoot element+
776		RPAREN!
777	;
778
/** Matches EBNF blocks (and sets via block rule) */
780ebnf
781	:	block
782		(	QUESTION
783			-> ^(OPTIONAL[$start,"?"] block)
784		|	STAR
785			-> ^(CLOSURE[$start,"*"] block)
786		|	PLUS
787			-> ^(POSITIVE_CLOSURE[$start,"+"] block)
788		|	IMPLIES // syntactic predicate
789			// ignore for lexer rules in combined
790			-> {grammarType == Grammar.COMBINED && Rule.getRuleType(currentRuleName) == Grammar.LEXER}? ^(SYNPRED[$start,"=>"] block)
791			// create manually specified (...)=> predicate; convert to sempred
792			-> {createSynSemPredFromBlock($block.tree, SYN_SEMPRED)}
793		|	ROOT
794			-> ^(ROOT block)
795		|	BANG
796			-> ^(BANG block)
797		|
798			-> block
799		)
800	;
801
802range
803	:	c1=CHAR_LITERAL RANGE c2=CHAR_LITERAL
804		-> ^(CHAR_RANGE[$c1,".."] $c1 $c2)
805	;
806
807terminal
808	:	cl=CHAR_LITERAL^ ( elementOptions[$cl.tree]! )? (ROOT^|BANG^)?
809
810	|	tr=TOKEN_REF^
811		( elementOptions[$tr.tree]! )?
812		( ARG_ACTION )? // Args are only valid for lexer rules
813		(ROOT^|BANG^)?
814
815	|	sl=STRING_LITERAL^ ( elementOptions[$sl.tree]! )? (ROOT^|BANG^)?
816
817	|	wi=WILDCARD (ROOT^|BANG^)?
818		{
819			if ( atTreeRoot )
820			{
821				ErrorManager.syntaxError(
822					ErrorManager.MSG_WILDCARD_AS_ROOT,grammar,$wi,null,null);
823			}
824		}
825	;
826
827elementOptions[GrammarAST terminalAST]
828	:	OPEN_ELEMENT_OPTION^ defaultNodeOption[terminalAST] CLOSE_ELEMENT_OPTION!
829	|	OPEN_ELEMENT_OPTION^ elementOption[terminalAST] (SEMI! elementOption[terminalAST])* CLOSE_ELEMENT_OPTION!
830	;
831
832defaultNodeOption[GrammarAST terminalAST]
833	:	elementOptionId
834		{terminalAST.setTerminalOption(grammar,Grammar.defaultTokenOption,$elementOptionId.qid);}
835	;
836
837elementOption[GrammarAST terminalAST]
838	:	id ASSIGN^
839		(	elementOptionId
840			{terminalAST.setTerminalOption(grammar,$id.text,$elementOptionId.qid);}
841		|	(t=STRING_LITERAL|t=DOUBLE_QUOTE_STRING_LITERAL|t=DOUBLE_ANGLE_STRING_LITERAL)
842			{terminalAST.setTerminalOption(grammar,$id.text,$t.text);}
843		)
844	;
845
/** Match a dot-separated identifier such as a.b.c and return it as one string. */
elementOptionId returns [String qid]
@init{StringBuilder dotted = new StringBuilder();}
	:	i=id {dotted.append($i.text);}
		(	'.' i=id {dotted.append(".").append($i.text);}
		)*
		{$qid = dotted.toString();}
	;
851
/** Wrap elemAST in a single-alternative block and hang that block under
 *  the node chosen by the suffix operator:
 *
 *      ^( (OPTIONAL|CLOSURE|POSITIVE_CLOSURE) ^(BLOCK ^(ALT elemAST EOA) EOB) )
 *
 *  Outside of rewrites the alternative is passed to prefixWithSynPred().
 *  currentBlockAST points at the new block while the rule runs and is
 *  restored in a finally clause, as ruleAltList and block do, so the
 *  enclosing block is reinstated even when the rule exits through a
 *  recognition error (an @after action would be skipped in that case).
 */
ebnfSuffix[GrammarAST elemAST, boolean inRewrite]
@init
{
GrammarAST blkRoot=null;
GrammarAST alt=null;
GrammarAST save = currentBlockAST;
}
	:	(	-> BLOCK[$elemAST.getToken(), "BLOCK"]
		)
		{ blkRoot = (GrammarAST)$tree.getChild(0); currentBlockAST = blkRoot; }
		(	// create alt
			-> ^(ALT[$elemAST.getToken(), "ALT"] {$elemAST} EOA["<end-of-alt>"])
		)
		{
			alt = (GrammarAST)$tree.getChild(0);
			if ( !inRewrite )
				prefixWithSynPred(alt);
		}
		(	QUESTION
			-> OPTIONAL[$elemAST.getToken(),"?"]
		|	STAR
			-> CLOSURE[$elemAST.getToken(),"*"]
		|	PLUS
			-> POSITIVE_CLOSURE[$elemAST.getToken(),"+"]
		)
		-> ^($ebnfSuffix ^({blkRoot} {alt} EOB[$elemAST.getToken(), "<end-of-block>"]))
	;
finally { currentBlockAST = save; }
883
884notTerminal
885	:	CHAR_LITERAL
886	|	TOKEN_REF
887	|	STRING_LITERAL
888	;
889
890idList
891	:	id (COMMA! id)*
892	;
893
894id
895	:	TOKEN_REF
896		-> ID[$TOKEN_REF]
897	|	RULE_REF
898		-> ID[$RULE_REF]
899	;
900
901// R E W R I T E  S Y N T A X
902
903rewrite
904	:	rewrite_with_sempred*
905		REWRITE rewrite_alternative
906		-> ^(REWRITES rewrite_with_sempred* ^(REWRITE rewrite_alternative))
907	|
908	;
909
910rewrite_with_sempred
911	:	REWRITE^ SEMPRED rewrite_alternative
912	;
913
914rewrite_block
915	:	LPAREN
916		rewrite_alternative
917		RPAREN
918		-> ^(BLOCK[$LPAREN,"BLOCK"] rewrite_alternative EOB[$RPAREN,"<end-of-block>"])
919	;
920
921rewrite_alternative
922options{k=1;}
923	:	{grammar.buildTemplate()}? => rewrite_template
924
925	|	{grammar.buildAST()}? => ( rewrite_element )+
926		-> {!stream_rewrite_element.hasNext()}? ^(ALT[LT(1),"ALT"] EPSILON["epsilon"] EOA["<end-of-alt>"])
927		-> ^(ALT[LT(1),"ALT"] rewrite_element+ EOA["<end-of-alt>"])
928
929	|
930		-> ^(ALT[LT(1),"ALT"] EPSILON["epsilon"] EOA["<end-of-alt>"])
931	|	{grammar.buildAST()}? ETC
932	;
933
934rewrite_element
935	:	(	t=rewrite_atom
936			-> $t
937		)
938		(	subrule=ebnfSuffix[$t.tree,true]
939			-> $subrule
940		)?
941	|	rewrite_ebnf
942	|	(	tr=rewrite_tree
943			-> $tr
944		)
945		(	subrule=ebnfSuffix[$tr.tree,true]
946			-> $subrule
947		)?
948	;
949
950rewrite_atom
951	:	tr=TOKEN_REF^ elementOptions[$tr.tree]!? ARG_ACTION? // for imaginary nodes
952	|	RULE_REF
953	|	cl=CHAR_LITERAL elementOptions[$cl.tree]!?
954	|	sl=STRING_LITERAL elementOptions[$sl.tree]!?
955	|	DOLLAR! label // reference to a label in a rewrite rule
956	|	ACTION
957	;
958
959label
960	:	TOKEN_REF -> LABEL[$TOKEN_REF]
961	|	RULE_REF -> LABEL[$RULE_REF]
962	;
963
964rewrite_ebnf
965	:	b=rewrite_block
966		(	QUESTION
967			-> ^(OPTIONAL[$b.start,"?"] $b)
968		|	STAR
969			-> ^(CLOSURE[$b.start,"*"] $b)
970		|	PLUS
971			-> ^(POSITIVE_CLOSURE[$b.start,"+"] $b)
972		)
973	;
974
975rewrite_tree
976	:	TREE_BEGIN^
977			rewrite_atom rewrite_element*
978		RPAREN!
979	;
980
981/** Build a tree for a template rewrite:
982	  ^(TEMPLATE (ID|ACTION) ^(ARGLIST ^(ARG ID ACTION) ...) )
983	where ARGLIST is always there even if no args exist.
984	ID can be "template" keyword.  If first child is ACTION then it's
985	an indirect template ref
986
987	-> foo(a={...}, b={...})
988	-> ({string-e})(a={...}, b={...})  // e evaluates to template name
989	-> {%{$ID.text}} // create literal template from string (done in ActionTranslator)
990	-> {st-expr} // st-expr evaluates to ST
991 */
992public
993rewrite_template
994options{k=1;}
995	:	// -> template(a={...},...) "..."
996		{LT(1).getText().equals("template")}? => // inline
997		(	rewrite_template_head
998			-> rewrite_template_head
999		)
1000		( st=DOUBLE_QUOTE_STRING_LITERAL | st=DOUBLE_ANGLE_STRING_LITERAL )
1001		{ adaptor.addChild( $tree.getChild(0), adaptor.create($st) ); }
1002
1003	|	// -> foo(a={...}, ...)
1004		rewrite_template_head
1005
1006	|	// -> ({expr})(a={...}, ...)
1007		rewrite_indirect_template_head
1008
1009	|	// -> {...}
1010		ACTION
1011	;
1012
1013/** -> foo(a={...}, ...) */
1014rewrite_template_head
1015	:	id lp=LPAREN
1016		rewrite_template_args
1017		RPAREN
1018		-> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args)
1019	;
1020
1021/** -> ({expr})(a={...}, ...) */
1022rewrite_indirect_template_head
1023	:	lp=LPAREN
1024		ACTION
1025		RPAREN
1026		LPAREN rewrite_template_args RPAREN
1027		-> ^(TEMPLATE[$lp,"TEMPLATE"] ACTION rewrite_template_args)
1028	;
1029
1030rewrite_template_args
1031	:	rewrite_template_arg (COMMA rewrite_template_arg)*
1032		-> ^(ARGLIST["ARGLIST"] rewrite_template_arg+)
1033	|
1034		-> ARGLIST["ARGLIST"]
1035	;
1036
1037rewrite_template_arg
1038	:	id a=ASSIGN ACTION
1039		-> ^(ARG[$a,"ARG"] id ACTION)
1040	;
1041
1042//////////////////////////////////////////////////////////////////////////////
1043//////////////////////////////////////////////////////////////////////////////
1044//////////////////////////////////////////////////////////////////////////////
1045// L E X E R
1046
1047// get rid of warnings:
1048fragment STRING_LITERAL : ;
1049fragment FORCED_ACTION : ;
1050fragment DOC_COMMENT : ;
1051fragment SEMPRED : ;
1052
1053WS
1054	:	(	' '
1055		|	'\t'
1056		|	('\r')? '\n'
1057		)
1058		{ $channel = HIDDEN; }
1059	;
1060
1061COMMENT
1062@init{List<Integer> type = new ArrayList<Integer>() {{ add(0); }};}
1063	:	( SL_COMMENT | ML_COMMENT[type] {$type = type.get(0);} )
1064		{
1065			if ( $type != DOC_COMMENT )
1066				$channel = HIDDEN;
1067		}
1068	;
1069
1070fragment
1071SL_COMMENT
1072	:	'//'
1073		(	(' $ANTLR') => ' $ANTLR ' SRC (('\r')? '\n')? // src directive
1074		|	~('\r'|'\n')* (('\r')? '\n')?
1075		)
1076	;
1077
1078fragment
1079ML_COMMENT[List<Integer> type]
1080	:	'/*'
1081		{$type.set(0, (input.LA(1) == '*' && input.LA(2) != '/') ? DOC_COMMENT : ML_COMMENT);}
1082		.*
1083		'*/'
1084	;
1085
1086OPEN_ELEMENT_OPTION
1087	:	'<'
1088	;
1089
1090CLOSE_ELEMENT_OPTION
1091	:	'>'
1092	;
1093
1094AMPERSAND : '@';
1095
1096COMMA : ',';
1097
1098QUESTION :	'?' ;
1099
1100TREE_BEGIN : '^(' ;
1101
1102LPAREN:	'(' ;
1103
1104RPAREN:	')' ;
1105
1106COLON :	':' ;
1107
1108STAR:	'*' ;
1109
1110PLUS:	'+' ;
1111
1112ASSIGN : '=' ;
1113
1114PLUS_ASSIGN : '+=' ;
1115
1116IMPLIES : '=>' ;
1117
1118REWRITE : '->' ;
1119
1120SEMI:	';' ;
1121
1122ROOT : '^' {hasASTOperator=true;} ;
1123
1124BANG : '!' {hasASTOperator=true;} ;
1125
1126OR	:	'|' ;
1127
1128WILDCARD : '.' ;
1129
1130ETC : '...' ;
1131
1132RANGE : '..' ;
1133
1134NOT :	'~' ;
1135
1136RCURLY:	'}'	;
1137
1138DOLLAR : '$' ;
1139
1140STRAY_BRACKET
1141	:	']'
1142		{
1143			ErrorManager.syntaxError(
1144				ErrorManager.MSG_SYNTAX_ERROR,
1145				null,
1146				state.token,
1147				"antlr: dangling ']'? make sure to escape with \\]",
1148				null);
1149		}
1150	;
1151
1152CHAR_LITERAL
1153	:	'\''
1154		(	ESC
1155		|	~('\\'|'\'')
1156		)*
1157		'\''
1158		{
1159			StringBuffer s = Grammar.getUnescapedStringFromGrammarStringLiteral($text);
1160			if ( s.length() > 1 )
1161			{
1162				$type = STRING_LITERAL;
1163			}
1164		}
1165	;
1166
1167DOUBLE_QUOTE_STRING_LITERAL
1168@init
1169{
1170	StringBuilder builder = new StringBuilder();
1171}
1172	:	'"'							{builder.append('"');}
1173		(	('\\\"') => '\\' '"'	{builder.append('"');}
1174		|	'\\' c=~'"'				{builder.append("\\" + (char)$c);}
1175		|	c=~('\\'|'"')			{builder.append((char)$c);}
1176		)*
1177		'"'							{builder.append('"');}
1178		{
1179			setText(builder.toString());
1180		}
1181	;
1182
1183DOUBLE_ANGLE_STRING_LITERAL
1184	:	'<<' .* '>>'
1185	;
1186
1187fragment
1188ESC
1189	:	'\\'
1190		(	// due to the way ESC is used, we don't need to handle the following character in different ways
1191			/*'n'
1192		|	'r'
1193		|	't'
1194		|	'b'
1195		|	'f'
1196		|	'"'
1197		|	'\''
1198		|	'\\'
1199		|	'>'
1200		|	'u' XDIGIT XDIGIT XDIGIT XDIGIT
1201		|*/	. // unknown, leave as it is
1202		)
1203	;
1204
1205fragment
1206DIGIT
1207	:	'0'..'9'
1208	;
1209
1210fragment
1211XDIGIT
1212	:	'0' .. '9'
1213	|	'a' .. 'f'
1214	|	'A' .. 'F'
1215	;
1216
1217INT
1218	:	('0'..'9')+
1219	;
1220
1221ARG_ACTION
1222@init {
1223	List<String> text = new ArrayList<String>() {{ add(null); }};
1224}
1225	:	'['
1226		NESTED_ARG_ACTION[text]
1227		']'
1228		{setText(text.get(0));}
1229	;
1230
1231fragment
1232NESTED_ARG_ACTION[List<String> text]
1233@init {
1234	$text.set(0, "");
1235	StringBuilder builder = new StringBuilder();
1236}
1237	:	(	('\\]') => '\\' ']'		{builder.append("]");}
1238		|	'\\' c=~(']')			{builder.append("\\" + (char)$c);}
1239		|	ACTION_STRING_LITERAL	{builder.append($ACTION_STRING_LITERAL.text);}
1240		|	ACTION_CHAR_LITERAL		{builder.append($ACTION_CHAR_LITERAL.text);}
1241		|	c=~('\\'|'"'|'\''|']')	{builder.append((char)$c);}
1242		)*
1243		{
1244			$text.set(0, builder.toString());
1245		}
1246	;
1247
1248ACTION
1249@init
1250{
1251	int actionLine = getLine();
1252	int actionColumn = getCharPositionInLine();
1253}
1254	:	NESTED_ACTION
1255		('?' {$type = SEMPRED;})?
1256		{
1257			String action = $text;
1258			int n = 1; // num delimiter chars
1259			if ( action.startsWith("{{") && action.endsWith("}}") )
1260			{
1261				$type = FORCED_ACTION;
1262				n = 2;
1263			}
1264			action = action.substring(n,action.length()-n - ($type==SEMPRED ? 1 : 0));
1265			setText(action);
1266		}
1267	;
1268
1269fragment
1270NESTED_ACTION
1271	:	'{'
1272		(	NESTED_ACTION
1273		|	ACTION_CHAR_LITERAL
1274		|	('//' | '/*') => COMMENT
1275		|	ACTION_STRING_LITERAL
1276		|	ACTION_ESC
1277		|	~('{'|'\''|'"'|'\\'|'}')
1278		)*
1279		'}'
1280	;
1281
1282fragment
1283ACTION_CHAR_LITERAL
1284	:	'\''
1285		(	ACTION_ESC
1286		|	~('\\'|'\'')
1287		)*
1288		'\''
1289	;
1290
1291fragment
1292ACTION_STRING_LITERAL
1293	:	'"'
1294		(	ACTION_ESC
1295		|	~('\\'|'"')
1296		)*
1297		'"'
1298	;
1299
1300fragment
1301ACTION_ESC
1302	:	'\\\''
1303	|	'\\\"'
1304	|	'\\' ~('\''|'"')
1305	;
1306
1307TOKEN_REF
1308	:	'A'..'Z'
1309		(	'a'..'z'|'A'..'Z'|'_'|'0'..'9'
1310		)*
1311	;
1312
1313TOKENS
1314	:	'tokens' WS_LOOP '{'
1315	;
1316
1317OPTIONS
1318	:	'options' WS_LOOP '{'
1319	;
1320
1321// we get a warning here when looking for options '{', but it works right
// A rule reference starts with a lowercase letter, followed by any mix of
// letters, digits and underscores.  The former @init block only declared
// a local that was never read, so it has been removed.
RULE_REF
	:	'a'..'z' ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
	;
1329
1330fragment
1331WS_LOOP
1332	:	(	WS
1333		|	COMMENT
1334		)*
1335	;
1336
1337fragment
1338WS_OPT
1339	:	(WS)?
1340	;
1341
1342/** Reset the file and line information; useful when the grammar
1343 *  has been generated so that errors are shown relative to the
1344 *  original file like the old C preprocessor used to do.
1345 */
1346fragment
1347SRC
1348	:	'src' ' ' file=ACTION_STRING_LITERAL ' ' line=INT
1349		{
1350			setFileName($file.text.substring(1,$file.text.length()-1));
1351			input.setLine(Integer.parseInt($line.text) - 1);  // -1 because SL_COMMENT will increment the line no. KR
1352		}
1353	;
1354