1/*
2 [The "BSD licence"]
3 Copyright (c) 2005-2007 Terence Parr
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9 1. Redistributions of source code must retain the above copyright
10    notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14 3. The name of the author may not be used to endorse or promote products
15    derived from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*/
28
29/** ANTLR v3 grammar written in ANTLR v3 with AST construction */
30grammar ANTLRv3;
31
// Grammar-wide options: the parser builds an AST as its output and
// CommonTree is the type used for tree labels.
32options {
33	output=AST;
34	ASTLabelType=CommonTree;
35}
36
// Token vocabulary. Entries without a value are imaginary token types used
// only as AST node types by the rewrite rules below; entries with a value
// give a name to a literal that appears in the grammar.
// NOTE(review): RANGE is declared twice, once without a value and once
// bound to the '..' literal -- confirm the duplicate is intended.
37tokens {
38	DOC_COMMENT;
39	PARSER;
40    LEXER;
41    RULE;
42    BLOCK;
43    OPTIONAL;
44    CLOSURE;
45    POSITIVE_CLOSURE;
46    SYNPRED;
47    RANGE;
48    CHAR_RANGE;
49    EPSILON;
50    ALT;
51    EOR;
52    EOB;
53    EOA; // end of alt
54    ID;
55    ARG;
56    ARGLIST;
57    RET;
58    LEXER_GRAMMAR;
59    PARSER_GRAMMAR;
60    TREE_GRAMMAR;
61    COMBINED_GRAMMAR;
62    INITACTION;
63    LABEL; // $x used in rewrite rules
64    TEMPLATE;
65    SCOPE='scope';
66    SEMPRED;
67    GATED_SEMPRED; // {p}? =>
68    SYN_SEMPRED; // (...) =>   it's a manually-specified synpred converted to sempred
69    BACKTRACK_SEMPRED; // auto backtracking mode syn pred converted to sempred
70    FRAGMENT='fragment';
71    TREE_BEGIN='^(';
72    ROOT='^';
73    BANG='!';
74    RANGE='..';
75    REWRITE='->';
76}
77
// Fields added to the generated parser class.
78@members {
	// Kind of grammar being parsed; grammarDef assigns one of the
	// *_GRAMMAR token types to it.
79	int gtype;
	// Names of the rules parsed so far; the rule production appends to it.
	// Initialized eagerly so that parsing does not throw a
	// NullPointerException when the caller has not assigned a list.
	// Callers may still replace it with their own list before parsing.
80	public List<String> rules = new java.util.ArrayList<String>();
81}
82
83@header {
84package org.antlr.gunit.swingui.parsers;
85
86import java.util.List;
87}
88
89@lexer::header {
90package org.antlr.gunit.swingui.parsers;
91}
92
93
// Entry rule: parses a complete grammar file up to EOF.
// An optional leading keyword (lexer, parser or tree) selects the grammar
// kind stored in gtype; with no keyword the grammar is treated as combined.
// The tree root is created from gtype and the grammar keyword token, and
// its children are the grammar name followed by the optional doc comment
// and the sections, ending with the rules.
94grammarDef
95    :   DOC_COMMENT?
96    	(	'lexer'  {gtype=LEXER_GRAMMAR;}    // pure lexer
97    	|   'parser' {gtype=PARSER_GRAMMAR;}   // pure parser
98    	|   'tree'   {gtype=TREE_GRAMMAR;}     // a tree parser
99    	|		     {gtype=COMBINED_GRAMMAR;} // merged parser/lexer
100    	)
101    	g='grammar' id ';' optionsSpec? tokensSpec? attrScope* action*
102    	rule+
103    	EOF
104    	-> ^( {adaptor.create(gtype,$g)}
105    		  id DOC_COMMENT? optionsSpec? tokensSpec? attrScope* action* rule+
106    		)
107    ;
108
// The tokens section. The TOKENS lexer token already includes the opening
// brace, so only the closing brace is matched here.
// Tree: ^(TOKENS tokenSpec+).
109tokensSpec
110	:	TOKENS tokenSpec+ '}' -> ^(TOKENS tokenSpec+)
111	;
112
// One token declaration terminated by a semicolon. With an assigned string
// or char literal the tree is ^('=' TOKEN_REF literal); otherwise it is
// just the TOKEN_REF.
113tokenSpec
114	:	TOKEN_REF
115		(	'=' (lit=STRING_LITERAL|lit=CHAR_LITERAL)	-> ^('=' TOKEN_REF $lit)
116		|												-> TOKEN_REF
117		)
118		';'
119	;
120
// A named scope at grammar level: the scope keyword, a name and an ACTION
// body. The scope keyword becomes the tree root.
121attrScope
122	:	'scope' id ACTION -> ^('scope' id ACTION)
123	;
124
// Grammar-level named action. The scope name and the '::' separator are
// optional; the '::' itself is not kept in the tree.
125/** Match stuff like @parser::members {int i;} */
126action
127	:	'@' (actionScopeName '::')? id ACTION -> ^('@' actionScopeName? id ACTION)
128	;
129
// The lexer and parser keywords are rewritten to ID nodes built from the
// matched keyword token, so every alternative yields an ID.
130/** Sometimes the scope names will collide with keywords; allow them as
131 *  ids for action scopes.
132 */
133actionScopeName
134	:	id
135	|	l='lexer'	-> ID[$l]
136    |   p='parser'	-> ID[$p]
137	;
138
// An options section: the OPTIONS lexer token (which already includes the
// opening brace), one or more options each followed by a semicolon, and the
// closing brace. The semicolons and closing brace are not kept in the tree.
139optionsSpec
140	:	OPTIONS (option ';')+ '}' -> ^(OPTIONS option+)
141	;
142
// A single name = value option; the '=' token becomes the tree root.
143option
144    :   id '=' optionValue -> ^('=' id optionValue)
145 	;
146
// Value of an option: an identifier, a string or char literal, an integer,
// or a star. The star is rewritten to a STRING_LITERAL node built from the
// matched token.
147optionValue
148    :   id
149    |   STRING_LITERAL
150    |   CHAR_LITERAL
151    |   INT
152    |	s='*' -> STRING_LITERAL[$s]  // used for k=*
153    ;
154
// A rule definition. The rule name is stored in the dynamic scope attribute
// name as soon as the id is matched (the ebnf rule reads it), and the @after
// action appends that name to the parser field rules.
// The tree is ^(RULE id modifier? ^(ARG ..)? ^(RET ..)? options scope
// actions altList exceptionGroup? EOR); the modifier node is created only
// when a modifier keyword was matched.
// NOTE(review): throwsSpec and the optional '!' are matched but do not
// appear in the rewrite, so they are absent from the tree -- confirm
// this is intentional.
155rule
156scope {
157	String name;
158}
159@after{
160	this.rules.add($rule::name);
161}
162	:	DOC_COMMENT?
163		( modifier=('protected'|'public'|'private'|'fragment') )?
164		id {$rule::name = $id.text;}
165		'!'?
166		( arg=ARG_ACTION )?
167		( 'returns' rt=ARG_ACTION  )?
168		throwsSpec? optionsSpec? ruleScopeSpec? ruleAction*
169		':'	altList	';'
170		exceptionGroup?
171	    -> ^( RULE id {modifier!=null?adaptor.create(modifier):null} ^(ARG $arg)? ^(RET $rt)?
172	    	  optionsSpec? ruleScopeSpec? ruleAction*
173	    	  altList
174	    	  exceptionGroup?
175	    	  EOR["EOR"]
176	    	)
177	;
178
// Rule-level named action; unlike the grammar-level action rule it takes no
// scope name.
179/** Match stuff like @init {int i;} */
180ruleAction
181	:	'@' id ACTION -> ^('@' id ACTION)
182	;
183
// Comma-separated list of ids after the throws keyword.
// The commas are not kept in the tree.
184throwsSpec
185	:	'throws' id ( ',' id )* -> ^('throws' id+)
186	;
187
// Scope specification of a rule, in three forms: an inline scope ACTION,
// a list of referenced scope names terminated by a semicolon, or both in
// that order. When both are present they are merged under one scope node.
188ruleScopeSpec
189	:	'scope' ACTION -> ^('scope' ACTION)
190	|	'scope' id (',' id)* ';' -> ^('scope' id+)
191	|	'scope' ACTION
192		'scope' id (',' id)* ';'
193		-> ^('scope' ACTION id+ )
194	;
195
// A parenthesized subrule: optional options followed by a colon, then one
// or more alternatives separated by '|', each with an optional rewrite.
// The BLOCK node takes its position from the opening paren and the EOB node
// from the closing paren.
196block
197    :   lp='('
198		( (opts=optionsSpec)? ':' )?
199		a1=alternative rewrite ( '|' a2=alternative rewrite )*
200        rp=')'
201        -> ^( BLOCK[$lp,"BLOCK"] optionsSpec? (alternative rewrite?)+ EOB[$rp,"EOB"] )
202    ;
203
// The alternatives of a rule body (no surrounding parentheses).
// The BLOCK root is created in @init from the token preceding the first
// alternative, then used as the root of the rewrite.
204altList
205@init {
206	// must create root manually as it's used by invoked rules in real antlr tool.
207	// leave here to demonstrate use of {...} in rewrite rule
208	// it's really BLOCK[firstToken,"BLOCK"]; set line/col to previous ( or : token.
209    CommonTree blkRoot = (CommonTree)adaptor.create(BLOCK,input.LT(-1),"BLOCK");
210}
211    :   a1=alternative rewrite ( '|' a2=alternative rewrite )*
212		-> ^( {blkRoot} (alternative rewrite?)+ EOB["EOB"] )
213    ;
214
// One alternative: a sequence of elements, or nothing at all.
// A non-empty alternative gets an ALT node positioned at its first token.
// An empty one gets ALT and EPSILON nodes positioned at the token that
// precedes it.
215alternative
216@init {
217	Token firstToken = input.LT(1);
218	Token prevToken = input.LT(-1); // either : or | I think
219}
220    :   element+ -> ^(ALT[firstToken,"ALT"] element+ EOA["EOA"])
221    |   -> ^(ALT[prevToken,"ALT"] EPSILON[prevToken,"EPSILON"] EOA["EOA"])
222    ;
223
// Exception handling after a rule: one or more catch handlers optionally
// followed by a finally clause, or a finally clause alone.
224exceptionGroup
225	:	( exceptionHandler )+ ( finallyClause )?
226	|	finallyClause
227    ;
228
// A catch handler: bracketed argument text followed by an action body.
229exceptionHandler
230    :    'catch' ARG_ACTION ACTION -> ^('catch' ARG_ACTION ACTION)
231    ;
232
// A finally clause: the keyword followed by an action body.
233finallyClause
234    :    'finally' ACTION -> ^('finally' ACTION)
235    ;
236
// An element of an alternative; currently a plain pass-through to
// elementNoOptionSpec.
237element
238	:	elementNoOptionSpec
239	;
240
// The element forms: a labeled atom or labeled block (label = x or
// label += x), a bare atom, an ebnf subrule, an ACTION, a semantic
// predicate, or a tree spec.
// When an ebnfSuffix follows an atom, labeled element or tree spec, the
// element is wrapped in a synthetic BLOCK/ALT pair under the suffix node.
// A SEMPRED followed by '=>' is rewritten to GATED_SEMPRED.
241elementNoOptionSpec
242	:	id (labelOp='='|labelOp='+=') atom
243		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id atom) EOA["EOA"]) EOB["EOB"]))
244		|				-> ^($labelOp id atom)
245		)
246	|	id (labelOp='='|labelOp='+=') block
247		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id block) EOA["EOA"]) EOB["EOB"]))
248		|				-> ^($labelOp id block)
249		)
250	|	atom
251		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] atom EOA["EOA"]) EOB["EOB"]) )
252		|				-> atom
253		)
254	|	ebnf
255	|   ACTION
256	|   SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED )
257	|   treeSpec
258		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] treeSpec EOA["EOA"]) EOB["EOB"]) )
259		|				-> treeSpec
260		)
261	;
262
// An atomic element: a char range, a terminal, a negated set, or a rule
// reference with optional arguments. A trailing '^' or '!' operator, when
// present, becomes the root of the resulting subtree.
// For a rule reference the rewrite is chosen by which optional parts were
// matched. All predicates refer to the labels through the $ syntax so they
// do not depend on the names of the generated local variables.
263atom:   range ( (op='^'|op='!') -> ^($op range) | -> range )
264    |   terminal
265    |	notSet ( (op='^'|op='!') -> ^($op notSet) | -> notSet )
266    |   RULE_REF ( arg=ARG_ACTION )? ( (op='^'|op='!') )?
267    	-> {$arg!=null&&$op!=null}?	^($op RULE_REF $arg)
268    	-> {$arg!=null}?			^(RULE_REF $arg)
269    	-> {$op!=null}?				^($op RULE_REF)
270    	-> RULE_REF
271    ;
272
// Negation: a tilde applied to a single terminal or to a block.
// The tilde becomes the tree root.
273notSet
274	:	'~'
275		(	notTerminal	-> ^('~' notTerminal)
276		|	block		-> ^('~' block)
277		)
278	;
279
// A tree pattern in a rule body: the tree-begin token, a root element and
// at least one child element, then the closing paren.
280treeSpec
281	:	'^(' element ( element )+ ')' -> ^(TREE_BEGIN element+)
282	;
283
// A block with an optional suffix. The suffixes ?, * and + produce
// OPTIONAL, CLOSURE and POSITIVE_CLOSURE nodes built from the suffix token.
// A '=>' suffix marks a syntactic predicate: it stays a SYNPRED subtree
// only in a combined grammar when the enclosing rule name starts with an
// upper-case letter, and otherwise becomes a bare SYN_SEMPRED node.
// The @after action copies the line and column of the first token onto the
// token of the resulting tree root.
284/** Matches EBNF blocks (and token sets via block rule) */
285ebnf
286@init {
287    Token firstToken = input.LT(1);
288}
289@after {
290	$ebnf.tree.getToken().setLine(firstToken.getLine());
291	$ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
292}
293	:	block
294		(	op='?'	-> ^(OPTIONAL[op] block)
295		|	op='*'	-> ^(CLOSURE[op] block)
296		|	op='+'	-> ^(POSITIVE_CLOSURE[op] block)
297		|   '=>'	// syntactic predicate
298					-> {gtype==COMBINED_GRAMMAR &&
299					    Character.isUpperCase($rule::name.charAt(0))}?
300					   // if lexer rule in combined, leave as pred for lexer
301					   ^(SYNPRED["=>"] block)
302					// in real antlr tool, text for SYN_SEMPRED is predname
303					-> SYN_SEMPRED
304        |			-> block
305		)
306	;
307
// A character range between two char literals. The result is a CHAR_RANGE
// node built from the first literal, with text "..", holding both literals
// as children.
308range!
309	:	c1=CHAR_LITERAL RANGE c2=CHAR_LITERAL -> ^(CHAR_RANGE[$c1,".."] $c1 $c2)
310	;
311
// A terminal: a char literal, a token reference (optionally with bracketed
// arguments), a string literal, or the wildcard dot.
// An optional trailing '^' or '!' wraps the result built so far, referenced
// as $terminal, under that operator.
312terminal
313    :   (	CHAR_LITERAL				-> CHAR_LITERAL
314    		// Args are only valid for lexer rules
315		|   TOKEN_REF
316			( ARG_ACTION				-> ^(TOKEN_REF ARG_ACTION)
317			|							-> TOKEN_REF
318			)
319		|   STRING_LITERAL				-> STRING_LITERAL
320		|   '.'							-> '.'
321		)
322		(	'^'							-> ^('^' $terminal)
323		|	'!' 						-> ^('!' $terminal)
324		)?
325	;
326
// The terminals that may follow a tilde directly: a char literal, a token
// reference or a string literal.
327notTerminal
328	:   CHAR_LITERAL
329	|	TOKEN_REF
330	|	STRING_LITERAL
331	;
332
// A repetition suffix. The suffix token is captured in @init before it is
// consumed and is used to build the OPTIONAL, CLOSURE or POSITIVE_CLOSURE
// node that replaces it.
333ebnfSuffix
334@init {
335	Token op = input.LT(1);
336}
337	:	'?'	-> OPTIONAL[op]
338  	|	'*' -> CLOSURE[op]
339   	|	'+' -> POSITIVE_CLOSURE[op]
340	;
341
342
343
344// R E W R I T E  S Y N T A X
345
// The rewrite part of an alternative: zero or more predicated rewrites
// (arrow, semantic predicate, alternative) followed by one final
// unpredicated rewrite, or nothing at all.
// Each predicated rewrite becomes ^(arrow predicate alternative) and the
// final one becomes ^(arrow alternative).
// The former @init block declared a local that was never read and has been
// removed.
346rewrite
350	:	(rew+='->' preds+=SEMPRED predicated+=rewrite_alternative)*
351		rew2='->' last=rewrite_alternative
352        -> ^($rew $preds $predicated)* ^($rew2 $last)
353	|
354	;
355
// The right-hand side of one rewrite: a template rewrite, a tree rewrite,
// or nothing. Backtracking is enabled for this rule. An empty rewrite
// yields an ALT node holding EPSILON and EOA.
356rewrite_alternative
357options {backtrack=true;}
358	:	rewrite_template
359	|	rewrite_tree_alternative
360   	|   /* empty rewrite */ -> ^(ALT["ALT"] EPSILON["EPSILON"] EOA["EOA"])
361	;
362
// A parenthesized rewrite alternative. The BLOCK node takes its position
// from the opening paren and the EOB node from the closing paren, matching
// what the block rule does for subrules in rule bodies.
363rewrite_tree_block
364    :   lp='(' rewrite_tree_alternative rp=')'
365    	-> ^(BLOCK[$lp,"BLOCK"] rewrite_tree_alternative EOB[$rp,"EOB"])
366    ;
367
// One or more rewrite elements, wrapped in an ALT node terminated by EOA.
368rewrite_tree_alternative
369    :	rewrite_tree_element+ -> ^(ALT["ALT"] rewrite_tree_element+ EOA["EOA"])
370    ;
371
// One element of a rewrite: an atom, an atom with a repetition suffix,
// a tree with an optional suffix, or a parenthesized block with a suffix.
// A suffixed atom or tree is wrapped in a synthetic BLOCK/ALT pair under
// the suffix node. The first two alternatives share the same prefix and
// differ only in the trailing suffix.
372rewrite_tree_element
373	:	rewrite_tree_atom
374	|	rewrite_tree_atom ebnfSuffix
375		-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree_atom EOA["EOA"]) EOB["EOB"]))
376	|   rewrite_tree
377		(	ebnfSuffix
378			-> ^(ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree EOA["EOA"]) EOB["EOB"]))
379		|	-> rewrite_tree
380		)
381	|   rewrite_tree_ebnf
382	;
383
// The leaves allowed in a rewrite: literals, token references (optionally
// with bracketed arguments, which become a child of the token node), rule
// references, a dollar-prefixed label, or an action.
// A label becomes a LABEL node positioned at the dollar sign and carrying
// the id text.
384rewrite_tree_atom
385    :   CHAR_LITERAL
386	|   TOKEN_REF ARG_ACTION? -> ^(TOKEN_REF ARG_ACTION?) // for imaginary nodes
387    |   RULE_REF
388	|   STRING_LITERAL
389	|   d='$' id -> LABEL[$d,$id.text] // reference to a label in a rewrite rule
390	|	ACTION
391	;
392
// A parenthesized rewrite block followed by a repetition suffix; the suffix
// node becomes the root. The @after action copies the line and column of
// the first token onto the token of the resulting tree root.
393rewrite_tree_ebnf
394@init {
395    Token firstToken = input.LT(1);
396}
397@after {
398	$rewrite_tree_ebnf.tree.getToken().setLine(firstToken.getLine());
399	$rewrite_tree_ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
400}
401	:	rewrite_tree_block ebnfSuffix -> ^(ebnfSuffix rewrite_tree_block)
402	;
403
// A tree constructor in a rewrite: the tree-begin token, a root atom and
// zero or more child elements, then the closing paren.
404rewrite_tree
405	:	'^(' rewrite_tree_atom rewrite_tree_element* ')'
406		-> ^(TREE_BEGIN rewrite_tree_atom rewrite_tree_element* )
407	;
408
// The inline-template alternative and rewrite_template_ref share the same
// prefix (id, paren, args, paren) and differ only in the trailing
// double-quoted or double-angle string literal. In the inline form the
// TEMPLATE node is positioned at the opening paren.
409/** Build a tree for a template rewrite:
410      ^(TEMPLATE (ID|ACTION) ^(ARGLIST ^(ARG ID ACTION) ...) )
411    where ARGLIST is always there even if no args exist.
412    ID can be "template" keyword.  If first child is ACTION then it's
413    an indirect template ref
414
415    -> foo(a={...}, b={...})
416    -> ({string-e})(a={...}, b={...})  // e evaluates to template name
417    -> {%{$ID.text}} // create literal template from string (done in ActionTranslator)
418	-> {st-expr} // st-expr evaluates to ST
419 */
420rewrite_template
421	:   // -> template(a={...},...) "..."    inline template
422		id lp='(' rewrite_template_args	')'
423		( str=DOUBLE_QUOTE_STRING_LITERAL | str=DOUBLE_ANGLE_STRING_LITERAL )
424		-> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args $str)
425
426	|	// -> foo(a={...}, ...)
427		rewrite_template_ref
428
429	|	// -> ({expr})(a={...}, ...)
430		rewrite_indirect_template_head
431
432	|	// -> {...}
433		ACTION
434	;
435
// Reference to a named template. The TEMPLATE node is positioned at the
// opening paren and holds the template name and the argument list.
436/** -> foo(a={...}, ...) */
437rewrite_template_ref
438	:	id lp='(' rewrite_template_args	')'
439		-> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args)
440	;
441
// Indirect template reference: the template name comes from a parenthesized
// ACTION. The TEMPLATE node is positioned at the first opening paren and
// holds the ACTION and the argument list.
442/** -> ({expr})(a={...}, ...) */
443rewrite_indirect_template_head
444	:	lp='(' ACTION ')' '(' rewrite_template_args ')'
445		-> ^(TEMPLATE[$lp,"TEMPLATE"] ACTION rewrite_template_args)
446	;
447
// Comma-separated template arguments. An ARGLIST node is always produced;
// it has no children when there are no arguments.
448rewrite_template_args
449	:	rewrite_template_arg (',' rewrite_template_arg)*
450		-> ^(ARGLIST rewrite_template_arg+)
451	|	-> ARGLIST
452	;
453
// One template argument of the form name = {action}. The ARG node is
// created from the first token of the id.
454rewrite_template_arg
455	:   id '=' ACTION -> ^(ARG[$id.start] id ACTION)
456	;
457
// An identifier: either kind of reference token, rewritten to an ID node
// built from the matched token.
458id	:	TOKEN_REF -> ID[$TOKEN_REF]
459	|	RULE_REF  -> ID[$RULE_REF]
460	;
461
462// L E X I C A L   R U L E S
463
// Single-line comment, including its line terminator; sent to the hidden
// channel. The text after the slashes is either a source directive
// (the literal " $ANTLR " followed by SRC) or any run of characters other
// than carriage return and newline.
464SL_COMMENT
465 	:	'//'
466 	 	(	' $ANTLR ' SRC // src directive
467 		|	~('\r'|'\n')*
468		)
469		'\r'? '\n'
470		{$channel=HIDDEN;}
471	;
472
// Multi-line comment. A comment that opens with a slash and two stars is
// retyped to DOC_COMMENT and stays visible to the parser; any other comment
// goes to the hidden channel.
// The second lookahead check keeps the empty comment (slash, star, star,
// slash) from being mistaken for a doc comment: there the star after the
// opener belongs to the terminator, not to a doc-comment opener.
473ML_COMMENT
474	:	'/*' {if (input.LA(1)=='*' && input.LA(2)!='/') $type=DOC_COMMENT; else $channel=HIDDEN;} .* '*/'
475	;
476
// A single-quoted literal holding exactly one LITERAL_CHAR.
477CHAR_LITERAL
478	:	'\'' LITERAL_CHAR '\''
479	;
480
// A single-quoted literal holding one or more LITERAL_CHAR.
// NOTE(review): a literal with exactly one character also matches
// CHAR_LITERAL; which token type wins depends on how ANTLR resolves the
// overlap -- confirm against the generated lexer.
481STRING_LITERAL
482	:	'\'' LITERAL_CHAR LITERAL_CHAR* '\''
483	;
484
// One character inside a single-quoted literal: an escape sequence, or any
// character other than a single quote or a backslash.
485fragment
486LITERAL_CHAR
487	:	ESC
488	|	~('\''|'\\')
489	;
490
// A double-quoted string (used for inline templates): escape sequences or
// any characters other than a backslash or a double quote.
491DOUBLE_QUOTE_STRING_LITERAL
492	:	'"' (ESC | ~('\\'|'"'))* '"'
493	;
494
// A string delimited by double angle brackets (used for inline templates),
// with arbitrary text between the delimiters.
495DOUBLE_ANGLE_STRING_LITERAL
496	:	'<<' .* '>>'
497	;
498
// An escape sequence: a backslash followed by one of the listed characters,
// by u and four hex digits, or by any other single character (unknown
// escapes are accepted as they are).
499fragment
500ESC	:	'\\'
501		(	'n'
502		|	'r'
503		|	't'
504		|	'b'
505		|	'f'
506		|	'"'
507		|	'\''
508		|	'\\'
509		|	'>'
510		|	'u' XDIGIT XDIGIT XDIGIT XDIGIT
511		|	. // unknown, leave as it is
512		)
513	;
514
// One hexadecimal digit, in either letter case.
515fragment
516XDIGIT :
517		'0' .. '9'
518	|	'a' .. 'f'
519	|	'A' .. 'F'
520	;
521
// An unsigned decimal integer: one or more digits.
522INT	:	'0'..'9'+
523	;
524
// Bracketed argument text; the matching is delegated to the
// NESTED_ARG_ACTION fragment so that nesting can recurse.
525ARG_ACTION
526	:	NESTED_ARG_ACTION
527	;
528
// Text between square brackets. Nested bracket pairs and quoted string or
// char literals are matched as units; the loop is non-greedy.
// The final action strips the outer brackets from the token text.
529fragment
530NESTED_ARG_ACTION :
531	'['
532	(	options {greedy=false; k=1;}
533	:	NESTED_ARG_ACTION
534	|	ACTION_STRING_LITERAL
535	|	ACTION_CHAR_LITERAL
536	|	.
537	)*
538	']'
539	{setText(getText().substring(1, getText().length()-1));}
540	;
541
// A brace-delimited action. When a question mark follows immediately, the
// token is retyped to SEMPRED.
542ACTION
543	:	NESTED_ACTION ( '?' {$type = SEMPRED;} )?
544	;
545
// Text between curly braces. Nested brace pairs, comments and quoted string
// or char literals are matched as units; the loop is non-greedy.
546fragment
547NESTED_ACTION :
548	'{'
549	(	options {greedy=false; k=2;}
550	:	NESTED_ACTION
551	|	SL_COMMENT
552	|	ML_COMMENT
553	|	ACTION_STRING_LITERAL
554	|	ACTION_CHAR_LITERAL
555	|	.
556	)*
557	'}'
558   ;
559
// A single-quoted char literal inside an action: exactly one escape
// sequence or one character other than a backslash or a single quote.
560fragment
561ACTION_CHAR_LITERAL
562	:	'\'' (ACTION_ESC|~('\\'|'\'')) '\''
563	;
564
// A double-quoted string literal inside an action: any number of escape
// sequences or characters other than a backslash or a double quote.
565fragment
566ACTION_STRING_LITERAL
567	:	'"' (ACTION_ESC|~('\\'|'"'))* '"'
568	;
569
// An escape sequence inside an action literal: a backslash followed by a
// single quote, by a double quote, or by any other character.
570fragment
571ACTION_ESC
572	:	'\\\''
573	|	'\\' '"' // ANTLR doesn't like: '\\"'
574	|	'\\' ~('\''|'"')
575	;
576
// A name starting with an upper-case ASCII letter, followed by ASCII
// letters, digits or underscores.
577TOKEN_REF
578	:	'A'..'Z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
579	;
580
// A name starting with a lower-case ASCII letter, followed by ASCII
// letters, digits or underscores.
581RULE_REF
582	:	'a'..'z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
583	;
584
// The token covers the keyword, any whitespace or comments after it, and
// the opening brace.
585/** Match the start of an options section.  Don't allow normal
586 *  action processing on the {...} as it's not an action.
587 */
588OPTIONS
589	:	'options' WS_LOOP '{'
590	;
591
// Start of a tokens section: the keyword, any whitespace or comments after
// it, and the opening brace, all as one token.
592TOKENS
593	:	'tokens' WS_LOOP '{'
594	;
595
// Matches the text src, a space, a double-quoted file name, a space and a
// line number.
// NOTE(review): no action in this rule uses the file or line labels, so as
// written the directive is only recognized, not applied -- confirm.
596/** Reset the file and line information; useful when the grammar
597 *  has been generated so that errors are shown relative to the
598 *  original file like the old C preprocessor used to do.
599 */
600fragment
601SRC	:	'src' ' ' file=ACTION_STRING_LITERAL ' ' line=INT
602	;
603
// A run of spaces, tabs and line terminators; sent to the hidden channel.
604WS	:	(	' '
605		|	'\t'
606		|	'\r'? '\n'
607		)+
608		{$channel=HIDDEN;}
609	;
610
// Any mix of whitespace and comments, possibly empty. Used between a
// section keyword and its opening brace.
611fragment
612WS_LOOP
613	:	(	WS
614		|	SL_COMMENT
615		|	ML_COMMENT
616		)*
617	;
618
619
620