ANTLRv3.g revision 324c4644fee44b9898524c09511bd33c3f12e2df
1/*
2 [The "BSD license"]
3 Copyright (c) 2010 Terence Parr
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9 1. Redistributions of source code must retain the above copyright
10    notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14 3. The name of the author may not be used to endorse or promote products
15    derived from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*/
28/** ANTLR v3 grammar written in ANTLR v3 with AST construction */
29grammar ANTLRv3; // no lexer/parser/tree prefix: parser and lexer rules share this file
30
31options {
32	output=AST; // rules build trees; most shape them with -> rewrites
33	ASTLabelType=CommonTree; // tree node type used by the embedded actions (see altList)
34}
35
36tokens { // imaginary tree-node types, plus names for keyword/operator literals
37	DOC_COMMENT;
38	PARSER;
39    LEXER;
40    RULE;
41    BLOCK;
42    OPTIONAL;
43    CLOSURE;
44    POSITIVE_CLOSURE;
45    SYNPRED;
46    RANGE; // NOTE(review): RANGE is declared again below as '..' -- confirm the duplicate is intended
47    CHAR_RANGE;
48    EPSILON;
49    ALT;
50    EOR; // end of rule (last child of a RULE tree)
51    EOB; // end of block (last child of a BLOCK tree)
52    EOA; // end of alt
53    ID;
54    ARG;
55    ARGLIST;
56    RET='returns';
57    LEXER_GRAMMAR; // the four *_GRAMMAR types are the values stored in gtype by grammarDef
58    PARSER_GRAMMAR;
59    TREE_GRAMMAR;
60    COMBINED_GRAMMAR;
61    LABEL; // $x used in rewrite rules
62    TEMPLATE;
63    SCOPE='scope';
64    SEMPRED;
65    GATED_SEMPRED; // {p}? =>
66    SYN_SEMPRED; // (...) =>   it's a manually-specified synpred converted to sempred
67    BACKTRACK_SEMPRED; // auto backtracking mode syn pred converted to sempred
68    FRAGMENT='fragment';
69    TREE_BEGIN='^(';
70    ROOT='^';
71    BANG='!';
72    RANGE='..';
73    REWRITE='->';
74    AT='@';
75    LABEL_ASSIGN='=';
76    LIST_LABEL_ASSIGN='+=';
77}
78
79@parser::header // parser-scoped header action: Java package declaration
80{
81    package org.antlr.grammar.v3;
82}
83@lexer::header // lexer-scoped header action: same package declaration
84{
85    package org.antlr.grammar.v3;
86}
87
88@members {
89	int gtype; // grammar kind; assigned in grammarDef, read in ebnf
90}
91
92grammarDef // top-level rule: one complete grammar file, matched through EOF
93    :   DOC_COMMENT?
94    	(	'lexer'  {gtype=LEXER_GRAMMAR;}    // pure lexer
95    	|   'parser' {gtype=PARSER_GRAMMAR;}   // pure parser
96    	|   'tree'   {gtype=TREE_GRAMMAR;}     // a tree parser
97    	|		     {gtype=COMBINED_GRAMMAR;} // merged parser/lexer
98    	)
99    	g='grammar' id ';' optionsSpec? tokensSpec? attrScope* action*
100    	rule+
101    	EOF
102    	-> ^( {adaptor.create(gtype,$g)} // root node: type taken from gtype, built from the 'grammar' token
103    		  id DOC_COMMENT? optionsSpec? tokensSpec? attrScope* action* rule+
104    		)
105    ;
106
107tokensSpec // the tokens { ... } section; the TOKENS lexer token already includes the opening brace
108	:	TOKENS tokenSpec+ '}' -> ^(TOKENS tokenSpec+)
109	;
110
111tokenSpec // one tokens-section entry: NAME; or NAME='literal';
112	:	TOKEN_REF
113		(	'=' (alias=STRING_LITERAL|alias=CHAR_LITERAL)	-> ^('=' TOKEN_REF $alias)
114		|	/* no literal alias */							-> TOKEN_REF
115		)
116		';'
117	;
118
119attrScope // grammar-level named scope: scope Name { ... }
120	:	'scope' id ACTION -> ^('scope' id ACTION)
121	;
122
123/** Match a grammar-level named action such as @parser::members {int i;}; the "scope::" prefix is optional. */
124action
125	:	'@' (actionScopeName '::')? id ACTION -> ^('@' actionScopeName? id ACTION)
126	;
127
128/** Scope name written before '::' in an action.  The keywords 'lexer' and
129 *  'parser' are accepted as well and rewritten to ID nodes, like any other id.
130 */
131actionScopeName
132	:	id
133	|	l='lexer'	-> ID[$l]
134    |   p='parser'	-> ID[$p]
135	;
136
137optionsSpec // options { a=b; ... }; the OPTIONS lexer token already includes the opening brace
138	:	OPTIONS (option ';')+ '}' -> ^(OPTIONS option+)
139	;
140
141option // a single name=value pair, rooted at '='
142    :   id '=' optionValue -> ^('=' id optionValue)
143 	;
144
145optionValue // right-hand side of an option: qualified id, literal, integer, or '*'
146    :   qid
147    |   STRING_LITERAL
148    |   CHAR_LITERAL
149    |   INT
150    |	s='*' -> STRING_LITERAL[$s]  // used for k=*; the '*' is retyped as a STRING_LITERAL node
151    ;
152
153rule // one rule definition, rewritten to ^(RULE id modifier? args? returns? ... altList ... EOR)
154scope {
155	String name;
156}
157	:	DOC_COMMENT? // matched but not copied into the RULE tree
158		( modifier=('protected'|'public'|'private'|'fragment') )?
159		id {$rule::name = $id.text;} // name is kept in the rule scope; ebnf reads it
160		'!'? // matched but not copied into the RULE tree
161		( arg=ARG_ACTION )?
162		( 'returns' rt=ARG_ACTION  )?
163		throwsSpec? optionsSpec? ruleScopeSpec? ruleAction*
164		':'	altList	';'
165		exceptionGroup?
166	    -> ^( RULE id {modifier!=null?adaptor.create(modifier):null} ^(ARG[$arg] $arg)? ^('returns' $rt)?
167	    	  throwsSpec? optionsSpec? ruleScopeSpec? ruleAction*
168	    	  altList
169	    	  exceptionGroup?
170	    	  EOR["EOR"]
171	    	)
172	;
173
174/** Match a rule-level named action such as @init {int i;} (no scope prefix here). */
175ruleAction
176	:	'@' id ACTION -> ^('@' id ACTION)
177	;
178
179throwsSpec // throws A, B, ...; the commas are dropped from the tree
180	:	'throws' id ( ',' id )* -> ^('throws' id+)
181	;
182
183ruleScopeSpec // rule-level scope: an inline ACTION, a list of scope names, or both (ACTION first)
184	:	'scope' ACTION -> ^('scope' ACTION)
185	|	'scope' id (',' id)* ';' -> ^('scope' id+)
186	|	'scope' ACTION
187		'scope' id (',' id)* ';'
188		-> ^('scope' ACTION id+ ) // both forms are merged under a single 'scope' root
189	;
190
191block // parenthesized sub-rule: optional "options :" prefix, then alternatives separated by '|'
192	:	lparen='('
193		( optionsSpec? ':' )?
194		altpair ( '|' altpair )*
195		rparen=')'
196		-> ^( BLOCK[$lparen,"BLOCK"] optionsSpec? altpair+ EOB[$rparen,"EOB"] )
197	;
198
199altpair : alternative rewrite ; // an alternative followed by its rewrite (the rewrite rule may match nothing)
200
201altList // alternatives of a rule body, gathered under a manually created BLOCK root
202@init {
203	// must create root manually as it's used by invoked rules in real antlr tool.
204	// leave here to demonstrate use of {...} in rewrite rule
205	// it's really BLOCK[firstToken,"BLOCK"]; set line/col to previous ( or : token.
206    CommonTree blkRoot = (CommonTree)adaptor.create(BLOCK,input.LT(-1),"BLOCK");
207}
208    :   altpair ( '|' altpair )* -> ^( {blkRoot} altpair+ EOB["EOB"] ) // '|' separators are dropped
209    ;
210
211alternative // one alternative; an empty one becomes ^(ALT EPSILON EOA)
212@init {
213	Token firstToken = input.LT(1);
214	Token prevToken = input.LT(-1); // either : or | I think
215}
216    :   element+ -> ^(ALT[firstToken,"ALT"] element+ EOA["EOA"])
217    |   -> ^(ALT[prevToken,"ALT"] EPSILON[prevToken,"EPSILON"] EOA["EOA"]) // empty alt: nodes are built from the preceding token
218    ;
219
220exceptionGroup // one or more catch handlers optionally followed by finally, or finally alone
221	:	( exceptionHandler )+ ( finallyClause )?
222	|	finallyClause
223    ;
224
225exceptionHandler // catch [arg] { ... }
226    :    'catch' ARG_ACTION ACTION -> ^('catch' ARG_ACTION ACTION)
227    ;
228
229finallyClause // finally { ... }
230    :    'finally' ACTION -> ^('finally' ACTION)
231    ;
232
233element // one item of an alternative; a trailing ? * + wraps it as ^(suffix ^(BLOCK ^(ALT item EOA) EOB))
234	:	id (labelOp='='|labelOp='+=') atom // labeled atom: x=A or x+=A
235		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id atom) EOA["EOA"]) EOB["EOB"]))
236		|				-> ^($labelOp id atom)
237		)
238	|	id (labelOp='='|labelOp='+=') block // labeled block: x=(...) or x+=(...)
239		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id block) EOA["EOA"]) EOB["EOB"]))
240		|				-> ^($labelOp id block)
241		)
242	|	atom // unlabeled atom
243		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] atom EOA["EOA"]) EOB["EOB"]) )
244		|				-> atom
245		)
246	|	ebnf
247	|   ACTION
248	|   SEMPRED ( g='=>' -> GATED_SEMPRED[$g] | -> SEMPRED ) // {p}? => becomes a GATED_SEMPRED node built from the '=>' token
249	|   treeSpec
250		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] treeSpec EOA["EOA"]) EOB["EOB"]) )
251		|				-> treeSpec
252		)
253	;
254
255atom:   terminal // terminal handles its own ^ / ! suffix
256	|	range
257		(	(op='^'|op='!')	-> ^($op range) // suffix operator becomes the root
258		|					-> range
259		)
260    |	notSet
261		(	(op='^'|op='!')	-> ^($op notSet)
262		|					-> notSet
263		)
264    |   RULE_REF ARG_ACTION? // rule call with optional [args]
265		(	(op='^'|op='!')	-> ^($op RULE_REF ARG_ACTION?)
266		|					-> ^(RULE_REF ARG_ACTION?)
267		)
268    ;
269
270notSet // ~x or ~(...), rooted at the '~' token
271	:	'~'
272		(	notTerminal elementOptions?	-> ^('~' notTerminal elementOptions?)
273		|	block elementOptions?		-> ^('~' block elementOptions?)
274		)
275	;
276
277notTerminal // single token that may follow '~'
278	:	TOKEN_REF
279	|	CHAR_LITERAL
280	|	STRING_LITERAL
281	;
282
283elementOptions // <...> options attached to an element: a single qid, or ';'-separated name=value pairs
284	:	'<' qid '>'					 -> ^(OPTIONS qid)
285	|	'<' elementOption (';' elementOption)* '>' -> ^(OPTIONS elementOption+)
286	;
287
288elementOption // one name=value pair inside <...>, rooted at '='; same shape as the option rule
289	:	id '=' optionValue -> ^('=' id optionValue)
290	;
291
292treeSpec // tree pattern ^( root child+ ): at least two elements are required
293	:	'^(' element ( element )+ ')' -> ^(TREE_BEGIN element+)
294	;
295
296range! // 'a'..'z' becomes ^(CHAR_RANGE 'a' 'z' options?); the RANGE token itself is not kept
297	:	c1=CHAR_LITERAL RANGE c2=CHAR_LITERAL elementOptions?
298		-> ^(CHAR_RANGE[$c1,".."] $c1 $c2 elementOptions?)
299	;
300
301terminal // literal, token reference or wildcard; an optional ^ or ! suffix becomes the root
302    :   (	CHAR_LITERAL elementOptions?    	  -> ^(CHAR_LITERAL elementOptions?)
303	    	// Args are only valid for lexer rules
304		|   TOKEN_REF ARG_ACTION? elementOptions? -> ^(TOKEN_REF ARG_ACTION? elementOptions?)
305		|   STRING_LITERAL elementOptions?		  -> ^(STRING_LITERAL elementOptions?)
306		|   '.' elementOptions?		 			  -> ^('.' elementOptions?)
307		)
308		(	'^'							-> ^('^' $terminal) // $terminal is the tree built so far by this rule
309		|	'!' 						-> ^('!' $terminal)
310		)?
311	;
312
313/** Matches EBNF blocks (and token sets via block rule): a block with an optional ?, *, + or => suffix; afterwards the root's line/column are set from the block's first token. */
314ebnf
315@init {
316    Token firstToken = input.LT(1);
317}
318@after {
319	$ebnf.tree.getToken().setLine(firstToken.getLine());
320	$ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
321}
322	:	block
323		(	op='?'	-> ^(OPTIONAL[op] block)
324		|	op='*'	-> ^(CLOSURE[op] block)
325		|	op='+'	-> ^(POSITIVE_CLOSURE[op] block)
326		|   '=>'	// syntactic predicate
327					-> {gtype==COMBINED_GRAMMAR &&
328					    Character.isUpperCase($rule::name.charAt(0))}?
329					   // if lexer rule in combined, leave as pred for lexer
330					   ^(SYNPRED["=>"] block)
331					// in real antlr tool, text for SYN_SEMPRED is predname
332					-> SYN_SEMPRED // otherwise the block is dropped and only a SYN_SEMPRED node remains
333        |			-> block // no suffix: the block itself
334		)
335	;
336
337ebnfSuffix // ? * + turned into OPTIONAL / CLOSURE / POSITIVE_CLOSURE nodes built from the operator token
338@init {
339	Token op = input.LT(1);
340}
341	:	'?'	-> OPTIONAL[op]
342  	|	'*' -> CLOSURE[op]
343   	|	'+' -> POSITIVE_CLOSURE[op]
344	;
345
346
347
348// R E W R I T E  S Y N T A X
349
350rewrite // zero or more predicated rewrites (-> {p}? alt), then one unpredicated -> alt; or nothing at all
351@init {
352	Token firstToken = input.LT(1);
353}
354	:	(rew+='->' preds+=SEMPRED predicated+=rewrite_alternative)*
355		rew2='->' last=rewrite_alternative
356        -> ^($rew $preds $predicated)* ^($rew2 $last) // one '->'-rooted tree per rewrite, in source order
357	| // no rewrite; NOTE(review): firstToken from @init is not referenced in this rule
358	;
359
360rewrite_alternative // right-hand side of '->': template rewrite, tree rewrite, or empty
361options {backtrack=true;}
362	:	rewrite_template
363	|	rewrite_tree_alternative
364   	|   /* empty rewrite */ -> ^(ALT["ALT"] EPSILON["EPSILON"] EOA["EOA"])
365	;
366
367rewrite_tree_block // parenthesized rewrite sub-block; BLOCK is anchored at '(' and EOB at ')', as in block
368    :   lp='(' rewrite_tree_alternative rp=')'
369    	-> ^(BLOCK[$lp,"BLOCK"] rewrite_tree_alternative EOB[$rp,"EOB"])
370    ;
371
372rewrite_tree_alternative // one or more rewrite elements wrapped as ^(ALT ... EOA)
373    :	rewrite_tree_element+ -> ^(ALT["ALT"] rewrite_tree_element+ EOA["EOA"])
374    ;
375
376rewrite_tree_element // atom, ^(...) tree or (...) block on the rewrite side; a suffix wraps atoms and trees in a BLOCK/ALT
377	:	rewrite_tree_atom
378	|	rewrite_tree_atom ebnfSuffix // same prefix as the alternative above; told apart by the suffix that follows
379		-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree_atom EOA["EOA"]) EOB["EOB"]))
380	|   rewrite_tree
381		(	ebnfSuffix
382			-> ^(ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree EOA["EOA"]) EOB["EOB"]))
383		|	-> rewrite_tree
384		)
385	|   rewrite_tree_ebnf
386	;
387
388rewrite_tree_atom // leaf of a rewrite tree
389    :   CHAR_LITERAL
390	|   TOKEN_REF ARG_ACTION? -> ^(TOKEN_REF ARG_ACTION?) // for imaginary nodes
391    |   RULE_REF
392	|   STRING_LITERAL
393	|   d='$' id -> LABEL[$d,$id.text] // reference to a label in a rewrite rule
394	|	ACTION
395	;
396
397rewrite_tree_ebnf // (...) followed by ? * +; afterwards the root's line/column are set from the first token
398@init {
399    Token firstToken = input.LT(1);
400}
401@after {
402	$rewrite_tree_ebnf.tree.getToken().setLine(firstToken.getLine());
403	$rewrite_tree_ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
404}
405	:	rewrite_tree_block ebnfSuffix -> ^(ebnfSuffix rewrite_tree_block)
406	;
407
408rewrite_tree // ^( root child* ) on the rewrite side; the root must be an atom, children are optional
409	:	'^(' rewrite_tree_atom rewrite_tree_element* ')'
410		-> ^(TREE_BEGIN rewrite_tree_atom rewrite_tree_element* )
411	;
412
412/** Build a tree for a template rewrite:
413      ^(TEMPLATE (ID|ACTION) ^(ARGLIST ^(ARG ID ACTION) ...) )
414    where ARGLIST is always present, even when there are no args.
415    ID can be the "template" keyword.  If the first child is an ACTION,
416    it is an indirect template reference.
417
418    -> foo(a={...}, b={...})
419    -> ({string-e})(a={...}, b={...})  // e evaluates to template name
420    -> {%{$ID.text}} // create literal template from string (done in ActionTranslator)
421	-> {st-expr} // st-expr evaluates to ST
422 */
423rewrite_template
424	:   // -> template(a={...},...) "..."    inline template
425		id lp='(' rewrite_template_args	')'
426		( str=DOUBLE_QUOTE_STRING_LITERAL | str=DOUBLE_ANGLE_STRING_LITERAL )
427		-> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args $str) // template text is the last child
428
429	|	// -> foo(a={...}, ...)
430		rewrite_template_ref
431
432	|	// -> ({expr})(a={...}, ...)
433		rewrite_indirect_template_head
434
435	|	// -> {...}
436		ACTION
437	;
439
439/** -> foo(a={...}, ...)   reference to a named template; TEMPLATE is anchored at the '(' */
440rewrite_template_ref
441	:	id lp='(' rewrite_template_args	')'
442		-> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args)
443	;
444
445/** -> ({expr})(a={...}, ...)   the template is given by an action; TEMPLATE is anchored at the first '(' */
446rewrite_indirect_template_head
447	:	lp='(' ACTION ')' '(' rewrite_template_args ')'
448		-> ^(TEMPLATE[$lp,"TEMPLATE"] ACTION rewrite_template_args)
449	;
451
451rewrite_template_args // comma-separated name={...} arguments; always yields an ARGLIST node
452	:	rewrite_template_arg (',' rewrite_template_arg)*
453		-> ^(ARGLIST rewrite_template_arg+)
454	|	-> ARGLIST // no arguments: a childless ARGLIST
455	;
456
457rewrite_template_arg // one name={...} argument; ARG is built from the first token of the id
458	:   id '=' ACTION -> ^(ARG[$id.start] id ACTION)
459	;
461
461qid :	id ('.' id)* ; // dotted name such as a.b.c; no rewrite, so the tokens are kept in order
463
463id	:	r=RULE_REF  -> ID[$r] // name starting with a lower-case letter
464	|	t=TOKEN_REF -> ID[$t] // name starting with an upper-case letter
465	;
467
468// L E X I C A L   R U L E S
469
469SL_COMMENT // line comment, hidden from the parser; must be terminated by a newline
470 	:	'//'
471 	 	(	' $ANTLR ' SRC // src directive
472 		|	~('\r'|'\n')*
473		)
474		'\r'? '\n'
475		{$channel=HIDDEN;}
476	;
478
479ML_COMMENT
480	:	'/*' {if (input.LA(1)=='*') $type=DOC_COMMENT; else $channel=HIDDEN;} .* '*/'
481	;
482
481CHAR_LITERAL // single-quoted literal holding exactly one LITERAL_CHAR
482	:	'\'' LITERAL_CHAR '\''
483	;
484
485STRING_LITERAL // single-quoted literal holding one or more LITERAL_CHARs
486	:	'\'' LITERAL_CHAR LITERAL_CHAR* '\''
487	;
488
489fragment
490LITERAL_CHAR // an escape sequence, or any char other than quote and backslash
491	:	ESC
492	|	~('\''|'\\')
493	;
496
495DOUBLE_QUOTE_STRING_LITERAL // "..." with escapes; referenced by rewrite_template for inline template text
496	:	'"' (ESC | ~('\\'|'"'))* '"'
497	;
498
499DOUBLE_ANGLE_STRING_LITERAL // <<...>> template text; anything up to the closing >>
500	:	'<<' .* '>>'
501	;
504
503fragment
504ESC	:	'\\' // backslash followed by one escape code
505		(	'n'
506		|	'r'
507		|	't'
508		|	'b'
509		|	'f'
510		|	'"'
511		|	'\''
512		|	'\\'
513		|	'>'
514		|	'u' XDIGIT XDIGIT XDIGIT XDIGIT // \uXXXX with exactly four hex digits
515		|	. // unknown, leave as it is
516		)
517	;
520
519fragment
520XDIGIT // one hexadecimal digit, upper or lower case
521	:	'A'..'F'
522	|	'a'..'f'
523	|	'0'..'9'
524	;
527
526INT	:	'0'..'9'+ // one or more decimal digits
527	;
528
529ARG_ACTION // [...] argument text; the brackets stay in the token text (see NESTED_ARG_ACTION)
530	:	NESTED_ARG_ACTION
531	;
534
533fragment
534NESTED_ARG_ACTION : // [ ... ] with nested brackets and quoted literals consumed as units
535	'['
536	(	options {greedy=false; k=1;}
537	:	NESTED_ARG_ACTION
538	|	ACTION_STRING_LITERAL
539	|	ACTION_CHAR_LITERAL
540	|	.
541	)*
542	']'
543	//{setText(getText().substring(1, getText().length()-1));}
544	;
547
546ACTION // { ... } block; a '?' directly after the closing brace retypes the token as SEMPRED
547	:	NESTED_ACTION ( '?' {$type = SEMPRED;} )?
548	;
551
550fragment
551NESTED_ACTION : // { ... } with nested braces, comments and quoted literals consumed as units
552	'{'
553	(	options {greedy=false; k=2;}
554	:	NESTED_ACTION
555	|	SL_COMMENT
556	|	ML_COMMENT
557	|	ACTION_STRING_LITERAL
558	|	ACTION_CHAR_LITERAL
559	|	.
560	)*
561	'}'
562   ;
565
564fragment
565ACTION_CHAR_LITERAL // single-quoted char literal inside an action: one escape or one ordinary char
566	:	'\'' (ACTION_ESC|~('\\'|'\'')) '\''
567	;
568
569fragment
570ACTION_STRING_LITERAL // double-quoted string literal inside an action
571	:	'"' (ACTION_ESC|~('\\'|'"'))* '"'
572	;
573
574fragment
575ACTION_ESC // backslash followed by any single character
576	:	'\\\''
577	|	'\\' '"' // ANTLR doesn't like: '\\"'
578	|	'\\' ~('\''|'"')
579	;
582
581TOKEN_REF // identifier whose first character is an upper-case letter
582	:	'A'..'Z' ( 'A'..'Z' | 'a'..'z' | '0'..'9' | '_' )*
583	;
584
585RULE_REF // identifier whose first character is a lower-case letter
586	:	'a'..'z' ( 'A'..'Z' | 'a'..'z' | '0'..'9' | '_' )*
587	;
590
589/** Match the start of an options section.  Don't allow normal
590 *  action processing on the {...} as it's not an action.
591 */
592OPTIONS
593	:	'options' WS_LOOP '{' // keyword, optional whitespace/comments, and the opening brace as one token
594	;
595
596TOKENS // start of a tokens section, matched the same way as OPTIONS
597	:	'tokens' WS_LOOP '{'
598	;
601
600/** Reset the file and line information; useful when the grammar
601 *  has been generated so that errors are shown relative to the
602 *  original file like the old C preprocessor used to do.
603 */
604fragment
605SRC	:	'src' ' ' file=ACTION_STRING_LITERAL ' ' line=INT // NOTE(review): no action here uses the file/line labels -- confirm where the reset happens
606	;
609
609WS	:	(	'\r'? '\n' // line break, with or without a preceding CR
610		|	'\t'
611		|	' '
612		)+
613		{ $channel = HIDDEN; }
614	;
616
616fragment
617WS_LOOP // any run of whitespace and comments; used between a keyword and '{' in OPTIONS and TOKENS
618	:	(	WS
619		|	SL_COMMENT
620		|	ML_COMMENT
621		)*
622	;
624
625