// t033backtracking.g revision 324c4644fee44b9898524c09511bd33c3f12e2df
// Combined grammar for a subset of C declarations, generated for the
// JavaScript target.
grammar t033backtracking;
options {
    language=JavaScript;  // emit a JavaScript recognizer
    backtrack=true;       // auto-insert syntactic predicates where needed
    memoize=true;         // cache rule results while backtracking
    k=2;                  // default fixed lookahead depth
}
8
// Global dynamic scope shared by every rule that declares "scope Symbols;".
// Its single attribute, types, is assigned an object by those rules and is
// read by isTypeName to decide whether an identifier names a type.
scope Symbols {
	types;
}
12
@members {
/**
 * Tell whether name has been recorded as a type in any active Symbols scope.
 *
 * Scopes are searched from the innermost (top of the stack) outwards.
 * Only own properties of each types object count, so identifiers that
 * collide with inherited Object members (toString, constructor, ...)
 * are not mistaken for type names.
 *
 * @param name identifier text to look up
 * @return true when some active scope has name as a key of its types object
 */
this.isTypeName = function(name) {
    var depth, scope;
    for (depth = this.Symbols_stack.length - 1; depth >= 0; depth--) {
        scope = this.Symbols_stack[depth];
        if (Object.prototype.hasOwnProperty.call(scope.types, name)) {
            return true;
        }
    }
    return false;
};
}
25
// Matches one or more external declarations. Opens a Symbols scope for the
// duration of the rule and starts it with an empty types object.
translation_unit
scope Symbols; // entire file is a scope
@init {
  $Symbols::types = {};
}
	: external_declaration+
	;
33
/** Either a function definition or any other kind of C decl/def.
 *  The LL(*) analysis algorithm fails to deal with this due to
 *  recursion in the declarator rules.  I'm putting in a
 *  manual predicate here so that we don't backtrack over
 *  the entire function.  Further, you get a better error
 *  as errors within the function itself don't make it fail
 *  to predict that it's a function.  Weird errors previously.
 *  Remember: the goal is to avoid backtrack like the plague
 *  because it makes debugging, actions, and errors harder.
 *
 *  Note that k=1 results in a much smaller predictor for the
 *  fixed lookahead; k=2 made a few extra thousand lines. ;)
 *  I'll have to optimize that in the future.
 */
external_declaration
options {k=1;}
	// The syntactic predicate looks ahead through the declarator (and any
	// following declarations) up to '{' before committing to function_definition.
	: ( declaration_specifiers? declarator declaration* '{' )=> function_definition
	| declaration
	;
53
// Matches the head of a function definition: optional declaration
// specifiers followed by a declarator. The function body alternatives are
// commented out below, so no compound statement is consumed here.
// Opens a new Symbols scope whose types object starts out empty.
function_definition
scope Symbols; // put parameters and locals into same scope for now
@init {
  // Plain object, same as translation_unit; set() is not defined in JavaScript.
  $Symbols::types = {};
}
	:	declaration_specifiers? declarator
// 		(	declaration+ compound_statement	// K&R style
// 		|	compound_statement				// ANSI style
// 		)
	;
64
// Matches either a typedef declaration or an ordinary declaration, both
// terminated by ';'. The rule-local isTypedef flag starts false and is set
// to true once 'typedef' and its optional specifiers have been matched, so
// that direct_declarator can tell it is inside a typedef.
declaration
scope {
  isTypedef;
}
@init {
  $declaration::isTypedef = false;
}
	: 'typedef' declaration_specifiers? {$declaration::isTypedef = true;}
	  init_declarator_list ';' // special case, looking for typedef
	| declaration_specifiers init_declarator_list? ';'
	;
76
// One or more storage-class specifiers, type specifiers or type
// qualifiers, in any order.
declaration_specifiers
	:   (   storage_class_specifier
	    |   type_specifier
	    |   type_qualifier
	    )+
	;
83
// Comma-separated list of one or more init_declarator.
init_declarator_list
	: init_declarator (',' init_declarator)*
	;
87
// A single declarator; the optional initializer part is commented out.
init_declarator
	: declarator //('=' initializer)?
	;
91
// One of the storage-class keywords.
storage_class_specifier
	: 'extern'
	| 'static'
	| 'auto'
	| 'register'
	;
98
// A built-in type keyword, or an identifier accepted by type_id.
// The struct/union and enum alternatives are commented out.
type_specifier
	: 'void'
	| 'char'
	| 'short'
	| 'int'
	| 'long'
	| 'float'
	| 'double'
	| 'signed'
	| 'unsigned'
// 	| struct_or_union_specifier
// 	| enum_specifier
	| type_id
	;
113
// An IDENTIFIER that is accepted only when the semantic predicate holds,
// i.e. when isTypeName returns true for the text of the next token.
type_id
    :   {this.isTypeName(this.input.LT(1).getText())}? IDENTIFIER
//    	{System.out.println($IDENTIFIER.text+" is a type");}
    ;
118
119// struct_or_union_specifier
120// options {k=3;}
121// scope Symbols; // structs are scopes
122// @init {
123//   $Symbols::types = set()
124// }
125// 	: struct_or_union IDENTIFIER? '{' struct_declaration_list '}'
126// 	| struct_or_union IDENTIFIER
127// 	;
128
129// struct_or_union
130// 	: 'struct'
131// 	| 'union'
132// 	;
133
134// struct_declaration_list
135// 	: struct_declaration+
136// 	;
137
138// struct_declaration
139// 	: specifier_qualifier_list struct_declarator_list ';'
140// 	;
141
142// specifier_qualifier_list
143// 	: ( type_qualifier | type_specifier )+
144// 	;
145
146// struct_declarator_list
147// 	: struct_declarator (',' struct_declarator)*
148// 	;
149
150// struct_declarator
151// 	: declarator (':' constant_expression)?
152// 	| ':' constant_expression
153// 	;
154
155// enum_specifier
156// options {k=3;}
157// 	: 'enum' '{' enumerator_list '}'
158// 	| 'enum' IDENTIFIER '{' enumerator_list '}'
159// 	| 'enum' IDENTIFIER
160// 	;
161
162// enumerator_list
163// 	: enumerator (',' enumerator)*
164// 	;
165
166// enumerator
167// 	: IDENTIFIER ('=' constant_expression)?
168// 	;
169
// One of the type-qualifier keywords.
type_qualifier
	: 'const'
	| 'volatile'
	;
174
// Either a direct_declarator with an optional leading pointer, or a
// pointer on its own.
declarator
	: pointer? direct_declarator
	| pointer
	;
179
// An identifier or a parenthesized declarator, followed by any number of
// declarator suffixes. When the identifier is matched while a declaration
// scope is active and its isTypedef flag is set, the identifier text is
// stored as a key in the current Symbols types object and reported via alert.
direct_declarator
	:   (	IDENTIFIER
			{
			if ($declaration.length >0 && $declaration::isTypedef) {
				$Symbols::types[$IDENTIFIER.text] = true;
				alert ("define type "+$IDENTIFIER.text);
			}
			}
		|	'(' declarator ')'
		)
		declarator_suffix*
	;
192
// An empty pair of brackets or an empty pair of parentheses. The forms
// taking a constant expression, parameter list or identifier list are
// commented out.
declarator_suffix
	:   /*'[' constant_expression ']'
    |*/   '[' ']'
//     |   '(' parameter_type_list ')'
//     |   '(' identifier_list ')'
    |   '(' ')'
	;
200
// A '*' optionally followed by type qualifiers and/or a further pointer.
pointer
	: '*' type_qualifier+ pointer?
	| '*' pointer
	| '*'
	;
206
207// parameter_type_list
208// 	: parameter_list (',' '...')?
209// 	;
210
211// parameter_list
212// 	: parameter_declaration (',' parameter_declaration)*
213// 	;
214
215// parameter_declaration
216// 	: declaration_specifiers (declarator|abstract_declarator)*
217// 	;
218
219// identifier_list
220// 	: IDENTIFIER (',' IDENTIFIER)*
221// 	;
222
223// type_name
224// 	: specifier_qualifier_list abstract_declarator?
225// 	;
226
227// abstract_declarator
228// 	: pointer direct_abstract_declarator?
229// 	| direct_abstract_declarator
230// 	;
231
232// direct_abstract_declarator
233// 	:	( '(' abstract_declarator ')' | abstract_declarator_suffix ) abstract_declarator_suffix*
234// 	;
235
236// abstract_declarator_suffix
237// 	:	'[' ']'
238// 	|	'[' constant_expression ']'
239// 	|	'(' ')'
240// 	|	'(' parameter_type_list ')'
241// 	;
242	
243// initializer
244// 	: assignment_expression
245// 	| '{' initializer_list ','? '}'
246// 	;
247
248// initializer_list
249// 	: initializer (',' initializer)*
250// 	;
251
252// // E x p r e s s i o n s
253
254// argument_expression_list
255// 	:   assignment_expression (',' assignment_expression)*
256// 	;
257
258// additive_expression
259// 	: (multiplicative_expression) ('+' multiplicative_expression | '-' multiplicative_expression)*
260// 	;
261
262// multiplicative_expression
263// 	: (cast_expression) ('*' cast_expression | '/' cast_expression | '%' cast_expression)*
264// 	;
265
266// cast_expression
267// 	: '(' type_name ')' cast_expression
268// 	| unary_expression
269// 	;
270
271// unary_expression
272// 	: postfix_expression
273// 	| '++' unary_expression
274// 	| '--' unary_expression
275// 	| unary_operator cast_expression
276// 	| 'sizeof' unary_expression
277// 	| 'sizeof' '(' type_name ')'
278// 	;
279
280// postfix_expression
281// 	:   primary_expression
282//         (   '[' expression ']'
283//         |   '(' ')'
284//         |   '(' argument_expression_list ')'
285//         |   '.' IDENTIFIER
286//         |   '*' IDENTIFIER
287//         |   '->' IDENTIFIER
288//         |   '++'
289//         |   '--'
290//         )*
291// 	;
292
293// unary_operator
294// 	: '&'
295// 	| '*'
296// 	| '+'
297// 	| '-'
298// 	| '~'
299// 	| '!'
300// 	;
301
302// primary_expression
303// 	: IDENTIFIER
304// 	| constant
305// 	| '(' expression ')'
306// 	;
307
308// constant
309//     :   HEX_LITERAL
310//     |   OCTAL_LITERAL
311//     |   DECIMAL_LITERAL
312//     |	CHARACTER_LITERAL
313// 	|	STRING_LITERAL
314//     |   FLOATING_POINT_LITERAL
315//     ;
316
317// /////
318
319// expression
320// 	: assignment_expression (',' assignment_expression)*
321// 	;
322
323// constant_expression
324// 	: conditional_expression
325// 	;
326
327// assignment_expression
328// 	: lvalue assignment_operator assignment_expression
329// 	| conditional_expression
330// 	;
331	
332// lvalue
333// 	:	unary_expression
334// 	;
335
336// assignment_operator
337// 	: '='
338// 	| '*='
339// 	| '/='
340// 	| '%='
341// 	| '+='
342// 	| '-='
343// 	| '<<='
344// 	| '>>='
345// 	| '&='
346// 	| '^='
347// 	| '|='
348// 	;
349
350// conditional_expression
351// 	: logical_or_expression ('?' expression ':' conditional_expression)?
352// 	;
353
354// logical_or_expression
355// 	: logical_and_expression ('||' logical_and_expression)*
356// 	;
357
358// logical_and_expression
359// 	: inclusive_or_expression ('&&' inclusive_or_expression)*
360// 	;
361
362// inclusive_or_expression
363// 	: exclusive_or_expression ('|' exclusive_or_expression)*
364// 	;
365
366// exclusive_or_expression
367// 	: and_expression ('^' and_expression)*
368// 	;
369
370// and_expression
371// 	: equality_expression ('&' equality_expression)*
372// 	;
373// equality_expression
374// 	: relational_expression (('=='|'!=') relational_expression)*
375// 	;
376
377// relational_expression
378// 	: shift_expression (('<'|'>'|'<='|'>=') shift_expression)*
379// 	;
380
381// shift_expression
382// 	: additive_expression (('<<'|'>>') additive_expression)*
383// 	;
384
385// // S t a t e m e n t s
386
387// statement
388// 	: labeled_statement
389// 	| compound_statement
390// 	| expression_statement
391// 	| selection_statement
392// 	| iteration_statement
393// 	| jump_statement
394// 	;
395
396// labeled_statement
397// 	: IDENTIFIER ':' statement
398// 	| 'case' constant_expression ':' statement
399// 	| 'default' ':' statement
400// 	;
401
402// compound_statement
403// scope Symbols; // blocks have a scope of symbols
404// @init {
405//   $Symbols::types = {}
406// }
407// 	: '{' declaration* statement_list? '}'
408// 	;
409
410// statement_list
411// 	: statement+
412// 	;
413
414// expression_statement
415// 	: ';'
416// 	| expression ';'
417// 	;
418
419// selection_statement
420// 	: 'if' '(' expression ')' statement (options {k=1; backtrack=false;}:'else' statement)?
421// 	| 'switch' '(' expression ')' statement
422// 	;
423
424// iteration_statement
425// 	: 'while' '(' expression ')' statement
426// 	| 'do' statement 'while' '(' expression ')' ';'
427// 	| 'for' '(' expression_statement expression_statement expression? ')' statement
428// 	;
429
430// jump_statement
431// 	: 'goto' IDENTIFIER ';'
432// 	| 'continue' ';'
433// 	| 'break' ';'
434// 	| 'return' ';'
435// 	| 'return' expression ';'
436// 	;
437
// A LETTER followed by any number of LETTERs or decimal digits.
IDENTIFIER
	:	LETTER (LETTER|'0'..'9')*
	;
441	
// Characters allowed in identifiers besides digits: '$', ASCII letters, '_'.
fragment
LETTER
	:	'$'
	|	'A'..'Z'
	|	'a'..'z'
	|	'_'
	;
449
// Single-quoted literal holding one escape sequence or one character that
// is neither a single quote nor a backslash.
CHARACTER_LITERAL
    :   '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
    ;
453
// Double-quoted literal holding any number of escape sequences or
// characters other than backslash and double quote.
STRING_LITERAL
    :  '"' ( EscapeSequence | ~('\\'|'"') )* '"'
    ;
457
// '0x' or '0X', one or more hex digits, optional integer type suffix.
HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ;
459
// A lone '0', or a non-zero digit followed by digits; optional integer type suffix.
DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ;
461
// '0' followed by one or more octal digits; optional integer type suffix.
OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ;
463
// One hexadecimal digit, either case.
fragment
HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;
466
// Integer suffix: an 'l'/'L' optionally preceded by 'u'/'U', or a 'u'/'U'
// optionally followed by 'l'/'L'.
fragment
IntegerTypeSuffix
	:	('u'|'U')? ('l'|'L')
	|	('u'|'U')  ('l'|'L')?
	;
472
// Floating-point forms: digits with a '.', a leading '.', digits with a
// mandatory exponent, or digits with a mandatory float type suffix.
FLOATING_POINT_LITERAL
    :   ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix?
    |   '.' ('0'..'9')+ Exponent? FloatTypeSuffix?
    |   ('0'..'9')+ Exponent FloatTypeSuffix?
    |   ('0'..'9')+ Exponent? FloatTypeSuffix
	;
479
// 'e' or 'E', optional sign, one or more digits.
fragment
Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
482
// One of the suffix letters f, F, d, D.
fragment
FloatTypeSuffix : ('f'|'F'|'d'|'D') ;
485
// A backslash followed by one of the listed single-character escapes, or
// an octal escape.
fragment
EscapeSequence
    :   '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
    |   OctalEscape
    ;
491
// A backslash followed by one to three octal digits; the three-digit form
// restricts its first digit to 0..3.
fragment
OctalEscape
    :   '\\' ('0'..'3') ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7')
    ;
498
// Backslash, 'u', then exactly four hex digits.
// NOTE(review): no rule visible in this file references this fragment.
fragment
UnicodeEscape
    :   '\\' 'u' HexDigit HexDigit HexDigit HexDigit
    ;
503
// A single whitespace character (space, CR, tab, form feed or newline),
// sent to the hidden channel.
WS  :  (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=org.antlr.runtime.Token.HIDDEN_CHANNEL;}
    ;
506
// Block comment: non-greedy match up to the first closing delimiter,
// sent to the hidden channel.
COMMENT
    :   '/*' ( options {greedy=false;} : . )* '*/' {$channel=org.antlr.runtime.Token.HIDDEN_CHANNEL;}
    ;
510
// Line comment: '//' up to and including the line terminator (a newline is
// required), sent to the hidden channel.
LINE_COMMENT
    : '//' ~('\n'|'\r')* '\r'? '\n' {$channel=org.antlr.runtime.Token.HIDDEN_CHANNEL;}
    ;
514
// ignore #line info for now
// Any line starting with '#', up to and including the line terminator
// (a newline is required), sent to the hidden channel.
LINE_COMMAND
    : '#' ~('\n'|'\r')* '\r'? '\n' {$channel=org.antlr.runtime.Token.HIDDEN_CHANNEL;}
    ;
519
520