1#!/usr/bin/ruby
2# encoding: utf-8
3
4require 'antlr3/test/functional'
5
6class TestBacktracking < ANTLR3::Test::Functional
7
8  inline_grammar( <<-'END' )
9    grammar Backtrack;
10    options {
11			language = Ruby;
12			backtrack=true;
13			memoize=true;
14			k=2;
15    }
16    
17    scope Symbols {
18    	types;
19    }
20    
21    @members {
22      def is_type_name?(name)
23        @Symbols_stack.reverse_each do |scope|
24          scope.types.include?(name) and return true
25        end
26        return false
27      end
28      
29      def report_error(e)
30        # do nothing
31      end
32      
33    }
34    
35    translation_unit
36    scope Symbols; // entire file is a scope
37    @init {
38      $Symbols::types = Set.new
39    }
40    	: external_declaration+
41    	;
42    
43    /** Either a function definition or any other kind of C decl/def.
44     *  The LL(*) analysis algorithm fails to deal with this due to
45     *  recursion in the declarator rules.  I'm putting in a
46     *  manual predicate here so that we don't backtrack over
47     *  the entire function.  Further, you get a better error
48     *  as errors within the function itself don't make it fail
49     *  to predict that it's a function.  Weird errors previously.
50     *  Remember: the goal is to avoid backtrack like the plague
51     *  because it makes debugging, actions, and errors harder.
52     *
53     *  Note that k=1 results in a much smaller predictor for the 
54     *  fixed look; k=2 made a few extra thousand lines. ;)
55     *  I'll have to optimize that in the future.
56     */
57    external_declaration
58    options {k=1;}
59    	: ( declaration_specifiers? declarator declaration* '{' )=> function_definition
60    	| declaration
61    	;
62    
63    function_definition
64    scope Symbols; // put parameters and locals into same scope for now
65    @init {
66      $Symbols::types = set()
67    }
68    	:	declaration_specifiers? declarator
69    	;
70    
71    declaration
72    scope {
73      is_type_def;
74    }
75    @init {
76      $declaration::is_type_def = false
77    }
78    	: 'typedef' declaration_specifiers? {$declaration::is_type_def = true}
79    	  init_declarator_list ';' // special case, looking for typedef	
80    	| declaration_specifiers init_declarator_list? ';'
81    	;
82    
83    declaration_specifiers
84    	:   (   storage_class_specifier
85    		|   type_specifier
86            |   type_qualifier
87            )+
88    	;
89    
90    init_declarator_list
91    	: init_declarator (',' init_declarator)*
92    	;
93    
94    init_declarator
95    	: declarator //('=' initializer)?
96    	;
97    
98    storage_class_specifier
99    	: 'extern'
100    	| 'static'
101    	| 'auto'
102    	| 'register'
103    	;
104    
105    type_specifier
106    	: 'void'
107    	| 'char'
108    	| 'short'
109    	| 'int'
110    	| 'long'
111    	| 'float'
112    	| 'double'
113    	| 'signed'
114    	| 'unsigned'
115    	| type_id
116    	;
117    
118    type_id
119        :   { is_type_name?(@input.look(1).text)}? IDENTIFIER
120        ;
121    
122    type_qualifier
123    	: 'const'
124    	| 'volatile'
125    	;
126    
127    declarator
128    	: pointer? direct_declarator
129    	| pointer
130    	;
131    
132    direct_declarator
133    	:   (	IDENTIFIER
134    			{
135    			if $declaration.length > 0 && $declaration::is_type_def
136						$Symbols::types.add($IDENTIFIER.text)
137					end
138    			}
139    		|	'(' declarator ')'
140    		)
141            declarator_suffix*
142    	;
143    
144    declarator_suffix
145    	:   /*'[' constant_expression ']'
146        |*/   '[' ']'
147        |   '(' ')'
148    	;
149    
150    pointer
151    	: '*' type_qualifier+ pointer?
152    	| '*' pointer
153    	| '*'
154    	;
155    
156    IDENTIFIER
157    	:	LETTER (LETTER|'0'..'9')*
158    	;
159    	
160    fragment
161    LETTER
162    	:	'$'
163    	|	'A'..'Z'
164    	|	'a'..'z'
165    	|	'_'
166    	;
167    
168    CHARACTER_LITERAL
169        :   '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
170        ;
171    
172    STRING_LITERAL
173        :  '"' ( EscapeSequence | ~('\\'|'"') )* '"'
174        ;
175    
176    HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ;
177    
178    DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ;
179    
180    OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ;
181    
182    fragment
183    HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;
184    
185    fragment
186    IntegerTypeSuffix
187    	:	('u'|'U')? ('l'|'L')
188    	|	('u'|'U')  ('l'|'L')?
189    	;
190    
191    FLOATING_POINT_LITERAL
192        :   ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix?
193        |   '.' ('0'..'9')+ Exponent? FloatTypeSuffix?
194        |   ('0'..'9')+ Exponent FloatTypeSuffix?
195        |   ('0'..'9')+ Exponent? FloatTypeSuffix
196    	;
197    
198    fragment
199    Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
200    
201    fragment
202    FloatTypeSuffix : ('f'|'F'|'d'|'D') ;
203    
204    fragment
205    EscapeSequence
206        :   '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
207        |   OctalEscape
208        ;
209    
210    fragment
211    OctalEscape
212        :   '\\' ('0'..'3') ('0'..'7') ('0'..'7')
213        |   '\\' ('0'..'7') ('0'..'7')
214        |   '\\' ('0'..'7')
215        ;
216    
217    fragment
218    UnicodeEscape
219        :   '\\' 'u' HexDigit HexDigit HexDigit HexDigit
220        ;
221    
222    WS  :  (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;}
223        ;
224    
225    COMMENT
226        :   '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
227        ;
228    
229    LINE_COMMENT
230        : '//' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
231        ;
232    LINE_COMMAND 
233        : '#' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
234        ;
235  END
236
237  example "grammar with backtracking and memoization" do
238    lexer = Backtrack::Lexer.new( 'int a;' )
239    parser = Backtrack::Parser.new lexer
240    events = parser.translation_unit
241  end
242
243end
244