1324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#!/usr/bin/ruby 2324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# encoding: utf-8 3324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 4324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverrequire 'antlr3/test/functional' 5324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 6324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverclass TestBacktracking < ANTLR3::Test::Functional 7324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 8324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver inline_grammar( <<-'END' ) 9324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver grammar Backtrack; 10324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver options { 11324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver language = Ruby; 12324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver backtrack=true; 13324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver memoize=true; 14324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver k=2; 15324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 16324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 17324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver scope Symbols { 18324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver types; 19324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 20324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 21324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @members { 22324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def is_type_name?(name) 23324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @Symbols_stack.reverse_each do |scope| 24324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver scope.types.include?(name) and return true 25324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 26324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return false 27324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 28324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 29324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def report_error(e) 30324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # do nothing 31324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 32324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 33324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 34324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 35324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver translation_unit 36324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver scope Symbols; // entire file is a scope 37324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @init { 38324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver $Symbols::types = Set.new 39324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 40324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : external_declaration+ 41324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 42324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 43324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Either a function definition or any other kind of C decl/def. 44324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * The LL(*) analysis algorithm fails to deal with this due to 45324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * recursion in the declarator rules. I'm putting in a 46324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * manual predicate here so that we don't backtrack over 47324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the entire function. Further, you get a better error 48324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * as errors within the function itself don't make it fail 49324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * to predict that it's a function. Weird errors previously. 50324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Remember: the goal is to avoid backtrack like the plague 51324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * because it makes debugging, actions, and errors harder. 52324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 53324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Note that k=1 results in a much smaller predictor for the 54324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * fixed look; k=2 made a few extra thousand lines. ;) 55324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * I'll have to optimize that in the future. 56324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 57324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver external_declaration 58324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver options {k=1;} 59324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : ( declaration_specifiers? declarator declaration* '{' )=> function_definition 60324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | declaration 61324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 62324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 63324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver function_definition 64324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver scope Symbols; // put parameters and locals into same scope for now 65324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @init { 66324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver $Symbols::types = set() 67324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 68324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : declaration_specifiers? declarator 69324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 70324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 71324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver declaration 72324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver scope { 73324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver is_type_def; 74324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 75324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @init { 76324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver $declaration::is_type_def = false 77324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 78324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : 'typedef' declaration_specifiers? {$declaration::is_type_def = true} 79324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver init_declarator_list ';' // special case, looking for typedef 80324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | declaration_specifiers init_declarator_list? ';' 81324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 82324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 83324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver declaration_specifiers 84324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : ( storage_class_specifier 85324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | type_specifier 86324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | type_qualifier 87324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver )+ 88324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 89324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 90324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver init_declarator_list 91324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : init_declarator (',' init_declarator)* 92324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 93324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 94324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver init_declarator 95324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : declarator //('=' initializer)? 96324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 97324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 98324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver storage_class_specifier 99324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : 'extern' 100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | 'static' 101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | 'auto' 102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | 'register' 103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver type_specifier 106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : 'void' 107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | 'char' 108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | 'short' 109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | 'int' 110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | 'long' 111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | 'float' 112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | 'double' 113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | 'signed' 114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | 'unsigned' 115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | type_id 116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver type_id 119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : { is_type_name?(@input.look(1).text)}? IDENTIFIER 120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver type_qualifier 123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : 'const' 124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | 'volatile' 125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver declarator 128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : pointer? direct_declarator 129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | pointer 130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver direct_declarator 133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : ( IDENTIFIER 134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if $declaration.length > 0 && $declaration::is_type_def 136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver $Symbols::types.add($IDENTIFIER.text) 137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | '(' declarator ')' 140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ) 141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver declarator_suffix* 142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver declarator_suffix 145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : /*'[' constant_expression ']' 146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver |*/ '[' ']' 147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | '(' ')' 148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pointer 151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : '*' type_qualifier+ pointer? 152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | '*' pointer 153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | '*' 154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver IDENTIFIER 157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : LETTER (LETTER|'0'..'9')* 158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver fragment 161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver LETTER 162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : '$' 163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | 'A'..'Z' 164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | 'a'..'z' 165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | '_' 166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver CHARACTER_LITERAL 169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : '\'' ( EscapeSequence | ~('\''|'\\') ) '\'' 170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver STRING_LITERAL 173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : '"' ( EscapeSequence | ~('\\'|'"') )* '"' 174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ; 177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ; 179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ; 181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver fragment 183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ; 184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver fragment 186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver IntegerTypeSuffix 187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : ('u'|'U')? ('l'|'L') 188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | ('u'|'U') ('l'|'L')? 189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver FLOATING_POINT_LITERAL 192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix? 193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | '.' ('0'..'9')+ Exponent? FloatTypeSuffix? 194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | ('0'..'9')+ Exponent FloatTypeSuffix? 195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | ('0'..'9')+ Exponent? FloatTypeSuffix 196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver fragment 199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ; 200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver fragment 202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver FloatTypeSuffix : ('f'|'F'|'d'|'D') ; 203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver fragment 205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver EscapeSequence 206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\') 207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | OctalEscape 208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver fragment 211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver OctalEscape 212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : '\\' ('0'..'3') ('0'..'7') ('0'..'7') 213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | '\\' ('0'..'7') ('0'..'7') 214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | '\\' ('0'..'7') 215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver fragment 218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver UnicodeEscape 219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : '\\' 'u' HexDigit HexDigit HexDigit HexDigit 220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver WS : (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;} 223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver COMMENT 226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;} 227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 229324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver LINE_COMMENT 230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : '//' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;} 231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver LINE_COMMAND 233324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver : '#' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;} 234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ; 235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver END 236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 237324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver example "grammar with backtracking and memoization" do 238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer = Backtrack::Lexer.new( 'int a;' ) 239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver parser = Backtrack::Parser.new lexer 240324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver events = parser.translation_unit 241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverend 244