#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3/test/functional'

# Functional test for a lexer grammar declared with the `filter=true` option.
#
# The example below has no explicit expectations: it only drives the generated
# lexer across a small Java snippet, so it fails only if lexing raises.
class TestFilterMode < ANTLR3::Test::Functional

  # NOTE(review): `inline_grammar` is provided by the test harness; presumably
  # it compiles the grammar text and exposes it as `Filter::Lexer` -- confirm
  # against ANTLR3::Test::Functional.
  inline_grammar( <<-'END' )
    lexer grammar Filter;
    options {
        language = Ruby;
        filter=true;
    }

    IMPORT
        :   'import' WS QIDStar WS? ';'
        ;

    RETURN
        :   'return' .* ';'
        ;

    CLASS
        :   'class' WS ID WS? ('extends' WS QID WS?)?
            ('implements' WS QID WS? (',' WS? QID WS?)*)? '{'
        ;

    COMMENT
        :   '/*' .* '*/'
        ;

    STRING
        :   '"' (options {greedy=false;}: ESC | .)* '"'
        ;

    CHAR
        :   '\'' (options {greedy=false;}: ESC | .)* '\''
        ;

    WS  :   (' '|'\t'|'\n')+
        ;

    fragment
    QID :   ID ('.' ID)*
        ;

    /** QID cannot see beyond end of token so using QID '.*'? somewhere won't
     *  ever match since k=1 look in the QID loop of '.' will make it loop.
     *  I made this rule to compensate.
     */
    fragment
    QIDStar
        :   ID ('.' ID)* '.*'?
        ;

    fragment
    TYPE:   QID '[]'?
        ;

    fragment
    ARG :   TYPE WS ID
        ;

    fragment
    ID  :   ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
        ;

    fragment
    ESC :   '\\' ('"'|'\''|'\\')
        ;
  END

  example "skipping tokens that aren't important with filter mode" do
    input = <<-END.fixed_indent( 0 )
      import org.antlr.runtime.*;

      public class Main {
        public static void main(String[] args) throws Exception {
            for (int i=0; i<args.length; i++) {
                CharStream input = new ANTLRFileStream(args[i]);
                FuzzyJava lex = new FuzzyJava(input);
                TokenStream tokens = new CommonTokenStream(lex);
                tokens.toString();
                //System.out.println(tokens);
            }
        }
      }
    END

    lexer = Filter::Lexer.new( input )
    # Pull every token out of the lexer; the tokens themselves are not inspected.
    lexer.each { |tk| tk }
  end


end


# Functional test for "fuzzy" lexing: a filter-mode lexer whose rule actions
# report the Java constructs they recognise (classes, methods, variables,
# calls and comments). The example compares the captured report against an
# expected transcript.
class TestFuzzy < ANTLR3::Test::Functional

  # NOTE(review): `say` and `output` are not defined in this file; they
  # presumably come from ANTLR3::Test::CaptureOutput, which the grammar mixes
  # into the generated lexer through its @members section -- confirm.
  inline_grammar( <<-'END' )
    lexer grammar Fuzzy;
    options {
        language = Ruby;
        filter=true;
    }

    @members {
      include ANTLR3::Test::CaptureOutput
    }

    IMPORT
        :   'import' WS name=QIDStar WS? ';'
        ;

    /** Avoids having "return foo;" match as a field */
    RETURN
        :   'return' (options {greedy=false;}:.)* ';'
        ;

    CLASS
        :   'class' WS name=ID WS? ('extends' WS QID WS?)?
            ('implements' WS QID WS? (',' WS? QID WS?)*)? '{'
            {
              say("found class " << $name.text)
            }
        ;

    METHOD
        :   TYPE WS name=ID WS? '(' ( ARG WS? (',' WS? ARG WS?)* )? ')' WS?
            ('throws' WS QID WS? (',' WS? QID WS?)*)? '{'
            {
              say("found method " << $name.text)
            }
        ;

    FIELD
        :   TYPE WS name=ID '[]'? WS? (';'|'=')
            {
              say("found var " << $name.text)
            }
        ;

    STAT:   ('if'|'while'|'switch'|'for') WS? '(' ;

    CALL
        :   name=QID WS? '('
            {
              say("found call " << $name.text)
            }
        ;

    COMMENT
        :   '/*' (options {greedy=false;} : . )* '*/'
            {
              say("found comment " << self.text)
            }
        ;

    SL_COMMENT
        :   '//' (options {greedy=false;} : . )* '\n'
            {
              say("found // comment " << self.text)
            }
        ;

    STRING
        :   '"' (options {greedy=false;}: ESC | .)* '"'
        ;

    CHAR
        :   '\'' (options {greedy=false;}: ESC | .)* '\''
        ;

    WS  :   (' '|'\t'|'\n')+
        ;

    fragment
    QID :   ID ('.' ID)*
        ;

    /** QID cannot see beyond end of token so using QID '.*'? somewhere won't
     *  ever match since k=1 look in the QID loop of '.' will make it loop.
     *  I made this rule to compensate.
     */
    fragment
    QIDStar
        :   ID ('.' ID)* '.*'?
        ;

    fragment
    TYPE:   QID '[]'?
        ;

    fragment
    ARG :   TYPE WS ID
        ;

    fragment
    ID  :   ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
        ;

    fragment
    ESC :   '\\' ('"'|'\''|'\\')
        ;
  END

  example "fuzzy lexing with the filter mode option" do
    input = <<-END.fixed_indent( 0 )
      import org.antlr.runtime.*;

      public class Main {
        public static void main(String[] args) throws Exception {
            for (int i=0; i<args.length; i++) {
                CharStream input = new ANTLRFileStream(args[i]);
                FuzzyJava lex = new FuzzyJava(input);
                TokenStream tokens = new CommonTokenStream(lex);
                tokens.toString();
                //System.out.println(tokens);
            }
        }
      }
    END

    expected_output = <<-END.fixed_indent( 0 )
      found class Main
      found method main
      found var i
      found var input
      found call ANTLRFileStream
      found var lex
      found call FuzzyJava
      found var tokens
      found call CommonTokenStream
      found call tokens.toString
      found // comment //System.out.println(tokens);
    END

    lexer = Fuzzy::Lexer.new( input )
    # Consume all tokens so that every rule action gets a chance to run.
    lexer.each { |tk| tk }
    lexer.output.should == expected_output
  end


end