#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3/test/functional'

# Functional test for a lexer generated from a grammar that sets the
# +filter=true+ option.
#
# The inline +Filter+ grammar declares the token rules IMPORT, RETURN, CLASS,
# COMMENT, STRING, CHAR and WS, together with the fragment rules QID, QIDStar,
# TYPE, ARG, ID and ESC that they are built from.
class TestFilterMode < ANTLR3::Test::Functional

  inline_grammar( <<-'END' )
    lexer grammar Filter;
    options {
        language = Ruby;
        filter=true;
    }
    
    IMPORT
      :  'import' WS QIDStar WS? ';'
      ;
      
    RETURN
      :  'return' .* ';'
      ;
    
    CLASS
      :  'class' WS ID WS? ('extends' WS QID WS?)?
        ('implements' WS QID WS? (',' WS? QID WS?)*)? '{'
      ;
      
    COMMENT
        :   '/*' .* '*/'
        ;
    
    STRING
        :  '"' (options {greedy=false;}: ESC | .)* '"'
      ;
    
    CHAR
      :  '\'' (options {greedy=false;}: ESC | .)* '\''
      ;
    
    WS  :   (' '|'\t'|'\n')+
        ;
    
    fragment
    QID :  ID ('.' ID)*
      ;
      
    /** QID cannot see beyond end of token so using QID '.*'? somewhere won't
     *  ever match since k=1 look in the QID loop of '.' will make it loop.
     *  I made this rule to compensate.
     */
    fragment
    QIDStar
      :  ID ('.' ID)* '.*'?
      ;
    
    fragment
    TYPE:   QID '[]'?
        ;
        
    fragment
    ARG :   TYPE WS ID
        ;
    
    fragment
    ID  :   ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
        ;
    
    fragment
    ESC  :  '\\' ('"'|'\''|'\\')
      ;
  END

  # Runs the generated lexer over a small Java source snippet that contains
  # plenty of text the grammar has no rule for. The example makes no
  # assertions about the tokens produced.
  example "skipping tokens that aren't important with filter mode" do
    input = <<-END.fixed_indent( 0 )
      import org.antlr.runtime.*;
      
      public class Main {
        public static void main(String[] args) throws Exception {
            for (int i=0; i<args.length; i++) {
          CharStream input = new ANTLRFileStream(args[i]);
          FuzzyJava lex = new FuzzyJava(input);
          TokenStream tokens = new CommonTokenStream(lex);
          tokens.toString();
          //System.out.println(tokens);
            }
        }
      }
    END
    
    lexer = Filter::Lexer.new( input )
    # Drain the lexer to the end of the input; the tokens are not inspected,
    # so nothing is collected (the original bound them to an unused local).
    # NOTE(review): presumably the example is meant to pass as long as lexing
    # raises no error -- confirm against the test harness.
    lexer.each { |tk| tk }
  end
  

end


# Functional test for "fuzzy" lexing: a +filter=true+ lexer grammar whose
# rule actions report the constructs they recognise.
#
# The inline +Fuzzy+ grammar mixes ANTLR3::Test::CaptureOutput into the
# generated lexer (via the @members section). Its CLASS, METHOD, FIELD, CALL,
# COMMENT and SL_COMMENT rules each call +say+ with a "found ..." message;
# the example compares +lexer.output+ against the expected transcript.
class TestFuzzy < ANTLR3::Test::Functional

  inline_grammar( <<-'END' )
    lexer grammar Fuzzy;
    options {
        language = Ruby;
        filter=true;
    }
    
    @members {
      include ANTLR3::Test::CaptureOutput
    }
    
    IMPORT
      :  'import' WS name=QIDStar WS? ';'
      ;
      
    /** Avoids having "return foo;" match as a field */
    RETURN
      :  'return' (options {greedy=false;}:.)* ';'
      ;
    
    CLASS
      :  'class' WS name=ID WS? ('extends' WS QID WS?)?
        ('implements' WS QID WS? (',' WS? QID WS?)*)? '{'
        {  
          say("found class " + $name.text)  
        }
      ;
      
    METHOD
        :   TYPE WS name=ID WS? '(' ( ARG WS? (',' WS? ARG WS?)* )? ')' WS? 
           ('throws' WS QID WS? (',' WS? QID WS?)*)? '{'
            {
              say("found method " + $name.text)
            }
        ;
    
    FIELD
        :   TYPE WS name=ID '[]'? WS? (';'|'=')
            {
              say("found var " + $name.text)
            }
        ;
    
    STAT:  ('if'|'while'|'switch'|'for') WS? '(' ;
      
    CALL
        :   name=QID WS? '('
            {
              say("found call " + $name.text)
            }
        ;
    
    COMMENT
        :   '/*' (options {greedy=false;} : . )* '*/'
            {
              say("found comment " + self.text)
            }
        ;
    
    SL_COMMENT
        :   '//' (options {greedy=false;} : . )* '\n'
            {
              say("found // comment " + self.text)
            }
        ;
      
    STRING
      :  '"' (options {greedy=false;}: ESC | .)* '"'
      ;
    
    CHAR
      :  '\'' (options {greedy=false;}: ESC | .)* '\''
      ;
    
    WS  :   (' '|'\t'|'\n')+
        ;
    
    fragment
    QID :  ID ('.' ID)*
      ;
      
    /** QID cannot see beyond end of token so using QID '.*'? somewhere won't
     *  ever match since k=1 look in the QID loop of '.' will make it loop.
     *  I made this rule to compensate.
     */
    fragment
    QIDStar
      :  ID ('.' ID)* '.*'?
      ;
    
    fragment
    TYPE:   QID '[]'?
        ;
        
    fragment
    ARG :   TYPE WS ID
        ;
    
    fragment
    ID  :   ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
        ;
    
    fragment
    ESC  :  '\\' ('"'|'\''|'\\')
      ;
  END
  
  # Lexes a small Java source snippet and checks that the messages emitted by
  # the grammar's actions match the expected transcript exactly, in order.
  example "fuzzy lexing with the filter mode option" do
    input = <<-END.fixed_indent( 0 )
      import org.antlr.runtime.*;
      
      public class Main {
        public static void main(String[] args) throws Exception {
            for (int i=0; i<args.length; i++) {
          CharStream input = new ANTLRFileStream(args[i]);
          FuzzyJava lex = new FuzzyJava(input);
          TokenStream tokens = new CommonTokenStream(lex);
          tokens.toString();
          //System.out.println(tokens);
            }
        }
      }
    END
    
    expected_output = <<-END.fixed_indent( 0 )
      found class Main
      found method main
      found var i
      found var input
      found call ANTLRFileStream
      found var lex
      found call FuzzyJava
      found var tokens
      found call CommonTokenStream
      found call tokens.toString
      found // comment //System.out.println(tokens);
    END
    
    lexer = Fuzzy::Lexer.new( input )
    # Drain the lexer so every rule action runs; the tokens themselves are
    # discarded because only the captured output is compared.
    lexer.each { |tk| tk }
    lexer.output.should == expected_output
  end


end
245