filter-mode.rb revision 324c4644fee44b9898524c09511bd33c3f12e2df
1#!/usr/bin/ruby
2# encoding: utf-8
3
4require 'antlr3/test/functional'
5
# Functional spec for a lexer generated from a grammar that sets the
# +filter=true+ option.
#
# The inline grammar declares token rules for a few Java-like constructs
# (IMPORT, RETURN, CLASS, COMMENT, STRING, CHAR, WS) plus fragment helper
# rules. The single example feeds a small Java source file through the
# generated lexer.
class TestFilterMode < ANTLR3::Test::Functional

  # The grammar is passed to inline_grammar as a single-quoted heredoc, so
  # backslash sequences in the rules reach ANTLR untouched by Ruby.
  inline_grammar( <<-'GRAMMAR' )
    lexer grammar Filter;
    options {
        language = Ruby;
        filter=true;
    }
    
    IMPORT
      :  'import' WS QIDStar WS? ';'
      ;
      
    RETURN
      :  'return' .* ';'
      ;
    
    CLASS
      :  'class' WS ID WS? ('extends' WS QID WS?)?
        ('implements' WS QID WS? (',' WS? QID WS?)*)? '{'
      ;
      
    COMMENT
        :   '/*' .* '*/'
        ;
    
    STRING
        :  '"' (options {greedy=false;}: ESC | .)* '"'
      ;
    
    CHAR
      :  '\'' (options {greedy=false;}: ESC | .)* '\''
      ;
    
    WS  :   (' '|'\t'|'\n')+
        ;
    
    fragment
    QID :  ID ('.' ID)*
      ;
      
    /** QID cannot see beyond end of token so using QID '.*'? somewhere won't
     *  ever match since k=1 look in the QID loop of '.' will make it loop.
     *  I made this rule to compensate.
     */
    fragment
    QIDStar
      :  ID ('.' ID)* '.*'?
      ;
    
    fragment
    TYPE:   QID '[]'?
        ;
        
    fragment
    ARG :   TYPE WS ID
        ;
    
    fragment
    ID  :   ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
        ;
    
    fragment
    ESC  :  '\\' ('"'|'\''|'\\')
      ;
  GRAMMAR

  example "skipping tokens that aren't important with filter mode" do
    # Java sample to lex; fixed_indent( 0 ) is a helper defined outside this
    # file (presumably it normalizes the heredoc's leading indentation).
    java_source = <<-JAVA.fixed_indent( 0 )
      import org.antlr.runtime.*;
      
      public class Main {
        public static void main(String[] args) throws Exception {
            for (int i=0; i<args.length; i++) {
          CharStream input = new ANTLRFileStream(args[i]);
          FuzzyJava lex = new FuzzyJava(input);
          TokenStream tokens = new CommonTokenStream(lex);
          tokens.toString();
          //System.out.println(tokens);
            }
        }
      }
    JAVA

    # Smoke test only: the lexer is driven over the whole input and the
    # tokens are collected, but no expectation is placed on them. The example
    # passes as long as lexing finishes without raising.
    Filter::Lexer.new( java_source ).map { |token| token }
  end

end
96
97
# Functional spec for "fuzzy" lexing: a filter-mode lexer whose rule actions
# report the Java-like constructs they recognize.
#
# The grammar's @members section mixes ANTLR3::Test::CaptureOutput into the
# generated lexer. The rule actions call +say+ with a "found ..." message and
# the example reads the accumulated text back through +output+ (both are
# presumably supplied by that mixin -- it is defined outside this file).
class TestFuzzy < ANTLR3::Test::Functional

  # Single-quoted heredoc: backslash sequences in the rules are handed to
  # ANTLR exactly as written.
  inline_grammar( <<-'GRAMMAR' )
    lexer grammar Fuzzy;
    options {
        language = Ruby;
        filter=true;
    }
    
    @members {
      include ANTLR3::Test::CaptureOutput
    }
    
    IMPORT
      :  'import' WS name=QIDStar WS? ';'
      ;
      
    /** Avoids having "return foo;" match as a field */
    RETURN
      :  'return' (options {greedy=false;}:.)* ';'
      ;
    
    CLASS
      :  'class' WS name=ID WS? ('extends' WS QID WS?)?
        ('implements' WS QID WS? (',' WS? QID WS?)*)? '{'
        {  
          say("found class " << $name.text)  
        }
      ;
      
    METHOD
        :   TYPE WS name=ID WS? '(' ( ARG WS? (',' WS? ARG WS?)* )? ')' WS? 
           ('throws' WS QID WS? (',' WS? QID WS?)*)? '{'
            {
              say("found method " << $name.text)
            }
        ;
    
    FIELD
        :   TYPE WS name=ID '[]'? WS? (';'|'=')
            {
              say("found var " << $name.text)
            }
        ;
    
    STAT:  ('if'|'while'|'switch'|'for') WS? '(' ;
      
    CALL
        :   name=QID WS? '('
            {
              say("found call " << $name.text)
            }
        ;
    
    COMMENT
        :   '/*' (options {greedy=false;} : . )* '*/'
            {
              say("found comment " << self.text)
            }
        ;
    
    SL_COMMENT
        :   '//' (options {greedy=false;} : . )* '\n'
            {
              say("found // comment " << self.text)
            }
        ;
      
    STRING
      :  '"' (options {greedy=false;}: ESC | .)* '"'
      ;
    
    CHAR
      :  '\'' (options {greedy=false;}: ESC | .)* '\''
      ;
    
    WS  :   (' '|'\t'|'\n')+
        ;
    
    fragment
    QID :  ID ('.' ID)*
      ;
      
    /** QID cannot see beyond end of token so using QID '.*'? somewhere won't
     *  ever match since k=1 look in the QID loop of '.' will make it loop.
     *  I made this rule to compensate.
     */
    fragment
    QIDStar
      :  ID ('.' ID)* '.*'?
      ;
    
    fragment
    TYPE:   QID '[]'?
        ;
        
    fragment
    ARG :   TYPE WS ID
        ;
    
    fragment
    ID  :   ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
        ;
    
    fragment
    ESC  :  '\\' ('"'|'\''|'\\')
      ;
  GRAMMAR

  example "fuzzy lexing with the filter mode option" do
    # Java sample to scan; fixed_indent( 0 ) is a helper defined outside this
    # file (presumably it normalizes the heredoc's leading indentation).
    java_source = <<-JAVA.fixed_indent( 0 )
      import org.antlr.runtime.*;
      
      public class Main {
        public static void main(String[] args) throws Exception {
            for (int i=0; i<args.length; i++) {
          CharStream input = new ANTLRFileStream(args[i]);
          FuzzyJava lex = new FuzzyJava(input);
          TokenStream tokens = new CommonTokenStream(lex);
          tokens.toString();
          //System.out.println(tokens);
            }
        }
      }
    JAVA

    # The messages the rule actions are expected to emit, in order.
    expected_report = <<-REPORT.fixed_indent( 0 )
      found class Main
      found method main
      found var i
      found var input
      found call ANTLRFileStream
      found var lex
      found call FuzzyJava
      found var tokens
      found call CommonTokenStream
      found call tokens.toString
      found // comment //System.out.println(tokens);
    REPORT

    fuzzy_lexer = Fuzzy::Lexer.new( java_source )
    # Drain the lexer so every rule action runs; the tokens themselves are
    # not inspected, only the text the actions recorded.
    fuzzy_lexer.each { |token| token }
    fuzzy_lexer.output.should == expected_report
  end

end
245