GrammarSpelunker.java revision 324c4644fee44b9898524c09511bd33c3f12e2df
1/*
2 * [The "BSD license"]
3 *  Copyright (c) 2010 Terence Parr
4 *  All rights reserved.
5 *
6 *  Redistribution and use in source and binary forms, with or without
7 *  modification, are permitted provided that the following conditions
8 *  are met:
9 *  1. Redistributions of source code must retain the above copyright
10 *      notice, this list of conditions and the following disclaimer.
11 *  2. Redistributions in binary form must reproduce the above copyright
12 *      notice, this list of conditions and the following disclaimer in the
13 *      documentation and/or other materials provided with the distribution.
14 *  3. The name of the author may not be used to endorse or promote products
15 *      derived from this software without specific prior written permission.
16 *
17 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28package org.antlr.tool;
29
30import java.io.*;
31import java.util.ArrayList;
32import java.util.List;
33
/** Load a grammar file and scan it just until we learn a few items
 *  of interest.  Currently: name, type, imports, tokenVocab, language option.
 *
 *  The nested {@link Scanner} (at bottom of this class) strips comments and
 *  any punctuation it does not care about, converting a grammar to stuff like:
 *
 *   grammar Java ; options { backtrack true ; memoize true ; }
 *   import JavaDecl JavaAnnotations JavaExpr ;
 *   ... : ...
 *
 *  First ':' or '@' indicates we can stop looking for imports/options.
 *
 *  Then we just grab interesting grammar properties.
 */
public class GrammarSpelunker {
    protected String grammarFileName;
    /** Current lookahead token; null once the scanner has hit end of file. */
    protected String token;
    protected Scanner scanner;

    // grammar info / properties
    protected String grammarModifier;
    protected String grammarName;
    protected String tokenVocab;
    protected String language = "Java"; // default
    protected String inputDirectory;
    protected List<String> importedGrammars;

    /** @param inputDirectory directory containing the grammar; may be null, in
     *         which case grammarFileName is opened as given
     *  @param grammarFileName name of the grammar file to inspect
     */
    public GrammarSpelunker(String inputDirectory, String grammarFileName) {
        this.inputDirectory = inputDirectory;
        this.grammarFileName = grammarFileName;
    }

    /** Advance to the next token; token becomes null at end of file. */
    void consume() throws IOException { token = scanner.nextToken(); }

    /** Consume the current token if it equals expecting.
     *
     *  @throws Error if the current token differs from expecting or the
     *          input ended (reported as '&lt;EOF&gt;')
     */
    protected void match(String expecting) throws IOException {
        //System.out.println("match "+expecting+"; is "+token);
        if ( token!=null && token.equals(expecting) ) {
            consume();
            return;
        }
        // token is null at end of file; report that rather than dying with an NPE
        String found = token!=null ? token : "<EOF>";
        throw new Error("Error parsing "+grammarFileName+": '"+found+
                        "' not expected '"+expecting+"'");
    }

    /** Open the grammar file, read the header, then the options section and
     *  the import statement if present.  Scanning stops at the first ':' or
     *  '@', or at end of file.  The file is always closed before returning.
     *
     *  @throws IOException if the file cannot be opened or read
     *  @throws Error if the header, options or import section is malformed
     */
    public void parse() throws IOException {
        String path =
            (inputDirectory != null ? inputDirectory + File.separator : "") + grammarFileName;
        BufferedReader br = new BufferedReader(new FileReader(path));
        try {
            scanner = new Scanner(br);
            consume();
            grammarHeader();
            // scan until imports or options
            while ( token!=null && !token.equals("@") && !token.equals(":") &&
                    !token.equals("import") && !token.equals("options") )
            {
                consume();
            }
            // token is null here when the file ended first, so compare null-safely
            if ( "options".equals(token) ) options();
            // scan until import or first rule
            while ( token!=null && !token.equals("@") && !token.equals(":") &&
                    !token.equals("import") )
            {
                consume();
            }
            if ( "import".equals(token) ) imports();
            // ignore rest of input; close up shop
        }
        finally {
            br.close();
        }
    }

    /** Parse "[tree|parser|lexer] grammar Name", recording modifier and name.
     *  Does nothing on empty input.
     */
    protected void grammarHeader() throws IOException {
        if ( token==null ) return;
        if ( token.equals("tree") || token.equals("parser") || token.equals("lexer") ) {
            grammarModifier=token;
            consume();
        }
        match("grammar");
        grammarName = token;
        consume(); // move beyond name
    }

    // looks like "options { backtrack true ; tokenVocab MyTokens ; }"
    /** Parse the options section, remembering only tokenVocab and language. */
    protected void options() throws IOException {
        match("options");
        match("{");
        while ( token!=null && !token.equals("}") ) {
            String name = token;
            consume();
            String value = token;
            consume();
            match(";"); // raises Error if the section is truncated
            if ( name.equals("tokenVocab") ) tokenVocab = value;
            if ( name.equals("language") ) language = value;
        }
        match("}");
    }

    // looks like "import JavaDecl JavaAnnotations JavaExpr ;"
    /** Parse the import statement; importedGrammars is left null when the
     *  statement names no grammars.
     */
    protected void imports() throws IOException {
        match("import");
        importedGrammars = new ArrayList<String>();
        while ( token!=null && !token.equals(";") ) {
            importedGrammars.add(token);
            consume();
        }
        match(";");
        if ( importedGrammars.size()==0 ) importedGrammars = null;
    }

    /** @return "tree", "parser" or "lexer"; null if the header had no modifier */
    public String getGrammarModifier() { return grammarModifier; }

    public String getGrammarName() { return grammarName; }

    /** @return value of the tokenVocab option, or null if it was not set */
    public String getTokenVocab() { return tokenVocab; }

    /** @return value of the language option; "Java" if it was not set */
    public String getLanguage() { return language; }

    /** @return names listed in the import statement, or null if there were none */
    public List<String> getImportedGrammars() { return importedGrammars; }

    /** Strip comments and then return stream of words, integers, string
     *  literals and tokens {';', ':', '{', '}', '@'}.  Every other character
     *  (whitespace, '=', ',', ...) is silently skipped.
     */
    public static class Scanner {
        public static final int EOF = -1;
        Reader input;
        /** Current lookahead character, or EOF. */
        int c;

        public Scanner(Reader input) throws IOException {
            this.input = input;
            consume();
        }

        boolean isDIGIT() { return c>='0'&&c<='9'; }
        boolean isID_START() { return c>='a'&&c<='z' || c>='A'&&c<='Z'; }
        boolean isID_LETTER() { return isID_START() || c>='0'&&c<='9' || c=='_'; }

        void consume() throws IOException { c = input.read(); }

        /** @return the next token, or null at end of input */
        public String nextToken() throws IOException {
            while ( c!=EOF ) {
                //System.out.println("check "+(char)c);
                switch ( c ) {
                    case ';' : consume(); return ";";
                    case '{' : consume(); return "{";
                    case '}' : consume(); return "}";
                    case ':' : consume(); return ":";
                    case '@' : consume(); return "@";
                    case '/' : COMMENT(); break;
                    case '\'': return STRING();
                    default:
                        if ( isID_START() ) return ID();
                        else if ( isDIGIT() ) return INT();
                        consume(); // ignore anything else
                }
            }
            return null;
        }

        /** ID : LETTER (LETTER | DIGIT | '_')* ; */
        String ID() throws IOException {
            StringBuilder buf = new StringBuilder();
            while ( c!=EOF && isID_LETTER() ) { buf.append((char)c); consume(); }
            return buf.toString();
        }

        /** INT : DIGIT+ ; */
        String INT() throws IOException {
            StringBuilder buf = new StringBuilder();
            while ( c!=EOF && isDIGIT() ) { buf.append((char)c); consume(); }
            return buf.toString();
        }

        /** Scan a single-quoted literal and return its contents without the
         *  surrounding quotes; backslash escapes are kept verbatim.  An
         *  unterminated literal simply ends at end of input.
         *
         *  Because the quotes are dropped, a literal such as 'import' yields
         *  the same token text as the bare word import.
         */
        String STRING() throws IOException {
            StringBuilder buf = new StringBuilder();
            consume(); // skip opening '
            while ( c!=EOF && c!='\'' ) {
                if ( c=='\\' ) {
                    buf.append((char)c);
                    consume();
                    if ( c==EOF ) break; // dangling backslash: don't append (char)EOF
                }
                buf.append((char)c);
                consume();
            }
            consume(); // scan past '
            return buf.toString();
        }

        /** Skip a block or line comment starting at the current '/'.  A lone
         *  '/' is just dropped.  A line comment stops in front of the newline.
         *  An unterminated block comment runs to end of input.
         */
        void COMMENT() throws IOException {
            if ( c!='/' ) return;
            consume();
            if ( c=='*' ) {
                consume();
                // stop at EOF as well as at "*/" so an unterminated comment cannot hang us
                while ( c!=EOF ) {
                    if ( c=='*' ) {
                        consume();
                        if ( c=='/' ) { consume(); return; }
                    }
                    else {
                        consume();
                    }
                }
            }
            else if ( c=='/' ) {
                while ( c!=EOF && c!='\n' ) consume();
            }
        }
    }

    /** Tester; Give grammar filename as arg */
    public static void main(String[] args) throws IOException {
        if ( args.length==0 ) {
            System.err.println("usage: java org.antlr.tool.GrammarSpelunker grammar-file");
            return;
        }
        GrammarSpelunker g = new GrammarSpelunker(".", args[0]);
        g.parse();
        System.out.println(g.grammarModifier+" grammar "+g.grammarName);
        System.out.println("language="+g.language);
        System.out.println("tokenVocab="+g.tokenVocab);
        System.out.println("imports="+g.importedGrammars);
    }
}
251