GrammarSpelunker.java revision 324c4644fee44b9898524c09511bd33c3f12e2df
1/* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28package org.antlr.tool; 29 30import java.io.*; 31import java.util.ArrayList; 32import java.util.List; 33 34/** Load a grammar file and scan it just until we learn a few items 35 * of interest. Currently: name, type, imports, tokenVocab, language option. 36 * 37 * GrammarScanner (at bottom of this class) converts grammar to stuff like: 38 * 39 * grammar Java ; options { backtrack true memoize true } 40 * import JavaDecl JavaAnnotations JavaExpr ; 41 * ... : ... 42 * 43 * First ':' or '@' indicates we can stop looking for imports/options. 44 * 45 * Then we just grab interesting grammar properties. 46 */ 47public class GrammarSpelunker { 48 protected String grammarFileName; 49 protected String token; 50 protected Scanner scanner; 51 52 // grammar info / properties 53 protected String grammarModifier; 54 protected String grammarName; 55 protected String tokenVocab; 56 protected String language = "Java"; // default 57 protected String inputDirectory; 58 protected List<String> importedGrammars; 59 60 public GrammarSpelunker(String inputDirectory, String grammarFileName) { 61 this.inputDirectory = inputDirectory; 62 this.grammarFileName = grammarFileName; 63 } 64 65 void consume() throws IOException { token = scanner.nextToken(); } 66 67 protected void match(String expecting) throws IOException { 68 //System.out.println("match "+expecting+"; is "+token); 69 if ( token.equals(expecting) ) consume(); 70 else throw new Error("Error parsing "+grammarFileName+": '"+token+ 71 "' not expected '"+expecting+"'"); 72 } 73 74 public void parse() throws IOException { 75 Reader r = new FileReader((inputDirectory != null ? inputDirectory + File.separator : "") + grammarFileName); 76 BufferedReader br = new BufferedReader(r); 77 try { 78 scanner = new Scanner(br); 79 consume(); 80 grammarHeader(); 81 // scan until imports or options 82 while ( token!=null && !token.equals("@") && !token.equals(":") && 83 !token.equals("import") && !token.equals("options") ) 84 { 85 consume(); 86 } 87 if ( token.equals("options") ) options(); 88 // scan until options or first rule 89 while ( token!=null && !token.equals("@") && !token.equals(":") && 90 !token.equals("import") ) 91 { 92 consume(); 93 } 94 if ( token.equals("import") ) imports(); 95 // ignore rest of input; close up shop 96 } 97 finally { 98 if ( br!=null ) br.close(); 99 } 100 } 101 102 protected void grammarHeader() throws IOException { 103 if ( token==null ) return; 104 if ( token.equals("tree") || token.equals("parser") || token.equals("lexer") ) { 105 grammarModifier=token; 106 consume(); 107 } 108 match("grammar"); 109 grammarName = token; 110 consume(); // move beyond name 111 } 112 113 // looks like "options { backtrack true ; tokenVocab MyTokens ; }" 114 protected void options() throws IOException { 115 match("options"); 116 match("{"); 117 while ( token!=null && !token.equals("}") ) { 118 String name = token; 119 consume(); 120 String value = token; 121 consume(); 122 match(";"); 123 if ( name.equals("tokenVocab") ) tokenVocab = value; 124 if ( name.equals("language") ) language = value; 125 } 126 match("}"); 127 } 128 129 // looks like "import JavaDecl JavaAnnotations JavaExpr ;" 130 protected void imports() throws IOException { 131 match("import"); 132 importedGrammars = new ArrayList<String>(); 133 while ( token!=null && !token.equals(";") ) { 134 importedGrammars.add(token); 135 consume(); 136 } 137 match(";"); 138 if ( importedGrammars.size()==0 ) importedGrammars = null; 139 } 140 141 public String getGrammarModifier() { return grammarModifier; } 142 143 public String getGrammarName() { return grammarName; } 144 145 public String getTokenVocab() { return tokenVocab; } 146 147 public String getLanguage() { return language; } 148 149 public List<String> getImportedGrammars() { return importedGrammars; } 150 151 /** Strip comments and then return stream of words and 152 * tokens {';', ':', '{', '}'} 153 */ 154 public static class Scanner { 155 public static final int EOF = -1; 156 Reader input; 157 int c; 158 159 public Scanner(Reader input) throws IOException { 160 this.input = input; 161 consume(); 162 } 163 164 boolean isDIGIT() { return c>='0'&&c<='9'; } 165 boolean isID_START() { return c>='a'&&c<='z' || c>='A'&&c<='Z'; } 166 boolean isID_LETTER() { return isID_START() || c>='0'&&c<='9' || c=='_'; } 167 168 void consume() throws IOException { c = input.read(); } 169 170 public String nextToken() throws IOException { 171 while ( c!=EOF ) { 172 //System.out.println("check "+(char)c); 173 switch ( c ) { 174 case ';' : consume(); return ";"; 175 case '{' : consume(); return "{"; 176 case '}' : consume(); return "}"; 177 case ':' : consume(); return ":"; 178 case '@' : consume(); return "@"; 179 case '/' : COMMENT(); break; 180 case '\'': return STRING(); 181 default: 182 if ( isID_START() ) return ID(); 183 else if ( isDIGIT() ) return INT(); 184 consume(); // ignore anything else 185 } 186 } 187 return null; 188 } 189 190 /** NAME : LETTER+ ; // NAME is sequence of >=1 letter */ 191 String ID() throws IOException { 192 StringBuffer buf = new StringBuffer(); 193 while ( c!=EOF && isID_LETTER() ) { buf.append((char)c); consume(); } 194 return buf.toString(); 195 } 196 197 String INT() throws IOException { 198 StringBuffer buf = new StringBuffer(); 199 while ( c!=EOF && isDIGIT() ) { buf.append((char)c); consume(); } 200 return buf.toString(); 201 } 202 203 String STRING() throws IOException { 204 StringBuffer buf = new StringBuffer(); 205 consume(); 206 while ( c!=EOF && c!='\'' ) { 207 if ( c=='\\' ) { 208 buf.append((char)c); 209 consume(); 210 } 211 buf.append((char)c); 212 consume(); 213 } 214 consume(); // scan past ' 215 return buf.toString(); 216 } 217 218 void COMMENT() throws IOException { 219 if ( c=='/' ) { 220 consume(); 221 if ( c=='*' ) { 222 consume(); 223 scarf: 224 while ( true ) { 225 if ( c=='*' ) { 226 consume(); 227 if ( c=='/' ) { consume(); break scarf; } 228 } 229 else { 230 while ( c!=EOF && c!='*' ) consume(); 231 } 232 } 233 } 234 else if ( c=='/' ) { 235 while ( c!=EOF && c!='\n' ) consume(); 236 } 237 } 238 } 239 } 240 241 /** Tester; Give grammar filename as arg */ 242 public static void main(String[] args) throws IOException { 243 GrammarSpelunker g = new GrammarSpelunker(".", args[0]); 244 g.parse(); 245 System.out.println(g.grammarModifier+" grammar "+g.grammarName); 246 System.out.println("language="+g.language); 247 System.out.println("tokenVocab="+g.tokenVocab); 248 System.out.println("imports="+g.importedGrammars); 249 } 250} 251