1/* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28package org.antlr.codegen; 29 30import org.antlr.Tool; 31import org.stringtemplate.v4.ST; 32import org.antlr.tool.Grammar; 33 34import java.io.IOException; 35import java.util.ArrayList; 36 37public class CTarget extends Target { 38 39 ArrayList strings = new ArrayList(); 40 41 @Override 42 protected void genRecognizerFile(Tool tool, 43 CodeGenerator generator, 44 Grammar grammar, 45 ST outputFileST) 46 throws IOException { 47 48 // Before we write this, and cause it to generate its string, 49 // we need to add all the string literals that we are going to match 50 // 51 outputFileST.add("literals", strings); 52 String fileName = generator.getRecognizerFileName(grammar.name, grammar.type); 53 generator.write(outputFileST, fileName); 54 } 55 56 @Override 57 protected void genRecognizerHeaderFile(Tool tool, 58 CodeGenerator generator, 59 Grammar grammar, 60 ST headerFileST, 61 String extName) 62 throws IOException { 63 // Pick up the file name we are generating. This method will return a 64 // a file suffixed with .c, so we must substring and add the extName 65 // to it as we cannot assign into strings in Java. 66 /// 67 String fileName = generator.getRecognizerFileName(grammar.name, grammar.type); 68 fileName = fileName.substring(0, fileName.length() - 2) + extName; 69 70 generator.write(headerFileST, fileName); 71 } 72 73 protected ST chooseWhereCyclicDFAsGo(Tool tool, 74 CodeGenerator generator, 75 Grammar grammar, 76 ST recognizerST, 77 ST cyclicDFAST) { 78 return recognizerST; 79 } 80 81 /** Is scope in @scope::name {action} valid for this kind of grammar? 82 * Targets like C++ may want to allow new scopes like headerfile or 83 * some such. The action names themselves are not policed at the 84 * moment so targets can add template actions w/o having to recompile 85 * ANTLR. 86 */ 87 @Override 88 public boolean isValidActionScope(int grammarType, String scope) { 89 switch (grammarType) { 90 case Grammar.LEXER: 91 if (scope.equals("lexer")) { 92 return true; 93 } 94 if (scope.equals("header")) { 95 return true; 96 } 97 if (scope.equals("includes")) { 98 return true; 99 } 100 if (scope.equals("preincludes")) { 101 return true; 102 } 103 if (scope.equals("overrides")) { 104 return true; 105 } 106 break; 107 case Grammar.PARSER: 108 if (scope.equals("parser")) { 109 return true; 110 } 111 if (scope.equals("header")) { 112 return true; 113 } 114 if (scope.equals("includes")) { 115 return true; 116 } 117 if (scope.equals("preincludes")) { 118 return true; 119 } 120 if (scope.equals("overrides")) { 121 return true; 122 } 123 break; 124 case Grammar.COMBINED: 125 if (scope.equals("parser")) { 126 return true; 127 } 128 if (scope.equals("lexer")) { 129 return true; 130 } 131 if (scope.equals("header")) { 132 return true; 133 } 134 if (scope.equals("includes")) { 135 return true; 136 } 137 if (scope.equals("preincludes")) { 138 return true; 139 } 140 if (scope.equals("overrides")) { 141 return true; 142 } 143 break; 144 case Grammar.TREE_PARSER: 145 if (scope.equals("treeparser")) { 146 return true; 147 } 148 if (scope.equals("header")) { 149 return true; 150 } 151 if (scope.equals("includes")) { 152 return true; 153 } 154 if (scope.equals("preincludes")) { 155 return true; 156 } 157 if (scope.equals("overrides")) { 158 return true; 159 } 160 break; 161 } 162 return false; 163 } 164 165 @Override 166 public String getTargetCharLiteralFromANTLRCharLiteral( 167 CodeGenerator generator, 168 String literal) { 169 170 if (literal.startsWith("'\\u")) { 171 literal = "0x" + literal.substring(3, 7); 172 } else { 173 int c = literal.charAt(1); 174 175 if (c < 32 || c > 127) { 176 literal = "0x" + Integer.toHexString(c); 177 } 178 } 179 180 return literal; 181 } 182 183 /** Convert from an ANTLR string literal found in a grammar file to 184 * an equivalent string literal in the C target. 185 * Because we must support Unicode character sets and have chosen 186 * to have the lexer match UTF32 characters, then we must encode 187 * string matches to use 32 bit character arrays. Here then we 188 * must produce the C array and cater for the case where the 189 * lexer has been encoded with a string such as 'xyz\n', 190 */ 191 @Override 192 public String getTargetStringLiteralFromANTLRStringLiteral( 193 CodeGenerator generator, 194 String literal) { 195 int index; 196 String bytes; 197 StringBuffer buf = new StringBuffer(); 198 199 buf.append("{ "); 200 201 // We need ot lose any escaped characters of the form \x and just 202 // replace them with their actual values as well as lose the surrounding 203 // quote marks. 204 // 205 for (int i = 1; i < literal.length() - 1; i++) { 206 buf.append("0x"); 207 208 if (literal.charAt(i) == '\\') { 209 i++; // Assume that there is a next character, this will just yield 210 // invalid strings if not, which is what the input would be of course - invalid 211 switch (literal.charAt(i)) { 212 case 'u': 213 case 'U': 214 buf.append(literal.substring(i + 1, i + 5)); // Already a hex string 215 i = i + 5; // Move to next string/char/escape 216 break; 217 218 case 'n': 219 case 'N': 220 221 buf.append("0A"); 222 break; 223 224 case 'r': 225 case 'R': 226 227 buf.append("0D"); 228 break; 229 230 case 't': 231 case 'T': 232 233 buf.append("09"); 234 break; 235 236 case 'b': 237 case 'B': 238 239 buf.append("08"); 240 break; 241 242 case 'f': 243 case 'F': 244 245 buf.append("0C"); 246 break; 247 248 default: 249 250 // Anything else is what it is! 251 // 252 buf.append(Integer.toHexString((int) literal.charAt(i)).toUpperCase()); 253 break; 254 } 255 } else { 256 buf.append(Integer.toHexString((int) literal.charAt(i)).toUpperCase()); 257 } 258 buf.append(", "); 259 } 260 buf.append(" ANTLR3_STRING_TERMINATOR}"); 261 262 bytes = buf.toString(); 263 index = strings.indexOf(bytes); 264 265 if (index == -1) { 266 strings.add(bytes); 267 index = strings.indexOf(bytes); 268 } 269 270 String strref = "lit_" + String.valueOf(index + 1); 271 272 return strref; 273 } 274 275 /** 276 * Overrides the standard grammar analysis so we can prepare the analyser 277 * a little differently from the other targets. 278 * 279 * In particular we want to influence the way the code generator makes assumptions about 280 * switchs vs ifs, vs table driven DFAs. In general, C code should be generated that 281 * has the minimum use of tables, and tha meximum use of large switch statements. This 282 * allows the optimizers to generate very efficient code, it can reduce object code size 283 * by about 30% and give about a 20% performance improvement over not doing this. Hence, 284 * for the C target only, we change the defaults here, but only if they are still set to the 285 * defaults. 286 * 287 * @param generator An instance of the generic code generator class. 288 * @param grammar The grammar that we are currently analyzing 289 */ 290 @Override 291 protected void performGrammarAnalysis(CodeGenerator generator, Grammar grammar) { 292 293 // Check to see if the maximum inline DFA states is still set to 294 // the default size. If it is then whack it all the way up to the maximum that 295 // we can sensibly get away with. 296 // 297 if (CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE == CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE ) { 298 299 CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE = 65535; 300 } 301 302 // Check to see if the maximum switch size is still set to the default 303 // and bring it up much higher if it is. Modern C compilers can handle 304 // much bigger switch statements than say Java can and if anyone finds a compiler 305 // that cannot deal with such big switches, all the need do is generate the 306 // code with a reduced -Xmaxswitchcaselabels nnn 307 // 308 if (CodeGenerator.MAX_SWITCH_CASE_LABELS == CodeGenerator.MSCL_DEFAULT) { 309 310 CodeGenerator.MAX_SWITCH_CASE_LABELS = 3000; 311 } 312 313 // Check to see if the number of transitions considered a miminum for using 314 // a switch is still at the default. Because a switch is still generally faster than 315 // an if even with small sets, and given that the optimizer will do the best thing with it 316 // anyway, then we simply want to generate a switch for any number of states. 317 // 318 if (CodeGenerator.MIN_SWITCH_ALTS == CodeGenerator.MSA_DEFAULT) { 319 320 CodeGenerator.MIN_SWITCH_ALTS = 1; 321 } 322 323 // Now we allow the superclass implementation to do whatever it feels it 324 // must do. 325 // 326 super.performGrammarAnalysis(generator, grammar); 327 } 328} 329 330