1324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/* 2324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * [The "BSD license"] 3324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Copyright (c) 2010 Terence Parr 4324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * All rights reserved. 5324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 6324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Redistribution and use in source and binary forms, with or without 7324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * modification, are permitted provided that the following conditions 8324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * are met: 9324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 1. Redistributions of source code must retain the above copyright 10324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * notice, this list of conditions and the following disclaimer. 11324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 2. Redistributions in binary form must reproduce the above copyright 12324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * notice, this list of conditions and the following disclaimer in the 13324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * documentation and/or other materials provided with the distribution. 14324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 3. The name of the author may not be used to endorse or promote products 15324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * derived from this software without specific prior written permission. 16324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 17324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 28324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverpackage org.antlr.codegen; 29324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 30324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport org.antlr.Tool; 31324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport org.antlr.analysis.Label; 32324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport org.antlr.runtime.Token; 33324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport org.stringtemplate.v4.ST; 34324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport org.antlr.tool.Grammar; 35324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 36324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport java.io.IOException; 37324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport java.util.List; 38324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 39324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** The code generator for ANTLR can usually be retargeted just by providing 40324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * a new X.stg file for language X, however, sometimes the files that must 41324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * be generated vary enough that some X-specific functionality is required. 42324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * For example, in C, you must generate header files whereas in Java you do not. 43324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Other languages may want to keep DFA separate from the main 44324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * generated recognizer file. 45324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 46324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * The notion of a Code Generator target abstracts out the creation 47324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * of the various files. As new language targets get added to the ANTLR 48324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * system, this target class may have to be altered to handle more 49324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * functionality. Eventually, just about all language generation issues 50324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * will be expressible in terms of these methods. 51324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 52324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * If org.antlr.codegen.XTarget class exists, it is used else 53324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Target base class is used. I am using a superclass rather than an 54324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * interface for this target concept because I can add functionality 55324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * later without breaking previously written targets (extra interface 56324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * methods would force adding dummy functions to all code generator 57324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * target classes). 58324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 59324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 60324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverpublic class Target { 61324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 62324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** For pure strings of Java 16-bit unicode char, how can we display 63324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * it in the target language as a literal. Useful for dumping 64324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * predicates and such that may refer to chars that need to be escaped 65324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * when represented as strings. Also, templates need to be escaped so 66324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * that the target language can hold them as a string. 67324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 68324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * I have defined (via the constructor) the set of typical escapes, 69324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * but your Target subclass is free to alter the translated chars or 70324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * add more definitions. This is nonstatic so each target can have 71324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * a different set in memory at same time. 72324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 73324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver protected String[] targetCharValueEscape = new String[255]; 74324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 75324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public Target() { 76324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver targetCharValueEscape['\n'] = "\\n"; 77324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver targetCharValueEscape['\r'] = "\\r"; 78324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver targetCharValueEscape['\t'] = "\\t"; 79324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver targetCharValueEscape['\b'] = "\\b"; 80324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver targetCharValueEscape['\f'] = "\\f"; 81324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver targetCharValueEscape['\\'] = "\\\\"; 82324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver targetCharValueEscape['\''] = "\\'"; 83324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver targetCharValueEscape['"'] = "\\\""; 84324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 85324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 86324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver protected void genRecognizerFile(Tool tool, 87324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver CodeGenerator generator, 88324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Grammar grammar, 89324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ST outputFileST) 90324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver throws IOException 91324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 92324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver String fileName = 93324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver generator.getRecognizerFileName(grammar.name, grammar.type); 94324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver generator.write(outputFileST, fileName); 95324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 96324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 97324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver protected void genRecognizerHeaderFile(Tool tool, 98324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver CodeGenerator generator, 99324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Grammar grammar, 100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ST headerFileST, 101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver String extName) // e.g., ".h" 102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver throws IOException 103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // no header file by default 105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver protected void performGrammarAnalysis(CodeGenerator generator, 108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Grammar grammar) 109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Build NFAs from the grammar AST 111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver grammar.buildNFA(); 112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Create the DFA predictors for each decision 114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver grammar.createLookaheadDFAs(); 115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Is scope in @scope::name {action} valid for this kind of grammar? 118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Targets like C++ may want to allow new scopes like headerfile or 119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * some such. The action names themselves are not policed at the 120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * moment so targets can add template actions w/o having to recompile 121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * ANTLR. 122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public boolean isValidActionScope(int grammarType, String scope) { 124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver switch (grammarType) { 125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case Grammar.LEXER : 126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( scope.equals("lexer") ) {return true;} 127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver break; 128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case Grammar.PARSER : 129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( scope.equals("parser") ) {return true;} 130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver break; 131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case Grammar.COMBINED : 132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( scope.equals("parser") ) {return true;} 133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( scope.equals("lexer") ) {return true;} 134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver break; 135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case Grammar.TREE_PARSER : 136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( scope.equals("treeparser") ) {return true;} 137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver break; 138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return false; 140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Target must be able to override the labels used for token types */ 143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public String getTokenTypeAsTargetLabel(CodeGenerator generator, int ttype) { 144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver String name = generator.grammar.getTokenDisplayName(ttype); 145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If name is a literal, return the token type instead 146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( name.charAt(0)=='\'' ) { 147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return String.valueOf(ttype); 148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return name; 150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Convert from an ANTLR char literal found in a grammar file to 153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * an equivalent char literal in the target language. For most 154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * languages, this means leaving 'x' as 'x'. Actually, we need 155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * to escape '\u000A' so that it doesn't get converted to \n by 156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the compiler. Convert the literal to the char value and then 157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * to an appropriate target char literal. 158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Expect single quotes around the incoming literal. 160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public String getTargetCharLiteralFromANTLRCharLiteral( 162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver CodeGenerator generator, 163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver String literal) 164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StringBuffer buf = new StringBuffer(); 166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver buf.append('\''); 167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int c = Grammar.getCharValueFromGrammarCharLiteral(literal); 168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( c<Label.MIN_CHAR_VALUE ) { 169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return "'\u0000'"; 170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( c<targetCharValueEscape.length && 172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver targetCharValueEscape[c]!=null ) 173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver buf.append(targetCharValueEscape[c]); 175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else if ( Character.UnicodeBlock.of((char)c)== 177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Character.UnicodeBlock.BASIC_LATIN && 178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver !Character.isISOControl((char)c) ) 179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // normal char 181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver buf.append((char)c); 182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else { 184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // must be something unprintable...use \\uXXXX 185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // turn on the bit above max "\\uFFFF" value so that we pad with zeros 186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // then only take last 4 digits 187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5); 188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver buf.append("\\u"); 189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver buf.append(hex); 190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver buf.append('\''); 193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return buf.toString(); 194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Convert from an ANTLR string literal found in a grammar file to 197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * an equivalent string literal in the target language. For Java, this 198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * is the translation 'a\n"' -> "a\n\"". Expect single quotes 199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * around the incoming literal. Just flip the quotes and replace 200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * double quotes with \" 201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Note that we have decided to allow poeple to use '\"' without 203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * penalty, so we must build the target string in a loop as Utils.replae 204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * cannot handle both \" and " without a lot of messing around. 205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public String getTargetStringLiteralFromANTLRStringLiteral( 208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver CodeGenerator generator, 209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver String literal) 210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StringBuilder sb = new StringBuilder(); 212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StringBuffer is = new StringBuffer(literal); 213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Opening quote 215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver sb.append('"'); 217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for (int i = 1; i < is.length() -1; i++) { 219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (is.charAt(i) == '\\') { 220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Anything escaped is what it is! We assume that 221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // people know how to escape characters correctly. However 222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // we catch anything that does not need an escape in Java (which 223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // is what the default implementation is dealing with and remove 224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // the escape. The C target does this for instance. 225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver switch (is.charAt(i+1)) { 227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Pass through any escapes that Java also needs 228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 229324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case '"': 230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 'n': 231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 'r': 232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 't': 233324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 'b': 234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 'f': 235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case '\\': 236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 'u': // Assume unnnn 237324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver sb.append('\\'); // Pass the escape through 238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver break; 239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver default: 240324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Remove the escape by virtue of not adding it here 241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Thus \' becomes ' and so on 242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver break; 244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Go past the \ character 247324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver i++; 249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } else { 250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Chracters that don't need \ in ANTLR 'strings' but do in Java 251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 252324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (is.charAt(i) == '"') { 253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We need to escape " in Java 254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver sb.append('\\'); 256324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 257324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Add in the next character, which may have been escaped 259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 260324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver sb.append(is.charAt(i)); 261324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 262324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 263324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Append closing " and return 264324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 265324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver sb.append('"'); 266324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 267324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return sb.toString(); 268324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 269324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 270324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Given a random string of Java unicode chars, return a new string with 271324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * optionally appropriate quote characters for target language and possibly 272324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * with some escaped characters. For example, if the incoming string has 273324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * actual newline characters, the output of this method would convert them 274324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * to the two char sequence \n for Java, C, C++, ... The new string has 275324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * double-quotes around it as well. Example String in memory: 276324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 277324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * a"[newlinechar]b'c[carriagereturnchar]d[tab]e\f 278324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 279324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * would be converted to the valid Java s: 280324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 281324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * "a\"\nb'c\rd\te\\f" 282324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 283324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * or 284324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 285324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * a\"\nb'c\rd\te\\f 286324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 287324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * depending on the quoted arg. 288324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 289324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public String getTargetStringLiteralFromString(String s, boolean quoted) { 290324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( s==null ) { 291324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return null; 292324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 293324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 294324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StringBuffer buf = new StringBuffer(); 295324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( quoted ) { 296324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver buf.append('"'); 297324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 298324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for (int i=0; i<s.length(); i++) { 299324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int c = s.charAt(i); 300324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( c!='\'' && // don't escape single quotes in strings for java 301324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver c<targetCharValueEscape.length && 302324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver targetCharValueEscape[c]!=null ) 303324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 304324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver buf.append(targetCharValueEscape[c]); 305324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 306324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else { 307324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver buf.append((char)c); 308324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 309324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 310324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( quoted ) { 311324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver buf.append('"'); 312324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 313324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return buf.toString(); 314324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 315324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 316324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public String getTargetStringLiteralFromString(String s) { 317324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return getTargetStringLiteralFromString(s, false); 318324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 319324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 320324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Convert long to 0xNNNNNNNNNNNNNNNN by default for spitting out 321324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * with bitsets. I.e., convert bytes to hex string. 322324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 323324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public String getTarget64BitStringFromValue(long word) { 324324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int numHexDigits = 8*2; 325324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StringBuffer buf = new StringBuffer(numHexDigits+2); 326324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver buf.append("0x"); 327324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver String digits = Long.toHexString(word); 328324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver digits = digits.toUpperCase(); 329324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int padding = numHexDigits - digits.length(); 330324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // pad left with zeros 331324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for (int i=1; i<=padding; i++) { 332324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver buf.append('0'); 333324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 334324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver buf.append(digits); 335324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return buf.toString(); 336324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 337324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 338324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public String encodeIntAsCharEscape(int v) { 339324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( v<=127 ) { 340324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return "\\"+Integer.toOctalString(v); 341324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 342324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver String hex = Integer.toHexString(v|0x10000).substring(1,5); 343324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return "\\u"+hex; 344324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 345324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 346324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Some targets only support ASCII or 8-bit chars/strings. For example, 347324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * C++ will probably want to return 0xFF here. 348324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 349324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public int getMaxCharValue(CodeGenerator generator) { 350324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return Label.MAX_CHAR_VALUE; 351324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 352324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 353324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Give target a chance to do some postprocessing on actions. 354324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Python for example will have to fix the indention. 355324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 356324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public List postProcessAction(List chunks, Token actionToken) { 357324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return chunks; 358324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 359324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 360324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 361