1/*
2 * [The "BSD license"]
3 *  Copyright (c) 2010 Terence Parr
4 *  All rights reserved.
5 *
6 *  Redistribution and use in source and binary forms, with or without
7 *  modification, are permitted provided that the following conditions
8 *  are met:
9 *  1. Redistributions of source code must retain the above copyright
10 *      notice, this list of conditions and the following disclaimer.
11 *  2. Redistributions in binary form must reproduce the above copyright
12 *      notice, this list of conditions and the following disclaimer in the
13 *      documentation and/or other materials provided with the distribution.
14 *  3. The name of the author may not be used to endorse or promote products
15 *      derived from this software without specific prior written permission.
16 *
17 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28package org.antlr.codegen;
29
30import org.antlr.Tool;
31import org.stringtemplate.v4.ST;
32import org.antlr.tool.Grammar;
33
34import java.io.IOException;
35import java.util.ArrayList;
36
37public class CTarget extends Target {
38
39    ArrayList strings = new ArrayList();
40
41    @Override
42    protected void genRecognizerFile(Tool tool,
43            CodeGenerator generator,
44            Grammar grammar,
45            ST outputFileST)
46            throws IOException {
47
48        // Before we write this, and cause it to generate its string,
49        // we need to add all the string literals that we are going to match
50        //
51        outputFileST.add("literals", strings);
52        String fileName = generator.getRecognizerFileName(grammar.name, grammar.type);
53        generator.write(outputFileST, fileName);
54    }
55
56    @Override
57    protected void genRecognizerHeaderFile(Tool tool,
58            CodeGenerator generator,
59            Grammar grammar,
60            ST headerFileST,
61            String extName)
62            throws IOException {
63        // Pick up the file name we are generating. This method will return a
64        // a file suffixed with .c, so we must substring and add the extName
65        // to it as we cannot assign into strings in Java.
66        ///
67        String fileName = generator.getRecognizerFileName(grammar.name, grammar.type);
68        fileName = fileName.substring(0, fileName.length() - 2) + extName;
69
70        generator.write(headerFileST, fileName);
71    }
72
73    protected ST chooseWhereCyclicDFAsGo(Tool tool,
74            CodeGenerator generator,
75            Grammar grammar,
76            ST recognizerST,
77            ST cyclicDFAST) {
78        return recognizerST;
79    }
80
81    /** Is scope in @scope::name {action} valid for this kind of grammar?
82     *  Targets like C++ may want to allow new scopes like headerfile or
83     *  some such.  The action names themselves are not policed at the
84     *  moment so targets can add template actions w/o having to recompile
85     *  ANTLR.
86     */
87    @Override
88    public boolean isValidActionScope(int grammarType, String scope) {
89        switch (grammarType) {
90            case Grammar.LEXER:
91                if (scope.equals("lexer")) {
92                    return true;
93                }
94                if (scope.equals("header")) {
95                    return true;
96                }
97                if (scope.equals("includes")) {
98                    return true;
99                }
100                if (scope.equals("preincludes")) {
101                    return true;
102                }
103                if (scope.equals("overrides")) {
104                    return true;
105                }
106                break;
107            case Grammar.PARSER:
108                if (scope.equals("parser")) {
109                    return true;
110                }
111                if (scope.equals("header")) {
112                    return true;
113                }
114                if (scope.equals("includes")) {
115                    return true;
116                }
117                if (scope.equals("preincludes")) {
118                    return true;
119                }
120                if (scope.equals("overrides")) {
121                    return true;
122                }
123                break;
124            case Grammar.COMBINED:
125                if (scope.equals("parser")) {
126                    return true;
127                }
128                if (scope.equals("lexer")) {
129                    return true;
130                }
131                if (scope.equals("header")) {
132                    return true;
133                }
134                if (scope.equals("includes")) {
135                    return true;
136                }
137                if (scope.equals("preincludes")) {
138                    return true;
139                }
140                if (scope.equals("overrides")) {
141                    return true;
142                }
143                break;
144            case Grammar.TREE_PARSER:
145                if (scope.equals("treeparser")) {
146                    return true;
147                }
148                if (scope.equals("header")) {
149                    return true;
150                }
151                if (scope.equals("includes")) {
152                    return true;
153                }
154                if (scope.equals("preincludes")) {
155                    return true;
156                }
157                if (scope.equals("overrides")) {
158                    return true;
159                }
160                break;
161        }
162        return false;
163    }
164
165    @Override
166    public String getTargetCharLiteralFromANTLRCharLiteral(
167            CodeGenerator generator,
168            String literal) {
169
170        if (literal.startsWith("'\\u")) {
171            literal = "0x" + literal.substring(3, 7);
172        } else {
173            int c = literal.charAt(1);
174
175            if (c < 32 || c > 127) {
176                literal = "0x" + Integer.toHexString(c);
177            }
178        }
179
180        return literal;
181    }
182
183    /** Convert from an ANTLR string literal found in a grammar file to
184     *  an equivalent string literal in the C target.
185     *  Because we must support Unicode character sets and have chosen
186     *  to have the lexer match UTF32 characters, then we must encode
187     *  string matches to use 32 bit character arrays. Here then we
188     *  must produce the C array and cater for the case where the
189     *  lexer has been encoded with a string such as 'xyz\n',
190     */
191    @Override
192    public String getTargetStringLiteralFromANTLRStringLiteral(
193            CodeGenerator generator,
194            String literal) {
195        int index;
196        String bytes;
197        StringBuffer buf = new StringBuffer();
198
199        buf.append("{ ");
200
201        // We need ot lose any escaped characters of the form \x and just
202        // replace them with their actual values as well as lose the surrounding
203        // quote marks.
204        //
205        for (int i = 1; i < literal.length() - 1; i++) {
206            buf.append("0x");
207
208            if (literal.charAt(i) == '\\') {
209                i++; // Assume that there is a next character, this will just yield
210                // invalid strings if not, which is what the input would be of course - invalid
211                switch (literal.charAt(i)) {
212                    case 'u':
213                    case 'U':
214                        buf.append(literal.substring(i + 1, i + 5));  // Already a hex string
215                        i = i + 5;                                // Move to next string/char/escape
216                        break;
217
218                    case 'n':
219                    case 'N':
220
221                        buf.append("0A");
222                        break;
223
224                    case 'r':
225                    case 'R':
226
227                        buf.append("0D");
228                        break;
229
230                    case 't':
231                    case 'T':
232
233                        buf.append("09");
234                        break;
235
236                    case 'b':
237                    case 'B':
238
239                        buf.append("08");
240                        break;
241
242                    case 'f':
243                    case 'F':
244
245                        buf.append("0C");
246                        break;
247
248                    default:
249
250                        // Anything else is what it is!
251                        //
252                        buf.append(Integer.toHexString((int) literal.charAt(i)).toUpperCase());
253                        break;
254                }
255            } else {
256                buf.append(Integer.toHexString((int) literal.charAt(i)).toUpperCase());
257            }
258            buf.append(", ");
259        }
260        buf.append(" ANTLR3_STRING_TERMINATOR}");
261
262        bytes = buf.toString();
263        index = strings.indexOf(bytes);
264
265        if (index == -1) {
266            strings.add(bytes);
267            index = strings.indexOf(bytes);
268        }
269
270        String strref = "lit_" + String.valueOf(index + 1);
271
272        return strref;
273    }
274
275    /**
276     * Overrides the standard grammar analysis so we can prepare the analyser
277     * a little differently from the other targets.
278     *
279     * In particular we want to influence the way the code generator makes assumptions about
280     * switchs vs ifs, vs table driven DFAs. In general, C code should be generated that
281     * has the minimum use of tables, and tha meximum use of large switch statements. This
282     * allows the optimizers to generate very efficient code, it can reduce object code size
283     * by about 30% and give about a 20% performance improvement over not doing this. Hence,
284     * for the C target only, we change the defaults here, but only if they are still set to the
285     * defaults.
286     *
287     * @param generator An instance of the generic code generator class.
288     * @param grammar The grammar that we are currently analyzing
289     */
290    @Override
291    protected void performGrammarAnalysis(CodeGenerator generator, Grammar grammar) {
292
293        // Check to see if the maximum inline DFA states is still set to
294        // the default size. If it is then whack it all the way up to the maximum that
295        // we can sensibly get away with.
296        //
297        if (CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE == CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE ) {
298
299            CodeGenerator.MAX_ACYCLIC_DFA_STATES_INLINE = 65535;
300        }
301
302        // Check to see if the maximum switch size is still set to the default
303        // and bring it up much higher if it is. Modern C compilers can handle
304        // much bigger switch statements than say Java can and if anyone finds a compiler
305        // that cannot deal with such big switches, all the need do is generate the
306        // code with a reduced -Xmaxswitchcaselabels nnn
307        //
308        if  (CodeGenerator.MAX_SWITCH_CASE_LABELS == CodeGenerator.MSCL_DEFAULT) {
309
310            CodeGenerator.MAX_SWITCH_CASE_LABELS = 3000;
311        }
312
313        // Check to see if the number of transitions considered a miminum for using
314        // a switch is still at the default. Because a switch is still generally faster than
315        // an if even with small sets, and given that the optimizer will do the best thing with it
316        // anyway, then we simply want to generate a switch for any number of states.
317        //
318        if (CodeGenerator.MIN_SWITCH_ALTS == CodeGenerator.MSA_DEFAULT) {
319
320            CodeGenerator.MIN_SWITCH_ALTS = 1;
321        }
322
323        // Now we allow the superclass implementation to do whatever it feels it
324        // must do.
325        //
326        super.performGrammarAnalysis(generator, grammar);
327    }
328}
329
330