/* * [The "BSD license"] * Copyright (c) 2010 Terence Parr * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.antlr.codegen; import org.antlr.Tool; import org.antlr.analysis.DFA; import org.antlr.analysis.*; import org.antlr.grammar.v3.ANTLRLexer; import org.antlr.grammar.v3.ANTLRParser; import org.antlr.grammar.v3.ActionTranslator; import org.antlr.grammar.v3.CodeGenTreeWalker; import org.antlr.misc.BitSet; import org.antlr.misc.*; import org.antlr.runtime.*; import org.antlr.runtime.tree.CommonTreeNodeStream; import org.antlr.tool.*; import org.stringtemplate.v4.*; import java.io.IOException; import java.io.Writer; import java.util.*; /** ANTLR's code generator. * * Generate recognizers derived from grammars. Language independence * achieved through the use of STGroup objects. All output * strings are completely encapsulated in the group files such as Java.stg. * Some computations are done that are unused by a particular language. * This generator just computes and sets the values into the templates; * the templates are free to use or not use the information. * * To make a new code generation target, define X.stg for language X * by copying from existing Y.stg most closely releated to your language; * e.g., to do CSharp.stg copy Java.stg. The template group file has a * bunch of templates that are needed by the code generator. You can add * a new target w/o even recompiling ANTLR itself. The language=X option * in a grammar file dictates which templates get loaded/used. * * Some language like C need both parser files and header files. Java needs * to have a separate file for the cyclic DFA as ANTLR generates bytecodes * directly (which cannot be in the generated parser Java file). To facilitate * this, * * cyclic can be in same file, but header, output must be searpate. recognizer * is in outptufile. */ public class CodeGenerator { /** When generating SWITCH statements, some targets might need to limit * the size (based upon the number of case labels). Generally, this * limit will be hit only for lexers where wildcard in a UNICODE * vocabulary environment would generate a SWITCH with 65000 labels. 
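     *
     *  For example, a lexer wildcard '.' over the full 16-bit Unicode range
     *  (0..0xFFFF) would otherwise emit one case label per character;
     *  MAX_SWITCH_CASE_LABELS caps that and forces an if-then-else chain or
     *  a table-driven DFA instead.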
     */
    public final static int MSCL_DEFAULT = 300;
    public static int MAX_SWITCH_CASE_LABELS = MSCL_DEFAULT;
    public final static int MSA_DEFAULT = 3;
    public static int MIN_SWITCH_ALTS = MSA_DEFAULT;
    public boolean GENERATE_SWITCHES_WHEN_POSSIBLE = true;
    public static boolean LAUNCH_ST_INSPECTOR = false;
    public final static int MADSI_DEFAULT = 60; // do lots of states inline (needed for expression rules)
    public static int MAX_ACYCLIC_DFA_STATES_INLINE = MADSI_DEFAULT;

    public static String classpathTemplateRootDirectoryName =
        "org/antlr/codegen/templates";

    /** Which grammar are we generating code for?  Each generator
     *  is attached to a specific grammar.
     */
    public Grammar grammar;

    /** What language are we generating? */
    protected String language;

    /** The target specifies how to write out files and do other language
     *  specific actions.
     */
    public Target target = null;

    /** Where are the templates this generator should use to generate code? */
    protected STGroup templates;

    /** The basic output templates without AST or templates stuff; this will be
     *  the templates loaded for the language such as Java.stg *and* the Dbg
     *  stuff if turned on.  This is used for generating syntactic predicates.
     */
    protected STGroup baseTemplates;

    protected ST recognizerST;
    protected ST outputFileST;
    protected ST headerFileST;

    /** Used to create unique labels */
    protected int uniqueLabelNumber = 1;

    /** A reference to the ANTLR tool so we can learn about output directories
     *  and such.
     */
    protected Tool tool;

    /** Generate debugging event method calls */
    protected boolean debug;

    /** Create a Tracer object and make the recognizer invoke this. */
    protected boolean trace;

    /** Track runtime parsing information about decisions etc...
     *  This requires the debugging event mechanism to work.
     */
    protected boolean profile;

    protected int lineWidth = 72;

    /** I have factored out the generation of acyclic DFAs to a separate class */
    public ACyclicDFACodeGenerator acyclicDFAGenerator =
        new ACyclicDFACodeGenerator(this);

    /** I have factored out the generation of cyclic DFAs to a separate class */
    /*
    public CyclicDFACodeGenerator cyclicDFAGenerator =
        new CyclicDFACodeGenerator(this);
    */

    public static final String VOCAB_FILE_EXTENSION = ".tokens";
    protected final static String vocabFilePattern =
        "<tokens:{it|<it.name>=<it.type>\n}>" +
        "<literals:{it|<it.name>=<it.type>\n}>";

    public CodeGenerator(Tool tool, Grammar grammar, String language) {
        this.tool = tool;
        this.grammar = grammar;
        this.language = language;
        target = loadLanguageTarget(language);
    }

    public static Target loadLanguageTarget(String language) {
        Target target = null;
        String targetName = "org.antlr.codegen."+language+"Target";
        try {
            Class<?> c = Class.forName(targetName);
            target = (Target)c.newInstance();
        }
        catch (ClassNotFoundException cnfe) {
            target = new Target(); // use default
        }
        catch (InstantiationException ie) {
            ErrorManager.error(ErrorManager.MSG_CANNOT_CREATE_TARGET_GENERATOR,
                               targetName,
                               ie);
        }
        catch (IllegalAccessException cnfe) {
            ErrorManager.error(ErrorManager.MSG_CANNOT_CREATE_TARGET_GENERATOR,
                               targetName,
                               cnfe);
        }
        return target;
    }

    /** Load the main language.stg template group file. */
    public void loadTemplates(String language) {
        String langDir = classpathTemplateRootDirectoryName+"/"+language;
        STGroup coreTemplates = new STGroupFile(langDir+"/"+language+".stg");
        baseTemplates = coreTemplates;
        if ( coreTemplates==null ) {
            ErrorManager.error(ErrorManager.MSG_MISSING_CODE_GEN_TEMPLATES,
                               language);
            return;
        }

        // dynamically add subgroups that act like filters to apply to
        // their supergroup.  E.g., Java:Dbg:AST:ASTParser::ASTDbg.
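        // For example (illustrative), a parser grammar with output=AST plus
        // debugging ends up with this import chain, where each group
        // overrides templates of the group it imports:
        //
        //    ASTDbg.stg -> ASTParser.stg -> AST.stg -> Dbg.stg -> Java.stg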
        String outputOption = (String)grammar.getOption("output");
        if ( outputOption!=null && outputOption.equals("AST") ) {
            if ( debug && grammar.type!=Grammar.LEXER ) {
                STGroup dbgTemplates = new STGroupFile(langDir+"/Dbg.stg");
                dbgTemplates.importTemplates(coreTemplates);
                baseTemplates = dbgTemplates;
                STGroup astTemplates = new STGroupFile(langDir+"/AST.stg");
                astTemplates.importTemplates(dbgTemplates);
                STGroup astParserTemplates = astTemplates;
                if ( grammar.type==Grammar.TREE_PARSER ) {
                    astParserTemplates = new STGroupFile(langDir+"/ASTTreeParser.stg");
                    astParserTemplates.importTemplates(astTemplates);
                }
                else {
                    astParserTemplates = new STGroupFile(langDir+"/ASTParser.stg");
                    astParserTemplates.importTemplates(astTemplates);
                }
                STGroup astDbgTemplates = new STGroupFile(langDir+"/ASTDbg.stg");
                astDbgTemplates.importTemplates(astParserTemplates);
                templates = astDbgTemplates;
                dbgTemplates.iterateAcrossValues = true; // ST v3 compatibility with Maps
                astDbgTemplates.iterateAcrossValues = true;
                astParserTemplates.iterateAcrossValues = true;
            }
            else {
                STGroup astTemplates = new STGroupFile(langDir+"/AST.stg");
                astTemplates.importTemplates(coreTemplates);
                STGroup astParserTemplates = astTemplates;
                if ( grammar.type==Grammar.TREE_PARSER ) {
                    astParserTemplates = new STGroupFile(langDir+"/ASTTreeParser.stg");
                    astParserTemplates.importTemplates(astTemplates);
                }
                else {
                    astParserTemplates = new STGroupFile(langDir+"/ASTParser.stg");
                    astParserTemplates.importTemplates(astTemplates);
                }
                templates = astParserTemplates;
                astTemplates.iterateAcrossValues = true; // ST v3 compatibility with Maps
                astParserTemplates.iterateAcrossValues = true;
            }
        }
        else if ( outputOption!=null && outputOption.equals("template") ) {
            if ( debug && grammar.type!=Grammar.LEXER ) {
                STGroup dbgTemplates = new STGroupFile(langDir+"/Dbg.stg");
                dbgTemplates.importTemplates(coreTemplates);
                baseTemplates = dbgTemplates;
                STGroup stTemplates = new STGroupFile(langDir+"/ST.stg");
                stTemplates.importTemplates(dbgTemplates);
                templates = stTemplates;
                dbgTemplates.iterateAcrossValues = true;
            }
            else {
                STGroup stTemplates = new STGroupFile(langDir+"/ST.stg");
                stTemplates.importTemplates(coreTemplates);
                templates = stTemplates;
            }
            templates.iterateAcrossValues = true; // ST v3 compatibility with Maps
        }
        else if ( debug && grammar.type!=Grammar.LEXER ) {
            STGroup dbgTemplates = new STGroupFile(langDir+"/Dbg.stg");
            dbgTemplates.importTemplates(coreTemplates);
            templates = dbgTemplates;
            baseTemplates = templates;
            baseTemplates.iterateAcrossValues = true; // ST v3 compatibility with Maps
        }
        else {
            templates = coreTemplates;
            coreTemplates.iterateAcrossValues = true; // ST v3 compatibility with Maps
        }
    }

    /** Given the grammar to which we are attached, walk the AST associated
     *  with that grammar to create NFAs.  Then create the DFAs for all
     *  decision points in the grammar by converting the NFAs to DFAs.
     *  Finally, walk the AST again to generate code.
     *
     *  Either 1 or 2 files are written:
     *
     *      recognizer: the main parser/lexer/treewalker item
     *      header file: languages like C/C++ need extern definitions
     *
     *  The target, such as JavaTarget, dictates which files get written.
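     *
     *  A minimal driver sketch (hypothetical; org.antlr.Tool wires this up
     *  in roughly this way, but the flag names below are illustrative,
     *  not verbatim):
     *
     *      CodeGenerator generator =
     *          new CodeGenerator(tool, grammar, (String)grammar.getOption("language"));
     *      grammar.setCodeGenerator(generator);
     *      generator.setDebug(wantDebug);     // assumed flag
     *      generator.genRecognizer();         // writes recognizer/header/.tokens files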
     */
    public ST genRecognizer() {
        //System.out.println("### generate "+grammar.name+" recognizer");
        // LOAD OUTPUT TEMPLATES
        loadTemplates(language);
        if ( templates==null ) {
            return null;
        }

        // CREATE NFA FROM GRAMMAR, CREATE DFA FROM NFA
        if ( ErrorManager.doNotAttemptAnalysis() ) {
            return null;
        }
        target.performGrammarAnalysis(this, grammar);

        // some grammar analysis errors will not yield reliable DFA
        if ( ErrorManager.doNotAttemptCodeGen() ) {
            return null;
        }

        // OPTIMIZE DFA
        DFAOptimizer optimizer = new DFAOptimizer(grammar);
        optimizer.optimize();

        // OUTPUT FILE (contains recognizerST)
        outputFileST = templates.getInstanceOf("outputFile");

        // HEADER FILE
        if ( templates.isDefined("headerFile") ) {
            headerFileST = templates.getInstanceOf("headerFile");
        }
        else {
            // create a dummy to avoid null-checks all over code generator
            headerFileST = new ST(templates, "xyz");
            headerFileST.add("cyclicDFAs", (Object)null); // it normally sees this from outputFile
            //headerFileST.impl.name = "dummy-header-file";
        }

        boolean filterMode = grammar.getOption("filter")!=null &&
                             grammar.getOption("filter").equals("true");
        boolean canBacktrack = grammar.getSyntacticPredicates()!=null ||
                               grammar.composite.getRootGrammar().atLeastOneBacktrackOption ||
                               filterMode;

        // TODO: move this down further because generating the recognizer
        // alters the model with info on who uses predefined properties etc...
        // The actions here might refer to something.

        // The only two possible output files are available at this point.
        // Verify action scopes are ok for target and dump actions into output.
        // Templates can say <actions.parser.header> for example.
        Map<String, Map<String, Object>> actions = grammar.getActions();
        verifyActionScopesOkForTarget(actions);
        // translate $x::y references
        translateActionAttributeReferences(actions);

        ST gateST = templates.getInstanceOf("actionGate");
        if ( filterMode ) {
            // if filtering, we need to set actions to execute at backtracking
            // level 1 not 0.
            gateST = templates.getInstanceOf("filteringActionGate");
        }
        grammar.setSynPredGateIfNotAlready(gateST);

        headerFileST.add("actions", actions);
        outputFileST.add("actions", actions);

        headerFileST.add("buildTemplate", Boolean.valueOf(grammar.buildTemplate()));
        outputFileST.add("buildTemplate", Boolean.valueOf(grammar.buildTemplate()));
        headerFileST.add("buildAST", Boolean.valueOf(grammar.buildAST()));
        outputFileST.add("buildAST", Boolean.valueOf(grammar.buildAST()));

        outputFileST.add("rewriteMode", Boolean.valueOf(grammar.rewriteMode()));
        headerFileST.add("rewriteMode", Boolean.valueOf(grammar.rewriteMode()));

        outputFileST.add("backtracking", Boolean.valueOf(canBacktrack));
        headerFileST.add("backtracking", Boolean.valueOf(canBacktrack));
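        // Illustrative grammar-level options that drive canBacktrack above
        // and the memoize flag below (they live in the grammar file, not here):
        //
        //    options { backtrack=true; memoize=true; }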
        // turn on memoize attribute at grammar level so we can create ruleMemo;
        // each rule has a memoize attr that hides this one, indicating whether
        // it needs to save results
        String memoize = (String)grammar.getOption("memoize");
        outputFileST.add("memoize",
                         (grammar.atLeastOneRuleMemoizes ||
                          (Boolean.valueOf(memoize!=null && memoize.equals("true")) &&
                           canBacktrack)));
        headerFileST.add("memoize",
                         (grammar.atLeastOneRuleMemoizes ||
                          (Boolean.valueOf(memoize!=null && memoize.equals("true")) &&
                           canBacktrack)));

        outputFileST.add("trace", Boolean.valueOf(trace));
        headerFileST.add("trace", Boolean.valueOf(trace));

        outputFileST.add("profile", Boolean.valueOf(profile));
        headerFileST.add("profile", Boolean.valueOf(profile));

        // RECOGNIZER
        if ( grammar.type==Grammar.LEXER ) {
            recognizerST = templates.getInstanceOf("lexer");
            outputFileST.add("LEXER", Boolean.valueOf(true));
            headerFileST.add("LEXER", Boolean.valueOf(true));
            recognizerST.add("filterMode", Boolean.valueOf(filterMode));
        }
        else if ( grammar.type==Grammar.PARSER ||
                  grammar.type==Grammar.COMBINED ) {
            recognizerST = templates.getInstanceOf("parser");
            outputFileST.add("PARSER", Boolean.valueOf(true));
            headerFileST.add("PARSER", Boolean.valueOf(true));
        }
        else {
            recognizerST = templates.getInstanceOf("treeParser");
            outputFileST.add("TREE_PARSER", Boolean.valueOf(true));
            headerFileST.add("TREE_PARSER", Boolean.valueOf(true));
            recognizerST.add("filterMode", Boolean.valueOf(filterMode));
        }
        outputFileST.add("recognizer", recognizerST);
        headerFileST.add("recognizer", recognizerST);
        outputFileST.add("actionScope", grammar.getDefaultActionScope(grammar.type));
        headerFileST.add("actionScope", grammar.getDefaultActionScope(grammar.type));

        String targetAppropriateFileNameString =
            target.getTargetStringLiteralFromString(grammar.getFileName());
        outputFileST.add("fileName", targetAppropriateFileNameString);
        headerFileST.add("fileName", targetAppropriateFileNameString);
        outputFileST.add("ANTLRVersion", tool.VERSION);
        headerFileST.add("ANTLRVersion", tool.VERSION);
        outputFileST.add("generatedTimestamp", Tool.getCurrentTimeStamp());
        headerFileST.add("generatedTimestamp", Tool.getCurrentTimeStamp());

        // GENERATE RECOGNIZER
        // Walk the AST holding the input grammar, this time generating code.
        // Decisions are generated by using the precomputed DFAs.
        // Fill in the various templates with data.
        CodeGenTreeWalker gen = new CodeGenTreeWalker(
            new CommonTreeNodeStream(grammar.getGrammarTree()));
        try {
            gen.grammar_(grammar,
                         recognizerST,
                         outputFileST,
                         headerFileST);
        }
        catch (RecognitionException re) {
            ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, re);
        }

        genTokenTypeConstants(recognizerST);
        genTokenTypeConstants(outputFileST);
        genTokenTypeConstants(headerFileST);

        if ( grammar.type!=Grammar.LEXER ) {
            genTokenTypeNames(recognizerST);
            genTokenTypeNames(outputFileST);
            genTokenTypeNames(headerFileST);
        }

        // Now that we know what synpreds are used, we can set into template
        Set<String> synpredNames = null;
        if ( grammar.synPredNamesUsedInDFA.size()>0 ) {
            synpredNames = grammar.synPredNamesUsedInDFA;
        }
        outputFileST.add("synpreds", synpredNames);
        headerFileST.add("synpreds", synpredNames);

        // all recognizers can see Grammar object
        recognizerST.add("grammar", grammar);

        if ( LAUNCH_ST_INSPECTOR ) {
            outputFileST.inspect();
            if ( templates.isDefined("headerFile") ) headerFileST.inspect();
        }

        // WRITE FILES
        try {
            target.genRecognizerFile(tool, this, grammar, outputFileST);
            if ( templates.isDefined("headerFile") ) {
                ST extST = templates.getInstanceOf("headerFileExtension");
                target.genRecognizerHeaderFile(tool, this, grammar, headerFileST,
                                               extST.render());
            }
            // write out the vocab interchange file; used by antlr,
            // does not change per target
            ST tokenVocabSerialization = genTokenVocabOutput();
            String vocabFileName = getVocabFileName();
            if ( vocabFileName!=null ) {
                write(tokenVocabSerialization, vocabFileName);
            }
            //System.out.println(outputFileST.getDOTForDependencyGraph(false));
        }
        catch (IOException ioe) {
            ErrorManager.error(ErrorManager.MSG_CANNOT_WRITE_FILE, ioe);
        }
        /*
        System.out.println("num obj.prop refs: "+ ASTExpr.totalObjPropRefs);
        System.out.println("num reflection lookups: "+ ASTExpr.totalReflectionLookups);
        */
        return outputFileST;
    }

    /** Some targets will have some extra scopes like C++ may have
     *  '@headerfile:name {action}' or something.  Make sure the
     *  target likes the scopes in action table.
     */
    protected void verifyActionScopesOkForTarget(Map actions) {
        Set actionScopeKeySet = actions.keySet();
        for (Iterator it = actionScopeKeySet.iterator(); it.hasNext();) {
            String scope = (String)it.next();
            if ( !target.isValidActionScope(grammar.type, scope) ) {
                // get any action from the scope to get error location
                Map scopeActions = (Map)actions.get(scope);
                GrammarAST actionAST =
                    (GrammarAST)scopeActions.values().iterator().next();
                ErrorManager.grammarError(
                    ErrorManager.MSG_INVALID_ACTION_SCOPE, grammar,
                    actionAST.getToken(), scope,
                    grammar.getGrammarTypeString());
            }
        }
    }

    /** Actions may reference $x::y attributes, call translateAction on
     *  each action and replace that action in the Map.
     */
    protected void translateActionAttributeReferences(Map actions) {
        Set actionScopeKeySet = actions.keySet();
        for (Iterator it = actionScopeKeySet.iterator(); it.hasNext();) {
            String scope = (String)it.next();
            Map scopeActions = (Map)actions.get(scope);
            translateActionAttributeReferencesForSingleScope(null, scopeActions);
        }
    }

    /** Use for translating rule @init{...} actions that have no scope */
    public void translateActionAttributeReferencesForSingleScope(
        Rule r,
        Map scopeActions)
    {
        String ruleName = null;
        if ( r!=null ) {
            ruleName = r.name;
        }
        Set actionNameSet = scopeActions.keySet();
        for (Iterator nameIT = actionNameSet.iterator(); nameIT.hasNext();) {
            String name = (String)nameIT.next();
            GrammarAST actionAST = (GrammarAST)scopeActions.get(name);
            List chunks = translateAction(ruleName, actionAST);
            scopeActions.put(name, chunks); // replace with translation
        }
    }

    /** Error recovery in ANTLR recognizers.
     *
     *  Based upon original ideas:
     *
     *  Algorithms + Data Structures = Programs by Niklaus Wirth
     *
     *  and
     *
     *  A note on error recovery in recursive descent parsers:
     *  http://portal.acm.org/citation.cfm?id=947902.947905
     *
     *  Later, Josef Grosch had some good ideas:
     *  Efficient and Comfortable Error Recovery in Recursive Descent Parsers:
     *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
     *
     *  Like Grosch I implemented local FOLLOW sets that are combined at
     *  run-time upon error to avoid parsing overhead.
     */
    public void generateLocalFOLLOW(GrammarAST referencedElementNode,
                                    String referencedElementName,
                                    String enclosingRuleName,
                                    int elementIndex)
    {
        /*
        System.out.println("compute FOLLOW "+grammar.name+"."+referencedElementNode.toString()+
                           " for "+referencedElementName+"#"+elementIndex+" in "+
                           enclosingRuleName+" line="+referencedElementNode.getLine());
        */
        NFAState followingNFAState = referencedElementNode.followingNFAState;
        LookaheadSet follow = null;
        if ( followingNFAState!=null ) {
            // compute follow for this element and, as side-effect, track
            // the rule LOOK sensitivity.
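            // (FIRST of the NFA state that follows the element is, by
            // construction, the element's local FOLLOW: the set of tokens
            // that can legally appear next.)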
            follow = grammar.FIRST(followingNFAState);
        }
        if ( follow==null ) {
            ErrorManager.internalError("no follow state or cannot compute follow");
            follow = new LookaheadSet();
        }
        if ( follow.member(Label.EOF) ) {
            // TODO: can we just remove?  Seems needed here:
            //     compilation_unit : global_statement* EOF
            // Actually i guess we resync to EOF regardless
            follow.remove(Label.EOF);
        }
        //System.out.println(" "+follow);

        List tokenTypeList = null;
        long[] words = null;
        if ( follow.tokenTypeSet==null ) {
            words = new long[1];
            tokenTypeList = new ArrayList();
        }
        else {
            BitSet bits = BitSet.of(follow.tokenTypeSet);
            words = bits.toPackedArray();
            tokenTypeList = follow.tokenTypeSet.toList();
        }
        // use the target to convert to hex strings (typically)
        String[] wordStrings = new String[words.length];
        for (int j = 0; j < words.length; j++) {
            long w = words[j];
            wordStrings[j] = target.getTarget64BitStringFromValue(w);
        }
        recognizerST.addAggr("bitsets.{name,inName,bits,tokenTypes,tokenIndex}",
                             referencedElementName,
                             enclosingRuleName,
                             wordStrings,
                             tokenTypeList,
                             Utils.integer(elementIndex));
        outputFileST.addAggr("bitsets.{name,inName,bits,tokenTypes,tokenIndex}",
                             referencedElementName,
                             enclosingRuleName,
                             wordStrings,
                             tokenTypeList,
                             Utils.integer(elementIndex));
        headerFileST.addAggr("bitsets.{name,inName,bits,tokenTypes,tokenIndex}",
                             referencedElementName,
                             enclosingRuleName,
                             wordStrings,
                             tokenTypeList,
                             Utils.integer(elementIndex));
    }

    // L O O K A H E A D  D E C I S I O N  G E N E R A T I O N

    /** Generate code that computes the predicted alt given a DFA.  The
     *  recognizerST can be either the main generated recognizerTemplate
     *  for storage in the main parser file or a separate file.  It's up to
     *  the code that ultimately invokes the codegen.g grammar rule.
     *
     *  Regardless, the output file and header file get a copy of the DFAs.
     */
    public ST genLookaheadDecision(ST recognizerST, DFA dfa) {
        ST decisionST;
        // If we are doing inline DFA and this one is acyclic and LL(*)
        // I have to check for is-non-LL(*) because if non-LL(*) the cyclic
        // check is not done by DFA.verify(); that is, verify() avoids
        // doesStateReachAcceptState() if non-LL(*)
        if ( dfa.canInlineDecision() ) {
            decisionST = acyclicDFAGenerator.genFixedLookaheadDecision(
                getTemplates(), dfa);
        }
        else {
            // generate any kind of DFA here (cyclic or acyclic)
            dfa.createStateTables(this);
            outputFileST.add("cyclicDFAs", dfa);
            headerFileST.add("cyclicDFAs", dfa);
            decisionST = templates.getInstanceOf("dfaDecision");
            String description = dfa.getNFADecisionStartState().getDescription();
            description = target.getTargetStringLiteralFromString(description);
            if ( description!=null ) {
                decisionST.add("description", description);
            }
            decisionST.add("decisionNumber",
                           Utils.integer(dfa.getDecisionNumber()));
        }
        return decisionST;
    }

    /** A special state is huge (too big for state tables) or has a predicated
     *  edge.  Generate a simple if-then-else.  Cannot be an accept state as
     *  they have no emanating edges.  Don't worry about switch vs if-then-else
     *  because if you get here, the state is super complicated and needs an
     *  if-then-else.  This is used by the new DFA scheme created June 2006.
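     *
     *  Output shape (illustrative only; the real text comes from the target's
     *  cyclicDFAState/cyclicDFAEdge templates, and all names here are invented):
     *
     *      if ( (LA(1)=='a') && (synpred1_T()) ) { s = 2; }
     *      else if ( LA(1)=='b' ) { s = 3; }
     *      else { error clause }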
     */
    public ST generateSpecialState(DFAState s) {
        ST stateST;
        stateST = templates.getInstanceOf("cyclicDFAState");
        stateST.add("needErrorClause", Boolean.valueOf(true));
        stateST.add("semPredState",
                    Boolean.valueOf(s.isResolvedWithPredicates()));
        stateST.add("stateNumber", s.stateNumber);
        stateST.add("decisionNumber", s.dfa.decisionNumber);

        boolean foundGatedPred = false;
        ST eotST = null;
        for (int i = 0; i < s.getNumberOfTransitions(); i++) {
            Transition edge = (Transition)s.transition(i);
            ST edgeST;
            if ( edge.label.getAtom()==Label.EOT ) {
                // this is the default clause; it has to be held until last
                edgeST = templates.getInstanceOf("eotDFAEdge");
                stateST.remove("needErrorClause");
                eotST = edgeST;
            }
            else {
                edgeST = templates.getInstanceOf("cyclicDFAEdge");
                ST exprST = genLabelExpr(templates, edge, 1);
                edgeST.add("labelExpr", exprST);
            }
            edgeST.add("edgeNumber", Utils.integer(i + 1));
            edgeST.add("targetStateNumber",
                       Utils.integer(edge.target.stateNumber));
            // stick in any gated predicates for any edge if not already a pred
            if ( !edge.label.isSemanticPredicate() ) {
                DFAState t = (DFAState)edge.target;
                SemanticContext preds = t.getGatedPredicatesInNFAConfigurations();
                if ( preds!=null ) {
                    foundGatedPred = true;
                    ST predST = preds.genExpr(this, getTemplates(), t.dfa);
                    edgeST.add("predicates", predST.render());
                }
            }
            if ( edge.label.getAtom()!=Label.EOT ) {
                stateST.add("edges", edgeST);
            }
        }
        if ( foundGatedPred ) {
            // state has >= 1 edge with a gated pred (syn or sem)
            // must rewind input first, set flag.
            stateST.add("semPredState", Boolean.valueOf(foundGatedPred));
        }
        if ( eotST!=null ) {
            stateST.add("edges", eotST);
        }
        return stateST;
    }

    /** Generate an expression for traversing an edge. */
    protected ST genLabelExpr(STGroup templates, Transition edge, int k) {
        Label label = edge.label;
        if ( label.isSemanticPredicate() ) {
            return genSemanticPredicateExpr(templates, edge);
        }
        if ( label.isSet() ) {
            return genSetExpr(templates, label.getSet(), k, true);
        }
        // must be simple label
        ST eST = templates.getInstanceOf("lookaheadTest");
        eST.add("atom", getTokenTypeAsTargetLabel(label.getAtom()));
        eST.add("atomAsInt", Utils.integer(label.getAtom()));
        eST.add("k", Utils.integer(k));
        return eST;
    }

    protected ST genSemanticPredicateExpr(STGroup templates, Transition edge) {
        DFA dfa = ((DFAState)edge.target).dfa; // which DFA are we in
        Label label = edge.label;
        SemanticContext semCtx = label.getSemanticContext();
        return semCtx.genExpr(this, templates, dfa);
    }

    /** For intervals such as [3..3, 30..35], generate an expression that
     *  tests the lookahead similar to LA(1)==3 || (LA(1)>=30 && LA(1)<=35)
     */
    public ST genSetExpr(STGroup templates,
                         IntSet set,
                         int k,
                         boolean partOfDFA)
    {
        if ( !(set instanceof IntervalSet) ) {
            throw new IllegalArgumentException(
                "unable to generate expressions for non IntervalSet objects");
        }
        IntervalSet iset = (IntervalSet)set;
        if ( iset.getIntervals()==null || iset.getIntervals().size()==0 ) {
            ST emptyST = new ST(templates, "");
            emptyST.impl.name = "empty-set-expr";
            return emptyST;
        }
        String testSTName = "lookaheadTest";
        String testRangeSTName = "lookaheadRangeTest";
        if ( !partOfDFA ) {
            testSTName = "isolatedLookaheadTest";
            testRangeSTName = "isolatedLookaheadRangeTest";
        }
        ST setST = templates.getInstanceOf("setTest");
        Iterator iter = iset.getIntervals().iterator();
        int rangeNumber = 1;
        while (iter.hasNext()) {
            Interval I = (Interval)iter.next();
            int a = I.a;
            int b = I.b;
            ST eST;
            if ( a==b ) {
                eST = templates.getInstanceOf(testSTName);
                eST.add("atom", getTokenTypeAsTargetLabel(a));
                eST.add("atomAsInt", Utils.integer(a));
                //eST.add("k",Utils.integer(k));
            }
            else {
                eST = templates.getInstanceOf(testRangeSTName);
                eST.add("lower", getTokenTypeAsTargetLabel(a));
                eST.add("lowerAsInt", Utils.integer(a));
                eST.add("upper", getTokenTypeAsTargetLabel(b));
                eST.add("upperAsInt", Utils.integer(b));
                eST.add("rangeNumber", Utils.integer(rangeNumber));
            }
            eST.add("k", Utils.integer(k));
            setST.add("ranges", eST);
            rangeNumber++;
        }
        return setST;
    }

    // T O K E N  D E F I N I T I O N  G E N E R A T I O N

    /** Set the 'tokens' attribute (name/type pairs) in the incoming code
     *  template.  This is not the token vocab interchange file, but rather
     *  the list of token type IDs needed by the recognizer.
     */
    protected void genTokenTypeConstants(ST code) {
        // make constants for the token types
        Iterator tokenIDs = grammar.getTokenIDs().iterator();
        while (tokenIDs.hasNext()) {
            String tokenID = (String)tokenIDs.next();
            int tokenType = grammar.getTokenType(tokenID);
            if ( tokenType==Label.EOF ||
                 tokenType>=Label.MIN_TOKEN_TYPE )
            {
                // don't do FAUX labels 'cept EOF
                code.addAggr("tokens.{name,type}", tokenID, Utils.integer(tokenType));
            }
        }
    }

    /** Generate a token names table that maps token type to a printable
     *  name: either the label like INT or the literal like "begin".
     */
    protected void genTokenTypeNames(ST code) {
        for (int t=Label.MIN_TOKEN_TYPE; t<=grammar.getMaxTokenType(); t++) {
            String tokenName = grammar.getTokenDisplayName(t);
            if ( tokenName!=null ) {
                tokenName = target.getTargetStringLiteralFromString(tokenName, true);
                code.add("tokenNames", tokenName);
            }
        }
    }

    /** Get a meaningful name for a token type useful during code generation.
     *  Literals without associated names are converted to the string equivalent
     *  of their integer values.  Used to generate x==ID and x==34 type comparisons
     *  etc...  Essentially we are looking for the most obvious way to refer
     *  to a token type in the generated code.  If in the lexer, return the
     *  char literal translated to the target language.  For example, ttype=10
     *  will yield '\n' from the getTokenDisplayName method.  That must
     *  be converted to the target language's literals.  For most C-derived
     *  languages no translation is needed.
     */
    public String getTokenTypeAsTargetLabel(int ttype) {
        if ( grammar.type==Grammar.LEXER ) {
            String name = grammar.getTokenDisplayName(ttype);
            return target.getTargetCharLiteralFromANTLRCharLiteral(this, name);
        }
        return target.getTokenTypeAsTargetLabel(this, ttype);
    }
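    // For example (illustrative): in a lexer, ttype 10 with display name '\n'
    // renders as the target's char literal (e.g. '\n' for Java); in a parser,
    // a token named ID renders simply as ID, and an unnamed literal renders
    // as its integer token type.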
    /** Generate a token vocab file with all the token names/types.
     *  For example:
     *      ID=7
     *      FOR=8
     *      'for'=8
     *
     *  This is independent of the target language; used by antlr internally.
     */
    protected ST genTokenVocabOutput() {
        ST vocabFileST = new ST(vocabFilePattern);
        vocabFileST.add("literals", (Object)null); // "define" literals arg
        vocabFileST.add("tokens", (Object)null);
        vocabFileST.impl.name = "vocab-file";
        // make constants for the token names
        Iterator tokenIDs = grammar.getTokenIDs().iterator();
        while (tokenIDs.hasNext()) {
            String tokenID = (String)tokenIDs.next();
            int tokenType = grammar.getTokenType(tokenID);
            if ( tokenType>=Label.MIN_TOKEN_TYPE ) {
                vocabFileST.addAggr("tokens.{name,type}", tokenID, Utils.integer(tokenType));
            }
        }
        // now dump the strings
        Iterator literals = grammar.getStringLiterals().iterator();
        while (literals.hasNext()) {
            String literal = (String)literals.next();
            int tokenType = grammar.getTokenType(literal);
            if ( tokenType>=Label.MIN_TOKEN_TYPE ) {
                vocabFileST.addAggr("tokens.{name,type}", literal, Utils.integer(tokenType));
            }
        }
        return vocabFileST;
    }

    public List translateAction(String ruleName,
                                GrammarAST actionTree)
    {
        if ( actionTree.getType()==ANTLRParser.ARG_ACTION ) {
            return translateArgAction(ruleName, actionTree);
        }
        ActionTranslator translator = new ActionTranslator(this, ruleName, actionTree);
        List chunks = translator.translateToChunks();
        chunks = target.postProcessAction(chunks, actionTree.token);
        return chunks;
    }

    /** Translate an action like [3,"foo",a[3]] and return a List of the
     *  translated actions.  Because actions are themselves translated to
     *  lists of chunks, we must concatenate the chunks for each argument
     *  into a single ST.  Don't translate to strings early as we need to
     *  eval templates in context.
     */
    public List translateArgAction(String ruleName,
                                   GrammarAST actionTree)
    {
        String actionText = actionTree.token.getText();
        List<String> args = getListOfArgumentsFromAction(actionText, ',');
        List<ST> translatedArgs = new ArrayList<ST>();
        for (String arg : args) {
            if ( arg!=null ) {
                Token actionToken =
                    new CommonToken(ANTLRParser.ACTION, arg);
                ActionTranslator translator =
                    new ActionTranslator(this, ruleName,
                                         actionToken,
                                         actionTree.outerAltNum);
                List chunks = translator.translateToChunks();
                chunks = target.postProcessAction(chunks, actionToken);
                ST catST = new ST(templates, "<chunks>");
                catST.add("chunks", chunks);
                translatedArgs.add(catST);
            }
        }
        if ( translatedArgs.size()==0 ) {
            return null;
        }
        return translatedArgs;
    }

    public static List<String> getListOfArgumentsFromAction(String actionText,
                                                            int separatorChar)
    {
        List<String> args = new ArrayList<String>();
        getListOfArgumentsFromAction(actionText, 0, -1, separatorChar, args);
        return args;
    }

    /** Given an arg action like
     *
     *      [x, (*a).foo(21,33), 3.2+1, '\n',
     *       "a,oo\nick", {bl, "fdkj"eck}, ["cat\n,", x, 43]]
     *
     *  convert to a list of arguments.  Allow nested square brackets etc...
     *  Set separatorChar to ';' or ',' or whatever you want.
     */
    public static int getListOfArgumentsFromAction(String actionText,
                                                   int start,
                                                   int targetChar,
                                                   int separatorChar,
                                                   List<String> args)
    {
        if ( actionText==null ) {
            return -1;
        }
        actionText = actionText.replaceAll("//.*\n", "");
        int n = actionText.length();
        //System.out.println("actionText@"+start+"->"+(char)targetChar+"="+actionText.substring(start,n));
        int p = start;
        int last = p;
        while ( p<n && actionText.charAt(p)!=targetChar ) {
            int c = actionText.charAt(p);
            switch ( c ) {
                case '\'' :
                    p++;
                    while ( p<n && actionText.charAt(p)!='\'' ) {
                        if ( actionText.charAt(p)=='\\' && (p+1)<n &&
                             actionText.charAt(p+1)=='\'' )
                        {
                            p++; // skip escaped quote
                        }
                        p++;
                    }
                    p++;
                    break;
                case '"' :
                    p++;
                    while ( p<n && actionText.charAt(p)!='"' ) {
                        if ( actionText.charAt(p)=='\\' && (p+1)<n &&
                             actionText.charAt(p+1)=='"' )
                        {
                            p++; // skip escaped quote
                        }
                        p++;
                    }
                    p++;
                    break;
                case '(' :
                    p = getListOfArgumentsFromAction(actionText, p+1, ')', separatorChar, args);
                    break;
                case '{' :
                    p = getListOfArgumentsFromAction(actionText, p+1, '}', separatorChar, args);
                    break;
                case '<' :
                    if ( actionText.indexOf('>', p+1)>=p ) {
                        // do we see a matching '>' ahead?  if so, hope it's a generic
                        // and not less-than followed by expr with greater-than
                        p = getListOfArgumentsFromAction(actionText, p+1, '>', separatorChar, args);
                    }
                    else {
                        p++; // treat as normal char
                    }
                    break;
                case '[' :
                    p = getListOfArgumentsFromAction(actionText, p+1, ']', separatorChar, args);
                    break;
                default :
                    if ( c==separatorChar && targetChar==-1 ) {
                        String arg = actionText.substring(last, p);
                        //System.out.println("arg="+arg);
                        args.add(arg.trim());
                        last = p+1;
                    }
                    p++;
                    break;
            }
        }
        if ( targetChar==-1 && p<=n ) {
            String arg = actionText.substring(last, p).trim();
            //System.out.println("arg="+arg);
            if ( arg.length()>0 ) {
                args.add(arg.trim());
            }
        }
        p++;
        return p;
    }

    /** Given a template constructor action like %foo(a={...}) in
     *  an action, translate it to the appropriate template constructor
     *  from the templateLib.  This translates a *piece* of the action.
     */
    public ST translateTemplateConstructor(String ruleName,
                                           int outerAltNum,
                                           Token actionToken,
                                           String templateActionText)
    {
        // first, parse with antlr.g
        //System.out.println("translate template: "+templateActionText);
        ANTLRLexer lexer = new ANTLRLexer(new ANTLRStringStream(templateActionText));
        lexer.setFileName(grammar.getFileName());
        ANTLRParser parser = ANTLRParser.createParser(new CommonTokenStream(lexer));
        parser.setFileName(grammar.getFileName());
        ANTLRParser.rewrite_template_return parseResult = null;
        try {
            parseResult = parser.rewrite_template();
        }
        catch (RecognitionException re) {
            ErrorManager.grammarError(ErrorManager.MSG_INVALID_TEMPLATE_ACTION,
                                      grammar,
                                      actionToken,
                                      templateActionText);
        }
        catch (Exception tse) {
            ErrorManager.internalError("can't parse template action", tse);
        }
        GrammarAST rewriteTree = (GrammarAST)parseResult.getTree();

        // then translate via codegen.g
        CodeGenTreeWalker gen = new CodeGenTreeWalker(
            new CommonTreeNodeStream(rewriteTree));
        gen.init(grammar);
        gen.setCurrentRuleName(ruleName);
        gen.setOuterAltNum(outerAltNum);
        ST st = null;
        try {
            st = gen.rewrite_template();
        }
        catch (RecognitionException re) {
            ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, re);
        }
        return st;
    }

    public void issueInvalidScopeError(String x,
                                       String y,
                                       Rule enclosingRule,
                                       Token actionToken,
                                       int outerAltNum)
    {
        //System.out.println("error $"+x+"::"+y);
        Rule r = grammar.getRule(x);
        AttributeScope scope = grammar.getGlobalScope(x);
        if ( scope==null ) {
            if ( r!=null ) {
                scope = r.ruleScope; // if not global, might be rule scope
            }
        }
        if ( scope==null ) {
            ErrorManager.grammarError(ErrorManager.MSG_UNKNOWN_DYNAMIC_SCOPE,
                                      grammar,
                                      actionToken,
                                      x);
        }
        else if ( scope.getAttribute(y)==null ) {
            ErrorManager.grammarError(ErrorManager.MSG_UNKNOWN_DYNAMIC_SCOPE_ATTRIBUTE,
                                      grammar,
                                      actionToken,
                                      x,
                                      y);
        }
    }

    public void issueInvalidAttributeError(String x,
                                           String y,
                                           Rule enclosingRule,
                                           Token actionToken,
                                           int outerAltNum)
    {
        //System.out.println("error $"+x+"."+y);
        if ( enclosingRule==null ) {
            // action not in a rule
            ErrorManager.grammarError(ErrorManager.MSG_ATTRIBUTE_REF_NOT_IN_RULE,
                                      grammar,
                                      actionToken,
                                      x,
                                      y);
            return;
        }

        // action is in a rule
        Grammar.LabelElementPair label = enclosingRule.getRuleLabel(x);

        if ( label!=null || enclosingRule.getRuleRefsInAlt(x, outerAltNum)!=null ) {
            // $rulelabel.attr or $ruleref.attr; must be unknown attr
            String refdRuleName = x;
            if ( label!=null ) {
                refdRuleName = enclosingRule.getRuleLabel(x).referencedRuleName;
            }
            Rule refdRule = grammar.getRule(refdRuleName);
            AttributeScope scope = refdRule.getAttributeScope(y);
            if ( scope==null ) {
                ErrorManager.grammarError(ErrorManager.MSG_UNKNOWN_RULE_ATTRIBUTE,
                                          grammar,
                                          actionToken,
                                          refdRuleName,
                                          y);
            }
            else if ( scope.isParameterScope ) {
                ErrorManager.grammarError(ErrorManager.MSG_INVALID_RULE_PARAMETER_REF,
                                          grammar,
                                          actionToken,
                                          refdRuleName,
                                          y);
            }
            else if ( scope.isDynamicRuleScope ) {
                ErrorManager.grammarError(ErrorManager.MSG_INVALID_RULE_SCOPE_ATTRIBUTE_REF,
                                          grammar,
                                          actionToken,
                                          refdRuleName,
                                          y);
            }
        }
    }

    public void issueInvalidAttributeError(String x,
                                           Rule enclosingRule,
                                           Token actionToken,
                                           int outerAltNum)
    {
        //System.out.println("error $"+x);
        if ( enclosingRule==null ) {
            // action not in a rule
            ErrorManager.grammarError(ErrorManager.MSG_ATTRIBUTE_REF_NOT_IN_RULE,
                                      grammar,
                                      actionToken,
                                      x);
            return;
        }

        // action is in a rule
        Grammar.LabelElementPair label = enclosingRule.getRuleLabel(x);
        AttributeScope scope = enclosingRule.getAttributeScope(x);

        if ( label!=null ||
             enclosingRule.getRuleRefsInAlt(x, outerAltNum)!=null ||
             enclosingRule.name.equals(x) )
        {
            ErrorManager.grammarError(ErrorManager.MSG_ISOLATED_RULE_SCOPE,
                                      grammar,
                                      actionToken,
                                      x);
        }
        else if ( scope!=null && scope.isDynamicRuleScope ) {
            ErrorManager.grammarError(ErrorManager.MSG_ISOLATED_RULE_ATTRIBUTE,
                                      grammar,
                                      actionToken,
                                      x);
        }
        else {
            ErrorManager.grammarError(ErrorManager.MSG_UNKNOWN_SIMPLE_ATTRIBUTE,
                                      grammar,
                                      actionToken,
                                      x);
        }
    }

    // M I S C

    public STGroup getTemplates() {
        return templates;
    }

    public STGroup getBaseTemplates() {
        return baseTemplates;
    }

    public void setDebug(boolean debug) {
        this.debug = debug;
    }

    public void setTrace(boolean trace) {
        this.trace = trace;
    }

    public void setProfile(boolean profile) {
        this.profile = profile;
        if ( profile ) {
            setDebug(true); // requires debug events
        }
    }

    public ST getRecognizerST() {
        return outputFileST;
    }

    /** Generate TParser.java and TLexer.java from T.g if combined, else
     *  just use T.java as output regardless of type.
     */
    public String getRecognizerFileName(String name, int type) {
        ST extST = templates.getInstanceOf("codeFileExtension");
        String recognizerName = grammar.getRecognizerName();
        return recognizerName+extST.render();
        /*
        String suffix = "";
        if ( type==Grammar.COMBINED ||
             (type==Grammar.LEXER && !grammar.implicitLexer) )
        {
            suffix = Grammar.grammarTypeToFileNameSuffix[type];
        }
        return name+suffix+extST.toString();
        */
    }

    /** What is the name of the vocab file generated for this grammar?
     *  Returns null if no .tokens file should be generated.
     */
    public String getVocabFileName() {
        if ( grammar.isBuiltFromString() ) {
            return null;
        }
        return grammar.name+VOCAB_FILE_EXTENSION;
    }

    public void write(ST code, String fileName) throws IOException {
        //long start = System.currentTimeMillis();
        Writer w = tool.getOutputFile(grammar, fileName);
        // render the template through an auto-indenting writer wrapped
        // around the output file's Writer
        STWriter wr = new AutoIndentWriter(w);
        wr.setLineWidth(lineWidth);
        code.write(wr);
        w.close();
        //long stop = System.currentTimeMillis();
        //System.out.println("render time for "+fileName+": "+(int)(stop-start)+"ms");
    }

    /** You can generate a switch rather than if-then-else for a DFA state
     *  if there are no semantic predicates and the number of edge label
     *  values is small enough; e.g., don't generate a switch for a state
     *  containing an edge label such as 20..52330 (the resulting bytecodes
     *  would probably overflow the 64k method limit).
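     *
     *  For example (illustrative): an edge labeled 'a'..'z' contributes 26
     *  case labels and is fine; an edge labeled 20..52330 would contribute
     *  more than 52,000 labels, exceeding MAX_SWITCH_CASE_LABELS (300 by
     *  default), so an if-then-else is generated instead.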
     */
    protected boolean canGenerateSwitch(DFAState s) {
        if ( !GENERATE_SWITCHES_WHEN_POSSIBLE ) {
            return false;
        }
        int size = 0;
        for (int i = 0; i < s.getNumberOfTransitions(); i++) {
            Transition edge = (Transition)s.transition(i);
            if ( edge.label.isSemanticPredicate() ) {
                return false;
            }
            // can't do a switch if the edges are going to require predicates
            if ( edge.label.getAtom()==Label.EOT ) {
                int EOTPredicts = ((DFAState)edge.target).getUniquelyPredictedAlt();
                if ( EOTPredicts==NFA.INVALID_ALT_NUMBER ) {
                    // EOT target has to be a predicate then; no unique alt
                    return false;
                }
            }
            // if target is a state with gated preds, we need to use preds on
            // this edge then to reach it.
            if ( ((DFAState)edge.target).getGatedPredicatesInNFAConfigurations()!=null ) {
                return false;
            }
            size += edge.label.getSet().size();
        }
        if ( s.getNumberOfTransitions()<MIN_SWITCH_ALTS ||
             size>MAX_SWITCH_CASE_LABELS )
        {
            return false;
        }
        return true;
    }

    /** Create a label to track a token / rule reference's result.
     *  Technically, this is a place where I break model-view separation
     *  as I am creating a variable name that could be invalid in a
     *  target language; however, label ::= <name><uniqueNumber> is probably
     *  ok in all languages we care about.
     */
    public String createUniqueLabel(String name) {
        return new StringBuffer()
            .append(name).append(uniqueLabelNumber++).toString();
    }
}