/* * [The "BSD license"] * Copyright (c) 2010 Terence Parr * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package org.antlr.tool; import org.antlr.analysis.Label; import org.antlr.grammar.v3.AssignTokenTypesWalker; import org.antlr.misc.Utils; import org.antlr.runtime.tree.TreeNodeStream; import java.util.*; /** Move all of the functionality from assign.types.g grammar file. */ public class AssignTokenTypesBehavior extends AssignTokenTypesWalker { protected static final Integer UNASSIGNED = Utils.integer(-1); protected static final Integer UNASSIGNED_IN_PARSER_RULE = Utils.integer(-2); protected Map stringLiterals = new TreeMap(); protected Map tokens = new TreeMap(); protected Map aliases = new TreeMap(); protected Map aliasesReverseIndex = new HashMap(); /** Track actual lexer rule defs so we don't get repeated token defs in * generated lexer. */ protected Set tokenRuleDefs = new HashSet(); public AssignTokenTypesBehavior() { super(null); } @Override protected void init(Grammar g) { this.grammar = g; currentRuleName = null; if ( stringAlias==null ) { // only init once; can't statically init since we need astFactory initASTPatterns(); } } /** Track string literals (could be in tokens{} section) */ @Override protected void trackString(GrammarAST t) { // if lexer, don't allow aliasing in tokens section if ( currentRuleName==null && grammar.type==Grammar.LEXER ) { ErrorManager.grammarError(ErrorManager.MSG_CANNOT_ALIAS_TOKENS_IN_LEXER, grammar, t.token, t.getText()); return; } // in a plain parser grammar rule, cannot reference literals // (unless defined previously via tokenVocab option) // don't warn until we hit root grammar as may be defined there. if ( grammar.getGrammarIsRoot() && grammar.type==Grammar.PARSER && grammar.getTokenType(t.getText())== Label.INVALID ) { ErrorManager.grammarError(ErrorManager.MSG_LITERAL_NOT_ASSOCIATED_WITH_LEXER_RULE, grammar, t.token, t.getText()); } // Don't record literals for lexers, they are things to match not tokens if ( grammar.type==Grammar.LEXER ) { return; } // otherwise add literal to token types if referenced from parser rule // or in the tokens{} section if ( (currentRuleName==null || Character.isLowerCase(currentRuleName.charAt(0))) && grammar.getTokenType(t.getText())==Label.INVALID ) { stringLiterals.put(t.getText(), UNASSIGNED_IN_PARSER_RULE); } } @Override protected void trackToken(GrammarAST t) { // imported token names might exist, only add if new // Might have ';'=4 in vocab import and SEMI=';'. Avoid // setting to UNASSIGNED if we have loaded ';'/SEMI if ( grammar.getTokenType(t.getText())==Label.INVALID && tokens.get(t.getText())==null ) { tokens.put(t.getText(), UNASSIGNED); } } @Override protected void trackTokenRule(GrammarAST t, GrammarAST modifier, GrammarAST block) { // imported token names might exist, only add if new if ( grammar.type==Grammar.LEXER || grammar.type==Grammar.COMBINED ) { if ( !Character.isUpperCase(t.getText().charAt(0)) ) { return; } if ( t.getText().equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) { // don't add Tokens rule return; } // track all lexer rules so we can look for token refs w/o // associated lexer rules. grammar.composite.lexerRules.add(t.getText()); int existing = grammar.getTokenType(t.getText()); if ( existing==Label.INVALID ) { tokens.put(t.getText(), UNASSIGNED); } // look for " : ;" pattern // (can have optional action last) if ( block.hasSameTreeStructure(charAlias) || block.hasSameTreeStructure(stringAlias) || block.hasSameTreeStructure(charAlias2) || block.hasSameTreeStructure(stringAlias2) ) { tokenRuleDefs.add(t.getText()); /* Grammar parent = grammar.composite.getDelegator(grammar); boolean importedByParserOrCombined = parent!=null && (parent.type==Grammar.LEXER||parent.type==Grammar.PARSER); */ if ( grammar.type==Grammar.COMBINED || grammar.type==Grammar.LEXER ) { // only call this rule an alias if combined or lexer alias(t, (GrammarAST)block.getChild(0).getChild(0)); } } } // else error } @Override protected void alias(GrammarAST t, GrammarAST s) { String tokenID = t.getText(); String literal = s.getText(); String prevAliasLiteralID = aliasesReverseIndex.get(literal); if ( prevAliasLiteralID!=null ) { // we've seen this literal before if ( tokenID.equals(prevAliasLiteralID) ) { // duplicate but identical alias; might be tokens {A='a'} and // lexer rule A : 'a' ; Is ok, just return return; } // give error unless both are rules (ok if one is in tokens section) if ( !(tokenRuleDefs.contains(tokenID) && tokenRuleDefs.contains(prevAliasLiteralID)) ) { // don't allow alias if A='a' in tokens section and B : 'a'; is rule. // Allow if both are rules. Will get DFA nondeterminism error later. ErrorManager.grammarError(ErrorManager.MSG_TOKEN_ALIAS_CONFLICT, grammar, t.token, tokenID+"="+literal, prevAliasLiteralID); } return; // don't do the alias } int existingLiteralType = grammar.getTokenType(literal); if ( existingLiteralType !=Label.INVALID ) { // we've seen this before from a tokenVocab most likely // don't assign a new token type; use existingLiteralType. tokens.put(tokenID, existingLiteralType); } String prevAliasTokenID = aliases.get(tokenID); if ( prevAliasTokenID!=null ) { ErrorManager.grammarError(ErrorManager.MSG_TOKEN_ALIAS_REASSIGNMENT, grammar, t.token, tokenID+"="+literal, prevAliasTokenID); return; // don't do the alias } aliases.put(tokenID, literal); aliasesReverseIndex.put(literal, tokenID); } @Override public void defineTokens(Grammar root) { /* System.out.println("stringLiterals="+stringLiterals); System.out.println("tokens="+tokens); System.out.println("aliases="+aliases); System.out.println("aliasesReverseIndex="+aliasesReverseIndex); */ assignTokenIDTypes(root); aliasTokenIDsAndLiterals(root); assignStringTypes(root); /* System.out.println("stringLiterals="+stringLiterals); System.out.println("tokens="+tokens); System.out.println("aliases="+aliases); */ defineTokenNamesAndLiteralsInGrammar(root); } /* protected void defineStringLiteralsFromDelegates() { if ( grammar.getGrammarIsMaster() && grammar.type==Grammar.COMBINED ) { List delegates = grammar.getDelegates(); System.out.println("delegates in master combined: "+delegates); for (int i = 0; i < delegates.size(); i++) { Grammar d = (Grammar) delegates.get(i); Set literals = d.getStringLiterals(); for (Iterator it = literals.iterator(); it.hasNext();) { String literal = (String) it.next(); System.out.println("literal "+literal); int ttype = grammar.getTokenType(literal); grammar.defineLexerRuleForStringLiteral(literal, ttype); } } } } */ @Override protected void assignStringTypes(Grammar root) { // walk string literals assigning types to unassigned ones Set s = stringLiterals.keySet(); for (Iterator it = s.iterator(); it.hasNext();) { String lit = (String) it.next(); Integer oldTypeI = (Integer)stringLiterals.get(lit); int oldType = oldTypeI.intValue(); if ( oldType