1/*
2 * [The "BSD license"]
3 *  Copyright (c) 2010 Terence Parr
4 *  All rights reserved.
5 *
6 *  Redistribution and use in source and binary forms, with or without
7 *  modification, are permitted provided that the following conditions
8 *  are met:
9 *  1. Redistributions of source code must retain the above copyright
10 *      notice, this list of conditions and the following disclaimer.
11 *  2. Redistributions in binary form must reproduce the above copyright
12 *      notice, this list of conditions and the following disclaimer in the
13 *      documentation and/or other materials provided with the distribution.
14 *  3. The name of the author may not be used to endorse or promote products
15 *      derived from this software without specific prior written permission.
16 *
17 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28package org.antlr.tool;
29
30import org.antlr.analysis.Label;
31import org.antlr.grammar.v3.AssignTokenTypesWalker;
32import org.antlr.misc.Utils;
33import org.antlr.runtime.tree.TreeNodeStream;
34
35import java.util.*;
36
37/** Move all of the functionality from assign.types.g grammar file. */
38public class AssignTokenTypesBehavior extends AssignTokenTypesWalker {
39	protected static final Integer UNASSIGNED = Utils.integer(-1);
40	protected static final Integer UNASSIGNED_IN_PARSER_RULE = Utils.integer(-2);
41
42	protected Map<String,Integer> stringLiterals = new TreeMap<String, Integer>();
43	protected Map<String,Integer> tokens = new TreeMap<String, Integer>();
44	protected Map<String,String> aliases = new TreeMap<String, String>();
45	protected Map<String,String> aliasesReverseIndex = new HashMap<String,String>();
46
47	/** Track actual lexer rule defs so we don't get repeated token defs in
48	 *  generated lexer.
49	 */
50	protected Set<String> tokenRuleDefs = new HashSet();
51
52	public AssignTokenTypesBehavior() {
53		super(null);
54	}
55
56    @Override
57	protected void init(Grammar g) {
58		this.grammar = g;
59		currentRuleName = null;
60		if ( stringAlias==null ) {
61			// only init once; can't statically init since we need astFactory
62			initASTPatterns();
63		}
64	}
65
66	/** Track string literals (could be in tokens{} section) */
67    @Override
68	protected void trackString(GrammarAST t) {
69		// if lexer, don't allow aliasing in tokens section
70		if ( currentRuleName==null && grammar.type==Grammar.LEXER ) {
71			ErrorManager.grammarError(ErrorManager.MSG_CANNOT_ALIAS_TOKENS_IN_LEXER,
72									  grammar,
73									  t.token,
74									  t.getText());
75			return;
76		}
77		// in a plain parser grammar rule, cannot reference literals
78		// (unless defined previously via tokenVocab option)
79		// don't warn until we hit root grammar as may be defined there.
80		if ( grammar.getGrammarIsRoot() &&
81			 grammar.type==Grammar.PARSER &&
82			 grammar.getTokenType(t.getText())== Label.INVALID )
83		{
84			ErrorManager.grammarError(ErrorManager.MSG_LITERAL_NOT_ASSOCIATED_WITH_LEXER_RULE,
85									  grammar,
86									  t.token,
87									  t.getText());
88		}
89		// Don't record literals for lexers, they are things to match not tokens
90		if ( grammar.type==Grammar.LEXER ) {
91			return;
92		}
93		// otherwise add literal to token types if referenced from parser rule
94		// or in the tokens{} section
95		if ( (currentRuleName==null ||
96			  Character.isLowerCase(currentRuleName.charAt(0))) &&
97																grammar.getTokenType(t.getText())==Label.INVALID )
98		{
99			stringLiterals.put(t.getText(), UNASSIGNED_IN_PARSER_RULE);
100		}
101	}
102
103    @Override
104	protected void trackToken(GrammarAST t) {
105		// imported token names might exist, only add if new
106		// Might have ';'=4 in vocab import and SEMI=';'. Avoid
107		// setting to UNASSIGNED if we have loaded ';'/SEMI
108		if ( grammar.getTokenType(t.getText())==Label.INVALID &&
109			 tokens.get(t.getText())==null )
110		{
111			tokens.put(t.getText(), UNASSIGNED);
112		}
113	}
114
115    @Override
116	protected void trackTokenRule(GrammarAST t,
117								  GrammarAST modifier,
118								  GrammarAST block)
119	{
120		// imported token names might exist, only add if new
121		if ( grammar.type==Grammar.LEXER || grammar.type==Grammar.COMBINED ) {
122			if ( !Character.isUpperCase(t.getText().charAt(0)) ) {
123				return;
124			}
125			if ( t.getText().equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) {
126				// don't add Tokens rule
127				return;
128			}
129
130			// track all lexer rules so we can look for token refs w/o
131			// associated lexer rules.
132			grammar.composite.lexerRules.add(t.getText());
133
134			int existing = grammar.getTokenType(t.getText());
135			if ( existing==Label.INVALID ) {
136				tokens.put(t.getText(), UNASSIGNED);
137			}
138			// look for "<TOKEN> : <literal> ;" pattern
139			// (can have optional action last)
140			if ( block.hasSameTreeStructure(charAlias) ||
141				 block.hasSameTreeStructure(stringAlias) ||
142				 block.hasSameTreeStructure(charAlias2) ||
143				 block.hasSameTreeStructure(stringAlias2) )
144			{
145				tokenRuleDefs.add(t.getText());
146				/*
147			Grammar parent = grammar.composite.getDelegator(grammar);
148			boolean importedByParserOrCombined =
149				parent!=null &&
150				(parent.type==Grammar.LEXER||parent.type==Grammar.PARSER);
151				*/
152				if ( grammar.type==Grammar.COMBINED || grammar.type==Grammar.LEXER ) {
153					// only call this rule an alias if combined or lexer
154					alias(t, (GrammarAST)block.getChild(0).getChild(0));
155				}
156			}
157		}
158		// else error
159	}
160
161    @Override
162	protected void alias(GrammarAST t, GrammarAST s) {
163		String tokenID = t.getText();
164		String literal = s.getText();
165		String prevAliasLiteralID = aliasesReverseIndex.get(literal);
166		if ( prevAliasLiteralID!=null ) { // we've seen this literal before
167			if ( tokenID.equals(prevAliasLiteralID) ) {
168				// duplicate but identical alias; might be tokens {A='a'} and
169				// lexer rule A : 'a' ;  Is ok, just return
170				return;
171			}
172
173			// give error unless both are rules (ok if one is in tokens section)
174			if ( !(tokenRuleDefs.contains(tokenID) && tokenRuleDefs.contains(prevAliasLiteralID)) )
175			{
176				// don't allow alias if A='a' in tokens section and B : 'a'; is rule.
177				// Allow if both are rules.  Will get DFA nondeterminism error later.
178				ErrorManager.grammarError(ErrorManager.MSG_TOKEN_ALIAS_CONFLICT,
179										  grammar,
180										  t.token,
181										  tokenID+"="+literal,
182										  prevAliasLiteralID);
183			}
184			return; // don't do the alias
185		}
186		int existingLiteralType = grammar.getTokenType(literal);
187		if ( existingLiteralType !=Label.INVALID ) {
188			// we've seen this before from a tokenVocab most likely
189			// don't assign a new token type; use existingLiteralType.
190			tokens.put(tokenID, existingLiteralType);
191		}
192		String prevAliasTokenID = aliases.get(tokenID);
193		if ( prevAliasTokenID!=null ) {
194			ErrorManager.grammarError(ErrorManager.MSG_TOKEN_ALIAS_REASSIGNMENT,
195									  grammar,
196									  t.token,
197									  tokenID+"="+literal,
198									  prevAliasTokenID);
199			return; // don't do the alias
200		}
201		aliases.put(tokenID, literal);
202		aliasesReverseIndex.put(literal, tokenID);
203	}
204
205    @Override
206	public void defineTokens(Grammar root) {
207/*
208	System.out.println("stringLiterals="+stringLiterals);
209	System.out.println("tokens="+tokens);
210	System.out.println("aliases="+aliases);
211	System.out.println("aliasesReverseIndex="+aliasesReverseIndex);
212*/
213
214		assignTokenIDTypes(root);
215
216		aliasTokenIDsAndLiterals(root);
217
218		assignStringTypes(root);
219
220/*
221	System.out.println("stringLiterals="+stringLiterals);
222	System.out.println("tokens="+tokens);
223	System.out.println("aliases="+aliases);
224*/
225		defineTokenNamesAndLiteralsInGrammar(root);
226	}
227
228/*
229protected void defineStringLiteralsFromDelegates() {
230	 if ( grammar.getGrammarIsMaster() && grammar.type==Grammar.COMBINED ) {
231		 List<Grammar> delegates = grammar.getDelegates();
232		 System.out.println("delegates in master combined: "+delegates);
233		 for (int i = 0; i < delegates.size(); i++) {
234			 Grammar d = (Grammar) delegates.get(i);
235			 Set<String> literals = d.getStringLiterals();
236			 for (Iterator it = literals.iterator(); it.hasNext();) {
237				 String literal = (String) it.next();
238				 System.out.println("literal "+literal);
239				 int ttype = grammar.getTokenType(literal);
240				 grammar.defineLexerRuleForStringLiteral(literal, ttype);
241			 }
242		 }
243	 }
244}
245*/
246
247    @Override
248	protected void assignStringTypes(Grammar root) {
249		// walk string literals assigning types to unassigned ones
250		Set s = stringLiterals.keySet();
251		for (Iterator it = s.iterator(); it.hasNext();) {
252			String lit = (String) it.next();
253			Integer oldTypeI = (Integer)stringLiterals.get(lit);
254			int oldType = oldTypeI.intValue();
255			if ( oldType<Label.MIN_TOKEN_TYPE ) {
256				Integer typeI = Utils.integer(root.getNewTokenType());
257				stringLiterals.put(lit, typeI);
258				// if string referenced in combined grammar parser rule,
259				// automatically define in the generated lexer
260				root.defineLexerRuleForStringLiteral(lit, typeI.intValue());
261			}
262		}
263	}
264
265    @Override
266	protected void aliasTokenIDsAndLiterals(Grammar root) {
267		if ( root.type==Grammar.LEXER ) {
268			return; // strings/chars are never token types in LEXER
269		}
270		// walk aliases if any and assign types to aliased literals if literal
271		// was referenced
272		Set s = aliases.keySet();
273		for (Iterator it = s.iterator(); it.hasNext();) {
274			String tokenID = (String) it.next();
275			String literal = (String)aliases.get(tokenID);
276			if ( literal.charAt(0)=='\'' && stringLiterals.get(literal)!=null ) {
277				stringLiterals.put(literal, tokens.get(tokenID));
278				// an alias still means you need a lexer rule for it
279				Integer typeI = (Integer)tokens.get(tokenID);
280				if ( !tokenRuleDefs.contains(tokenID) ) {
281					root.defineLexerRuleForAliasedStringLiteral(tokenID, literal, typeI.intValue());
282				}
283			}
284		}
285	}
286
287    @Override
288	protected void assignTokenIDTypes(Grammar root) {
289		// walk token names, assigning values if unassigned
290		Set s = tokens.keySet();
291		for (Iterator it = s.iterator(); it.hasNext();) {
292			String tokenID = (String) it.next();
293			if ( tokens.get(tokenID)==UNASSIGNED ) {
294				tokens.put(tokenID, Utils.integer(root.getNewTokenType()));
295			}
296		}
297	}
298
299    @Override
300	protected void defineTokenNamesAndLiteralsInGrammar(Grammar root) {
301		Set s = tokens.keySet();
302		for (Iterator it = s.iterator(); it.hasNext();) {
303			String tokenID = (String) it.next();
304			int ttype = ((Integer)tokens.get(tokenID)).intValue();
305			root.defineToken(tokenID, ttype);
306		}
307		s = stringLiterals.keySet();
308		for (Iterator it = s.iterator(); it.hasNext();) {
309			String lit = (String) it.next();
310			int ttype = ((Integer)stringLiterals.get(lit)).intValue();
311			root.defineToken(lit, ttype);
312		}
313	}
314
315}
316