1324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/*
2324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * [The "BSD license"]
3324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  Copyright (c) 2010 Terence Parr
4324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  All rights reserved.
5324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
6324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  Redistribution and use in source and binary forms, with or without
7324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  modification, are permitted provided that the following conditions
8324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  are met:
9324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  1. Redistributions of source code must retain the above copyright
10324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *      notice, this list of conditions and the following disclaimer.
11324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  2. Redistributions in binary form must reproduce the above copyright
12324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *      notice, this list of conditions and the following disclaimer in the
13324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *      documentation and/or other materials provided with the distribution.
14324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  3. The name of the author may not be used to endorse or promote products
15324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *      derived from this software without specific prior written permission.
16324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
17324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
28324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverpackage org.antlr.tool;
29324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
30324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport org.antlr.analysis.*;
31324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport org.antlr.misc.Utils;
32324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
33324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport java.util.*;
34324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
35324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** An aspect of FA (finite automata) that knows how to dump them to serialized
36324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  strings.
37324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
38324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverpublic class FASerializer {
39324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** To prevent infinite recursion when walking state machines, record
40324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  which states we've visited.  Make a new set every time you start
41324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  walking in case you reuse this object.  Multiple threads will trash
42324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  this shared variable.  Use a different FASerializer per thread.
43324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
44324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    protected Set markedStates;
45324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
46324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Each state we walk will get a new state number for serialization
47324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  purposes.  This is the variable that tracks state numbers.
48324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
49324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    protected int stateCounter = 0;
50324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
51324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Rather than add a new instance variable to NFA and DFA just for
52324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  serializing machines, map old state numbers to new state numbers
53324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  by a State object -> Integer new state number HashMap.
54324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
55324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    protected Map stateNumberTranslator;
56324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
57324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    protected Grammar grammar;
58324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
59324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** This aspect is associated with a grammar; used to get token names */
60324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public FASerializer(Grammar grammar) {
61324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        this.grammar = grammar;
62324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
63324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
64324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	public String serialize(State s) {
65324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		if ( s==null ) {
66324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			return "<no automaton>";
67324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
68324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return serialize(s, true);
69324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
70324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
71324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/** Return a string representation of a state machine.  Two identical
72324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  NFAs or DFAs will have identical serialized representations.  The
73324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  state numbers inside the state are not used; instead, a new number
74324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  is computed and because the serialization will walk the two
75324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  machines using the same specific algorithm, then the state numbers
76324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  will be identical.  Accept states are distinguished from regular
77324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  states.
78324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
79324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    public String serialize(State s, boolean renumber) {
80324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        markedStates = new HashSet();
81324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        stateCounter = 0;
82324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		if ( renumber ) {
83324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			stateNumberTranslator = new HashMap();
84324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        	walkFANormalizingStateNumbers(s);
85324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
86324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		List lines = new ArrayList();
87324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if ( s.getNumberOfTransitions()>0 ) {
88324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			walkSerializingFA(lines, s);
89324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
90324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		else {
91324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			// special case: s0 is an accept
92324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			String s0 = getStateString(0, s);
93324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			lines.add(s0+"\n");
94324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
95324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        StringBuffer buf = new StringBuffer(0);
96324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // sort lines to normalize; makes states come out ordered
97324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // and then ordered by edge labels then by target state number :)
98324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        Collections.sort(lines);
99324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        for (int i = 0; i < lines.size(); i++) {
100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            String line = (String) lines.get(i);
101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            buf.append(line);
102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return buf.toString();
104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** In stateNumberTranslator, get a map from State to new, normalized
107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  state number.  Used by walkSerializingFA to make sure any two
108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  identical state machines will serialize the same way.
109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    protected void walkFANormalizingStateNumbers(State s) {
111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		if ( s==null ) {
112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			ErrorManager.internalError("null state s");
113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			return;
114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if ( stateNumberTranslator.get(s)!=null ) {
116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return; // already did this state
117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // assign a new state number for this node if there isn't one
119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        stateNumberTranslator.put(s, Utils.integer(stateCounter));
120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        stateCounter++;
121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // visit nodes pointed to by each transition;
123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        for (int i = 0; i < s.getNumberOfTransitions(); i++) {
124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            Transition edge = (Transition) s.transition(i);
125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            walkFANormalizingStateNumbers(edge.target); // keep walkin'
126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // if this transition is a rule reference, the node "following" this state
127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // will not be found and appear to be not in graph.  Must explicitly jump
128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // to it, but don't "draw" an edge.
129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if ( edge instanceof RuleClosureTransition ) {
130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				walkFANormalizingStateNumbers(((RuleClosureTransition) edge).followState);
131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    protected void walkSerializingFA(List lines, State s) {
136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if ( markedStates.contains(s) ) {
137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return; // already visited this node
138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        markedStates.add(s); // mark this node as completed.
141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		int normalizedStateNumber = s.stateNumber;
143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		if ( stateNumberTranslator!=null ) {
144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	        Integer normalizedStateNumberI = (Integer)stateNumberTranslator.get(s);
145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			normalizedStateNumber = normalizedStateNumberI.intValue();
146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		String stateStr = getStateString(normalizedStateNumber, s);
149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // depth first walk each transition, printing its edge first
151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        for (int i = 0; i < s.getNumberOfTransitions(); i++) {
152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            Transition edge = (Transition) s.transition(i);
153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            StringBuffer buf = new StringBuffer();
154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            buf.append(stateStr);
155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			if ( edge.isAction() ) {
156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				buf.append("-{}->");
157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			}
158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			else if ( edge.isEpsilon() ) {
159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				buf.append("->");
160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			}
161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			else if ( edge.isSemanticPredicate() ) {
162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				buf.append("-{"+edge.label.getSemanticContext()+"}?->");
163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			}
164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			else {
165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				String predsStr = "";
166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				if ( edge.target instanceof DFAState ) {
167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver					// look for gated predicates; don't add gated to simple sempred edges
168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver					SemanticContext preds =
169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver						((DFAState)edge.target).getGatedPredicatesInNFAConfigurations();
170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver					if ( preds!=null ) {
171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver						predsStr = "&&{"+
172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver							preds.genExpr(grammar.generator,
173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver									   	  grammar.generator.getTemplates(), null).render()
174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver							+"}?";
175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver					}
176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				}
177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				buf.append("-"+edge.label.toString(grammar)+predsStr+"->");
178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			}
179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			int normalizedTargetStateNumber = edge.target.stateNumber;
181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			if ( stateNumberTranslator!=null ) {
182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				Integer normalizedTargetStateNumberI =
183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver					(Integer)stateNumberTranslator.get(edge.target);
184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				normalizedTargetStateNumber = normalizedTargetStateNumberI.intValue();
185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			}
186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			buf.append(getStateString(normalizedTargetStateNumber, edge.target));
187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            buf.append("\n");
188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            lines.add(buf.toString());
189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // walk this transition
191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            walkSerializingFA(lines, edge.target);
192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // if this transition is a rule reference, the node "following" this state
194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // will not be found and appear to be not in graph.  Must explicitly jump
195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // to it, but don't "draw" an edge.
196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if ( edge instanceof RuleClosureTransition ) {
197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver				walkSerializingFA(lines, ((RuleClosureTransition) edge).followState);
198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    private String getStateString(int n, State s) {
204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        String stateStr = ".s"+n;
205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if ( s.isAcceptState() ) {
206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if ( s instanceof DFAState ) {
207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                stateStr = ":s"+n+"=>"+((DFAState)s).getUniquelyPredictedAlt();
208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            else {
210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                stateStr = ":s"+n;
211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return stateStr;
214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
218