1/*
2 * [The "BSD license"]
3 *  Copyright (c) 2010 Terence Parr
4 *  All rights reserved.
5 *
6 *  Redistribution and use in source and binary forms, with or without
7 *  modification, are permitted provided that the following conditions
8 *  are met:
9 *  1. Redistributions of source code must retain the above copyright
10 *      notice, this list of conditions and the following disclaimer.
11 *  2. Redistributions in binary form must reproduce the above copyright
12 *      notice, this list of conditions and the following disclaimer in the
13 *      documentation and/or other materials provided with the distribution.
14 *  3. The name of the author may not be used to endorse or promote products
15 *      derived from this software without specific prior written permission.
16 *
17 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28package org.antlr.analysis;
29
30import org.antlr.misc.Utils;
31import org.antlr.tool.Grammar;
32
33import java.util.HashSet;
34import java.util.Set;
35
36/** A module to perform optimizations on DFAs.
37 *
38 *  I could more easily (and more quickly) do some optimizations (such as
39 *  PRUNE_EBNF_EXIT_BRANCHES) during DFA construction, but then it
40 *  messes up the determinism checking.  For example, it looks like
41 *  loop exit branches are unreachable if you prune exit branches
42 *  during DFA construction and before determinism checks.
43 *
44 *  In general, ANTLR's NFA->DFA->codegen pipeline seems very robust
45 *  to me which I attribute to a uniform and consistent set of data
46 *  structures.  Regardless of what I want to "say"/implement, I do so
47 *  within the confines of, for example, a DFA.  The code generator
48 *  can then just generate code--it doesn't have to do much thinking.
49 *  Putting optimizations in the code gen code really starts to make
50 *  it a spagetti factory (uh oh, now I'm hungry!).  The pipeline is
51 *  very testable; each stage has well defined input/output pairs.
52 *
53 *  ### Optimization: PRUNE_EBNF_EXIT_BRANCHES
54 *
55 *  There is no need to test EBNF block exit branches.  Not only is it
56 *  an unneeded computation, but counter-intuitively, you actually get
57 *  better errors. You can report an error at the missing or extra
58 *  token rather than as soon as you've figured out you will fail.
59 *
60 *  Imagine optional block "( DOT CLASS )? SEMI".  ANTLR generates:
61 *
62 *  int alt=0;
63 *  if ( input.LA(1)==DOT ) {
64 *      alt=1;
65 *  }
66 *  else if ( input.LA(1)==SEMI ) {
67 *      alt=2;
68 *  }
69 *
70 *  Clearly, since Parser.match() will ultimately find the error, we
71 *  do not want to report an error nor do we want to bother testing
72 *  lookahead against what follows the (...)?  We want to generate
73 *  simply "should I enter the subrule?":
74 *
75 *  int alt=2;
76 *  if ( input.LA(1)==DOT ) {
77 *      alt=1;
78 *  }
79 *
80 *  NOTE 1. Greedy loops cannot be optimized in this way.  For example,
81 *  "(greedy=false:'x'|.)* '\n'".  You specifically need the exit branch
82 *  to tell you when to terminate the loop as the same input actually
83 *  predicts one of the alts (i.e., staying in the loop).
84 *
85 *  NOTE 2.  I do not optimize cyclic DFAs at the moment as it doesn't
86 *  seem to work. ;)  I'll have to investigate later to see what work I
87 *  can do on cyclic DFAs to make them have fewer edges.  Might have
88 *  something to do with the EOT token.
89 *
90 *  ### PRUNE_SUPERFLUOUS_EOT_EDGES
91 *
92 *  When a token is a subset of another such as the following rules, ANTLR
93 *  quietly assumes the first token to resolve the ambiguity.
94 *
95 *  EQ			: '=' ;
96 *  ASSIGNOP	: '=' | '+=' ;
97 *
98 *  It can yield states that have only a single edge on EOT to an accept
99 *  state.  This is a waste and messes up my code generation. ;)  If
100 *  Tokens rule DFA goes
101 *
102 * 		s0 -'='-> s3 -EOT-> s5 (accept)
103 *
104 *  then s5 should be pruned and s3 should be made an accept.  Do NOT do this
105 *  for keyword versus ID as the state with EOT edge emanating from it will
106 *  also have another edge.
107 *
108 *  ### Optimization: COLLAPSE_ALL_INCIDENT_EDGES
109 *
110 *  Done during DFA construction.  See method addTransition() in
111 *  NFAToDFAConverter.
112 *
113 *  ### Optimization: MERGE_STOP_STATES
114 *
115 *  Done during DFA construction.  See addDFAState() in NFAToDFAConverter.
116 */
117public class DFAOptimizer {
118	public static boolean PRUNE_EBNF_EXIT_BRANCHES = true;
119	public static boolean PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES = true;
120	public static boolean COLLAPSE_ALL_PARALLEL_EDGES = true;
121	public static boolean MERGE_STOP_STATES = true;
122
123	/** Used by DFA state machine generator to avoid infinite recursion
124	 *  resulting from cycles int the DFA.  This is a set of int state #s.
125	 *  This is a side-effect of calling optimize; can't clear after use
126	 *  because code gen needs it.
127	 */
128	protected Set visited = new HashSet();
129
130    protected Grammar grammar;
131
132    public DFAOptimizer(Grammar grammar) {
133		this.grammar = grammar;
134    }
135
136	public void optimize() {
137		// optimize each DFA in this grammar
138		for (int decisionNumber=1;
139			 decisionNumber<=grammar.getNumberOfDecisions();
140			 decisionNumber++)
141		{
142			DFA dfa = grammar.getLookaheadDFA(decisionNumber);
143			optimize(dfa);
144		}
145	}
146
147	protected void optimize(DFA dfa) {
148		if ( dfa==null ) {
149			return; // nothing to do
150		}
151		/*
152		System.out.println("Optimize DFA "+dfa.decisionNFAStartState.decisionNumber+
153						   " num states="+dfa.getNumberOfStates());
154		*/
155		//long start = System.currentTimeMillis();
156		if ( PRUNE_EBNF_EXIT_BRANCHES && dfa.canInlineDecision() ) {
157			visited.clear();
158			int decisionType =
159				dfa.getNFADecisionStartState().decisionStateType;
160			if ( dfa.isGreedy() &&
161				 (decisionType==NFAState.OPTIONAL_BLOCK_START ||
162				 decisionType==NFAState.LOOPBACK) )
163			{
164				optimizeExitBranches(dfa.startState);
165			}
166		}
167		// If the Tokens rule has syntactically ambiguous rules, try to prune
168		if ( PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES &&
169			 dfa.isTokensRuleDecision() &&
170			 dfa.probe.stateToSyntacticallyAmbiguousTokensRuleAltsMap.size()>0 )
171		{
172			visited.clear();
173			optimizeEOTBranches(dfa.startState);
174		}
175
176		/* ack...code gen needs this, cannot optimize
177		visited.clear();
178		unlinkUnneededStateData(dfa.startState);
179		*/
180		//long stop = System.currentTimeMillis();
181		//System.out.println("minimized in "+(int)(stop-start)+" ms");
182    }
183
184	protected void optimizeExitBranches(DFAState d) {
185		Integer sI = Utils.integer(d.stateNumber);
186		if ( visited.contains(sI) ) {
187			return; // already visited
188		}
189		visited.add(sI);
190		int nAlts = d.dfa.getNumberOfAlts();
191		for (int i = 0; i < d.getNumberOfTransitions(); i++) {
192			Transition edge = (Transition) d.transition(i);
193			DFAState edgeTarget = ((DFAState)edge.target);
194			/*
195			System.out.println(d.stateNumber+"-"+
196							   edge.label.toString(d.dfa.nfa.grammar)+"->"+
197							   edgeTarget.stateNumber);
198			*/
199			// if target is an accept state and that alt is the exit alt
200			if ( edgeTarget.isAcceptState() &&
201				edgeTarget.getUniquelyPredictedAlt()==nAlts)
202			{
203				/*
204				System.out.println("ignoring transition "+i+" to max alt "+
205					d.dfa.getNumberOfAlts());
206				*/
207				d.removeTransition(i);
208				i--; // back up one so that i++ of loop iteration stays within bounds
209			}
210			optimizeExitBranches(edgeTarget);
211		}
212	}
213
214	protected void optimizeEOTBranches(DFAState d) {
215		Integer sI = Utils.integer(d.stateNumber);
216		if ( visited.contains(sI) ) {
217			return; // already visited
218		}
219		visited.add(sI);
220		for (int i = 0; i < d.getNumberOfTransitions(); i++) {
221			Transition edge = (Transition) d.transition(i);
222			DFAState edgeTarget = ((DFAState)edge.target);
223			/*
224			System.out.println(d.stateNumber+"-"+
225							   edge.label.toString(d.dfa.nfa.grammar)+"->"+
226							   edgeTarget.stateNumber);
227			*/
228			// if only one edge coming out, it is EOT, and target is accept prune
229			if ( PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES &&
230				edgeTarget.isAcceptState() &&
231				d.getNumberOfTransitions()==1 &&
232				edge.label.isAtom() &&
233				edge.label.getAtom()==Label.EOT )
234			{
235				//System.out.println("state "+d+" can be pruned");
236				// remove the superfluous EOT edge
237				d.removeTransition(i);
238				d.setAcceptState(true); // make it an accept state
239				// force it to uniquely predict the originally predicted state
240				d.cachedUniquelyPredicatedAlt =
241					edgeTarget.getUniquelyPredictedAlt();
242				i--; // back up one so that i++ of loop iteration stays within bounds
243			}
244			optimizeEOTBranches(edgeTarget);
245		}
246	}
247
248	/** Walk DFA states, unlinking the nfa configs and whatever else I
249	 *  can to reduce memory footprint.
250	protected void unlinkUnneededStateData(DFAState d) {
251		Integer sI = Utils.integer(d.stateNumber);
252		if ( visited.contains(sI) ) {
253			return; // already visited
254		}
255		visited.add(sI);
256		d.nfaConfigurations = null;
257		for (int i = 0; i < d.getNumberOfTransitions(); i++) {
258			Transition edge = (Transition) d.transition(i);
259			DFAState edgeTarget = ((DFAState)edge.target);
260			unlinkUnneededStateData(edgeTarget);
261		}
262	}
263	 */
264
265}
266