1324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/* 2324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * [The "BSD license"] 3324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Copyright (c) 2010 Terence Parr 4324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * All rights reserved. 5324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 6324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Redistribution and use in source and binary forms, with or without 7324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * modification, are permitted provided that the following conditions 8324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * are met: 9324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 1. Redistributions of source code must retain the above copyright 10324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * notice, this list of conditions and the following disclaimer. 11324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 2. Redistributions in binary form must reproduce the above copyright 12324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * notice, this list of conditions and the following disclaimer in the 13324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * documentation and/or other materials provided with the distribution. 14324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 3. The name of the author may not be used to endorse or promote products 15324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * derived from this software without specific prior written permission. 16324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 17324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 28324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverpackage org.antlr.tool; 29324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 30324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport org.antlr.analysis.*; 31324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport org.antlr.misc.IntSet; 32324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport org.antlr.misc.IntervalSet; 33324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 34324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport java.util.ArrayList; 35324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport java.util.Collection; 36324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport java.util.Iterator; 37324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverimport java.util.List; 38324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 39324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Routines to construct StateClusters from EBNF grammar constructs. 40324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * No optimization is done to remove unnecessary epsilon edges. 41324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 42324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * TODO: add an optimization that reduces number of states and transitions 43324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * will help with speed of conversion and make it easier to view NFA. For 44324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * example, o-A->o-->o-B->o should be o-A->o-B->o 45324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 46324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverpublic class NFAFactory { 47324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** This factory is attached to a specifc NFA that it is building. 48324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * The NFA will be filled up with states and transitions. 49324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 50324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFA nfa = null; 51324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 52324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public Rule getCurrentRule() { 53324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return currentRule; 54324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 55324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 56324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public void setCurrentRule(Rule currentRule) { 57324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver this.currentRule = currentRule; 58324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 59324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 60324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Rule currentRule = null; 61324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 62324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public NFAFactory(NFA nfa) { 63324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nfa.setFactory(this); 64324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver this.nfa = nfa; 65324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 66324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 67324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public NFAState newState() { 68324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState n = new NFAState(nfa); 69324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int state = nfa.getNewNFAStateNumber(); 70324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver n.stateNumber = state; 71324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nfa.addState(n); 72324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver n.enclosingRule = currentRule; 73324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return n; 74324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 75324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 76324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Optimize an alternative (list of grammar elements). 77324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 78324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Walk the chain of elements (which can be complicated loop blocks...) 79324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * and throw away any epsilon transitions used to link up simple elements. 80324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 81324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This only removes 195 states from the java.g's NFA, but every little 82324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * bit helps. Perhaps I can improve in the future. 83324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 84324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public void optimizeAlternative(StateCluster alt) { 85324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState s = alt.left; 86324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while ( s!=alt.right ) { 87324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // if it's a block element, jump over it and continue 88324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( s.endOfBlockStateNumber!=State.INVALID_STATE_NUMBER ) { 89324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver s = nfa.getState(s.endOfBlockStateNumber); 90324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver continue; 91324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 92324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Transition t = s.transition[0]; 93324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( t instanceof RuleClosureTransition ) { 94324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver s = ((RuleClosureTransition) t).followState; 95324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver continue; 96324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 97324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( t.label.isEpsilon() && !t.label.isAction() && s.getNumberOfTransitions()==1 ) { 98324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // bypass epsilon transition and point to what the epsilon's 99324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // target points to unless that epsilon transition points to 100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // a block or loop etc.. Also don't collapse epsilons that 101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // point at the last node of the alt. Don't collapse action edges 102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState epsilonTarget = (NFAState)t.target; 103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( epsilonTarget.endOfBlockStateNumber==State.INVALID_STATE_NUMBER && 104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver epsilonTarget.transition[0] !=null ) 105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver s.setTransition0(epsilonTarget.transition[0]); 107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* 108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver System.out.println("### opt "+s.stateNumber+"->"+ 109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver epsilonTarget.transition(0).target.stateNumber); 110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver s = (NFAState)t.target; 114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** From label A build Graph o-A->o */ 118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_Atom(int label, GrammarAST associatedAST) { 119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState left = newState(); 120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState right = newState(); 121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver left.associatedASTNode = associatedAST; 122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver right.associatedASTNode = associatedAST; 123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(left, right, label); 124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster g = new StateCluster(left, right); 125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return g; 126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_Atom(GrammarAST atomAST) { 129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int tokenType = nfa.grammar.getTokenType(atomAST.getText()); 130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return build_Atom(tokenType, atomAST); 131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** From set build single edge graph o->o-set->o. To conform to 134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * what an alt block looks like, must have extra state on left. 135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_Set(IntSet set, GrammarAST associatedAST) { 137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState left = newState(); 138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState right = newState(); 139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver left.associatedASTNode = associatedAST; 140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver right.associatedASTNode = associatedAST; 141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Label label = new Label(set); 142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Transition e = new Transition(label,right); 143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver left.addTransition(e); 144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster g = new StateCluster(left, right); 145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return g; 146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Can only complement block of simple alts; can complement build_Set() 149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * result, that is. Get set and complement, replace old with complement. 150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_AlternativeBlockComplement(StateCluster blk) { 151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver State s0 = blk.left; 152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver IntSet set = getCollapsedBlockAsSet(s0); 153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( set!=null ) { 154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // if set is available, then structure known and blk is a set 155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver set = nfa.grammar.complement(set); 156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Label label = s0.transition(0).target.transition(0).label; 157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver label.setSet(set); 158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return blk; 160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_Range(int a, int b) { 164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState left = newState(); 165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState right = newState(); 166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Label label = new Label(IntervalSet.of(a, b)); 167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Transition e = new Transition(label,right); 168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver left.addTransition(e); 169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster g = new StateCluster(left, right); 170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return g; 171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** From char 'c' build StateCluster o-intValue(c)->o 174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_CharLiteralAtom(GrammarAST charLiteralAST) { 176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int c = Grammar.getCharValueFromGrammarCharLiteral(charLiteralAST.getText()); 177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return build_Atom(c, charLiteralAST); 178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** From char 'c' build StateCluster o-intValue(c)->o 181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * can include unicode spec likes '\u0024' later. Accepts 182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * actual unicode 16-bit now, of course, by default. 183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * TODO not supplemental char clean! 184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_CharRange(String a, String b) { 186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int from = Grammar.getCharValueFromGrammarCharLiteral(a); 187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int to = Grammar.getCharValueFromGrammarCharLiteral(b); 188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return build_Range(from, to); 189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** For a non-lexer, just build a simple token reference atom. 192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * For a lexer, a string is a sequence of char to match. That is, 193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * "fog" is treated as 'f' 'o' 'g' not as a single transition in 194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the DFA. Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states 195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * for n characters. 196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_StringLiteralAtom(GrammarAST stringLiteralAST) { 198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( nfa.grammar.type==Grammar.LEXER ) { 199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StringBuffer chars = 200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Grammar.getUnescapedStringFromGrammarStringLiteral(stringLiteralAST.getText()); 201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState first = newState(); 202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState last = null; 203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState prev = first; 204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for (int i=0; i<chars.length(); i++) { 205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int c = chars.charAt(i); 206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState next = newState(); 207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(prev, next, c); 208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver prev = last = next; 209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return new StateCluster(first, last); 211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // a simple token reference in non-Lexers 214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int tokenType = nfa.grammar.getTokenType(stringLiteralAST.getText()); 215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return build_Atom(tokenType, stringLiteralAST); 216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** For reference to rule r, build 219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * o-e->(r) o 221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * where (r) is the start of rule r and the trailing o is not linked 223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * to from rule ref state directly (it's done thru the transition(0) 224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * RuleClosureTransition. 225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * If the rule r is just a list of tokens, it's block will be just 227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * a set on an edge o->o->o-set->o->o->o, could inline it rather than doing 228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the rule reference, but i'm not doing this yet as I'm not sure 229324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * it would help much in the NFA->DFA construction. 230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * TODO add to codegen: collapse alt blks that are sets into single matchSet 232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 233324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_RuleRef(Rule refDef, NFAState ruleStart) { 234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver //System.out.println("building ref to rule "+nfa.grammar.name+"."+refDef.name); 235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState left = newState(); 236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // left.setDescription("ref to "+ruleStart.getDescription()); 237324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState right = newState(); 238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // right.setDescription("NFAState following ref to "+ruleStart.getDescription()); 239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Transition e = new RuleClosureTransition(refDef,ruleStart,right); 240324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver left.addTransition(e); 241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster g = new StateCluster(left, right); 242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return g; 243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** From an empty alternative build StateCluster o-e->o */ 246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_Epsilon() { 247324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState left = newState(); 248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState right = newState(); 249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(left, right, Label.EPSILON); 250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster g = new StateCluster(left, right); 251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return g; 252324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Build what amounts to an epsilon transition with a semantic 255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * predicate action. The pred is a pointer into the AST of 256324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the SEMPRED token. 257324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_SemanticPredicate(GrammarAST pred) { 259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // don't count syn preds 260324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( !pred.getText().toUpperCase() 261324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver .startsWith(Grammar.SYNPRED_RULE_PREFIX.toUpperCase()) ) 262324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 263324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nfa.grammar.numberOfSemanticPredicates++; 264324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 265324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState left = newState(); 266324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState right = newState(); 267324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Transition e = new Transition(new PredicateLabel(pred), right); 268324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver left.addTransition(e); 269324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster g = new StateCluster(left, right); 270324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return g; 271324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 272324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 273324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Build what amounts to an epsilon transition with an action. 274324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * The action goes into NFA though it is ignored during analysis. 275324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * It slows things down a bit, but I must ignore predicates after 276324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * having seen an action (5-5-2008). 277324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 278324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_Action(GrammarAST action) { 279324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState left = newState(); 280324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState right = newState(); 281324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Transition e = new Transition(new ActionLabel(action), right); 282324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver left.addTransition(e); 283324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return new StateCluster(left, right); 284324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 285324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 286324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** add an EOF transition to any rule end NFAState that points to nothing 287324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * (i.e., for all those rules not invoked by another rule). These 288324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * are start symbols then. 289324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 290324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Return the number of grammar entry points; i.e., how many rules are 291324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * not invoked by another rule (they can only be invoked from outside). 292324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * These are the start rules. 293324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 294324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public int build_EOFStates(Collection rules) { 295324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int numberUnInvokedRules = 0; 296324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for (Iterator iterator = rules.iterator(); iterator.hasNext();) { 297324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Rule r = (Rule) iterator.next(); 298324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState endNFAState = r.stopState; 299324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Is this rule a start symbol? (no follow links) 300324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( endNFAState.transition[0] ==null ) { 301324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // if so, then don't let algorithm fall off the end of 302324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // the rule, make it hit EOF/EOT. 303324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver build_EOFState(endNFAState); 304324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // track how many rules have been invoked by another rule 305324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver numberUnInvokedRules++; 306324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 307324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 308324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return numberUnInvokedRules; 309324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 310324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 311324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** set up an NFA NFAState that will yield eof tokens or, 312324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * in the case of a lexer grammar, an EOT token when the conversion 313324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * hits the end of a rule. 314324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 315324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver private void build_EOFState(NFAState endNFAState) { 316324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState end = newState(); 317324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int label = Label.EOF; 318324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( nfa.grammar.type==Grammar.LEXER ) { 319324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver label = Label.EOT; 320324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end.setEOTTargetState(true); 321324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 322324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* 323324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver System.out.println("build "+nfa.grammar.getTokenDisplayName(label)+ 324324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver " loop on end of state "+endNFAState.getDescription()+ 325324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver " to state "+end.stateNumber); 326324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 327324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Transition toEnd = new Transition(label, end); 328324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver endNFAState.addTransition(toEnd); 329324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 330324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 331324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** From A B build A-e->B (that is, build an epsilon arc from right 332324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * of A to left of B). 333324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 334324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * As a convenience, return B if A is null or return A if B is null. 335324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 336324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_AB(StateCluster A, StateCluster B) { 337324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( A==null ) { 338324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return B; 339324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 340324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( B==null ) { 341324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return A; 342324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 343324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(A.right, B.left, Label.EPSILON); 344324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster g = new StateCluster(A.left, B.right); 345324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return g; 346324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 347324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 348324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** From a set ('a'|'b') build 349324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 350324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * o->o-'a'..'b'->o->o (last NFAState is blockEndNFAState pointed to by all alts) 351324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 352324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_AlternativeBlockFromSet(StateCluster set) { 353324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( set==null ) { 354324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return null; 355324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 356324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 357324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // single alt, no decision, just return only alt state cluster 358324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState startOfAlt = newState(); // must have this no matter what 359324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(startOfAlt, set.left, Label.EPSILON); 360324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 361324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return new StateCluster(startOfAlt,set.right); 362324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 363324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 364324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** From A|B|..|Z alternative block build 365324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 366324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * o->o-A->o->o (last NFAState is blockEndNFAState pointed to by all alts) 367324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * | ^ 368324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * o->o-B->o--| 369324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * | | 370324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * ... | 371324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * | | 372324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * o->o-Z->o--| 373324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 374324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * So every alternative gets begin NFAState connected by epsilon 375324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * and every alt right side points at a block end NFAState. There is a 376324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * new NFAState in the NFAState in the StateCluster for each alt plus one for the 377324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * end NFAState. 378324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 379324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Special case: only one alternative: don't make a block with alt 380324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * begin/end. 381324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 382324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Special case: if just a list of tokens/chars/sets, then collapse 383324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * to a single edge'd o-set->o graph. 384324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 385324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Set alt number (1..n) in the left-Transition NFAState. 386324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 387324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_AlternativeBlock(List alternativeStateClusters) 388324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 389324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster result = null; 390324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( alternativeStateClusters==null || alternativeStateClusters.size()==0 ) { 391324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return null; 392324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 393324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 394324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // single alt case 395324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( alternativeStateClusters.size()==1 ) { 396324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // single alt, no decision, just return only alt state cluster 397324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster g = (StateCluster)alternativeStateClusters.get(0); 398324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState startOfAlt = newState(); // must have this no matter what 399324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(startOfAlt, g.left, Label.EPSILON); 400324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 401324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver //System.out.println("### opt saved start/stop end in (...)"); 402324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return new StateCluster(startOfAlt,g.right); 403324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 404324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 405324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // even if we can collapse for lookahead purposes, we will still 406324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // need to predict the alts of this subrule in case there are actions 407324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // etc... This is the decision that is pointed to from the AST node 408324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // (always) 409324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState prevAlternative = null; // tracks prev so we can link to next alt 410324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState firstAlt = null; 411324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState blockEndNFAState = newState(); 412324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver blockEndNFAState.setDescription("end block"); 413324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int altNum = 1; 414324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for (Iterator iter = alternativeStateClusters.iterator(); iter.hasNext();) { 415324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster g = (StateCluster) iter.next(); 416324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // add begin NFAState for this alt connected by epsilon 417324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState left = newState(); 418324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver left.setDescription("alt "+altNum+" of ()"); 419324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(left, g.left, Label.EPSILON); 420324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(g.right, blockEndNFAState, Label.EPSILON); 421324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Are we the first alternative? 422324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( firstAlt==null ) { 423324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver firstAlt = left; // track extreme left node of StateCluster 424324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 425324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else { 426324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // if not first alternative, must link to this alt from previous 427324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(prevAlternative, left, Label.EPSILON); 428324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 429324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver prevAlternative = left; 430324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver altNum++; 431324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 432324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 433324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // return StateCluster pointing representing entire block 434324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Points to first alt NFAState on left, block end on right 435324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver result = new StateCluster(firstAlt, blockEndNFAState); 436324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 437324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver firstAlt.decisionStateType = NFAState.BLOCK_START; 438324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 439324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // set EOB markers for Jean 440324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver firstAlt.endOfBlockStateNumber = blockEndNFAState.stateNumber; 441324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 442324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return result; 443324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 444324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 445324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** From (A)? build either: 446324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 447324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * o--A->o 448324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * | ^ 449324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * o---->| 450324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 451324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * or, if A is a block, just add an empty alt to the end of the block 452324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 453324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_Aoptional(StateCluster A) { 454324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster g = null; 455324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int n = nfa.grammar.getNumberOfAltsForDecisionNFA(A.left); 456324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( n==1 ) { 457324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // no decision, just wrap in an optional path 458324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver //NFAState decisionState = newState(); 459324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState decisionState = A.left; // resuse left edge 460324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver decisionState.setDescription("only alt of ()? block"); 461324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState emptyAlt = newState(); 462324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver emptyAlt.setDescription("epsilon path of ()? block"); 463324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState blockEndNFAState = null; 464324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver blockEndNFAState = newState(); 465324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON); 466324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver blockEndNFAState.setDescription("end ()? block"); 467324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver //transitionBetweenStates(decisionState, A.left, Label.EPSILON); 468324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(decisionState, emptyAlt, Label.EPSILON); 469324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(emptyAlt, blockEndNFAState, Label.EPSILON); 470324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 471324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // set EOB markers for Jean 472324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver decisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber; 473324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK; 474324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 475324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver g = new StateCluster(decisionState, blockEndNFAState); 476324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 477324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else { 478324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // a decision block, add an empty alt 479324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState lastRealAlt = 480324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nfa.grammar.getNFAStateForAltOfDecision(A.left, n); 481324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState emptyAlt = newState(); 482324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver emptyAlt.setDescription("epsilon path of ()? block"); 483324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(lastRealAlt, emptyAlt, Label.EPSILON); 484324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(emptyAlt, A.right, Label.EPSILON); 485324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 486324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // set EOB markers for Jean (I think this is redundant here) 487324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver A.left.endOfBlockStateNumber = A.right.stateNumber; 488324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver A.right.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK; 489324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 490324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver g = A; // return same block, but now with optional last path 491324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 492324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver g.left.decisionStateType = NFAState.OPTIONAL_BLOCK_START; 493324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 494324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return g; 495324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 496324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 497324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** From (A)+ build 498324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 499324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * |---| (Transition 2 from A.right points at alt 1) 500324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * v | (follow of loop is Transition 1) 501324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * o->o-A-o->o 502324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 503324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Meaning that the last NFAState in A points back to A's left Transition NFAState 504324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * and we add a new begin/end NFAState. A can be single alternative or 505324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * multiple. 506324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 507324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * During analysis we'll call the follow link (transition 1) alt n+1 for 508324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * an n-alt A block. 509324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 510324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_Aplus(StateCluster A) { 511324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState left = newState(); 512324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState blockEndNFAState = newState(); 513324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK; 514324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 515324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // don't reuse A.right as loopback if it's right edge of another block 516324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK ) { 517324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // nested A* so make another tail node to be the loop back 518324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // instead of the usual A.right which is the EOB for inner loop 519324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState extraRightEdge = newState(); 520324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(A.right, extraRightEdge, Label.EPSILON); 521324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver A.right = extraRightEdge; 522324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 523324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 524324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON); // follow is Transition 1 525324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // turn A's block end into a loopback (acts like alt 2) 526324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(A.right, A.left, Label.EPSILON); // loop back Transition 2 527324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(left, A.left, Label.EPSILON); 528324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 529324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver A.right.decisionStateType = NFAState.LOOPBACK; 530324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver A.left.decisionStateType = NFAState.BLOCK_START; 531324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 532324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // set EOB markers for Jean 533324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver A.left.endOfBlockStateNumber = A.right.stateNumber; 534324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 535324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster g = new StateCluster(left, blockEndNFAState); 536324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return g; 537324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 538324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 539324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** From (A)* build 540324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 541324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * |---| 542324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * v | 543324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * o->o-A-o--o (Transition 2 from block end points at alt 1; follow is Transition 1) 544324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * | ^ 545324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * o---------| (optional branch is 2nd alt of optional block containing A+) 546324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 547324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Meaning that the last (end) NFAState in A points back to A's 548324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * left side NFAState and we add 3 new NFAStates (the 549324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * optional branch is built just like an optional subrule). 550324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * See the Aplus() method for more on the loop back Transition. 551324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * The new node on right edge is set to RIGHT_EDGE_OF_CLOSURE so we 552324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * can detect nested (A*)* loops and insert an extra node. Previously, 553324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * two blocks shared same EOB node. 554324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 555324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * There are 2 or 3 decision points in a A*. If A is not a block (i.e., 556324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * it only has one alt), then there are two decisions: the optional bypass 557324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * and then loopback. If A is a block of alts, then there are three 558324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * decisions: bypass, loopback, and A's decision point. 559324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 560324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Note that the optional bypass must be outside the loop as (A|B)* is 561324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * not the same thing as (A|B|)+. 562324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 563324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This is an accurate NFA representation of the meaning of (A)*, but 564324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * for generating code, I don't need a DFA for the optional branch by 565324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * virtue of how I generate code. The exit-loopback-branch decision 566324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * is sufficient to let me make an appropriate enter, exit, loop 567324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * determination. See codegen.g 568324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 569324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_Astar(StateCluster A) { 570324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState bypassDecisionState = newState(); 571324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver bypassDecisionState.setDescription("enter loop path of ()* block"); 572324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState optionalAlt = newState(); 573324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver optionalAlt.setDescription("epsilon path of ()* block"); 574324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState blockEndNFAState = newState(); 575324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK; 576324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 577324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // don't reuse A.right as loopback if it's right edge of another block 578324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK ) { 579324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // nested A* so make another tail node to be the loop back 580324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // instead of the usual A.right which is the EOB for inner loop 581324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState extraRightEdge = newState(); 582324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(A.right, extraRightEdge, Label.EPSILON); 583324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver A.right = extraRightEdge; 584324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 585324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 586324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // convert A's end block to loopback 587324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver A.right.setDescription("()* loopback"); 588324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Transition 1 to actual block of stuff 589324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(bypassDecisionState, A.left, Label.EPSILON); 590324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Transition 2 optional to bypass 591324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(bypassDecisionState, optionalAlt, Label.EPSILON); 592324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(optionalAlt, blockEndNFAState, Label.EPSILON); 593324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Transition 1 of end block exits 594324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON); 595324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Transition 2 of end block loops 596324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(A.right, A.left, Label.EPSILON); 597324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 598324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver bypassDecisionState.decisionStateType = NFAState.BYPASS; 599324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver A.left.decisionStateType = NFAState.BLOCK_START; 600324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver A.right.decisionStateType = NFAState.LOOPBACK; 601324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 602324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // set EOB markers for Jean 603324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver A.left.endOfBlockStateNumber = A.right.stateNumber; 604324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver bypassDecisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber; 605324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 606324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster g = new StateCluster(bypassDecisionState, blockEndNFAState); 607324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return g; 608324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 609324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 610324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Build an NFA predictor for special rule called Tokens manually that 611324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * predicts which token will succeed. The refs to the rules are not 612324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * RuleRefTransitions as I want DFA conversion to stop at the EOT 613324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * transition on the end of each token, rather than return to Tokens rule. 614324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * If I used normal build_alternativeBlock for this, the RuleRefTransitions 615324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * would save return address when jumping away from Tokens rule. 616324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 617324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * All I do here is build n new states for n rules with an epsilon 618324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * edge to the rule start states and then to the next state in the 619324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * list: 620324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 621324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * o->(A) (a state links to start of A and to next in list) 622324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * | 623324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * o->(B) 624324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * | 625324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * ... 626324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * | 627324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * o->(Z) 628324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 629324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This is the NFA created for the artificial rule created in 630324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Grammar.addArtificialMatchTokensRule(). 631324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 632324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 11/28/2005: removed so we can use normal rule construction for Tokens. 633324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public NFAState build_ArtificialMatchTokensRuleNFA() { 634324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int altNum = 1; 635324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState firstAlt = null; // the start state for the "rule" 636324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState prevAlternative = null; 637324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Iterator iter = nfa.grammar.getRules().iterator(); 638324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // TODO: add a single decision node/state for good description 639324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while (iter.hasNext()) { 640324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Rule r = (Rule) iter.next(); 641324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver String ruleName = r.name; 642324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver String modifier = nfa.grammar.getRuleModifier(ruleName); 643324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) || 644324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver (modifier!=null && 645324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver modifier.equals(Grammar.FRAGMENT_RULE_MODIFIER)) ) 646324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 647324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver continue; // don't loop to yourself or do nontoken rules 648324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 649324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState ruleStartState = nfa.grammar.getRuleStartState(ruleName); 650324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState left = newState(); 651324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver left.setDescription("alt "+altNum+" of artificial rule "+Grammar.ARTIFICIAL_TOKENS_RULENAME); 652324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(left, ruleStartState, Label.EPSILON); 653324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Are we the first alternative? 654324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( firstAlt==null ) { 655324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver firstAlt = left; // track extreme top left node as rule start 656324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 657324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else { 658324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // if not first alternative, must link to this alt from previous 659324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver transitionBetweenStates(prevAlternative, left, Label.EPSILON); 660324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 661324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver prevAlternative = left; 662324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver altNum++; 663324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 664324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver firstAlt.decisionStateType = NFAState.BLOCK_START; 665324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 666324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return firstAlt; 667324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 668324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 669324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 670324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Build an atom with all possible values in its label */ 671324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_Wildcard(GrammarAST associatedAST) { 672324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState left = newState(); 673324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver NFAState right = newState(); 674324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver left.associatedASTNode = associatedAST; 675324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver right.associatedASTNode = associatedAST; 676324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Label label = new Label(nfa.grammar.getTokenTypes()); // char or tokens 677324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Transition e = new Transition(label,right); 678324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver left.addTransition(e); 679324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster g = new StateCluster(left, right); 680324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return g; 681324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 682324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 683324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Build a subrule matching ^(. .*) (any tree or node). Let's use 684324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * (^(. .+) | .) to be safe. 685324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 686324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver public StateCluster build_WildcardTree(GrammarAST associatedAST) { 687324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster wildRoot = build_Wildcard(associatedAST); 688324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 689324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster down = build_Atom(Label.DOWN, associatedAST); 690324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver wildRoot = build_AB(wildRoot,down); // hook in; . DOWN 691324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 692324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // make .+ 693324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster wildChildren = build_Wildcard(associatedAST); 694324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver wildChildren = build_Aplus(wildChildren); 695324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver wildRoot = build_AB(wildRoot,wildChildren); // hook in; . DOWN .+ 696324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 697324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster up = build_Atom(Label.UP, associatedAST); 698324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver wildRoot = build_AB(wildRoot,up); // hook in; . DOWN .+ UP 699324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 700324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // make optional . alt 701324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster optionalNodeAlt = build_Wildcard(associatedAST); 702324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 703324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver List alts = new ArrayList(); 704324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver alts.add(wildRoot); 705324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver alts.add(optionalNodeAlt); 706324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver StateCluster blk = build_AlternativeBlock(alts); 707324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 708324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return blk; 709324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 710324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 711324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Given a collapsed block of alts (a set of atoms), pull out 712324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the set and return it. 713324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 714324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver protected IntSet getCollapsedBlockAsSet(State blk) { 715324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver State s0 = blk; 716324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( s0!=null && s0.transition(0)!=null ) { 717324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver State s1 = s0.transition(0).target; 718324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( s1!=null && s1.transition(0)!=null ) { 719324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Label label = s1.transition(0).label; 720324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( label.isSet() ) { 721324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return label.getSet(); 722324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 723324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 724324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 725324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return null; 726324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 727324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 728324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver private void transitionBetweenStates(NFAState a, NFAState b, int label) { 729324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Transition e = new Transition(label,b); 730324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver a.addTransition(e); 731324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 732324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 733