1/* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28package org.antlr.tool; 29 30import org.antlr.analysis.*; 31import org.antlr.misc.IntSet; 32import org.antlr.misc.IntervalSet; 33 34import java.util.ArrayList; 35import java.util.Collection; 36import java.util.Iterator; 37import java.util.List; 38 39/** Routines to construct StateClusters from EBNF grammar constructs. 40 * No optimization is done to remove unnecessary epsilon edges. 41 * 42 * TODO: add an optimization that reduces number of states and transitions 43 * will help with speed of conversion and make it easier to view NFA. For 44 * example, o-A->o-->o-B->o should be o-A->o-B->o 45 */ 46public class NFAFactory { 47 /** This factory is attached to a specifc NFA that it is building. 48 * The NFA will be filled up with states and transitions. 49 */ 50 NFA nfa = null; 51 52 public Rule getCurrentRule() { 53 return currentRule; 54 } 55 56 public void setCurrentRule(Rule currentRule) { 57 this.currentRule = currentRule; 58 } 59 60 Rule currentRule = null; 61 62 public NFAFactory(NFA nfa) { 63 nfa.setFactory(this); 64 this.nfa = nfa; 65 } 66 67 public NFAState newState() { 68 NFAState n = new NFAState(nfa); 69 int state = nfa.getNewNFAStateNumber(); 70 n.stateNumber = state; 71 nfa.addState(n); 72 n.enclosingRule = currentRule; 73 return n; 74 } 75 76 /** Optimize an alternative (list of grammar elements). 77 * 78 * Walk the chain of elements (which can be complicated loop blocks...) 79 * and throw away any epsilon transitions used to link up simple elements. 80 * 81 * This only removes 195 states from the java.g's NFA, but every little 82 * bit helps. Perhaps I can improve in the future. 83 */ 84 public void optimizeAlternative(StateCluster alt) { 85 NFAState s = alt.left; 86 while ( s!=alt.right ) { 87 // if it's a block element, jump over it and continue 88 if ( s.endOfBlockStateNumber!=State.INVALID_STATE_NUMBER ) { 89 s = nfa.getState(s.endOfBlockStateNumber); 90 continue; 91 } 92 Transition t = s.transition[0]; 93 if ( t instanceof RuleClosureTransition ) { 94 s = ((RuleClosureTransition) t).followState; 95 continue; 96 } 97 if ( t.label.isEpsilon() && !t.label.isAction() && s.getNumberOfTransitions()==1 ) { 98 // bypass epsilon transition and point to what the epsilon's 99 // target points to unless that epsilon transition points to 100 // a block or loop etc.. Also don't collapse epsilons that 101 // point at the last node of the alt. Don't collapse action edges 102 NFAState epsilonTarget = (NFAState)t.target; 103 if ( epsilonTarget.endOfBlockStateNumber==State.INVALID_STATE_NUMBER && 104 epsilonTarget.transition[0] !=null ) 105 { 106 s.setTransition0(epsilonTarget.transition[0]); 107 /* 108 System.out.println("### opt "+s.stateNumber+"->"+ 109 epsilonTarget.transition(0).target.stateNumber); 110 */ 111 } 112 } 113 s = (NFAState)t.target; 114 } 115 } 116 117 /** From label A build Graph o-A->o */ 118 public StateCluster build_Atom(int label, GrammarAST associatedAST) { 119 NFAState left = newState(); 120 NFAState right = newState(); 121 left.associatedASTNode = associatedAST; 122 right.associatedASTNode = associatedAST; 123 transitionBetweenStates(left, right, label); 124 StateCluster g = new StateCluster(left, right); 125 return g; 126 } 127 128 public StateCluster build_Atom(GrammarAST atomAST) { 129 int tokenType = nfa.grammar.getTokenType(atomAST.getText()); 130 return build_Atom(tokenType, atomAST); 131 } 132 133 /** From set build single edge graph o->o-set->o. To conform to 134 * what an alt block looks like, must have extra state on left. 135 */ 136 public StateCluster build_Set(IntSet set, GrammarAST associatedAST) { 137 NFAState left = newState(); 138 NFAState right = newState(); 139 left.associatedASTNode = associatedAST; 140 right.associatedASTNode = associatedAST; 141 Label label = new Label(set); 142 Transition e = new Transition(label,right); 143 left.addTransition(e); 144 StateCluster g = new StateCluster(left, right); 145 return g; 146 } 147 148 /** Can only complement block of simple alts; can complement build_Set() 149 * result, that is. Get set and complement, replace old with complement. 150 public StateCluster build_AlternativeBlockComplement(StateCluster blk) { 151 State s0 = blk.left; 152 IntSet set = getCollapsedBlockAsSet(s0); 153 if ( set!=null ) { 154 // if set is available, then structure known and blk is a set 155 set = nfa.grammar.complement(set); 156 Label label = s0.transition(0).target.transition(0).label; 157 label.setSet(set); 158 } 159 return blk; 160 } 161 */ 162 163 public StateCluster build_Range(int a, int b) { 164 NFAState left = newState(); 165 NFAState right = newState(); 166 Label label = new Label(IntervalSet.of(a, b)); 167 Transition e = new Transition(label,right); 168 left.addTransition(e); 169 StateCluster g = new StateCluster(left, right); 170 return g; 171 } 172 173 /** From char 'c' build StateCluster o-intValue(c)->o 174 */ 175 public StateCluster build_CharLiteralAtom(GrammarAST charLiteralAST) { 176 int c = Grammar.getCharValueFromGrammarCharLiteral(charLiteralAST.getText()); 177 return build_Atom(c, charLiteralAST); 178 } 179 180 /** From char 'c' build StateCluster o-intValue(c)->o 181 * can include unicode spec likes '\u0024' later. Accepts 182 * actual unicode 16-bit now, of course, by default. 183 * TODO not supplemental char clean! 184 */ 185 public StateCluster build_CharRange(String a, String b) { 186 int from = Grammar.getCharValueFromGrammarCharLiteral(a); 187 int to = Grammar.getCharValueFromGrammarCharLiteral(b); 188 return build_Range(from, to); 189 } 190 191 /** For a non-lexer, just build a simple token reference atom. 192 * For a lexer, a string is a sequence of char to match. That is, 193 * "fog" is treated as 'f' 'o' 'g' not as a single transition in 194 * the DFA. Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states 195 * for n characters. 196 */ 197 public StateCluster build_StringLiteralAtom(GrammarAST stringLiteralAST) { 198 if ( nfa.grammar.type==Grammar.LEXER ) { 199 StringBuffer chars = 200 Grammar.getUnescapedStringFromGrammarStringLiteral(stringLiteralAST.getText()); 201 NFAState first = newState(); 202 NFAState last = null; 203 NFAState prev = first; 204 for (int i=0; i<chars.length(); i++) { 205 int c = chars.charAt(i); 206 NFAState next = newState(); 207 transitionBetweenStates(prev, next, c); 208 prev = last = next; 209 } 210 return new StateCluster(first, last); 211 } 212 213 // a simple token reference in non-Lexers 214 int tokenType = nfa.grammar.getTokenType(stringLiteralAST.getText()); 215 return build_Atom(tokenType, stringLiteralAST); 216 } 217 218 /** For reference to rule r, build 219 * 220 * o-e->(r) o 221 * 222 * where (r) is the start of rule r and the trailing o is not linked 223 * to from rule ref state directly (it's done thru the transition(0) 224 * RuleClosureTransition. 225 * 226 * If the rule r is just a list of tokens, it's block will be just 227 * a set on an edge o->o->o-set->o->o->o, could inline it rather than doing 228 * the rule reference, but i'm not doing this yet as I'm not sure 229 * it would help much in the NFA->DFA construction. 230 * 231 * TODO add to codegen: collapse alt blks that are sets into single matchSet 232 */ 233 public StateCluster build_RuleRef(Rule refDef, NFAState ruleStart) { 234 //System.out.println("building ref to rule "+nfa.grammar.name+"."+refDef.name); 235 NFAState left = newState(); 236 // left.setDescription("ref to "+ruleStart.getDescription()); 237 NFAState right = newState(); 238 // right.setDescription("NFAState following ref to "+ruleStart.getDescription()); 239 Transition e = new RuleClosureTransition(refDef,ruleStart,right); 240 left.addTransition(e); 241 StateCluster g = new StateCluster(left, right); 242 return g; 243 } 244 245 /** From an empty alternative build StateCluster o-e->o */ 246 public StateCluster build_Epsilon() { 247 NFAState left = newState(); 248 NFAState right = newState(); 249 transitionBetweenStates(left, right, Label.EPSILON); 250 StateCluster g = new StateCluster(left, right); 251 return g; 252 } 253 254 /** Build what amounts to an epsilon transition with a semantic 255 * predicate action. The pred is a pointer into the AST of 256 * the SEMPRED token. 257 */ 258 public StateCluster build_SemanticPredicate(GrammarAST pred) { 259 // don't count syn preds 260 if ( !pred.getText().toUpperCase() 261 .startsWith(Grammar.SYNPRED_RULE_PREFIX.toUpperCase()) ) 262 { 263 nfa.grammar.numberOfSemanticPredicates++; 264 } 265 NFAState left = newState(); 266 NFAState right = newState(); 267 Transition e = new Transition(new PredicateLabel(pred), right); 268 left.addTransition(e); 269 StateCluster g = new StateCluster(left, right); 270 return g; 271 } 272 273 /** Build what amounts to an epsilon transition with an action. 274 * The action goes into NFA though it is ignored during analysis. 275 * It slows things down a bit, but I must ignore predicates after 276 * having seen an action (5-5-2008). 277 */ 278 public StateCluster build_Action(GrammarAST action) { 279 NFAState left = newState(); 280 NFAState right = newState(); 281 Transition e = new Transition(new ActionLabel(action), right); 282 left.addTransition(e); 283 return new StateCluster(left, right); 284 } 285 286 /** add an EOF transition to any rule end NFAState that points to nothing 287 * (i.e., for all those rules not invoked by another rule). These 288 * are start symbols then. 289 * 290 * Return the number of grammar entry points; i.e., how many rules are 291 * not invoked by another rule (they can only be invoked from outside). 292 * These are the start rules. 293 */ 294 public int build_EOFStates(Collection rules) { 295 int numberUnInvokedRules = 0; 296 for (Iterator iterator = rules.iterator(); iterator.hasNext();) { 297 Rule r = (Rule) iterator.next(); 298 NFAState endNFAState = r.stopState; 299 // Is this rule a start symbol? (no follow links) 300 if ( endNFAState.transition[0] ==null ) { 301 // if so, then don't let algorithm fall off the end of 302 // the rule, make it hit EOF/EOT. 303 build_EOFState(endNFAState); 304 // track how many rules have been invoked by another rule 305 numberUnInvokedRules++; 306 } 307 } 308 return numberUnInvokedRules; 309 } 310 311 /** set up an NFA NFAState that will yield eof tokens or, 312 * in the case of a lexer grammar, an EOT token when the conversion 313 * hits the end of a rule. 314 */ 315 private void build_EOFState(NFAState endNFAState) { 316 NFAState end = newState(); 317 int label = Label.EOF; 318 if ( nfa.grammar.type==Grammar.LEXER ) { 319 label = Label.EOT; 320 end.setEOTTargetState(true); 321 } 322 /* 323 System.out.println("build "+nfa.grammar.getTokenDisplayName(label)+ 324 " loop on end of state "+endNFAState.getDescription()+ 325 " to state "+end.stateNumber); 326 */ 327 Transition toEnd = new Transition(label, end); 328 endNFAState.addTransition(toEnd); 329 } 330 331 /** From A B build A-e->B (that is, build an epsilon arc from right 332 * of A to left of B). 333 * 334 * As a convenience, return B if A is null or return A if B is null. 335 */ 336 public StateCluster build_AB(StateCluster A, StateCluster B) { 337 if ( A==null ) { 338 return B; 339 } 340 if ( B==null ) { 341 return A; 342 } 343 transitionBetweenStates(A.right, B.left, Label.EPSILON); 344 StateCluster g = new StateCluster(A.left, B.right); 345 return g; 346 } 347 348 /** From a set ('a'|'b') build 349 * 350 * o->o-'a'..'b'->o->o (last NFAState is blockEndNFAState pointed to by all alts) 351 */ 352 public StateCluster build_AlternativeBlockFromSet(StateCluster set) { 353 if ( set==null ) { 354 return null; 355 } 356 357 // single alt, no decision, just return only alt state cluster 358 NFAState startOfAlt = newState(); // must have this no matter what 359 transitionBetweenStates(startOfAlt, set.left, Label.EPSILON); 360 361 return new StateCluster(startOfAlt,set.right); 362 } 363 364 /** From A|B|..|Z alternative block build 365 * 366 * o->o-A->o->o (last NFAState is blockEndNFAState pointed to by all alts) 367 * | ^ 368 * o->o-B->o--| 369 * | | 370 * ... | 371 * | | 372 * o->o-Z->o--| 373 * 374 * So every alternative gets begin NFAState connected by epsilon 375 * and every alt right side points at a block end NFAState. There is a 376 * new NFAState in the NFAState in the StateCluster for each alt plus one for the 377 * end NFAState. 378 * 379 * Special case: only one alternative: don't make a block with alt 380 * begin/end. 381 * 382 * Special case: if just a list of tokens/chars/sets, then collapse 383 * to a single edge'd o-set->o graph. 384 * 385 * Set alt number (1..n) in the left-Transition NFAState. 386 */ 387 public StateCluster build_AlternativeBlock(List alternativeStateClusters) 388 { 389 StateCluster result = null; 390 if ( alternativeStateClusters==null || alternativeStateClusters.size()==0 ) { 391 return null; 392 } 393 394 // single alt case 395 if ( alternativeStateClusters.size()==1 ) { 396 // single alt, no decision, just return only alt state cluster 397 StateCluster g = (StateCluster)alternativeStateClusters.get(0); 398 NFAState startOfAlt = newState(); // must have this no matter what 399 transitionBetweenStates(startOfAlt, g.left, Label.EPSILON); 400 401 //System.out.println("### opt saved start/stop end in (...)"); 402 return new StateCluster(startOfAlt,g.right); 403 } 404 405 // even if we can collapse for lookahead purposes, we will still 406 // need to predict the alts of this subrule in case there are actions 407 // etc... This is the decision that is pointed to from the AST node 408 // (always) 409 NFAState prevAlternative = null; // tracks prev so we can link to next alt 410 NFAState firstAlt = null; 411 NFAState blockEndNFAState = newState(); 412 blockEndNFAState.setDescription("end block"); 413 int altNum = 1; 414 for (Iterator iter = alternativeStateClusters.iterator(); iter.hasNext();) { 415 StateCluster g = (StateCluster) iter.next(); 416 // add begin NFAState for this alt connected by epsilon 417 NFAState left = newState(); 418 left.setDescription("alt "+altNum+" of ()"); 419 transitionBetweenStates(left, g.left, Label.EPSILON); 420 transitionBetweenStates(g.right, blockEndNFAState, Label.EPSILON); 421 // Are we the first alternative? 422 if ( firstAlt==null ) { 423 firstAlt = left; // track extreme left node of StateCluster 424 } 425 else { 426 // if not first alternative, must link to this alt from previous 427 transitionBetweenStates(prevAlternative, left, Label.EPSILON); 428 } 429 prevAlternative = left; 430 altNum++; 431 } 432 433 // return StateCluster pointing representing entire block 434 // Points to first alt NFAState on left, block end on right 435 result = new StateCluster(firstAlt, blockEndNFAState); 436 437 firstAlt.decisionStateType = NFAState.BLOCK_START; 438 439 // set EOB markers for Jean 440 firstAlt.endOfBlockStateNumber = blockEndNFAState.stateNumber; 441 442 return result; 443 } 444 445 /** From (A)? build either: 446 * 447 * o--A->o 448 * | ^ 449 * o---->| 450 * 451 * or, if A is a block, just add an empty alt to the end of the block 452 */ 453 public StateCluster build_Aoptional(StateCluster A) { 454 StateCluster g = null; 455 int n = nfa.grammar.getNumberOfAltsForDecisionNFA(A.left); 456 if ( n==1 ) { 457 // no decision, just wrap in an optional path 458 //NFAState decisionState = newState(); 459 NFAState decisionState = A.left; // resuse left edge 460 decisionState.setDescription("only alt of ()? block"); 461 NFAState emptyAlt = newState(); 462 emptyAlt.setDescription("epsilon path of ()? block"); 463 NFAState blockEndNFAState = null; 464 blockEndNFAState = newState(); 465 transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON); 466 blockEndNFAState.setDescription("end ()? block"); 467 //transitionBetweenStates(decisionState, A.left, Label.EPSILON); 468 transitionBetweenStates(decisionState, emptyAlt, Label.EPSILON); 469 transitionBetweenStates(emptyAlt, blockEndNFAState, Label.EPSILON); 470 471 // set EOB markers for Jean 472 decisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber; 473 blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK; 474 475 g = new StateCluster(decisionState, blockEndNFAState); 476 } 477 else { 478 // a decision block, add an empty alt 479 NFAState lastRealAlt = 480 nfa.grammar.getNFAStateForAltOfDecision(A.left, n); 481 NFAState emptyAlt = newState(); 482 emptyAlt.setDescription("epsilon path of ()? block"); 483 transitionBetweenStates(lastRealAlt, emptyAlt, Label.EPSILON); 484 transitionBetweenStates(emptyAlt, A.right, Label.EPSILON); 485 486 // set EOB markers for Jean (I think this is redundant here) 487 A.left.endOfBlockStateNumber = A.right.stateNumber; 488 A.right.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK; 489 490 g = A; // return same block, but now with optional last path 491 } 492 g.left.decisionStateType = NFAState.OPTIONAL_BLOCK_START; 493 494 return g; 495 } 496 497 /** From (A)+ build 498 * 499 * |---| (Transition 2 from A.right points at alt 1) 500 * v | (follow of loop is Transition 1) 501 * o->o-A-o->o 502 * 503 * Meaning that the last NFAState in A points back to A's left Transition NFAState 504 * and we add a new begin/end NFAState. A can be single alternative or 505 * multiple. 506 * 507 * During analysis we'll call the follow link (transition 1) alt n+1 for 508 * an n-alt A block. 509 */ 510 public StateCluster build_Aplus(StateCluster A) { 511 NFAState left = newState(); 512 NFAState blockEndNFAState = newState(); 513 blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK; 514 515 // don't reuse A.right as loopback if it's right edge of another block 516 if ( A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK ) { 517 // nested A* so make another tail node to be the loop back 518 // instead of the usual A.right which is the EOB for inner loop 519 NFAState extraRightEdge = newState(); 520 transitionBetweenStates(A.right, extraRightEdge, Label.EPSILON); 521 A.right = extraRightEdge; 522 } 523 524 transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON); // follow is Transition 1 525 // turn A's block end into a loopback (acts like alt 2) 526 transitionBetweenStates(A.right, A.left, Label.EPSILON); // loop back Transition 2 527 transitionBetweenStates(left, A.left, Label.EPSILON); 528 529 A.right.decisionStateType = NFAState.LOOPBACK; 530 A.left.decisionStateType = NFAState.BLOCK_START; 531 532 // set EOB markers for Jean 533 A.left.endOfBlockStateNumber = A.right.stateNumber; 534 535 StateCluster g = new StateCluster(left, blockEndNFAState); 536 return g; 537 } 538 539 /** From (A)* build 540 * 541 * |---| 542 * v | 543 * o->o-A-o--o (Transition 2 from block end points at alt 1; follow is Transition 1) 544 * | ^ 545 * o---------| (optional branch is 2nd alt of optional block containing A+) 546 * 547 * Meaning that the last (end) NFAState in A points back to A's 548 * left side NFAState and we add 3 new NFAStates (the 549 * optional branch is built just like an optional subrule). 550 * See the Aplus() method for more on the loop back Transition. 551 * The new node on right edge is set to RIGHT_EDGE_OF_CLOSURE so we 552 * can detect nested (A*)* loops and insert an extra node. Previously, 553 * two blocks shared same EOB node. 554 * 555 * There are 2 or 3 decision points in a A*. If A is not a block (i.e., 556 * it only has one alt), then there are two decisions: the optional bypass 557 * and then loopback. If A is a block of alts, then there are three 558 * decisions: bypass, loopback, and A's decision point. 559 * 560 * Note that the optional bypass must be outside the loop as (A|B)* is 561 * not the same thing as (A|B|)+. 562 * 563 * This is an accurate NFA representation of the meaning of (A)*, but 564 * for generating code, I don't need a DFA for the optional branch by 565 * virtue of how I generate code. The exit-loopback-branch decision 566 * is sufficient to let me make an appropriate enter, exit, loop 567 * determination. See codegen.g 568 */ 569 public StateCluster build_Astar(StateCluster A) { 570 NFAState bypassDecisionState = newState(); 571 bypassDecisionState.setDescription("enter loop path of ()* block"); 572 NFAState optionalAlt = newState(); 573 optionalAlt.setDescription("epsilon path of ()* block"); 574 NFAState blockEndNFAState = newState(); 575 blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK; 576 577 // don't reuse A.right as loopback if it's right edge of another block 578 if ( A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK ) { 579 // nested A* so make another tail node to be the loop back 580 // instead of the usual A.right which is the EOB for inner loop 581 NFAState extraRightEdge = newState(); 582 transitionBetweenStates(A.right, extraRightEdge, Label.EPSILON); 583 A.right = extraRightEdge; 584 } 585 586 // convert A's end block to loopback 587 A.right.setDescription("()* loopback"); 588 // Transition 1 to actual block of stuff 589 transitionBetweenStates(bypassDecisionState, A.left, Label.EPSILON); 590 // Transition 2 optional to bypass 591 transitionBetweenStates(bypassDecisionState, optionalAlt, Label.EPSILON); 592 transitionBetweenStates(optionalAlt, blockEndNFAState, Label.EPSILON); 593 // Transition 1 of end block exits 594 transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON); 595 // Transition 2 of end block loops 596 transitionBetweenStates(A.right, A.left, Label.EPSILON); 597 598 bypassDecisionState.decisionStateType = NFAState.BYPASS; 599 A.left.decisionStateType = NFAState.BLOCK_START; 600 A.right.decisionStateType = NFAState.LOOPBACK; 601 602 // set EOB markers for Jean 603 A.left.endOfBlockStateNumber = A.right.stateNumber; 604 bypassDecisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber; 605 606 StateCluster g = new StateCluster(bypassDecisionState, blockEndNFAState); 607 return g; 608 } 609 610 /** Build an NFA predictor for special rule called Tokens manually that 611 * predicts which token will succeed. The refs to the rules are not 612 * RuleRefTransitions as I want DFA conversion to stop at the EOT 613 * transition on the end of each token, rather than return to Tokens rule. 614 * If I used normal build_alternativeBlock for this, the RuleRefTransitions 615 * would save return address when jumping away from Tokens rule. 616 * 617 * All I do here is build n new states for n rules with an epsilon 618 * edge to the rule start states and then to the next state in the 619 * list: 620 * 621 * o->(A) (a state links to start of A and to next in list) 622 * | 623 * o->(B) 624 * | 625 * ... 626 * | 627 * o->(Z) 628 * 629 * This is the NFA created for the artificial rule created in 630 * Grammar.addArtificialMatchTokensRule(). 631 * 632 * 11/28/2005: removed so we can use normal rule construction for Tokens. 633 public NFAState build_ArtificialMatchTokensRuleNFA() { 634 int altNum = 1; 635 NFAState firstAlt = null; // the start state for the "rule" 636 NFAState prevAlternative = null; 637 Iterator iter = nfa.grammar.getRules().iterator(); 638 // TODO: add a single decision node/state for good description 639 while (iter.hasNext()) { 640 Rule r = (Rule) iter.next(); 641 String ruleName = r.name; 642 String modifier = nfa.grammar.getRuleModifier(ruleName); 643 if ( ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) || 644 (modifier!=null && 645 modifier.equals(Grammar.FRAGMENT_RULE_MODIFIER)) ) 646 { 647 continue; // don't loop to yourself or do nontoken rules 648 } 649 NFAState ruleStartState = nfa.grammar.getRuleStartState(ruleName); 650 NFAState left = newState(); 651 left.setDescription("alt "+altNum+" of artificial rule "+Grammar.ARTIFICIAL_TOKENS_RULENAME); 652 transitionBetweenStates(left, ruleStartState, Label.EPSILON); 653 // Are we the first alternative? 654 if ( firstAlt==null ) { 655 firstAlt = left; // track extreme top left node as rule start 656 } 657 else { 658 // if not first alternative, must link to this alt from previous 659 transitionBetweenStates(prevAlternative, left, Label.EPSILON); 660 } 661 prevAlternative = left; 662 altNum++; 663 } 664 firstAlt.decisionStateType = NFAState.BLOCK_START; 665 666 return firstAlt; 667 } 668 */ 669 670 /** Build an atom with all possible values in its label */ 671 public StateCluster build_Wildcard(GrammarAST associatedAST) { 672 NFAState left = newState(); 673 NFAState right = newState(); 674 left.associatedASTNode = associatedAST; 675 right.associatedASTNode = associatedAST; 676 Label label = new Label(nfa.grammar.getTokenTypes()); // char or tokens 677 Transition e = new Transition(label,right); 678 left.addTransition(e); 679 StateCluster g = new StateCluster(left, right); 680 return g; 681 } 682 683 /** Build a subrule matching ^(. .*) (any tree or node). Let's use 684 * (^(. .+) | .) to be safe. 685 */ 686 public StateCluster build_WildcardTree(GrammarAST associatedAST) { 687 StateCluster wildRoot = build_Wildcard(associatedAST); 688 689 StateCluster down = build_Atom(Label.DOWN, associatedAST); 690 wildRoot = build_AB(wildRoot,down); // hook in; . DOWN 691 692 // make .+ 693 StateCluster wildChildren = build_Wildcard(associatedAST); 694 wildChildren = build_Aplus(wildChildren); 695 wildRoot = build_AB(wildRoot,wildChildren); // hook in; . DOWN .+ 696 697 StateCluster up = build_Atom(Label.UP, associatedAST); 698 wildRoot = build_AB(wildRoot,up); // hook in; . DOWN .+ UP 699 700 // make optional . alt 701 StateCluster optionalNodeAlt = build_Wildcard(associatedAST); 702 703 List alts = new ArrayList(); 704 alts.add(wildRoot); 705 alts.add(optionalNodeAlt); 706 StateCluster blk = build_AlternativeBlock(alts); 707 708 return blk; 709 } 710 711 /** Given a collapsed block of alts (a set of atoms), pull out 712 * the set and return it. 713 */ 714 protected IntSet getCollapsedBlockAsSet(State blk) { 715 State s0 = blk; 716 if ( s0!=null && s0.transition(0)!=null ) { 717 State s1 = s0.transition(0).target; 718 if ( s1!=null && s1.transition(0)!=null ) { 719 Label label = s1.transition(0).label; 720 if ( label.isSet() ) { 721 return label.getSet(); 722 } 723 } 724 } 725 return null; 726 } 727 728 private void transitionBetweenStates(NFAState a, NFAState b, int label) { 729 Transition e = new Transition(label,b); 730 a.addTransition(e); 731 } 732} 733