12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */ 2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* 52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 61c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert * Copyright (C) 2003-2016, International Business Machines Corporation and others. All Rights Reserved. 72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ******************************************************************************* 82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 91c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.text; 112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.text.ParsePosition; 132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.HashMap; 142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Assert; 162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Utility; 172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UCharacter; 182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/** 202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This class is part of the Rule Based Break Iterator rule compiler. 212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * It scans the rules and builds the parse tree. 222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * There is no public API here. 232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */ 242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerclass RBBIRuleScanner { 251c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller private final static int kStackSize = 100; // The size of the state stack for 272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // rules parsing. Corresponds roughly 282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // to the depth of parentheses nesting 292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // that is allowed in the rules. 301c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static class RBBIRuleChar { 322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fChar; 332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean fEscaped; 342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBIRuleBuilder fRB; // The rule builder that we are part of. 391c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fScanIndex; // Index of current character being processed 412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // in the rule input string. 422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fNextIndex; // Index of the next character, which 432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // is the first character not yet scanned. 442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean fQuoteMode; // Scan is in a 'quoted region' 452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fLineNum; // Line number in input file. 462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fCharNum; // Char position within the line. 472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fLastChar; // Previous char, needed to count CR-LF 482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // as a single line, not two. 491c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBIRuleChar fC = new RBBIRuleChar(); // Current char for parse state machine 512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // processing. 521c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 531c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller short fStack[] = new short[kStackSize]; // State stack, holds state pushes 552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fStackPtr; // and pops as specified in the state 562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // transition rules. 571c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode fNodeStack[] = new RBBINode[kStackSize]; // Node stack, holds nodes created 592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // during the parse of a rule 602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fNodeStackPtr; 611c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 621c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 631c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert boolean fReverseRule; // True if the rule currently being scanned 642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // is a reverse direction rule (if it 652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // starts with a '!') 661c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 671c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert boolean fLookAheadRule; // True if the rule includes a '/' 682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // somewhere within it. 691c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 701c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert boolean fNoChainInRule; // True if the current rule starts with a '^'. 711c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 721c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 731c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert RBBISymbolTable fSymbolTable; // symbol table, holds definitions of 742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // $variable symbols. 751c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller HashMap<String, RBBISetTableEl> fSetTable = new HashMap<String, RBBISetTableEl>(); // UnicocodeSet hash table, holds indexes to 772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the sets created while parsing rules. 782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The key is the string used for creating 792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the set. 801c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet fRuleSets[] = new UnicodeSet[10]; // Unicode Sets that are needed during 822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the scanning of RBBI rules. The 8305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // Indices for these are assigned by the 842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // perl script that builds the state tables. 852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // See rbbirpt.h. 861c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fRuleNum; // Counts each rule as it is scanned. 881c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int fOptionStart; // Input index of start of a !!option 902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // keyword, while being scanned. 912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 921c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 9305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert // gRuleSet_rule_char_pattern is characters that may appear as literals in patterns without escaping or quoting. 942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static private String gRuleSet_rule_char_pattern = "[^[\\p{Z}\\u0020-\\u007f]-[\\p{L}]-[\\p{N}]]"; 952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static private String gRuleSet_name_char_pattern = "[_\\p{L}\\p{N}]"; 962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static private String gRuleSet_digit_char_pattern = "[0-9]"; 972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static private String gRuleSet_name_start_char_pattern = "[_\\p{L}]"; 982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static private String gRuleSet_white_space_pattern = "[\\p{Pattern_White_Space}]"; 992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static private String kAny = "any"; 1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1011c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 1021c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------------------------------- 1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Constructor. 1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------------------------------- 1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBIRuleScanner(RBBIRuleBuilder rb) { 1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB = rb; 1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fLineNum = 1; 1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Set up the constant Unicode Sets. 1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Note: These could be made static and shared among 1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // all instances of RBBIRuleScanners. 1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRuleSets[RBBIRuleParseTable.kRuleSet_rule_char - 128] = new UnicodeSet(gRuleSet_rule_char_pattern); 1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRuleSets[RBBIRuleParseTable.kRuleSet_white_space - 128] = new UnicodeSet(gRuleSet_white_space_pattern); 1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRuleSets[RBBIRuleParseTable.kRuleSet_name_char - 128] = new UnicodeSet(gRuleSet_name_char_pattern); 1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRuleSets[RBBIRuleParseTable.kRuleSet_name_start_char - 128] = new UnicodeSet(gRuleSet_name_start_char_pattern); 1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRuleSets[RBBIRuleParseTable.kRuleSet_digit_char - 128] = new UnicodeSet(gRuleSet_digit_char_pattern); 1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 123f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert fSymbolTable = new RBBISymbolTable(this); 1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------------------------------- 1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // doParseAction Do some action during rule parsing. 1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Called by the parse state machine. 1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Actions build the parse tree and Unicode Sets, 1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and maintain the parse stack for nested expressions. 1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------------------------------- 1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean doParseActions(int action) { 1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode n = null; 1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller boolean returnVal = true; 1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller switch (action) { 1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doExprStart: 1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pushNewNode(RBBINode.opStart); 1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRuleNum++; 1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1461c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert case RBBIRuleParseTable.doNoChain: 1471c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // Scanned a '^' while on the rule start state. 1481c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert fNoChainInRule = true; 1491c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert break; 1501c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 1511c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doExprOrOperator: { 1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fixOpStack(RBBINode.precOpCat); 1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode operandNode = fNodeStack[fNodeStackPtr--]; 1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode orNode = pushNewNode(RBBINode.opOr); 1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller orNode.fLeftChild = operandNode; 1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller operandNode.fParent = orNode; 1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doExprCatOperator: 1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // concatenation operator. 1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // For the implicit concatenation of adjacent terms in an expression 1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // that are 1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // not separated by any other operator. Action is invoked between the 1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // actions for the two terms. 1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fixOpStack(RBBINode.precOpCat); 1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode operandNode = fNodeStack[fNodeStackPtr--]; 1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode catNode = pushNewNode(RBBINode.opCat); 1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller catNode.fLeftChild = operandNode; 1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller operandNode.fParent = catNode; 1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doLParen: 1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Open Paren. 1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The openParen node is a dummy operation type with a low 1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // precedence, 1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // which has the affect of ensuring that any real binary op that 1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // follows within the parens binds more tightly to the operands than 1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // stuff outside of the parens. 1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pushNewNode(RBBINode.opLParen); 1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doExprRParen: 1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fixOpStack(RBBINode.precLParen); 1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doNOP: 1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doStartAssign: 1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We've just scanned "$variable = " 1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The top of the node stack has the $variable ref node. 1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Save the start position of the RHS text in the StartExpression 1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // node 1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // that precedes the $variableReference node on the stack. 2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This will eventually be used when saving the full $variable 2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // replacement 2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // text as a string. 2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n = fNodeStack[fNodeStackPtr - 1]; 2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fFirstPos = fNextIndex; // move past the '=' 2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Push a new start-of-expression node; needed to keep parse of the 2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // RHS expression happy. 2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller pushNewNode(RBBINode.opStart); 2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doEndAssign: { 2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We have reached the end of an assignement statement. 2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Current scan char is the ';' that terminates the assignment. 2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Terminate expression, leaves expression parse tree rooted in TOS 2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // node. 2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fixOpStack(RBBINode.precStart); 2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode startExprNode = fNodeStack[fNodeStackPtr - 2]; 2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode varRefNode = fNodeStack[fNodeStackPtr - 1]; 2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode RHSExprNode = fNodeStack[fNodeStackPtr]; 2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Save original text of right side of assignment, excluding the 2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // terminating ';' 2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // in the root of the node for the right-hand-side expression. 2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RHSExprNode.fFirstPos = startExprNode.fFirstPos; 2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RHSExprNode.fLastPos = fScanIndex; 2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // fRB.fRules.extractBetween(RHSExprNode.fFirstPos, 2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // RHSExprNode.fLastPos, RHSExprNode.fText); 2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RHSExprNode.fText = fRB.fRules.substring(RHSExprNode.fFirstPos, 2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RHSExprNode.fLastPos); 2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Expression parse tree becomes l. child of the $variable reference 2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // node. 2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller varRefNode.fLeftChild = RHSExprNode; 2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RHSExprNode.fParent = varRefNode; 2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Make a symbol table entry for the $variableRef node. 2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fSymbolTable.addEntry(varRefNode.fText, varRefNode); 2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Clean up the stack. 2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fNodeStackPtr -= 3; 2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doEndOfRule: { 2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fixOpStack(RBBINode.precStart); // Terminate expression, leaves 2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // expression 2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf("rtree") >= 0) { 2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller printNodeStack("end of rule"); 2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Assert.assrt(fNodeStackPtr == 1); 2541c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert RBBINode thisRule = fNodeStack[fNodeStackPtr]; 2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If this rule includes a look-ahead '/', add a endMark node to the 2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // expression tree. 2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fLookAheadRule) { 2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode endNode = pushNewNode(RBBINode.endMark); 2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode catNode = pushNewNode(RBBINode.opCat); 2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fNodeStackPtr -= 2; 2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller catNode.fLeftChild = thisRule; 2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller catNode.fRightChild = endNode; 2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fNodeStack[fNodeStackPtr] = catNode; 2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller endNode.fVal = fRuleNum; 2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller endNode.fLookAheadEnd = true; 2671c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert thisRule = catNode; 2681c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 2691c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // TODO: Disable chaining out of look-ahead (hard break) rules. 2701c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // The break on rule match is forced, so there is no point in building up 2711c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // the state table to chain into another rule for a longer match. 2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2741c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // Mark this node as being the root of a rule. 2751c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert thisRule.fRuleRoot = true; 2761c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 2771c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // Flag if chaining into this rule is wanted. 2781c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // 2791c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert if (fRB.fChainRules && // If rule chaining is enabled globally via !!chain 2801c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert !fNoChainInRule) { // and no '^' chain-in inhibit was on this rule 2811c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert thisRule.fChainIn = true; 2821c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert } 2831c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 2841c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert 2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // All rule expressions are ORed together. 2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The ';' that terminates an expression really just functions as a 2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // '|' with 2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // a low operator prededence. 2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Each of the four sets of rules are collected separately. 2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (forward, reverse, safe_forward, safe_reverse) 2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // OR this rule into the appropriate group of them. 2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int destRules = (fReverseRule ? RBBIRuleBuilder.fReverseTree : fRB.fDefaultTree); 2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fRB.fTreeRoots[destRules] != null) { 2981c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert // This is not the first rule encountered. 2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // OR previous stuff (from *destRules) 3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // with the current rule expression (on the Node Stack) 3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // with the resulting OR expression going to *destRules 3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 3031c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert thisRule = fNodeStack[fNodeStackPtr]; 3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode prevRules = fRB.fTreeRoots[destRules]; 3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode orNode = pushNewNode(RBBINode.opOr); 3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller orNode.fLeftChild = prevRules; 3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller prevRules.fParent = orNode; 3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller orNode.fRightChild = thisRule; 3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller thisRule.fParent = orNode; 3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB.fTreeRoots[destRules] = orNode; 3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This is the first rule encountered (for this direction). 3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Just move its parse tree from the stack to *destRules. 3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB.fTreeRoots[destRules] = fNodeStack[fNodeStackPtr]; 3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fReverseRule = false; // in preparation for the next rule. 3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fLookAheadRule = false; 3181c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert fNoChainInRule = false; 3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fNodeStackPtr = 0; 3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doRuleError: 3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_RULE_SYNTAX); 3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller returnVal = false; 3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doVariableNameExpectedErr: 3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_RULE_SYNTAX); 3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Unary operands + ? * 3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // These all appear after the operand to which they apply. 3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // When we hit one, the operand (may be a whole sub expression) 3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // will be on the top of the stack. 3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Unary Operator becomes TOS, with the old TOS as its one child. 3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doUnaryOpPlus: { 3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode operandNode = fNodeStack[fNodeStackPtr--]; 3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode plusNode = pushNewNode(RBBINode.opPlus); 3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller plusNode.fLeftChild = operandNode; 3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller operandNode.fParent = plusNode; 3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doUnaryOpQuestion: { 3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode operandNode = fNodeStack[fNodeStackPtr--]; 3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode qNode = pushNewNode(RBBINode.opQuestion); 3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller qNode.fLeftChild = operandNode; 3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller operandNode.fParent = qNode; 3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doUnaryOpStar: { 3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode operandNode = fNodeStack[fNodeStackPtr--]; 3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode starNode = pushNewNode(RBBINode.opStar); 3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller starNode.fLeftChild = operandNode; 3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller operandNode.fParent = starNode; 3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doRuleChar: 3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // A "Rule Character" is any single character that is a literal part 3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // of the regular expression. Like a, b and c in the expression "(abc*) 3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // | [:L:]" 3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // These are pretty uncommon in break rules; the terms are more commonly 3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // sets. To keep things uniform, treat these characters like as 3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // sets that just happen to contain only one character. 3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n = pushNewNode(RBBINode.setRef); 3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String s = String.valueOf((char)fC.fChar); 3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller findSetFor(s, n, null); 3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fFirstPos = fScanIndex; 3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fLastPos = fNextIndex; 3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fText = fRB.fRules.substring(n.fFirstPos, n.fLastPos); 3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doDotAny: 3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // scanned a ".", meaning match any single character. 3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n = pushNewNode(RBBINode.setRef); 3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller findSetFor(kAny, n, null); 3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fFirstPos = fScanIndex; 3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fLastPos = fNextIndex; 3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fText = fRB.fRules.substring(n.fFirstPos, n.fLastPos); 3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doSlash: 3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Scanned a '/', which identifies a look-ahead break position in a 3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // rule. 3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n = pushNewNode(RBBINode.lookAhead); 3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fVal = fRuleNum; 3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fFirstPos = fScanIndex; 3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fLastPos = fNextIndex; 3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fText = fRB.fRules.substring(n.fFirstPos, n.fLastPos); 3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fLookAheadRule = true; 3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doStartTagValue: 4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Scanned a '{', the opening delimiter for a tag value within a 4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // rule. 4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n = pushNewNode(RBBINode.tag); 4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fVal = 0; 4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fFirstPos = fScanIndex; 4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fLastPos = fNextIndex; 4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doTagDigit: 4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Just scanned a decimal digit that's part of a tag value 4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n = fNodeStack[fNodeStackPtr]; 4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int v = UCharacter.digit((char) fC.fChar, 10); 4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fVal = n.fVal * 10 + v; 4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doTagValue: 4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n = fNodeStack[fNodeStackPtr]; 4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fLastPos = fNextIndex; 4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fText = fRB.fRules.substring(n.fFirstPos, n.fLastPos); 4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doTagExpectedError: 4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_MALFORMED_RULE_TAG); 4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller returnVal = false; 4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doOptionStart: 4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Scanning a !!option. At the start of string. 4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fOptionStart = fScanIndex; 4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doOptionEnd: { 4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String opt = fRB.fRules.substring(fOptionStart, fScanIndex); 4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (opt.equals("chain")) { 4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB.fChainRules = true; 4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (opt.equals("LBCMNoChain")) { 4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB.fLBCMNoChain = true; 4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (opt.equals("forward")) { 4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB.fDefaultTree = RBBIRuleBuilder.fForwardTree; 4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (opt.equals("reverse")) { 4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB.fDefaultTree = RBBIRuleBuilder.fReverseTree; 4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (opt.equals("safe_forward")) { 4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB.fDefaultTree = RBBIRuleBuilder.fSafeFwdTree; 4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (opt.equals("safe_reverse")) { 4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB.fDefaultTree = RBBIRuleBuilder.fSafeRevTree; 4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else if (opt.equals("lookAheadHardBreak")) { 4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB.fLookAheadHardBreak = true; 45105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else if (opt.equals("quoted_literals_only")) { 45205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fRuleSets[RBBIRuleParseTable.kRuleSet_rule_char - 128].clear(); 45305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert } else if (opt.equals("unquoted_literals")) { 45405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert fRuleSets[RBBIRuleParseTable.kRuleSet_rule_char - 128].applyPattern(gRuleSet_rule_char_pattern); 4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_UNRECOGNIZED_OPTION); 4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doReverseDir: 4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fReverseRule = true; 4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doStartVariableName: 4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n = pushNewNode(RBBINode.varRef); 4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fFirstPos = fScanIndex; 4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doEndVariableName: 4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n = fNodeStack[fNodeStackPtr]; 4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (n == null || n.fType != RBBINode.varRef) { 4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR); 4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fLastPos = fScanIndex; 4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fText = fRB.fRules.substring(n.fFirstPos + 1, n.fLastPos); 4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Look the newly scanned name up in the symbol table 4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If there's an entry, set the l. child of the var ref to the 4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // replacement expression. 4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (We also pass through here when scanning assignments, but no harm 4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // is done, other 4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // than a slight wasted effort that seems hard to avoid. Lookup will 4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // be null) 4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fLeftChild = fSymbolTable.lookupNode(n.fText); 4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doCheckVarDef: 4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n = fNodeStack[fNodeStackPtr]; 4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (n.fLeftChild == null) { 4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_UNDEFINED_VARIABLE); 4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller returnVal = false; 4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doExprFinished: 4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doRuleErrorAssignExpr: 5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_ASSIGN_ERROR); 5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller returnVal = false; 5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doExit: 5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller returnVal = false; 5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller case RBBIRuleParseTable.doScanUnicodeSet: 5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller scanSet(); 5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller default: 5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR); 5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller returnVal = false; 5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return returnVal; 5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------------------------------- 5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Error Throw and IllegalArgumentException in response to a rule parse 5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // error. 5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------------------------------- 5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void error(int e) { 5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String s = "Error " + e + " at line " + fLineNum + " column " 5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller + fCharNum; 5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller IllegalArgumentException ex = new IllegalArgumentException(s); 5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller throw ex; 5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------------------------------- 5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // fixOpStack The parse stack holds partially assembled chunks of the parse 5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // tree. 5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // An entry on the stack may be as small as a single setRef node, 5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // or as large as the parse tree 5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for an entire expression (this will be the one item left on the stack 5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // when the parsing of an RBBI rule completes. 5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This function is called when a binary operator is encountered. 5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // It looks back up the stack for operators that are not yet associated 5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // with a right operand, and if the precedence of the stacked operator >= 5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the precedence of the current operator, binds the operand left, 5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // to the previously encountered operator. 5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------------------------------- 5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void fixOpStack(int p) { 5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode n; 5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // printNodeStack("entering fixOpStack()"); 5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (;;) { 5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n = fNodeStack[fNodeStackPtr - 1]; // an operator node 5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (n.fPrecedence == 0) { 5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.print("RBBIRuleScanner.fixOpStack, bad operator node"); 5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR); 5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (n.fPrecedence < p || n.fPrecedence <= RBBINode.precLParen) { 5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The most recent operand goes with the current operator, 5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // not with the previously stacked one. 5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Stack operator is a binary op ( '|' or concatenation) 5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TOS operand becomes right child of this operator. 5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Resulting subexpression becomes the TOS operand. 5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fRightChild = fNodeStack[fNodeStackPtr]; 5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fNodeStack[fNodeStackPtr].fParent = n; 5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fNodeStackPtr--; 5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // printNodeStack("looping in fixOpStack() "); 5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (p <= RBBINode.precLParen) { 5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Scan is at a right paren or end of expression. 5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The scanned item must match the stack, or else there was an 5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // error. 5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Discard the left paren (or start expr) node from the stack, 5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // leaving the completed (sub)expression as TOS. 5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (n.fPrecedence != p) { 5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Right paren encountered matched start of expression node, or 5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // end of expression matched with a left paren node. 5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_MISMATCHED_PAREN); 5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fNodeStack[fNodeStackPtr - 1] = fNodeStack[fNodeStackPtr]; 5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fNodeStackPtr--; 5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Delete the now-discarded LParen or Start node. 5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // delete n; 5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // printNodeStack("leaving fixOpStack()"); 5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------------------- 5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // RBBISetTableEl is an entry in the hash table of UnicodeSets that have 5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // been encountered. The val Node will be of nodetype uset 5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and contain pointers to the actual UnicodeSets. 5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The Key is the source string for initializing the set. 6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The hash table is used to avoid creating duplicate 6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // unnamed (not $var references) UnicodeSets. 6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------------------- 6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static class RBBISetTableEl { 6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller String key; 6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode val; 6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------------------------------- 6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // findSetFor given a String, 6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // - find the corresponding Unicode Set (uset node) 6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (create one if necessary) 6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // - Set fLeftChild of the caller's node (should be a setRef node) 6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // to the uset node 6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Maintain a hash table of uset nodes, so the same one is always used 6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for the same string. 6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If a "to adopt" set is provided and we haven't seen this key before, 6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // add the provided set to the hash table. 6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If the string is one (32 bit) char in length, the set contains 6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // just one element which is the char in question. 6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If the string is "any", return a set containing all chars. 6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------------------------------- 6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void findSetFor(String s, RBBINode node, UnicodeSet setToAdopt) { 6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBISetTableEl el; 6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // First check whether we've already cached a set for this string. 6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If so, just use the cached set in the new node. 6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // delete any set provided by the caller, since we own it. 6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller el = fSetTable.get(s); 6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (el != null) { 6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node.fLeftChild = el.val; 6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller Assert.assrt(node.fLeftChild.fType == RBBINode.uset); 6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Haven't seen this set before. 6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If the caller didn't provide us with a prebuilt set, 6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // create a new UnicodeSet now. 6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (setToAdopt == null) { 6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (s.equals(kAny)) { 6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setToAdopt = new UnicodeSet(0x000000, 0x10ffff); 6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int c; 6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c = UTF16.charAt(s, 0); 6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller setToAdopt = new UnicodeSet(c, c); 6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Make a new uset node to refer to this UnicodeSet 6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This new uset node becomes the child of the caller's setReference 6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // node. 6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode usetNode = new RBBINode(RBBINode.uset); 6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller usetNode.fInputSet = setToAdopt; 6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller usetNode.fParent = node; 6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller node.fLeftChild = usetNode; 6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller usetNode.fText = s; 6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Add the new uset node to the list of all uset nodes. 6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB.fUSetNodes.add(usetNode); 6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Add the new set to the set hash table. 6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller el = new RBBISetTableEl(); 6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller el.key = s; 6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller el.val = usetNode; 6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fSetTable.put(el.key, el); 6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Assorted Unicode character constants. 6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Numeric because there is no portable way to enter them as literals. 6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // (Think EBCDIC). 6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static final int chNEL = 0x85; // NEL newline variant 6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static final int chLS = 0x2028; // Unicode Line Separator 6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------------------------------- 6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // stripRules Return a rules string without unnecessary 6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // characters. 6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------------------------------- 6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller static String stripRules(String rules) { 6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller StringBuilder strippedRules = new StringBuilder(); 6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int rulesLength = rules.length(); 7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int idx = 0; idx < rulesLength;) { 7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller char ch = rules.charAt(idx++); 7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (ch == '#') { 7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller while (idx < rulesLength 7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && ch != '\r' && ch != '\n' && ch != chNEL) { 7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ch = rules.charAt(idx++); 7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (!UCharacter.isISOControl(ch)) { 7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller strippedRules.append(ch); 7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return strippedRules.toString(); 7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------------------------------- 7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // nextCharLL Low Level Next Char from rule input source. 7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Get a char from the input character iterator, 7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // keep track of input position for error reporting. 7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //---------------------------------------------------------------------------------------- 7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int nextCharLL() { 7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int ch; 7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fNextIndex >= fRB.fRules.length()) { 7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return -1; 7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ch = UTF16.charAt(fRB.fRules, fNextIndex); 7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fNextIndex = UTF16.moveCodePointOffset(fRB.fRules, fNextIndex, 1); 7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (ch == '\r' || 7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ch == chNEL || 7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ch == chLS || 7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ch == '\n' && fLastChar != '\r') { 7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Character is starting a new line. Bump up the line number, and 7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // reset the column to 0. 7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fLineNum++; 7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fCharNum = 0; 7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fQuoteMode) { 7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_NEW_LINE_IN_QUOTED_STRING); 7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fQuoteMode = false; 7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Character is not starting a new line. Except in the case of a 7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // LF following a CR, increment the column position. 7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (ch != '\n') { 7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fCharNum++; 7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fLastChar = ch; 7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return ch; 7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //--------------------------------------------------------------------------------- 7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // nextChar for rules scanning. At this level, we handle stripping 7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // out comments and processing backslash character escapes. 7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The rest of the rules grammar is handled at the next level up. 7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //--------------------------------------------------------------------------------- 7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void nextChar(RBBIRuleChar c) { 7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Unicode Character constants needed for the processing done by nextChar(), 7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // in hex because literals wont work on EBCDIC machines. 7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fScanIndex = fNextIndex; 7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c.fChar = nextCharLL(); 7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c.fEscaped = false; 7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // check for '' sequence. 7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // These are recognized in all contexts, whether in quoted text or not. 7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c.fChar == '\'') { 7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (UTF16.charAt(fRB.fRules, fNextIndex) == '\'') { 7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c.fChar = nextCharLL(); // get nextChar officially so character counts 7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c.fEscaped = true; // stay correct. 7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Single quote, by itself. 7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Toggle quoting mode. 7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Return either '(' or ')', because quotes cause a grouping of the quoted text. 7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fQuoteMode = !fQuoteMode; 7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fQuoteMode == true) { 7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c.fChar = '('; 7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c.fChar = ')'; 7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c.fEscaped = false; // The paren that we return is not escaped. 7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fQuoteMode) { 7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c.fEscaped = true; 7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We are not in a 'quoted region' of the source. 7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c.fChar == '#') { 7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Start of a comment. Consume the rest of it. 8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The new-line char that terminates the comment is always returned. 8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // It will be treated as white-space, and serves to break up anything 8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // that might otherwise incorrectly clump together with a comment in 8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the middle (a variable name, for example.) 8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (;;) { 8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c.fChar = nextCharLL(); 8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c.fChar == -1 || // EOF 8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c.fChar == '\r' || 8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c.fChar == '\n' || 8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c.fChar == chNEL || 8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c.fChar == chLS) 8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller { 8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c.fChar == -1) { 8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return; 8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // check for backslash escaped characters. 8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Use String.unescapeAt() to handle them. 8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (c.fChar == '\\') { 8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c.fEscaped = true; 8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int[] unescapeIndex = new int[1]; 8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller unescapeIndex[0] = fNextIndex; 8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller c.fChar = Utility.unescapeAt(fRB.fRules, unescapeIndex); 8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (unescapeIndex[0] == fNextIndex) { 8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_HEX_DIGITS_EXPECTED); 8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fCharNum += unescapeIndex[0] - fNextIndex; 8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fNextIndex = unescapeIndex[0]; 8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // putc(c.fChar, stdout); 8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //--------------------------------------------------------------------------------- 8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Parse RBBI rules. The state machine for rules parsing is here. 8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The state tables are hand-written in the file rbbirpt.txt, 8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and converted to the form used here by a perl 8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // script rbbicst.pl 8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //--------------------------------------------------------------------------------- 8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void parse() { 8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int state; 8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBIRuleParseTable.RBBIRuleTableElement tableEl; 8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller state = 1; 8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nextChar(fC); 8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Main loop for the rule parsing state machine. 8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Runs once per state transition. 8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Each time through optionally performs, depending on the state table, 8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // - an advance to the the next input char 8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // - an action to be performed. 8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // - pushing or popping a state to/from the local state return stack. 8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (;;) { 8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Quit if state == 0. This is the normal way to exit the state machine. 8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (state == 0) { 8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Find the state table element that matches the input char from the rule, or the 8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // class of the input character. Start with the first table row for this 8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // state, then linearly scan forward until we find a row that matches the 8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // character. The last row for each state always matches all characters, so 8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the search will stop there, if not before. 8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tableEl = RBBIRuleParseTable.gRuleParseStateTable[state]; 8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf("scan") >= 0) { 8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("char, line, col = (\'" + (char) fC.fChar 8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller + "\', " + fLineNum + ", " + fCharNum + " state = " 8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller + tableEl.fStateName); 8802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (int tableRow = state;; tableRow++) { // loop over the state table rows associated with this state. 8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller tableEl = RBBIRuleParseTable.gRuleParseStateTable[tableRow]; 8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf("scan") >= 0) { 8852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.print("."); 8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (tableEl.fCharClass < 127 && fC.fEscaped == false 8882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && tableEl.fCharClass == fC.fChar) { 8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Table row specified an individual character, not a set, and 8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the input character is not escaped, and 8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the input character matched it. 8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (tableEl.fCharClass == 255) { 8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Table row specified default, match anything character class. 8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (tableEl.fCharClass == 254 && fC.fEscaped) { 8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Table row specified "escaped" and the char was escaped. 9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (tableEl.fCharClass == 253 && fC.fEscaped 9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller && (fC.fChar == 0x50 || fC.fChar == 0x70)) { 9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Table row specified "escaped P" and the char is either 'p' or 'P'. 9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (tableEl.fCharClass == 252 && fC.fChar == -1) { 9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Table row specified eof and we hit eof on the input. 9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (tableEl.fCharClass >= 128 && tableEl.fCharClass < 240 && // Table specs a char class && 9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fC.fEscaped == false && // char is not escaped && 9142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fC.fChar != -1) { // char is not EOF 9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet uniset = fRuleSets[tableEl.fCharClass - 128]; 9162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (uniset.contains(fC.fChar)) { 9172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Table row specified a character class, or set of characters, 9182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and the current char matches it. 9192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf("scan") >= 0) { 9252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println(""); 9262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 9282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We've found the row of the state table that matches the current input 9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // character from the rules string. 9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Perform any action specified by this row in the state table. 9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (doParseActions(tableEl.fAction) == false) { 9322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Break out of the state machine loop if the 9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the action signalled some kind of error, or 9342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the action was to exit, occurs on normal end-of-rules-input. 9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (tableEl.fPushState != 0) { 9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fStackPtr++; 9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fStackPtr >= kStackSize) { 9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("RBBIRuleScanner.parse() - state stack overflow."); 9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR); 9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fStack[fStackPtr] = tableEl.fPushState; 9452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (tableEl.fNextChar) { 9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nextChar(fC); 9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Get the next state from the table entry, or from the 9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // state stack if the next state was specified as "pop". 9532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (tableEl.fNextState != 255) { 9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller state = tableEl.fNextState; 9552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 9562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller state = fStack[fStackPtr]; 9572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fStackPtr--; 9582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fStackPtr < 0) { 9592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("RBBIRuleScanner.parse() - state stack underflow."); 9602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR); 9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 96505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert 966f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert // If there are no forward rules throw an error. 967f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert // 968f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert if (fRB.fTreeRoots[RBBIRuleBuilder.fForwardTree] == null) { 969f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert error(RBBIRuleBuilder.U_BRK_RULE_SYNTAX); 970f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert } 9712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 9732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // If there were NO user specified reverse rules, set up the equivalent of ".*;" 9742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 9752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fRB.fTreeRoots[RBBIRuleBuilder.fReverseTree] == null) { 9762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB.fTreeRoots[RBBIRuleBuilder.fReverseTree] = pushNewNode(RBBINode.opStar); 9772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode operand = pushNewNode(RBBINode.setRef); 9782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller findSetFor(kAny, operand, null); 9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB.fTreeRoots[RBBIRuleBuilder.fReverseTree].fLeftChild = operand; 9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller operand.fParent = fRB.fTreeRoots[RBBIRuleBuilder.fReverseTree]; 9812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fNodeStackPtr -= 2; 9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 9852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Parsing of the input RBBI rules is complete. 9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // We now have a parse tree for the rule expressions 9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // and a list of all UnicodeSets that are referenced. 9882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 9892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf("symbols") >= 0) { 9902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fSymbolTable.rbbiSymtablePrint(); 9912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 9922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf("ptree") >= 0) { 9932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("Completed Forward Rules Parse Tree..."); 9942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB.fTreeRoots[RBBIRuleBuilder.fForwardTree].printTree(true); 9952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("\nCompleted Reverse Rules Parse Tree..."); 9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB.fTreeRoots[RBBIRuleBuilder.fReverseTree].printTree(true); 9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("\nCompleted Safe Point Forward Rules Parse Tree..."); 9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fRB.fTreeRoots[RBBIRuleBuilder.fSafeFwdTree] == null) { 9992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println(" -- null -- "); 10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB.fTreeRoots[RBBIRuleBuilder.fSafeFwdTree].printTree(true); 10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("\nCompleted Safe Point Reverse Rules Parse Tree..."); 10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fRB.fTreeRoots[RBBIRuleBuilder.fSafeRevTree] == null) { 10052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println(" -- null -- "); 10062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } else { 10072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fRB.fTreeRoots[RBBIRuleBuilder.fSafeRevTree].printTree(true); 10082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //--------------------------------------------------------------------------------- 10132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 10142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // printNodeStack for debugging... 10152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 10162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //--------------------------------------------------------------------------------- 10172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ///CLOVER:OFF 10182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void printNodeStack(String title) { 10192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i; 10202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println(title + ". Dumping node stack...\n"); 10212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (i = fNodeStackPtr; i > 0; i--) { 10222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fNodeStack[i].printTree(true); 10232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ///CLOVER:ON 10262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //--------------------------------------------------------------------------------- 10282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 10292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // pushNewNode create a new RBBINode of the specified type and push it 10302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // onto the stack of nodes. 10312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 10322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //--------------------------------------------------------------------------------- 10332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode pushNewNode(int nodeType) { 10342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fNodeStackPtr++; 10352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fNodeStackPtr >= kStackSize) { 10362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller System.out.println("RBBIRuleScanner.pushNewNode - stack overflow."); 10372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR); 10382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller fNodeStack[fNodeStackPtr] = new RBBINode(nodeType); 10402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller return fNodeStack[fNodeStackPtr]; 10412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //--------------------------------------------------------------------------------- 10442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 10452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // scanSet Construct a UnicodeSet from the text at the current scan 10462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // position. Advance the scan position to the first character 10472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // after the set. 10482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 10492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // A new RBBI setref node referring to the set is pushed onto the node 10502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // stack. 10512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 10522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // The scan position is normally under the control of the state machine 10532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // that controls rule parsing. UnicodeSets, however, are parsed by 10542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // the UnicodeSet constructor, not by the RBBI rule parser. 10552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 10562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller //--------------------------------------------------------------------------------- 10572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller void scanSet() { 10582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller UnicodeSet uset = null; 10592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int startPos; 10602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller ParsePosition pos = new ParsePosition(fScanIndex); 10612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller int i; 10622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller startPos = fScanIndex; 10642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller try { 10652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller uset = new UnicodeSet(fRB.fRules, pos, fSymbolTable, UnicodeSet.IGNORE_SPACE); 10662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } catch (Exception e) { // TODO: catch fewer exception types. 10672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Repackage UnicodeSet errors as RBBI rule builder errors, with location info. 10682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_MALFORMED_SET); 10692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Verify that the set contains at least one code point. 10722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // 10732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (uset.isEmpty()) { 10742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // This set is empty. 10752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Make it an error, because it almost certainly is not what the user wanted. 10762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Also, avoids having to think about corner cases in the tree manipulation code 10772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // that occurs later on. 10782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // TODO: this shouldn't be an error; it does happen. 10792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller error(RBBIRuleBuilder.U_BRK_RULE_EMPTY_SET); 10802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Advance the RBBI parse postion over the UnicodeSet pattern. 10832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // Don't just set fScanIndex because the line/char positions maintained 10842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // for error reporting would be thrown off. 10852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller i = pos.getIndex(); 10862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller for (;;) { 10872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller if (fNextIndex >= i) { 10882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller break; 10892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller nextCharLL(); 10912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 10922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller RBBINode n; 10942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 10952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n = pushNewNode(RBBINode.setRef); 10962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fFirstPos = startPos; 10972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fLastPos = fNextIndex; 10982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller n.fText = fRB.fRules.substring(n.fFirstPos, n.fLastPos); 10992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // findSetFor() serves several purposes here: 11002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // - Adopts storage for the UnicodeSet, will be responsible for deleting. 11012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // - Mantains collection of all sets in use, needed later for establishing 11022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // character categories for run time engine. 11032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // - Eliminates mulitiple instances of the same set. 11042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller // - Creates a new uset node if necessary (if this isn't a duplicate.) 11052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller findSetFor(n.fText, n, uset); 11062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller } 11072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 11082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller} 11092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1110