12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */
2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/*
52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
61c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert * Copyright (C) 2003-2016, International Business Machines Corporation and others. All Rights Reserved.
72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */
91c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.text;
112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.text.ParsePosition;
132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.HashMap;
142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Assert;
162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Utility;
172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UCharacter;
182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/**
202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller  *  This class is part of the Rule Based Break Iterator rule compiler.
212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller  *  It scans the rules and builds the parse tree.
222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller  *  There is no public API here.
232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller  */
242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerclass RBBIRuleScanner {
251c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private final static int    kStackSize = 100;               // The size of the state stack for
272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //   rules parsing.  Corresponds roughly
282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //   to the depth of parentheses nesting
292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //   that is allowed in the rules.
301c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static class RBBIRuleChar {
322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int             fChar;
332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean         fEscaped;
342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    RBBIRuleBuilder               fRB;              // The rule builder that we are part of.
391c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int                       fScanIndex;        // Index of current character being processed
412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                     //   in the rule input string.
422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int                       fNextIndex;        // Index of the next character, which
432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                     //   is the first character not yet scanned.
442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    boolean                  fQuoteMode;        // Scan is in a 'quoted region'
452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int                       fLineNum;          // Line number in input file.
462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int                       fCharNum;          // Char position within the line.
472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int                       fLastChar;         // Previous char, needed to count CR-LF
482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                     //   as a single line, not two.
491c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    RBBIRuleChar              fC = new RBBIRuleChar();    // Current char for parse state machine
512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                     //   processing.
521c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
531c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    short  fStack[] = new short[kStackSize];  // State stack, holds state pushes
552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int                       fStackPtr;           //  and pops as specified in the state
562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                       //  transition rules.
571c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    RBBINode  fNodeStack[] = new RBBINode[kStackSize]; // Node stack, holds nodes created
592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                           //  during the parse of a rule
602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int                        fNodeStackPtr;
611c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
621c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
631c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert    boolean                    fReverseRule;         // True if the rule currently being scanned
642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                     //  is a reverse direction rule (if it
652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                     //  starts with a '!')
661c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
671c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert    boolean                    fLookAheadRule;       // True if the rule includes a '/'
682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                     //   somewhere within it.
691c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
701c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert    boolean                    fNoChainInRule;       // True if the current rule starts with a '^'.
711c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
721c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
731c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert    RBBISymbolTable            fSymbolTable;         // symbol table, holds definitions of
742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                     //   $variable symbols.
751c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    HashMap<String, RBBISetTableEl> fSetTable = new HashMap<String, RBBISetTableEl>(); // UnicocodeSet hash table, holds indexes to
772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                                                       //   the sets created while parsing rules.
782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                                                       //   The key is the string used for creating
792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                                                       //   the set.
801c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    UnicodeSet      fRuleSets[] = new UnicodeSet[10];    // Unicode Sets that are needed during
822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                     //  the scanning of RBBI rules.  The
8305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                                     //  Indices for these are assigned by the
842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                     //  perl script that builds the state tables.
852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                     //  See rbbirpt.h.
861c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int                        fRuleNum;         // Counts each rule as it is scanned.
881c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int                        fOptionStart;     // Input index of start of a !!option
902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                 //   keyword, while being scanned.
912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
921c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
9305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert   // gRuleSet_rule_char_pattern is characters that may appear as literals in patterns without escaping or quoting.
942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller   static private String gRuleSet_rule_char_pattern       = "[^[\\p{Z}\\u0020-\\u007f]-[\\p{L}]-[\\p{N}]]";
952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller   static private String gRuleSet_name_char_pattern       = "[_\\p{L}\\p{N}]";
962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller   static private String gRuleSet_digit_char_pattern      = "[0-9]";
972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller   static private String gRuleSet_name_start_char_pattern = "[_\\p{L}]";
982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller   static private String gRuleSet_white_space_pattern     = "[\\p{Pattern_White_Space}]";
992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller   static private String kAny =  "any";
1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1011c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
1021c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------------------------------
1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //  Constructor.
1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------------------------------
1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    RBBIRuleScanner(RBBIRuleBuilder rb) {
1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fRB = rb;
1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fLineNum = 1;
1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //  Set up the constant Unicode Sets.
1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //     Note: These could be made static and shared among
1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //            all instances of RBBIRuleScanners.
1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fRuleSets[RBBIRuleParseTable.kRuleSet_rule_char - 128] = new UnicodeSet(gRuleSet_rule_char_pattern);
1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fRuleSets[RBBIRuleParseTable.kRuleSet_white_space - 128] = new UnicodeSet(gRuleSet_white_space_pattern);
1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fRuleSets[RBBIRuleParseTable.kRuleSet_name_char - 128] = new UnicodeSet(gRuleSet_name_char_pattern);
1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fRuleSets[RBBIRuleParseTable.kRuleSet_name_start_char - 128] = new UnicodeSet(gRuleSet_name_start_char_pattern);
1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fRuleSets[RBBIRuleParseTable.kRuleSet_digit_char - 128] = new UnicodeSet(gRuleSet_digit_char_pattern);
1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
123f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        fSymbolTable = new RBBISymbolTable(this);
1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------------------------------
1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
    //  doParseActions   Do some action during rule parsing.
1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                       Called by the parse state machine.
1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                       Actions build the parse tree and Unicode Sets,
1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                       and maintain the parse stack for nested expressions.
1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------------------------------
1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    boolean doParseActions(int action) {
1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        RBBINode n = null;
1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean returnVal = true;
1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        switch (action) {
1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doExprStart:
1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pushNewNode(RBBINode.opStart);
1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fRuleNum++;
1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1461c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert        case RBBIRuleParseTable.doNoChain:
1471c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            // Scanned a '^' while on the rule start state.
1481c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            fNoChainInRule = true;
1491c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            break;
1501c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
1511c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doExprOrOperator: {
1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fixOpStack(RBBINode.precOpCat);
1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RBBINode operandNode = fNodeStack[fNodeStackPtr--];
1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RBBINode orNode = pushNewNode(RBBINode.opOr);
1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            orNode.fLeftChild = operandNode;
1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            operandNode.fParent = orNode;
1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doExprCatOperator:
1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // concatenation operator.
1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // For the implicit concatenation of adjacent terms in an expression
1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // that are
1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //   not separated by any other operator. Action is invoked between the
1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //   actions for the two terms.
1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        {
1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fixOpStack(RBBINode.precOpCat);
1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RBBINode operandNode = fNodeStack[fNodeStackPtr--];
1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RBBINode catNode = pushNewNode(RBBINode.opCat);
1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            catNode.fLeftChild = operandNode;
1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            operandNode.fParent = catNode;
1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doLParen:
            // Open Paren.
            //   The openParen node is a dummy operation type with a low
            //     precedence, which has the effect of ensuring that any real
            //     binary op that follows within the parens binds more tightly
            //     to the operands than stuff outside of the parens.
1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pushNewNode(RBBINode.opLParen);
1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doExprRParen:
1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fixOpStack(RBBINode.precLParen);
1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doNOP:
1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doStartAssign:
1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // We've just scanned "$variable = "
1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // The top of the node stack has the $variable ref node.
1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Save the start position of the RHS text in the StartExpression
1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // node
1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //   that precedes the $variableReference node on the stack.
2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //   This will eventually be used when saving the full $variable
2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // replacement
2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //   text as a string.
2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n = fNodeStack[fNodeStackPtr - 1];
2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fFirstPos = fNextIndex; // move past the '='
2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Push a new start-of-expression node; needed to keep parse of the
2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //   RHS expression happy.
2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pushNewNode(RBBINode.opStart);
2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doEndAssign: {
            // We have reached the end of an assignment statement.
            //   Current scan char is the ';' that terminates the assignment.
2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Terminate expression, leaves expression parse tree rooted in TOS
2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // node.
2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fixOpStack(RBBINode.precStart);
2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RBBINode startExprNode = fNodeStack[fNodeStackPtr - 2];
2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RBBINode varRefNode = fNodeStack[fNodeStackPtr - 1];
2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RBBINode RHSExprNode = fNodeStack[fNodeStackPtr];
2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Save original text of right side of assignment, excluding the
2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // terminating ';'
2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //  in the root of the node for the right-hand-side expression.
2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RHSExprNode.fFirstPos = startExprNode.fFirstPos;
2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RHSExprNode.fLastPos = fScanIndex;
2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // fRB.fRules.extractBetween(RHSExprNode.fFirstPos,
2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // RHSExprNode.fLastPos, RHSExprNode.fText);
2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RHSExprNode.fText = fRB.fRules.substring(RHSExprNode.fFirstPos,
2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    RHSExprNode.fLastPos);
2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Expression parse tree becomes l. child of the $variable reference
2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // node.
2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            varRefNode.fLeftChild = RHSExprNode;
2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RHSExprNode.fParent = varRefNode;
2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Make a symbol table entry for the $variableRef node.
2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fSymbolTable.addEntry(varRefNode.fText, varRefNode);
2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Clean up the stack.
2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fNodeStackPtr -= 3;
2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doEndOfRule: {
2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fixOpStack(RBBINode.precStart); // Terminate expression, leaves
2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                            // expression
2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf("rtree") >= 0) {
2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                printNodeStack("end of rule");
2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Assert.assrt(fNodeStackPtr == 1);
2541c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            RBBINode thisRule = fNodeStack[fNodeStackPtr];
2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // If this rule includes a look-ahead '/', add a endMark node to the
2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //   expression tree.
2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (fLookAheadRule) {
2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                RBBINode endNode = pushNewNode(RBBINode.endMark);
2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                RBBINode catNode = pushNewNode(RBBINode.opCat);
2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fNodeStackPtr -= 2;
2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                catNode.fLeftChild = thisRule;
2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                catNode.fRightChild = endNode;
2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fNodeStack[fNodeStackPtr] = catNode;
2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                endNode.fVal = fRuleNum;
2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                endNode.fLookAheadEnd = true;
2671c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                thisRule = catNode;
2681c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
2691c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                // TODO: Disable chaining out of look-ahead (hard break) rules.
2701c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                //   The break on rule match is forced, so there is no point in building up
2711c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                //   the state table to chain into another rule for a longer match.
2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2741c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            // Mark this node as being the root of a rule.
2751c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            thisRule.fRuleRoot = true;
2761c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
2771c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            // Flag if chaining into this rule is wanted.
2781c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            //
2791c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            if (fRB.fChainRules &&          // If rule chaining is enabled globally via !!chain
2801c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                    !fNoChainInRule) {      //     and no '^' chain-in inhibit was on this rule
2811c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                thisRule.fChainIn = true;
2821c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            }
2831c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
2841c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert
            // All rule expressions are ORed together.
            // The ';' that terminates an expression really just functions as a
            //   '|' with a low operator precedence.
2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //
2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Each of the four sets of rules are collected separately.
2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //  (forward, reverse, safe_forward, safe_reverse)
2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //  OR this rule into the appropriate group of them.
2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //
2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int destRules = (fReverseRule ? RBBIRuleBuilder.fReverseTree : fRB.fDefaultTree);
2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (fRB.fTreeRoots[destRules] != null) {
2981c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                // This is not the first rule encountered.
2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // OR previous stuff (from *destRules)
3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // with the current rule expression (on the Node Stack)
3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //  with the resulting OR expression going to *destRules
3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //
3031c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert                thisRule = fNodeStack[fNodeStackPtr];
3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                RBBINode prevRules = fRB.fTreeRoots[destRules];
3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                RBBINode orNode = pushNewNode(RBBINode.opOr);
3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                orNode.fLeftChild = prevRules;
3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevRules.fParent = orNode;
3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                orNode.fRightChild = thisRule;
3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                thisRule.fParent = orNode;
3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fRB.fTreeRoots[destRules] = orNode;
3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // This is the first rule encountered (for this direction).
3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Just move its parse tree from the stack to *destRules.
3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fRB.fTreeRoots[destRules] = fNodeStack[fNodeStackPtr];
3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fReverseRule = false; // in preparation for the next rule.
3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fLookAheadRule = false;
3181c8a530973739aafa823d758240d2cd5dad96fe3Fredrik Roubert            fNoChainInRule = false;
3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fNodeStackPtr = 0;
3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doRuleError:
3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            error(RBBIRuleBuilder.U_BRK_RULE_SYNTAX);
3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            returnVal = false;
3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doVariableNameExpectedErr:
3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            error(RBBIRuleBuilder.U_BRK_RULE_SYNTAX);
3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //  Unary operands + ? *
3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //    These all appear after the operand to which they apply.
3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //    When we hit one, the operand (may be a whole sub expression)
3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //    will be on the top of the stack.
3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //    Unary Operator becomes TOS, with the old TOS as its one child.
3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doUnaryOpPlus: {
3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RBBINode operandNode = fNodeStack[fNodeStackPtr--];
3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RBBINode plusNode = pushNewNode(RBBINode.opPlus);
3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            plusNode.fLeftChild = operandNode;
3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            operandNode.fParent = plusNode;
3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doUnaryOpQuestion: {
3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RBBINode operandNode = fNodeStack[fNodeStackPtr--];
3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RBBINode qNode = pushNewNode(RBBINode.opQuestion);
3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            qNode.fLeftChild = operandNode;
3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            operandNode.fParent = qNode;
3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doUnaryOpStar: {
3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RBBINode operandNode = fNodeStack[fNodeStackPtr--];
3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RBBINode starNode = pushNewNode(RBBINode.opStar);
3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            starNode.fLeftChild = operandNode;
3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            operandNode.fParent = starNode;
3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doRuleChar:
3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // A "Rule Character" is any single character that is a literal part
3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // of the regular expression. Like a, b and c in the expression "(abc*)
3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // | [:L:]"
3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // These are pretty uncommon in break rules; the terms are more commonly
3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //  sets. To keep things uniform, treat these characters like as
3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // sets that just happen to contain only one character.
3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        {
3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n = pushNewNode(RBBINode.setRef);
3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String s = String.valueOf((char)fC.fChar);
3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            findSetFor(s, n, null);
3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fFirstPos = fScanIndex;
3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fLastPos = fNextIndex;
3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fText = fRB.fRules.substring(n.fFirstPos, n.fLastPos);
3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doDotAny:
3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // scanned a ".", meaning match any single character.
3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        {
3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n = pushNewNode(RBBINode.setRef);
3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            findSetFor(kAny, n, null);
3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fFirstPos = fScanIndex;
3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fLastPos = fNextIndex;
3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fText = fRB.fRules.substring(n.fFirstPos, n.fLastPos);
3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doSlash:
3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Scanned a '/', which identifies a look-ahead break position in a
3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // rule.
3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n = pushNewNode(RBBINode.lookAhead);
3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fVal = fRuleNum;
3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fFirstPos = fScanIndex;
3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fLastPos = fNextIndex;
3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fText = fRB.fRules.substring(n.fFirstPos, n.fLastPos);
3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fLookAheadRule = true;
3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doStartTagValue:
4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Scanned a '{', the opening delimiter for a tag value within a
4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // rule.
4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n = pushNewNode(RBBINode.tag);
4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fVal = 0;
4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fFirstPos = fScanIndex;
4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fLastPos = fNextIndex;
4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doTagDigit:
4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Just scanned a decimal digit that's part of a tag value
4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        {
4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n = fNodeStack[fNodeStackPtr];
4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int v = UCharacter.digit((char) fC.fChar, 10);
4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fVal = n.fVal * 10 + v;
4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doTagValue:
4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n = fNodeStack[fNodeStackPtr];
4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fLastPos = fNextIndex;
4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fText = fRB.fRules.substring(n.fFirstPos, n.fLastPos);
4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doTagExpectedError:
4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            error(RBBIRuleBuilder.U_BRK_MALFORMED_RULE_TAG);
4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            returnVal = false;
4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doOptionStart:
4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Scanning a !!option. At the start of string.
4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fOptionStart = fScanIndex;
4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doOptionEnd: {
4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String opt = fRB.fRules.substring(fOptionStart, fScanIndex);
4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (opt.equals("chain")) {
4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fRB.fChainRules = true;
4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (opt.equals("LBCMNoChain")) {
4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fRB.fLBCMNoChain = true;
4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (opt.equals("forward")) {
4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fRB.fDefaultTree = RBBIRuleBuilder.fForwardTree;
4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (opt.equals("reverse")) {
4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fRB.fDefaultTree = RBBIRuleBuilder.fReverseTree;
4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (opt.equals("safe_forward")) {
4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fRB.fDefaultTree = RBBIRuleBuilder.fSafeFwdTree;
4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (opt.equals("safe_reverse")) {
4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fRB.fDefaultTree = RBBIRuleBuilder.fSafeRevTree;
4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (opt.equals("lookAheadHardBreak")) {
4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fRB.fLookAheadHardBreak = true;
45105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            } else if (opt.equals("quoted_literals_only")) {
45205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                fRuleSets[RBBIRuleParseTable.kRuleSet_rule_char - 128].clear();
45305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            } else if (opt.equals("unquoted_literals")) {
45405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                fRuleSets[RBBIRuleParseTable.kRuleSet_rule_char - 128].applyPattern(gRuleSet_rule_char_pattern);
4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                error(RBBIRuleBuilder.U_BRK_UNRECOGNIZED_OPTION);
4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doReverseDir:
4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fReverseRule = true;
4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doStartVariableName:
4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n = pushNewNode(RBBINode.varRef);
4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fFirstPos = fScanIndex;
4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doEndVariableName:
4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n = fNodeStack[fNodeStackPtr];
4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (n == null || n.fType != RBBINode.varRef) {
4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fLastPos = fScanIndex;
4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fText = fRB.fRules.substring(n.fFirstPos + 1, n.fLastPos);
4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Look the newly scanned name up in the symbol table
4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //   If there's an entry, set the l. child of the var ref to the
4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // replacement expression.
4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //   (We also pass through here when scanning assignments, but no harm
4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // is done, other
4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //    than a slight wasted effort that seems hard to avoid. Lookup will
4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // be null)
4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fLeftChild = fSymbolTable.lookupNode(n.fText);
4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doCheckVarDef:
4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n = fNodeStack[fNodeStackPtr];
4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (n.fLeftChild == null) {
4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                error(RBBIRuleBuilder.U_BRK_UNDEFINED_VARIABLE);
4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                returnVal = false;
4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doExprFinished:
4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doRuleErrorAssignExpr:
5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            error(RBBIRuleBuilder.U_BRK_ASSIGN_ERROR);
5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            returnVal = false;
5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doExit:
5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            returnVal = false;
5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case RBBIRuleParseTable.doScanUnicodeSet:
5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            scanSet();
5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        default:
5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            returnVal = false;
5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return returnVal;
5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------------------------------
5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
    //  error   Throw an IllegalArgumentException in response to a rule parse
    // error.
5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------------------------------
5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    void error(int e) {
5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String s = "Error " + e + " at line " + fLineNum + " column "
5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                + fCharNum;
5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        IllegalArgumentException ex = new IllegalArgumentException(s);
5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        throw ex;
5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------------------------------
5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //  fixOpStack The parse stack holds partially assembled chunks of the parse
5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // tree.
5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //               An entry on the stack may be as small as a single setRef node,
5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //               or as large as the parse tree
    //               for an entire expression (this will be the one item left on the stack
    //               when the parsing of an RBBI rule completes).
5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //               This function is called when a binary operator is encountered.
5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //               It looks back up the stack for operators that are not yet associated
5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //               with a right operand, and if the precedence of the stacked operator >=
5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //               the precedence of the current operator, binds the operand left,
5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //               to the previously encountered operator.
5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------------------------------
5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    void fixOpStack(int p) {
5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        RBBINode n;
5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // printNodeStack("entering fixOpStack()");
5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (;;) {
5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n = fNodeStack[fNodeStackPtr - 1]; // an operator node
5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (n.fPrecedence == 0) {
5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                System.out.print("RBBIRuleScanner.fixOpStack, bad operator node");
5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return;
5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (n.fPrecedence < p || n.fPrecedence <= RBBINode.precLParen) {
5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // The most recent operand goes with the current operator,
5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //   not with the previously stacked one.
5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Stack operator is a binary op ( '|' or concatenation)
5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //   TOS operand becomes right child of this operator.
5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //   Resulting subexpression becomes the TOS operand.
5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            n.fRightChild = fNodeStack[fNodeStackPtr];
5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fNodeStack[fNodeStackPtr].fParent = n;
5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fNodeStackPtr--;
5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // printNodeStack("looping in fixOpStack() ");
5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (p <= RBBINode.precLParen) {
5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Scan is at a right paren or end of expression.
5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //  The scanned item must match the stack, or else there was an
5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // error.
5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //  Discard the left paren (or start expr) node from the stack,
5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //  leaving the completed (sub)expression as TOS.
5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (n.fPrecedence != p) {
5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Right paren encountered matched start of expression node, or
5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // end of expression matched with a left paren node.
5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                error(RBBIRuleBuilder.U_BRK_MISMATCHED_PAREN);
5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fNodeStack[fNodeStackPtr - 1] = fNodeStack[fNodeStackPtr];
5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fNodeStackPtr--;
5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Delete the now-discarded LParen or Start node.
5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // delete n;
5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // printNodeStack("leaving fixOpStack()");
5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------------------
5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //       RBBISetTableEl is an entry in the hash table of UnicodeSets that have
5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                        been encountered. The val Node will be of nodetype uset
5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                        and contain pointers to the actual UnicodeSets.
5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                        The Key is the source string for initializing the set.
6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                        The hash table is used to avoid creating duplicate
6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                        unnamed (not $var references) UnicodeSets.
6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------------------
6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static class RBBISetTableEl {
6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String key;
6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        RBBINode val;
6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------------------------------
6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //   findSetFor given a String,
6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                  - find the corresponding Unicode Set (uset node)
6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                         (create one if necessary)
6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                  - Set fLeftChild of the caller's node (should be a setRef node)
6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                         to the uset node
6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                 Maintain a hash table of uset nodes, so the same one is always used
6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                    for the same string.
6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                 If a "to adopt" set is provided and we haven't seen this key before,
6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                    add the provided set to the hash table.
6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                 If the string is one (32 bit) char in length, the set contains
6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                    just one element which is the char in question.
6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                 If the string is "any", return a set containing all chars.
6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------------------------------
6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    void findSetFor(String s, RBBINode node, UnicodeSet setToAdopt) {
6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        RBBISetTableEl el;
6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // First check whether we've already cached a set for this string.
6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // If so, just use the cached set in the new node.
6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //   delete any set provided by the caller, since we own it.
6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        el = fSetTable.get(s);
6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (el != null) {
6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            node.fLeftChild = el.val;
6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Assert.assrt(node.fLeftChild.fType == RBBINode.uset);
6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return;
6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Haven't seen this set before.
6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // If the caller didn't provide us with a prebuilt set,
6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //   create a new UnicodeSet now.
6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (setToAdopt == null) {
6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (s.equals(kAny)) {
6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                setToAdopt = new UnicodeSet(0x000000, 0x10ffff);
6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int c;
6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c = UTF16.charAt(s, 0);
6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                setToAdopt = new UnicodeSet(c, c);
6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Make a new uset node to refer to this UnicodeSet
6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // This new uset node becomes the child of the caller's setReference
6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // node.
6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        RBBINode usetNode = new RBBINode(RBBINode.uset);
6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        usetNode.fInputSet = setToAdopt;
6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        usetNode.fParent = node;
6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        node.fLeftChild = usetNode;
6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        usetNode.fText = s;
6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Add the new uset node to the list of all uset nodes.
6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fRB.fUSetNodes.add(usetNode);
6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Add the new set to the set hash table.
6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        el = new RBBISetTableEl();
6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        el.key = s;
6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        el.val = usetNode;
6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fSetTable.put(el.key, el);
6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return;
6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    //
    //  Assorted Unicode character constants.
    //     Numeric because there is no portable way to enter them as literals.
    //     (Think EBCDIC).
    //
    //  Both values are handled as line terminators by the scanner:
    //     chNEL is checked by stripRules(), nextCharLL() and nextChar();
    //     chLS  is checked by nextCharLL() and nextChar().
    //
    static final int chNEL = 0x85; //    NEL newline variant (U+0085)

    static final int chLS = 0x2028; //    Unicode Line Separator (U+2028)
6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------------------------------
6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //  stripRules    Return a rules string without unnecessary
6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                characters.
6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------------------------------
6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static String stripRules(String rules) {
6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder strippedRules = new StringBuilder();
6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int rulesLength = rules.length();
7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int idx = 0; idx < rulesLength;) {
7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char ch = rules.charAt(idx++);
7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (ch == '#') {
7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                while (idx < rulesLength
7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        && ch != '\r' && ch != '\n' && ch != chNEL) {
7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ch = rules.charAt(idx++);
7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (!UCharacter.isISOControl(ch)) {
7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                strippedRules.append(ch);
7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return strippedRules.toString();
7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------------------------------
7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //  nextCharLL    Low Level Next Char from rule input source.
7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                Get a char from the input character iterator,
7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                keep track of input position for error reporting.
7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //----------------------------------------------------------------------------------------
7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int nextCharLL() {
7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int ch;
7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (fNextIndex >= fRB.fRules.length()) {
7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ch = UTF16.charAt(fRB.fRules, fNextIndex);
7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fNextIndex = UTF16.moveCodePointOffset(fRB.fRules, fNextIndex, 1);
7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (ch == '\r' ||
7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ch == chNEL ||
7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ch == chLS ||
7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ch == '\n' && fLastChar != '\r') {
7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Character is starting a new line.  Bump up the line number, and
7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //  reset the column to 0.
7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fLineNum++;
7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fCharNum = 0;
7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (fQuoteMode) {
7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                error(RBBIRuleBuilder.U_BRK_NEW_LINE_IN_QUOTED_STRING);
7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fQuoteMode = false;
7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Character is not starting a new line.  Except in the case of a
7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //   LF following a CR, increment the column position.
7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (ch != '\n') {
7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fCharNum++;
7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fLastChar = ch;
7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return ch;
7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //---------------------------------------------------------------------------------
7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //   nextChar     for rules scanning.  At this level, we handle stripping
7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                out comments and processing backslash character escapes.
7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                The rest of the rules grammar is handled at the next level up.
7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //---------------------------------------------------------------------------------
7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    void nextChar(RBBIRuleChar c) {
7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Unicode Character constants needed for the processing done by nextChar(),
7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //   in hex because literals wont work on EBCDIC machines.
7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fScanIndex = fNextIndex;
7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        c.fChar = nextCharLL();
7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        c.fEscaped = false;
7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //  check for '' sequence.
7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //  These are recognized in all contexts, whether in quoted text or not.
7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (c.fChar == '\'') {
7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (UTF16.charAt(fRB.fRules, fNextIndex) == '\'') {
7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c.fChar = nextCharLL(); // get nextChar officially so character counts
7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c.fEscaped = true; //   stay correct.
7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Single quote, by itself.
7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //   Toggle quoting mode.
7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //   Return either '('  or ')', because quotes cause a grouping of the quoted text.
7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fQuoteMode = !fQuoteMode;
7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (fQuoteMode == true) {
7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    c.fChar = '(';
7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    c.fChar = ')';
7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c.fEscaped = false; // The paren that we return is not escaped.
7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return;
7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (fQuoteMode) {
7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            c.fEscaped = true;
7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // We are not in a 'quoted region' of the source.
7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //
7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c.fChar == '#') {
7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Start of a comment.  Consume the rest of it.
8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //  The new-line char that terminates the comment is always returned.
8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //  It will be treated as white-space, and serves to break up anything
8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //    that might otherwise incorrectly clump together with a comment in
8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //    the middle (a variable name, for example.)
8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (;;) {
8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    c.fChar = nextCharLL();
8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (c.fChar == -1 || // EOF
8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        c.fChar == '\r' ||
8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        c.fChar == '\n' ||
8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        c.fChar == chNEL ||
8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        c.fChar == chLS)
8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    {
8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c.fChar == -1) {
8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return;
8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //
8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //  check for backslash escaped characters.
8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //  Use String.unescapeAt() to handle them.
8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //
8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (c.fChar == '\\') {
8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c.fEscaped = true;
8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int[] unescapeIndex = new int[1];
8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                unescapeIndex[0] = fNextIndex;
8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c.fChar = Utility.unescapeAt(fRB.fRules, unescapeIndex);
8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (unescapeIndex[0] == fNextIndex) {
8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    error(RBBIRuleBuilder.U_BRK_HEX_DIGITS_EXPECTED);
8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fCharNum += unescapeIndex[0] - fNextIndex;
8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fNextIndex = unescapeIndex[0];
8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // putc(c.fChar, stdout);
8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //---------------------------------------------------------------------------------
8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //  Parse RBBI rules.   The state machine for rules parsing is here.
8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                      The state tables are hand-written in the file rbbirpt.txt,
8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                      and converted to the form used here by a perl
8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                      script rbbicst.pl
8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //---------------------------------------------------------------------------------
8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    void parse() {
8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int state;
8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        RBBIRuleParseTable.RBBIRuleTableElement tableEl;
8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        state = 1;
8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        nextChar(fC);
8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Main loop for the rule parsing state machine.
8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //   Runs once per state transition.
8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //   Each time through optionally performs, depending on the state table,
8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //      - an advance to the the next input char
8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //      - an action to be performed.
8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //      - pushing or popping a state to/from the local state return stack.
8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (;;) {
8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Quit if state == 0.  This is the normal way to exit the state machine.
8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //
8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (state == 0) {
8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Find the state table element that matches the input char from the rule, or the
8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //    class of the input character.  Start with the first table row for this
8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //    state, then linearly scan forward until we find a row that matches the
8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //    character.  The last row for each state always matches all characters, so
8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //    the search will stop there, if not before.
8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //
8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            tableEl = RBBIRuleParseTable.gRuleParseStateTable[state];
8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf("scan") >= 0) {
8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                System.out.println("char, line, col = (\'" + (char) fC.fChar
8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        + "\', " + fLineNum + ", " + fCharNum + "    state = "
8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        + tableEl.fStateName);
8802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (int tableRow = state;; tableRow++) { // loop over the state table rows associated with this state.
8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                tableEl = RBBIRuleParseTable.gRuleParseStateTable[tableRow];
8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf("scan") >= 0) {
8852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    System.out.print(".");
8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (tableEl.fCharClass < 127 && fC.fEscaped == false
8882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        && tableEl.fCharClass == fC.fChar) {
8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Table row specified an individual character, not a set, and
8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    //   the input character is not escaped, and
8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    //   the input character matched it.
8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (tableEl.fCharClass == 255) {
8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Table row specified default, match anything character class.
8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (tableEl.fCharClass == 254 && fC.fEscaped) {
8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Table row specified "escaped" and the char was escaped.
9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (tableEl.fCharClass == 253 && fC.fEscaped
9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        && (fC.fChar == 0x50 || fC.fChar == 0x70)) {
9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Table row specified "escaped P" and the char is either 'p' or 'P'.
9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (tableEl.fCharClass == 252 && fC.fChar == -1) {
9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Table row specified eof and we hit eof on the input.
9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (tableEl.fCharClass >= 128 && tableEl.fCharClass < 240 && // Table specs a char class &&
9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        fC.fEscaped == false && //   char is not escaped &&
9142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        fC.fChar != -1) { //   char is not EOF
9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    UnicodeSet uniset = fRuleSets[tableEl.fCharClass - 128];
9162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (uniset.contains(fC.fChar)) {
9172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Table row specified a character class, or set of characters,
9182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        //   and the current char matches it.
9192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
9212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf("scan") >= 0) {
9252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                System.out.println("");
9262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //
9282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // We've found the row of the state table that matches the current input
9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //   character from the rules string.
9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Perform any action specified  by this row in the state table.
9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (doParseActions(tableEl.fAction) == false) {
9322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Break out of the state machine loop if the
9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //   the action signalled some kind of error, or
9342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //   the action was to exit, occurs on normal end-of-rules-input.
9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (tableEl.fPushState != 0) {
9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fStackPtr++;
9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (fStackPtr >= kStackSize) {
9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    System.out.println("RBBIRuleScanner.parse() - state stack overflow.");
9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fStack[fStackPtr] = tableEl.fPushState;
9452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (tableEl.fNextChar) {
9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                nextChar(fC);
9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Get the next state from the table entry, or from the
9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //   state stack if the next state was specified as "pop".
9532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (tableEl.fNextState != 255) {
9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                state = tableEl.fNextState;
9552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
9562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                state = fStack[fStackPtr];
9572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fStackPtr--;
9582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (fStackPtr < 0) {
9592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    System.out.println("RBBIRuleScanner.parse() - state stack underflow.");
9602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
96505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
966f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        // If there are no forward rules throw an error.
967f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        //
968f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        if (fRB.fTreeRoots[RBBIRuleBuilder.fForwardTree] == null) {
969f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            error(RBBIRuleBuilder.U_BRK_RULE_SYNTAX);
970f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        }
9712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
9732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // If there were NO user specified reverse rules, set up the equivalent of ".*;"
9742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
9752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (fRB.fTreeRoots[RBBIRuleBuilder.fReverseTree] == null) {
9762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fRB.fTreeRoots[RBBIRuleBuilder.fReverseTree] = pushNewNode(RBBINode.opStar);
9772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            RBBINode operand = pushNewNode(RBBINode.setRef);
9782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            findSetFor(kAny, operand, null);
9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fRB.fTreeRoots[RBBIRuleBuilder.fReverseTree].fLeftChild = operand;
9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            operand.fParent = fRB.fTreeRoots[RBBIRuleBuilder.fReverseTree];
9812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fNodeStackPtr -= 2;
9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
9852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Parsing of the input RBBI rules is complete.
9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // We now have a parse tree for the rule expressions
9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // and a list of all UnicodeSets that are referenced.
9882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
9892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf("symbols") >= 0) {
9902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fSymbolTable.rbbiSymtablePrint();
9912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf("ptree") >= 0) {
9932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            System.out.println("Completed Forward Rules Parse Tree...");
9942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fRB.fTreeRoots[RBBIRuleBuilder.fForwardTree].printTree(true);
9952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            System.out.println("\nCompleted Reverse Rules Parse Tree...");
9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fRB.fTreeRoots[RBBIRuleBuilder.fReverseTree].printTree(true);
9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            System.out.println("\nCompleted Safe Point Forward Rules Parse Tree...");
9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (fRB.fTreeRoots[RBBIRuleBuilder.fSafeFwdTree] == null) {
9992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                System.out.println("  -- null -- ");
10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fRB.fTreeRoots[RBBIRuleBuilder.fSafeFwdTree].printTree(true);
10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            System.out.println("\nCompleted Safe Point Reverse Rules Parse Tree...");
10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (fRB.fTreeRoots[RBBIRuleBuilder.fSafeRevTree] == null) {
10052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                System.out.println("  -- null -- ");
10062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
10072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                fRB.fTreeRoots[RBBIRuleBuilder.fSafeRevTree].printTree(true);
10082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //---------------------------------------------------------------------------------
10132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
10142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //  printNodeStack     for debugging...
10152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
10162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //---------------------------------------------------------------------------------
10172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    ///CLOVER:OFF
10182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    void printNodeStack(String title) {
10192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i;
10202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        System.out.println(title + ".  Dumping node stack...\n");
10212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (i = fNodeStackPtr; i > 0; i--) {
10222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            fNodeStack[i].printTree(true);
10232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    ///CLOVER:ON
10262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //---------------------------------------------------------------------------------
10282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
10292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //  pushNewNode   create a new RBBINode of the specified type and push it
10302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //                onto the stack of nodes.
10312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
10322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //---------------------------------------------------------------------------------
10332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    RBBINode pushNewNode(int nodeType) {
10342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fNodeStackPtr++;
10352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (fNodeStackPtr >= kStackSize) {
10362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            System.out.println("RBBIRuleScanner.pushNewNode - stack overflow.");
10372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            error(RBBIRuleBuilder.U_BRK_INTERNAL_ERROR);
10382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fNodeStack[fNodeStackPtr] = new RBBINode(nodeType);
10402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return fNodeStack[fNodeStackPtr];
10412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //---------------------------------------------------------------------------------
10442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
10452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //  scanSet    Construct a UnicodeSet from the text at the current scan
10462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //             position.  Advance the scan position to the first character
10472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //             after the set.
10482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
10492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //             A new RBBI setref node referring to the set is pushed onto the node
10502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //             stack.
10512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
10522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //             The scan position is normally under the control of the state machine
10532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //             that controls rule parsing.  UnicodeSets, however, are parsed by
10542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //             the UnicodeSet constructor, not by the RBBI rule parser.
10552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //
10562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //---------------------------------------------------------------------------------
10572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    void scanSet() {
10582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        UnicodeSet uset = null;
10592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int startPos;
10602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ParsePosition pos = new ParsePosition(fScanIndex);
10612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i;
10622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        startPos = fScanIndex;
10642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
10652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            uset = new UnicodeSet(fRB.fRules, pos, fSymbolTable, UnicodeSet.IGNORE_SPACE);
10662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch (Exception e) { // TODO:  catch fewer exception types.
10672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Repackage UnicodeSet errors as RBBI rule builder errors, with location info.
10682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            error(RBBIRuleBuilder.U_BRK_MALFORMED_SET);
10692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Verify that the set contains at least one code point.
10722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //
10732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (uset.isEmpty()) {
10742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // This set is empty.
10752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //  Make it an error, because it almost certainly is not what the user wanted.
10762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //  Also, avoids having to think about corner cases in the tree manipulation code
10772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //   that occurs later on.
10782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //  TODO:  this shouldn't be an error; it does happen.
10792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            error(RBBIRuleBuilder.U_BRK_RULE_EMPTY_SET);
10802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Advance the RBBI parse postion over the UnicodeSet pattern.
10832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //   Don't just set fScanIndex because the line/char positions maintained
10842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //   for error reporting would be thrown off.
10852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        i = pos.getIndex();
10862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (;;) {
10872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (fNextIndex >= i) {
10882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
10892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            nextCharLL();
10912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        RBBINode n;
10942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        n = pushNewNode(RBBINode.setRef);
10962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        n.fFirstPos = startPos;
10972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        n.fLastPos = fNextIndex;
10982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        n.fText = fRB.fRules.substring(n.fFirstPos, n.fLastPos);
10992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //  findSetFor() serves several purposes here:
11002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //     - Adopts storage for the UnicodeSet, will be responsible for deleting.
11012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //     - Mantains collection of all sets in use, needed later for establishing
11022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //          character categories for run time engine.
11032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //     - Eliminates mulitiple instances of the same set.
11042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //     - Creates a new uset node if necessary (if this isn't a duplicate.)
11052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        findSetFor(n.fText, n, uset);
11062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
11072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller}
11092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1110