
//
//  file:  rbbiscan.cpp
//
//  Copyright (C) 2002-2012, International Business Machines Corporation and others.
//  All Rights Reserved.
//
//  This file contains the Rule Based Break Iterator Rule Builder functions for
//                   scanning the rules and assembling a parse tree.  This is the first phase
//                   of compiling the rules.
//
//  The overall compilation of the rules is managed by class RBBIRuleBuilder, which will
//  create and use an instance of this class as part of the process.
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unistr.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uniset.h" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uchar.h" 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uchriter.h" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/parsepos.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/parseerr.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbbirpt.h" // Contains state table for the rbbi rules parser. 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // generated by a Perl script. 
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbbirb.h" 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbbinode.h" 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbbiscan.h" 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbbitblb.h" 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uassert.h" 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 4085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Unicode Set init strings for each of the character classes needed for parsing a rule file. 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// (Initialized with hex values for portability to EBCDIC based machines. 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Really ugly, but there's no good way to avoid it.) 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// The sets are referred to by name in the rbbirpt.txt, which is the 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// source form of the state transition table for the RBBI rule parser. 
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 4985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar gRuleSet_rule_char_pattern[] = { 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // [ ^ [ \ p { Z } \ u 0 0 2 0 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x5b, 0x5e, 0x5b, 0x5c, 0x70, 0x7b, 0x5a, 0x7d, 0x5c, 0x75, 0x30, 0x30, 0x32, 0x30, 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // - \ u 0 0 7 f ] - [ \ p 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x37, 0x66, 0x5d, 0x2d, 0x5b, 0x5c, 0x70, 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // { L } ] - [ \ p { N } ] ] 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x7b, 0x4c, 0x7d, 0x5d, 0x2d, 0x5b, 0x5c, 0x70, 0x7b, 0x4e, 0x7d, 0x5d, 0x5d, 0}; 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar gRuleSet_name_char_pattern[] = { 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// [ _ \ p { L } \ p { N } ] 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x5b, 0x5f, 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x5c, 0x70, 0x7b, 0x4e, 0x7d, 0x5d, 0}; 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar gRuleSet_digit_char_pattern[] = { 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// [ 0 - 9 ] 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x5b, 0x30, 0x2d, 0x39, 0x5d, 0}; 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar gRuleSet_name_start_char_pattern[] = { 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// [ _ \ p { L 
} ] 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0x5b, 0x5f, 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x5d, 0 }; 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar kAny[] = {0x61, 0x6e, 0x79, 0x00}; // "any" 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV RBBISetTable_deleter(void *p) { 7583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius icu::RBBISetTableEl *px = (icu::RBBISetTableEl *)p; 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete px->key; 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Note: px->val is owned by the linked list "fSetsListHead" in scanner. 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Don't delete the value nodes here. 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(px); 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 8585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Constructor. 
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 8985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb) 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB = rb; 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fStackPtr = 0; 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fStack[fStackPtr] = 0; 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fNodeStackPtr = 0; 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRuleNum = 0; 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fNodeStack[0] = NULL; 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fSymbolTable = NULL; 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fSetTable = NULL; 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fScanIndex = 0; 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fNextIndex = 0; 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fReverseRule = FALSE; 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fLookAheadRule = FALSE; 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fLineNum = 1; 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCharNum = 0; 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fQuoteMode = FALSE; 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Do not check status until after all critical fields are 
sufficiently initialized 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // that the destructor can run cleanly. 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*rb->fStatus)) { 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Set up the constant Unicode Sets. 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Note: These could be made static, lazily initialized, and shared among 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // all instances of RBBIRuleScanners. BUT this is quite a bit simpler, 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // and the time to build these few sets should be small compared to a 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // full break iterator build. 12483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fRuleSets[kRuleSet_rule_char-128] 12583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius = UnicodeSet(UnicodeString(gRuleSet_rule_char_pattern), *rb->fStatus); 12683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // fRuleSets[kRuleSet_white_space-128] = [:Pattern_White_Space:] 12783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fRuleSets[kRuleSet_white_space-128]. 
12883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius add(9, 0xd).add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029); 12983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fRuleSets[kRuleSet_name_char-128] 13083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius = UnicodeSet(UnicodeString(gRuleSet_name_char_pattern), *rb->fStatus); 13183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fRuleSets[kRuleSet_name_start_char-128] 13283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius = UnicodeSet(UnicodeString(gRuleSet_name_start_char_pattern), *rb->fStatus); 13383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fRuleSets[kRuleSet_digit_char-128] 13483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius = UnicodeSet(UnicodeString(gRuleSet_digit_char_pattern), *rb->fStatus); 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (*rb->fStatus == U_ILLEGAL_ARGUMENT_ERROR) { 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // This case happens if ICU's data is missing. UnicodeSet tries to look up property 13783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // names from the init string, can't find them, and claims an illegal argument. 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Change the error so that the actual problem will be clearer to users. 
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *rb->fStatus = U_BRK_INIT_ERROR; 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*rb->fStatus)) { 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fSymbolTable = new RBBISymbolTable(this, rb->fRules, *rb->fStatus); 14685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (fSymbolTable == NULL) { 14785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *rb->fStatus = U_MEMORY_ALLOCATION_ERROR; 14885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 14985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fSetTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, rb->fStatus); 15185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (U_FAILURE(*rb->fStatus)) { 15285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 15385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uhash_setValueDeleter(fSetTable, RBBISetTable_deleter); 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 15985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Destructor 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 16385bf2e2fbc60a9f938064abc8127d61da7d19882Claire 
Ho//------------------------------------------------------------------------------ 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRBBIRuleScanner::~RBBIRuleScanner() { 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete fSymbolTable; 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fSetTable != NULL) { 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uhash_close(fSetTable); 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fSetTable = NULL; 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Node Stack. 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Normally has one entry, which is the entire parse tree for the rules. 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If errors occured, there may be additional subtrees left on the stack. 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (fNodeStackPtr > 0) { 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete fNodeStack[fNodeStackPtr]; 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fNodeStackPtr--; 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 18385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// doParseAction Do some action during rule parsing. 
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Called by the parse state machine. 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Actions build the parse tree and Unicode Sets, 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// and maintain the parse stack for nested expressions. 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// TODO: unify EParseAction and RBBI_RuleParseAction enum types. 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// They represent exactly the same thing. They're separate 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// only to work around enum forward declaration restrictions 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// in some compilers, while at the same time avoiding multiple 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// definitions problems. I'm sure that there's a better way. 
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 19685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool RBBIRuleScanner::doParseActions(int32_t action) 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *n = NULL; 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool returnVal = TRUE; 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch (action) { 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doExprStart: 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pushNewNode(RBBINode::opStart); 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRuleNum++; 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doExprOrOperator: 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fixOpStack(RBBINode::precOpCat); 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *orNode = pushNewNode(RBBINode::opOr); 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru orNode->fLeftChild = operandNode; 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru operandNode->fParent = orNode; 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doExprCatOperator: 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // concatenation operator. 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // For the implicit concatenation of adjacent terms in an expression that are 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // not separated by any other operator. Action is invoked between the 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // actions for the two terms. 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fixOpStack(RBBINode::precOpCat); 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *catNode = pushNewNode(RBBINode::opCat); 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru catNode->fLeftChild = operandNode; 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru operandNode->fParent = catNode; 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doLParen: 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Open Paren. 
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // The openParen node is a dummy operation type with a low precedence, 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // which has the affect of ensuring that any real binary op that 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // follows within the parens binds more tightly to the operands than 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // stuff outside of the parens. 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pushNewNode(RBBINode::opLParen); 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doExprRParen: 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fixOpStack(RBBINode::precLParen); 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doNOP: 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doStartAssign: 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // We've just scanned "$variable = " 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // The top of the node stack has the $variable ref node. 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Save the start position of the RHS text in the StartExpression node 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // that precedes the $variableReference node on the stack. 
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // This will eventually be used when saving the full $variable replacement 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // text as a string. 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n = fNodeStack[fNodeStackPtr-1]; 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fFirstPos = fNextIndex; // move past the '=' 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Push a new start-of-expression node; needed to keep parse of the 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // RHS expression happy. 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pushNewNode(RBBINode::opStart); 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doEndAssign: 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // We have reached the end of an assignement statement. 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Current scan char is the ';' that terminates the assignment. 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Terminate expression, leaves expression parse tree rooted in TOS node. 
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fixOpStack(RBBINode::precStart); 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *startExprNode = fNodeStack[fNodeStackPtr-2]; 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *varRefNode = fNodeStack[fNodeStackPtr-1]; 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *RHSExprNode = fNodeStack[fNodeStackPtr]; 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Save original text of right side of assignment, excluding the terminating ';' 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // in the root of the node for the right-hand-side expression. 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RHSExprNode->fFirstPos = startExprNode->fFirstPos; 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RHSExprNode->fLastPos = fScanIndex; 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fRules.extractBetween(RHSExprNode->fFirstPos, RHSExprNode->fLastPos, RHSExprNode->fText); 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Expression parse tree becomes l. child of the $variable reference node. 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru varRefNode->fLeftChild = RHSExprNode; 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RHSExprNode->fParent = varRefNode; 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Make a symbol table entry for the $variableRef node. 
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fSymbolTable->addEntry(varRefNode->fText, varRefNode, *fRB->fStatus); 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*fRB->fStatus)) { 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // This is a round-about way to get the parse position set 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // so that duplicate symbols error messages include a line number. 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode t = *fRB->fStatus; 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *fRB->fStatus = U_ZERO_ERROR; 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(t); 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Clean up the stack. 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete startExprNode; 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fNodeStackPtr-=3; 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doEndOfRule: 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fixOpStack(RBBINode::precStart); // Terminate expression, leaves expression 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*fRB->fStatus)) { // parse tree rooted in TOS node. 
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef RBBI_DEBUG 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rtree")) {printNodeStack("end of rule");} 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_ASSERT(fNodeStackPtr == 1); 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If this rule includes a look-ahead '/', add a endMark node to the 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // expression tree. 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fLookAheadRule) { 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *thisRule = fNodeStack[fNodeStackPtr]; 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *endNode = pushNewNode(RBBINode::endMark); 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *catNode = pushNewNode(RBBINode::opCat); 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fNodeStackPtr -= 2; 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru catNode->fLeftChild = thisRule; 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru catNode->fRightChild = endNode; 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fNodeStack[fNodeStackPtr] = catNode; 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru endNode->fVal = fRuleNum; 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru endNode->fLookAheadEnd = TRUE; 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // All rule expressions are ORed 
together. 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // The ';' that terminates an expression really just functions as a '|' with 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // a low operator prededence. 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Each of the four sets of rules are collected separately. 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // (forward, reverse, safe_forward, safe_reverse) 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // OR this rule into the appropriate group of them. 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode **destRules = (fReverseRule? &fRB->fReverseTree : fRB->fDefaultTree); 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (*destRules != NULL) { 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // This is not the first rule encounted. 
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // OR previous stuff (from *destRules) 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // with the current rule expression (on the Node Stack) 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // with the resulting OR expression going to *destRules 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *thisRule = fNodeStack[fNodeStackPtr]; 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *prevRules = *destRules; 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *orNode = pushNewNode(RBBINode::opOr); 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru orNode->fLeftChild = prevRules; 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prevRules->fParent = orNode; 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru orNode->fRightChild = thisRule; 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru thisRule->fParent = orNode; 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *destRules = orNode; 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // This is the first rule encountered (for this direction). 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Just move its parse tree from the stack to *destRules. 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *destRules = fNodeStack[fNodeStackPtr]; 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fReverseRule = FALSE; // in preparation for the next rule. 
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fLookAheadRule = FALSE; 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fNodeStackPtr = 0; 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doRuleError: 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_BRK_RULE_SYNTAX); 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru returnVal = FALSE; 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doVariableNameExpectedErr: 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_BRK_RULE_SYNTAX); 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Unary operands + ? * 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // These all appear after the operand to which they apply. 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // When we hit one, the operand (may be a whole sub expression) 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // will be on the top of the stack. 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Unary Operator becomes TOS, with the old TOS as its one child. 
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doUnaryOpPlus: 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *plusNode = pushNewNode(RBBINode::opPlus); 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru plusNode->fLeftChild = operandNode; 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru operandNode->fParent = plusNode; 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doUnaryOpQuestion: 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *qNode = pushNewNode(RBBINode::opQuestion); 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru qNode->fLeftChild = operandNode; 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru operandNode->fParent = qNode; 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doUnaryOpStar: 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *starNode = pushNewNode(RBBINode::opStar); 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru starNode->fLeftChild = operandNode; 
411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru operandNode->fParent = starNode; 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doRuleChar: 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // A "Rule Character" is any single character that is a literal part 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // of the regular expression. Like a, b and c in the expression "(abc*) | [:L:]" 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // These are pretty uncommon in break rules; the terms are more commonly 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // sets. To keep things uniform, treat these characters like as 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // sets that just happen to contain only one character. 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n = pushNewNode(RBBINode::setRef); 42383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius findSetFor(UnicodeString(fC.fChar), n); 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fFirstPos = fScanIndex; 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fLastPos = fNextIndex; 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doDotAny: 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // scanned a ".", meaning match any single character. 
432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n = pushNewNode(RBBINode::setRef); 43483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius findSetFor(UnicodeString(TRUE, kAny, 3), n); 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fFirstPos = fScanIndex; 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fLastPos = fNextIndex; 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doSlash: 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Scanned a '/', which identifies a look-ahead break position in a rule. 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n = pushNewNode(RBBINode::lookAhead); 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fVal = fRuleNum; 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fFirstPos = fScanIndex; 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fLastPos = fNextIndex; 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fLookAheadRule = TRUE; 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doStartTagValue: 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Scanned a '{', the opening delimiter for a tag value within a rule. 
454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n = pushNewNode(RBBINode::tag); 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fVal = 0; 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fFirstPos = fScanIndex; 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fLastPos = fNextIndex; 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doTagDigit: 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Just scanned a decimal digit that's part of a tag value 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n = fNodeStack[fNodeStackPtr]; 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t v = u_charDigitValue(fC.fChar); 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_ASSERT(v < 10); 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fVal = n->fVal*10 + v; 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doTagValue: 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n = fNodeStack[fNodeStackPtr]; 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fLastPos = fNextIndex; 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doTagExpectedError: 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_BRK_MALFORMED_RULE_TAG); 
478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru returnVal = FALSE; 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doOptionStart: 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Scanning a !!option. At the start of string. 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fOptionStart = fScanIndex; 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doOptionEnd: 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString opt(fRB->fRules, fOptionStart, fScanIndex-fOptionStart); 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (opt == UNICODE_STRING("chain", 5)) { 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fChainRules = TRUE; 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (opt == UNICODE_STRING("LBCMNoChain", 11)) { 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fLBCMNoChain = TRUE; 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (opt == UNICODE_STRING("forward", 7)) { 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fDefaultTree = &fRB->fForwardTree; 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (opt == UNICODE_STRING("reverse", 7)) { 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fDefaultTree = &fRB->fReverseTree; 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (opt == UNICODE_STRING("safe_forward", 12)) { 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fDefaultTree = &fRB->fSafeFwdTree; 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru } else if (opt == UNICODE_STRING("safe_reverse", 12)) { 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fDefaultTree = &fRB->fSafeRevTree; 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (opt == UNICODE_STRING("lookAheadHardBreak", 18)) { 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fLookAheadHardBreak = TRUE; 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_BRK_UNRECOGNIZED_OPTION); 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doReverseDir: 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fReverseRule = TRUE; 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doStartVariableName: 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n = pushNewNode(RBBINode::varRef); 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*fRB->fStatus)) { 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fFirstPos = fScanIndex; 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doEndVariableName: 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n = fNodeStack[fNodeStackPtr]; 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (n==NULL || 
n->fType != RBBINode::varRef) { 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_BRK_INTERNAL_ERROR); 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fLastPos = fScanIndex; 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fRules.extractBetween(n->fFirstPos+1, n->fLastPos, n->fText); 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Look the newly scanned name up in the symbol table 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If there's an entry, set the l. child of the var ref to the replacement expression. 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // (We also pass through here when scanning assignments, but no harm is done, other 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // than a slight wasted effort that seems hard to avoid. 
Lookup will be null) 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fLeftChild = fSymbolTable->lookupNode(n->fText); 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doCheckVarDef: 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n = fNodeStack[fNodeStackPtr]; 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (n->fLeftChild == NULL) { 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_BRK_UNDEFINED_VARIABLE); 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru returnVal = FALSE; 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doExprFinished: 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doRuleErrorAssignExpr: 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_BRK_ASSIGN_ERROR); 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru returnVal = FALSE; 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doExit: 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru returnVal = FALSE; 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case doScanUnicodeSet: 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scanSet(); 
558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_BRK_INTERNAL_ERROR); 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru returnVal = FALSE; 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return returnVal; 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 57185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Error Report a rule parse error. 574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Only report it if no previous error has been recorded. 
575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 57685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RBBIRuleScanner::error(UErrorCode e) { 578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_SUCCESS(*fRB->fStatus)) { 579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *fRB->fStatus = e; 58085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (fRB->fParseError) { 58185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fRB->fParseError->line = fLineNum; 58285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fRB->fParseError->offset = fCharNum; 58385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fRB->fParseError->preContext[0] = 0; 58485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fRB->fParseError->preContext[0] = 0; 58585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 59285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// fixOpStack The parse stack holds partially assembled chunks of the parse tree. 
595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// An entry on the stack may be as small as a single setRef node, 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// or as large as the parse tree 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// for an entire expression (this will be the one item left on the stack 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// when the parsing of an RBBI rule completes. 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// This function is called when a binary operator is encountered. 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// It looks back up the stack for operators that are not yet associated 602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// with a right operand, and if the precedence of the stacked operator >= 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// the precedence of the current operator, binds the operand left, 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// to the previously encountered operator. 
605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 60685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RBBIRuleScanner::fixOpStack(RBBINode::OpPrecedence p) { 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *n; 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // printNodeStack("entering fixOpStack()"); 610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (;;) { 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n = fNodeStack[fNodeStackPtr-1]; // an operator node 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (n->fPrecedence == 0) { 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPuts("RBBIRuleScanner::fixOpStack, bad operator node"); 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_BRK_INTERNAL_ERROR); 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (n->fPrecedence < p || n->fPrecedence <= RBBINode::precLParen) { 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // The most recent operand goes with the current operator, 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // not with the previously stacked one. 621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Stack operator is a binary op ( '|' or concatenation) 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // TOS operand becomes right child of this operator. 
625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Resulting subexpression becomes the TOS operand. 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fRightChild = fNodeStack[fNodeStackPtr]; 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fNodeStack[fNodeStackPtr]->fParent = n; 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fNodeStackPtr--; 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // printNodeStack("looping in fixOpStack() "); 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (p <= RBBINode::precLParen) { 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Scan is at a right paren or end of expression. 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // The scanned item must match the stack, or else there was an error. 635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Discard the left paren (or start expr) node from the stack, 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // leaving the completed (sub)expression as TOS. 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (n->fPrecedence != p) { 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Right paren encountered matched start of expression node, or 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // end of expression matched with a left paren node. 
640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_BRK_MISMATCHED_PAREN); 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fNodeStack[fNodeStackPtr-1] = fNodeStack[fNodeStackPtr]; 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fNodeStackPtr--; 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Delete the now-discarded LParen or Start node. 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete n; 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // printNodeStack("leaving fixOpStack()"); 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 65385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// findSetFor given a UnicodeString, 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// - find the corresponding Unicode Set (uset node) 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// (create one if necessary) 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// - Set fLeftChild of the caller's node (should be a setRef node) 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// to the uset node 660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Maintain a hash table of uset nodes, so the same one is always used 661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// for the same string. 
662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// If a "to adopt" set is provided and we haven't seen this key before, 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// add the provided set to the hash table. 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// If the string is one (32 bit) char in length, the set contains 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// just one element which is the char in question. 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// If the string is "any", return a set containing all chars. 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 66885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt) { 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBISetTableEl *el; 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // First check whether we've already cached a set for this string. 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If so, just use the cached set in the new node. 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // delete any set provided by the caller, since we own it. 
676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru el = (RBBISetTableEl *)uhash_get(fSetTable, &s); 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (el != NULL) { 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete setToAdopt; 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru node->fLeftChild = el->val; 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_ASSERT(node->fLeftChild->fType == RBBINode::uset); 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Haven't seen this set before. 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If the caller didn't provide us with a prebuilt set, 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // create a new UnicodeSet now. 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (setToAdopt == NULL) { 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (s.compare(kAny, -1) == 0) { 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setToAdopt = new UnicodeSet(0x000000, 0x10ffff); 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = s.char32At(0); 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setToAdopt = new UnicodeSet(c, c); 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Make a new uset node to refer to this UnicodeSet 
699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // This new uset node becomes the child of the caller's setReference node. 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *usetNode = new RBBINode(RBBINode::uset); 70285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (usetNode == NULL) { 70385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho error(U_MEMORY_ALLOCATION_ERROR); 70485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 70585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru usetNode->fInputSet = setToAdopt; 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru usetNode->fParent = node; 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru node->fLeftChild = usetNode; 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru usetNode->fText = s; 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Add the new uset node to the list of all uset nodes. 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fUSetNodes->addElement(usetNode, *fRB->fStatus); 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Add the new set to the set hash table. 
720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru el = (RBBISetTableEl *)uprv_malloc(sizeof(RBBISetTableEl)); 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString *tkey = new UnicodeString(s); 723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tkey == NULL || el == NULL || setToAdopt == NULL) { 72485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho // Delete to avoid memory leak 72585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho delete tkey; 72685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho tkey = NULL; 72785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_free(el); 72885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho el = NULL; 72985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho delete setToAdopt; 73085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho setToAdopt = NULL; 73185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_MEMORY_ALLOCATION_ERROR); 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru el->key = tkey; 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru el->val = usetNode; 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uhash_put(fSetTable, el->key, el, fRB->fStatus); 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Assorted Unicode character 
constants. 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Numeric because there is no portable way to enter them as literals. 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// (Think EBCDIC). 748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar chCR = 0x0d; // New lines, for terminating comments. 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar chLF = 0x0a; 751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar chNEL = 0x85; // NEL newline variant 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar chLS = 0x2028; // Unicode Line Separator 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar chApos = 0x27; // single quote, for quoted chars. 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar chPound = 0x23; // '#', introduces a comment. 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar chBackSlash = 0x5c; // '\' introduces a char escape 756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar chLParen = 0x28; 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar chRParen = 0x29; 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 76085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// stripRules Return a rules string without unnecessary 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// characters. 
764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 76585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) { 767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString strippedRules; 768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int rulesLength = rules.length(); 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int idx = 0; idx < rulesLength; ) { 770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar ch = rules[idx++]; 771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (ch == chPound) { 772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (idx < rulesLength 773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru && ch != chCR && ch != chLF && ch != chNEL) 774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = rules[idx++]; 776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!u_isISOControl(ch)) { 779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strippedRules.append(ch); 780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // strippedRules = strippedRules.unescape(); 783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return strippedRules; 784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 78785bf2e2fbc60a9f938064abc8127d61da7d19882Claire 
Ho//------------------------------------------------------------------------------ 788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// nextCharLL Low Level Next Char from rule input source. 790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Get a char from the input character iterator, 791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// keep track of input position for error reporting. 792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 79385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUChar32 RBBIRuleScanner::nextCharLL() { 795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 ch; 796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fNextIndex >= fRB->fRules.length()) { 798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (UChar32)-1; 799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = fRB->fRules.char32At(fNextIndex); 801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fNextIndex = fRB->fRules.moveIndex32(fNextIndex, 1); 802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (ch == chCR || 804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch == chNEL || 805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch == chLS || 80627f654740f2a26ad62a5c155af9199af9e69b889claireho (ch == chLF && fLastChar != chCR)) { 807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Character is starting a new line. Bump up the line number, and 808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // reset the column to 0. 
809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fLineNum++; 810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCharNum=0; 811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fQuoteMode) { 812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_BRK_NEW_LINE_IN_QUOTED_STRING); 813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fQuoteMode = FALSE; 814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Character is not starting a new line. Except in the case of a 818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // LF following a CR, increment the column position. 819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (ch != chLF) { 820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCharNum++; 821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fLastChar = ch; 824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ch; 825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 82885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// nextChar for rules scanning. At this level, we handle stripping 831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// out comments and processing backslash character escapes. 
832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// The rest of the rules grammar is handled at the next level up. 833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 83485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RBBIRuleScanner::nextChar(RBBIRuleChar &c) { 836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Unicode Character constants needed for the processing done by nextChar(), 838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // in hex because literals wont work on EBCDIC machines. 839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fScanIndex = fNextIndex; 841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c.fChar = nextCharLL(); 842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c.fEscaped = FALSE; 843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // check for '' sequence. 846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // These are recognized in all contexts, whether in quoted text or not. 847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c.fChar == chApos) { 849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fRB->fRules.char32At(fNextIndex) == chApos) { 850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c.fChar = nextCharLL(); // get nextChar officially so character counts 851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c.fEscaped = TRUE; // stay correct. 
852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Single quote, by itself. 856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Toggle quoting mode. 857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Return either '(' or ')', because quotes cause a grouping of the quoted text. 858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fQuoteMode = !fQuoteMode; 859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fQuoteMode == TRUE) { 860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c.fChar = chLParen; 861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c.fChar = chRParen; 863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c.fEscaped = FALSE; // The paren that we return is not escaped. 865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fQuoteMode) { 870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c.fEscaped = TRUE; 871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // We are not in a 'quoted region' of the source. 
875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c.fChar == chPound) { 877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Start of a comment. Consume the rest of it. 878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // The new-line char that terminates the comment is always returned. 879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // It will be treated as white-space, and serves to break up anything 880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // that might otherwise incorrectly clump together with a comment in 881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the middle (a variable name, for example.) 882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (;;) { 883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c.fChar = nextCharLL(); 884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c.fChar == (UChar32)-1 || // EOF 885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c.fChar == chCR || 886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c.fChar == chLF || 887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c.fChar == chNEL || 888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c.fChar == chLS) {break;} 889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c.fChar == (UChar32)-1) { 892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // check for backslash escaped characters. 
897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Use UnicodeString::unescapeAt() to handle them. 898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c.fChar == chBackSlash) { 900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c.fEscaped = TRUE; 901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t startX = fNextIndex; 902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c.fChar = fRB->fRules.unescapeAt(fNextIndex); 903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fNextIndex == startX) { 904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_BRK_HEX_DIGITS_EXPECTED); 905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCharNum += fNextIndex-startX; 907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // putc(c.fChar, stdout); 910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 91285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Parse RBBI rules. The state machine for rules parsing is here. 
915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// The state tables are hand-written in the file rbbirpt.txt, 916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// and converted to the form used here by a perl 917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// script rbbicst.pl 918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 91985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RBBIRuleScanner::parse() { 921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t state; 922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const RBBIRuleTableEl *tableEl; 923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*fRB->fStatus)) { 925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state = 1; 929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextChar(fC); 930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Main loop for the rule parsing state machine. 932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Runs once per state transition. 933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Each time through optionally performs, depending on the state table, 934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // - an advance to the the next input char 935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // - an action to be performed. 
936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // - pushing or popping a state to/from the local state return stack. 937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (;;) { 939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Bail out if anything has gone wrong. 940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // RBBI rule file parsing stops on the first error encountered. 941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*fRB->fStatus)) { 942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Quit if state == 0. This is the normal way to exit the state machine. 946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (state == 0) { 948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Find the state table element that matches the input char from the rule, or the 952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // class of the input character. Start with the first table row for this 953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // state, then linearly scan forward until we find a row that matches the 954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // character. The last row for each state always matches all characters, so 955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the search will stop there, if not before. 
956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tableEl = &gRuleParseStateTable[state]; 958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru #ifdef RBBI_DEBUG 959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { 960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("char, line, col = (\'%c\', %d, %d) state=%s ", 961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fC.fChar, fLineNum, fCharNum, RBBIRuleStateNames[state]); 962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru #endif 964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (;;) { 966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru #ifdef RBBI_DEBUG 967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPrintf(".");} 968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru #endif 969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tableEl->fCharClass < 127 && fC.fEscaped == FALSE && tableEl->fCharClass == fC.fChar) { 970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Table row specified an individual character, not a set, and 971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the input character is not escaped, and 972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the input character matched it. 
973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tableEl->fCharClass == 255) { 976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Table row specified default, match anything character class. 977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tableEl->fCharClass == 254 && fC.fEscaped) { 980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Table row specified "escaped" and the char was escaped. 981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tableEl->fCharClass == 253 && fC.fEscaped && 984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (fC.fChar == 0x50 || fC.fChar == 0x70 )) { 985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Table row specified "escaped P" and the char is either 'p' or 'P'. 986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tableEl->fCharClass == 252 && fC.fChar == (UChar32)-1) { 989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Table row specified eof and we hit eof on the input. 
990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 && // Table specs a char class && 994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fC.fEscaped == FALSE && // char is not escaped && 995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fC.fChar != (UChar32)-1) { // char is not EOF 99683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius U_ASSERT((tableEl->fCharClass-128) < LENGTHOF(fRuleSets)); 99785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (fRuleSets[tableEl->fCharClass-128].contains(fC.fChar)) { 998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Table row specified a character class, or set of characters, 999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // and the current char matches it. 
1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // No match on this row, advance to the next row for this state, 1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru tableEl++; 1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPuts("");} 1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // We've found the row of the state table that matches the current input 1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // character from the rules string. 1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Perform any action specified by this row in the state table. 1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (doParseActions((int32_t)tableEl->fAction) == FALSE) { 1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Break out of the state machine loop if the 1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the action signalled some kind of error, or 1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the action was to exit, occurs on normal end-of-rules-input. 
1017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tableEl->fPushState != 0) { 1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fStackPtr++; 1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fStackPtr >= kStackSize) { 1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_BRK_INTERNAL_ERROR); 1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPuts("RBBIRuleScanner::parse() - state stack overflow."); 1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fStackPtr--; 1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fStack[fStackPtr] = tableEl->fPushState; 1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tableEl->fNextChar) { 1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextChar(fC); 1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Get the next state from the table entry, or from the 1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // state stack if the next state was specified as "pop". 
1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (tableEl->fNextState != 255) { 1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state = tableEl->fNextState; 1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru state = fStack[fStackPtr]; 1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fStackPtr--; 1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fStackPtr < 0) { 1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_BRK_INTERNAL_ERROR); 1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPuts("RBBIRuleScanner::parse() - state stack underflow."); 1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fStackPtr++; 1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If there were NO user specified reverse rules, set up the equivalent of ".*;" 1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fRB->fReverseTree == NULL) { 1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fReverseTree = pushNewNode(RBBINode::opStar); 1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *operand = pushNewNode(RBBINode::setRef); 105683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius findSetFor(UnicodeString(TRUE, kAny, 3), operand); 1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fReverseTree->fLeftChild = operand; 
1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru operand->fParent = fRB->fReverseTree; 1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fNodeStackPtr -= 2; 1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Parsing of the input RBBI rules is complete. 1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // We now have a parse tree for the rule expressions 1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // and a list of all UnicodeSets that are referenced. 1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef RBBI_DEBUG 1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "symbols")) {fSymbolTable->rbbiSymtablePrint();} 1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ptree")) 1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("Completed Forward Rules Parse Tree...\n"); 1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fForwardTree->printTree(TRUE); 1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("\nCompleted Reverse Rules Parse Tree...\n"); 1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fReverseTree->printTree(TRUE); 1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("\nCompleted Safe Point Forward Rules Parse Tree...\n"); 1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fSafeFwdTree->printTree(TRUE); 
1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("\nCompleted Safe Point Reverse Rules Parse Tree...\n"); 1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fSafeRevTree->printTree(TRUE); 1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1083ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 108585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 1087ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// printNodeStack for debugging... 1088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 108985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 1090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef RBBI_DEBUG 1091ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RBBIRuleScanner::printNodeStack(const char *title) { 1092ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int i; 1093ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("%s. 
Dumping node stack...\n", title); 1094ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (i=fNodeStackPtr; i>0; i--) {fNodeStack[i]->printTree(TRUE);} 1095ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1096ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 1097ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1098ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1099ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 110185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 1102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 1103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// pushNewNode create a new RBBINode of the specified type and push it 1104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// onto the stack of nodes. 1105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 110685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 1107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRBBINode *RBBIRuleScanner::pushNewNode(RBBINode::NodeType t) { 1108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fNodeStackPtr++; 1109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fNodeStackPtr >= kStackSize) { 1110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_BRK_INTERNAL_ERROR); 1111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPuts("RBBIRuleScanner::pushNewNode - stack overflow."); 1112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *fRB->fStatus = U_BRK_INTERNAL_ERROR; 1113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 1114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
fNodeStack[fNodeStackPtr] = new RBBINode(t); 1116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fNodeStack[fNodeStackPtr] == NULL) { 1117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *fRB->fStatus = U_MEMORY_ALLOCATION_ERROR; 1118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return fNodeStack[fNodeStackPtr]; 1120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 112485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 1125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 1126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// scanSet Construct a UnicodeSet from the text at the current scan 1127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// position. Advance the scan position to the first character 1128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// after the set. 1129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 1130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// A new RBBI setref node referring to the set is pushed onto the node 1131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// stack. 1132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 1133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// The scan position is normally under the control of the state machine 1134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// that controls rule parsing. UnicodeSets, however, are parsed by 1135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// the UnicodeSet constructor, not by the RBBI rule parser. 
1136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 113785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho//------------------------------------------------------------------------------ 1138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RBBIRuleScanner::scanSet() { 1139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeSet *uset; 1140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ParsePosition pos; 1141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int startPos; 1142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int i; 1143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(*fRB->fStatus)) { 1145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos.setIndex(fScanIndex); 1149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru startPos = fScanIndex; 1150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode localStatus = U_ZERO_ERROR; 115183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius uset = new UnicodeSet(); 115285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (uset == NULL) { 115385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho localStatus = U_MEMORY_ALLOCATION_ERROR; 115483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } else { 115583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus); 115685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 1157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(localStatus)) { 1158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // TODO: Get more accurate position of the error from UnicodeSet's return info. 
1159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // UnicodeSet appears to not be reporting correctly at this time. 1160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru #ifdef RBBI_DEBUG 1161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBIDebugPrintf("UnicodeSet parse postion.ErrorIndex = %d\n", pos.getIndex()); 1162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru #endif 1163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(localStatus); 1164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete uset; 1165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Verify that the set contains at least one code point. 1169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 117083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius U_ASSERT(uset!=NULL); 1171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (uset->isEmpty()) { 1172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // This set is empty. 1173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Make it an error, because it almost certainly is not what the user wanted. 1174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Also, avoids having to think about corner cases in the tree manipulation code 1175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // that occurs later on. 
1176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error(U_BRK_RULE_EMPTY_SET); 1177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete uset; 1178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Advance the RBBI parse postion over the UnicodeSet pattern. 1183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Don't just set fScanIndex because the line/char positions maintained 1184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // for error reporting would be thrown off. 1185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i = pos.getIndex(); 1186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (;;) { 1187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fNextIndex >= i) { 1188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 1189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextCharLL(); 1191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_SUCCESS(*fRB->fStatus)) { 1194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru RBBINode *n; 1195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n = pushNewNode(RBBINode::setRef); 1197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fFirstPos = startPos; 1198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru n->fLastPos = fNextIndex; 1199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fRB->fRules.extractBetween(n->fFirstPos, 
n->fLastPos, n->fText); 1200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // findSetFor() serves several purposes here: 1201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // - Adopts storage for the UnicodeSet, will be responsible for deleting. 1202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // - Mantains collection of all sets in use, needed later for establishing 1203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // character categories for run time engine. 1204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // - Eliminates mulitiple instances of the same set. 1205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // - Creates a new uset node if necessary (if this isn't a duplicate.) 1206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru findSetFor(n->fText, n, uset); 1207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 1212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 1214