1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Copyright (c) 2001-2004, International Business Machines Corporation 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 07/23/01 aliu Creation. 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "strmatch.h" 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbt_data.h" 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "util.h" 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uniset.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar EMPTY[] = { 0 }; // empty string: "" 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringMatcher) 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruStringMatcher::StringMatcher(const UnicodeString& theString, 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start, 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit, 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t segmentNum, 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const TransliterationRuleData& theData) : 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru data(&theData), 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru segmentNumber(segmentNum), 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchStart(-1), 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchLimit(-1) 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru theString.extractBetween(start, limit, pattern); 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruStringMatcher::StringMatcher(const StringMatcher& o) : 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeFunctor(o), 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeMatcher(o), 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeReplacer(o), 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pattern(o.pattern), 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru data(o.data), 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru segmentNumber(o.segmentNumber), 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchStart(o.matchStart), 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchLimit(o.matchLimit) 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruStringMatcher::~StringMatcher() { 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeFunctor 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeFunctor* StringMatcher::clone() const { 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new StringMatcher(*this); 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and return the pointer. 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeMatcher* StringMatcher::toMatcher() const { 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (UnicodeMatcher*) this; 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeFunctor API. Cast 'this' to a UnicodeReplacer* pointer 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and return the pointer. 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeReplacer* StringMatcher::toReplacer() const { 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (UnicodeReplacer*) this; 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUMatchDegree StringMatcher::matches(const Replaceable& text, 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t& offset, 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit, 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool incremental) { 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t cursor = offset; 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (limit < cursor) { 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Match in the reverse direction 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (i=pattern.length()-1; i>=0; --i) { 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar keyChar = pattern.charAt(i); 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeMatcher* subm = data->lookupMatcher(keyChar); 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (subm == 0) { 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (cursor > limit && 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru keyChar == text.charAt(cursor)) { 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --cursor; 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_MISMATCH; 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UMatchDegree m = 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subm->matches(text, cursor, limit, incremental); 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (m != U_MATCH) { 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return m; 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Record the match position, but adjust for a normal 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // forward start, limit, and only if a prior match does not 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // exist -- we want the rightmost match. 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (matchStart < 0) { 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchStart = cursor+1; 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchLimit = offset+1; 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (i=0; i<pattern.length(); ++i) { 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (incremental && cursor == limit) { 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // We've reached the context limit without a mismatch and 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // without completing our match. 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_PARTIAL_MATCH; 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar keyChar = pattern.charAt(i); 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeMatcher* subm = data->lookupMatcher(keyChar); 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (subm == 0) { 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Don't need the cursor < limit check if 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // incremental is TRUE (because it's done above); do need 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // it otherwise. 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (cursor < limit && 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru keyChar == text.charAt(cursor)) { 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++cursor; 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_MISMATCH; 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UMatchDegree m = 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subm->matches(text, cursor, limit, incremental); 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (m != U_MATCH) { 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return m; 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Record the match position 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchStart = offset; 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchLimit = cursor; 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offset = cursor; 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_MATCH; 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString& StringMatcher::toPattern(UnicodeString& result, 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool escapeUnprintable) const 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result.truncate(0); 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString str, quoteBuf; 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (segmentNumber > 0) { 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result.append((UChar)40); /*(*/ 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t i=0; i<pattern.length(); ++i) { 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar keyChar = pattern.charAt(i); 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeMatcher* m = data->lookupMatcher(keyChar); 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (m == 0) { 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::appendToRule(result, keyChar, FALSE, escapeUnprintable, quoteBuf); 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::appendToRule(result, m->toPattern(str, escapeUnprintable), 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TRUE, escapeUnprintable, quoteBuf); 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (segmentNumber > 0) { 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result.append((UChar)41); /*)*/ 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Flush quoteBuf out to result 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::appendToRule(result, -1, 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TRUE, escapeUnprintable, quoteBuf); 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return result; 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool StringMatcher::matchesIndexValue(uint8_t v) const { 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (pattern.length() == 0) { 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c = pattern.char32At(0); 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeMatcher *m = data->lookupMatcher(c); 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (m == 0) ? ((c & 0xFF) == v) : m->matchesIndexValue(v); 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeMatcher 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const { 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 ch; 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t i=0; i<pattern.length(); i+=UTF_CHAR_LENGTH(ch)) { 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = pattern.char32At(i); 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeMatcher* matcher = data->lookupMatcher(ch); 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (matcher == NULL) { 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUnionTo.add(ch); 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matcher->addMatchSetTo(toUnionTo); 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeReplacer API 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t StringMatcher::replace(Replaceable& text, 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start, 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit, 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t& /*cursor*/) { 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t outLen = 0; 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Copy segment with out-of-band data 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t dest = limit; 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If there was no match, that means that a quantifier 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // matched zero-length. E.g., x (a)* y matched "xy". 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (matchStart >= 0) { 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (matchStart != matchLimit) { 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text.copy(matchStart, matchLimit, dest); 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outLen = matchLimit - matchStart; 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text.handleReplaceBetween(start, limit, EMPTY); // delete original text 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return outLen; 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeReplacer API 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString& StringMatcher::toReplacerPattern(UnicodeString& rule, 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool /*escapeUnprintable*/) const { 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // assert(segmentNumber > 0); 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru rule.truncate(0); 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru rule.append((UChar)0x0024 /*$*/); 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::appendNumber(rule, segmentNumber, 10, 1); 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return rule; 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Remove any match info. This must be called before performing a 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * set of matches with this segment. 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void StringMatcher::resetMatch() { 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchStart = matchLimit = -1; 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Union the set of all characters that may output by this object 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * into the given set. 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param toUnionTo the set into which to union the output characters 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid StringMatcher::addReplacementSetTo(UnicodeSet& /*toUnionTo*/) const { 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // The output of this replacer varies; it is the source text between 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // matchStart and matchLimit. Since this varies depending on the 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // input text, we can't compute it here. We can either do nothing 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // or we can add ALL characters to the set. It's probably more useful 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // to do nothing. 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeFunctor 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid StringMatcher::setData(const TransliterationRuleData* d) { 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru data = d; 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i = 0; 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (i<pattern.length()) { 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c = pattern.char32At(i); 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeFunctor* f = data->lookup(c); 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (f != NULL) { 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru f->setData(data); 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i += UTF_CHAR_LENGTH(c); 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//eof 290